@fugood/buttress-server 2.24.5 → 2.24.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +238 -19
  2. package/lib/index.mjs +3 -3
  3. package/package.json +2 -2
package/README.md CHANGED
@@ -100,48 +100,267 @@ The token claims `{ k: 'ba', w_id, st: 'ws', sid, jti, exp }` and any buttress-s
100
100
 
101
101
  ## Configuration
102
102
 
103
- Configuration can be provided via:
104
- - `--config` / `-c` flag with TOML file path
103
+ Configuration is loaded from a TOML file passed via `--config` / `-c`. Every top-level table is optional — missing sections fall back to defaults. See `config/sample.toml` for an end-to-end example.
105
104
 
106
- ### Configuration Format (TOML)
105
+ ### Top-level sections
106
+
107
+ | Section | Purpose |
108
+ | ------------------------- | -------------------------------------------------------------------------------------------------- |
109
+ | `[env]` | Environment variables exported into the process **only if not already set** |
110
+ | `[server]` | HTTP/RPC listener (port, log level, body limits) |
111
+ | `[runtime]` | Global defaults shared by every generator (most `[generators.model]` keys may live here too) |
112
+ | `[runtime.session_cache]` | KV-cache reuse store — see [Session State Cache](#session-state-cache) |
113
+ | `[autodiscover]` | LAN UDP / HTTP / mDNS discovery toggles |
114
+ | `[openai_compat]` | Enable `/oai-compat/v1/*` — see [Compatibility Endpoints](#compatibility-endpoints-experimental) |
115
+ | `[anthropic_messages]` | Enable `/anthropic-messages` — see [Compatibility Endpoints](#compatibility-endpoints-experimental)|
116
+ | `[[generators]]` | Array of generator instances — one entry per loaded model |
117
+
118
+ ### `[env]`
107
119
 
108
120
  ```toml
109
- # Environment variables (only set if not already defined in system)
110
121
  [env]
111
- HF_TOKEN = "your_huggingface_token_here"
122
+ HUGGINGFACE_TOKEN = "hf_xxx" # ggml backends read this; HF_TOKEN is not picked up automatically
112
123
  CUDA_VISIBLE_DEVICES = "0"
124
+ ```
125
+
126
+ Values here are exported only when the variable isn't already set in the process — see [Environment Variable Priority](#environment-variable-priority). For HuggingFace auth across all backends, setting `huggingface_token = "hf_xxx"` under `[runtime]` works regardless of which environment variable name is used.
127
+
128
+ ### `[server]`
129
+
130
+ | Key | Type | Default |
131
+ | ----------------- | -------------- | ---------------------------------------------------------------------- |
132
+ | `id` | string | `buttress-<machineId>` — stable id used for autodiscover / binding |
133
+ | `name` | string | `Buttress Server (<short id>)` — display name |
134
+ | `port` | number | `2080` (overridden by `--port`) |
135
+ | `log_level` | `"debug"`/`"info"`/`"warn"`/`"error"` | unset |
136
+ | `max_body_size` | string\|number | `"50MB"` — e.g. `"100MB"`, `"1GB"`, or raw bytes |
137
+ | `session_timeout` | string\|number | `60000` ms — accepts ms numbers or duration strings (`"30s"`) |
138
+ | `temp_file_dir` | string | `$TMPDIR/.buttress` |
139
+
140
+ ### `[runtime]` — global generator defaults
141
+
142
+ Most ggml-llm `[generators.model]` keys can also live in `[runtime]` as defaults. Per-generator values win; otherwise the runtime default applies.
143
+
144
+ | Key | Type | Notes |
145
+ | ---------------------------- | ----------------------------- | ---------------------------------------------------------------------- |
146
+ | `cache_dir` | string | Model + metadata cache root (default `~/.buttress/models`) |
147
+ | `huggingface_token` | string | Falls back to `$HUGGINGFACE_TOKEN` |
148
+ | `http_headers` | table | Extra headers attached to HF / HTTP downloads |
149
+ | `context_release_delay_ms` | number | Idle time before unloading a context (default `10000`; `0` = immediate)|
150
+ | `prefer_variants` | string[] | Override variant probe order (ggml backends) |
151
+ | `n_threads` | number | CPU thread count |
152
+ | `n_ctx` | number | Context window (per-model value wins; auto-capped at training context) |
153
+ | `n_gpu_layers` | number\|`"auto"` | Layers offloaded to GPU (default `"auto"`) |
154
+ | `n_batch` / `n_ubatch` | number | Prompt batch / micro-batch size. **Note:** `n_batch` has a model-level default of `512` that shadows the runtime value unless `[generators.model] n_batch` is set explicitly. |
155
+ | `n_parallel` | number | Parallel sequences (default `4`) |
156
+ | `n_cpu_moe` | number | MoE expert layers offloaded to CPU |
157
+ | `flash_attn_type` | `"on"` / `"off"` / `"auto"` | Defaults to `"auto"` when a GPU backend is selected and to `"off"` on CPU. An explicitly set value (`"on"` / `"off"` / `"auto"`) overrides that default. |
158
+ | `cache_type_k`, `cache_type_v` | string | KV-cache dtype (`f16`, `f32`, `q8_0`, `q4_0`, …) |
159
+ | `kv_unified` | boolean | Use a unified KV cache across sequences |
160
+ | `swa_full` | boolean | Materialize full attention even for sliding-window layers |
161
+ | `ctx_shift` | boolean | Allow llama.cpp's rolling context shift |
162
+ | `use_mmap`, `use_mlock` | boolean | Memory-mapping / locking |
163
+ | `no_extra_bufts` | boolean | Disable extra compute buffer types |
164
+ | `cpu_mask`, `cpu_strict` | string / boolean | CPU affinity (advanced) |
165
+ | `devices` | string[] | Restrict to specific GGML devices |
166
+ | Speculative keys | various | `speculative`, `spec_type`, `spec_draft_n_max/n_min/p_min/p_split` |
167
+
168
+ ### `[autodiscover]`
169
+
170
+ Set `autodiscover = true` to enable discovery with default settings, set it to `false` (or omit it) to disable discovery, or use an `[autodiscover]` table for fine-grained control:
113
171
 
114
- [server]
115
- port = 2080
116
- log_level = "info"
172
+ ```toml
173
+ [autodiscover]
174
+ udp.port = 8089
175
+ udp.announcements = { enabled = true, interval = 5000 }
176
+ udp.requests = { enabled = true, responseDelay = 100 }
177
+ http.enabled = true
178
+ http.path = "/buttress/info"
179
+ http.cors = true
180
+ # mdns.enabled = false # Bonjour/Avahi advertisement (optional)
181
+ ```
117
182
 
118
- [runtime]
119
- cache_dir = "~/.buttress/models"
183
+ ### `[[generators]]`
120
184
 
121
- # Session state cache for ggml-llm (saves KV cache to disk for prompt reuse)
122
- [runtime.session_cache]
123
- enabled = true
124
- max_size_bytes = "10GB" # Supports string (e.g., "10GB", "500MB") or number
125
- max_entries = 1000
185
+ Every generator entry has a `type`, an optional `[generators.backend]` table, and a `[generators.model]` table:
126
186
 
127
- # GGML LLM generator
187
+ ```toml
188
+ [[generators]]
189
+ type = "ggml-llm" # or "ggml-stt" / "mlx-llm"
190
+
191
+ [generators.backend]
192
+ # (see per-type sections below)
193
+
194
+ [generators.model]
195
+ repo_id = "..."
196
+ # (see per-type sections below)
197
+ ```
198
+
199
+ #### Common `[generators.model]` keys
200
+
201
+ Shared by **all** generator types:
202
+
203
+ | Key | Type | Notes |
204
+ | ------------------------- | --------- | -------------------------------------------------------------------------------- |
205
+ | `repo_id` *(required; ggml-stt supplies a default)* | string | HuggingFace repo (`org/repo`) |
206
+ | `revision` | string | Default `"main"` |
207
+ | `download` | boolean | Pre-download at server startup (default `false`) |
208
+
209
+ Additional keys honored by **ggml-llm** and **ggml-stt** (mlx-llm gets quantization from the repo itself and does not use these):
210
+
211
+ | Key | Type | Notes |
212
+ | ------------------------- | --------- | -------------------------------------------------------------------------------- |
213
+ | `filename` | string | Pin a specific artifact in the repo |
214
+ | `url` | string | Direct download URL (skips manifest lookup) |
215
+ | `quantization` | string | Preferred quant tag — e.g. `q4_0`, `q8_0`, `mxfp4` |
216
+ | `preferred_quantizations` | string[] | Ordered fallback list when `quantization` doesn't match (alias: `quantizations`) |
217
+ | `allow_local_file` | boolean | Must be set to `true` to use `local_path` / `mmproj_local_path` |
218
+ | `local_path` | string | Use a local file as the load path. Repo metadata is still resolved from HF, so `repo_id` is still required. |
219
+ | `api_base`, `base_url` | string | Override HF API / blob hosts (mirrors / proxies) |
220
+
221
+ ---
222
+
223
+ ### `ggml-llm` (llama.cpp via `@fugood/llama.node`)
224
+
225
+ Loads a GGUF LLM. Runtime keys above can be overridden per-generator under `[generators.model]`; `[generators.backend]` only controls backend selection and resource planning.
226
+
227
+ **`[generators.backend]`**
228
+
229
+ | Key | Type | Default | Notes |
230
+ | --------------------- | -------- | --------------------------------------------- | ---------------------------------------------------------------- |
231
+ | `variant` | string | auto | Force `cuda` / `vulkan` / `snapdragon` / `default` |
232
+ | `variant_preference` | string[] | `["cuda","vulkan","snapdragon","default"]` | Probe order when `variant` is unset |
233
+ | `gpu_memory_fraction` | number | `0.85` | Maximum fraction of GPU memory the hardware guardrails may plan against |
234
+ | `cpu_memory_fraction` | number | `0.5` | Max RAM fraction for CPU-side buffers |
235
+
236
+ **`[generators.model]`** — in addition to the common keys above:
237
+
238
+ | Key | Type | Notes |
239
+ | ----------------------------------------------------------------------------- | ---------------- | -------------------------------------------------------------------- |
240
+ | `n_ctx` | number | Context window. Auto-capped at the model's training context. |
241
+ | `n_gpu_layers` | number\|`"auto"` | Layers offloaded to GPU (default `"auto"`) |
242
+ | `n_batch` | number | Prompt batch size (default `512`) |
243
+ | `n_ubatch`, `n_threads`, `n_parallel`, `n_cpu_moe` | number | Same semantics as the `[runtime]` defaults |
244
+ | `flash_attn_type`, `cache_type_k`, `cache_type_v`, `kv_unified`, `swa_full`, `ctx_shift`, `use_mmap`, `use_mlock`, `no_extra_bufts`, `cpu_mask`, `cpu_strict`, `devices` | various | Per-model overrides for the `[runtime]` defaults |
245
+
246
+ **Multimodal (mtmd)** — auto-downloads the matching `mmproj-*.gguf` from the same repo and calls `initMultimodal`:
247
+
248
+ | Key | Type | Notes |
249
+ | ------------------------- | ------- | ------------------------------------------------------------------ |
250
+ | `enable_mtmd` | boolean | Default `false` |
251
+ | `mmproj_filename` | string | Pin a specific projector file |
252
+ | `mmproj_url` | string | Direct URL override |
253
+ | `mmproj_local_path` | string | Local projector (requires `allow_local_file = true`) |
254
+ | `mmproj_use_gpu` | boolean | Unset (`null`) = auto (true when `n_gpu_layers > 0`) |
255
+ | `mmproj_image_min_tokens` | number | Min visual tokens (dynamic-resolution models; `-1` = unset) |
256
+ | `mmproj_image_max_tokens` | number | Max visual tokens (`-1` = unset) |
257
+
258
+ **Speculative decoding**
259
+
260
+ | Key | Type | Notes |
261
+ | -------------------- | ------ | -------------------------------------------------- |
262
+ | `speculative` | string | Draft model identifier |
263
+ | `spec_type` | string | Strategy (backend-defined) |
264
+ | `spec_draft_n_max` | int | Max drafted tokens per step |
265
+ | `spec_draft_n_min` | int | Min drafted tokens |
266
+ | `spec_draft_p_min` | number | Min acceptance probability |
267
+ | `spec_draft_p_split` | number | Split threshold |
268
+
269
+ **Example**
270
+
271
+ ```toml
128
272
  [[generators]]
129
273
  type = "ggml-llm"
130
274
  [generators.backend]
131
275
  variant_preference = ["cuda", "vulkan", "default"]
276
+ gpu_memory_fraction = 0.95
132
277
  [generators.model]
133
278
  repo_id = "ggml-org/gpt-oss-20b-GGUF"
134
279
  quantization = "mxfp4"
135
280
  n_ctx = 12800
281
+ download = true
282
+ ```
283
+
284
+ ---
285
+
286
+ ### `ggml-stt` (whisper.cpp via `@fugood/whisper.node`)
287
+
288
+ Loads a Whisper GGML model for speech-to-text.
289
+
290
+ **`[generators.backend]`**
136
291
 
137
- # GGML STT (Speech-to-Text) generator
292
+ | Key | Type | Default | Notes |
293
+ | --------------------- | -------- | ----------------------------- | ---------------------------------- |
294
+ | `variant` | string | auto | `cuda` / `vulkan` / `default` |
295
+ | `variant_preference` | string[] | `["cuda","vulkan","default"]` | Probe order |
296
+ | `gpu_memory_fraction` | number | `0.85` | |
297
+ | `cpu_memory_fraction` | number | `0.5` | |
298
+
299
+ **`[generators.model]`** — common keys plus:
300
+
301
+ | Key | Type | Default | Notes |
302
+ | ------------------------- | ----------------------------- | -------------------------------- | ---------------------------------------------------- |
303
+ | `repo_id` | string | `"BricksDisplay/whisper-ggml"` | Defaulted (unlike ggml-llm) |
304
+ | `preferred_quantizations` | string[] | `["q8_0", <no-quant>, "q5_1"]` | Default fallback chain |
305
+ | `use_gpu` | boolean | `true` | Set to `false` to force-disable the GPU even when one is available |
306
+ | `use_flash_attn` | `"on"` / `"off"` / `"auto"` / boolean | `"auto"` | `"auto"` enables flash-attn when GPU is in use. `true`/`false` are accepted as shortcuts for `"on"`/`"off"`. |
307
+
308
+ **Runtime extras** — under `[runtime]` for ggml-stt only:
309
+
310
+ | Key | Type | Notes |
311
+ | ------------- | ------ | ------------------------------------------- |
312
+ | `max_threads` | number | Caps the whisper.cpp thread count |
313
+
314
+ **Example**
315
+
316
+ ```toml
138
317
  [[generators]]
139
318
  type = "ggml-stt"
140
319
  [generators.backend]
141
- variant_preference = ["coreml", "default"]
320
+ variant_preference = ["cuda", "vulkan", "default"]
142
321
  [generators.model]
143
322
  repo_id = "BricksDisplay/whisper-ggml"
144
- filename = "ggml-small.bin"
323
+ filename = "ggml-large-v3-turbo-q8_0.bin"
324
+ use_gpu = true
325
+ use_flash_attn = "on"
326
+ download = true
327
+ ```
328
+
329
+ ---
330
+
331
+ ### `mlx-llm` (Apple Silicon, Python `mlx-lm` / `mlx-vlm` bridge)
332
+
333
+ Loads an MLX-format model on Apple Silicon. On first use, the backend creates a virtualenv at `{cache_dir}/mlx-env` and installs `mlx_lm_package`, `mlx_vlm_package`, plus `torch` and `torchvision` (required by some VLM processors). If an existing venv already has `mlx_vlm` and `torch` importable, the install step is skipped. There is no `[generators.backend]` section.
334
+
335
+ **`[generators.model]`** — common `repo_id` / `revision` / `download` plus:
336
+
337
+ | Key | Type | Default | Notes |
338
+ | ------------------ | ------------------- | --------- | ----------------------------------------------------------------------------- |
339
+ | `adapter_path` | string | — | Local LoRA adapter directory |
340
+ | `vlm` | `"auto"` / boolean | `"auto"` | Force VLM (`true`) vs text-only (`false`); `"auto"` infers from the repo |
341
+ | `tokenizer_config` | table | — | Forwarded to `mlx_lm.load(..., tokenizer_config=...)` |
342
+ | `model_config` | table | — | Forwarded to `mlx_lm.load(..., model_config=...)` |
343
+
344
+ `quantization`, `filename`, and `preferred_quantizations` are **not** used — the MLX repo itself determines the quantization.
345
+
346
+ **Runtime extras** — under `[runtime]` for mlx-llm:
347
+
348
+ | Key | Type | Default | Notes |
349
+ | ------------------- | ------ | ----------------------------- | -------------------------------------------------------------------- |
350
+ | `mlx_env_dir` | string | `{cache_dir}/mlx-env` | Location of the auto-managed Python venv |
351
+ | `mlx_lm_package` | string | `"mlx-lm==0.31.1"` | pip spec used when provisioning the venv |
352
+ | `mlx_vlm_package` | string | `"mlx-vlm==0.4.0"` | pip spec used when provisioning the venv |
353
+ | `session_cache.*` | table | enabled, `5GB`, 100 entries | Separate cache from ggml-llm (lives in `{cache_dir}/mlx-session-cache`) |
354
+
355
+ **Example**
356
+
357
+ ```toml
358
+ [[generators]]
359
+ type = "mlx-llm"
360
+ [generators.model]
361
+ repo_id = "mlx-community/Qwen2.5-VL-3B-Instruct-4bit"
362
+ vlm = true
363
+ download = true
145
364
  ```
146
365
 
147
366
  ### Programmatic Usage
package/lib/index.mjs CHANGED
@@ -1,12 +1,12 @@
1
1
  #!/usr/bin/env node
2
- import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import o,{createHash as s,randomUUID as c}from"node:crypto";import l from"node:path";import*as u from"node:stream/web";import{ReadableStream as d}from"node:stream/web";import f,{mkdir as p,open as m,readFile as h,readdir as g,rename as _,stat as v,unlink as y,writeFile as b}from"node:fs/promises";import x from"node:os";import{gguf as S}from"@huggingface/gguf";import{getBackendDevicesInfo as C,isLibVariantAvailable as w,loadModel as T}from"@fugood/llama.node";import E from"bytes";import{EventEmitter as D}from"node:events";import{initWhisper as ee}from"@fugood/whisper.node";import{fileURLToPath as te}from"node:url";import{execFile as ne,execSync as O,spawn as re}from"node:child_process";import k from"node:fs";import A from"@iarna/toml";import{ZodError as j,z as M}from"zod";import{importSPKI as N,jwtVerify as P}from"jose";import{cors as ie}from"@elysiajs/cors";import F from"node-machine-id";import I from"ms";import{Buffer as L}from"node:buffer";import R from"node:dgram";const z=1024**3,ae=(e,t,n)=>Math.min(Math.max(e,t),n),oe=e=>e?40:0,se=(e=0)=>e?ae(e/(12*z)*20,0,20):0,ce=(e=0)=>e?ae(e/(32*z)*10,0,10):0,le=e=>e?10:0,B=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},ue=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=oe(n)+B(t,e)+se(r),s=ce(i),c=le(a);return 
Math.min(100,Math.round(o+s+c))},de=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:oe(n),variant:B(t,e),gpuMemory:se(r),cpuMemory:ce(i),availability:le(a)}),fe=[`cuda`,`vulkan`,`snapdragon`,`default`],pe=.85,me=.5,he=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],ge=e=>e&&String(e).trim().toLowerCase()||null,_e=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=fe}={})=>{let i=[];e&&i.push(e),i.push(...he(t)),i.push(...he(n)),i.push(...r);let a=new Set;for(let e of i){let t=ge(e);t&&a.add(t)}return Array.from(a)},ve=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},ye=e=>Array.isArray(e)?e.map(e=>({...e})):[],be=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,xe=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=ye(be(n,r)),l=c.some(ve),u=c.filter(e=>ve(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:ue(m),breakdown:o?de(m):null,error:s,timestamp:new Date().toISOString()}},Se=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let 
r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},Ce=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=pe,cpuMemoryFraction:a=me,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=fe}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=_e({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(xe({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(xe({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=Se({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=Se({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return 
v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},we=`ggml-llm`,Te=[`cuda`,`vulkan`,`default`],Ee=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=pe,cpuMemoryFraction:a=me,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>Ce({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:Te,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:Te}),De=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=ue(d),p=l?de(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=Se({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=Se({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},Oe=new Map([[we,Ce],[`ggml-stt`,Ee],[`mlx-llm`,De]]),ke=async({platform:e,totalMemoryInBytes:t,backend:n=we,dependencies:r,...i}={})=>{let a=Oe.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await 
a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},Ae={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},je=e=>Ae[e?String(e).toLowerCase():`f16`]||Ae.f16,Me=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=je(a.k),h=je(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},Ne=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},Pe=e=>e?String(e).trim().toLowerCase():null,Fe=(e={},t=null)=>{if(!e)return null;let n=Pe(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Ie=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return 
o*a+c},Le=({arch:e,nLayer:t=0})=>({arch:Pe(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Re=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),ze=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=Pe(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=Fe(t,r),o=r?Re.get(r):null;if(!o)return Le({arch:r,nLayer:n});let 
s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return Le({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Ie(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Be=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),Ve=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),He=e=>e?String(e).trim().toLowerCase():null,Ue=e=>{let t=He(e);return t?Be.has(t):!1},We=e=>{let t=He(e);return t?Ve.has(t):!1},Ge=e=>Ue(e)?`recurrent`:We(e)?`hybrid`:`transformer`,Ke=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let 
d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},qe=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ge(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Me(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},Je=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ge(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let 
e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Ye=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new D;V.setMaxListeners(100);const Xe=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var Ze=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Xe(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Xe(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Xe(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new Ze,U=new Ze;let Qe=0;function $e(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function et(e){return Qe+=1,{subscriberId:Qe,unsubscribe:$e(e)}}function tt(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return 
i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function nt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function rt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function it(e){return{timestamp:new Date().toISOString(),ggmlLlm:tt(e),ggmlStt:nt(e),mlxLlm:rt(e)}}const{ReadableStream:at,WritableStream:ot}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,st=(e,t)=>Object.prototype.hasOwnProperty.call(e||{},t),ct=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),ct(e[t],n)):e[t]=n}),e),lt=`https://huggingface.co`,ut=`https://huggingface.co/api`,W=l.join(x.homedir(),`.buttress`,`models`),dt=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],ft=.5,pt=[`speculative`,`spec_type`,`spec_draft_n_max`,`spec_draft_n_min`,`spec_draft_p_min`,`spec_draft_p_split`],mt=(e,t)=>{if(!(t==null||t===``)){if(e===`spec_draft_n_max`||e===`spec_draft_n_min`){let e=Number(t);return Number.isFinite(e)?Math.max(0,Math.floor(e)):void 0}if(e===`spec_draft_p_min`||e===`spec_draft_p_split`){let e=Number(t);return Number.isFinite(e)?Math.max(0,e):void 0}return t}},ht=e=>{let t={};for(let n of pt){let r=mt(n,e.model[n]??e.runtime[n]);r!==void 0&&(t[n]=r)}return t},gt=(e={})=>{let t={};for(let n of pt){let r=mt(n,e[n]);r!==void 0&&(t[n]=r)}return t},_t=(e={},t={})=>{let n=e||{},r=gt(t);return!Object.keys(r).length||pt.some(e=>st(n,e))?n:{...r,...n}},vt={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:ft},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:ut,base_url:lt,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1,speculative:null,spec_type:null,spec_draft_n_max:null,spec_draft_n_min:null,spec_draft_p_min:null,spec_draft_p_split:null},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},yt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],bt=e=>{if(!e)return null;let 
t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},xt=(e={})=>{let t=structuredClone(vt);if(ct(t,e),t.backend.variant=bt(t.backend.variant),t.backend.variant_preference=Array.from(new Set(yt(t.backend.variant_preference).flatMap(e=>{let t=bt(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(yt(t.runtime.prefer_variants).flatMap(e=>{let t=bt(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(yt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||lt,t.model.api_base=t.model.api_base||ut,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...vt.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||vt.runtime.context_release_delay_ms),t},St=e=>{let t=e.toLowerCase();return dt.find(e=>t.includes(e))||null},Ct=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=bt(e);return t?[t]:[]})))},G=async e=>{await 
p(e,{recursive:!0})},wt=(e=W)=>l.join(e,`.metadata-cache`),Tt=(e,t,n=W)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(wt(n),t,`${r}.json`)},Et=async(e,t,n=W)=>{try{let r=Tt(e,t,n),i=await h(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,l.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},Dt=async(e,t,n,r=W)=>{try{let i=Tt(e,t,r);await G(l.dirname(i)),await b(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,l.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} cache:`,e.message)}},Ot=(e=W)=>l.join(e,`.session-state-cache`),kt=(e=W)=>l.join(Ot(e),`cache-map.json`),At=(e=W)=>l.join(Ot(e),`temp`),jt=(e=W)=>l.join(Ot(e),`states`),Mt=()=>({version:1,entries:{},totalSize:0}),Nt=async(e=W)=>{try{let t=await h(kt(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?Mt():n}catch{return Mt()}},Pt=async(e,t=W)=>{let n=kt(t),r=`${n}.tmp.${Date.now()}`;try{await G(l.dirname(n)),await b(r,JSON.stringify(e,null,2),`utf-8`),await _(r,n)}catch(e){throw await y(r).catch(()=>{}),e}},Ft=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return s(`sha256`).update(n).digest(`hex`).slice(0,24)},It=(e,t=W)=>l.join(jt(t),`${e}.bin`),Lt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return l.join(At(e),`${t}.bin`)},Rt=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,zt=(e,t)=>{let 
n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},Bt=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Rt(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=zt(e,n.fullText);return r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},Vt=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},Ht=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Rt(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Ut=async(e=W)=>{let t=At(e);try{let e=await g(t),n=Date.now();await 
Promise.all(e.map(async e=>{let r=l.join(t,e),i=await v(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await y(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Wt=async e=>{try{return await v(e),!0}catch{return!1}},Gt=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t;var Kt=class e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Gt(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}async persistCacheMap(){try{await Pt(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await 
e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Wt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await G(Ot(this.baseDir)),await G(At(this.baseDir)),await G(jt(this.baseDir)),this.cacheMap=await Nt(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await y(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await y(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Rt(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=Bt(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Wt(i.stateFilePath)?(i.lastAccessedAt=new 
Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=Ft(a,this.metadata),s=()=>{n&&y(n).catch(()=>{}),i&&y(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=It(o,this.baseDir),u=i?It(`${o}-prompt`,this.baseDir):null;try{await G(l.dirname(c)),await _(n,c);let s=await v(c),d=0;if(i&&u)try{await _(i,u),d=(await v(u)).size,console.log(`[SessionCache] Saved prompt state: ${u}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let f={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:u||null,promptStateSize:d,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=f,this.cacheMap.totalSize+=s.size+d,this.requiresExactMatch()||await Ht(this.cacheMap,e,o,this.metadata),await Vt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await Pt(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),f}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async 
generateTempStatePath(){return await G(At(this.baseDir)),Lt(this.baseDir)}async cleanup(){await Ut(this.baseDir)}};const qt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Jt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Yt=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await Et(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await S(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await Dt(r,`range-metadata`,o,n),o},Xt=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},K=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},Zt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new ot({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Qt=async e=>{let 
t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await Et(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Jt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await Dt(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await qt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:dt,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=St(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||St(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await qt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await 
Jt(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await Dt(i,`artifact-info`,y,r),y},$t=/^mmproj-.*\.gguf$/i,en=async(e,t)=>{if(!e.model.enable_mtmd)return null;let n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await Et(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Jt(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await Dt(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:l.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:l.resolve(e.model.mmproj_local_path)};return await Dt(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await qt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),u=c?.siblings||c?.files||[],d=u.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),f=e.model.mmproj_filename;if(f){if(!d.includes(f))throw Error(`mmproj file "${f}" not found in repo ${i}`)}else{let e=d.filter(e=>$t.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let 
n=t?.quantization&&String(t.quantization).toLowerCase();f=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let p=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${f}`,m=u.find(e=>(e.rfilename||e.path||e.filename)===f),h=Number(m?.size);if(!Number.isFinite(h)||h<=0){let e=await Jt(p,{headers:r});h=Number(e.headers.get(`content-length`))||null}let g={repoId:i,revision:a,filename:f,url:p,size:h,headers:r};return await Dt(o,`artifact-info`,g,n),g},tn=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},nn=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=Ct(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?vt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ke({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},rn=async e=>{let t=await Qt(e),n=await en(e,t),r=await Yt(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift,...ht(e)},j=A.n_ctx?Number(A.n_ctx):null,M=j||a||4096,N=[],P=[],ie=!0;if(j&&a&&j>a){ie=!1;let e=`Requested context length (${j}) exceeds model training context 
(${a})`;N.push(e),P.push(e),M=a}j&&!a&&N.push(`Model metadata missing training context length, using requested value`);let F={k:A.cache_type_k,v:A.cache_type_v},I=t.size>0?t.size:0,L=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:F,swaConfig:O,kvUnified:A.kv_unified,nParallel:A.n_parallel,swaFull:A.swa_full,arch:i,attentionLayerCount:m}),R=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:A.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),z=await nn(e,{modelBytes:I,kvCacheBytes:L(M)+R}),ae=z.selected.totalMemory||0,oe=ae*(e.backend.gpu_memory_fraction||1),se=e.backend.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),ce=Math.max(0,x.totalmem()*se),le=z.selected.hasGpu?oe:ce,B=Ye({maxCtx:M,availableMemory:le,modelBytes:I,kvBytesForCtx:L});if(!j&&B){let e=a?Math.min(B,a):B,t=Math.max(32,e);t<M&&N.push(`Context length capped to ${t} by memory limits`),M=t}M>B&&(M=B);let ue=Math.floor(B);console.log(`[buttress] Memory-limited context length: ${ue}`);let de=L(M),fe=I+de+R,pe=w?I/(w+1):I,me=0;z.selected.hasGpu&&pe>0&&(me=Math.min(w+1,Math.max(0,Math.floor(oe/pe)))),console.log(`[buttress] Auto GPU layer capacity (${z.selected.variant}): ${me}/${w+1}`);let he;he=A.n_gpu_layers===`auto`||A.n_gpu_layers==null?me:Math.max(0,Math.min(Number(A.n_gpu_layers)||0,w+1));let ge=(()=>{let e=A.flash_attn_type&&String(A.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:z.selected.hasGpu?`auto`:`off`})(),_e=e.runtime.cache_dir,ve=Xt(e,t),ye=await K(ve,t.size),be=tn(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:ie,backend:`ggml-llm`,warnings:N,errors:P,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,dense_first:O.denseFirst,type:O.type,layers:O.swaLayers}:null}},runtime:{...A,variant:z.selected.variant,n_ctx:M,requested_ctx:j,n_gpu_layers:he,auto_gpu_layers:me,flash_attn_type:ge,cache_type_k:F.k,cache_type_v:F.v,estimated_max_n_ctx:ue},resources:{modelBytes:I,kvCacheBytes:de,recurrentMemoryBytes:R,totalEstimatedBytes:fe,gpuCapacityBytes:ae,gpuUsableBytes:oe,cpuUsableBytes:ce,fit:z.selected.fit},devices:{selected:z.selected,attempts:z.attempts},download:{cacheDir:_e,localPath:ve,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:z,cacheTypes:F,localPath:ve,localExists:ye}},an=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new at({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},on=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&y(i).catch(()=>{}),c&&y(c).catch(()=>{})};return new at({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},sn=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return s(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},cn=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),u=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),d=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=l.join(t,u);await K(f)||await Zt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Zt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},ln=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Zt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},un=async(e,t)=>{let n=sn(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await cn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};for(let t of pt){let n=e.plan.info.runtime[t];n!=null&&(o[t]=n)}e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await T(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await ln(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},dn=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},fn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return dn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?dn(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await dn(e,t)},i),!0)};async function pn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=xt(t),a=await rn(i),o=new Kt(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await un(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=_t(t,s.plan.info.runtime),i=sn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);await a.ready;let o=r.prompt||``,c=null,l=null;if(!o&&r.messages){({messages:c}=r),l={chatTemplate:r.chat_template||r.chatTemplate,jinja:r.jinja??!0,tools:r.tools,parallel_tool_calls:r.parallel_tool_calls,tool_choice:r.tool_choice,reasoning_format:r.reasoning_format,enable_thinking:r.enable_thinking,add_generation_prompt:r.add_generation_prompt,now:r.now,chat_template_kwargs:r.chat_template_kwargs,force_pure_content:r.force_pure_content};let e=await a.context.getFormattedChat(c,l.chatTemplate,l);o=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&o){let{options:e}=await s.sessionCache.prepareCompletionOptions(r,o,a.context),t=await s.sessionCache.generateTempStatePath(),n=(await a.context.tokenize(o))?.tokens?.length||0,i={...e,save_state_path:t},c=s.sessionCache.requiresExactMatch(),l=!!i.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),i.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return on(a.context,i,s.sessionCache,o,t,n,s.id,d,u)}let u={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return an(a.context,r,s.id,u)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=sn(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let 
a=await i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=sn(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=sn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=sn(s),t=s.contexts.get(e);return t?fn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):dn(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const mn=e=>{let t=xt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function 
hn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=xt(e),o=await Qt(n),s=Xt(n,o),{repoId:c}=o,u=await en(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),d=tn(n,u),f=async()=>{if(!u||!d||u.localPath)return;if(await K(d,u.size)){console.log(`[Download] mmproj already exists: ${d}`);return}let e=t.getDownload(d);if(e){await e;return}let n=(async()=>{try{await Zt(u.url,u.headers,d,u.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(d)}})();t.setDownload(d,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await f().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let p=t.getDownload(s);if(p)return console.log(`[Download] Already downloading: ${c}`),p.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let m=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let u=String(s).padStart(5,`0`),d=o.filename.replace(e,`-${u}-of-${String(i).padStart(5,`0`)}.gguf`),f=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${d}`,p=l.join(t,d);await K(p)||await Zt(f,o.headers,p,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Zt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await f(),console.log(`[Download] Completed: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,m),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function gn(e){let t=xt(e),n=await Qt(t),r=await Yt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,j={k:t.model.cache_type_k,v:t.model.cache_type_v},M=n.size>0?n.size:0,N=t.model.n_parallel||4,P=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:j,swaConfig:O,kvUnified:t.model.kv_unified,nParallel:N,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),ie=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:N,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),F=t.backend?.gpu_memory_fraction==null?vt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),I=t.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),L=await 
nn(t,{modelBytes:M,kvCacheBytes:P(A)}),R=(L.selected.totalMemory||0)*F,z=Math.max(0,x.totalmem()*I),ae=Ye({maxCtx:A,availableMemory:L.selected.hasGpu?R:z,modelBytes:M,kvBytesForCtx:P}),oe=P(A),se=P(ae);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:te,nEmbdHeadV:ne,nHeadCount:E,nHeadKvCount:D,kvLayerCount:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,denseFirst:O.denseFirst,type:O.type,layers:O.swaLayers}:null},modelBytes:M,kvCacheBytes:oe,limitedKvCacheBytes:se,memoryLimitedCtx:ae,recurrentMemoryBytes:ie,quantization:{name:n.quantization||null,fileType:p,version:f}}}const _n=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function vn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await gn(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await ke({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=_n(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let v=null,y=null;if(e){let t=_n(e);y={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=y.fit,a=y.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}v={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!v&&(v={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let b=null;return i&&(b={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:y,comparison:v,modelConfig:b}}const{WritableStream:yn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,bn=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),bn(e[t],n)):e[t]=n}),e),xn=`https://huggingface.co`,Sn=`https://huggingface.co/api`,Cn=l.join(x.homedir(),`.buttress`,`models`),wn=[`cuda`,`vulkan`,`default`],Tn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],En=`fp16`,Dn=.5,On=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],kn=e=>{if(!e)return null;let t=e.toLowerCase();return 
On.find(e=>t.includes(e))||null},An={backend:{type:`ggml-stt`,variant:null,variant_preference:wn,gpu_memory_fraction:.85,cpu_memory_fraction:Dn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,En,`q5_1`],allow_local_file:!1,local_path:null,api_base:Sn,base_url:xn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:Cn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},jn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],Mn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Nn=(e={})=>{let t=structuredClone(An);if(bn(t,e),t.backend.variant=Mn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(jn(t.backend.variant_preference||wn).flatMap(e=>{let t=Mn(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...wn]),t.runtime.prefer_variants=Array.from(new Set(jn(t.runtime.prefer_variants).flatMap(e=>{let t=Mn(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(jn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||xn,t.model.api_base=t.model.api_base||Sn,t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):Cn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||An.runtime.context_release_delay_ms),t},Pn=e=>{let t=e.toLowerCase();return Tn.find(e=>t.includes(e))||null},Fn=e=>{let 
/**
 * Check whether a usable model file is present at `e`.
 *
 * @param {string} e  Path to inspect.
 * @param {number|null} [t]  Expected size in bytes; when falsy the size is not
 *   compared and any regular file counts as present.
 * @returns {Promise<boolean>} `true` when `e` is a regular file (of the
 *   expected size, if one was given); `false` otherwise, including when the
 *   path cannot be stat'ed.
 */
const Wn = async (e, t) => {
  try {
    const info = await v(e);
    // Without this, a directory at the target path passed the check whenever
    // no expected size was known.
    if (!info.isFile()) return false;
    return t ? info.size === t : true;
  } catch {
    return false;
  }
};
/**
 * Resolve which model artifact a `ggml-stt` config refers to.
 *
 * Resolution order:
 *  1. a previously cached answer for the same repo/revision/filename/url/
 *     quantization settings (`zn`);
 *  2. `model.url`, probed with a HEAD request for its size;
 *  3. the repository file listing (`.bin` files only), choosing
 *     `model.filename` when given, otherwise the first file matching the
 *     preferred quantizations, otherwise the first `.bin` file.
 *
 * The request headers (custom headers plus the bearer token) are always
 * rebuilt from the current config and are never written to the metadata
 * cache, so the token is not stored on disk and a rotated token takes effect
 * on cache hits as well.
 *
 * @param {object} e Normalized backend config (`model` and `runtime` tables).
 * @returns {Promise<object>} `{ repoId, revision, filename, url, size,
 *   quantization, headers, ... }`.
 * @throws {Error} When no repo id is configured or the repo has no `.bin` file;
 *   errors from the HTTP helpers (`Hn`, `Vn`) propagate.
 */
const Kn = async (e) => {
  const { model, runtime } = e;
  const repoId = model.repo_id || model.repository || model.model;
  if (!repoId) {
    throw Error("`model.repo_id` is required in Buttress backend config");
  }
  const revision = model.revision || `main`;
  const cacheDir = runtime.cache_dir;
  const cacheKey = JSON.stringify({
    repoId,
    revision,
    filename: model.filename,
    url: model.url,
    quantization: model.quantization,
    preferred_quantizations: model.preferred_quantizations,
  });

  const headers = { ...(runtime.http_headers || {}) };
  if (runtime.huggingface_token) {
    headers.Authorization = `Bearer ${runtime.huggingface_token}`;
  }

  const cached = await zn(cacheKey, `artifact-info`, cacheDir);
  if (cached) {
    // Cache files written by earlier versions may still carry old headers;
    // the current ones always win.
    return { ...cached, headers };
  }

  // Persist everything except the (possibly secret) request headers.
  const remember = async (artifact) => {
    const { headers: _transient, ...persistable } = artifact;
    await Bn(cacheKey, `artifact-info`, persistable, cacheDir);
    return artifact;
  };

  if (model.url) {
    const head = await Hn(model.url, { headers });
    const size = Number(head.headers.get(`content-length`)) || null;
    const filename = model.filename || model.url.split(`/`).pop();
    return remember({
      repoId,
      revision,
      filename,
      url: model.url,
      size,
      quantization: Pn(filename || ``),
      headers,
    });
  }

  let { filename } = model;
  let quantization = model.quantization && String(model.quantization).toLowerCase();

  const listing = await Vn(`${model.api_base}/models/${repoId}?revision=${revision}&blobs=true`, { headers });
  const candidates = (listing?.siblings || listing?.files || [])
    .map((entry) => entry.rfilename || entry.path || entry.filename)
    .filter((name) => typeof name === `string` && name.endsWith(`.bin`));
  if (candidates.length === 0) {
    throw Error(`No model artifacts found in repo ${repoId}`);
  }

  const preference = model.preferred_quantizations.length > 0 ? model.preferred_quantizations : Tn;
  const pickByPreference = () => {
    for (const wanted of preference) {
      if (wanted === En) {
        // The unquantized entry matches a file whose name carries no known
        // quantization tag at all.
        const untagged = candidates.find((name) => {
          const lower = name.toLowerCase();
          return !Tn.some((tag) => lower.includes(tag));
        });
        if (untagged) return { filename: untagged, quantization: null };
      } else {
        const tagged = candidates.find((name) => name.toLowerCase().includes(wanted));
        if (tagged) return { filename: tagged, quantization: wanted };
      }
    }
    return null;
  };

  if (filename) {
    quantization ||= Pn(filename);
  } else {
    const picked = pickByPreference() || { filename: candidates[0], quantization: null };
    filename = picked.filename;
    quantization = picked.quantization || Pn(filename);
  }

  const url = `${model.base_url.replace(/\/+$/, ``)}/${repoId}/resolve/${revision}/${filename}`;
  const head = await Hn(url, { headers });
  const size = Number(head.headers.get(`content-length`)) || null;
  return remember({
    repoId,
    revision,
    filename,
    url,
    size,
    quantization,
    headers,
    isSplit: false,
    splitCount: 0,
  });
};
/**
 * Make sure the model file described by a load plan is available locally and
 * return its path.
 *
 * Order of attempts:
 *  1. the plan already says the file exists;
 *  2. a download registered with the global download manager (`r`) is awaited,
 *     and accepted if the file then passes the size check;
 *  3. a download already started for this generator (`t`) is awaited;
 *  4. otherwise the file is validated in place (`allow_local_file`) or
 *     downloaded with `Gn`.
 *
 * After any successful path the plan is marked as present. Before this fix the
 * flag was only set in step 2, so a plan created before the model was on disk
 * downloaded the model again on every later call.
 *
 * @param {object} e  Load plan (`localPath`, `artifact`, `config`,
 *   `localExists`, `info.download`); its `localExists` flag is updated.
 * @param {Map<string, Promise>} t  In-flight downloads of this generator.
 * @param {(fraction: number) => void} [n]  Progress callback.
 * @param {object|null} [r=null]  Global download manager exposing `getDownload`.
 * @returns {Promise<string>} The local model path.
 * @throws {Error} When `allow_local_file` is set and the file is missing, or
 *   when the download fails.
 */
const Yn = async (e, t, n, r = null) => {
  const { localPath, artifact, config } = e;
  const reportDone = () => {
    if (typeof n === `function`) n(1);
  };
  const markPresent = () => {
    e.localExists = true;
    if (e.info?.download) e.info.download.exists = true;
  };

  if (e.localExists) {
    reportDone();
    return localPath;
  }

  if (r) {
    const shared = r.getDownload(localPath);
    if (shared) {
      console.log(`[ensureModelFile] Waiting for global STT download: ${artifact.repoId}`);
      try {
        await shared;
        if (await Wn(localPath, artifact.size)) {
          markPresent();
          reportDone();
          return localPath;
        }
      } catch (err) {
        // Fall through and fetch the file ourselves.
        console.warn(`[ensureModelFile] Global STT download failed, will retry: ${err.message}`);
      }
    }
  }

  const pending = t.get(localPath);
  if (pending) {
    await pending;
    markPresent();
    reportDone();
    return localPath;
  }

  const job = (async () => {
    if (config.model.allow_local_file) {
      if (!(await Wn(localPath, artifact.size))) {
        throw Error(`Local model file not found: ${localPath}`);
      }
      return localPath;
    }
    await Gn(artifact.url, artifact.headers, localPath, artifact.size, n);
    return localPath;
  })();

  t.set(localPath, job);
  try {
    await job;
    markPresent();
    return localPath;
  } finally {
    t.delete(localPath);
  }
};
/**
 * Normalize caller-supplied audio into an `ArrayBuffer`.
 *
 * @param {ArrayBuffer|ArrayBufferView|string|null|undefined} e  Raw audio:
 *   an `ArrayBuffer`, any typed array / `Buffer` / `DataView`, or a base64
 *   string (optionally a `data:` URL).
 * @returns {ArrayBuffer|null} The audio bytes, or `null` for falsy input.
 * @throws {Error} For any other input type.
 */
const Zn = (e) => {
  if (!e) return null;
  if (e instanceof ArrayBuffer) return e;
  if (ArrayBuffer.isView(e)) {
    // A view may cover only part of its backing buffer (Node `Buffer`s are
    // frequently slices of a shared pool), so returning `e.buffer` as-is would
    // hand unrelated bytes to the transcriber. Copy only when necessary.
    const coversWholeBuffer = e.byteOffset === 0 && e.byteLength === e.buffer.byteLength;
    return coversWholeBuffer ? e.buffer : e.buffer.slice(e.byteOffset, e.byteOffset + e.byteLength);
  }
  if (typeof e === `string`) {
    const base64 = e.startsWith(`data:`) ? e.split(`,`)[1] || `` : e;
    const decoded = Buffer.from(base64, `base64`);
    return decoded.buffer.slice(decoded.byteOffset, decoded.byteOffset + decoded.byteLength);
  }
  throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`);
};

/**
 * Acquire the generator's speech-to-text context, creating it on first use.
 *
 * When a live context record exists its reference count is incremented, any
 * scheduled release is cancelled and the record is returned once its context
 * is ready. Otherwise a new record is installed, the model file is ensured
 * (`Yn`), the native context is initialized (`ee`) and the load is reported
 * through `U.addModelLoad`.
 *
 * Fix: the native context options are now taken from the keys the STT plan
 * actually provides (`use_gpu`, `use_flash_attn`, `max_threads`). The previous
 * keys (`n_gpu_layers`, `flash_attn_type`, `n_threads`) are never set on that
 * plan, so GPU and flash-attention were always disabled.
 *
 * @param {object} e  Generator state (`plan`, `downloads`, `contextRecord`, ...).
 * @param {(fraction: number) => void} [t]  Progress callback (0, 0.5, 1).
 * @returns {Promise<object>} The context record (`context`, `modelInfo`,
 *   `refCount`, ...).
 * @throws Re-throws any download/initialization error after clearing
 *   `e.contextRecord`.
 */
const Qn = async (e, t) => {
  const notify = (fraction) => {
    if (typeof t === `function`) t(fraction);
  };

  const live = e.contextRecord;
  if (live && !live.released) {
    if (live.releaseTimer) {
      clearTimeout(live.releaseTimer);
      live.releaseTimer = null;
      console.log(`[Context] Cancelled pending STT release`);
    }
    live.releaseRequested = false;
    live.refCount += 1;
    console.log(`[Context] Reusing existing STT context, refCount=${live.refCount}`);
    notify(0);
    if (!live.context) await live.ready;
    notify(1);
    // Deliberately re-read: this mirrors the original, which returned whatever
    // record is installed once the wait is over.
    return e.contextRecord;
  }

  if (live) {
    console.log(`[Context] STT record exists but released=${live.released}, creating new context`);
  } else {
    console.log(`[Context] No existing STT record, creating new context`);
  }

  const record = { refCount: 1, ready: null, released: false };
  e.contextRecord = record;

  record.ready = (async () => {
    const startedAt = Date.now();
    const describeLoad = () => ({
      id: e.id,
      generatorId: e.id,
      repoId: e.plan.info.model?.repoId || null,
      quantization: e.plan.info.model?.quantization || null,
      modelType: e.plan.info.model?.modelType || null,
      variant: e.plan.info.runtime?.variant || null,
    });
    try {
      notify(0);
      const filePath = await Yn(e.plan, e.downloads, t, e.globalDownloadManager);
      notify(0.5);

      const { runtime } = e.plan.info;
      const context = await ee(
        {
          filePath,
          useFlashAttn: Boolean(runtime.use_flash_attn),
          useGpu: Boolean(runtime.use_gpu),
          // NOTE(review): `nThreads` is the option name the original passed;
          // confirm it against the whisper binding's init options.
          nThreads: runtime.max_threads ?? undefined,
        },
        runtime.variant,
      );
      notify(1);
      record.context = context;
      try {
        record.modelInfo = context.getModelInfo();
      } catch {
        record.modelInfo = null;
      }
      U.addModelLoad({
        ...describeLoad(),
        useGpu: e.plan.info.runtime?.use_gpu || false,
        durationMs: Date.now() - startedAt,
        success: true,
      });
      return record;
    } catch (err) {
      U.addModelLoad({
        ...describeLoad(),
        durationMs: Date.now() - startedAt,
        success: false,
        error: err?.message || String(err),
      });
      throw err;
    }
  })();

  try {
    await record.ready;
    notify(1);
    return record;
  } catch (err) {
    e.contextRecord = null;
    throw err;
  }
};
Xn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await $n(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Qn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},u=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?er(o,e):!1},d=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Zn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=l.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
/**
 * Start (or join) a background download of the STT model described by `e`.
 *
 * The function returns as soon as the download has been registered with the
 * download manager; completion and failure are reported through the callbacks.
 *
 * @param {object} e  Raw generator config (normalized with `Nn`).
 * @param {object} t  Download manager exposing `getDownload`, `setDownload`
 *   and `deleteDownload`, keyed by local path.
 * @param {object} [n]  Callbacks.
 * @param {(fraction: number) => void} [n.onProgress]
 * @param {(info: object) => void} [n.onComplete]
 * @param {(error: Error) => void} [n.onError]
 * @returns {Promise<object>} `{ started, localPath, repoId, ... }`; never
 *   rejects, because setup failures are reported as `{ started: false, error }`.
 */
async function rr(e, t, n = {}) {
  const { onProgress, onComplete, onError } = n;
  const fire = (callback, payload) => {
    if (typeof callback === `function`) callback(payload);
  };

  try {
    const config = Nn(e);
    const artifact = await Kn(config);
    const localPath = Un(config, artifact);
    const { repoId } = artifact;

    if (await Wn(localPath, artifact.size)) {
      console.log(`[Download] STT model already exists: ${repoId} at ${localPath}`);
      fire(onComplete, { localPath, repoId, alreadyExists: true });
      return { started: false, localPath, repoId, alreadyExists: true };
    }

    const existing = t.getDownload(localPath);
    if (existing) {
      console.log(`[Download] Already downloading STT model: ${repoId}`);
      existing
        .then(() => {
          fire(onComplete, { localPath, repoId, joinedExisting: true });
        })
        .catch((err) => {
          fire(onError, err);
        });
      return { started: false, localPath, repoId, alreadyDownloading: true };
    }

    console.log(`[Download] Starting STT model download: ${repoId}`);
    const job = (async () => {
      try {
        await Gn(artifact.url, artifact.headers, localPath, artifact.size, (fraction) => {
          if (fraction >= 0 && Number.isFinite(fraction)) {
            console.log(`[Download] ${repoId}: ${Math.round(fraction * 100)}%`);
            fire(onProgress, fraction);
          }
        });
        console.log(`[Download] Completed STT model: ${repoId}`);
        fire(onComplete, { localPath, repoId });
      } catch (err) {
        console.error(`[Download] Failed STT model: ${repoId}`, err.message);
        fire(onError, err);
        throw err;
      } finally {
        t.deleteDownload(localPath);
      }
    })();

    // The job is not awaited here. Callers that join it through the manager
    // still see the rejection; this handler only guarantees that a failed
    // download nobody joined is not reported as an unhandled rejection.
    job.catch(() => {});

    t.setDownload(localPath, job);
    return { started: true, localPath, repoId };
  } catch (err) {
    console.error(`[Download] Failed to start STT download:`, err.message);
    fire(onError, err);
    return { started: false, localPath: null, repoId: null, error: err.message };
  }
}
/**
 * Deep-merge `overrides` into `target` in place and return `target`
 * (used to layer user settings over the `mlx-llm` defaults).
 *
 * Plain (non-array) object values are merged recursively; primitives, arrays
 * and `null` overwrite the existing entry. A missing or non-object slot on the
 * target is replaced by `{}` before descending into it.
 *
 * @param {object} [target={}]    Object that is mutated and returned.
 * @param {object} [overrides={}] Values to apply; `null`/`undefined` is a no-op.
 * @returns {object} The same `target` reference.
 */
const hr = (target = {}, overrides = {}) => {
  for (const [key, value] of Object.entries(overrides || {})) {
    // Never descend through an own "__proto__" key: `target.__proto__` is
    // Object.prototype, so merging into it would leak the values onto every
    // object in the process.
    if (key === '__proto__') continue;

    const isMergeable = value && typeof value === 'object' && !Array.isArray(value);
    if (!isMergeable) {
      target[key] = value;
      continue;
    }
    if (!target[key] || typeof target[key] !== 'object') {
      target[key] = {};
    }
    hr(target[key], value);
  }
  return target;
};
/**
 * Promise wrapper around `execFile`.
 *
 * @param {string} e    Executable to run.
 * @param {string[]} t  Arguments.
 * @param {object} [n={}]  `execFile` options. `timeout` defaults to 300000 ms
 *   when it is absent or nullish; an explicit `0` is passed through unchanged.
 * @returns {Promise<{stdout: string, stderr: string}>} Captured output.
 * @throws {Error} `"<file> failed: <stderr or error message>"`. The original
 *   `execFile` error (exit code, signal, ...) is kept as `error.cause`.
 */
const q = (e, t, n = {}) =>
  new Promise((resolve, reject) => {
    // Apply the default after the spread so that an explicit
    // `timeout: undefined` can no longer switch the timeout off.
    const options = { ...n, timeout: n.timeout ?? 3e5 };
    ne(e, t, options, (error, stdout, stderr) => {
      if (error) {
        const detail = stderr?.toString().trim() || error.message;
        reject(new Error(`${e} failed: ${detail}`, { cause: error }));
        return;
      }
      resolve({ stdout: stdout?.toString() || ``, stderr: stderr?.toString() || `` });
    });
  });
/**
 * Prepare the Python virtual environment used by the MLX bridge and return the
 * path of its interpreter.
 *
 * Steps: reuse the environment when its interpreter can already import
 * `mlx_vlm` and `torch`; otherwise create the venv if the interpreter is
 * missing (warning when the system Python is older than `Dr`), install the
 * requested packages plus `torch`/`torchvision` with the venv's `pip`, and
 * verify the imports again.
 *
 * @param {object} args
 * @param {string} args.envDir          Virtual environment directory.
 * @param {string} args.mlxLmPackage    pip requirement for mlx-lm.
 * @param {string} args.mlxVlmPackage   pip requirement for mlx-vlm.
 * @param {(fraction: number) => void} [args.onProgress]  Progress callback.
 * @returns {Promise<string>} Path to `<envDir>/bin/python3`.
 * @throws {Error} When venv creation, installation or the final import check
 *   fails (errors from `q`).
 */
async function Or({ envDir, mlxLmPackage, mlxVlmPackage, onProgress }) {
  const python = l.join(envDir, `bin`, `python3`);
  const pip = l.join(envDir, `bin`, `pip`);

  if (await wr(python)) {
    try {
      await q(python, [`-c`, `import mlx_vlm; import torch`], { timeout: 1e4 });
      onProgress?.(1);
      return python;
    } catch {
      // Environment exists but is incomplete: fall through to (re)install.
    }
  }

  if (!(await wr(python))) {
    onProgress?.(0.1);
    try {
      const probe = await q(
        `python3`,
        [`-c`, `import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],
        { timeout: 5e3 },
      );
      const [major, minor] = probe.stdout.trim().split(`.`).map(Number);
      const tooOld = major < Dr[0] || (major === Dr[0] && minor < Dr[1]);
      if (tooOld) {
        console.warn(`[mlx-llm] WARNING: System Python is ${major}.${minor}, but mlx-vlm requires >= ${Dr.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`);
      }
    } catch {
      // The version probe is advisory only.
    }
    console.log(`[mlx-llm] Creating venv at ${envDir}`);
    await p(envDir, { recursive: true });
    await q(`python3`, [`-m`, `venv`, envDir], { timeout: 6e4 });
    onProgress?.(0.3);
  }

  console.log(`[mlx-llm] Installing ${mlxVlmPackage}`);
  onProgress?.(0.4);
  await q(pip, [`install`, mlxLmPackage, mlxVlmPackage, `torch`, `torchvision`], {
    timeout: 6e5,
    env: { ...process.env },
  });
  onProgress?.(0.9);
  await q(python, [`-c`, `import mlx_vlm; import torch; print(mlx_vlm.__version__)`], { timeout: 15e3 });
  onProgress?.(1);
  console.log(`[mlx-llm] mlx-vlm installed successfully`);
  return python;
}
2
/**
 * Score contribution of a backend variant name.
 *
 * @param {string} [e='default']  Variant name; matched case-insensitively by
 *   substring.
 * @param {string|null} [t=null]  Platform identifier; only consulted for the
 *   `default` variant.
 * @returns {number} 20 for CUDA, 10 for Vulkan, 15 for `default` on
 *   `darwin`/`ios`, 5 for `default` elsewhere, 0 for anything else.
 */
const B = (e = `default`, t = null) => {
  const name = String(e).toLowerCase();
  if (!name) return 0;
  if (name.includes(`cuda`)) return 20;
  if (name.includes(`vulkan`)) return 10;
  if (!name.includes(`default`)) return 0;
  return t === `darwin` || t === `ios` ? 15 : 5;
};
/**
 * Build the capability record for one backend variant from its device list.
 *
 * @param {object} args
 * @param {string} args.platform            Platform identifier.
 * @param {number} args.totalMemoryInBytes  Total system memory.
 * @param {string} args.variant             Backend variant being described.
 * @param {object[]} args.devices           Devices reported for the variant.
 * @param {number} args.gpuMemoryFraction   Share of GPU memory counted as usable.
 * @param {number} args.cpuMemoryFraction   Share of system memory counted as usable.
 * @param {boolean} args.ok                 Whether the variant could be probed.
 * @param {string} [args.error]             Probe error message, if any.
 * @returns {object} Record with the device copies, total/usable byte counts,
 *   `score` (`ue`), `breakdown` (`de`, only when `ok`) and a timestamp.
 */
const xe = ({
  platform,
  totalMemoryInBytes,
  variant,
  devices,
  gpuMemoryFraction,
  cpuMemoryFraction,
  ok,
  error,
}) => {
  const visibleDevices = ye(be(variant, devices));
  const hasGpu = visibleDevices.some(ve);

  // Sum the *coerced* size. The finiteness check already coerces with
  // Number(), so adding the raw value would concatenate numeric strings and
  // throw on BigInt sizes.
  const gpuTotalBytes = visibleDevices.reduce((sum, device) => {
    if (!ve(device)) return sum;
    const bytes = Number(device.maxMemorySize);
    return Number.isFinite(bytes) ? sum + bytes : sum;
  }, 0);

  const cpuTotalBytes = totalMemoryInBytes;
  const gpuUsableBytes = hasGpu ? Math.floor(gpuTotalBytes * gpuMemoryFraction) : 0;
  const cpuUsableBytes = cpuTotalBytes ? Math.floor(cpuTotalBytes * cpuMemoryFraction) : 0;
  const scoreInput = { platform, variant, hasGpu, gpuUsableBytes, cpuUsableBytes, ok };

  return {
    platform,
    ok,
    variant,
    hasGpu,
    devices: visibleDevices,
    gpuTotalBytes,
    gpuUsableBytes,
    cpuTotalBytes,
    cpuUsableBytes,
    score: ue(scoreInput),
    breakdown: ok ? de(scoreInput) : null,
    error,
    timestamp: new Date().toISOString(),
  };
};
/**
 * Probe every candidate GGML backend variant and report what the host offers.
 *
 * Each variant from `_e(...)` is checked with `isLibVariantAvailable` and, if
 * available, described with `getBackendDevicesInfo`; failures are recorded as
 * `ok: false` attempts instead of aborting. The first successful attempt
 * becomes `selected`. When a model or KV-cache size is supplied, `selected`
 * and every attempt additionally get a `fit` estimate from `Se` (plus
 * `limitedFit` when a different limited KV-cache size is given).
 *
 * @param {object} [args]  See the destructured names; `dependencies` must
 *   provide `getBackendDevicesInfo` and `isLibVariantAvailable`.
 * @returns {Promise<{ok: boolean, selected: object|null, attempts: object[]}>}
 * @throws {TypeError} When either dependency function is missing.
 */
const Ce = async ({
  platform,
  variant = null,
  preferVariants = [],
  variantPreference = [],
  gpuMemoryFraction = pe,
  cpuMemoryFraction = me,
  includeBreakdown = false,
  totalMemoryInBytes,
  modelBytes = null,
  kvCacheBytes = null,
  limitedKvCacheBytes = null,
  dependencies = {},
  defaultVariants = fe,
} = {}) => {
  const { getBackendDevicesInfo, isLibVariantAvailable } = dependencies;
  if (typeof getBackendDevicesInfo !== `function` || typeof isLibVariantAvailable !== `function`) {
    throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);
  }

  const candidates = _e({ variant, preferVariants, variantPreference, defaultVariants });
  const common = { platform, totalMemoryInBytes, gpuMemoryFraction, cpuMemoryFraction };
  const attempts = [];

  for (const candidate of candidates) {
    try {
      if (!(await isLibVariantAvailable(candidate))) {
        throw Error(`Variant ${candidate} not available on this platform`);
      }
      const devices = await getBackendDevicesInfo(candidate);
      attempts.push(xe({ ...common, variant: candidate, devices, ok: true }));
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      attempts.push(xe({ ...common, variant: candidate, devices: [], ok: false, error: message }));
    }
  }

  const winner = attempts.find((attempt) => attempt.ok) || null;
  const report = {
    ok: Boolean(winner),
    selected: winner ? { ...winner, breakdown: includeBreakdown ? winner.breakdown : undefined } : null,
    attempts,
  };
  if (!includeBreakdown && report.selected) {
    delete report.selected.breakdown;
  }

  // Without any size information there is nothing to fit.
  if (!modelBytes && !kvCacheBytes) {
    return report;
  }

  const withFit = (device) => {
    if (!device) return device;
    const fit = Se({ device, modelBytes: modelBytes || 0, kvCacheBytes: kvCacheBytes || 0 });
    let limitedFit = null;
    if (limitedKvCacheBytes != null && limitedKvCacheBytes !== kvCacheBytes) {
      limitedFit = Se({ device, modelBytes: modelBytes || 0, kvCacheBytes: limitedKvCacheBytes });
    }
    return { ...device, fit, ...(limitedFit && { limitedFit }) };
  };

  report.selected = withFit(report.selected);
  report.attempts = Array.isArray(report.attempts)
    ? report.attempts.map((attempt) => withFit(attempt))
    : report.attempts;
  return report;
};
/**
 * Estimate the memory needed to run a speech-to-text model.
 *
 * The processing buffer is the sum of three size-tiered allowances, each
 * chosen by comparing the model size against fixed thresholds.
 * NOTE(review): the code does not say what each of the three tiers stands
 * for; they are kept as plain tables here.
 *
 * @param {object} [args]
 * @param {number} [args.modelBytes=0]           Model file size in bytes.
 * @param {number} [args.audioLengthSeconds=30]  Audio duration to budget for.
 * @param {number} [args.sampleRate=16000]       Samples per second.
 * @param {number} [args.bytesPerSample=4]       Bytes per audio sample.
 * @returns {{modelBytes: number, audioBufferBytes: number,
 *   processingBufferBytes: number, totalBytes: number}}
 */
const Ne = ({
  modelBytes = 0,
  audioLengthSeconds = 30,
  sampleRate = 16e3,
  bytesPerSample = 4,
} = {}) => {
  const MiB = 1024 * 1024;
  const GiB = 1024 * MiB;

  const size = Math.max(0, Number(modelBytes) || 0);
  const audioBufferBytes = Math.max(
    0,
    Math.floor(Math.max(0, audioLengthSeconds) * sampleRate * bytesPerSample),
  );

  // First tier whose (exclusive) upper bound exceeds the model size wins;
  // `otherwise` applies to models at or above the last bound.
  const allowance = (tiers, otherwise) => {
    for (const [upperBound, mebibytes] of tiers) {
      if (size < upperBound) return mebibytes * MiB;
    }
    return otherwise * MiB;
  };

  const first = allowance([[200 * MiB, 120], [500 * MiB, 140], [2 * GiB, 150]], 160);
  const second = allowance([[200 * MiB, 70], [500 * MiB, 135]], 220);
  const third = allowance([[100 * MiB, 20], [200 * MiB, 30], [500 * MiB, 85], [2 * GiB, 215]], 360);
  const processingBufferBytes = first + second + third;

  return {
    modelBytes: size,
    audioBufferBytes,
    processingBufferBytes,
    totalBytes: size + processingBufferBytes + audioBufferBytes,
  };
};
/**
 * Describe the sliding-window-attention (SWA) layout of a model.
 *
 * The architecture (explicit `arch` or `general.architecture` from the
 * metadata) selects a rule from `Re`; the rule receives the layer count, the
 * sliding-window size read by `Fe`, and the metadata. Unknown architectures,
 * disabled rules and rules without a positive window yield the "no SWA"
 * description from `Le`.
 *
 * @param {object} [args]
 * @param {string} [args.arch]          Architecture name; overrides the metadata.
 * @param {object} [args.metadata={}]   Model metadata key/value map.
 * @param {number} [args.nLayer=0]      Number of layers.
 * @returns {{arch: string|null, enabled: boolean, window: number|null,
 *   pattern: number|null, denseFirst: boolean, type: string|null,
 *   kvLayers: number, swaLayers: number}}
 */
const ze = ({ arch, metadata = {}, nLayer = 0 } = {}) => {
  const archName = Pe(arch || metadata[`general.architecture`]);
  const layerCount = Math.max(0, Math.floor(Number(nLayer) || 0));
  const windowSize = Fe(metadata, archName);
  const rule = archName ? Re.get(archName) : null;
  const disabled = () => Le({ arch: archName, nLayer });

  if (!rule) return disabled();

  const spec = rule({ nLayer: layerCount, nSwa: windowSize, metadata });
  if (!spec || !spec.enabled || !spec.window || spec.window <= 0) return disabled();

  const asFiniteNumber = (value) =>
    value != null && Number.isFinite(Number(value)) ? Number(value) : null;

  const pattern = Math.max(0, Math.floor(Number(spec.pattern) || 0));
  const denseFirst = !!spec.denseFirst;

  // A rule may pin the number of KV layers; otherwise every layer counts.
  const kvLayers = Math.max(0, Math.floor(asFiniteNumber(spec.kvLayers) ?? layerCount));

  // Likewise for the SWA layer count, which is otherwise derived by `Ie`.
  const pinnedSwaLayers = asFiniteNumber(spec.swaLayers);
  const swaLayers =
    pinnedSwaLayers === null
      ? Ie(kvLayers, pattern, denseFirst)
      : Math.max(0, Math.floor(pinnedSwaLayers));

  return {
    arch: archName,
    enabled: swaLayers > 0,
    window: spec.window,
    pattern,
    denseFirst,
    type: spec.type || `standard`,
    kvLayers,
    swaLayers,
  };
};
d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},qe=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ge(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Me(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},Je=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ge(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let 
e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Ye=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new D;V.setMaxListeners(100);const Xe=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var Ze=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Xe(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Xe(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Xe(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new Ze,U=new Ze;let Qe=0;function $e(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function et(e){return Qe+=1,{subscriberId:Qe,unsubscribe:$e(e)}}function tt(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return 
i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function nt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function rt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function it(e){return{timestamp:new Date().toISOString(),ggmlLlm:tt(e),ggmlStt:nt(e),mlxLlm:rt(e)}}const{ReadableStream:at,WritableStream:ot}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,st=(e,t)=>Object.prototype.hasOwnProperty.call(e||{},t),ct=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),ct(e[t],n)):e[t]=n}),e),lt=`https://huggingface.co`,ut=`https://huggingface.co/api`,W=l.join(x.homedir(),`.buttress`,`models`),dt=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],ft=.5,pt=[`speculative`,`spec_type`,`spec_draft_n_max`,`spec_draft_n_min`,`spec_draft_p_min`,`spec_draft_p_split`],mt=(e,t)=>{if(!(t==null||t===``)){if(e===`spec_draft_n_max`||e===`spec_draft_n_min`){let e=Number(t);return Number.isFinite(e)?Math.max(0,Math.floor(e)):void 0}if(e===`spec_draft_p_min`||e===`spec_draft_p_split`){let e=Number(t);return Number.isFinite(e)?Math.max(0,e):void 0}return t}},ht=e=>{let t={};for(let n of pt){let r=mt(n,e.model[n]??e.runtime[n]);r!==void 0&&(t[n]=r)}return t},gt=(e={})=>{let t={};for(let n of pt){let r=mt(n,e[n]);r!==void 0&&(t[n]=r)}return t},_t=(e={},t={})=>{let n=e||{},r=gt(t);return!Object.keys(r).length||pt.some(e=>st(n,e))?n:{...r,...n}},vt={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:ft},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:ut,base_url:lt,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1,speculative:null,spec_type:null,spec_draft_n_max:null,spec_draft_n_min:null,spec_draft_p_min:null,spec_draft_p_split:null},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},yt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],bt=e=>{if(!e)return null;let 
t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},xt=(e={})=>{let t=structuredClone(vt);if(ct(t,e),t.backend.variant=bt(t.backend.variant),t.backend.variant_preference=Array.from(new Set(yt(t.backend.variant_preference).flatMap(e=>{let t=bt(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(yt(t.runtime.prefer_variants).flatMap(e=>{let t=bt(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(yt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||lt,t.model.api_base=t.model.api_base||ut,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...vt.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||vt.runtime.context_release_delay_ms),t},St=e=>{let t=e.toLowerCase();return dt.find(e=>t.includes(e))||null},Ct=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=bt(e);return t?[t]:[]})))},G=async e=>{await 
p(e,{recursive:!0})},wt=(e=W)=>l.join(e,`.metadata-cache`),Tt=(e,t,n=W)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(wt(n),t,`${r}.json`)},Et=async(e,t,n=W)=>{try{let r=Tt(e,t,n),i=await h(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,l.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},Dt=async(e,t,n,r=W)=>{try{let i=Tt(e,t,r);await G(l.dirname(i)),await b(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,l.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} cache:`,e.message)}},Ot=(e=W)=>l.join(e,`.session-state-cache`),kt=(e=W)=>l.join(Ot(e),`cache-map.json`),At=(e=W)=>l.join(Ot(e),`temp`),jt=(e=W)=>l.join(Ot(e),`states`),Mt=()=>({version:1,entries:{},totalSize:0}),Nt=async(e=W)=>{try{let t=await h(kt(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?Mt():n}catch{return Mt()}},Pt=async(e,t=W)=>{let n=kt(t),r=`${n}.tmp.${Date.now()}`;try{await G(l.dirname(n)),await b(r,JSON.stringify(e,null,2),`utf-8`),await _(r,n)}catch(e){throw await y(r).catch(()=>{}),e}},Ft=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return s(`sha256`).update(n).digest(`hex`).slice(0,24)},It=(e,t=W)=>l.join(jt(t),`${e}.bin`),Lt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return l.join(At(e),`${t}.bin`)},Rt=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,zt=(e,t)=>{let 
n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},Bt=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Rt(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=zt(e,n.fullText);return r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},Vt=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},Ht=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Rt(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Ut=async(e=W)=>{let t=At(e);try{let e=await g(t),n=Date.now();await 
Promise.all(e.map(async e=>{let r=l.join(t,e),i=await v(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await y(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Wt=async e=>{try{return await v(e),!0}catch{return!1}},Gt=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t;var Kt=class e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Gt(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}async persistCacheMap(){try{await Pt(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await 
e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Wt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await G(Ot(this.baseDir)),await G(At(this.baseDir)),await G(jt(this.baseDir)),this.cacheMap=await Nt(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await y(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await y(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Rt(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=Bt(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Wt(i.stateFilePath)?(i.lastAccessedAt=new 
Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=Ft(a,this.metadata),s=()=>{n&&y(n).catch(()=>{}),i&&y(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=It(o,this.baseDir),u=i?It(`${o}-prompt`,this.baseDir):null;try{await G(l.dirname(c)),await _(n,c);let s=await v(c),d=0;if(i&&u)try{await _(i,u),d=(await v(u)).size,console.log(`[SessionCache] Saved prompt state: ${u}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let f={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:u||null,promptStateSize:d,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=f,this.cacheMap.totalSize+=s.size+d,this.requiresExactMatch()||await Ht(this.cacheMap,e,o,this.metadata),await Vt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await Pt(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),f}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async 
generateTempStatePath(){return await G(At(this.baseDir)),Lt(this.baseDir)}async cleanup(){await Ut(this.baseDir)}};const qt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Jt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Yt=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await Et(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await S(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await Dt(r,`range-metadata`,o,n),o},Xt=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},K=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},Zt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new ot({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Qt=async e=>{let 
t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await Et(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Jt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await Dt(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await qt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:dt,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=St(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||St(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await qt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await 
Jt(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await Dt(i,`artifact-info`,y,r),y},$t=/^mmproj-.*\.gguf$/i,en=async(e,t)=>{if(!e.model.enable_mtmd)return null;let n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await Et(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Jt(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await Dt(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:l.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:l.resolve(e.model.mmproj_local_path)};return await Dt(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await qt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),u=c?.siblings||c?.files||[],d=u.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),f=e.model.mmproj_filename;if(f){if(!d.includes(f))throw Error(`mmproj file "${f}" not found in repo ${i}`)}else{let e=d.filter(e=>$t.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let 
n=t?.quantization&&String(t.quantization).toLowerCase();f=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let p=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${f}`,m=u.find(e=>(e.rfilename||e.path||e.filename)===f),h=Number(m?.size);if(!Number.isFinite(h)||h<=0){let e=await Jt(p,{headers:r});h=Number(e.headers.get(`content-length`))||null}let g={repoId:i,revision:a,filename:f,url:p,size:h,headers:r};return await Dt(o,`artifact-info`,g,n),g},tn=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},nn=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=Ct(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?vt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ke({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},rn=async e=>{let t=await Qt(e),n=await en(e,t),r=await Yt(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift,...ht(e)},j=A.n_ctx?Number(A.n_ctx):null,M=j||a||4096,N=[],P=[],ie=!0;if(j&&a&&j>a){ie=!1;let e=`Requested context length (${j}) exceeds model training context 
(${a})`;N.push(e),P.push(e),M=a}j&&!a&&N.push(`Model metadata missing training context length, using requested value`);let F={k:A.cache_type_k,v:A.cache_type_v},I=t.size>0?t.size:0,L=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:F,swaConfig:O,kvUnified:A.kv_unified,nParallel:A.n_parallel,swaFull:A.swa_full,arch:i,attentionLayerCount:m}),R=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:A.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),z=await nn(e,{modelBytes:I,kvCacheBytes:L(M)+R}),ae=z.selected.totalMemory||0,oe=ae*(e.backend.gpu_memory_fraction||1),se=e.backend.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),ce=Math.max(0,x.totalmem()*se),le=z.selected.hasGpu?oe:ce,B=Ye({maxCtx:M,availableMemory:le,modelBytes:I,kvBytesForCtx:L});if(!j&&B){let e=a?Math.min(B,a):B,t=Math.max(32,e);t<M&&N.push(`Context length capped to ${t} by memory limits`),M=t}M>B&&(M=B);let ue=Math.floor(B);console.log(`[buttress] Memory-limited context length: ${ue}`);let de=L(M),fe=I+de+R,pe=w?I/(w+1):I,me=0;z.selected.hasGpu&&pe>0&&(me=Math.min(w+1,Math.max(0,Math.floor(oe/pe)))),console.log(`[buttress] Auto GPU layer capacity (${z.selected.variant}): ${me}/${w+1}`);let he;he=A.n_gpu_layers===`auto`||A.n_gpu_layers==null?me:Math.max(0,Math.min(Number(A.n_gpu_layers)||0,w+1));let ge=(()=>{let e=A.flash_attn_type&&String(A.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:z.selected.hasGpu?`auto`:`off`})(),_e=e.runtime.cache_dir,ve=Xt(e,t),ye=await K(ve,t.size),be=tn(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:ie,backend:`ggml-llm`,warnings:N,errors:P,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,dense_first:O.denseFirst,type:O.type,layers:O.swaLayers}:null}},runtime:{...A,variant:z.selected.variant,n_ctx:M,requested_ctx:j,n_gpu_layers:he,auto_gpu_layers:me,flash_attn_type:ge,cache_type_k:F.k,cache_type_v:F.v,estimated_max_n_ctx:ue},resources:{modelBytes:I,kvCacheBytes:de,recurrentMemoryBytes:R,totalEstimatedBytes:fe,gpuCapacityBytes:ae,gpuUsableBytes:oe,cpuUsableBytes:ce,fit:z.selected.fit},devices:{selected:z.selected,attempts:z.attempts},download:{cacheDir:_e,localPath:ve,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:z,cacheTypes:F,localPath:ve,localExists:ye}},an=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new at({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},on=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&y(i).catch(()=>{}),c&&y(c).catch(()=>{})};return new at({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},sn=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return s(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},cn=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),u=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),d=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=l.join(t,u);await K(f)||await Zt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Zt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},ln=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Zt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},un=async(e,t)=>{let n=sn(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await cn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};for(let t of pt){let n=e.plan.info.runtime[t];n!=null&&(o[t]=n)}e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await T(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await ln(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},dn=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},fn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return dn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?dn(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await dn(e,t)},i),!0)};async function pn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=xt(t),a=await rn(i),o=new Kt(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await un(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=_t(t,s.plan.info.runtime),i=sn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);await a.ready;let o=r.prompt||``,c=null,l=null;if(!o&&r.messages){({messages:c}=r),l={chatTemplate:r.chat_template||r.chatTemplate,jinja:r.jinja??!0,tools:r.tools,parallel_tool_calls:r.parallel_tool_calls,tool_choice:r.tool_choice,reasoning_format:r.reasoning_format,enable_thinking:r.enable_thinking,add_generation_prompt:r.add_generation_prompt,now:r.now,chat_template_kwargs:r.chat_template_kwargs,force_pure_content:r.force_pure_content};let e=await a.context.getFormattedChat(c,l.chatTemplate,l);o=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&o){let{options:e}=await s.sessionCache.prepareCompletionOptions(r,o,a.context),t=await s.sessionCache.generateTempStatePath(),n=(await a.context.tokenize(o))?.tokens?.length||0,i={...e,save_state_path:t},c=s.sessionCache.requiresExactMatch(),l=!!i.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),i.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return on(a.context,i,s.sessionCache,o,t,n,s.id,d,u)}let u={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return an(a.context,r,s.id,u)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=sn(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let 
a=await i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=sn(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=sn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=sn(s),t=s.contexts.get(e);return t?fn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):dn(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const mn=e=>{let t=xt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function 
hn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=xt(e),o=await Qt(n),s=Xt(n,o),{repoId:c}=o,u=await en(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),d=tn(n,u),f=async()=>{if(!u||!d||u.localPath)return;if(await K(d,u.size)){console.log(`[Download] mmproj already exists: ${d}`);return}let e=t.getDownload(d);if(e){await e;return}let n=(async()=>{try{await Zt(u.url,u.headers,d,u.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(d)}})();t.setDownload(d,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await f().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let p=t.getDownload(s);if(p)return console.log(`[Download] Already downloading: ${c}`),p.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let m=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let u=String(s).padStart(5,`0`),d=o.filename.replace(e,`-${u}-of-${String(i).padStart(5,`0`)}.gguf`),f=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${d}`,p=l.join(t,d);await K(p)||await Zt(f,o.headers,p,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Zt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await f(),console.log(`[Download] Completed: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,m),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function gn(e){let t=xt(e),n=await Qt(t),r=await Yt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,j={k:t.model.cache_type_k,v:t.model.cache_type_v},M=n.size>0?n.size:0,N=t.model.n_parallel||4,P=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:j,swaConfig:O,kvUnified:t.model.kv_unified,nParallel:N,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),ie=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:N,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),F=t.backend?.gpu_memory_fraction==null?vt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),I=t.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),L=await 
nn(t,{modelBytes:M,kvCacheBytes:P(A)}),R=(L.selected.totalMemory||0)*F,z=Math.max(0,x.totalmem()*I),ae=Ye({maxCtx:A,availableMemory:L.selected.hasGpu?R:z,modelBytes:M,kvBytesForCtx:P}),oe=P(A),se=P(ae);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:te,nEmbdHeadV:ne,nHeadCount:E,nHeadKvCount:D,kvLayerCount:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,denseFirst:O.denseFirst,type:O.type,layers:O.swaLayers}:null},modelBytes:M,kvCacheBytes:oe,limitedKvCacheBytes:se,memoryLimitedCtx:ae,recurrentMemoryBytes:ie,quantization:{name:n.quantization||null,fileType:p,version:f}}}const _n=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function vn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await gn(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await ke({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=_n(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let v=null,y=null;if(e){let t=_n(e);y={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=y.fit,a=y.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}v={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!v&&(v={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let b=null;return i&&(b={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:y,comparison:v,modelConfig:b}}const{WritableStream:yn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,bn=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),bn(e[t],n)):e[t]=n}),e),xn=`https://huggingface.co`,Sn=`https://huggingface.co/api`,Cn=l.join(x.homedir(),`.buttress`,`models`),wn=[`cuda`,`vulkan`,`default`],Tn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],En=`fp16`,Dn=.5,On=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],kn=e=>{if(!e)return null;let t=e.toLowerCase();return 
On.find(e=>t.includes(e))||null},An={backend:{type:`ggml-stt`,variant:null,variant_preference:wn,gpu_memory_fraction:.85,cpu_memory_fraction:Dn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,En,`q5_1`],allow_local_file:!1,local_path:null,api_base:Sn,base_url:xn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:Cn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},jn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],Mn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Nn=(e={})=>{let t=structuredClone(An);if(bn(t,e),t.backend.variant=Mn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(jn(t.backend.variant_preference||wn).flatMap(e=>{let t=Mn(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...wn]),t.runtime.prefer_variants=Array.from(new Set(jn(t.runtime.prefer_variants).flatMap(e=>{let t=Mn(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(jn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||xn,t.model.api_base=t.model.api_base||Sn,t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):Cn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||An.runtime.context_release_delay_ms),t},Pn=e=>{let t=e.toLowerCase();return Tn.find(e=>t.includes(e))||null},Fn=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=Mn(e);t&&n.add(t)}return Array.from(n)},In=async e=>{await p(e,{recursive:!0})},Ln=(e=Cn)=>l.join(e,`.metadata-cache`),Rn=(e,t,n=Cn)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(Ln(n),t,`${r}.json`)},zn=async(e,t,n=Cn)=>{try{let r=await h(Rn(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},Bn=async(e,t,n,r=Cn)=>{try{let i=Rn(e,t,r);await In(l.dirname(i)),await b(i,JSON.stringify(n),`utf-8`)}catch{}},Vn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Hn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Un=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},Wn=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},Gn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await In(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new yn({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Kn=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await zn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Hn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:Pn(c||``),headers:o};return await Bn(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Vn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:Tn,f=()=>{for(let e of d)if(e===En){let e=u.find(e=>{let t=e.toLowerCase();return!Tn.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=Pn(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||Pn(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
Hn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await Bn(i,`artifact-info`,g,r),g},qn=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=Fn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?An.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?Dn:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ke({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Jn=async e=>{let t=await Kn(e),n=Ne({modelBytes:t.size>0?t.size:0}),r=await qn(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Un(e,t),l=await Wn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:kn(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},Yn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await Wn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await Wn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Gn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
Xn=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Zn=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Qn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await Yn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
ee({filePath:i,useFlashAttn:!!e.plan.info.runtime.use_flash_attn,useGpu:!!e.plan.info.runtime.use_gpu,nThreads:e.plan.info.runtime.max_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},$n=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),er=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return $n(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?$n(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await $n(e,t)},i),!0)};async function tr(e,t,n={}){let{globalDownloadManager:r=null}=n,i=Nn(t),a=await Jn(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
Xn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await $n(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Qn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},u=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?er(o,e):!1},d=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Zn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=l.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>d(e),transcribeData:async(e={})=>d(e),releaseContext:u,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const nr=e=>{let t=Nn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=kn(t.model.filename);return r?`${n}:${r}`:n};async function rr(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=Nn(e),o=await Kn(n),s=Un(n,o),{repoId:c}=o;if(await Wn(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await Gn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const ir=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function ar(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await Kn(Nn(i));o=e.size??null,{processingBufferBytes:s}=Ne({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await ke({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=ir(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=ir(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:or}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,sr=te(import.meta.url),cr=l.dirname(sr),lr=l.join(cr,`mlx-bridge.py`),ur=`mlx-vlm==0.4.0`,dr=`mlx-lm==0.31.1`,fr=l.join(x.homedir(),`.buttress`,`models`),pr={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:fr,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:dr,mlx_vlm_package:ur,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},mr=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t,hr=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),hr(e[t],n)):e[t]=n}),e),gr=(e={})=>{let t=structuredClone(pr);return hr(t,e),t},_r=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},vr=async e=>{await p(e,{recursive:!0})},yr=(e,t,n)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(n,`.metadata-cache`,t,`${r}.json`)},br=async(e,t,n)=>{try{let r=await h(yr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},xr=async(e,t,n,r)=>{try{let i=yr(e,t,r);await vr(l.dirname(i)),await 
b(i,JSON.stringify(n),`utf-8`)}catch{}};async function Sr(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await br(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await _r(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await _r(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await xr(i,`mlx-model-metadata`,D,n),D}function Cr({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const wr=async e=>{try{return await v(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{ne(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),Tr=new Map;async function Er({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=l.resolve(e),a=Tr.get(i);if(a){let 
e=await a;return r?.(1),e}let o=Or({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});Tr.set(i,o);try{return await o}finally{Tr.delete(i)}}const Dr=[3,10];async function Or({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=l.join(e,`bin`,`python3`),a=l.join(e,`bin`,`pip`);if(await wr(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await wr(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<Dr[0]||t===Dr[0]&&n<Dr[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${Dr.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await p(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var kr=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=re(e,[lr],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
3
3
  `);this.buffer=t.pop();for(let e of t)if(e.trim())try{this.handleMessage(JSON.parse(e))}catch(t){console.error(`[mlx-llm] Failed to parse bridge message:`,e,t)}}),this.readyPromise=new Promise((e,t)=>{this.pendingRequests.set(`__init__`,{resolve:()=>e(),reject:t}),setTimeout(()=>t(Error(`Bridge startup timeout`)),3e4)}),this.readyPromise}handleMessage(e){let t=this.pendingRequests.get(e.id);t&&(e.error?(t.reject(Error(e.error.message)),this.pendingRequests.delete(e.id)):e.event?e.event===`result`?(t.resolve(e.data),this.pendingRequests.delete(e.id)):t.onEvent?.(e.event,e.data):e.result!==void 0&&(t.resolve(e.result),this.pendingRequests.delete(e.id)))}async call(e,t={}){if(!this.process)throw Error(`Bridge not running`);let n=String(++this.requestCounter);return new Promise((r,i)=>{this.pendingRequests.set(n,{resolve:r,reject:i}),this.write({id:n,method:e,params:t})})}stream(e,t,n){if(!this.process)throw Error(`Bridge not running`);let r=String(++this.requestCounter);return{id:r,promise:new Promise((i,a)=>{this.pendingRequests.set(r,{resolve:i,reject:a,onEvent:n}),this.write({id:r,method:e,params:t})})}}cancel(e){this.process&&this.write({id:`cancel-${e}`,method:`cancel`,params:{request_id:e}})}write(e){this.process?.stdin?.write(JSON.stringify(e)+`
4
- `)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function Ar(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),x.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function jr(e){let t=Ar();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const Mr=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Nr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Pr=`</think>`;function Fr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(Pr);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(Pr.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Ir(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new 
or({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let e=Fr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=Fr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),H.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function Lr(e,t,n={}){let r=gr(t),i=jr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Nr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||fr,n=await 
Er({envDir:r.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||dr,mlxVlmPackage:r.runtime.mlx_vlm_package||ur,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new kr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let u=r.runtime.session_cache;u?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:l.join(e,`mlx-session-cache`),max_entries:u?.max_entries||100,max_size_bytes:mr(u?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=Mr(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new or({start(e){a.queue.enqueue(async()=>{let n=Ir(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Rr=e=>gr(e).model.repo_id||null;async function zr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=Ar(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await De({platform:process.platform,arch:x.arch(),totalMemoryInBytes:x.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=gr(r),t=e.model.repo_id;if(t)try{l=await Sr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||fr,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=Cr({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=Cr({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:x.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Br(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=gr(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=Ar();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let u=`mlx:${s}`;if(t?.isDownloading(u))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let d=(async()=>{try{let e=o.runtime.cache_dir||fr,t=await Er({envDir:o.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||dr,mlxVlmPackage:o.runtime.mlx_vlm_package||ur,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
4
+ `)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function Ar(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),x.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function jr(e){let t=Ar();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const Mr=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Nr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Pr=`</think>`;function Fr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(Pr);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(Pr.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Ir(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new 
or({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let e=Fr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=Fr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),H.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function Lr(e,t,n={}){let r=gr(t),i=jr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Nr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||fr,n=await 
Er({envDir:r.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||dr,mlxVlmPackage:r.runtime.mlx_vlm_package||ur,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new kr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.model.tokenizer_config&&(o.tokenizer_config=r.model.tokenizer_config),r.model.model_config&&(o.model_config=r.model.model_config),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let u=r.runtime.session_cache;u?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:l.join(e,`mlx-session-cache`),max_entries:u?.max_entries||100,max_size_bytes:mr(u?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=Mr(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new or({start(e){a.queue.enqueue(async()=>{let n=Ir(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Rr=e=>gr(e).model.repo_id||null;async function zr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=Ar(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await De({platform:process.platform,arch:x.arch(),totalMemoryInBytes:x.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=gr(r),t=e.model.repo_id;if(t)try{l=await Sr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||fr,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=Cr({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=Cr({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:x.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Br(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=gr(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=Ar();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let u=`mlx:${s}`;if(t?.isDownloading(u))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let d=(async()=>{try{let e=o.runtime.cache_dir||fr,t=await Er({envDir:o.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||dr,mlxVlmPackage:o.runtime.mlx_vlm_package||ur,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
5
5
  from huggingface_hub import snapshot_download
6
6
  path = snapshot_download("${s}", revision="${o.model.revision||`main`}")
7
7
  print(path)
8
8
  `.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let u=c.stdout.trim().split(`
9
- `).pop();i?.({localPath:u,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(u)}})();return t?.setDownload(u,d),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return vn(t,n);if(e===`ggml-stt`)return ar(t,n);if(e===`mlx-llm`)return zr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.1`,Vr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0`,"@fugood/llama.node":`^1.7.3`,"@fugood/whisper.node":`^1.0.19`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Hr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Ur({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
9
+ `).pop();i?.({localPath:u,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(u)}})();return t?.setDownload(u,d),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return vn(t,n);if(e===`ggml-stt`)return ar(t,n);if(e===`mlx-llm`)return zr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.1`,Vr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0`,"@fugood/llama.node":`^1.7.4`,"@fugood/whisper.node":`^1.0.19`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Hr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Ur({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
10
10
  `),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
11
11
  `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});u.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let d=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),u.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=d(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=qe(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,u=d(l),p=d(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=f(t?.fit?.fitsInGpu),h=f(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${u} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=d(i),l=d(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=f(t?.limitedFit?.fitsInGpu),h=f(t?.limitedFit?.fitsInCpu);(e!==o||c!==u||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
12
12
  ---`),n.push(`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fugood/buttress-server",
3
- "version": "2.24.5",
3
+ "version": "2.24.6",
4
4
  "main": "lib/index.mjs",
5
5
  "types": "lib/index.d.mts",
6
6
  "type": "module",
@@ -30,7 +30,7 @@
30
30
  "dependencies": {
31
31
  "@elysiajs/cors": "^1.1.1",
32
32
  "@elysiajs/node": "^1.4.2",
33
- "@fugood/llama.node": "^1.7.3",
33
+ "@fugood/llama.node": "^1.7.4",
34
34
  "@fugood/whisper.node": "^1.0.19",
35
35
  "@huggingface/gguf": "^0.3.2",
36
36
  "@iarna/toml": "^3.0.0",