@fugood/buttress-server 2.24.2 → 2.25.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/config/sample.toml +9 -0
- package/lib/index.d.mts +14 -2
- package/lib/index.mjs +22 -24
- package/package.json +3 -2
- package/public/lib/index.d.ts +27 -0
- package/public/lib/index.mjs +110 -0
package/lib/index.mjs
CHANGED
|
@@ -1,61 +1,61 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import*as o from"node:stream/web";import{ReadableStream as s}from"node:stream/web";import c,{mkdir as l,open as u,readFile as d,readdir as f,rename as p,stat as m,unlink as h,writeFile as g}from"node:fs/promises";import _ from"node:path";import v from"node:os";import{createHash as y}from"node:crypto";import{gguf as b}from"@huggingface/gguf";import{getBackendDevicesInfo as x,isLibVariantAvailable as S,loadModel as C}from"@fugood/llama.node";import w from"bytes";import{EventEmitter as T}from"node:events";import{initWhisper as E}from"@fugood/whisper.node";import{fileURLToPath as D}from"node:url";import{execFile as ee,execSync as O,spawn as te}from"node:child_process";import k from"node:fs";import A from"@iarna/toml";import{ZodError as ne,z as j}from"zod";import{cors as M}from"@elysiajs/cors";import N from"node-machine-id";import P from"ms";import{Buffer as F}from"node:buffer";import re from"node:dgram";const I=1024**3,L=(e,t,n)=>Math.min(Math.max(e,t),n),ie=e=>e?40:0,ae=(e=0)=>e?L(e/(12*I)*20,0,20):0,R=(e=0)=>e?L(e/(32*I)*10,0,10):0,oe=e=>e?10:0,se=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},ce=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=ie(n)+se(t,e)+ae(r),s=R(i),c=oe(a);return Math.min(100,Math.round(o+s+c))},le=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:ie(n),variant:se(t,e),gpuMemory:ae(r),cpuMemory:R(i),availability:oe(a)}),ue=[`cuda`,`vulkan`,`snapdragon`,`default`],z=.85,de=.5,fe=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],pe=e=>e&&String(e).trim().toLowerCase()||null,me=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=ue}={})=>{let 
i=[];e&&i.push(e),i.push(...fe(t)),i.push(...fe(n)),i.push(...r);let a=new Set;for(let e of i){let t=pe(e);t&&a.add(t)}return Array.from(a)},he=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},ge=e=>Array.isArray(e)?e.map(e=>({...e})):[],_e=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,ve=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=ge(_e(n,r)),l=c.some(he),u=c.filter(e=>he(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:ce(m),breakdown:o?le(m):null,error:s,timestamp:new Date().toISOString()}},B=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},ye=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=z,cpuMemoryFraction:a=de,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=ue}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=me({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t 
of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(ve({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(ve({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=B({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=B({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},be=`ggml-llm`,xe=[`cuda`,`vulkan`,`default`],Se=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=z,cpuMemoryFraction:a=de,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>ye({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:xe,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:xe}),Ce=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=ce(d),p=l?le(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified 
Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=B({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=B({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},we=new Map([[be,ye],[`ggml-stt`,Se],[`mlx-llm`,Ce]]),Te=async({platform:e,totalMemoryInBytes:t,backend:n=be,dependencies:r,...i}={})=>{let a=we.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},Ee={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},De=e=>Ee[e?String(e).toLowerCase():`f16`]||Ee.f16,Oe=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=De(a.k),h=De(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},ke=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let 
u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},Ae=e=>e?String(e).trim().toLowerCase():null,je=(e={},t=null)=>{if(!e)return null;let n=Ae(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Me=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return o*a+c},Ne=({arch:e,nLayer:t=0})=>({arch:Ae(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Pe=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let 
r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),Fe=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=Ae(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=je(t,r),o=r?Pe.get(r):null;if(!o)return Ne({arch:r,nLayer:n});let s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return Ne({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Me(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Ie=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),Le=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),Re=e=>e?String(e).trim().toLowerCase():null,ze=e=>{let t=Re(e);return t?Ie.has(t):!1},Be=e=>{let t=Re(e);return t?Le.has(t):!1},Ve=e=>ze(e)?`recurrent`:Be(e)?`hybrid`:`transformer`,He=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return 
Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},Ue=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ve(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return 
e=>Oe(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},We=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ve(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Ge=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new T;V.setMaxListeners(100);const Ke=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var 
qe=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Ke(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Ke(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Ke(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new qe,U=new qe;let Je=0;function Ye(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function Xe(e){return Je+=1,{subscriberId:Je,unsubscribe:Ye(e)}}function Ze(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function 
Qe(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function $e(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function et(e){return{timestamp:new Date().toISOString(),ggmlLlm:Ze(e),ggmlStt:Qe(e),mlxLlm:$e(e)}}const{ReadableStream:tt,WritableStream:nt}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,rt=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),rt(e[t],n)):e[t]=n}),e),it=`https://huggingface.co`,at=`https://huggingface.co/api`,W=_.join(v.homedir(),`.buttress`,`models`),ot=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],st=.5,ct={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:st},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:at,base_url:it,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},lt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],ut=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},dt=(e={})=>{let t=structuredClone(ct);if(rt(t,e),t.backend.variant=ut(t.backend.variant),t.backend.variant_preference=Array.from(new Set(lt(t.backend.variant_preference).flatMap(e=>{let t=ut(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(lt(t.runtime.prefer_variants).flatMap(e=>{let t=ut(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(lt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let 
e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||it,t.model.api_base=t.model.api_base||at,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...ct.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||ct.runtime.context_release_delay_ms),t},ft=e=>{let t=e.toLowerCase();return ot.find(e=>t.includes(e))||null},pt=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=ut(e);return t?[t]:[]})))},G=async e=>{await l(e,{recursive:!0})},mt=(e=W)=>_.join(e,`.metadata-cache`),ht=(e,t,n=W)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(mt(n),t,`${r}.json`)},gt=async(e,t,n=W)=>{try{let r=ht(e,t,n),i=await d(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,_.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},_t=async(e,t,n,r=W)=>{try{let i=ht(e,t,r);await G(_.dirname(i)),await g(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,_.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} 
cache:`,e.message)}},vt=(e=W)=>_.join(e,`.session-state-cache`),yt=(e=W)=>_.join(vt(e),`cache-map.json`),bt=(e=W)=>_.join(vt(e),`temp`),xt=(e=W)=>_.join(vt(e),`states`),St=()=>({version:1,entries:{},totalSize:0}),Ct=async(e=W)=>{try{let t=await d(yt(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?St():n}catch{return St()}},wt=async(e,t=W)=>{let n=yt(t),r=`${n}.tmp.${Date.now()}`;try{await G(_.dirname(n)),await g(r,JSON.stringify(e,null,2),`utf-8`),await p(r,n)}catch(e){throw await h(r).catch(()=>{}),e}},Tt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return y(`sha256`).update(n).digest(`hex`).slice(0,24)},Et=(e,t=W)=>_.join(xt(t),`${e}.bin`),Dt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return _.join(bt(e),`${t}.bin`)},Ot=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,kt=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},At=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Ot(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=kt(e,n.fullText);return 
r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},jt=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},Mt=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Ot(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Nt=async(e=W)=>{let t=bt(e);try{let e=await f(t),n=Date.now();await Promise.all(e.map(async e=>{let r=_.join(t,e),i=await m(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await h(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Pt=async e=>{try{return await m(e),!0}catch{return!1}},Ft=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t;var It=class 
e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Ft(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}async persistCacheMap(){try{await wt(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await 
Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Pt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await G(vt(this.baseDir)),await G(bt(this.baseDir)),await G(xt(this.baseDir)),this.cacheMap=await Ct(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await h(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await h(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Ot(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=At(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Pt(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return 
console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=Tt(a,this.metadata),s=()=>{n&&h(n).catch(()=>{}),i&&h(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=Et(o,this.baseDir),l=i?Et(`${o}-prompt`,this.baseDir):null;try{await G(_.dirname(c)),await p(n,c);let s=await m(c),u=0;if(i&&l)try{await p(i,l),u=(await m(l)).size,console.log(`[SessionCache] Saved prompt state: ${l}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let d={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:l||null,promptStateSize:u,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=d,this.cacheMap.totalSize+=s.size+u,this.requiresExactMatch()||await Mt(this.cacheMap,e,o,this.metadata),await jt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await wt(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),d}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await G(bt(this.baseDir)),Dt(this.baseDir)}async cleanup(){await Nt(this.baseDir)}};const Lt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return 
n.json()},Rt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},zt=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await gt(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await b(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await _t(r,`range-metadata`,o,n),o},Bt=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},K=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Vt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new nt({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Ht=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await 
gt(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Rt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await _t(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Lt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:ot,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=ft(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||ft(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await Lt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await Rt(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await _t(i,`artifact-info`,y,r),y},Ut=/^mmproj-.*\.gguf$/i,Wt=async(e,t)=>{if(!e.model.enable_mtmd)return null;let 
n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await gt(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Rt(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await _t(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:_.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:_.resolve(e.model.mmproj_local_path)};return await _t(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await Lt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),l=c?.siblings||c?.files||[],u=l.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),d=e.model.mmproj_filename;if(d){if(!u.includes(d))throw Error(`mmproj file "${d}" not found in repo ${i}`)}else{let e=u.filter(e=>Ut.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let n=t?.quantization&&String(t.quantization).toLowerCase();d=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let f=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${d}`,p=l.find(e=>(e.rfilename||e.path||e.filename)===d),m=Number(p?.size);if(!Number.isFinite(m)||m<=0){let e=await Rt(f,{headers:r});m=Number(e.headers.get(`content-length`))||null}let 
h={repoId:i,revision:a,filename:d,url:f,size:m,headers:r};return await _t(o,`artifact-info`,h,n),h},Gt=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Kt=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=pt(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?ct.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Te({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},qt=async e=>{let t=await Ht(e),n=await Wt(e,t),r=await zt(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=He(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Fe({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),j={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},M=j.n_ctx?Number(j.n_ctx):null,N=M||a||4096,P=[],F=[],re=!0;if(M&&a&&M>a){re=!1;let e=`Requested context length (${M}) exceeds model training context 
(${a})`;P.push(e),F.push(e),N=a}M&&!a&&P.push(`Model metadata missing training context length, using requested value`);let I={k:j.cache_type_k,v:j.cache_type_v},L=t.size>0?t.size:0,ie=Ue({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:I,swaConfig:k,kvUnified:j.kv_unified,nParallel:j.n_parallel,swaFull:j.swa_full,arch:i,attentionLayerCount:m}),ae=We({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:j.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=await Kt(e,{modelBytes:L,kvCacheBytes:ie(N)+ae}),oe=R.selected.totalMemory||0,se=oe*(e.backend.gpu_memory_fraction||1),ce=e.backend.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),le=Math.max(0,v.totalmem()*ce),ue=R.selected.hasGpu?se:le,z=Ge({maxCtx:N,availableMemory:ue,modelBytes:L,kvBytesForCtx:ie});if(!M&&z){let e=a?Math.min(z,a):z,t=Math.max(32,e);t<N&&P.push(`Context length capped to ${t} by memory limits`),N=t}N>z&&(N=z);let de=Math.floor(z);console.log(`[buttress] Memory-limited context length: ${de}`);let fe=ie(N),pe=L+fe+ae,me=w?L/(w+1):L,he=0;R.selected.hasGpu&&me>0&&(he=Math.min(w+1,Math.max(0,Math.floor(se/me)))),console.log(`[buttress] Auto GPU layer capacity (${R.selected.variant}): ${he}/${w+1}`);let ge;ge=j.n_gpu_layers===`auto`||j.n_gpu_layers==null?he:Math.max(0,Math.min(Number(j.n_gpu_layers)||0,w+1));let _e=(()=>{let e=j.flash_attn_type&&String(j.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:R.selected.hasGpu?`auto`:`off`})(),ve=e.runtime.cache_dir,B=Bt(e,t),ye=await K(B,t.size),be=Gt(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:re,backend:`ggml-llm`,warnings:P,errors:F,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,dense_first:k.denseFirst,type:k.type,layers:k.swaLayers}:null}},runtime:{...j,variant:R.selected.variant,n_ctx:N,requested_ctx:M,n_gpu_layers:ge,auto_gpu_layers:he,flash_attn_type:_e,cache_type_k:I.k,cache_type_v:I.v,estimated_max_n_ctx:de},resources:{modelBytes:L,kvCacheBytes:fe,recurrentMemoryBytes:ae,totalEstimatedBytes:pe,gpuCapacityBytes:oe,gpuUsableBytes:se,cpuUsableBytes:le,fit:R.selected.fit},devices:{selected:R.selected,attempts:R.attempts},download:{cacheDir:ve,localPath:B,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:R,cacheTypes:I,localPath:B,localExists:ye}},Jt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new tt({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},Yt=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&h(i).catch(()=>{}),c&&h(c).catch(()=>{})};return new tt({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},Xt=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return y(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},Zt=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),l=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),u=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${l}`,d=_.join(t,l);await K(d)||await Vt(u,o.headers,d,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Vt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},Qt=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Vt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},$t=async(e,t)=>{let n=Xt(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await Zt(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await C(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await Qt(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},en=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},tn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return en(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?en(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await en(e,t)},i),!0)};async function nn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=dt(t),a=await qt(i),o=new It(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await $t(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=Xt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=t.prompt||``,o=null,c=null;if(!a&&t.messages){({messages:o}=t),c={chatTemplate:t.chat_template||t.chatTemplate,jinja:t.jinja??!0,tools:t.tools,parallel_tool_calls:t.parallel_tool_calls,tool_choice:t.tool_choice,reasoning_format:t.reasoning_format,enable_thinking:t.enable_thinking,add_generation_prompt:t.add_generation_prompt,now:t.now,chat_template_kwargs:t.chat_template_kwargs,force_pure_content:t.force_pure_content};let e=await i.context.getFormattedChat(o,c.chatTemplate,c);a=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&a){let{options:e}=await s.sessionCache.prepareCompletionOptions(t,a,i.context),n=await s.sessionCache.generateTempStatePath(),r=(await i.context.tokenize(a))?.tokens?.length||0,o={...e,save_state_path:n},c=s.sessionCache.requiresExactMatch(),l=!!o.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),o.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Yt(i.context,o,s.sessionCache,a,n,r,s.id,d,u)}let l={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Jt(i.context,t,s.id,l)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=Xt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=await 
i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=Xt(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=Xt(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=Xt(s),t=s.contexts.get(e);return t?tn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):en(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const rn=e=>{let t=dt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function an(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let 
n=dt(e),o=await Ht(n),s=Bt(n,o),{repoId:c}=o,l=await Wt(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),u=Gt(n,l),d=async()=>{if(!l||!u||l.localPath)return;if(await K(u,l.size)){console.log(`[Download] mmproj already exists: ${u}`);return}let e=t.getDownload(u);if(e){await e;return}let n=(async()=>{try{await Vt(l.url,l.headers,u,l.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(u)}})();t.setDownload(u,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await d().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let f=t.getDownload(s);if(f)return console.log(`[Download] Already downloading: ${c}`),f.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let p=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let l=String(s).padStart(5,`0`),u=o.filename.replace(e,`-${l}-of-${String(i).padStart(5,`0`)}.gguf`),d=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=_.join(t,u);await K(f)||await Vt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Vt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await d(),console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: 
${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,p),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function on(e){let t=dt(e),n=await Ht(t),r=await zt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=He(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Fe({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),j=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,M={k:t.model.cache_type_k,v:t.model.cache_type_v},N=n.size>0?n.size:0,P=t.model.n_parallel||4,F=Ue({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:M,swaConfig:k,kvUnified:t.model.kv_unified,nParallel:P,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),re=We({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:P,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),I=t.backend?.gpu_memory_fraction==null?ct.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),L=t.backend?.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),ie=await 
Kt(t,{modelBytes:N,kvCacheBytes:F(j)}),ae=(ie.selected.totalMemory||0)*I,R=Math.max(0,v.totalmem()*L),oe=Ge({maxCtx:j,availableMemory:ie.selected.hasGpu?ae:R,modelBytes:N,kvBytesForCtx:F}),se=F(j),ce=F(oe);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:O,nEmbdHeadV:te,nHeadCount:E,nHeadKvCount:D,kvLayerCount:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,denseFirst:k.denseFirst,type:k.type,layers:k.swaLayers}:null},modelBytes:N,kvCacheBytes:se,limitedKvCacheBytes:ce,memoryLimitedCtx:oe,recurrentMemoryBytes:re,quantization:{name:n.quantization||null,fileType:p,version:f}}}const sn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ce(e):0;async function cn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await on(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await Te({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=sn(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let y=null,b=null;if(e){let t=sn(e);b={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=b.fit,a=b.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}y={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!y&&(y={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let C=null;return i&&(C={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:b,comparison:y,modelConfig:C}}const{WritableStream:ln}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,un=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),un(e[t],n)):e[t]=n}),e),dn=`https://huggingface.co`,fn=`https://huggingface.co/api`,pn=_.join(v.homedir(),`.buttress`,`models`),mn=[`cuda`,`vulkan`,`default`],hn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],gn=`fp16`,_n=.5,vn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],yn=e=>{if(!e)return null;let t=e.toLowerCase();return 
vn.find(e=>t.includes(e))||null},bn={backend:{type:`ggml-stt`,variant:null,variant_preference:mn,gpu_memory_fraction:.85,cpu_memory_fraction:_n},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,gn,`q5_1`],allow_local_file:!1,local_path:null,api_base:fn,base_url:dn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:pn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},xn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],Sn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Cn=(e={})=>{let t=structuredClone(bn);if(un(t,e),t.backend.variant=Sn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(xn(t.backend.variant_preference||mn).flatMap(e=>{let t=Sn(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...mn]),t.runtime.prefer_variants=Array.from(new Set(xn(t.runtime.prefer_variants).flatMap(e=>{let t=Sn(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(xn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||dn,t.model.api_base=t.model.api_base||fn,t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):pn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||bn.runtime.context_release_delay_ms),t},wn=e=>{let t=e.toLowerCase();return hn.find(e=>t.includes(e))||null},Tn=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=Sn(e);t&&n.add(t)}return Array.from(n)},En=async e=>{await l(e,{recursive:!0})},Dn=(e=pn)=>_.join(e,`.metadata-cache`),On=(e,t,n=pn)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(Dn(n),t,`${r}.json`)},kn=async(e,t,n=pn)=>{try{let r=await d(On(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},An=async(e,t,n,r=pn)=>{try{let i=On(e,t,r);await En(_.dirname(i)),await g(i,JSON.stringify(n),`utf-8`)}catch{}},jn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Mn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Nn=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Pn=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Fn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await En(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new ln({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},In=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await kn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Mn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:wn(c||``),headers:o};return await An(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await jn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:hn,f=()=>{for(let e of d)if(e===gn){let e=u.find(e=>{let t=e.toLowerCase();return!hn.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=wn(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||wn(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
Mn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await An(i,`artifact-info`,g,r),g},Ln=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=Tn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?bn.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?_n:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Te({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Rn=async e=>{let t=await In(e),n=ke({modelBytes:t.size>0?t.size:0}),r=await Ln(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Nn(e,t),l=await Pn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:yn(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},zn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await Pn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await Pn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Fn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
Bn=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Vn=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Hn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await zn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
E({filePath:i,useFlashAttn:e.plan.info.runtime.flash_attn_type===`on`,useGpu:e.plan.info.runtime.n_gpu_layers>0,nThreads:e.plan.info.runtime.n_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},Un=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),Wn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Un(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Un(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await Un(e,t)},i),!0)};async function Gn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=Cn(t),a=await Rn(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
Bn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await Un(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Hn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},l=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Wn(o,e):!1},u=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Vn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=_.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>u(e),transcribeData:async(e={})=>u(e),releaseContext:l,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const Kn=e=>{let t=Cn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=yn(t.model.filename);return r?`${n}:${r}`:n};async function qn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=Cn(e),o=await In(n),s=Nn(n,o),{repoId:c}=o;if(await Pn(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await Fn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const Jn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ce(e):0;async function Yn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await In(Cn(i));o=e.size??null,{processingBufferBytes:s}=ke({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await Te({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=Jn(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=Jn(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:Xn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,Zn=D(import.meta.url),Qn=_.dirname(Zn),$n=_.join(Qn,`mlx-bridge.py`),er=`mlx-vlm==0.4.0`,tr=`mlx-lm==0.31.1`,nr=_.join(v.homedir(),`.buttress`,`models`),rr={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:nr,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:tr,mlx_vlm_package:er,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},ir=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t,ar=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),ar(e[t],n)):e[t]=n}),e),or=(e={})=>{let t=structuredClone(rr);return ar(t,e),t},sr=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},cr=async e=>{await l(e,{recursive:!0})},lr=(e,t,n)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(n,`.metadata-cache`,t,`${r}.json`)},ur=async(e,t,n)=>{try{let r=await d(lr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},dr=async(e,t,n,r)=>{try{let i=lr(e,t,r);await cr(_.dirname(i)),await 
g(i,JSON.stringify(n),`utf-8`)}catch{}};async function fr(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await ur(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await sr(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await sr(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await dr(i,`mlx-model-metadata`,D,n),D}function pr({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const mr=async e=>{try{return await m(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{ee(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),hr=new Map;async function gr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.resolve(e),a=hr.get(i);if(a){let 
e=await a;return r?.(1),e}let o=vr({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});hr.set(i,o);try{return await o}finally{hr.delete(i)}}const _r=[3,10];async function vr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.join(e,`bin`,`python3`),a=_.join(e,`bin`,`pip`);if(await mr(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await mr(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<_r[0]||t===_r[0]&&n<_r[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${_r.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await l(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var yr=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=te(e,[$n],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
|
|
2
|
+
import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import*as o from"node:stream/web";import{ReadableStream as s}from"node:stream/web";import c,{mkdir as l,open as u,readFile as d,readdir as f,rename as p,stat as m,unlink as h,writeFile as g}from"node:fs/promises";import _ from"node:path";import v from"node:os";import{createHash as y}from"node:crypto";import{gguf as b}from"@huggingface/gguf";import{getBackendDevicesInfo as x,isLibVariantAvailable as S,loadModel as C}from"@fugood/llama.node";import w from"bytes";import{EventEmitter as T}from"node:events";import{initWhisper as E}from"@fugood/whisper.node";import{fileURLToPath as D}from"node:url";import{execFile as ee,execSync as O,spawn as te}from"node:child_process";import k from"node:fs";import A from"@iarna/toml";import{ZodError as ne,z as j}from"zod";import{importSPKI as M,jwtVerify as N}from"jose";import{cors as P}from"@elysiajs/cors";import F from"node-machine-id";import re from"ms";import{Buffer as I}from"node:buffer";import L from"node:dgram";const ie=1024**3,ae=(e,t,n)=>Math.min(Math.max(e,t),n),R=e=>e?40:0,oe=(e=0)=>e?ae(e/(12*ie)*20,0,20):0,se=(e=0)=>e?ae(e/(32*ie)*10,0,10):0,ce=e=>e?10:0,le=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},ue=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=R(n)+le(t,e)+oe(r),s=se(i),c=ce(a);return 
Math.min(100,Math.round(o+s+c))},z=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:R(n),variant:le(t,e),gpuMemory:oe(r),cpuMemory:se(i),availability:ce(a)}),de=[`cuda`,`vulkan`,`snapdragon`,`default`],fe=.85,pe=.5,me=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],he=e=>e&&String(e).trim().toLowerCase()||null,ge=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=de}={})=>{let i=[];e&&i.push(e),i.push(...me(t)),i.push(...me(n)),i.push(...r);let a=new Set;for(let e of i){let t=he(e);t&&a.add(t)}return Array.from(a)},_e=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},ve=e=>Array.isArray(e)?e.map(e=>({...e})):[],ye=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,be=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=ve(ye(n,r)),l=c.some(_e),u=c.filter(e=>_e(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:ue(m),breakdown:o?z(m):null,error:s,timestamp:new Date().toISOString()}},B=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let 
r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},xe=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=fe,cpuMemoryFraction:a=pe,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=de}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=ge({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(be({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(be({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=B({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=B({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return 
v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},Se=`ggml-llm`,Ce=[`cuda`,`vulkan`,`default`],we=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=fe,cpuMemoryFraction:a=pe,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>xe({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:Ce,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:Ce}),Te=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=ue(d),p=l?z(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=B({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=B({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},Ee=new Map([[Se,xe],[`ggml-stt`,we],[`mlx-llm`,Te]]),De=async({platform:e,totalMemoryInBytes:t,backend:n=Se,dependencies:r,...i}={})=>{let a=Ee.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await 
a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},Oe={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},ke=e=>Oe[e?String(e).toLowerCase():`f16`]||Oe.f16,Ae=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=ke(a.k),h=ke(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},je=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},Me=e=>e?String(e).trim().toLowerCase():null,Ne=(e={},t=null)=>{if(!e)return null;let n=Me(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Pe=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return 
o*a+c},Fe=({arch:e,nLayer:t=0})=>({arch:Me(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Ie=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),Le=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=Me(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=Ne(t,r),o=r?Ie.get(r):null;if(!o)return Fe({arch:r,nLayer:n});let 
s=o({nLayer:i,nSwa:a,metadata:t});/* A resolver result without a positive window is treated as "no sliding window". */if(!s||!s.enabled||!s.window||s.window<=0)return Fe({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),/* An explicit swaLayers from the resolver wins; otherwise it is derived from the pattern via Pe. */d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Pe(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},/* Re: architecture names that Ue classifies as "recurrent". */Re=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),/* ze: architecture names that Ue classifies as "hybrid". */ze=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),/* Be: trimmed, lower-cased string, or null for falsy input. */Be=e=>e?String(e).trim().toLowerCase():null,/* Ve / He: membership tests against Re / ze after normalising with Be. */Ve=e=>{let t=Be(e);return t?Re.has(t):!1},He=e=>{let t=Be(e);return t?ze.has(t):!1},/* Ue(arch): "recurrent", "hybrid", or "transformer" for anything not listed in Re or ze. */Ue=e=>Ve(e)?`recurrent`:He(e)?`hybrid`:`transformer`,/* We(metadata): extracts model hyper-parameters from key/value metadata (presumably GGUF metadata - confirm against the caller). Keys are read as "<arch>.*"; the core fields fall back to "llama.*". Everything arch-dependent is null when "general.architecture" is missing. head_count_kv may be an array: nHeadKv is then its largest positive entry and attentionLayerCount the number of positive entries. */We=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let 
d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,/* recurrentLayerCount is only known when head_count_kv was an array: block_count minus the layers with a positive KV head count. */C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},/* Ge(options): returns a function mapping a context length to the KV cache estimate from Ae. Recurrent architectures get a constant 0. Hybrid architectures use attentionLayerCount as the layer count when it is provided. The estimate is multiplied by nParallel unless kvUnified is set; with kvUnified and a sliding window, nParallel is passed to Ae as the SWA context multiplier instead. */Ge=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ue(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Ae(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},/* Ke(options): recurrent-state memory estimate. Returns 0 for transformer architectures, for a zero layer count, or when neither the rwkv head size nor the ssm state/inner sizes are available. Result is (conv + state element counts) * nSeqMax * layer count * 4. */Ke=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ue(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let 
e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},qe=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new T;V.setMaxListeners(100);const Je=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var Ye=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Je(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Je(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Je(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new Ye,U=new Ye;let Xe=0;function Ze(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function Qe(e){return Xe+=1,{subscriberId:Xe,unsubscribe:Ze(e)}}function $e(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return 
i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function et(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function tt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function nt(e){return{timestamp:new Date().toISOString(),ggmlLlm:$e(e),ggmlStt:et(e),mlxLlm:tt(e)}}const{ReadableStream:rt,WritableStream:it}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,at=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),at(e[t],n)):e[t]=n}),e),/* ot / st: default Hugging Face base URL and API base. */ot=`https://huggingface.co`,st=`https://huggingface.co/api`,/* W: default model cache directory, ~/.buttress/models. */W=_.join(v.homedir(),`.buttress`,`models`),/* ct: quantization name fragments; mt returns the first one contained in a filename, so order matters (e.g. q5_k_m is listed before q5_k). */ct=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],/* lt: default CPU memory fraction. */lt=.5,/* ut: default backend configuration; pt deep-clones it before merging user overrides. */ut={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:lt},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:st,base_url:ot,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},/* dt(value, fallback): coerces to an array; a falsy non-zero value yields a copy of fallback, arrays lose null/undefined items, any other value is wrapped. */dt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],/* ft(name): lower-cased variant name if it is one of cuda/vulkan/snapdragon/default, otherwise null. */ft=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},/* pt(overrides): builds a normalised config from the ut defaults plus overrides: validates and de-duplicates variant lists, lower-cases quantization preferences, and coerces numeric and boolean model options. */pt=(e={})=>{let t=structuredClone(ut);if(at(t,e),t.backend.variant=ft(t.backend.variant),t.backend.variant_preference=Array.from(new Set(dt(t.backend.variant_preference).flatMap(e=>{let t=ft(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(dt(t.runtime.prefer_variants).flatMap(e=>{let t=ft(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(dt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let 
e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||ot,t.model.api_base=t.model.api_base||st,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...ut.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||ut.runtime.context_release_delay_ms),t},mt=e=>{let t=e.toLowerCase();return ct.find(e=>t.includes(e))||null},ht=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=ft(e);return t?[t]:[]})))},G=async e=>{await l(e,{recursive:!0})},gt=(e=W)=>_.join(e,`.metadata-cache`),_t=(e,t,n=W)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(gt(n),t,`${r}.json`)},vt=async(e,t,n=W)=>{try{let r=_t(e,t,n),i=await d(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,_.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},yt=async(e,t,n,r=W)=>{try{let i=_t(e,t,r);await G(_.dirname(i)),await g(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,_.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} 
cache:`,e.message)}},/* bt / xt / St / Ct: session-state cache root (<cacheDir>/.session-state-cache), its cache-map.json index, and its temp and states sub-directories. */bt=(e=W)=>_.join(e,`.session-state-cache`),xt=(e=W)=>_.join(bt(e),`cache-map.json`),St=(e=W)=>_.join(bt(e),`temp`),Ct=(e=W)=>_.join(bt(e),`states`),/* wt: empty cache map. */wt=()=>({version:1,entries:{},totalSize:0}),/* Tt(cacheDir): loads the cache map; an unreadable, unparsable or entries-less file yields an empty map. */Tt=async(e=W)=>{try{let t=await d(xt(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?wt():n}catch{return wt()}},/* Et(map, cacheDir): writes the map to a timestamped temp file and renames it over cache-map.json; on failure the temp file is removed and the error rethrown. */Et=async(e,t=W)=>{let n=xt(t),r=`${n}.tmp.${Date.now()}`;try{await G(_.dirname(n)),await g(r,JSON.stringify(e,null,2),`utf-8`),await p(r,n)}catch(e){throw await h(r).catch(()=>{}),e}},/* Dt(text, metadata): entry id = first 24 hex chars of sha256 over the text plus the runtime parameters that affect saved state. */Dt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return y(`sha256`).update(n).digest(`hex`).slice(0,24)},/* Ot / kt: final state file path for an id, and a fresh random temp file path. */Ot=(e,t=W)=>_.join(Ct(t),`${e}.bin`),kt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return _.join(St(e),`${t}.bin`)},/* At(entryMeta, currentMeta): compatibility check. Every field must match exactly except n_ctx, where the entry value must be >= the current value; isRecurrent/isHybrid are compared as booleans. */At=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,/* jt(a, b): length of the common prefix. */jt=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},/* Mt(promptText, metadata, cacheMap, exactMatch): among compatible entries, returns either the one whose fullText equals the prompt (exactMatch) or the one sharing the longest common prefix with it, ties going to the longer fullText. Returns null when nothing matches. */Mt=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>At(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=jt(e,n.fullText);return 
r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},/* Nt(cacheMap, maxBytes, maxEntries): evicts entries in ascending lastAccessedAt order until totalSize <= maxBytes and the entry count <= maxEntries. Deletes their state files (errors ignored), removes them from cacheMap.entries, updates totalSize and returns the evicted ids. */Nt=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},/* Pt(cacheMap, text, keepId, metadata): removes compatible entries other than keepId whose fullText is a strict prefix of text, deleting their files and subtracting their sizes; returns the removed ids. */Pt=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&At(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},/* Ft(cacheDir): deletes temp state files whose mtime is more than 36e5 ms (one hour) old; all errors are ignored. */Ft=async(e=W)=>{let t=St(e);try{let e=await f(t),n=Date.now();await Promise.all(e.map(async e=>{let r=_.join(t,e),i=await m(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await h(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},/* It(path): true when stat succeeds. */It=async e=>{try{return await m(e),!0}catch{return!1}},/* Lt(value, fallback): numbers pass through; strings go through w.parse (the imported bytes package) with fallback when that yields null/undefined; everything else yields fallback. */Lt=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t;var Rt=class 
e{/* Session-state cache bound to one model plan. The constructor captures the runtime parameters that decide whether a saved state can be reused (see At); cacheMap stays null until initialize() succeeds. */constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Lt(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}/* Records the is_recurrent / is_hybrid flags from the given model info object. */updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}/* True for recurrent or hybrid models. */requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}/* Best-effort write of the cache map; failures are only logged. */async persistCacheMap(){try{await Et(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}/* True when token array e is a prefix of token array t. */static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}/* Tokenizes text t with context e and returns the tokens as a plain array. */static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}/* For recurrent/hybrid models: returns the first candidate entry (in input order, not the longest) whose tokenized fullText, or else promptText when a prompt state exists, is a token prefix of the tokenized prompt. */async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await 
Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await It(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}/* Creates the cache directories and loads the cache map; on failure the cache disables itself. */async initialize(){if(!(!this.enabled||this.initialized))try{await G(bt(this.baseDir)),await G(St(this.baseDir)),await G(Ct(this.baseDir)),this.cacheMap=await Tt(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}/* Drops an entry whose state file is gone: deletes any remaining files, removes it from the map and persists. */async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await h(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await h(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}/* Finds a reusable entry for prompt text e. With a tokenizer context t and a recurrent/hybrid model it uses token-prefix matching; otherwise Mt (exact match for recurrent/hybrid, longest prefix for the rest). Returns {entry, usePromptState} or null, and refreshes lastAccessedAt on a hit. */async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>At(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=Mt(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await It(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}/* Returns {options, cacheEntry, promptPrefix}; on a hit, options gains load_state_path pointing at the matched state file. */async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return 
console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=Dt(a,this.metadata),s=()=>{n&&h(n).catch(()=>{}),i&&h(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=Ot(o,this.baseDir),l=i?Ot(`${o}-prompt`,this.baseDir):null;try{await G(_.dirname(c)),await p(n,c);let s=await m(c),u=0;if(i&&l)try{await p(i,l),u=(await m(l)).size,console.log(`[SessionCache] Saved prompt state: ${l}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let d={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:l||null,promptStateSize:u,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=d,this.cacheMap.totalSize+=s.size+u,this.requiresExactMatch()||await Pt(this.cacheMap,e,o,this.metadata),await Nt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await Et(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),d}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await G(St(this.baseDir)),kt(this.baseDir)}async cleanup(){await Ft(this.baseDir)}};const zt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return 
n.json()},Bt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Vt=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await vt(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await b(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await yt(r,`range-metadata`,o,n),o},Ht=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},K=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Ut=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new it({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Wt=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await 
vt(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Bt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await yt(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await zt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:ct,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=mt(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||mt(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await zt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await Bt(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await yt(i,`artifact-info`,y,r),y},Gt=/^mmproj-.*\.gguf$/i,Kt=async(e,t)=>{if(!e.model.enable_mtmd)return null;let 
n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await vt(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Bt(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await yt(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:_.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:_.resolve(e.model.mmproj_local_path)};return await yt(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await zt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),l=c?.siblings||c?.files||[],u=l.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),d=e.model.mmproj_filename;if(d){if(!u.includes(d))throw Error(`mmproj file "${d}" not found in repo ${i}`)}else{let e=u.filter(e=>Gt.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let n=t?.quantization&&String(t.quantization).toLowerCase();d=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let f=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${d}`,p=l.find(e=>(e.rfilename||e.path||e.filename)===d),m=Number(p?.size);if(!Number.isFinite(m)||m<=0){let e=await Bt(f,{headers:r});m=Number(e.headers.get(`content-length`))||null}let 
h={repoId:i,revision:a,filename:d,url:f,size:m,headers:r};return await yt(o,`artifact-info`,h,n),h},qt=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Jt=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=ht(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?ut.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?lt:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await De({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},/* Yt(config): builds the load plan for a ggml-llm model. It resolves the model and optional mmproj artifacts (Wt, Kt), reads metadata (Vt, We), derives the sliding-window config (Le), estimates KV-cache and recurrent-state memory (Ge, Ke), selects a backend variant (Jt), caps n_ctx by the training context and by memory (qe), and computes the GPU layer count. Returns {config, info, artifact, mmprojArtifact, ...}. NOTE(review): usable GPU memory below is totalMemory*(gpu_memory_fraction||1) without the clamping Jt applies - confirm the difference is intended. */Yt=async e=>{let t=await Wt(e),n=await Kt(e,t),r=await Vt(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=We(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,/* Head dimension defaults to nEmbd/nHead, or 128 when either is unknown. */ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Le({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),/* Runtime options: model.* overrides runtime.*; n_parallel defaults to 4. */j={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},M=j.n_ctx?Number(j.n_ctx):null,N=M||a||4096,P=[],F=[],re=!0;/* A requested context above the training context is recorded as both a warning and an error, marks the plan not ok, and is clamped to the training context. */if(M&&a&&M>a){re=!1;let e=`Requested context length (${M}) exceeds model training context 
(${a})`;P.push(e),F.push(e),N=a}M&&!a&&P.push(`Model metadata missing training context length, using requested value`);let I={k:j.cache_type_k,v:j.cache_type_v},L=t.size>0?t.size:0,ie=Ge({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:I,swaConfig:k,kvUnified:j.kv_unified,nParallel:j.n_parallel,swaFull:j.swa_full,arch:i,attentionLayerCount:m}),ae=Ke({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:j.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=await Jt(e,{modelBytes:L,kvCacheBytes:ie(N)+ae}),oe=R.selected.totalMemory||0,se=oe*(e.backend.gpu_memory_fraction||1),ce=e.backend.cpu_memory_fraction==null?lt:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),le=Math.max(0,v.totalmem()*ce),/* The memory budget is the GPU share when a GPU was selected, else the CPU share. */ue=R.selected.hasGpu?se:le,z=qe({maxCtx:N,availableMemory:ue,modelBytes:L,kvBytesForCtx:ie});/* Without an explicit n_ctx the context is capped by memory and the training context, with a floor of 32. */if(!M&&z){let e=a?Math.min(z,a):z,t=Math.max(32,e);t<N&&P.push(`Context length capped to ${t} by memory limits`),N=t}N>z&&(N=z);let de=Math.floor(z);console.log(`[buttress] Memory-limited context length: ${de}`);let fe=ie(N),pe=L+fe+ae,/* Per-layer size is approximated as modelBytes/(nLayer+1); the auto GPU layer count is how many such layers fit in the usable GPU memory. */me=w?L/(w+1):L,he=0;R.selected.hasGpu&&me>0&&(he=Math.min(w+1,Math.max(0,Math.floor(se/me)))),console.log(`[buttress] Auto GPU layer capacity (${R.selected.variant}): ${he}/${w+1}`);let ge;ge=j.n_gpu_layers===`auto`||j.n_gpu_layers==null?he:Math.max(0,Math.min(Number(j.n_gpu_layers)||0,w+1));/* flash_attn_type: an explicit on/off is kept; otherwise "auto" with a GPU and "off" without. */let _e=(()=>{let e=j.flash_attn_type&&String(j.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:R.selected.hasGpu?`auto`:`off`})(),ve=e.runtime.cache_dir,ye=Ht(e,t),be=await K(ye,t.size),B=qt(e,n),xe=B?await 
K(B,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:B,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:re,backend:`ggml-llm`,warnings:P,errors:F,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,dense_first:k.denseFirst,type:k.type,layers:k.swaLayers}:null}},runtime:{...j,variant:R.selected.variant,n_ctx:N,requested_ctx:M,n_gpu_layers:ge,auto_gpu_layers:he,flash_attn_type:_e,cache_type_k:I.k,cache_type_v:I.v,estimated_max_n_ctx:de},resources:{modelBytes:L,kvCacheBytes:fe,recurrentMemoryBytes:ae,totalEstimatedBytes:pe,gpuCapacityBytes:oe,gpuUsableBytes:se,cpuUsableBytes:le,fit:R.selected.fit},devices:{selected:R.selected,attempts:R.attempts},download:{cacheDir:ve,localPath:ye,exists:be},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:B,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:R,cacheTypes:I,localPath:ye,localExists:be}},/* Xt(context, options, generatorId, modelInfo): wraps context.parallel.completion in a ReadableStream of {event, data} objects ("token", then "result" or "error") and records the outcome in the completion history H. Cancelling the stream calls the completion's stop function. */Xt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new rt({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},Zt=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&h(i).catch(()=>{}),c&&h(c).catch(()=>{})};return new rt({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},Qt=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return y(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},$t=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),l=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),u=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${l}`,d=_.join(t,l);await K(d)||await Ut(u,o.headers,d,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Ut(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},en=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Ut(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},tn=async(e,t)=>{let n=Qt(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await $t(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await C(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await en(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},nn=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},rn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return nn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?nn(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await nn(e,t)},i),!0)};async function an(e,t,n={}){let{globalDownloadManager:r=null}=n,i=pt(t),a=await Yt(i),o=new Rt(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await tn(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=Qt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=t.prompt||``,o=null,c=null;if(!a&&t.messages){({messages:o}=t),c={chatTemplate:t.chat_template||t.chatTemplate,jinja:t.jinja??!0,tools:t.tools,parallel_tool_calls:t.parallel_tool_calls,tool_choice:t.tool_choice,reasoning_format:t.reasoning_format,enable_thinking:t.enable_thinking,add_generation_prompt:t.add_generation_prompt,now:t.now,chat_template_kwargs:t.chat_template_kwargs,force_pure_content:t.force_pure_content};let e=await i.context.getFormattedChat(o,c.chatTemplate,c);a=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&a){let{options:e}=await s.sessionCache.prepareCompletionOptions(t,a,i.context),n=await s.sessionCache.generateTempStatePath(),r=(await i.context.tokenize(a))?.tokens?.length||0,o={...e,save_state_path:n},c=s.sessionCache.requiresExactMatch(),l=!!o.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),o.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Zt(i.context,o,s.sessionCache,a,n,r,s.id,d,u)}let l={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Xt(i.context,t,s.id,l)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=Qt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=await 
i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=Qt(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=Qt(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=Qt(s),t=s.contexts.get(e);return t?rn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):nn(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const on=e=>{let t=pt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function sn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let 
n=pt(e),o=await Wt(n),s=Ht(n,o),{repoId:c}=o,l=await Kt(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),u=qt(n,l),d=async()=>{if(!l||!u||l.localPath)return;if(await K(u,l.size)){console.log(`[Download] mmproj already exists: ${u}`);return}let e=t.getDownload(u);if(e){await e;return}let n=(async()=>{try{await Ut(l.url,l.headers,u,l.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(u)}})();t.setDownload(u,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await d().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let f=t.getDownload(s);if(f)return console.log(`[Download] Already downloading: ${c}`),f.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let p=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let l=String(s).padStart(5,`0`),u=o.filename.replace(e,`-${l}-of-${String(i).padStart(5,`0`)}.gguf`),d=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=_.join(t,u);await K(f)||await Ut(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Ut(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await d(),console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: 
${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,p),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function cn(e){let t=pt(e),n=await Wt(t),r=await Vt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=We(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Le({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),j=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,M={k:t.model.cache_type_k,v:t.model.cache_type_v},N=n.size>0?n.size:0,P=t.model.n_parallel||4,F=Ge({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:M,swaConfig:k,kvUnified:t.model.kv_unified,nParallel:P,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),re=Ke({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:P,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),I=t.backend?.gpu_memory_fraction==null?ut.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),L=t.backend?.cpu_memory_fraction==null?lt:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),ie=await 
Jt(t,{modelBytes:N,kvCacheBytes:F(j)}),ae=(ie.selected.totalMemory||0)*I,R=Math.max(0,v.totalmem()*L),oe=qe({maxCtx:j,availableMemory:ie.selected.hasGpu?ae:R,modelBytes:N,kvBytesForCtx:F}),se=F(j),ce=F(oe);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:O,nEmbdHeadV:te,nHeadCount:E,nHeadKvCount:D,kvLayerCount:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,denseFirst:k.denseFirst,type:k.type,layers:k.swaLayers}:null},modelBytes:N,kvCacheBytes:se,limitedKvCacheBytes:ce,memoryLimitedCtx:oe,recurrentMemoryBytes:re,quantization:{name:n.quantization||null,fileType:p,version:f}}}const ln=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function un(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await cn(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await De({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=ln(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let y=null,b=null;if(e){let t=ln(e);b={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=b.fit,a=b.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}y={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!y&&(y={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let C=null;return i&&(C={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:b,comparison:y,modelConfig:C}}const{WritableStream:dn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,fn=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),fn(e[t],n)):e[t]=n}),e),pn=`https://huggingface.co`,mn=`https://huggingface.co/api`,hn=_.join(v.homedir(),`.buttress`,`models`),gn=[`cuda`,`vulkan`,`default`],_n=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],vn=`fp16`,yn=.5,bn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],xn=e=>{if(!e)return null;let t=e.toLowerCase();return 
bn.find(e=>t.includes(e))||null},Sn={backend:{type:`ggml-stt`,variant:null,variant_preference:gn,gpu_memory_fraction:.85,cpu_memory_fraction:yn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,vn,`q5_1`],allow_local_file:!1,local_path:null,api_base:mn,base_url:pn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:hn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},Cn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],wn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Tn=(e={})=>{let t=structuredClone(Sn);if(fn(t,e),t.backend.variant=wn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(Cn(t.backend.variant_preference||gn).flatMap(e=>{let t=wn(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...gn]),t.runtime.prefer_variants=Array.from(new Set(Cn(t.runtime.prefer_variants).flatMap(e=>{let t=wn(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(Cn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||pn,t.model.api_base=t.model.api_base||mn,t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):hn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||Sn.runtime.context_release_delay_ms),t},En=e=>{let t=e.toLowerCase();return _n.find(e=>t.includes(e))||null},Dn=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=wn(e);t&&n.add(t)}return Array.from(n)},On=async e=>{await l(e,{recursive:!0})},kn=(e=hn)=>_.join(e,`.metadata-cache`),An=(e,t,n=hn)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(kn(n),t,`${r}.json`)},jn=async(e,t,n=hn)=>{try{let r=await d(An(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},Mn=async(e,t,n,r=hn)=>{try{let i=An(e,t,r);await On(_.dirname(i)),await g(i,JSON.stringify(n),`utf-8`)}catch{}},Nn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Pn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Fn=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},In=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Ln=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await On(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new dn({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Rn=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await jn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Pn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:En(c||``),headers:o};return await Mn(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Nn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:_n,f=()=>{for(let e of d)if(e===vn){let e=u.find(e=>{let t=e.toLowerCase();return!_n.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=En(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||En(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
Pn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await Mn(i,`artifact-info`,g,r),g},zn=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=Dn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?Sn.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?yn:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await De({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Bn=async e=>{let t=await Rn(e),n=je({modelBytes:t.size>0?t.size:0}),r=await zn(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Fn(e,t),l=await In(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:xn(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},Vn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await In(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await In(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Ln(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
Hn=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Un=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Wn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await Vn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
E({filePath:i,useFlashAttn:e.plan.info.runtime.flash_attn_type===`on`,useGpu:e.plan.info.runtime.n_gpu_layers>0,nThreads:e.plan.info.runtime.n_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},Gn=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),Kn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Gn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Gn(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await Gn(e,t)},i),!0)};async function qn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=Tn(t),a=await Bn(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
Hn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await Gn(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Wn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},l=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Kn(o,e):!1},u=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Un(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=_.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>u(e),transcribeData:async(e={})=>u(e),releaseContext:l,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const Jn=e=>{let t=Tn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=xn(t.model.filename);return r?`${n}:${r}`:n};async function Yn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=Tn(e),o=await Rn(n),s=Fn(n,o),{repoId:c}=o;if(await In(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await Ln(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const Xn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function Zn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await Rn(Tn(i));o=e.size??null,{processingBufferBytes:s}=je({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await De({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=Xn(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=Xn(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:Qn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,$n=D(import.meta.url),er=_.dirname($n),tr=_.join(er,`mlx-bridge.py`),nr=`mlx-vlm==0.4.0`,rr=`mlx-lm==0.31.1`,ir=_.join(v.homedir(),`.buttress`,`models`),ar={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:ir,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:rr,mlx_vlm_package:nr,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},or=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t,sr=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),sr(e[t],n)):e[t]=n}),e),cr=(e={})=>{let t=structuredClone(ar);return sr(t,e),t},lr=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},ur=async e=>{await l(e,{recursive:!0})},dr=(e,t,n)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(n,`.metadata-cache`,t,`${r}.json`)},fr=async(e,t,n)=>{try{let r=await d(dr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},pr=async(e,t,n,r)=>{try{let i=dr(e,t,r);await ur(_.dirname(i)),await 
g(i,JSON.stringify(n),`utf-8`)}catch{}};async function mr(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await fr(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await lr(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await lr(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await pr(i,`mlx-model-metadata`,D,n),D}function hr({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const gr=async e=>{try{return await m(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{ee(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),_r=new Map;async function vr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.resolve(e),a=_r.get(i);if(a){let 
e=await a;return r?.(1),e}let o=br({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});_r.set(i,o);try{return await o}finally{_r.delete(i)}}const yr=[3,10];async function br({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.join(e,`bin`,`python3`),a=_.join(e,`bin`,`pip`);if(await gr(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await gr(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<yr[0]||t===yr[0]&&n<yr[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${yr.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await l(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var xr=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=te(e,[tr],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
|
|
3
3
|
`);this.buffer=t.pop();for(let e of t)if(e.trim())try{this.handleMessage(JSON.parse(e))}catch(t){console.error(`[mlx-llm] Failed to parse bridge message:`,e,t)}}),this.readyPromise=new Promise((e,t)=>{this.pendingRequests.set(`__init__`,{resolve:()=>e(),reject:t}),setTimeout(()=>t(Error(`Bridge startup timeout`)),3e4)}),this.readyPromise}handleMessage(e){let t=this.pendingRequests.get(e.id);t&&(e.error?(t.reject(Error(e.error.message)),this.pendingRequests.delete(e.id)):e.event?e.event===`result`?(t.resolve(e.data),this.pendingRequests.delete(e.id)):t.onEvent?.(e.event,e.data):e.result!==void 0&&(t.resolve(e.result),this.pendingRequests.delete(e.id)))}async call(e,t={}){if(!this.process)throw Error(`Bridge not running`);let n=String(++this.requestCounter);return new Promise((r,i)=>{this.pendingRequests.set(n,{resolve:r,reject:i}),this.write({id:n,method:e,params:t})})}stream(e,t,n){if(!this.process)throw Error(`Bridge not running`);let r=String(++this.requestCounter);return{id:r,promise:new Promise((i,a)=>{this.pendingRequests.set(r,{resolve:i,reject:a,onEvent:n}),this.write({id:r,method:e,params:t})})}}cancel(e){this.process&&this.write({id:`cancel-${e}`,method:`cancel`,params:{request_id:e}})}write(e){this.process?.stdin?.write(JSON.stringify(e)+`
|
|
4
|
-
`)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function br(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),v.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function xr(e){let t=br();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const Sr=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Cr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const wr=`</think>`;function Tr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(wr);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(wr.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Er(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new Xn({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let 
e=Tr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=Tr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),H.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function Dr(e,t,n={}){let r=or(t),i=xr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Cr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||nr,n=await 
gr({envDir:r.runtime.mlx_env_dir||_.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||tr,mlxVlmPackage:r.runtime.mlx_vlm_package||er,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new yr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let l=r.runtime.session_cache;l?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:_.join(e,`mlx-session-cache`),max_entries:l?.max_entries||100,max_size_bytes:ir(l?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=Sr(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new Xn({start(e){a.queue.enqueue(async()=>{let n=Er(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Or=e=>or(e).model.repo_id||null;async function kr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=br(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await Ce({platform:process.platform,arch:v.arch(),totalMemoryInBytes:v.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=or(r),t=e.model.repo_id;if(t)try{l=await fr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||nr,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=pr({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=pr({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:v.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Ar(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=or(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=br();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let l=`mlx:${s}`;if(t?.isDownloading(l))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let u=(async()=>{try{let e=o.runtime.cache_dir||nr,t=await gr({envDir:o.runtime.mlx_env_dir||_.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||tr,mlxVlmPackage:o.runtime.mlx_vlm_package||er,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
|
|
4
|
+
`)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function Sr(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),v.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function Cr(e){let t=Sr();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const wr=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Tr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Er=`</think>`;function Dr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(Er);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(Er.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Or(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new Qn({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let 
e=Dr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=Dr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),H.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function kr(e,t,n={}){let r=cr(t),i=Cr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Tr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||ir,n=await 
vr({envDir:r.runtime.mlx_env_dir||_.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||rr,mlxVlmPackage:r.runtime.mlx_vlm_package||nr,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new xr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let l=r.runtime.session_cache;l?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:_.join(e,`mlx-session-cache`),max_entries:l?.max_entries||100,max_size_bytes:or(l?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=wr(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new Qn({start(e){a.queue.enqueue(async()=>{let n=Or(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Ar=e=>cr(e).model.repo_id||null;async function jr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=Sr(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await Te({platform:process.platform,arch:v.arch(),totalMemoryInBytes:v.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=cr(r),t=e.model.repo_id;if(t)try{l=await mr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||ir,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=hr({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=hr({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:v.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Mr(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=cr(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=Sr();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let l=`mlx:${s}`;if(t?.isDownloading(l))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let u=(async()=>{try{let e=o.runtime.cache_dir||ir,t=await vr({envDir:o.runtime.mlx_env_dir||_.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||rr,mlxVlmPackage:o.runtime.mlx_vlm_package||nr,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
|
|
5
5
|
from huggingface_hub import snapshot_download
|
|
6
6
|
path = snapshot_download("${s}", revision="${o.model.revision||`main`}")
|
|
7
7
|
print(path)
|
|
8
8
|
`.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let l=c.stdout.trim().split(`
|
|
9
|
-
`).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return
|
|
9
|
+
`).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return un(t,n);if(e===`ggml-stt`)return Zn(t,n);if(e===`mlx-llm`)return jr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.25.0-beta.5`,Nr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.25.0-beta.5`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Pr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Fr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
|
|
10
10
|
`),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
|
|
11
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=u(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=
|
|
11
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=u(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=Ge(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,f=u(l),p=u(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=d(t?.fit?.fitsInGpu),h=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${f} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=u(i),l=u(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=d(t?.limitedFit?.fitsInGpu),h=d(t?.limitedFit?.fitsInCpu);(e!==o||c!==f||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
|
|
12
12
|
---`),n.push(`
|
|
13
13
|
### System Information`);let f=null;if(process.platform!==`win32`)try{f=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${v.hostname()}`),n.push(`- **OS:** ${v.type()} ${v.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${v.cpus().length}`),n.push(`- **Total System Memory:** ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
14
14
|
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
15
|
-
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),
|
|
15
|
+
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),Nr.description&&n.push(`- **Description:** ${Nr.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
16
16
|
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=_.join(process.cwd(),m);k.writeFileSync(h,n.join(`
|
|
17
|
-
`),`utf8`),console.log(`\nModel capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function
|
|
17
|
+
`),`utf8`),console.log(`\nModel capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function Ir({modelId:e=null,defaultConfig:t=null}={}){console.log(`${Y} v${X}`),console.log(`Testing capabilities for backend: ggml-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=a(n[e]||{},t):n[e]=t}),n},o=e=>a(structuredClone(i),e||{}),s=(e=>{if(Array.isArray(r)&&r.length>0){let t=r.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return o(n)}}return Object.keys(i).length>0?o({}):null})(e);e&&(s={...s||{},model:{...s?.model||{},repo_id:e}});let c=await J(`ggml-llm`,null,{config:s,includeBreakdown:!0}),l=c.buttress?.selected||null,u=c.modelConfig||null;if(e||u?.repoId){console.log(`
|
|
18
18
|
=== Model Information ===`);let t=e||u?.repoId;console.log(`Repository ID: ${t}`),u?.quantization&&console.log(`Quantization: ${u.quantization}`),u?.nCtx&&console.log(`Context Length: ${u.nCtx}`),l?.quantization?.name&&console.log(`Model Quantization: ${l.quantization.name.toUpperCase()}`);let n=u?.cache_type_k||`f16`,r=u?.cache_type_v||`f16`;if(console.log(`KV Cache Type: K=${n}, V=${r}`),l?.modelBytes&&l?.kvCacheBytes){if(console.log(`Model Size: ${(l.modelBytes/1024/1024/1024).toFixed(2)} GB`),l.kvInfo?console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(l.kvInfo)})`):console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Total Required Memory: ${((l.modelBytes+l.kvCacheBytes)/1024/1024/1024).toFixed(2)} GB`),l.memoryLimitedCtx!=null){let e=l.memoryLimitedCtx,t=l.kvInfo?.nCtxTrain;t?console.log(`\nMemory-Limited Context: ${e} (Train: ${t})`):console.log(`\nMemory-Limited Context: ${e}`),l.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(l.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(c.buttress?.selected?.fit){let{totalRequiredBytes:e}=c.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}}if(c.buttress?.selected){let{selected:e}=c.buttress;console.log(`
|
|
19
19
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
20
20
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
21
21
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),e.limitedFit&&(console.log(`
|
|
22
22
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU (Limited): ${e.limitedFit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
|
|
23
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function
|
|
23
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function Lr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating STT model capabilities comparison...
|
|
24
24
|
`),n.push(`${Y} v${X}`),n.push(`## STT Model Capabilities Comparison
|
|
25
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=
|
|
25
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=Pr(n);console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let a=c(r);a={...a||{},model:{...a?.model||{},repo_id:r,...i&&{filename:i}}};let o=await J(`ggml-stt`,null,{config:a,includeBreakdown:!0});l.push({modelId:n,repoId:r,filename:i,capabilities:o,modelInfo:o.buttress?.selected||null,modelConfig:o.modelConfig||null})}let u=e=>e?(e/1024/1024).toFixed(1):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |`),n.push(`|-------|-----------|------------------------|---------------------|----------|----------|`),l.forEach(({modelId:e,modelInfo:t})=>{let r=u(t?.modelBytes),i=u(t?.processingBytes||t?.kvCacheBytes),a=u(t?.fit?.totalRequiredBytes),o=d(t?.fit?.fitsInGpu),s=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${r} | ${i} | ${a} | ${o} | ${s} |`)}),n.push(`
|
|
26
26
|
---`),n.push(`
|
|
27
27
|
### System Information`);let f=null;if(process.platform!==`win32`)try{f=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${v.hostname()}`),n.push(`- **OS:** ${v.type()} ${v.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${v.cpus().length}`),n.push(`- **Total System Memory:** ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
28
28
|
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
29
|
-
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),
|
|
29
|
+
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),Nr.description&&n.push(`- **Description:** ${Nr.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
30
30
|
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=_.join(process.cwd(),m);k.writeFileSync(h,n.join(`
|
|
31
|
-
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function
|
|
31
|
+
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function Rr({modelId:e=null,defaultConfig:t=null}={}){console.log(`${Y} v${X}`),console.log(`Testing capabilities for backend: ggml-stt`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{repoId:n,filename:r}=Pr(e),{server:i,generators:a=[],...o}=t||{},s=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=s(n[e]||{},t):n[e]=t}),n},c=e=>s(structuredClone(o),e||{}),l=(e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return c(n)}}return Object.keys(o).length>0?c({}):null})(n);n&&(l={...l||{},model:{...o.runtime,...l?.model||{},repo_id:n,...r&&{filename:r}}});let u=await J(`ggml-stt`,null,{config:l,includeBreakdown:!0}),d=u.buttress?.selected||null,f=u.modelConfig||null;if(n||f?.repoId){console.log(`
|
|
32
32
|
=== Model Information ===`);let e=n||f?.repoId;console.log(`Repository ID: ${e}`),r&&console.log(`Filename: ${r}`),d?.modelBytes&&console.log(`Model Size: ${(d.modelBytes/1024/1024).toFixed(1)} MB`);let t=d?.processingBytes||d?.kvCacheBytes;if(t&&console.log(`Processing Buffer: ${(t/1024/1024).toFixed(1)} MB`),d?.modelBytes&&t)console.log(`Total Required Memory: ${((d.modelBytes+t)/1024/1024).toFixed(1)} MB`);else if(u.buttress?.selected?.fit){let{totalRequiredBytes:e}=u.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024).toFixed(1)} MB`)}}if(u.buttress?.selected){let{selected:e}=u.buttress;console.log(`
|
|
33
33
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
34
34
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
35
35
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
|
|
36
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Lr=e({finalizeGenerator:()=>Vr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Gr,ggmlLlm:()=>Hr,ggmlStt:()=>Wr,globalDownloadManager:()=>Rr,mlxLlm:()=>Ur,showModelsTable:()=>Nr,showSttModelsTable:()=>Fr,startGenerator:()=>Br,startModelDownload:()=>qr,status:()=>Kr,testGgmlLlmCapabilities:()=>Pr,testGgmlSttCapabilities:()=>Ir});const Z=new Map,Rr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},zr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=zr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Br(e,t){let n={"ggml-llm":{create:nn,getId:rn},"ggml-stt":{create:Gn,getId:Kn},"mlx-llm":{create:Dr,getId:Or}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Rr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Vr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Hr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},Ur={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Wr={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Gr(e,t){return e===`ggml-llm`?rn(t):e===`ggml-stt`?Kn(t):e===`mlx-llm`?Or(t):null}const Kr={getFullStatus:()=>et(Z),getGgmlLlmStatus:()=>Ze(Z),getGgmlSttStatus:()=>Qe(Z),getMlxLlmStatus:()=>$e(Z),subscribeToStatus:Ye,subscribeToStatusWithId:Xe,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};async function qr(e,t,n={}){let r={"ggml-llm":an,"ggml-stt":qn,"mlx-llm":Ar}[e];return r?r(t,Rr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Jr=`@fugood/buttress-server`,Yr=`2.24.1`,Xr={name:Jr,version:Yr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`./bin/bricks-buttress`},files:[`lib`,`bin`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run 
build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`984a440ed04862f12c65f3cf62bdc70a938fcdd6`};const Zr=()=>({version:Yr,name:Jr,description:Xr.description}),Qr=typeof process<`u`&&process.versions&&process.versions.node,$r=e=>new n({adapter:Qr?t():void 0,...e}),ei=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),ti=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var ni=e=>{let t=$r(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,ti,{response:ei}),t};const ri=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var ii=$r().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return ri?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const ai=_.dirname(D(import.meta.url)),oi=async()=>{let 
e=[_.join(ai,`..`,`public`,`status.html`),_.join(ai,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},si=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},ci=async()=>{let e=await oi();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var li=$r().get(`/status`,ci).get(`/status/`,ci).get(`/buttress/status`,({store:{backend:e}})=>si(e));const ui=[`ggml-llm`,`mlx-llm`],di=new Map;function fi(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function pi(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>ui.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. 
Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let a=i[0],o=n||a.model?.repo_id;if(n){let e=i.find(e=>e.model?.repo_id===n);e&&(a=e)}else o=a.model?.repo_id;let s=a.type||`ggml-llm`,c=o,l=di.get(c);if(l?.initialized)return l;let{generators:u,server:d,...f}=t.global||{},p={...f,...a,model:{...a.model,repo_id:o}};console.log(`${r} Creating ${s} generator for ${c}`);let{id:m}=await e.startGenerator(s,p),h={id:m,type:s,config:p,repoId:o,initialized:!1};return di.set(c,h),await fi(e,s).initContext(m,{}),h.initialized=!0,console.log(`${r} Generator ready: ${c}`),h}function mi(e){let t=e.timings||{},n=t.prompt_n??t.promptN??0,r=t.cache_n??t.cacheN??0,i=t.predicted_n??t.predictedN??0;return{promptTokens:n||e.prompt_tokens||e.promptTokens||0,cachedTokens:r,completionTokens:i||e.tokens_evaluated||e.tokensEvaluated||e.tokens_predicted||e.tokensPredicted||0}}function hi(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e),i=t+n;return{prompt_tokens:i,completion_tokens:r,total_tokens:i+r}}const gi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`;async function _i(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=hi(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return 
o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function vi({global:e}){let t=$r({prefix:`/oai-compat`});return t.use(M({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await pi(a,r,c,`[OpenAI]`),t=gi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await fi(a,e.type).completion(e.id,{options:b});if(!s)return await _i(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let 
t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=hi(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const yi=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function bi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof 
e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else 
Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}/* xi(e, t): chooses the stop_reason string for completion result e. A truthy t (the caller passes whether tool calls were produced) wins with tool_use; otherwise a stopping word gives stop_sequence, interrupted or truncated gives max_tokens, and anything else is end_turn. */function xi(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}/* Si(e): reshapes the counts returned by mi(e) into a usage object with cache_read_input_tokens, input_tokens and output_tokens. */function Si(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}/* Ci(e, t, n): non-streaming path. Reads stream e to the end, keeping the latest content and reasoning_content from token events and the final fields from the result event (text is used only when both content and reasoning_content are absent), then returns a message object with id t and model n whose content holds thinking, text and tool_use blocks. Tool-call arguments that are not valid JSON become an empty input object. An error event is rethrown as an Error, and the reader is cancelled in every case. */async function Ci(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Si(u),l=u.stopping_word||u.stoppingWord||null,c=xi(u,o.length>0);else if(n===`error`)throw Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}/* wi({global}): starts building the router created with the anthropic-messages prefix; the rest of the definition follows on the next lines. */function wi({global:e}){let t=$r({prefix:`/anthropic-messages`});return 
t.use(M({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=bi(o),n=await pi(a,r,o.model,`[Anthropic]`),s=yi(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await fi(a,n.type).completion(n.id,{options:l});if(!o.stream)return await Ci(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield 
i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Si(r),w=r.stopping_word||r.stoppingWord||null,C=xi(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield 
i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=bi(a),t=await pi(i,r,a.model,`[Anthropic]`),n=fi(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server 
error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any())})}),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>{let t=e.model?.repo_id||e.type;return{id:t,type:`model`,display_name:t,created_at:new Date().toISOString()}});return n.length===0&&n.push({id:`ggml-llm`,type:`model`,display_name:`ggml-llm`,created_at:new Date().toISOString()}),{data:n,has_more:!1,first_id:n[0]?.id,last_id:n.at(-1)?.id}}),t}const Ti=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Ti(n[e]||{},t):n[e]=t}),n},Ei=e=>e&&typeof e==`object`?structuredClone(e):null,Di=(e,t)=>Ti(Ei(e)||{},Ei(t)||{}),Oi=(e,t)=>Ti(structuredClone(e.global),t||{}),ki=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Oi(e,a)}}return Object.keys(e.global).length>0?Oi(e,{}):null},Ai={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},ji=e=>e?e===!0?{...Ai}:Ti(Ai,e):null,Mi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Ni=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Pi=6e4,Fi=1024*1024*50,Ii=e=>{let t=N.machineIdSync(),n=Ti({server:{id:`buttress-${t}`,name:`Buttress Server 
(${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Pi,max_body_size:Fi},autodiscover:!1},Ei(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:ji(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Ni(i.max_body_size,w.parse,Fi),session_timeout:Ni(i.session_timeout,P,Pi)},global:s,generators:r}},Li={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ri={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=Ei(i),c=Di(ki(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=Ei(i),o=Di(ki(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const 
/* zi: zod tuple schemas keyed by LLM method name (presumably used to validate call arguments; the consumer is not visible on this line). */zi={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};/* Bi(e): returns an initContext handler for the backend selected by e. The handler returns a ReadableStream that forwards backend progress as progress events, records the context id in session.initializedContexts, waits one second, then emits a result event (with the download field removed) and closes. A failure errors the stream instead. */function Bi(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}/* Vi(e, t): returns a releaseContext handler that logs under label t. It forwards to the backend only when this session initialised the context id; otherwise it logs the skip and returns released false with skipped true. */function Vi(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}/* Hi(e, t): builds the handler set (initContext, completion, tokenize, detokenize, applyChatTemplate, releaseContext) for the backend selected by e. Each handler logs its arguments with prefix t and forwards them to the backend method of the same name. */function Hi(e,t){return{initContext:Bi(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Vi(e,`${t}Release Context`)}}/* Ui: handler set bound to backend.ggmlLlm with an empty log prefix. */var Ui=Hi(e=>e.ggmlLlm,``);/* Wi: zod tuple schemas keyed by speech-to-text method name; Gi selects backend.ggmlStt; Ki maps each namespace to its handlers and continues on the next line. */const Wi={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},Gi=e=>e.ggmlStt,Ki={common:Ri,ggmlLlm:Ui,ggmlStt:{initContext:Bi(Gi),async transcribe({backend:e,config:{server:t}},n,r,i){return 
console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:Vi(Gi,`Release STT Context`)},mlxLlm:Hi(e=>e.mlxLlm,`MLX `)},qi={common:Li,ggmlLlm:zi,ggmlStt:Wi,mlxLlm:zi};var Ji=Ki;const Yi=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return F.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=F.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},Xi=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof F?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:F.from(t).toString(`base64`)}:t)}catch{return e}};var Zi=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=re.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let 
n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Qi=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Zi(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const $i=()=>{let e=v.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=Zr(),ea=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function ta({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
36
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var zr=e({finalizeGenerator:()=>Ur,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>qr,ggmlLlm:()=>Wr,ggmlStt:()=>Kr,globalDownloadManager:()=>Br,mlxLlm:()=>Gr,showModelsTable:()=>Fr,showSttModelsTable:()=>Lr,startGenerator:()=>Hr,startModelDownload:()=>Yr,status:()=>Jr,testGgmlLlmCapabilities:()=>Ir,testGgmlSttCapabilities:()=>Rr});const Z=new Map,Br={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},Vr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=Vr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Hr(e,t){let n={"ggml-llm":{create:an,getId:on},"ggml-stt":{create:qn,getId:Jn},"mlx-llm":{create:kr,getId:Ar}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Br}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Ur(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Wr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},/* Gr: mlx-llm facade. Each method resolves the generator instance through Q with the mlx-llm type and forwards the options. releaseContext reports alreadyReleased when the id is no longer in the registry Z and throws when the entry belongs to another backend type. */Gr={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},/* Kr: ggml-stt facade with initContext, transcribe, transcribeData and releaseContext, following the same lookup and release rules as the LLM facades. */Kr={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};/* qr(e, t): delegates to on, Jn or Ar according to backend type e, passing config t; returns null for any other type. */function qr(e,t){return e===`ggml-llm`?on(t):e===`ggml-stt`?Jn(t):e===`mlx-llm`?Ar(t):null}/* Jr: status accessors evaluated against the generator registry Z, together with the shared subscribe helpers, trackers and emitter. */const Jr={getFullStatus:()=>nt(Z),getGgmlLlmStatus:()=>$e(Z),getGgmlSttStatus:()=>et(Z),getMlxLlmStatus:()=>tt(Z),subscribeToStatus:Ze,subscribeToStatusWithId:Qe,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};/* Yr(e, t, n): looks up the download starter for backend type e and calls it with t, the download manager Br and options n; an unknown type returns a not-started result carrying an error message. */async function Yr(e,t,n={}){let r={"ggml-llm":sn,"ggml-stt":Yn,"mlx-llm":Mr}[e];return r?r(t,Br,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}/* Xr, Zr, Qr: package name, version and the embedded package manifest. */var Xr=`@fugood/buttress-server`,Zr=`2.25.0-beta.9`,Qr={name:Xr,version:Zr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`./bin/bricks-buttress`},files:[`lib`,`bin`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun 
run build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,jose:`^5.9.6`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`9274cc21f1e882d26f80e45a972d814d648c861a`};const $r=()=>({version:Zr,name:Xr,description:Qr.description}),ei=typeof process<`u`&&process.versions&&process.versions.node,ti=e=>new n({adapter:ei?t():void 0,...e}),ni=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String(),score:a.Optional(a.Number()),hasGpu:a.Optional(a.Boolean()),usableBytes:a.Optional(a.Number())})),authentication:a.Object({required:a.Boolean(),type:a.String(),kid:a.Optional(a.String()),bound:a.Optional(a.Boolean())}),workspace:a.Optional(a.Object({id:a.String(),name:a.Optional(a.String())}))}),ri=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication,workspace:e.workspace});var ii=e=>{let t=ti(),n=e.autodiscover?.http?.path??`/buttress/info`;return t.get(n,ri,{response:ni}),t};let ai=null;const oi=async e=>{if(ai&&ai.kid===e.kid)return ai.key;let t=await M(e.issuerPublicKey,`EdDSA`);return ai={kid:e.kid,key:t},t},si=/^Bearer\s+(.+)$/i,ci=(e,t)=>{if(e){let t=e.authorization||e.Authorization;if(t){let e=t.match(si);return e?e[1].trim():t.trim()}}if(t){let e=t.access_token??t.token;if(typeof e==`string`&&e)return e}return null},li=async(e,t)=>{if(!e||!t)return null;try{let{payload:n}=await N(e,await oi(t),{algorithms:[`EdDSA`]}),r=n;return 
r.k!==`ba`||r.w_id!==t.id||r.st!==`ws`&&r.st!==`dev`||!r.sid||!r.exp?null:{workspaceId:r.w_id,subjectType:r.st,subjectId:r.sid,jti:r.jti,exp:r.exp}}catch{return null}},ui=async({headers:e,query:t,set:n,store:r})=>{let i=r.workspaceState?.workspace;if(i&&!await li(ci(e,t),i))return n.status=401,n.headers&&(n.headers[`WWW-Authenticate`]=`Bearer`),{error:{code:`UNAUTHORIZED`,message:`Invalid or missing workspace access token`}}},di=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var fi=ti().onBeforeHandle(ui).post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return di?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const pi=_.dirname(D(import.meta.url)),mi=async()=>{let e=[_.join(pi,`..`,`public`,`status.html`),_.join(pi,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},hi=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},gi=async()=>{let e=await mi();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] 
Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var _i=ti().get(`/status`,gi).get(`/status/`,gi).get(`/buttress/status`,({store:{backend:e}})=>hi(e));const vi=[`ggml-llm`,`mlx-llm`],yi=new Map;function bi(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function xi(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>vi.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let a=i[0],o=n||a.model?.repo_id;if(n){let e=i.find(e=>e.model?.repo_id===n);e&&(a=e)}else o=a.model?.repo_id;let s=a.type||`ggml-llm`,c=o,l=yi.get(c);if(l?.initialized)return l;let{generators:u,server:d,...f}=t.global||{},p={...f,...a,model:{...a.model,repo_id:o}};console.log(`${r} Creating ${s} generator for ${c}`);let{id:m}=await e.startGenerator(s,p),h={id:m,type:s,config:p,repoId:o,initialized:!1};return yi.set(c,h),await bi(e,s).initContext(m,{}),h.initialized=!0,console.log(`${r} Generator ready: ${c}`),h}function Si(e){let t=e.timings||{},n=t.prompt_n??t.promptN??0,r=t.cache_n??t.cacheN??0,i=t.predicted_n??t.predictedN??0;return{promptTokens:n||e.prompt_tokens||e.promptTokens||0,cachedTokens:r,completionTokens:i||e.tokens_evaluated||e.tokensEvaluated||e.tokens_predicted||e.tokensPredicted||0}}function Ci(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=Si(e),i=t+n;return{prompt_tokens:i,completion_tokens:r,total_tokens:i+r}}const wi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`;async function Ti(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else 
if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=Ci(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function Ei({global:e}){let t=ti({prefix:`/oai-compat`});return t.use(P({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.onBeforeHandle(ui),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>vi.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await 
xi(a,r,c,`[OpenAI]`),t=wi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await bi(a,e.type).completion(e.id,{options:b});if(!s)return await Ti(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=Ci(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] 
Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const Di=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function Oi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of 
r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}function ki(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}function Ai(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=Si(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}async function ji(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Ai(u),l=u.stopping_word||u.stoppingWord||null,c=ki(u,o.length>0);else if(n===`error`)throw 
Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}function Mi({global:e}){let t=ti({prefix:`/anthropic-messages`});return t.use(P({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.onBeforeHandle(ui),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=Oi(o),n=await xi(a,r,o.model,`[Anthropic]`),s=Di(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await bi(a,n.type).completion(n.id,{options:l});if(!o.stream)return await ji(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await 
d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Ai(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Ai(r);yield 
i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Ai(r),w=r.stopping_word||r.stoppingWord||null,C=ki(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=Oi(a),t=await 
xi(i,r,a.model,`[Anthropic]`),n=bi(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any())})}),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>vi.includes(e.type)).map(e=>{let t=e.model?.repo_id||e.type;return{id:t,type:`model`,display_name:t,created_at:new Date().toISOString()}});return n.length===0&&n.push({id:`ggml-llm`,type:`model`,display_name:`ggml-llm`,created_at:new Date().toISOString()}),{data:n,has_more:!1,first_id:n[0]?.id,last_id:n.at(-1)?.id}}),t}const Ni=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Ni(n[e]||{},t):n[e]=t}),n},Pi=e=>e&&typeof e==`object`?structuredClone(e):null,Fi=(e,t)=>Ni(Pi(e)||{},Pi(t)||{}),Ii=(e,t)=>Ni(structuredClone(e.global),t||{}),Li=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Ii(e,a)}}return Object.keys(e.global).length>0?Ii(e,{}):null},Ri={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},zi=e=>e?e===!0?{...Ri}:Ni(Ri,e):null,Bi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return 
e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Vi=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Hi=6e4,Ui=1024*1024*50,Wi=e=>{let t=F.machineIdSync(),n=Ni({server:{id:`buttress-${t}`,name:`Buttress Server (${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Hi,max_body_size:Ui},autodiscover:!1},Pi(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:zi(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Vi(i.max_body_size,w.parse,Ui),session_timeout:Vi(i.session_timeout,re,Hi)},global:s,generators:r}},Gi={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ki={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=Pi(i),c=Fi(Li(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=Pi(i),o=Fi(Li(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize 
Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const qi={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};function Ji(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function Yi(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function Xi(e,t){return{initContext:Ji(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Yi(e,`${t}Release Context`)}}var Zi=Xi(e=>e.ggmlLlm,``);const Qi={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},$i=e=>e.ggmlStt,ea={common:Ki,ggmlLlm:Zi,ggmlStt:{initContext:Ji($i),async 
transcribe({backend:e,config:{server:t}},n,r,i){return console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:Yi($i,`Release STT Context`)},mlxLlm:Xi(e=>e.mlxLlm,`MLX `)},ta={common:Gi,ggmlLlm:qi,ggmlStt:Qi,mlxLlm:qi};var na=ea;const ra=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return I.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=I.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},ia=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof I?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:I.from(t).toString(`base64`)}:t)}catch{return e}},aa={score:0,hasGpu:!1,usableBytes:0},oa=e=>e?{score:Number(e.score)||0,hasGpu:!!e.hasGpu,usableBytes:Number(e.gpuUsableBytes||e.cpuUsableBytes||0)}:aa,sa=async e=>{let t=[];for(let n of e){let e=aa;try{e=oa((await J(n.type,null,{}))?.buttress?.selected)}catch(e){console.warn(`[Caps] Failed to detect capabilities for "${n.type}":`,e instanceof Error?e.message:e)}t.push({...n,...e})}return t},ca=()=>{let e=v.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},la=e=>{let t=e.split(`.`).map(Number);return t.length!==4||t.some(e=>Number.isNaN(e))?0:(t[0]<<24|t[1]<<16|t[2]<<8|t[3])>>>0},ua=e=>[e>>>24&255,e>>>16&255,e>>>8&255,e&255].join(`.`),da=()=>{let e=[],t=new Set;for(let n of Object.values(v.networkInterfaces()))for(let r of n??[]){if(r.family!==`IPv4`||r.internal||!r.address||!r.netmask)continue;let 
n=la(r.address),i=la(r.netmask);if(!n||!i||i===4294967295)continue;let a=ua((n&i|~i>>>0)>>>0);if(a===r.address)continue;let o=`${r.address}->${a}`;t.has(o)||(t.add(o),e.push({address:r.address,broadcast:a}))}return e},fa=e=>{if(!e)return!1;let t=e;return t.code===`ENOTSUP`||/Failed to bind socket/i.test(t.message??``)};var pa=class{name=`udp`;receiver=null;senders=[];announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){this.receiver=await this.bindReceiver(!0).catch(e=>{if(!fa(e))throw e;return console.warn(`[Autodiscover UDP] SO_REUSEPORT not supported by this runtime; falling back to REUSEADDR only (multiple buttress instances on one host will not coexist on the discovery port).`),this.bindReceiver(!1)}),this.receiver.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.receiver.on(`error`,e=>{console.error(`[Autodiscover UDP] Receiver error:`,e.message)}),this.receiver.setBroadcast(!0),this.senders=await this.createSenders();let e=this.senders.map(e=>`${e.address}->${e.broadcast}`).join(`, `)||`<none>`;if(console.log(`[Autodiscover UDP] Listening on port ${this.port}; announce interfaces: ${e}`),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),await Promise.all(this.senders.map(({socket:e})=>new Promise(t=>{e.close(()=>t())}))),this.senders=[],this.receiver&&=(await new Promise(e=>{this.receiver.close(()=>e())}),null)}async bindReceiver(e){let t=L.createSocket(e?{type:`udp4`,reuseAddr:!0,reusePort:!0}:{type:`udp4`,reuseAddr:!0});return new Promise((e,n)=>{let r=e=>{t.close(),n(e)};t.once(`error`,r),t.bind(this.port,()=>{t.off(`error`,r),e(t)})})}async createSenders(){let e=da();return(await Promise.all(e.map(async e=>{try{let t=L.createSocket({type:`udp4`});return await new 
Promise((n,r)=>{let i=e=>{t.close(),r(e)};t.once(`error`,i),t.bind({port:0,address:e.address},()=>{t.off(`error`,i),t.setBroadcast(!0),n()})}),t.on(`error`,t=>{console.error(`[Autodiscover UDP] Sender ${e.address} error:`,t.message)}),{...e,socket:t}}catch(t){return console.warn(`[Autodiscover UDP] Failed to bind sender on ${e.address}:`,t.message),null}}))).filter(e=>e!==null)}handleMessage(e,t){try{let n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(this.senders.length===0)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));for(let{broadcast:e,socket:n,address:r}of this.senders)n.send(t,0,t.length,this.port,e,t=>{t&&console.error(`[Autodiscover UDP] Announcement ${r}->${e} error:`,t.message)})}sendResponse(e,t){if(!this.receiver)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.receiver.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},ma=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new pa(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const ha=()=>process.env.BRICKS_BUTTRESS_STATE_DIR||_.join(v.homedir(),`.bricks-cli`,`buttress`),ga=()=>_.join(ha(),`state.json`),_a=e=>{if(!e||typeof e!=`object`)return!1;let t=e;return typeof t.id==`string`&&typeof t.serverId==`string`&&typeof t.issuerPublicKey==`string`&&typeof 
t.kid==`string`},va=()=>{let e=ga();try{let t=k.readFileSync(e,`utf8`),n=JSON.parse(t);return n&&_a(n.workspace)?{workspace:n.workspace}:{workspace:null}}catch(e){return e.code!==`ENOENT`&&console.warn(`[Buttress] Failed to read workspace state:`,e.message),{workspace:null}}},$=$r(),ya=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function ba({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
37
37
|
`),n.push(`${$.name} v${$.version}`),n.push(`## Model Capabilities Comparison
|
|
38
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=u(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=
|
|
38
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=u(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=Ge(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=u(c),f=t?.recurrentMemoryBytes||0,p=f>0?u(f):`-`,m=u(t?.modelBytes&&(c!=null||f>0)?t.modelBytes+(c||0)+f:t?.fit?.totalRequiredBytes),h=d(t?.fit?.fitsInGpu),g=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let 
e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=u(s),h=u(t?.modelBytes&&(s!=null||f>0)?t.modelBytes+(s||0)+f:t?.limitedFit?.totalRequiredBytes),g=d(t?.limitedFit?.fitsInGpu),_=d(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
|
|
39
39
|
---`),n.push(`
|
|
40
40
|
### System Information`);let f=null;if(process.platform!==`win32`)try{f=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${v.hostname()}`),n.push(`- **OS:** ${v.type()} ${v.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${v.cpus().length}`),n.push(`- **Total System Memory:** ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
41
41
|
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
42
42
|
### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
43
43
|
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=_.join(process.cwd(),m);k.writeFileSync(h,n.join(`
|
|
44
|
-
`),`utf8`),console.log(`\nModel capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function
|
|
44
|
+
`),`utf8`),console.log(`\nModel capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function xa({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=a(n[e]||{},t):n[e]=t}),n},o=e=>a(structuredClone(i),e||{}),s=(e=>{if(Array.isArray(r)&&r.length>0){let t=r.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return o(n)}}return Object.keys(i).length>0?o({}):null})(e);e&&(s={...s||{},model:{...s?.model||{},repo_id:e}});let c=await J(`ggml-llm`,null,{config:s,includeBreakdown:!0}),l=c.buttress?.selected||null,u=c.modelConfig||null;if(e||u?.repoId){console.log(`
|
|
45
45
|
=== Model Information ===`);let t=e||u?.repoId;if(console.log(`Repository ID: ${t}`),u?.quantization&&console.log(`Quantization: ${u.quantization}`),u?.nCtx&&console.log(`Context Length: ${u.nCtx}`),l?.quantization){let{fileType:e}=l.quantization;e!=null&&console.log(`Model File Type (GGUF): ${e}`)}let n=u?.cache_type_k||`f16`,r=u?.cache_type_v||`f16`;if(console.log(`KV Cache Type: K=${n}, V=${r}`),l?.modelBytes&&l?.kvCacheBytes!=null){console.log(`Model Size: ${(l.modelBytes/1024/1024/1024).toFixed(2)} GB`),l.kvInfo?console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(l.kvInfo)})`):console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);let e=l.recurrentMemoryBytes||0;e>0&&console.log(`Recurrent Memory: ${(e/1024/1024/1024).toFixed(2)} GB`);let t=l.modelBytes+l.kvCacheBytes+e;if(console.log(`Total Required Memory: ${(t/1024/1024/1024).toFixed(2)} GB`),l.memoryLimitedCtx!=null){let e=l.memoryLimitedCtx,t=l.kvInfo?.nCtxTrain;t?console.log(`\nMemory-Limited Context: ${e} (Train: ${t})`):console.log(`\nMemory-Limited Context: ${e}`),l.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(l.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(c.buttress?.selected?.fit){let{totalRequiredBytes:e}=c.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}}if(c.buttress?.selected){let{selected:e}=c.buttress;console.log(`
|
|
46
46
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
47
47
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
48
48
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),e.limitedFit&&(console.log(`
|
|
49
49
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU (Limited): ${e.limitedFit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
|
|
50
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function
|
|
50
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function Sa({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating STT model capabilities comparison...
|
|
51
51
|
`),n.push(`${$.name} v${$.version}`),n.push(`## STT Model Capabilities Comparison
|
|
52
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=
|
|
52
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=ya(n);console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let a=c(r);a={...a||{},model:{...a?.model||{},repo_id:r,...i&&{filename:i}}};let o=await J(`ggml-stt`,null,{config:a,includeBreakdown:!0});l.push({modelId:n,repoId:r,filename:i,capabilities:o,modelInfo:o.buttress?.selected||null,modelConfig:o.modelConfig||null})}let u=e=>e?(e/1024/1024).toFixed(1):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |`),n.push(`|-------|-----------|------------------------|---------------------|----------|----------|`),l.forEach(({modelId:e,modelInfo:t})=>{let r=u(t?.modelBytes),i=u(t?.processingBytes||t?.kvCacheBytes),a=u(t?.fit?.totalRequiredBytes),o=d(t?.fit?.fitsInGpu),s=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${r} | ${i} | ${a} | ${o} | ${s} |`)}),n.push(`
|
|
53
53
|
---`),n.push(`
|
|
54
54
|
### System Information`);let f=null;if(process.platform!==`win32`)try{f=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${v.hostname()}`),n.push(`- **OS:** ${v.type()} ${v.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${v.cpus().length}`),n.push(`- **Total System Memory:** ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
55
55
|
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
56
56
|
### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
57
57
|
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=_.join(process.cwd(),m);k.writeFileSync(h,n.join(`
|
|
58
|
-
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function
|
|
58
|
+
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function Ca({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating MLX model capabilities comparison...
|
|
59
59
|
`),n.push(`${$.name} v${$.version}`),n.push(`## MLX Model Capabilities Comparison
|
|
60
60
|
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`mlx-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...r?.model||{},repo_id:n}};let i=await J(`mlx-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quant | Size (GB) | Context | KV Cache (GB) | Total Required (GB) | Fits Unified Memory |`),n.push(`|----------|-------|-----------|---------|---------------|---------------------|---------------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.bits||r?.quantBits||`N/A`,a=typeof i==`number`?`${i}bit`:i,o=u(t?.modelBytes),s=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,c=u(t?.kvCacheBytes),l=u(t?.fit?.totalRequiredBytes),f=d(t?.fit?.fitsInGpu);n.push(`| ${e} | ${a} | ${o} | ${s} | ${c} | ${l} | ${f} |`);let p=t?.limitedFit!=null&&t?.memoryLimitedCtx!=null,m=!t?.fit?.fitsInGpu;if(p&&m){let e=t.memoryLimitedCtx,r=u(t.limitedKvCacheBytes),i=u(t.limitedFit.totalRequiredBytes),s=d(t.limitedFit.fitsInGpu);n.push(`| ↳ Limited | ${a} | ${o} | ${e} | ${r} | ${i} | ${s} |`)}}),n.push(`
|
|
61
61
|
---`),n.push(`
|
|
@@ -63,20 +63,20 @@ print(path)
|
|
|
63
63
|
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
64
64
|
### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
65
65
|
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`mlx-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=_.join(process.cwd(),m);k.writeFileSync(h,n.join(`
|
|
66
|
-
`),`utf8`),console.log(`\nMLX model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate MLX model table:`,e.message),process.exit(1)}}async function
|
|
66
|
+
`),`utf8`),console.log(`\nMLX model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate MLX model table:`,e.message),process.exit(1)}}async function wa({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: mlx-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=Object.keys(i).length>0?{...i}:null;e&&(a={...a||{},model:{...a?.model||{},repo_id:e}});let o=await J(`mlx-llm`,null,{config:a,includeBreakdown:!0});console.log(`
|
|
67
67
|
=== Platform Information ===`),console.log(`Available: ${o.available?`Yes`:`No`}`),console.log(`OS: ${o.platform.os} (${o.platform.arch})`),o.platform.errors?.length>0&&console.log(`Errors: ${o.platform.errors.join(`; `)}`),console.log(`Python3: ${o.python.available?`Available`:`Not found`}`),console.log(`MLX (system): ${o.mlx.systemAvailable?`Available`:`Not installed (will use venv)`}`);let s=o.buttress?.selected,c=o.modelConfig;if(e||c?.repoId){console.log(`
|
|
68
68
|
=== Model Information ===`);let t=e||c?.repoId;if(console.log(`Repository ID: ${t}`),c?.architecture&&console.log(`Architecture: ${c.architecture}`),c?.quantBits&&console.log(`Quantization: ${c.quantBits}bit (group_size=${c.quantGroupSize||`N/A`})`),c?.nCtx&&console.log(`Max Context Length: ${c.nCtx}`),s?.modelBytes&&console.log(`Model Size: ${(s.modelBytes/1024/1024/1024).toFixed(2)} GB`),s?.kvCacheBytes!=null&&console.log(`KV Cache Size (full context): ${(s.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`),s?.kvInfo&&console.log(`KV Info: ${s.kvInfo.nLayer} layers, ${s.kvInfo.nHeadKv} KV heads, ${s.kvInfo.headDim} head dim`),s?.modelBytes&&s?.kvCacheBytes!=null){let e=s.modelBytes+s.kvCacheBytes;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}s?.memoryLimitedCtx!=null&&(console.log(`\nMemory-Limited Context: ${s.memoryLimitedCtx}`),s.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(s.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`))}if(o.buttress?.selected){let{selected:e}=o.buttress;console.log(`
|
|
69
69
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`),console.log(`
|
|
70
70
|
--- Unified Memory (Metal) ---`),e.devices?.length>0&&console.log(`Device: ${e.devices[0].deviceName}`),console.log(`Total Memory: ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}%)`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
71
71
|
--- Model Fit Analysis ---`),console.log(`Fits in Unified Memory: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),console.log(`Total Required: ${(e.fit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),e.limitedFit&&(console.log(`
|
|
72
72
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
|
|
73
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(o,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function
|
|
73
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(o,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function Ta({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-stt`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{repoId:n,filename:r}=ya(e),{server:i,generators:a=[],...o}=t||{},s=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=s(n[e]||{},t):n[e]=t}),n},c=e=>s(structuredClone(o),e||{}),l=(e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return c(n)}}return Object.keys(o).length>0?c({}):null})(n);n&&(l={...l||{},model:{...o.runtime,...l?.model||{},repo_id:n,...r&&{filename:r}}});let u=await J(`ggml-stt`,null,{config:l,includeBreakdown:!0}),d=u.buttress?.selected||null,f=u.modelConfig||null;if(n||f?.repoId){console.log(`
|
|
74
74
|
=== Model Information ===`);let e=n||f?.repoId;console.log(`Repository ID: ${e}`),r&&console.log(`Filename: ${r}`),d?.modelBytes&&console.log(`Model Size: ${(d.modelBytes/1024/1024).toFixed(1)} MB`);let t=d?.processingBytes||d?.kvCacheBytes;if(t&&console.log(`Processing Buffer: ${(t/1024/1024).toFixed(1)} MB`),d?.modelBytes&&t)console.log(`Total Required Memory: ${((d.modelBytes+t)/1024/1024).toFixed(1)} MB`);else if(u.buttress?.selected?.fit){let{totalRequiredBytes:e}=u.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024).toFixed(1)} MB`)}}if(u.buttress?.selected){let{selected:e}=u.buttress;console.log(`
|
|
75
75
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
76
76
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
77
77
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
|
|
78
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}const
|
|
79
|
-
bricks-buttress v${
|
|
78
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}const Ea=$r();var Da=async()=>{(process.argv.includes(`--version`)||process.argv.includes(`-v`))&&(console.log(Ea.version),process.exit(0)),(process.argv.includes(`--help`)||process.argv.includes(`-h`))&&(console.log(`
|
|
79
|
+
bricks-buttress v${Ea.version}
|
|
80
80
|
|
|
81
81
|
Buttress server for remote inference with GGML backends.
|
|
82
82
|
|
|
@@ -99,9 +99,7 @@ Testing Options:
|
|
|
99
99
|
file (e.g., ggml-llm-model-capabilities-YYYY-MM-DD.md)
|
|
100
100
|
|
|
101
101
|
Environment Variables:
|
|
102
|
-
NODE_ENV
|
|
103
|
-
ENABLE_OPENAI_COMPAT_ENDPOINT Set to '1' to enable OpenAI-compatible API
|
|
104
|
-
ENABLE_ANTHROPIC_MESSAGES_ENDPOINT Set to '1' to enable Anthropic Messages API
|
|
102
|
+
NODE_ENV Set to 'development' for dev mode
|
|
105
103
|
|
|
106
104
|
Examples:
|
|
107
105
|
bricks-buttress
|
|
@@ -111,4 +109,4 @@ Examples:
|
|
|
111
109
|
bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
|
|
112
110
|
bricks-buttress --test-caps mlx-llm --test-models-default
|
|
113
111
|
`),process.exit(0));let e=process.argv.findIndex(e=>e===`--port`||e===`-p`),t=e>=0?Number(process.argv[e+1]):void 0,n=process.argv.findIndex(e=>e===`--config`||e===`-c`),r=n>=0?process.argv[n+1]:null,i=null;if(r){let e;if(r.includes(`
|
|
114
|
-
`))e=r;else{let t=_.resolve(r);try{e=k.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=A.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||t!==`ggml-llm`&&t!==`ggml-stt`&&t!==`mlx-llm`?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),qr(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/Qwen3.5-27B-GGUF`,`unsloth/gemma-4-26B-A4B-it-GGUF`,`unsloth/gemma-4-31B-it-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=[`mlx-community/Qwen3.5-27B-8bit`,`mlx-community/Qwen3.5-27B-4bit`,`mlx-community/Qwen3.5-35B-A3B-8bit`,`mlx-community/Qwen3.5-35B-A3B-4bit`,`mlx-community/Qwen3-235B-A22B-8bit`,`mlx-community/Qwen3-235B-A22B-4bit`,`mlx-community/GLM-4.7-Flash-8bit`,`mlx-community/GLM-4.7-Flash-4bit`,`mlx-community/MiniMax-M2.5-4bit`,`mlx-community/gpt-oss-120b-4bit`,`mlx-community/gemma-4-26b-a4b-it-8bit`,`mlx-community/gemma-4-26b-a4b-it-4bit`,`mlx-community/gemma-4-31b-it-8bit`,`mlx-community/gemma-4-31b-it-4bit`],l=process.argv.findIndex(e=>e===`--test-caps`);if(l>=0){let e=process.argv[l+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&e!==`mlx-llm`&&(console.error(`Only ggml-llm, ggml-stt, and mlx-llm backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`mlx-llm`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model 
IDs`),process.exit(1)),await ia({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await ia({modelIds:c,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await aa({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await ra({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await ra({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await oa({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await ta({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await ta({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await na({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let u=Ii(i);t&&(u.server.port=t),u.server.port||(u.server.port=2080),_a({config:u,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`,enableAnthropicMessages:process.env.ENABLE_ANTHROPIC_MESSAGES_ENDPOINT===`1`}).then(async({port:e,openaiEnabled:t,anthropicMessagesEnabled:n,autoDiscover:r})=>{let o=$i();console.log(`Buttress server listening on port ${e}`),console.log(`--------------------------------`),await ma(),console.log();let s={"ggml-llm":`LLM (GGML)`,"ggml-stt":`STT (GGML)`,"mlx-llm":`LLM (MLX)`};console.log(`Current supported Generators:`);let c=new Set((u?.generators||[]).map(e=>e.type).filter(Boolean));if(c.size===0)console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`);else for(let e of c)console.log(`- ${s[e]||e}`);console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${o}:${e} to connect to 
this server via LAN.`),console.log(`- Visit http://${o}:${e}/status to see status via LAN.`),console.log(),t?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${o}:${e}/oai-compat/v1`),console.log(`- Chat completions: POST http://${o}:${e}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${o}:${e}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set ENABLE_OPENAI_COMPAT_ENDPOINT=1 to enable`),console.log()),n?(console.log(`Anthropic Messages API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${o}:${e}/anthropic-messages`),console.log(`- Messages: POST http://${o}:${e}/anthropic-messages/v1/messages`),console.log(`- Count tokens: POST http://${o}:${e}/anthropic-messages/v1/messages/count_tokens`),console.log()):(console.log(`Anthropic Messages API [EXPERIMENTAL]: disabled`),console.log(` Set ENABLE_ANTHROPIC_MESSAGES_ENDPOINT=1 to enable`),console.log()),r&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};const{version:la,name:ua}=Zr(),da=async()=>{let e=`https://registry.npmjs.org/${ua}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await r.json()).version||null}catch{return null}},fa=(e,t)=>{if(!t)return!1;let n=e.split(/[.-]/),r=t.split(/[.-]/);for(let e=0;e<Math.max(n.length,r.length);e+=1){let t=parseInt(n[e])||0,i=parseInt(r[e])||0;if(i>t)return!0;if(i<t)return!1}return!1},pa=e=>{console.log(``),console.log(`\x1B[33mâ•─────────────────────────────────────────────────╮\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Update available! 
\x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,la.padEnd(12),e.padEnd(12),`\x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`,ua.padEnd(27)),console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`),console.log(``)},ma=async()=>{try{let e=await da();e&&fa(la,e)&&pa(e)}catch{}},ha=typeof process<`u`&&process.versions&&process.versions.node,ga=async({backend:e,router:r,config:i,enableOpenAICompat:o,enableAnthropicMessages:l})=>{try{await c.mkdir(i.server.temp_file_dir,{recursive:!0})}catch{}let u=$i()||`0.0.0.0`,d={id:i.server.id,name:i.server.name,version:la,address:u,port:i.server.port,url:`http://${u}:${i.server.port}`,generators:Mi(i,i.generators.map(e=>e.type)),authentication:{required:!0,type:`device-group`}},f=new n({serve:{maxRequestBodySize:i.server.max_body_size},websocket:{idleTimeout:Math.ceil(i.server.session_timeout/1e3)},adapter:ha?t():void 0}).state({sessions:new Map,backend:e||Lr,config:i,serverInfo:d});r&&f.use(r),i.autodiscover?.http?.enabled&&f.use(ni(i)),f.use(ii),f.use(li),o&&f.use(vi(i)),l&&f.use(wi(i));let p={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603},m=e=>e.id??e.raw?.id??e.remoteAddress;return f.ws(`/buttress/rpc`,{parse:(e,t)=>{if(typeof t==`string`)try{return JSON.parse(t)}catch{return e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:p.INVALID_REQUEST,message:`Invalid request`},id:null})),null}return t},body:a.Object({jsonrpc:a.String(),method:a.String(),params:a.String(),id:a.String()}),open(e){let t=m(e);if(console.log(`[Request] New connection: ${t}`),!e.data.store.sessions.has(t))e.data.store.sessions.set(t,{streamReaders:new Map,generators:new Set,initializedContexts:new Set,timeout:null});else{let n=e.data.store.sessions.get(t);n&&(clearTimeout(n.timeout),n.timeout=null)}},async 
message(e,{id:t,method:n,params:r}){let i=m(e);console.log(`[Request] Received request from ${i}: ${n}`);let a=e.data.store.sessions.get(i),[o,c]=n.split(`.`),l=Ji[o]?.[c];if(!l){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:p.METHOD_NOT_FOUND,message:`Method not found`},id:t}));return}try{if(n===`cancel`){a.streamReaders.has(t)&&(a.streamReaders.get(t)?.cancel(),a.streamReaders.delete(t));return}if(n===`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,result:`pong`,id:t}));return}let u=Yi(r),d=qi[o]?.[c],f=d?d.parse(u):u,p=await l({...e.data.store,peerId:i,session:a},...f);if(p instanceof s){let r=p.getReader();a.streamReaders.set(t,r),e.send(JSON.stringify({jsonrpc:`2.0`,result:{type:`stream`},id:t}));try{for(;;){let{value:n,done:i}=await r.read();if(i)break;let{event:a,data:o}=n;e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/${a}`,params:Xi(o),id:t}))}e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_end`,id:t}))}catch(r){console.error(`[RPC] Stream error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_error`,params:Xi(r),id:t}))}a.streamReaders.delete(t)}else e.send(JSON.stringify({jsonrpc:`2.0`,result:Xi(p),id:t}))}catch(r){if(r instanceof ne){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:p.INVALID_PARAMS,message:`Invalid params`,data:r.issues},id:t}));return}console.error(`[RPC] Handler error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:p.INTERNAL_ERROR,message:String(r)},id:t}))}},async close(e){let t=m(e);console.log(`[Request] Connection closed: ${t}`);let{backend:n,sessions:r}=e.data.store,a=r.get(t);a&&(a.streamReaders.forEach(e=>e.cancel()),a.streamReaders.clear(),a.timeout=setTimeout(()=>{r.delete(t),console.log(`[Request] Session timed out: ${t}`),a.generators.forEach(e=>{n.finalizeGenerator(e)})},i.server.session_timeout))}}),{app:f,config:i}},_a=async({backend:e,router:t,config:n,enableOpenAICompat:r=!1,enableAnthropicMessages:i=!1})=>{let{app:a,config:o}=await 
ga({backend:e,router:t,config:n,enableOpenAICompat:r,enableAnthropicMessages:i}),{server:{port:s}}=o,c=[new Promise(e=>a.listen(s,e))],l=null;return o.autodiscover&&(l=new Qi(o.autodiscover,()=>a.store.serverInfo),c.push(l.start())),await Promise.all(c),{app:a,port:s,openaiEnabled:r,anthropicMessagesEnabled:i,autoDiscover:l}},va=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];(process.argv[1]?.endsWith(`/bricks-buttress`)||va.includes(process.argv[1]))&&await ca();export{ma as checkAndNotifyUpdates,da as checkForUpdates,fa as compareVersions,ga as createServer,pa as logUpdateMessage,Ii as processConfig,qr as startModelDownload,_a as startServer};
|
|
112
|
+
`))e=r;else{let t=_.resolve(r);try{e=k.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=A.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||t!==`ggml-llm`&&t!==`ggml-stt`&&t!==`mlx-llm`?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),Yr(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/Qwen3.5-27B-GGUF`,`unsloth/gemma-4-26B-A4B-it-GGUF`,`unsloth/gemma-4-31B-it-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=[`mlx-community/Qwen3.5-27B-8bit`,`mlx-community/Qwen3.5-27B-4bit`,`mlx-community/Qwen3.5-35B-A3B-8bit`,`mlx-community/Qwen3.5-35B-A3B-4bit`,`mlx-community/Qwen3-235B-A22B-8bit`,`mlx-community/Qwen3-235B-A22B-4bit`,`mlx-community/GLM-4.7-Flash-8bit`,`mlx-community/GLM-4.7-Flash-4bit`,`mlx-community/MiniMax-M2.5-4bit`,`mlx-community/gpt-oss-120b-4bit`,`mlx-community/gemma-4-26b-a4b-it-8bit`,`mlx-community/gemma-4-26b-a4b-it-4bit`,`mlx-community/gemma-4-31b-it-8bit`,`mlx-community/gemma-4-31b-it-4bit`],l=process.argv.findIndex(e=>e===`--test-caps`);if(l>=0){let e=process.argv[l+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&e!==`mlx-llm`&&(console.error(`Only ggml-llm, ggml-stt, and mlx-llm backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`mlx-llm`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model 
IDs`),process.exit(1)),await Ca({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Ca({modelIds:c,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await wa({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Sa({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Sa({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Ta({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await ba({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await ba({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await xa({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let u=Wi(i);t&&(u.server.port=t),u.server.port||(u.server.port=2080),Ia({config:u,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`||u.global.openai_compat?.enabled===!0,enableAnthropicMessages:process.env.ENABLE_ANTHROPIC_MESSAGES_ENDPOINT===`1`||u.global.anthropic_messages?.enabled===!0}).then(async({app:e,port:t,openaiEnabled:n,anthropicMessagesEnabled:r,autoDiscover:o})=>{let s=ca();console.log(`Buttress server listening on port ${t}`),console.log(`--------------------------------`),await Na(),console.log();let c=e.store.workspaceState.workspace;if(c){let e=c.name?`${c.name} (${c.id})`:c.id;console.log(`Workspace: ${e}`),console.log(`- Server ID: ${c.serverId}`),console.log(`- Issuer kid: ${c.kid}`),console.log(`- Bound at: ${c.boundAt}`)}else console.log(`Workspace: not bound`),console.log(`- State file: ${ga()}`),console.log("- Run `bricks buttress bind` from a workspace-authed CLI to 
pair.");console.log();let l={"ggml-llm":`LLM (GGML)`,"ggml-stt":`STT (GGML)`,"mlx-llm":`LLM (MLX)`};console.log(`Current supported Generators:`);let d=new Set((u?.generators||[]).map(e=>e.type).filter(Boolean));if(d.size===0)console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`);else for(let e of d)console.log(`- ${l[e]||e}`);console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${s}:${t} to connect to this server via LAN.`),console.log(`- Visit http://${s}:${t}/status to see status via LAN.`),console.log(),n?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/oai-compat/v1`),console.log(`- Chat completions: POST http://${s}:${t}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${s}:${t}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set [openai_compat] enabled = true in config to enable`),console.log()),r?(console.log(`Anthropic Messages API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/anthropic-messages`),console.log(`- Messages: POST http://${s}:${t}/anthropic-messages/v1/messages`),console.log(`- Count tokens: POST http://${s}:${t}/anthropic-messages/v1/messages/count_tokens`),console.log()):(console.log(`Anthropic Messages API [EXPERIMENTAL]: disabled`),console.log(` Set [anthropic_messages] enabled = true in config to enable`),console.log()),o&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};const{version:Oa,name:ka}=$r(),Aa=async()=>{let e=`https://registry.npmjs.org/${ka}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await r.json()).version||null}catch{return 
/**
 * Ask the npm registry for the latest published version of this package.
 *
 * The whole operation (request and body read) is bounded by a 3 second abort
 * timer, and the timer is always cleared, so a failed request does not leave
 * a pending timeout behind.
 *
 * @returns {Promise<string|null>} The `version` field of the registry
 *   response, or `null` on a non-OK response, a missing/non-string version,
 *   or any error (network failure, abort, malformed JSON).
 */
const Aa = async () => {
  const registryUrl = `https://registry.npmjs.org/${ka}/latest`;
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), 3e3);
  try {
    const response = await fetch(registryUrl, {
      headers: { Accept: `application/json` },
      signal: controller.signal,
    });
    if (!response.ok) return null;
    const { version } = await response.json();
    return typeof version === `string` && version ? version : null;
  } catch {
    // Update checks are best-effort: every failure simply means "unknown".
    return null;
  } finally {
    clearTimeout(abortTimer);
  }
};

/**
 * Decide whether `t` (latest) is a newer version than `e` (current).
 *
 * Numeric core segments are compared left to right (missing segments count
 * as 0). When the cores are equal, pre-release precedence applies: a version
 * without a pre-release tag outranks the same version with one, numeric
 * identifiers compare numerically, numeric identifiers rank below
 * alphanumeric ones, and a longer identifier list outranks its own prefix.
 * Build metadata (`+...`) and a leading `v` are ignored.
 *
 * @param {string} e - Currently installed version.
 * @param {string|null|undefined} t - Latest known version.
 * @returns {boolean} `true` only when `t` is strictly newer than `e`.
 */
const ja = (e, t) => {
  if (!t) return false;

  const numericIdentifier = /^\d+$/;

  // Split "1.2.3-beta.4+build" into numeric core parts and pre-release ids.
  const parseVersion = (raw) => {
    const text = String(raw ?? ``).trim().replace(/^v/i, ``).split(`+`)[0];
    const dashAt = text.indexOf(`-`);
    const corePart = dashAt === -1 ? text : text.slice(0, dashAt);
    const prePart = dashAt === -1 ? `` : text.slice(dashAt + 1);
    return {
      core: corePart.split(`.`).map((segment) => Number.parseInt(segment, 10) || 0),
      pre: prePart ? prePart.split(`.`) : [],
    };
  };

  // Negative when `left` ranks below `right`, positive when above, 0 if equal.
  const compareIdentifiers = (left, right) => {
    const leftNumeric = numericIdentifier.test(left);
    const rightNumeric = numericIdentifier.test(right);
    if (leftNumeric && rightNumeric) return Number(left) - Number(right);
    if (leftNumeric !== rightNumeric) return leftNumeric ? -1 : 1;
    if (left === right) return 0;
    return left < right ? -1 : 1;
  };

  const current = parseVersion(e);
  const latest = parseVersion(t);

  const coreWidth = Math.max(current.core.length, latest.core.length);
  for (let index = 0; index < coreWidth; index += 1) {
    const delta = (latest.core[index] ?? 0) - (current.core[index] ?? 0);
    if (delta !== 0) return delta > 0;
  }

  // Same core: a final release is newer than any of its pre-releases.
  if (current.pre.length === 0 || latest.pre.length === 0) {
    return latest.pre.length === 0 && current.pre.length > 0;
  }

  const sharedWidth = Math.min(current.pre.length, latest.pre.length);
  for (let index = 0; index < sharedWidth; index += 1) {
    const order = compareIdentifiers(current.pre[index], latest.pre[index]);
    if (order !== 0) return order < 0;
  }
  return latest.pre.length > current.pre.length;
};

/**
 * Print the boxed "update available" banner to stdout.
 *
 * @param {string} e - The newer version to advertise.
 */
const Ma = (e) => {
  console.log(``);
  console.log(`\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m`);
  console.log(
    `\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,
    Oa.padEnd(12),
    e.padEnd(12),
    `\x1B[33m│\x1B[0m`,
  );
  console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`);
  console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`);
  console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`, ka.padEnd(27));
  console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`);
  console.log(``);
};

/**
 * Check the registry and print the update banner when a newer version exists.
 * Never throws: any failure is deliberately ignored so that an update check
 * cannot interfere with the caller.
 *
 * @returns {Promise<void>}
 */
const Na = async () => {
  try {
    const latest = await Aa();
    if (latest && ja(Oa, latest)) Ma(latest);
  } catch {
    // Intentionally silent: the update notice is purely informational.
  }
};

/** Truthy (the Node.js version string) when running under Node.js. */
const Pa = typeof process !== `undefined` && process.versions && process.versions.node;
/**
 * Build the Buttress application: HTTP plugins plus the `/buttress/rpc`
 * WebSocket endpoint that speaks a JSON-RPC 2.0 style protocol.
 *
 * The server is not started here; the caller is expected to call `listen`
 * on the returned app.
 *
 * @param {object} options
 * @param {object} [options.backend] - Backend stored in app state; falls back to `zr`.
 * @param {object} [options.router] - Optional extra plugin mounted on the app.
 * @param {object} options.config - Processed server configuration.
 * @param {boolean} [options.enableOpenAICompat] - Mount the OpenAI-compatible routes.
 * @param {boolean} [options.enableAnthropicMessages] - Mount the Anthropic Messages routes.
 * @returns {Promise<{app: object, config: object}>}
 */
const Fa = async ({
  backend,
  router,
  config,
  enableOpenAICompat,
  enableAnthropicMessages,
}) => {
  try {
    await c.mkdir(config.server.temp_file_dir, { recursive: true });
  } catch {
    // Best effort: a failure to create the temp dir is not reported here.
  }

  const address = ca() || `0.0.0.0`;
  const workspaceState = va();
  const isBound = workspaceState.workspace != null;
  const generators = await sa(Bi(config, config.generators.map((generator) => generator.type)));

  const serverInfo = {
    id: config.server.id,
    name: config.server.name,
    version: Oa,
    address,
    port: config.server.port,
    url: `http://${address}:${config.server.port}`,
    generators,
    authentication: isBound
      ? { required: true, type: `workspace-jwt`, kid: workspaceState.workspace.kid, bound: true }
      : { required: false, type: `workspace-jwt`, bound: false },
    ...(isBound
      ? { workspace: { id: workspaceState.workspace.id, name: workspaceState.workspace.name } }
      : {}),
  };

  const app = new n({
    serve: { maxRequestBodySize: config.server.max_body_size },
    websocket: { idleTimeout: Math.ceil(config.server.session_timeout / 1e3) },
    adapter: Pa ? t() : undefined,
  }).state({
    sessions: new Map(),
    backend: backend || zr,
    config,
    serverInfo,
    workspaceState,
  });

  if (router) app.use(router);
  if (config.autodiscover?.http?.enabled) app.use(ii(config));
  app.use(fi);
  app.use(_i);
  if (enableOpenAICompat) app.use(Ei(config));
  if (enableAnthropicMessages) app.use(Mi(config));

  const authenticate = (headers, query) => li(ci(headers, query), workspaceState.workspace);
  const RPC_ERROR = {
    INVALID_REQUEST: -32600,
    INVALID_PARAMS: -32602,
    METHOD_NOT_FOUND: -32601,
    INTERNAL_ERROR: -32603,
  };
  const peerIdOf = (ws) => ws.id ?? ws.raw?.id ?? ws.remoteAddress;
  const sendError = (ws, code, message, id, extra = {}) => {
    ws.send(JSON.stringify({ jsonrpc: `2.0`, error: { code, message, ...extra }, id }));
  };

  app.ws(`/buttress/rpc`, {
    // String frames are JSON-decoded here; undecodable frames are answered
    // with INVALID_REQUEST and dropped by returning null.
    parse: (ws, raw) => {
      if (typeof raw === `string`) {
        try {
          return JSON.parse(raw);
        } catch {
          sendError(ws, RPC_ERROR.INVALID_REQUEST, `Invalid request`, null);
          return null;
        }
      }
      return raw;
    },

    body: a.Object({
      jsonrpc: a.String(),
      method: a.String(),
      params: a.String(),
      id: a.String(),
    }),

    async open(ws) {
      const peerId = peerIdOf(ws);
      const headers = ws.data?.headers;
      const query = ws.data?.query;

      // `ready` lets message() wait for the authentication outcome below.
      let settleReady;
      const ready = new Promise((resolve) => {
        settleReady = resolve;
      });

      const existing = ws.data.store.sessions.get(peerId);
      if (existing) {
        // Reconnect within the grace period: keep the session, drop the expiry timer.
        clearTimeout(existing.timeout);
        existing.timeout = null;
        existing.identity = null;
        existing.ready = ready;
      } else {
        ws.data.store.sessions.set(peerId, {
          streamReaders: new Map(),
          generators: new Set(),
          initializedContexts: new Set(),
          timeout: null,
          identity: null,
          ready,
        });
      }

      if (!isBound) {
        console.log(`[Request] New connection: ${peerId} (unbound, no auth)`);
        settleReady(true);
        return;
      }

      const identity = await authenticate(headers || {}, query);
      if (!identity) {
        console.warn(`[Auth] Rejecting WS ${peerId}: invalid or missing workspace-access token`);
        ws.data.store.sessions.delete(peerId);
        settleReady(false);
        sendError(ws, RPC_ERROR.INVALID_REQUEST, `Unauthorized`, null);
        ws.close(1008, `UNAUTHORIZED`);
        return;
      }

      console.log(
        `[Request] New connection: ${peerId} (subject=${identity.subjectType}:${identity.subjectId})`,
      );
      const session = ws.data.store.sessions.get(peerId);
      if (session) session.identity = identity;
      settleReady(true);
    },

    async message(ws, { id, method, params }) {
      const peerId = peerIdOf(ws);
      console.log(`[Request] Received request from ${peerId}: ${method}`);

      let session = ws.data.store.sessions.get(peerId);
      if (session?.ready) {
        if (!(await session.ready)) {
          sendError(ws, RPC_ERROR.INVALID_REQUEST, `Unauthorized`, id);
          return;
        }
        // Re-read: the session may have been removed while waiting.
        session = ws.data.store.sessions.get(peerId);
      }
      if (!session) {
        sendError(ws, RPC_ERROR.INTERNAL_ERROR, `Session not found`, id);
        return;
      }
      if (isBound && !session.identity) {
        sendError(ws, RPC_ERROR.INVALID_REQUEST, `Unauthorized`, id);
        return;
      }

      try {
        // Built-in control methods have no `namespace.action` form, so they
        // must be handled BEFORE the handler lookup; otherwise the lookup
        // fails first and they are answered with METHOD_NOT_FOUND.
        if (method === `cancel`) {
          if (session.streamReaders.has(id)) {
            session.streamReaders.get(id)?.cancel();
            session.streamReaders.delete(id);
          }
          return;
        }
        if (method === `ping`) {
          ws.send(JSON.stringify({ jsonrpc: `2.0`, result: `pong`, id }));
          return;
        }

        const [namespace, action] = method.split(`.`);
        const handler = na[namespace]?.[action];
        // Only callables are handlers; this also rejects inherited
        // non-function properties reached through client-supplied names.
        if (typeof handler !== `function`) {
          sendError(ws, RPC_ERROR.METHOD_NOT_FOUND, `Method not found`, id);
          return;
        }

        const rawArgs = ra(params);
        const schema = ta[namespace]?.[action];
        const args = schema ? schema.parse(rawArgs) : rawArgs;
        const result = await handler({ ...ws.data.store, peerId, session }, ...args);

        if (result instanceof s) {
          // Streaming result: forward each chunk as a notification tied to
          // the request id, then signal the end (or the error).
          const reader = result.getReader();
          session.streamReaders.set(id, reader);
          ws.send(JSON.stringify({ jsonrpc: `2.0`, result: { type: `stream` }, id }));
          try {
            for (;;) {
              const { value, done } = await reader.read();
              if (done) break;
              const { event, data } = value;
              ws.send(
                JSON.stringify({
                  jsonrpc: `2.0`,
                  method: `notification/${event}`,
                  params: ia(data),
                  id,
                }),
              );
            }
            ws.send(JSON.stringify({ jsonrpc: `2.0`, method: `notification/_end`, id }));
          } catch (streamError) {
            console.error(`[RPC] Stream error for ${method}:`, streamError);
            ws.send(
              JSON.stringify({
                jsonrpc: `2.0`,
                method: `notification/_error`,
                params: ia(streamError),
                id,
              }),
            );
          }
          session.streamReaders.delete(id);
        } else {
          ws.send(JSON.stringify({ jsonrpc: `2.0`, result: ia(result), id }));
        }
      } catch (handlerError) {
        if (handlerError instanceof ne) {
          sendError(ws, RPC_ERROR.INVALID_PARAMS, `Invalid params`, id, {
            data: handlerError.issues,
          });
          return;
        }
        console.error(`[RPC] Handler error for ${method}:`, handlerError);
        sendError(ws, RPC_ERROR.INTERNAL_ERROR, String(handlerError), id);
      }
    },

    async close(ws) {
      const peerId = peerIdOf(ws);
      console.log(`[Request] Connection closed: ${peerId}`);
      const { backend: activeBackend, sessions } = ws.data.store;
      const session = sessions.get(peerId);
      if (!session) return;

      session.streamReaders.forEach((reader) => reader.cancel());
      session.streamReaders.clear();
      // Keep the session for `session_timeout` ms so a reconnect (see open)
      // can reuse it; afterwards drop it and finalize its generators.
      session.timeout = setTimeout(() => {
        sessions.delete(peerId);
        console.log(`[Request] Session timed out: ${peerId}`);
        session.generators.forEach((generator) => {
          activeBackend.finalizeGenerator(generator);
        });
      }, config.server.session_timeout);
    },
  });

  return { app, config };
};
ma(o.autodiscover,()=>a.store.serverInfo),c.push(l.start())):o.autodiscover&&!u&&console.warn("[Autodiscover] disabled: buttress-server is not bound to a workspace. Run `bricks buttress bind` from a workspace-authed CLI to pair."),await Promise.all(c),{app:a,port:s,openaiEnabled:r,anthropicMessagesEnabled:i,autoDiscover:l}},La=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];(process.argv[1]?.endsWith(`/bricks-buttress`)||La.includes(process.argv[1]))&&await Da();export{Na as checkAndNotifyUpdates,Aa as checkForUpdates,ja as compareVersions,Fa as createServer,Ma as logUpdateMessage,Wi as processConfig,Yr as startModelDownload,Ia as startServer};
|