@fugood/buttress-server 2.25.0-beta.16 → 2.25.0-beta.19

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/lib/index.mjs +14 -14
  2. package/package.json +2 -2
package/lib/index.mjs CHANGED
@@ -1,14 +1,14 @@
1
1
  #!/usr/bin/env node
2
- import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import o,{createHash as s,randomUUID as c}from"node:crypto";import l from"node:path";import*as u from"node:stream/web";import{ReadableStream as d}from"node:stream/web";import f,{mkdir as p,open as m,readFile as h,readdir as g,rename as _,stat as v,unlink as y,writeFile as b}from"node:fs/promises";import x from"node:os";import{gguf as S}from"@huggingface/gguf";import{getBackendDevicesInfo as C,isLibVariantAvailable as w,loadModel as T}from"@fugood/llama.node";import E from"bytes";import{EventEmitter as D}from"node:events";import{initWhisper as ee}from"@fugood/whisper.node";import{fileURLToPath as te}from"node:url";import{execFile as ne,execSync as O,spawn as re}from"node:child_process";import k from"node:fs";import A from"@iarna/toml";import{ZodError as j,z as M}from"zod";import{importSPKI as N,jwtVerify as P}from"jose";import{cors as ie}from"@elysiajs/cors";import ae from"node-machine-id";import F from"ms";import{Buffer as I}from"node:buffer";import L from"node:dgram";const R=1024**3,z=(e,t,n)=>Math.min(Math.max(e,t),n),oe=e=>e?40:0,se=(e=0)=>e?z(e/(12*R)*20,0,20):0,ce=(e=0)=>e?z(e/(32*R)*10,0,10):0,le=e=>e?10:0,B=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},ue=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=oe(n)+B(t,e)+se(r),s=ce(i),c=le(a);return 
Math.min(100,Math.round(o+s+c))},de=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:oe(n),variant:B(t,e),gpuMemory:se(r),cpuMemory:ce(i),availability:le(a)}),fe=[`cuda`,`vulkan`,`snapdragon`,`default`],pe=.85,me=.5,he=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],ge=e=>e&&String(e).trim().toLowerCase()||null,_e=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=fe}={})=>{let i=[];e&&i.push(e),i.push(...he(t)),i.push(...he(n)),i.push(...r);let a=new Set;for(let e of i){let t=ge(e);t&&a.add(t)}return Array.from(a)},ve=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},ye=e=>Array.isArray(e)?e.map(e=>({...e})):[],be=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,xe=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=ye(be(n,r)),l=c.some(ve),u=c.filter(e=>ve(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:ue(m),breakdown:o?de(m):null,error:s,timestamp:new Date().toISOString()}},Se=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let 
r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},Ce=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=pe,cpuMemoryFraction:a=me,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=fe}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=_e({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(xe({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(xe({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=Se({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=Se({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return 
v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},we=`ggml-llm`,Te=[`cuda`,`vulkan`,`default`],Ee=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=pe,cpuMemoryFraction:a=me,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>Ce({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:Te,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:Te}),De=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=ue(d),p=l?de(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=Se({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=Se({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},Oe=new Map([[we,Ce],[`ggml-stt`,Ee],[`mlx-llm`,De]]),ke=async({platform:e,totalMemoryInBytes:t,backend:n=we,dependencies:r,...i}={})=>{let a=Oe.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await 
a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},Ae={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},je=e=>Ae[e?String(e).toLowerCase():`f16`]||Ae.f16,Me=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=je(a.k),h=je(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},Ne=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},Pe=e=>e?String(e).trim().toLowerCase():null,Fe=(e={},t=null)=>{if(!e)return null;let n=Pe(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Ie=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return 
o*a+c},Le=({arch:e,nLayer:t=0})=>({arch:Pe(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Re=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),ze=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=Pe(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=Fe(t,r),o=r?Re.get(r):null;if(!o)return Le({arch:r,nLayer:n});let 
s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return Le({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Ie(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Be=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),Ve=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),He=e=>e?String(e).trim().toLowerCase():null,Ue=e=>{let t=He(e);return t?Be.has(t):!1},We=e=>{let t=He(e);return t?Ve.has(t):!1},Ge=e=>Ue(e)?`recurrent`:We(e)?`hybrid`:`transformer`,Ke=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let 
d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},qe=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ge(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Me(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},Je=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ge(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let 
e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Ye=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new D;V.setMaxListeners(100);const Xe=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var Ze=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Xe(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Xe(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Xe(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new Ze,U=new Ze;let Qe=0;function $e(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function et(e){return Qe+=1,{subscriberId:Qe,unsubscribe:$e(e)}}function tt(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return 
i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function nt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function rt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function it(e){return{timestamp:new Date().toISOString(),ggmlLlm:tt(e),ggmlStt:nt(e),mlxLlm:rt(e)}}const{ReadableStream:at,WritableStream:ot}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,st=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),st(e[t],n)):e[t]=n}),e),ct=`https://huggingface.co`,lt=`https://huggingface.co/api`,W=l.join(x.homedir(),`.buttress`,`models`),ut=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],dt=.5,ft={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:dt},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:lt,base_url:ct,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},pt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],mt=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},ht=(e={})=>{let t=structuredClone(ft);if(st(t,e),t.backend.variant=mt(t.backend.variant),t.backend.variant_preference=Array.from(new Set(pt(t.backend.variant_preference).flatMap(e=>{let t=mt(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(pt(t.runtime.prefer_variants).flatMap(e=>{let t=mt(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(pt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let 
e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||ct,t.model.api_base=t.model.api_base||lt,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...ft.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||ft.runtime.context_release_delay_ms),t},gt=e=>{let t=e.toLowerCase();return ut.find(e=>t.includes(e))||null},_t=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=mt(e);return t?[t]:[]})))},G=async e=>{await p(e,{recursive:!0})},vt=(e=W)=>l.join(e,`.metadata-cache`),yt=(e,t,n=W)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(vt(n),t,`${r}.json`)},bt=async(e,t,n=W)=>{try{let r=yt(e,t,n),i=await h(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,l.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},xt=async(e,t,n,r=W)=>{try{let i=yt(e,t,r);await G(l.dirname(i)),await b(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,l.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} 
cache:`,e.message)}},St=(e=W)=>l.join(e,`.session-state-cache`),Ct=(e=W)=>l.join(St(e),`cache-map.json`),wt=(e=W)=>l.join(St(e),`temp`),Tt=(e=W)=>l.join(St(e),`states`),Et=()=>({version:1,entries:{},totalSize:0}),Dt=async(e=W)=>{try{let t=await h(Ct(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?Et():n}catch{return Et()}},Ot=async(e,t=W)=>{let n=Ct(t),r=`${n}.tmp.${Date.now()}`;try{await G(l.dirname(n)),await b(r,JSON.stringify(e,null,2),`utf-8`),await _(r,n)}catch(e){throw await y(r).catch(()=>{}),e}},kt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return s(`sha256`).update(n).digest(`hex`).slice(0,24)},At=(e,t=W)=>l.join(Tt(t),`${e}.bin`),jt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return l.join(wt(e),`${t}.bin`)},Mt=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,Nt=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},Pt=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Mt(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=Nt(e,n.fullText);return 
r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},Ft=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},It=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Mt(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Lt=async(e=W)=>{let t=wt(e);try{let e=await g(t),n=Date.now();await Promise.all(e.map(async e=>{let r=l.join(t,e),i=await v(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await y(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Rt=async e=>{try{return await v(e),!0}catch{return!1}},zt=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t;var Bt=class 
e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=zt(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}async persistCacheMap(){try{await Ot(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await 
Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Rt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await G(St(this.baseDir)),await G(wt(this.baseDir)),await G(Tt(this.baseDir)),this.cacheMap=await Dt(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await y(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await y(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Mt(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=Pt(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Rt(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return 
console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=kt(a,this.metadata),s=()=>{n&&y(n).catch(()=>{}),i&&y(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=At(o,this.baseDir),u=i?At(`${o}-prompt`,this.baseDir):null;try{await G(l.dirname(c)),await _(n,c);let s=await v(c),d=0;if(i&&u)try{await _(i,u),d=(await v(u)).size,console.log(`[SessionCache] Saved prompt state: ${u}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let f={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:u||null,promptStateSize:d,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=f,this.cacheMap.totalSize+=s.size+d,this.requiresExactMatch()||await It(this.cacheMap,e,o,this.metadata),await Ft(this.cacheMap,this.maxSizeBytes,this.maxEntries),await Ot(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),f}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await G(wt(this.baseDir)),jt(this.baseDir)}async cleanup(){await Lt(this.baseDir)}};const Vt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return 
n.json()},Ht=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Ut=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await bt(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await S(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await xt(r,`range-metadata`,o,n),o},Wt=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},K=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},Gt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new ot({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Kt=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await 
bt(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Ht(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await xt(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Vt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:ut,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=gt(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||gt(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await Vt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await Ht(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await xt(i,`artifact-info`,y,r),y},qt=/^mmproj-.*\.gguf$/i,Jt=async(e,t)=>{if(!e.model.enable_mtmd)return null;let 
n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await bt(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Ht(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await xt(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:l.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:l.resolve(e.model.mmproj_local_path)};return await xt(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await Vt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),u=c?.siblings||c?.files||[],d=u.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),f=e.model.mmproj_filename;if(f){if(!d.includes(f))throw Error(`mmproj file "${f}" not found in repo ${i}`)}else{let e=d.filter(e=>qt.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let n=t?.quantization&&String(t.quantization).toLowerCase();f=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let p=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${f}`,m=u.find(e=>(e.rfilename||e.path||e.filename)===f),h=Number(m?.size);if(!Number.isFinite(h)||h<=0){let e=await Ht(p,{headers:r});h=Number(e.headers.get(`content-length`))||null}let 
g={repoId:i,revision:a,filename:f,url:p,size:h,headers:r};return await xt(o,`artifact-info`,g,n),g},Yt=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},Xt=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=_t(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?ft.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?dt:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ke({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Zt=async e=>{let t=await Kt(e),n=await Jt(e,t),r=await Ut(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},j=A.n_ctx?Number(A.n_ctx):null,M=j||a||4096,N=[],P=[],ie=!0;if(j&&a&&j>a){ie=!1;let e=`Requested context length (${j}) exceeds model training context 
(${a})`;N.push(e),P.push(e),M=a}j&&!a&&N.push(`Model metadata missing training context length, using requested value`);let ae={k:A.cache_type_k,v:A.cache_type_v},F=t.size>0?t.size:0,I=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:ae,swaConfig:O,kvUnified:A.kv_unified,nParallel:A.n_parallel,swaFull:A.swa_full,arch:i,attentionLayerCount:m}),L=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:A.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=await Xt(e,{modelBytes:F,kvCacheBytes:I(M)+L}),z=R.selected.totalMemory||0,oe=z*(e.backend.gpu_memory_fraction||1),se=e.backend.cpu_memory_fraction==null?dt:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),ce=Math.max(0,x.totalmem()*se),le=R.selected.hasGpu?oe:ce,B=Ye({maxCtx:M,availableMemory:le,modelBytes:F,kvBytesForCtx:I});if(!j&&B){let e=a?Math.min(B,a):B,t=Math.max(32,e);t<M&&N.push(`Context length capped to ${t} by memory limits`),M=t}M>B&&(M=B);let ue=Math.floor(B);console.log(`[buttress] Memory-limited context length: ${ue}`);let de=I(M),fe=F+de+L,pe=w?F/(w+1):F,me=0;R.selected.hasGpu&&pe>0&&(me=Math.min(w+1,Math.max(0,Math.floor(oe/pe)))),console.log(`[buttress] Auto GPU layer capacity (${R.selected.variant}): ${me}/${w+1}`);let he;he=A.n_gpu_layers===`auto`||A.n_gpu_layers==null?me:Math.max(0,Math.min(Number(A.n_gpu_layers)||0,w+1));let ge=(()=>{let e=A.flash_attn_type&&String(A.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:R.selected.hasGpu?`auto`:`off`})(),_e=e.runtime.cache_dir,ve=Wt(e,t),ye=await K(ve,t.size),be=Yt(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:ie,backend:`ggml-llm`,warnings:N,errors:P,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,dense_first:O.denseFirst,type:O.type,layers:O.swaLayers}:null}},runtime:{...A,variant:R.selected.variant,n_ctx:M,requested_ctx:j,n_gpu_layers:he,auto_gpu_layers:me,flash_attn_type:ge,cache_type_k:ae.k,cache_type_v:ae.v,estimated_max_n_ctx:ue},resources:{modelBytes:F,kvCacheBytes:de,recurrentMemoryBytes:L,totalEstimatedBytes:fe,gpuCapacityBytes:z,gpuUsableBytes:oe,cpuUsableBytes:ce,fit:R.selected.fit},devices:{selected:R.selected,attempts:R.attempts},download:{cacheDir:_e,localPath:ve,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:R,cacheTypes:ae,localPath:ve,localExists:ye}},Qt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new at({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},$t=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&y(i).catch(()=>{}),c&&y(c).catch(()=>{})};return new at({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},en=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return s(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},tn=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),u=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),d=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=l.join(t,u);await K(f)||await Gt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Gt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},nn=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Gt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},rn=async(e,t)=>{let n=en(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await tn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await T(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await nn(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},an=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},on=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return an(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?an(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await an(e,t)},i),!0)};async function sn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=ht(t),a=await Zt(i),o=new Bt(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await rn(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=en(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=t.prompt||``,o=null,c=null;if(!a&&t.messages){({messages:o}=t),c={chatTemplate:t.chat_template||t.chatTemplate,jinja:t.jinja??!0,tools:t.tools,parallel_tool_calls:t.parallel_tool_calls,tool_choice:t.tool_choice,reasoning_format:t.reasoning_format,enable_thinking:t.enable_thinking,add_generation_prompt:t.add_generation_prompt,now:t.now,chat_template_kwargs:t.chat_template_kwargs,force_pure_content:t.force_pure_content};let e=await i.context.getFormattedChat(o,c.chatTemplate,c);a=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&a){let{options:e}=await s.sessionCache.prepareCompletionOptions(t,a,i.context),n=await s.sessionCache.generateTempStatePath(),r=(await i.context.tokenize(a))?.tokens?.length||0,o={...e,save_state_path:n},c=s.sessionCache.requiresExactMatch(),l=!!o.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),o.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return $t(i.context,o,s.sessionCache,a,n,r,s.id,d,u)}let l={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Qt(i.context,t,s.id,l)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=en(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=await 
i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=en(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=en(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=en(s),t=s.contexts.get(e);return t?on(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):an(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const cn=e=>{let t=ht(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function ln(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let 
n=ht(e),o=await Kt(n),s=Wt(n,o),{repoId:c}=o,u=await Jt(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),d=Yt(n,u),f=async()=>{if(!u||!d||u.localPath)return;if(await K(d,u.size)){console.log(`[Download] mmproj already exists: ${d}`);return}let e=t.getDownload(d);if(e){await e;return}let n=(async()=>{try{await Gt(u.url,u.headers,d,u.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(d)}})();t.setDownload(d,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await f().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let p=t.getDownload(s);if(p)return console.log(`[Download] Already downloading: ${c}`),p.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let m=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let u=String(s).padStart(5,`0`),d=o.filename.replace(e,`-${u}-of-${String(i).padStart(5,`0`)}.gguf`),f=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${d}`,p=l.join(t,d);await K(p)||await Gt(f,o.headers,p,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Gt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await f(),console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: 
${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,m),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function un(e){let t=ht(e),n=await Kt(t),r=await Ut(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,j={k:t.model.cache_type_k,v:t.model.cache_type_v},M=n.size>0?n.size:0,N=t.model.n_parallel||4,P=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:j,swaConfig:O,kvUnified:t.model.kv_unified,nParallel:N,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),ie=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:N,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),ae=t.backend?.gpu_memory_fraction==null?ft.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),F=t.backend?.cpu_memory_fraction==null?dt:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),I=await 
Xt(t,{modelBytes:M,kvCacheBytes:P(A)}),L=(I.selected.totalMemory||0)*ae,R=Math.max(0,x.totalmem()*F),z=Ye({maxCtx:A,availableMemory:I.selected.hasGpu?L:R,modelBytes:M,kvBytesForCtx:P}),oe=P(A),se=P(z);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:te,nEmbdHeadV:ne,nHeadCount:E,nHeadKvCount:D,kvLayerCount:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,denseFirst:O.denseFirst,type:O.type,layers:O.swaLayers}:null},modelBytes:M,kvCacheBytes:oe,limitedKvCacheBytes:se,memoryLimitedCtx:z,recurrentMemoryBytes:ie,quantization:{name:n.quantization||null,fileType:p,version:f}}}const dn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function fn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await un(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await ke({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=dn(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let v=null,y=null;if(e){let t=dn(e);y={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=y.fit,a=y.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}v={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!v&&(v={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let b=null;return i&&(b={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:y,comparison:v,modelConfig:b}}const{WritableStream:pn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,mn=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),mn(e[t],n)):e[t]=n}),e),hn=`https://huggingface.co`,gn=`https://huggingface.co/api`,_n=l.join(x.homedir(),`.buttress`,`models`),vn=[`cuda`,`vulkan`,`default`],yn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],bn=`fp16`,xn=.5,Sn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],Cn=e=>{if(!e)return null;let t=e.toLowerCase();return 
Sn.find(e=>t.includes(e))||null},wn={backend:{type:`ggml-stt`,variant:null,variant_preference:vn,gpu_memory_fraction:.85,cpu_memory_fraction:xn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,bn,`q5_1`],allow_local_file:!1,local_path:null,api_base:gn,base_url:hn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:_n,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},Tn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],En=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Dn=(e={})=>{let t=structuredClone(wn);if(mn(t,e),t.backend.variant=En(t.backend.variant),t.backend.variant_preference=Array.from(new Set(Tn(t.backend.variant_preference||vn).flatMap(e=>{let t=En(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...vn]),t.runtime.prefer_variants=Array.from(new Set(Tn(t.runtime.prefer_variants).flatMap(e=>{let t=En(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(Tn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||hn,t.model.api_base=t.model.api_base||gn,t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):_n,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||wn.runtime.context_release_delay_ms),t},On=e=>{let t=e.toLowerCase();return yn.find(e=>t.includes(e))||null},kn=e=>{let 
/**
 * Reports whether a file exists at `e` and, when an expected size is given,
 * whether its size matches.
 *
 * @param {string} e Path to stat.
 * @param {number} [t] Expected size in bytes; a falsy value skips the size check.
 * @returns {Promise<boolean>} False when the stat call fails or the size differs.
 */
const Rn = async (e, t) => {
  let fileInfo;
  try {
    fileInfo = await v(e);
  } catch {
    return false;
  }
  if (!t) return true;
  return fileInfo.size === t;
};
/**
 * Resolves the model artifact described by an STT backend config.
 *
 * The result is looked up in, and written to, the on-disk metadata cache under
 * the `artifact-info` namespace, keyed by the model selection fields.
 *
 * Request headers are built from the current config on every call and are never
 * written to the cache. Previously the `Authorization: Bearer ...` header was
 * stored in the cache file and replayed from it, which kept the token on disk
 * in plaintext and made a rotated token ineffective until the cache entry was
 * removed. Cache entries written by older versions are still accepted; their
 * stored headers are overridden by the current ones.
 *
 * Resolution order:
 *  1. `model.url` set: HEAD that URL for its size.
 *  2. Otherwise list the repo's `.bin` files and pick `model.filename` if set,
 *     else the first file matching the preferred quantizations (`bn` matches a
 *     file containing none of the known quantization names), else the first
 *     `.bin` file.
 *
 * @param {object} e Normalized backend config (see `Dn`).
 * @returns {Promise<object>} Artifact descriptor: repoId, revision, filename,
 *   url, size, quantization, headers (plus isSplit/splitCount for repo files).
 * @throws {Error} When no repo id is configured, the repo has no `.bin` files,
 *   or one of the HTTP helpers rejects.
 */
const Bn = async (e) => {
  const repoId = e.model.repo_id || e.model.repository || e.model.model;
  if (!repoId) throw Error("`model.repo_id` is required in Buttress backend config");

  const revision = e.model.revision || 'main';
  const cacheDir = e.runtime.cache_dir;
  // Key order is kept stable so existing cache entries remain addressable.
  const cacheKey = JSON.stringify({
    repoId,
    revision,
    filename: e.model.filename,
    url: e.model.url,
    quantization: e.model.quantization,
    preferred_quantizations: e.model.preferred_quantizations,
  });

  const headers = { ...(e.runtime.http_headers || {}) };
  if (e.runtime.huggingface_token) {
    headers.Authorization = `Bearer ${e.runtime.huggingface_token}`;
  }

  const cached = await Nn(cacheKey, 'artifact-info', cacheDir);
  if (cached) return { ...cached, headers };

  // Persists the descriptor without its headers, then hands back the full one.
  const remember = async (info) => {
    const { headers: _notPersisted, ...persistable } = info;
    await Pn(cacheKey, 'artifact-info', persistable, cacheDir);
    return info;
  };

  if (e.model.url) {
    const head = await In(e.model.url, { headers });
    const size = Number(head.headers.get('content-length')) || null;
    const filename = e.model.filename || e.model.url.split('/').pop();
    return remember({
      repoId,
      revision,
      filename,
      url: e.model.url,
      size,
      quantization: On(filename || ''),
      headers,
    });
  }

  let { filename } = e.model;
  let quantization = e.model.quantization && String(e.model.quantization).toLowerCase();

  const listing = await Fn(`${e.model.api_base}/models/${repoId}?revision=${revision}&blobs=true`, { headers });
  const candidates = (listing?.siblings || listing?.files || [])
    .map((entry) => entry.rfilename || entry.path || entry.filename)
    .filter((name) => typeof name === 'string' && name.endsWith('.bin'));
  if (candidates.length === 0) throw Error(`No model artifacts found in repo ${repoId}`);

  const preferences = e.model.preferred_quantizations.length > 0 ? e.model.preferred_quantizations : yn;
  const pickPreferred = () => {
    for (const wanted of preferences) {
      if (wanted === bn) {
        // The unquantized preference matches a file naming no known quantization.
        const unquantized = candidates.find((name) => {
          const lower = name.toLowerCase();
          return !yn.some((quant) => lower.includes(quant));
        });
        if (unquantized) return { filename: unquantized, quantization: null };
      } else {
        const match = candidates.find((name) => name.toLowerCase().includes(wanted));
        if (match) return { filename: match, quantization: wanted };
      }
    }
    return null;
  };

  if (filename) {
    quantization ||= On(filename);
  } else {
    const picked = pickPreferred() || { filename: candidates[0], quantization: null };
    filename = picked.filename;
    quantization = picked.quantization || On(filename);
  }

  const url = `${e.model.base_url.replace(/\/+$/, '')}/${repoId}/resolve/${revision}/${filename}`;
  const head = await In(url, { headers });
  const size = Number(head.headers.get('content-length')) || null;
  return remember({
    repoId,
    revision,
    filename,
    url,
    size,
    quantization,
    headers,
    isSplit: false,
    splitCount: 0,
  });
};
/**
 * Ensures the model file for a plan exists locally and returns its path.
 *
 * Order of checks:
 *  1. The plan already says the file exists.
 *  2. A download of the same path is registered with the global download
 *     manager `r`: wait for it, then verify the file with `Rn`.
 *  3. A download of the same path is in flight in the per-generator map `t`.
 *  4. Otherwise start the work here (verify a local-only file when
 *     `allow_local_file` is set, else download via `zn`) and register it in `t`
 *     until it settles.
 *
 * After any successful path the plan is marked as available
 * (`localExists` / `info.download.exists`). Previously that only happened in
 * step 2, so a generator created before the file existed started a fresh
 * download on every later context initialisation.
 *
 * @param {object} e Plan object (localPath, artifact, config, localExists, info).
 * @param {Map<string, Promise>} t In-flight downloads keyed by local path.
 * @param {Function} [n] Progress callback; invoked with 1 when the file is ready
 *   on the early-return paths, and passed through to `zn` for a fresh download.
 * @param {object|null} [r=null] Global download manager exposing `getDownload`.
 * @returns {Promise<string>} The local model path.
 * @throws {Error} When the local-only file is missing or the download rejects.
 */
const Un = async (e, t, n, r = null) => {
  const { localPath, artifact, config } = e;

  const reportDone = () => {
    if (typeof n === 'function') n(1);
  };
  const markAvailable = () => {
    e.localExists = true;
    if (e.info?.download) e.info.download.exists = true;
  };

  if (e.localExists) {
    reportDone();
    return localPath;
  }

  if (r) {
    const shared = r.getDownload(localPath);
    if (shared) {
      console.log(`[ensureModelFile] Waiting for global STT download: ${artifact.repoId}`);
      try {
        await shared;
        if (await Rn(localPath, artifact.size)) {
          markAvailable();
          reportDone();
          return localPath;
        }
      } catch (err) {
        // Best effort: fall through and download it ourselves.
        console.warn(`[ensureModelFile] Global STT download failed, will retry: ${err.message}`);
      }
    }
  }

  const inFlight = t.get(localPath);
  if (inFlight) {
    await inFlight;
    markAvailable();
    reportDone();
    return localPath;
  }

  const task = (async () => {
    if (config.model.allow_local_file) {
      if (!(await Rn(localPath, artifact.size))) {
        throw Error(`Local model file not found: ${localPath}`);
      }
      return localPath;
    }
    await zn(artifact.url, artifact.headers, localPath, artifact.size, n);
    return localPath;
  })();

  t.set(localPath, task);
  try {
    await task;
    markAvailable();
    return localPath;
  } finally {
    t.delete(localPath);
  }
};
/**
 * Converts caller-supplied audio data into an ArrayBuffer.
 *
 * Accepted inputs:
 *  - falsy: returns null
 *  - ArrayBuffer: returned as is
 *  - typed array / DataView / Buffer: the bytes the view actually covers
 *  - string: base64, optionally as a `data:` URL (the part after the first
 *    comma is decoded)
 *
 * For views, only the viewed window is returned. Returning `view.buffer`
 * directly, as before, exposed the whole backing buffer; for a sub-array or a
 * Buffer whose backing store is larger than the view, that includes bytes
 * outside the audio payload.
 *
 * @param {ArrayBuffer|ArrayBufferView|string|null|undefined} e Audio payload.
 * @returns {ArrayBuffer|null} The audio bytes, or null for falsy input.
 * @throws {Error} When the input is none of the supported types.
 */
const Gn = (e) => {
  if (!e) return null;
  if (e instanceof ArrayBuffer) return e;

  if (ArrayBuffer.isView(e)) {
    const { buffer, byteOffset, byteLength } = e;
    // No copy needed when the view spans its whole backing buffer.
    if (byteOffset === 0 && byteLength === buffer.byteLength) return buffer;
    return buffer.slice(byteOffset, byteOffset + byteLength);
  }

  if (typeof e === 'string') {
    const base64 = e.startsWith('data:') ? e.split(',')[1] || '' : e;
    const decoded = Buffer.from(base64, 'base64');
    return decoded.buffer.slice(decoded.byteOffset, decoded.byteOffset + decoded.byteLength);
  }

  throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`);
};
/**
 * Drops one reference to an STT context record and releases the context once
 * nobody holds it, either immediately or after the configured delay.
 *
 * - Any pending release timer on the record is cancelled first.
 * - With `n` (force) the reference count is zeroed; otherwise it is decremented
 *   and, if references remain, nothing is released.
 * - If `context_release_delay_ms` is not a finite number, or is <= 0, or the
 *   release is forced, `qn` is called right away and its result returned.
 * - Otherwise a timer is scheduled; when it fires the release is skipped if the
 *   record has been re-acquired in the meantime.
 *
 * A failure inside the delayed release is caught and logged. The timer
 * callback is async and nothing awaits it, so without the catch a rejecting
 * `qn` would become an unhandled promise rejection.
 *
 * @param {object} e Generator state (reads `config.runtime.context_release_delay_ms`).
 * @param {object} t Context record (refCount, releaseTimer, releaseRequested).
 * @param {boolean} [n=false] Force release regardless of the reference count.
 * @returns {Promise<boolean>} False when references remain; true when a release
 *   was scheduled; otherwise whatever `qn` resolves to.
 */
const Jn = async (e, t, n = false) => {
  t.releaseRequested = true;
  if (t.releaseTimer) {
    clearTimeout(t.releaseTimer);
    t.releaseTimer = null;
  }

  if (n) {
    t.refCount = 0;
  } else {
    t.refCount = Math.max(0, t.refCount - 1);
    if (t.refCount > 0) {
      t.releaseRequested = false;
      return false;
    }
  }

  const configuredDelay = e.config.runtime.context_release_delay_ms;
  if (typeof configuredDelay !== 'number' || !Number.isFinite(configuredDelay)) {
    return qn(e, t);
  }

  const delayMs = Math.max(0, Math.floor(configuredDelay));
  if (n || delayMs <= 0) return qn(e, t);

  console.log(`[Context] Scheduling STT release in ${delayMs}ms`);
  t.releaseTimer = setTimeout(async () => {
    t.releaseTimer = null;
    if (t.refCount > 0) {
      console.log(`[Context] STT release cancelled, refCount=${t.refCount}`);
      t.releaseRequested = false;
      return;
    }
    console.log(`[Context] Releasing STT context after ${delayMs}ms delay`);
    try {
      await qn(e, t);
    } catch (err) {
      console.error(`[Context] Failed to release STT context:`, err);
    }
  }, delayMs);
  return true;
};
Wn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await qn(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Kn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},u=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Jn(o,e):!1},d=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Gn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=l.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
/**
 * Derives an identifier string for an STT model config.
 *
 * The config is normalized with `Dn`; the repo id is taken from `repo_id`,
 * `repository` or `model` (first truthy one). When `Cn` recognises a model type
 * in the configured filename the result is `<repo>:<type>`, otherwise just the
 * repo id.
 *
 * @param {object} e Raw backend config.
 * @returns {string|null} The identifier, or null when no repo id is configured.
 */
const Xn = (e) => {
  const { model } = Dn(e);
  const repo = model.repo_id || model.repository || model.model || null;
  if (!repo) return null;

  const modelType = Cn(model.filename);
  if (!modelType) return repo;
  return `${repo}:${modelType}`;
};
/**
 * Returns the capability score for a device/capability record.
 *
 * A finite numeric `score` on the record is used as is; otherwise the score is
 * computed from the record with `ue`. A falsy record scores 0.
 *
 * @param {object|null|undefined} e Capability record.
 * @returns {number} The score.
 */
const Qn = (e) => {
  if (!e) return 0;
  const hasUsableScore = typeof e.score === 'number' && Number.isFinite(e.score);
  if (hasUsableScore) return Number(e.score);
  return ue(e);
};
/**
 * Recursively merges `t` into `e` in place and returns `e`.
 *
 * Non-array object values are merged key by key (creating an empty object on
 * the target when it has no object at that key); arrays, primitives and
 * null/falsy values overwrite the target value.
 *
 * The `__proto__` key is skipped: reading `e['__proto__']` yields
 * `Object.prototype`, so recursing into it would write attacker-chosen keys
 * onto every object in the process.
 *
 * @param {object} [e={}] Target object, mutated in place.
 * @param {object|null} [t={}] Source object; null is treated as empty.
 * @returns {object} The mutated target.
 */
const lr = (e = {}, t = {}) => {
  for (const [key, value] of Object.entries(t || {})) {
    if (key === '__proto__') continue;
    const isMergeable = value && typeof value === 'object' && !Array.isArray(value);
    if (!isMergeable) {
      e[key] = value;
      continue;
    }
    if (!e[key] || typeof e[key] !== 'object') e[key] = {};
    lr(e[key], value);
  }
  return e;
};
/**
 * Computes a KV-cache size figure from model dimensions.
 *
 * - Returns 0 when the layer count or context length is missing.
 * - When `isMLA` is set and `kvLoraRank` is positive:
 *   layers * (kvLoraRank + qkRopeHeadDim) * context * 2.
 * - Otherwise: layers * kvHeads * headDim * context * 2 * 2, or 0 when the
 *   KV head count or head dimension is missing.
 *
 * @param {object} dims
 * @param {number} dims.numLayers
 * @param {number} dims.numKvHeads
 * @param {number} dims.headDim
 * @param {number} dims.contextLength
 * @param {boolean} [dims.isMLA]
 * @param {number} [dims.kvLoraRank]
 * @param {number} [dims.qkRopeHeadDim]
 * @returns {number} The computed size (presumably bytes — TODO confirm the unit
 *   against callers).
 */
function _r({ numLayers, numKvHeads, headDim, contextLength, isMLA, kvLoraRank, qkRopeHeadDim }) {
  if (!numLayers || !contextLength) return 0;

  if (isMLA && kvLoraRank > 0) {
    const latentWidth = kvLoraRank + (qkRopeHeadDim || 0);
    return numLayers * latentWidth * contextLength * 2;
  }

  if (!numKvHeads || !headDim) return 0;
  return numLayers * numKvHeads * headDim * contextLength * 2 * 2;
}
/**
 * Prepares the Python virtual environment used by the MLX bridge and returns
 * the path of its `python3` executable.
 *
 * Steps:
 *  1. If `<envDir>/bin/python3` exists and can import `mlx_vlm` and `torch`,
 *     the environment is reused as is.
 *  2. If the interpreter does not exist, the system Python version is checked
 *     (a warning is logged when it is older than `xr`), and a venv is created.
 *  3. The requested packages plus `torch` and `torchvision` are installed with
 *     the venv's pip, and the install is verified by importing them.
 *
 * Progress is reported through `onProgress` as fractions between 0 and 1.
 *
 * @param {object} options
 * @param {string} options.envDir Directory of the virtual environment.
 * @param {string} options.mlxLmPackage pip requirement for mlx-lm.
 * @param {string} options.mlxVlmPackage pip requirement for mlx-vlm.
 * @param {Function} [options.onProgress] Progress callback.
 * @returns {Promise<string>} Path to the environment's python3 binary.
 * @throws {Error} When venv creation, installation or verification fails.
 */
async function Sr({ envDir, mlxLmPackage, mlxVlmPackage, onProgress }) {
  const pythonBin = l.join(envDir, `bin`, `python3`);
  const pipBin = l.join(envDir, `bin`, `pip`);

  if (await vr(pythonBin)) {
    try {
      await q(pythonBin, [`-c`, `import mlx_vlm; import torch`], { timeout: 1e4 });
      onProgress?.(1);
      return pythonBin;
    } catch {
      // The interpreter exists but the packages are not importable: reinstall below.
    }
  }

  if (!(await vr(pythonBin))) {
    onProgress?.(0.1);
    try {
      const { stdout } = await q(
        `python3`,
        [`-c`, `import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],
        { timeout: 5e3 },
      );
      const [major, minor] = stdout.trim().split(`.`).map(Number);
      const tooOld = major < xr[0] || (major === xr[0] && minor < xr[1]);
      if (tooOld) {
        console.warn(`[mlx-llm] WARNING: System Python is ${major}.${minor}, but mlx-vlm requires >= ${xr.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`);
      }
    } catch {
      // The version probe is advisory only; venv creation below reports real failures.
    }
    console.log(`[mlx-llm] Creating venv at ${envDir}`);
    await p(envDir, { recursive: true });
    await q(`python3`, [`-m`, `venv`, envDir], { timeout: 6e4 });
    onProgress?.(0.3);
  }

  console.log(`[mlx-llm] Installing ${mlxVlmPackage}`);
  onProgress?.(0.4);
  await q(pipBin, [`install`, mlxLmPackage, mlxVlmPackage, `torch`, `torchvision`], {
    timeout: 6e5,
    env: { ...process.env },
  });
  onProgress?.(0.9);
  await q(pythonBin, [`-c`, `import mlx_vlm; import torch; print(mlx_vlm.__version__)`], { timeout: 15e3 });
  onProgress?.(1);
  console.log(`[mlx-llm] mlx-vlm installed successfully`);
  return pythonBin;
}
2
/**
 * Score contribution of a backend variant name.
 *
 * The name is matched case-insensitively by substring:
 * `cuda` → 20, `vulkan` → 10, `default` → 15 on `darwin`/`ios` and 5 elsewhere;
 * anything else (including an empty name) → 0.
 *
 * @param {string} [e='default'] Variant name.
 * @param {string|null} [t=null] Platform identifier.
 * @returns {number} The variant score.
 */
const z = (e = `default`, t = null) => {
  const name = String(e).toLowerCase();
  if (!name) return 0;
  if (name.includes(`cuda`)) return 20;
  if (name.includes(`vulkan`)) return 10;
  if (!name.includes(`default`)) return 0;
  const isApplePlatform = t === `darwin` || t === `ios`;
  return isApplePlatform ? 15 : 5;
};
/**
 * Checks whether a model plus its KV cache fits into a device's usable memory.
 *
 * Without a device the raw sum of the inputs is reported with
 * `limiting: 'unknown-device'`. Otherwise both sizes are coerced to
 * non-negative numbers, summed, and compared against `gpuUsableBytes` (only
 * when the device has a GPU) and `cpuUsableBytes`. A requirement of 0 never
 * counts as fitting.
 *
 * `limiting` is `ok`, `gpu-memory` (has a GPU but does not fit there),
 * `cpu-memory` (fits the GPU but not the CPU) or `insufficient-memory`
 * (fits neither).
 *
 * @param {object} [options]
 * @param {object} [options.device] Device record (hasGpu, gpuUsableBytes, cpuUsableBytes).
 * @param {number} [options.modelBytes=0]
 * @param {number} [options.kvCacheBytes=0]
 * @returns {{totalRequiredBytes: number, fitsInGpu: *, fitsInCpu: boolean, limiting: string}}
 */
const Ce = ({ device, modelBytes = 0, kvCacheBytes = 0 } = {}) => {
  if (!device) {
    return {
      totalRequiredBytes: modelBytes + kvCacheBytes,
      fitsInGpu: false,
      fitsInCpu: false,
      limiting: `unknown-device`,
    };
  }

  const toBytes = (value) => Math.max(0, Number(value) || 0);
  const totalRequiredBytes = toBytes(modelBytes) + toBytes(kvCacheBytes);
  const fitsInGpu = device.hasGpu && totalRequiredBytes > 0 && totalRequiredBytes <= device.gpuUsableBytes;
  const fitsInCpu = totalRequiredBytes > 0 && totalRequiredBytes <= device.cpuUsableBytes;

  let limiting = `ok`;
  if (!fitsInGpu && device.hasGpu) limiting = `gpu-memory`;
  if (!fitsInCpu) limiting = fitsInGpu ? `cpu-memory` : `insufficient-memory`;

  return { totalRequiredBytes, fitsInGpu, fitsInCpu, limiting };
};
/**
 * Capability detector for the MLX backend.
 *
 * MLX is reported as available only on `darwin` and, when an architecture is
 * given, only on `arm64`. On a supported host the usable memory is
 * `floor(totalMemoryInBytes * unifiedMemoryFraction)` and is reported as both
 * the GPU and the CPU budget. The score and breakdown come from `de` / `fe`.
 *
 * When `modelBytes` or `kvCacheBytes` is given, every attempt (and the selected
 * one) is copied with a `fit` result from `Ce`, plus a `limitedFit` when
 * `limitedKvCacheBytes` is provided and differs from `kvCacheBytes`.
 *
 * @param {object} [options]
 * @param {string} [options.platform]
 * @param {string|null} [options.arch=null]
 * @param {number} [options.unifiedMemoryFraction=0.85]
 * @param {boolean} [options.includeBreakdown=false]
 * @param {number} [options.totalMemoryInBytes]
 * @param {number|null} [options.modelBytes=null]
 * @param {number|null} [options.kvCacheBytes=null]
 * @param {number|null} [options.limitedKvCacheBytes=null]
 * @returns {Promise<{ok: boolean, selected: object|null, attempts: object[], errors: string[]}>}
 */
const Oe = async ({
  platform,
  arch = null,
  unifiedMemoryFraction = 0.85,
  includeBreakdown = false,
  totalMemoryInBytes,
  modelBytes = null,
  kvCacheBytes = null,
  limitedKvCacheBytes = null,
} = {}) => {
  const problems = [];
  if (platform !== `darwin`) problems.push(`MLX requires macOS`);
  if (arch && arch !== `arm64`) problems.push(`MLX requires Apple Silicon (arm64)`);

  const supported = problems.length === 0;
  const usableBytes = supported ? Math.floor(totalMemoryInBytes * unifiedMemoryFraction) : 0;
  const scoringInput = {
    platform,
    variant: `mlx`,
    hasGpu: supported,
    gpuUsableBytes: usableBytes,
    cpuUsableBytes: usableBytes,
    ok: supported,
  };
  const score = de(scoringInput);
  const breakdown = supported ? fe(scoringInput) : null;

  const attempt = {
    platform,
    ok: supported,
    variant: `mlx`,
    hasGpu: supported,
    unifiedMemory: true,
    devices: supported
      ? [{ type: `metal`, deviceName: `Apple Silicon (Unified Memory)`, maxMemorySize: totalMemoryInBytes }]
      : [],
    gpuTotalBytes: supported ? totalMemoryInBytes : 0,
    gpuUsableBytes: usableBytes,
    cpuTotalBytes: totalMemoryInBytes,
    cpuUsableBytes: usableBytes,
    score,
    breakdown: includeBreakdown ? breakdown : undefined,
    error: supported ? undefined : problems.join(`; `),
    timestamp: new Date().toISOString(),
  };
  if (!includeBreakdown) delete attempt.breakdown;

  const result = {
    ok: supported,
    selected: supported ? attempt : null,
    attempts: [attempt],
    errors: supported ? [] : problems,
  };
  if (!modelBytes && !kvCacheBytes) return result;

  // Returns a copy of the device record annotated with memory-fit results.
  const withFit = (device) => {
    if (!device) return device;
    const fit = Ce({ device, modelBytes: modelBytes || 0, kvCacheBytes: kvCacheBytes || 0 });
    let limitedFit = null;
    if (limitedKvCacheBytes != null && limitedKvCacheBytes !== kvCacheBytes) {
      limitedFit = Ce({ device, modelBytes: modelBytes || 0, kvCacheBytes: limitedKvCacheBytes });
    }
    return { ...device, fit, ...(limitedFit && { limitedFit }) };
  };

  result.selected = withFit(result.selected);
  result.attempts = result.attempts.map((entry) => withFit(entry));
  return result;
};
/**
 * Estimates memory requirements for a speech-to-text model.
 *
 * The processing buffer is the sum of three tiered components, each selected
 * by the model size (thresholds at 100 MiB, 200 MiB, 500 MiB and 2 GiB). The
 * audio buffer is `floor(max(0, seconds) * sampleRate * bytesPerSample)`.
 *
 * @param {object} [options]
 * @param {number} [options.modelBytes=0] Model file size in bytes.
 * @param {number} [options.audioLengthSeconds=30]
 * @param {number} [options.sampleRate=16000]
 * @param {number} [options.bytesPerSample=4]
 * @returns {{modelBytes: number, audioBufferBytes: number, processingBufferBytes: number, totalBytes: number}}
 */
const Pe = ({ modelBytes = 0, audioLengthSeconds = 30, sampleRate = 16e3, bytesPerSample = 4 } = {}) => {
  const MIB = 1024 * 1024;
  const GIB = 1024 * MIB;

  const model = Math.max(0, Number(modelBytes) || 0);
  const audioBufferBytes = Math.max(0, Math.floor(Math.max(0, audioLengthSeconds) * sampleRate * bytesPerSample));

  // Picks the value of the first tier whose upper bound exceeds the model size.
  const pickTier = (tiers, fallback) => {
    for (const [limit, value] of tiers) {
      if (model < limit) return value;
    }
    return fallback;
  };

  const firstPart = pickTier(
    [
      [200 * MIB, 120 * MIB],
      [500 * MIB, 140 * MIB],
      [2 * GIB, 150 * MIB],
    ],
    160 * MIB,
  );
  const secondPart = pickTier(
    [
      [200 * MIB, 70 * MIB],
      [500 * MIB, 135 * MIB],
    ],
    220 * MIB,
  );
  const thirdPart = pickTier(
    [
      [100 * MIB, 20 * MIB],
      [200 * MIB, 30 * MIB],
      [500 * MIB, 85 * MIB],
      [2 * GIB, 215 * MIB],
    ],
    360 * MIB,
  );

  const processingBufferBytes = firstPart + secondPart + thirdPart;
  return {
    modelBytes: model,
    audioBufferBytes,
    processingBufferBytes,
    totalBytes: model + processingBufferBytes + audioBufferBytes,
  };
};
/**
 * Builds the "sliding-window attention disabled" description for a model.
 *
 * The architecture name is normalized with `Fe`, every SWA field is reset, and
 * `kvLayers` is the layer count coerced to a non-negative integer.
 *
 * @param {object} options
 * @param {string} [options.arch] Architecture name.
 * @param {number} [options.nLayer=0] Layer count.
 * @returns {object} SWA config with `enabled: false` and `swaLayers: 0`.
 */
const Re = ({ arch, nLayer = 0 }) => {
  const normalizedArch = Fe(arch);
  const layerCount = Math.floor(Number(nLayer) || 0);
  return {
    arch: normalizedArch,
    enabled: false,
    window: null,
    pattern: null,
    denseFirst: false,
    type: null,
    kvLayers: Math.max(0, layerCount),
    swaLayers: 0,
  };
};
s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return Re({arch:r,nLayer:n});/* kvLayers and swaLayers given by the rule take precedence. Otherwise kvLayers is the layer count and swaLayers is derived from the pattern via Le. The result is reported as enabled only when at least one SWA layer remains. */let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Le(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},/* Ve: architecture names that Ke classifies as recurrent. */Ve=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),/* He: architecture names that Ke classifies as hybrid. */He=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),Ue=e=>e?String(e).trim().toLowerCase():null,We=e=>{let t=Ue(e);return t?Ve.has(t):!1},Ge=e=>{let t=Ue(e);return t?He.has(t):!1},/* Ke: returns recurrent, hybrid or transformer for an architecture name. Anything not listed in Ve or He is treated as transformer. */Ke=e=>We(e)?`recurrent`:Ge(e)?`hybrid`:`transformer`,/* qe(metadata): extracts model hyper-parameters from a metadata object. Keys are prefixed with the general.architecture value. Several fall back to the llama-prefixed key. head_count_kv may be an array, in which case nHeadKv is the largest positive entry and attentionLayerCount is the number of positive entries. */qe=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let 
d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,/* recurrentLayerCount is block_count minus the attention layer count. It is known only when head_count_kv was an array. */C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},/* Je: builds a function mapping a context length to estimated KV cache bytes. Recurrent architectures get a function that always returns 0. Hybrid architectures use attentionLayerCount when it is known. With a unified KV cache the result is not multiplied by nParallel, but the SWA window is scaled by nParallel. Otherwise the whole estimate is multiplied by nParallel. */Je=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ke(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Ne(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},/* Ye: estimated recurrent-state bytes. Returns 0 for transformer architectures or when no recurrent layers exist. Two per-layer element counts m and h are computed, from the RWKV fields when rwkvHeadSize and nEmbd are positive, otherwise from the SSM fields. The result is (m + h) * nSeqMax * layers * 4. The factor 4 is presumably bytes per f32 element (confirm). */Ye=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ke(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let 
e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Xe=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},B=new D;B.setMaxListeners(100);const Ze=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var Qe=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Ze(this.modelLoads,e,this.maxEntries),B.emit(`status:modelLoad`,e),B.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Ze(this.completions,e,this.maxEntries),B.emit(`status:completion`,e),B.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Ze(this.transcriptions,e,this.maxEntries),B.emit(`status:transcription`,e),B.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const V=new Qe,H=new Qe;let $e=0;function et(e){let t=t=>e(t);return B.on(`status:change`,t),()=>B.off(`status:change`,t)}function tt(e){return $e+=1,{subscriberId:$e,unsubscribe:et(e)}}function nt(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return 
i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:V.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:V.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}/* rt(generators): status snapshot of all ggml-stt generators plus the model-load and transcription history from H. A generator without getStatus reports an idle queue. */function rt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:H.getModelLoadHistory(),transcriptions:H.getTranscriptionHistory()}}}/* it(generators): status snapshot of all mlx-llm generators plus the history entries from V whose variant is mlx. */function it(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:V.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:V.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}/* at(generators): combined timestamped snapshot of the three backend families. */function at(e){return{timestamp:new Date().toISOString(),ggmlLlm:nt(e),ggmlStt:rt(e),mlxLlm:it(e)}}/* Prefer the global stream classes when both exist, otherwise use the node:stream/web module namespace. */const{ReadableStream:ot,WritableStream:st}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,/* ct(target, source): recursive in-place merge. Plain-object values are merged key by key. Arrays and primitives overwrite. Returns target. NOTE(review): keys are copied without filtering, so a source that is not trusted could reach prototype keys. Confirm that only local config is merged. */ct=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),ct(e[t],n)):e[t]=n}),e),lt=`https://huggingface.co`,ut=`https://huggingface.co/api`,/* U: default model cache directory, the .buttress/models folder under the user home directory. */U=l.join(x.homedir(),`.buttress`,`models`),/* dt: quantization tags in the order used both for detecting a tag inside a file name (_t) and as the default preference when picking an artifact. */dt=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],/* ft: default cpu_memory_fraction. */ft=.5,/* pt: default backend config. gt clones it and merges user config over it. */pt={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:ft},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:ut,base_url:lt,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1},runtime:{cache_dir:U,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},/* mt: coerces a value to an array. Falsy values other than 0 yield a copy of the fallback. Arrays have null and undefined entries removed. Any other value is wrapped. */mt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],/* ht: returns the lower-cased variant name when it is one of cuda, vulkan, snapdragon or default, otherwise null. */ht=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},/* gt(userConfig): returns a normalized config. Starts from a deep clone of pt, merges the user config over it, then validates variants, deduplicates the preference lists and lower-cases quantization names. */gt=(e={})=>{let t=structuredClone(pt);if(ct(t,e),t.backend.variant=ht(t.backend.variant),t.backend.variant_preference=Array.from(new Set(mt(t.backend.variant_preference).flatMap(e=>{let t=ht(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(mt(t.runtime.prefer_variants).flatMap(e=>{let t=ht(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(mt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let 
e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||lt,t.model.api_base=t.model.api_base||ut,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):U,t.runtime.session_cache={...pt.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||pt.runtime.context_release_delay_ms),t},_t=e=>{let t=e.toLowerCase();return dt.find(e=>t.includes(e))||null},vt=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=ht(e);return t?[t]:[]})))},W=async e=>{await p(e,{recursive:!0})},yt=(e=U)=>l.join(e,`.metadata-cache`),bt=(e,t,n=U)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(yt(n),t,`${r}.json`)},xt=async(e,t,n=U)=>{try{let r=bt(e,t,n),i=await h(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,l.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},G=async(e,t,n,r=U)=>{try{let i=bt(e,t,r);await W(l.dirname(i)),await b(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,l.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} 
cache:`,e.message)}},/* St, Ct, wt, Tt: paths of the session-state cache root, its cache-map.json index, its temp folder and its states folder. */St=(e=U)=>l.join(e,`.session-state-cache`),Ct=(e=U)=>l.join(St(e),`cache-map.json`),wt=(e=U)=>l.join(St(e),`temp`),Tt=(e=U)=>l.join(St(e),`states`),Et=()=>({version:1,entries:{},totalSize:0}),/* Dt: loads the cache index. Returns an empty index when the file is missing, unparsable or has no entries object. */Dt=async(e=U)=>{try{let t=await h(Ct(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?Et():n}catch{return Et()}},/* Ot: persists the cache index by writing a temp file and renaming it over the target. The temp file is removed and the error rethrown on failure. */Ot=async(e,t=U)=>{let n=Ct(t),r=`${n}.tmp.${Date.now()}`;try{await W(l.dirname(n)),await b(r,JSON.stringify(e,null,2),`utf-8`),await _(r,n)}catch(e){throw await y(r).catch(()=>{}),e}},/* kt: entry id, the first 24 hex characters of the SHA-256 of the text together with the runtime settings that affect saved state. */kt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return s(`sha256`).update(n).digest(`hex`).slice(0,24)},At=(e,t=U)=>l.join(Tt(t),`${e}.bin`),/* jt: unique temp state file path built from the current time and a random suffix. */jt=(e=U)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return l.join(wt(e),`${t}.bin`)},/* Mt(entryMeta, currentMeta): true when a cached entry is usable with the current runtime. All listed settings must be equal, except n_ctx, where the entry value must be at least the current one. */Mt=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,/* Nt: length of the common prefix of two strings. */Nt=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},/* Pt(promptText, meta, cacheMap, exactMatch): finds the best compatible entry. In exact mode the entry fullText must equal the prompt. Otherwise the entry with the longest common prefix wins, ties going to the longer fullText. Returns null when nothing matches. */Pt=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Mt(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=Nt(e,n.fullText);return 
r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},/* Ft(cacheMap, maxBytes, maxEntries): evicts least recently accessed entries until both the total size and the entry count are within limits. Deletes their state files, updates totalSize (clamped at 0) and returns the evicted ids. */Ft=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},/* It(cacheMap, text, keepId, meta): removes compatible entries whose fullText is a strict prefix of the given text, skipping keepId. Returns the removed ids. NOTE(review): totalSize is decremented without the clamp that Ft applies. */It=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Mt(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},/* Lt: deletes temp state files whose modification time is more than one hour old. Errors are ignored. */Lt=async(e=U)=>{let t=wt(e);try{let e=await g(t),n=Date.now();await Promise.all(e.map(async e=>{let r=l.join(t,e),i=await v(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await y(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},/* Rt: true when stat succeeds for the path. */Rt=async e=>{try{return await v(e),!0}catch{return!1}},/* zt: resolves a byte limit from a number or a size string parsed by the bytes package. Null, undefined and other types yield the fallback. */zt=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t;/* Bt: on-disk session state cache for one model plan. */var Bt=class 
e{/* constructor(config, plan): reads limits from runtime.session_cache and captures the runtime settings that a cached state must match. The cache map stays null until initialize runs. */constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=zt(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}/* updateModelInfo: copies the is_recurrent and is_hybrid flags of the loaded model into the match metadata. */updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}/* requiresExactMatch: true for recurrent and hybrid models. */requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}/* persistCacheMap: best-effort save of the index. A failure is logged and not thrown. */async persistCacheMap(){try{await Ot(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}/* checkTokenPrefixMatch: true when the first token array is a prefix of the second. */static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}/* findFormattedMatchForRecurrent(entries, promptText, context): tokenizes the prompt and every candidate, then picks the first candidate, in entry order, whose fullText tokens are a prefix of the prompt tokens. If the fullText does not match, the stored promptText tokens are tried when a prompt state exists. */async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await 
Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;/* The chosen state file must still exist on disk. Otherwise the entry is stale and is removed. */return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Rt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}/* initialize: creates the cache folders and loads the index once. Any failure disables the cache for this instance. */async initialize(){if(!(!this.enabled||this.initialized))try{await W(St(this.baseDir)),await W(wt(this.baseDir)),await W(Tt(this.baseDir)),this.cacheMap=await Dt(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}/* removeStaleEntry: deletes the files of an entry, removes it from the index, subtracts its sizes from totalSize and persists the index. */async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await y(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await y(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}/* findMatchingEntry(promptText, context): returns the entry and a usePromptState flag, or null. Recurrent and hybrid models use token-prefix matching when a context is supplied, and exact text matching otherwise. Other models use longest text prefix matching. */async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Mt(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=Pt(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Rt(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}/* prepareCompletionOptions(options, promptText, context): when a cached state matches, returns a copy of the options with load_state_path set, together with the entry and the text it covers. Otherwise returns the options unchanged. */async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return 
console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=kt(a,this.metadata),s=()=>{n&&y(n).catch(()=>{}),i&&y(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=At(o,this.baseDir),u=i?At(`${o}-prompt`,this.baseDir):null;try{await W(l.dirname(c)),await _(n,c);let s=await v(c),d=0;if(i&&u)try{await _(i,u),d=(await v(u)).size,console.log(`[SessionCache] Saved prompt state: ${u}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let f={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:u||null,promptStateSize:d,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=f,this.cacheMap.totalSize+=s.size+d,this.requiresExactMatch()||await It(this.cacheMap,e,o,this.metadata),await Ft(this.cacheMap,this.maxSizeBytes,this.maxEntries),await Ot(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),f}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await W(wt(this.baseDir)),jt(this.baseDir)}async cleanup(){await Lt(this.baseDir)}};const Vt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return 
n.json()},/* Ht: issues a HEAD request and returns the response. Throws when the response is not ok. */Ht=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},/* Ut: returns GGUF metadata for a URL or local path, using the range-metadata cache keyed by url and headers. Anything that does not start with http or https is treated as a local file. */Ut=async(e,t,n=U)=>{let r=JSON.stringify({url:e,headers:t}),i=await xt(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await S(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await G(r,`range-metadata`,o,n),o},/* Wt: local path of the model file, either the configured local_path or cache_dir joined with the repo id segments, the revision and the file name. */Wt=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},/* K(path, expectedSize): true when the file exists and, if an expected size is given, has exactly that size. */K=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},/* Gt(url, headers, destPath, expectedSize, onProgress): streams a download to disk. Progress is reported in steps of 0.05 while the total size is known, and 1 on completion. On failure the partial file is deleted and the error rethrown. A size mismatch after download also deletes the file and throws. */Gt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await W(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new st({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},/* Kt(config): resolves which model artifact to use (file name, url, size, quantization, split info), caching the result under artifact-info. Throws when no repo id is configured. */Kt=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await 
xt(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Ht(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await G(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Vt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:dt,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=_t(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||_t(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await Vt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await Ht(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await G(i,`artifact-info`,y,r),y},qt=/^mmproj-.*\.gguf$/i,Jt=async(e,t)=>{if(!e.model.enable_mtmd)return null;let 
n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);/* Repo id and revision come from the resolved model artifact when available, otherwise from the config. The result is cached under artifact-info with a key marked as kind mmproj. */let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await xt(o,`artifact-info`,n);if(s)return s;/* Resolution order: explicit mmproj_url, then mmproj_local_path (which requires allow_local_file), then a file from the repo listing. */if(e.model.mmproj_url){let t=await Ht(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await G(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:l.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:l.resolve(e.model.mmproj_local_path)};return await G(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await Vt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),u=c?.siblings||c?.files||[],d=u.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),f=e.model.mmproj_filename;/* A configured file name must exist in the repo. Without one, the first mmproj file containing the model quantization tag is preferred, else the first mmproj file. When the repo has none, a warning is logged and null is returned. */if(f){if(!d.includes(f))throw Error(`mmproj file "${f}" not found in repo ${i}`)}else{let e=d.filter(e=>qt.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let n=t?.quantization&&String(t.quantization).toLowerCase();f=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}/* The size comes from the repo listing when present, otherwise from a HEAD request. */let p=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${f}`,m=u.find(e=>(e.rfilename||e.path||e.filename)===f),h=Number(m?.size);if(!Number.isFinite(h)||h<=0){let e=await Ht(p,{headers:r});h=Number(e.headers.get(`content-length`))||null}let 
g={repoId:i,revision:a,filename:f,url:p,size:h,headers:r};return await G(o,`artifact-info`,g,n),g},/* Yt: local path of the mmproj file. Uses the artifact localPath when set, returns null without an artifact, and otherwise uses the same cache layout as the model file. */Yt=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},/* Xt(config, sizes): selects a backend variant via Ae. The first entry of the variant list from vt is the requested variant and the rest are passed as preferences. Memory fractions are clamped to the range 0 to 1. Throws an error listing every attempt when no variant could be initialized. */Xt=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=vt(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?pt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Ae({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),/* l: normalizes one attempt record. devices is always an array, totalMemory prefers gpuTotalBytes, and error is an Error object for failed attempts. */l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},/* Zt(config): builds the load plan for a ggml-llm model. It resolves the model and mmproj artifacts, reads GGUF metadata, estimates memory, selects a backend variant, and derives the context length and GPU layer count. */Zt=async e=>{let t=await Kt(e),n=await Jt(e,t),r=await Ut(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=qe(r),/* Missing numeric metadata defaults to 0. nHeadKv defaults to nHead. The head dimension defaults to nEmbd divided by nHead, or 128 when that cannot be computed. */w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=Be({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),/* A: runtime options, each taken from the model section first and the runtime section second. n_parallel defaults to 4. */A={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},/* j is the requested context and M the working context: requested, else the training context, else 4096. A request above the training context records a warning and an error, marks the plan as not ok and caps M. */j=A.n_ctx?Number(A.n_ctx):null,M=j||a||4096,N=[],P=[],ie=!0;if(j&&a&&j>a){ie=!1;let e=`Requested context length (${j}) exceeds model training context 
(${a})`;N.push(e),P.push(e),M=a}j&&!a&&N.push(`Model metadata missing training context length, using requested value`);/* L maps a context length to KV cache bytes and ae is the fixed recurrent-state estimate. Both feed the backend selection in Xt. */let F={k:A.cache_type_k,v:A.cache_type_v},I=t.size>0?t.size:0,L=Je({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:F,swaConfig:O,kvUnified:A.kv_unified,nParallel:A.n_parallel,swaFull:A.swa_full,arch:i,attentionLayerCount:m}),ae=Ye({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:A.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=await Xt(e,{modelBytes:I,kvCacheBytes:L(M)+ae}),/* The memory budget is the GPU total times gpu_memory_fraction when the selected variant has a GPU, otherwise system memory times cpu_memory_fraction. NOTE(review): the GPU fraction here uses a plain fallback to 1 without the clamping applied in Xt. */oe=R.selected.totalMemory||0,se=oe*(e.backend.gpu_memory_fraction||1),ce=e.backend.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),le=Math.max(0,x.totalmem()*ce),ue=R.selected.hasGpu?se:le,z=Xe({maxCtx:M,availableMemory:ue,modelBytes:I,kvBytesForCtx:L});/* Without an explicit request the context is capped by memory and the training context, but never below 32. In every case it is capped to the memory-limited value z. */if(!j&&z){let e=a?Math.min(z,a):z,t=Math.max(32,e);t<M&&N.push(`Context length capped to ${t} by memory limits`),M=t}M>z&&(M=z);let de=Math.floor(z);console.log(`[buttress] Memory-limited context length: ${de}`);/* The automatic GPU layer count assumes the model bytes are spread evenly over nLayer + 1 layers and fits as many as the GPU budget allows. */let fe=L(M),pe=I+fe+ae,me=w?I/(w+1):I,he=0;R.selected.hasGpu&&me>0&&(he=Math.min(w+1,Math.max(0,Math.floor(se/me)))),console.log(`[buttress] Auto GPU layer capacity (${R.selected.variant}): ${he}/${w+1}`);/* An explicit n_gpu_layers is clamped to the range 0 to nLayer + 1. The values auto and unset use the automatic count. */let ge;ge=A.n_gpu_layers===`auto`||A.n_gpu_layers==null?he:Math.max(0,Math.min(Number(A.n_gpu_layers)||0,w+1));/* flash_attn_type keeps an explicit on or off. Otherwise it is auto with a GPU and off without. */let _e=(()=>{let e=A.flash_attn_type&&String(A.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:R.selected.hasGpu?`auto`:`off`})(),ve=e.runtime.cache_dir,ye=Wt(e,t),be=await K(ye,t.size),xe=Yt(e,n),Se=xe?await 
K(xe,n?.size):!1,/* Ce: multimodal section of the plan. Without an mmproj artifact it only records whether multimodal support was requested. */Ce=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:xe,exists:Se,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};/* The plan bundles the config, a serializable info report (model, runtime, resources, devices, download, multimodal) and the raw values later steps need. */return{config:e,info:{ok:ie,backend:`ggml-llm`,warnings:N,errors:P,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,dense_first:O.denseFirst,type:O.type,layers:O.swaLayers}:null}},runtime:{...A,variant:R.selected.variant,n_ctx:M,requested_ctx:j,n_gpu_layers:ge,auto_gpu_layers:he,flash_attn_type:_e,cache_type_k:F.k,cache_type_v:F.v,estimated_max_n_ctx:de},resources:{modelBytes:I,kvCacheBytes:fe,recurrentMemoryBytes:ae,totalEstimatedBytes:pe,gpuCapacityBytes:oe,gpuUsableBytes:se,cpuUsableBytes:le,fit:R.selected.fit},devices:{selected:R.selected,attempts:R.attempts},download:{cacheDir:ve,localPath:ye,exists:be},multimodal:Ce,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:xe,mmprojLocalExists:Se,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:R,cacheTypes:F,localPath:ye,localExists:be}},/* Qt(context, options, generatorId, modelInfo): wraps a parallel completion in a readable stream of token events followed by one result event, and records the outcome in the completion history V. Cancelling the stream calls the stop function of the completion once it is known. */Qt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new ot({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};V.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),V.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},$t=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&y(i).catch(()=>{}),c&&y(c).catch(()=>{})};return new ot({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};V.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),V.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},en=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return s(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},tn=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),u=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),d=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=l.join(t,u);await K(f)||await Gt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Gt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},nn=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Gt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},rn=async(e,t)=>{let n=en(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await tn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await T(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await nn(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),V.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(V.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},an=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},on=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return an(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?an(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await an(e,t)},i),!0)};async function sn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=gt(t),a=await Zt(i),o=new Bt(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await rn(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=en(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=t.prompt||``,o=null,c=null;if(!a&&t.messages){({messages:o}=t),c={chatTemplate:t.chat_template||t.chatTemplate,jinja:t.jinja??!0,tools:t.tools,parallel_tool_calls:t.parallel_tool_calls,tool_choice:t.tool_choice,reasoning_format:t.reasoning_format,enable_thinking:t.enable_thinking,add_generation_prompt:t.add_generation_prompt,now:t.now,chat_template_kwargs:t.chat_template_kwargs,force_pure_content:t.force_pure_content};let e=await i.context.getFormattedChat(o,c.chatTemplate,c);a=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&a){let{options:e}=await s.sessionCache.prepareCompletionOptions(t,a,i.context),n=await s.sessionCache.generateTempStatePath(),r=(await i.context.tokenize(a))?.tokens?.length||0,o={...e,save_state_path:n},c=s.sessionCache.requiresExactMatch(),l=!!o.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),o.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return $t(i.context,o,s.sessionCache,a,n,r,s.id,d,u)}let l={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Qt(i.context,t,s.id,l)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=en(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=await 
i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=en(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=en(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=en(s),t=s.contexts.get(e);return t?on(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):an(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const cn=e=>{let t=gt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function ln(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let 
n=gt(e),o=await Kt(n),s=Wt(n,o),{repoId:c}=o,u=await Jt(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),d=Yt(n,u),f=async()=>{if(!u||!d||u.localPath)return;if(await K(d,u.size)){console.log(`[Download] mmproj already exists: ${d}`);return}let e=t.getDownload(d);if(e){await e;return}let n=(async()=>{try{await Gt(u.url,u.headers,d,u.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(d)}})();t.setDownload(d,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await f().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let p=t.getDownload(s);if(p)return console.log(`[Download] Already downloading: ${c}`),p.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let m=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let u=String(s).padStart(5,`0`),d=o.filename.replace(e,`-${u}-of-${String(i).padStart(5,`0`)}.gguf`),f=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${d}`,p=l.join(t,d);await K(p)||await Gt(f,o.headers,p,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Gt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await f(),console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: 
${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,m),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function un(e){let t=gt(e),n=await Kt(t),r=await Ut(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=qe(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=Be({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,j={k:t.model.cache_type_k,v:t.model.cache_type_v},M=n.size>0?n.size:0,N=t.model.n_parallel||4,P=Je({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:j,swaConfig:O,kvUnified:t.model.kv_unified,nParallel:N,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),ie=Ye({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:N,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),F=t.backend?.gpu_memory_fraction==null?pt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),I=t.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),L=await 
Xt(t,{modelBytes:M,kvCacheBytes:P(A)}),ae=(L.selected.totalMemory||0)*F,R=Math.max(0,x.totalmem()*I),oe=Xe({maxCtx:A,availableMemory:L.selected.hasGpu?ae:R,modelBytes:M,kvBytesForCtx:P}),se=P(A),ce=P(oe);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:te,nEmbdHeadV:ne,nHeadCount:E,nHeadKvCount:D,kvLayerCount:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,denseFirst:O.denseFirst,type:O.type,layers:O.swaLayers}:null},modelBytes:M,kvCacheBytes:se,limitedKvCacheBytes:ce,memoryLimitedCtx:oe,recurrentMemoryBytes:ie,quantization:{name:n.quantization||null,fileType:p,version:f}}}const dn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):de(e):0;async function fn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await un(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await Ae({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=dn(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let v=null,y=null;if(e){let t=dn(e);y={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=y.fit,a=y.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}v={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!v&&(v={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let b=null;return i&&(b={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:y,comparison:v,modelConfig:b}}const{WritableStream:pn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,mn=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),mn(e[t],n)):e[t]=n}),e),hn=`https://huggingface.co`,gn=`https://huggingface.co/api`,_n=l.join(x.homedir(),`.buttress`,`models`),vn=[`cuda`,`vulkan`,`default`],yn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],bn=`fp16`,xn=.5,Sn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],Cn=e=>{if(!e)return null;let t=e.toLowerCase();return 
Sn.find(e=>t.includes(e))||null},wn={backend:{type:`ggml-stt`,variant:null,variant_preference:vn,gpu_memory_fraction:.85,cpu_memory_fraction:xn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,bn,`q5_1`],allow_local_file:!1,local_path:null,api_base:gn,base_url:hn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:_n,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},Tn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],En=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Dn=(e={})=>{let t=structuredClone(wn);if(mn(t,e),t.backend.variant=En(t.backend.variant),t.backend.variant_preference=Array.from(new Set(Tn(t.backend.variant_preference||vn).flatMap(e=>{let t=En(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...vn]),t.runtime.prefer_variants=Array.from(new Set(Tn(t.runtime.prefer_variants).flatMap(e=>{let t=En(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(Tn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||hn,t.model.api_base=t.model.api_base||gn,t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):_n,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||wn.runtime.context_release_delay_ms),t},On=e=>{let t=e.toLowerCase();return yn.find(e=>t.includes(e))||null},kn=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=En(e);t&&n.add(t)}return Array.from(n)},An=async e=>{await p(e,{recursive:!0})},jn=(e=_n)=>l.join(e,`.metadata-cache`),Mn=(e,t,n=_n)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(jn(n),t,`${r}.json`)},Nn=async(e,t,n=_n)=>{try{let r=await h(Mn(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},Pn=async(e,t,n,r=_n)=>{try{let i=Mn(e,t,r);await An(l.dirname(i)),await b(i,JSON.stringify(n),`utf-8`)}catch{}},Fn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},In=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Ln=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},Rn=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},zn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await An(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new pn({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Bn=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await Nn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await In(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:On(c||``),headers:o};return await Pn(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Fn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:yn,f=()=>{for(let e of d)if(e===bn){let e=u.find(e=>{let t=e.toLowerCase();return!yn.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=On(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||On(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
In(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await Pn(i,`artifact-info`,g,r),g},Vn=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=kn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?wn.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?xn:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Ae({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Hn=async e=>{let t=await Bn(e),n=Pe({modelBytes:t.size>0?t.size:0}),r=await Vn(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Ln(e,t),l=await Rn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:Cn(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},Un=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await Rn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await Rn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await zn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
Wn=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Gn=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Kn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await Un(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
ee({filePath:i,useFlashAttn:e.plan.info.runtime.flash_attn_type===`on`,useGpu:e.plan.info.runtime.n_gpu_layers>0,nThreads:e.plan.info.runtime.n_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return H.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw H.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},qn=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),Jn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return qn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?qn(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await qn(e,t)},i),!0)};async function Yn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=Dn(t),a=await Hn(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
Wn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await qn(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Kn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},u=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Jn(o,e):!1},d=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Gn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=l.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return H.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
H.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>d(e),transcribeData:async(e={})=>d(e),releaseContext:u,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const Xn=e=>{let t=Dn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=Cn(t.model.filename);return r?`${n}:${r}`:n};async function Zn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=Dn(e),o=await Bn(n),s=Ln(n,o),{repoId:c}=o;if(await Rn(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await zn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const Qn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):de(e):0;async function $n(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await Bn(Dn(i));o=e.size??null,{processingBufferBytes:s}=Pe({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await Ae({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=Qn(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=Qn(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:er}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,tr=te(import.meta.url),nr=l.dirname(tr),rr=l.join(nr,`mlx-bridge.py`),ir=`mlx-vlm==0.4.0`,ar=`mlx-lm==0.31.1`,or=l.join(x.homedir(),`.buttress`,`models`),sr={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:or,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:ar,mlx_vlm_package:ir,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},cr=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t,lr=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),lr(e[t],n)):e[t]=n}),e),ur=(e={})=>{let t=structuredClone(sr);return lr(t,e),t},dr=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},fr=async e=>{await p(e,{recursive:!0})},pr=(e,t,n)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(n,`.metadata-cache`,t,`${r}.json`)},mr=async(e,t,n)=>{try{let r=await h(pr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},hr=async(e,t,n,r)=>{try{let i=pr(e,t,r);await fr(l.dirname(i)),await 
b(i,JSON.stringify(n),`utf-8`)}catch{}};async function gr(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await mr(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await dr(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await dr(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await hr(i,`mlx-model-metadata`,D,n),D}function _r({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const vr=async e=>{try{return await v(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{ne(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),yr=new Map;async function br({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=l.resolve(e),a=yr.get(i);if(a){let 
e=await a;return r?.(1),e}let o=Sr({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});yr.set(i,o);try{return await o}finally{yr.delete(i)}}const xr=[3,10];async function Sr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=l.join(e,`bin`,`python3`),a=l.join(e,`bin`,`pip`);if(await vr(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await vr(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<xr[0]||t===xr[0]&&n<xr[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${xr.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await p(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var Cr=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=re(e,[rr],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
3
3
  `);this.buffer=t.pop();for(let e of t)if(e.trim())try{this.handleMessage(JSON.parse(e))}catch(t){console.error(`[mlx-llm] Failed to parse bridge message:`,e,t)}}),this.readyPromise=new Promise((e,t)=>{this.pendingRequests.set(`__init__`,{resolve:()=>e(),reject:t}),setTimeout(()=>t(Error(`Bridge startup timeout`)),3e4)}),this.readyPromise}handleMessage(e){let t=this.pendingRequests.get(e.id);t&&(e.error?(t.reject(Error(e.error.message)),this.pendingRequests.delete(e.id)):e.event?e.event===`result`?(t.resolve(e.data),this.pendingRequests.delete(e.id)):t.onEvent?.(e.event,e.data):e.result!==void 0&&(t.resolve(e.result),this.pendingRequests.delete(e.id)))}async call(e,t={}){if(!this.process)throw Error(`Bridge not running`);let n=String(++this.requestCounter);return new Promise((r,i)=>{this.pendingRequests.set(n,{resolve:r,reject:i}),this.write({id:n,method:e,params:t})})}stream(e,t,n){if(!this.process)throw Error(`Bridge not running`);let r=String(++this.requestCounter);return{id:r,promise:new Promise((i,a)=>{this.pendingRequests.set(r,{resolve:i,reject:a,onEvent:n}),this.write({id:r,method:e,params:t})})}}cancel(e){this.process&&this.write({id:`cancel-${e}`,method:`cancel`,params:{request_id:e}})}write(e){this.process?.stdin?.write(JSON.stringify(e)+`
4
- `)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function wr(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),x.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function Tr(e){let t=wr();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const Er=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Dr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Or=`</think>`;function kr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(Or);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(Or.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Ar(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new 
er({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let e=kr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=kr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),H.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function jr(e,t,n={}){let r=ur(t),i=Tr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Dr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||or,n=await 
br({envDir:r.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||ar,mlxVlmPackage:r.runtime.mlx_vlm_package||ir,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new Cr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let u=r.runtime.session_cache;u?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:l.join(e,`mlx-session-cache`),max_entries:u?.max_entries||100,max_size_bytes:cr(u?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=Er(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new er({start(e){a.queue.enqueue(async()=>{let n=Ar(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Mr=e=>ur(e).model.repo_id||null;async function Nr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=wr(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await De({platform:process.platform,arch:x.arch(),totalMemoryInBytes:x.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=ur(r),t=e.model.repo_id;if(t)try{l=await gr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||or,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=_r({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=_r({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:x.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Pr(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=ur(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=wr();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let u=`mlx:${s}`;if(t?.isDownloading(u))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let d=(async()=>{try{let e=o.runtime.cache_dir||or,t=await br({envDir:o.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||ar,mlxVlmPackage:o.runtime.mlx_vlm_package||ir,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
4
+ `)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function wr(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),x.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function Tr(e){let t=wr();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const Er=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Dr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Or=`</think>`;function kr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(Or);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(Or.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Ar(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new 
er({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let e=kr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=kr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),V.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),V.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function jr(e,t,n={}){let r=ur(t),i=Tr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Dr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||or,n=await 
br({envDir:r.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||ar,mlxVlmPackage:r.runtime.mlx_vlm_package||ir,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new Cr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let u=r.runtime.session_cache;u?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:l.join(e,`mlx-session-cache`),max_entries:u?.max_entries||100,max_size_bytes:cr(u?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),V.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),V.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=Er(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new er({start(e){a.queue.enqueue(async()=>{let n=Ar(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Mr=e=>ur(e).model.repo_id||null;async function Nr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=wr(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await Oe({platform:process.platform,arch:x.arch(),totalMemoryInBytes:x.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=ur(r),t=e.model.repo_id;if(t)try{l=await gr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||or,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=_r({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=_r({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:x.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Pr(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=ur(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=wr();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let u=`mlx:${s}`;if(t?.isDownloading(u))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let d=(async()=>{try{let e=o.runtime.cache_dir||or,t=await br({envDir:o.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||ar,mlxVlmPackage:o.runtime.mlx_vlm_package||ir,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
5
5
  from huggingface_hub import snapshot_download
6
6
  path = snapshot_download("${s}", revision="${o.model.revision||`main`}")
7
7
  print(path)
8
8
  `.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let u=c.stdout.trim().split(`
9
9
  `).pop();i?.({localPath:u,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(u)}})();return t?.setDownload(u,d),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return fn(t,n);if(e===`ggml-stt`)return $n(t,n);if(e===`mlx-llm`)return Nr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.25.0-beta.15`,Fr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.25.0-beta.11`,"@fugood/llama.node":`^1.7.0`,"@fugood/whisper.node":`^1.0.19`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Ir=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Lr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
10
10
  `),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
11
- `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});u.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let d=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),u.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=d(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=qe(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,u=d(l),p=d(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=f(t?.fit?.fitsInGpu),h=f(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${u} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=d(i),l=d(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=f(t?.limitedFit?.fitsInGpu),h=f(t?.limitedFit?.fitsInCpu);(e!==o||c!==u||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
11
+ `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});u.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let d=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),u.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=d(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=Je(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,u=d(l),p=d(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=f(t?.fit?.fitsInGpu),h=f(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${u} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=d(i),l=d(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=f(t?.limitedFit?.fitsInGpu),h=f(t?.limitedFit?.fitsInCpu);(e!==o||c!==u||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
12
12
  ---`),n.push(`
13
13
  ### System Information`);let p=null;if(process.platform!==`win32`)try{p=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(p?n.push(`- **System:** ${p}`):(n.push(`- **Hostname:** ${x.hostname()}`),n.push(`- **OS:** ${x.type()} ${x.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${x.cpus().length}`),n.push(`- **Total System Memory:** ${(x.totalmem()/1024/1024/1024).toFixed(2)} GB`),u.length>0){let e=u[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
14
14
  ### Command Used`);let m=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${m}\n\`\`\``),n.push(`
@@ -33,29 +33,29 @@ print(path)
33
33
  === Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${x.hostname()}`),console.log(`OS: ${x.type()} ${x.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${x.cpus().length}`),console.log(`Total System Memory: ${(x.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
34
34
  --- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
35
35
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
36
- === Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Vr=e({finalizeGenerator:()=>Gr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Yr,ggmlLlm:()=>Kr,ggmlStt:()=>Jr,globalDownloadManager:()=>Hr,mlxLlm:()=>qr,showModelsTable:()=>Lr,showSttModelsTable:()=>zr,startGenerator:()=>Wr,startModelDownload:()=>Zr,status:()=>Xr,testGgmlLlmCapabilities:()=>Rr,testGgmlSttCapabilities:()=>Br});const Z=new Map,Hr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},Ur=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=Ur(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Wr(e,t){let n={"ggml-llm":{create:sn,getId:cn},"ggml-stt":{create:Yn,getId:Xn},"mlx-llm":{create:jr,getId:Mr}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Hr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Gr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Kr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},qr={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Jr={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Yr(e,t){return e===`ggml-llm`?cn(t):e===`ggml-stt`?Xn(t):e===`mlx-llm`?Mr(t):null}const Xr={getFullStatus:()=>it(Z),getGgmlLlmStatus:()=>tt(Z),getGgmlSttStatus:()=>nt(Z),getMlxLlmStatus:()=>rt(Z),subscribeToStatus:$e,subscribeToStatusWithId:et,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};async function Zr(e,t,n={}){let r={"ggml-llm":ln,"ggml-stt":Zn,"mlx-llm":Pr}[e];return r?r(t,Hr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Qr=`@fugood/buttress-server`,$r=`2.25.0-beta.16`,ei={name:Qr,version:$r,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`./bin/bricks-buttress`},files:[`lib`,`bin`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun 
run build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0`,"@fugood/whisper.node":`^1.0.19`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,jose:`^5.9.6`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`f38cee540522e4cf4616f7664a2a03584cd42d30`};const ti=()=>({version:$r,name:Qr,description:ei.description}),ni=!(`Bun`in globalThis),ri=e=>new n({adapter:ni?t():void 0,...e}),ii=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String(),score:a.Optional(a.Number()),hasGpu:a.Optional(a.Boolean()),usableBytes:a.Optional(a.Number())})),authentication:a.Object({required:a.Boolean(),type:a.String(),kid:a.Optional(a.String()),bound:a.Optional(a.Boolean())}),workspace:a.Optional(a.Object({id:a.String(),name:a.Optional(a.String())}))}),ai=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication,workspace:e.workspace});var oi=e=>{let t=ri(),n=e.autodiscover?.http?.path??`/buttress/info`;return t.get(n,ai,{response:ii}),t};let si=null;const ci=async e=>{if(si&&si.kid===e.kid)return si.key;let t=await N(e.issuerPublicKey,`EdDSA`);return si={kid:e.kid,key:t},t},li=/^Bearer\s+(.+)$/i,ui=(e,t)=>{if(e){let t=e.authorization||e.Authorization;if(t){let e=t.match(li);return e?e[1].trim():t.trim()}}if(t){let e=t.access_token??t.token;if(typeof e==`string`&&e)return e}return null},di=async(e,t)=>{if(!e||!t)return null;try{let{payload:n}=await P(e,await ci(t),{algorithms:[`EdDSA`]}),r=n;return r.k!==`ba`||r.w_id!==t.id||r.st!==`ws`&&r.st!==`dev`||!r.sid||!r.exp?null:{workspaceId:r.w_id,subjectType:r.st,subjectId:r.sid,jti:r.jti,exp:r.exp}}catch{return 
null}},fi=async({headers:e,query:t,set:n,store:r})=>{let i=r.workspaceState?.workspace;if(i&&!await di(ui(e,t),i))return n.status=401,n.headers&&(n.headers[`WWW-Authenticate`]=`Bearer`),{error:{code:`UNAUTHORIZED`,message:`Invalid or missing workspace access token`}}},pi=e=>{if(!e)return null;let t=e[`x-buttress-sess-id`]??e[`X-BUTTRESS-SESS-ID`];return t?t.trim():null},mi=async({headers:e,query:t,set:n,store:r})=>{let i=r.sessions,a=pi(e);if(!a||!i)return n.status=401,{error:{code:`SESSION_REQUIRED`,message:`Missing buttress session id`}};let o=i.get(a);if(!o)return n.status=401,{error:{code:`SESSION_INVALID`,message:`Unknown or expired buttress session`}};let s=r.workspaceState?.workspace;if(s&&o.identity){let r=await di(ui(e,t),s);if(!r||r.subjectId!==o.identity.subjectId||r.subjectType!==o.identity.subjectType)return n.status=403,{error:{code:`SESSION_FORBIDDEN`,message:`Token identity mismatch for this session`}}}},hi=(e,t)=>{let n=e.sessions,r=pi(t);if(!r||!n)return null;let i=n.get(r);return i?{sessionId:r,session:i}:null},gi=(e,t,n)=>{let r=l.join(e,t),i=l.join(r,n),a=l.relative(r,i);return{sessionDir:r,filePath:i,safe:a!==``&&!a.startsWith(`..`)&&!l.isAbsolute(a)}},_i=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var vi=ri().onBeforeHandle(fi).onBeforeHandle(mi).post(`/buttress/upload`,async({body:{file:e},headers:t,store:n})=>{let r=hi(n,t);if(!r)return{ok:!1,error:`Session guard mis-wired`};let{sessionId:i}=r,{config:a}=n,o=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,{sessionDir:s,filePath:c,safe:l}=gi(a.server.temp_file_dir,i,o);if(!l)return{ok:!1,error:`Invalid file path`};try{return await p(s,{recursive:!0}),_i?await b(c,await e.stream()):await b(c,await 
e.arrayBuffer()),{ok:!0,filename:o}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},headers:t,store:n,status:i})=>{let a=hi(n,t);if(!a)return i(500),`Session guard mis-wired`;let{sessionId:o}=a,{config:s}=n,{filePath:c,safe:l}=gi(s.server.temp_file_dir,o,e);return l?r(c):(i(400),`Invalid file path`)},{params:a.Object({filename:a.String()})});const yi=l.dirname(te(import.meta.url)),bi=async()=>{let e=[l.join(yi,`..`,`public`,`status.html`),l.join(yi,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>f.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},xi=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},Si=async()=>{let e=await bi();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await f.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var Ci=ri().get(`/status`,Si).get(`/status/`,Si).get(`/buttress/status`,({store:{backend:e}})=>xi(e));const wi=[`ggml-llm`,`mlx-llm`],Ti=new Map;function Ei(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function Di(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>wi.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. 
Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let a=i[0],o=n||a.model?.repo_id;if(n){let e=i.find(e=>e.model?.repo_id===n);e&&(a=e)}else o=a.model?.repo_id;let s=a.type||`ggml-llm`,c=o,l=Ti.get(c);if(l?.initialized)return l;let{generators:u,server:d,...f}=t.global||{},p={...f,...a,model:{...a.model,repo_id:o}};console.log(`${r} Creating ${s} generator for ${c}`);let{id:m}=await e.startGenerator(s,p),h={id:m,type:s,config:p,repoId:o,initialized:!1};return Ti.set(c,h),await Ei(e,s).initContext(m,{}),h.initialized=!0,console.log(`${r} Generator ready: ${c}`),h}function Oi(e){let t=e.timings||{},n=t.prompt_n??t.promptN??0,r=t.cache_n??t.cacheN??0,i=t.predicted_n??t.predictedN??0;return{promptTokens:n||e.prompt_tokens||e.promptTokens||0,cachedTokens:r,completionTokens:i||e.tokens_evaluated||e.tokensEvaluated||e.tokens_predicted||e.tokensPredicted||0}}function ki(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=Oi(e),i=t+n;return{prompt_tokens:i,completion_tokens:r,total_tokens:i+r}}const Ai=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`;async function ji(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=ki(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return 
o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function Mi({global:e}){let t=ri({prefix:`/oai-compat`});return t.use(ie({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.onBeforeHandle(fi),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>wi.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await Di(a,r,c,`[OpenAI]`),t=Ai(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await Ei(a,e.type).completion(e.id,{options:b});if(!s)return await ji(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let 
t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=ki(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const Ni=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function Pi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof 
e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else 
Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}function Fi(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}function Ii(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=Oi(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}async function Li(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Ii(u),l=u.stopping_word||u.stoppingWord||null,c=Fi(u,o.length>0);else if(n===`error`)throw Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}function Ri({global:e}){let t=ri({prefix:`/anthropic-messages`});return 
t.use(ie({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.onBeforeHandle(fi),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=Pi(o),n=await Di(a,r,o.model,`[Anthropic]`),s=Ni(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await Ei(a,n.type).completion(n.id,{options:l});if(!o.stream)return await Li(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Ii(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield 
i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Ii(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Ii(r),w=r.stopping_word||r.stoppingWord||null,C=Fi(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield 
i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=Pi(a),t=await Di(i,r,a.model,`[Anthropic]`),n=Ei(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server 
/**
 * Recursively merges `t` into a shallow copy of `e` and returns the copy.
 *
 * - Plain-object values in `t` are merged key by key; every other value
 *   (primitives, arrays, null) replaces the target value.
 * - When the target value is not itself a non-array object (for example a
 *   string), merging starts from an empty object instead of spreading the
 *   old value into index keys.
 * - A `__proto__` key in `t` is skipped: assigning it on a plain object would
 *   swap the result's prototype instead of storing data.
 *
 * @param {object|Array} [e] merge target (never mutated at the top level)
 * @param {object} [t] overrides
 * @returns {object|Array} merged copy
 */
const zi = (e = {}, t = {}) => {
  const merged = Array.isArray(e) ? [...e] : { ...e };
  for (const [key, value] of Object.entries(t || {})) {
    if (key === `__proto__`) continue;
    const isNestedObject = value && typeof value === `object` && !Array.isArray(value);
    if (!isNestedObject) {
      merged[key] = value;
      continue;
    }
    const current = merged[key];
    const base = current && typeof current === `object` && !Array.isArray(current) ? current : {};
    merged[key] = zi(base, value);
  }
  return merged;
};

/** Deep-clones `e` when it is an object; anything else yields null. */
const Bi = (e) => (e && typeof e === `object` ? structuredClone(e) : null);

/** Deep-merges clones of `e` and `t` (either may be missing). */
const Vi = (e, t) => zi(Bi(e) || {}, Bi(t) || {});

/** Layers `t` on top of a clone of the server config's `global` section. */
const Hi = (e, t) => zi(structuredClone(e.global), t || {});

/**
 * Picks the server-side configuration for a generator request.
 *
 * @param {{global: object, generators: object[]}} e normalised server config
 * @param {{getModelIdentifier: Function}} t backend used to derive model ids
 * @param {string} n generator type
 * @param {*} r model identifier requested by the caller
 * @returns {object|null} global settings merged with the matching generator
 *   entry; the global settings alone when nothing matches; null when the
 *   global section is empty as well
 */
const Ui = (e, t, n, r) => {
  if (e.generators.length > 0 && r) {
    const match = e.generators
      .filter((generator) => generator?.type === n)
      .find((generator) => t.getModelIdentifier(n, generator) === r);
    if (match) return Hi(e, match);
  }
  return Object.keys(e.global).length > 0 ? Hi(e, {}) : null;
};

/** Autodiscovery defaults used when the config enables it without details. */
const Wi = {
  udp: {
    port: 8089,
    announcements: { enabled: true, interval: 5e3 },
    requests: { enabled: true, responseDelay: 100 },
  },
  http: { enabled: true, path: `/buttress/info`, cors: true },
};

/**
 * Normalises the `autodiscover` setting.
 * Falsy -> null, `true` -> defaults, object -> defaults overridden by it.
 * The defaults are deep-cloned so callers can never mutate `Wi` through the
 * returned object.
 */
const Gi = (e) => {
  if (!e) return null;
  const defaults = structuredClone(Wi);
  return e === true ? defaults : zi(defaults, e);
};

/**
 * Lists the distinct generator types declared in the config as
 * `[{type}]`, falling back to `t` (an array of type names) when the config
 * declares none.
 */
const Ki = (e, t) => {
  const declared = new Set();
  for (const generator of e.generators || []) {
    if (generator.type) declared.add(generator.type);
  }
  const types = declared.size === 0 ? t : Array.from(declared);
  return types.map((type) => ({ type }));
};

/**
 * Resolves a numeric setting that may be given as a number or as a string
 * understood by parser `t`.
 *
 * @param {*} e raw setting
 * @param {(raw: *) => (number|null|undefined)} t parser for non-number input
 * @param {number} n fallback
 * @returns {number} `n` when `e` is null/undefined or the parser yields
 *   null/undefined; `e` itself when it is already a number
 */
const qi = (e, t, n) => {
  // null used to reach the parser, which is only meant for strings.
  if (e == null) return n;
  if (typeof e === `number`) return e;
  return t(e) ?? n;
};

/** Default `server.session_timeout` (60000). */
const Ji = 6e4;

/** Default `server.max_body_size` (50 * 1024 * 1024). */
const Yi = 1024 * 1024 * 50;
/**
 * Normalises a raw configuration object into the shape the server uses:
 * `{autodiscover, server, global, generators}`.
 *
 * Defaults for the `server` section are derived from the machine id and the
 * OS temp directory; every top-level key other than `server`, `generators`
 * and `autodiscover` ends up in `global`.
 *
 * @param {object} [e] raw configuration
 * @returns {{autodiscover: object|null, server: object, global: object, generators: object[]}}
 */
const Xi = (e) => {
  const machineId = ae.machineIdSync();
  const defaults = {
    server: {
      id: `buttress-${machineId}`,
      name: `Buttress Server (${machineId.slice(-8)})`,
      port: 2080,
      temp_file_dir: l.join(x.tmpdir(), `.buttress`),
      session_timeout: Ji,
      max_body_size: Yi,
    },
    autodiscover: false,
  };
  const merged = zi(defaults, Bi(e) || {});
  const generatorList = Array.isArray(merged.generators) ? merged.generators : [];
  const { server, generators: _generators, autodiscover, ...globalSettings } = merged;

  return {
    autodiscover: Gi(autodiscover),
    server: {
      id: server.id,
      name: server.name,
      port: server.port,
      log_level: server.log_level,
      temp_file_dir: server.temp_file_dir,
      // Sizes and durations may be given as numbers or as strings.
      max_body_size: qi(server.max_body_size, E.parse, Yi),
      session_timeout: qi(server.session_timeout, F, Ji),
    },
    global: globalSettings,
    generators: generatorList,
  };
};

/** Argument schemas (zod tuples) for the methods of `Qi`. */
const Zi = {
  getCapabilities: M.tuple([
    M.object({
      type: M.string().optional().default(`ggml-llm`),
      config: M.any().optional(),
      currentClientCapabilities: M.any().optional(),
      options: M.any().optional(),
    })
      .nullable()
      .optional(),
  ]),
  startGenerator: M.tuple([M.string(), M.any().optional()]),
  finalizeGenerator: M.tuple([M.string()]),
};

/**
 * Builds the effective generator config for a request: the server-side
 * entry selected by `Ui`, overlaid with the client-supplied config, with
 * `backend.type` defaulted to the requested generator type.
 *
 * @throws {Error} when the merged configuration has no keys at all
 */
const resolveGeneratorConfig = (backend, serverConfig, type, clientConfig) => {
  const modelId = backend.getModelIdentifier(type, Bi(clientConfig));
  const effective = Vi(Ui(serverConfig, backend, type, modelId), clientConfig);
  if (Object.keys(effective).length === 0) {
    throw Error(`Buttress server missing generator configuration`);
  }
  effective.backend = effective.backend || {};
  if (!effective.backend.type) effective.backend.type = type;
  return effective;
};

/** Generator-lifecycle handlers shared by all backends. */
var Qi = {
  /** Reports backend capabilities for the requested generator type. */
  async getCapabilities({ backend: e, config: t }, n = null) {
    console.log(`[Server] Get Capabilities:`, n);
    const {
      type = `ggml-llm`,
      config: clientConfig,
      currentClientCapabilities = null,
      options = {},
    } = n || { type: `ggml-llm` };
    const effective = resolveGeneratorConfig(e, t, type, clientConfig);
    return e.getCapabilities(type, currentClientCapabilities, { ...options, config: effective });
  },

  /** Starts (or re-uses) a generator and records its id on the session. */
  async startGenerator({ backend: e, config: t, session: n }, r, i) {
    console.log(`[Server] Start Generator:`, r, i);
    const effective = resolveGeneratorConfig(e, t, r, i);
    const started = await e.startGenerator(r, effective);
    n.generators.add(started.id);
    return started;
  },

  /** Drops the generator from the session and finalizes it in the backend. */
  async finalizeGenerator({ backend: e, session: t }, n) {
    console.log(`[Server] Finalize Generator:`, n);
    t.generators.delete(n);
    return e.finalizeGenerator(n);
  },
};
/**
 * Builds an `initContext` handler for the backend facade chosen by `e`.
 *
 * The handler returns a ReadableStream that emits
 * `{event: 'progress', data: {progress}}` chunks while the context loads and
 * one final `{event: 'result', data: {result}}` chunk (the backend result
 * without its `download` field) before closing. A successful init registers
 * the context id in `session.initializedContexts`.
 *
 * @param {(backend: object) => {initContext: Function}} e facade selector
 * @returns {(ctx: {backend: object, session: object}, id: string, options?: object) => ReadableStream}
 */
function ea(e) {
  return function ({ backend: t, session: n }, r, i) {
    return new d({
      async start(controller) {
        // Progress can still arrive after the consumer cancelled the stream;
        // enqueue() throws in that state and the exception would surface
        // inside the backend's progress hook, so late updates are dropped.
        const reportProgress = (progress) => {
          try {
            controller.enqueue({ event: `progress`, data: { progress } });
          } catch {
            // Stream already closed or cancelled: nobody is listening.
          }
        };
        try {
          const outcome = await e(t).initContext(r, { ...i, onProgress: reportProgress });
          n.initializedContexts.add(r);
          // NOTE(review): fixed one-second pause before the result is sent;
          // the reason is not visible in this file.
          await new Promise((resolve) => setTimeout(resolve, 1e3));
          const { download, ...result } = outcome || {};
          controller.enqueue({ event: `result`, data: { result } });
          controller.close();
        } catch (err) {
          controller.error(err);
        }
      },
    });
  };
}

/**
 * Builds a `releaseContext` handler that only releases contexts this session
 * initialised itself; other ids are reported as skipped.
 *
 * @param {(backend: object) => {releaseContext: Function}} e facade selector
 * @param {string} t label used in log lines
 */
function ta(e, t) {
  return async function ({ backend: n, session: r }, i, a) {
    console.log(`[Server] ${t}:`, { id: i, force: a });
    if (!r.initializedContexts.has(i)) {
      console.log(`[Server] ${t} skipped - not initialized by this session:`, { id: i });
      return { released: false, skipped: true };
    }
    r.initializedContexts.delete(i);
    return e(n).releaseContext(i, { force: a });
  };
}

/**
 * Builds the handler set for an LLM-style backend.
 *
 * @param {(backend: object) => object} e facade selector
 * @param {string} t log prefix (for example `MLX `)
 * @returns {object} initContext, completion, tokenize, detokenize,
 *   applyChatTemplate and releaseContext handlers
 */
function na(e, t) {
  return {
    initContext: ea(e),
    completion({ backend: n }, r, i) {
      console.log(`[Server] ${t}Completion:`, { id: r, property: i });
      return e(n).completion(r, i);
    },
    async tokenize({ backend: n }, r, i) {
      console.log(`[Server] ${t}Tokenize:`, { id: r, property: i });
      return e(n).tokenize(r, i);
    },
    async detokenize({ backend: n }, r, i) {
      console.log(`[Server] ${t}Detokenize:`, { id: r, property: i });
      return e(n).detokenize(r, i);
    },
    async applyChatTemplate({ backend: n }, r, i) {
      console.log(`[Server] ${t}Apply Chat Template:`, { id: r, property: i });
      return e(n).applyChatTemplate(r, i);
    },
    releaseContext: ta(e, `${t}Release Context`),
  };
}

/** Handlers for the ggml LLM backend. */
var ra = na((e) => e.ggmlLlm, ``);
/**
 * Parses a JSON message and revives the tagged values written by `ua`.
 *
 * - `{type: 'Buffer', data}` becomes a Buffer.
 * - `{type: 'Uint8Array', data}` becomes an ArrayBuffer holding the decoded
 *   bytes. NOTE(review): this is an ArrayBuffer, not a Uint8Array; confirm
 *   that is what the receiving handlers expect.
 * - `{type: 'Error', name, message}` becomes an Error carrying that message
 *   and name. The previous code called `Error(name, message)`, which used
 *   the name as the message and discarded the real message.
 *
 * @param {string} e serialized message
 * @returns {*} parsed value, or `e` unchanged when it is not valid JSON
 */
const la = (e) => {
  try {
    return JSON.parse(e, (key, value) => {
      if (!value) return value;
      if (value?.type === `Buffer` && value?.data) return I.from(value.data, `base64`);
      if (value?.type === `Uint8Array` && value?.data) {
        const bytes = I.from(value.data, `base64`);
        // Copy exactly this buffer's window out of the (possibly pooled) backing store.
        return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
      }
      if (value?.type === `Error` && value?.name && value?.message) {
        const revived = new Error(value.message);
        revived.name = value.name;
        return revived;
      }
      return value;
    });
  } catch {
    return e;
  }
};

/**
 * Serializes a message to JSON, tagging Error and binary values so `la` can
 * revive them.
 *
 * Buffers define `toJSON()`, which JSON.stringify applies before the
 * replacer runs, so they normally arrive here already converted to
 * `{type: 'Buffer', data: [...bytes]}`; `la` accepts that form as well.
 *
 * @param {*} e value to serialize
 * @returns {string|*} JSON text, or `e` unchanged when serialization throws
 */
const ua = (e) => {
  try {
    return JSON.stringify(e, (key, value) => {
      if (value instanceof Error) return { type: `Error`, name: value.name, message: value.message };
      if (value instanceof I) return { type: `Buffer`, data: value.toString(`base64`) };
      if (value instanceof Uint8Array) return { type: `Uint8Array`, data: I.from(value).toString(`base64`) };
      return value;
    });
  } catch {
    return e;
  }
};

/** Capability summary used when detection yields nothing. */
const da = { score: 0, hasGpu: false, usableBytes: 0 };

/** Reduces a detected capability record to `{score, hasGpu, usableBytes}`. */
const fa = (e) => {
  if (!e) return da;
  return {
    score: Number(e.score) || 0,
    hasGpu: !!e.hasGpu,
    usableBytes: Number(e.gpuUsableBytes || e.cpuUsableBytes || 0),
  };
};

/**
 * Annotates each generator descriptor with its capability summary.
 * Detection failures are logged and leave the zeroed summary in place.
 *
 * @param {Array<{type: string}>} e generator descriptors
 * @returns {Promise<object[]>} descriptors extended with score/hasGpu/usableBytes
 */
const pa = async (e) => {
  const annotated = [];
  // Sequential on purpose: the original probes one generator type at a time.
  for (const generator of e) {
    let summary = da;
    try {
      summary = fa((await J(generator.type, null, {}))?.buttress?.selected);
    } catch (err) {
      console.warn(
        `[Caps] Failed to detect capabilities for "${generator.type}":`,
        err instanceof Error ? err.message : err,
      );
    }
    annotated.push({ ...generator, ...summary });
  }
  return annotated;
};

/** First non-internal IPv4 address of this host, or null. */
const ma = () => {
  const candidates = Object.values(x.networkInterfaces()).flat();
  const external = candidates.find((iface) => iface?.family === `IPv4` && !iface?.internal);
  return external?.address || null;
};

/**
 * Converts dotted-quad IPv4 text to an unsigned 32-bit integer.
 * Returns 0 for anything that is not four integer octets in 0..255
 * (out-of-range and empty octets used to be accepted).
 */
const ha = (e) => {
  const parts = e.split(`.`);
  if (parts.length !== 4) return 0;
  const octets = parts.map(Number);
  const valid = octets.every(
    (octet, index) => parts[index] !== `` && Number.isInteger(octet) && octet >= 0 && octet <= 255,
  );
  if (!valid) return 0;
  return ((octets[0] << 24) | (octets[1] << 16) | (octets[2] << 8) | octets[3]) >>> 0;
};

/** Converts an unsigned 32-bit integer to dotted-quad IPv4 text. */
const ga = (e) => [(e >>> 24) & 255, (e >>> 16) & 255, (e >>> 8) & 255, e & 255].join(`.`);
/**
 * Lists `{address, broadcast}` pairs for every external IPv4 interface that
 * has a usable directed-broadcast address. /32 networks, interfaces whose
 * broadcast equals their own address, and duplicates are left out.
 */
const _a = () => {
  const targets = [];
  const seen = new Set();
  for (const addresses of Object.values(x.networkInterfaces())) {
    for (const iface of addresses ?? []) {
      if (iface.family !== `IPv4` || iface.internal || !iface.address || !iface.netmask) continue;
      const address = ha(iface.address);
      const mask = ha(iface.netmask);
      if (!address || !mask || mask === 4294967295) continue;
      // network part OR inverted mask = directed broadcast address
      const broadcast = ga(((address & mask) | (~mask >>> 0)) >>> 0);
      if (broadcast === iface.address) continue;
      const key = `${iface.address}->${broadcast}`;
      if (seen.has(key)) continue;
      seen.add(key);
      targets.push({ address: iface.address, broadcast });
    }
  }
  return targets;
};

/** True when a bind error means the `reusePort` option is unsupported. */
const va = (e) => {
  if (!e) return false;
  return e.code === `ENOTSUP` || /Failed to bind socket/i.test(e.message ?? ``);
};

/**
 * UDP autodiscovery transport: periodically broadcasts signed ANNOUNCE
 * envelopes and answers QUERY datagrams with signed RESPONSE envelopes.
 */
var ya = class UdpDiscoveryTransport {
  name = `udp`;
  receiver = null;
  senders = [];
  announcementTimer = null;
  config;
  getServerInfo;
  port;
  signer;

  /**
   * @param {object} e udp section of the autodiscover config
   * @param {() => object} t returns the server info to advertise
   * @param {{privateKey: *, kid: string}|null} n signing key, if registered
   */
  constructor(e, t, n) {
    this.config = e;
    this.getServerInfo = t;
    this.port = e.port ?? 8089;
    this.signer = n;
  }

  /** Binds the receiver and per-interface senders, then starts announcing. */
  async start() {
    this.receiver = await this.bindReceiver(true).catch((err) => {
      if (!va(err)) throw err;
      console.warn(
        `[Autodiscover UDP] SO_REUSEPORT not supported by this runtime; falling back to REUSEADDR only (multiple buttress instances on one host will not coexist on the discovery port).`,
      );
      return this.bindReceiver(false);
    });
    this.receiver.on(`message`, (payload, rinfo) => {
      this.handleMessage(payload, rinfo);
    });
    this.receiver.on(`error`, (err) => {
      console.error(`[Autodiscover UDP] Receiver error:`, err.message);
    });
    this.receiver.setBroadcast(true);
    this.senders = await this.createSenders();

    const interfaces =
      this.senders.map((sender) => `${sender.address}->${sender.broadcast}`).join(`, `) || `<none>`;
    console.log(`[Autodiscover UDP] Listening on port ${this.port}; announce interfaces: ${interfaces}`);

    if (this.config.announcements.enabled) {
      const interval = this.config.announcements.interval ?? 5e3;
      this.announcementTimer = setInterval(() => {
        this.sendAnnouncement();
      }, interval);
      this.sendAnnouncement();
    }
  }

  /** Stops announcing and closes every socket. */
  async stop() {
    if (this.announcementTimer) {
      clearInterval(this.announcementTimer);
      this.announcementTimer = null;
    }
    await Promise.all(
      this.senders.map(
        ({ socket }) =>
          new Promise((resolve) => {
            socket.close(() => resolve());
          }),
      ),
    );
    this.senders = [];
    if (this.receiver) {
      await new Promise((resolve) => {
        this.receiver.close(() => resolve());
      });
      this.receiver = null;
    }
  }

  /**
   * Binds the discovery socket on `this.port`.
   * @param {boolean} e whether to request `reusePort` in addition to `reuseAddr`
   * @returns {Promise<object>} the bound socket
   */
  async bindReceiver(e) {
    const socket = L.createSocket(
      e ? { type: `udp4`, reuseAddr: true, reusePort: true } : { type: `udp4`, reuseAddr: true },
    );
    return new Promise((resolve, reject) => {
      const onBindError = (err) => {
        socket.close();
        reject(err);
      };
      socket.once(`error`, onBindError);
      socket.bind(this.port, () => {
        socket.off(`error`, onBindError);
        resolve(socket);
      });
    });
  }

  /** Opens one broadcast socket per interface from `_a`; failures are logged and skipped. */
  async createSenders() {
    const opened = await Promise.all(
      _a().map(async (target) => {
        try {
          const socket = L.createSocket({ type: `udp4` });
          await new Promise((resolve, reject) => {
            const onBindError = (err) => {
              socket.close();
              reject(err);
            };
            socket.once(`error`, onBindError);
            socket.bind({ port: 0, address: target.address }, () => {
              socket.off(`error`, onBindError);
              socket.setBroadcast(true);
              resolve();
            });
          });
          socket.on(`error`, (err) => {
            console.error(`[Autodiscover UDP] Sender ${target.address} error:`, err.message);
          });
          return { ...target, socket };
        } catch (err) {
          console.warn(`[Autodiscover UDP] Failed to bind sender on ${target.address}:`, err.message);
          return null;
        }
      }),
    );
    return opened.filter((sender) => sender !== null);
  }

  /**
   * Handles one incoming datagram. Only `{t: 'QUERY'}` messages are answered,
   * after a random delay of up to `requests.responseDelay` ms.
   *
   * The request id is read before the timer is scheduled and the timer body
   * is guarded: the old code dereferenced `d.id` inside the timer, so a QUERY
   * without `d` threw outside any try/catch and took the process down.
   *
   * @param {Buffer} e raw datagram
   * @param {{address: string, port: number}} t sender address info
   */
  handleMessage(e, t) {
    let message;
    try {
      message = JSON.parse(e.toString());
    } catch {
      // Not JSON: unrelated traffic on the discovery port, ignore.
      return;
    }
    if (message?.t !== `QUERY` || !this.config.requests.enabled) return;

    const requestId = message.d?.id;
    const maxDelay = this.config.requests.responseDelay ?? 0;
    const delay = maxDelay > 0 ? Math.random() * maxDelay : 0;
    setTimeout(() => {
      try {
        this.sendResponse(requestId, t);
      } catch (err) {
        console.error(`[Autodiscover UDP] Response error:`, err instanceof Error ? err.message : err);
      }
    }, delay);
  }

  /** Bytes that get signed: the JSON text of `{t, d, ts}` in that key order. */
  static canonicalBytes(e, t, n) {
    return Buffer.from(JSON.stringify({ t: e, d: t, ts: n }), `utf8`);
  }

  /**
   * Wraps a payload in a signed envelope `{t, v, d, ts, kid, sig}`.
   * @returns {object|null} null when no signer is configured
   */
  signEnvelope(t, n) {
    if (!this.signer) return null;
    const ts = Math.floor(Date.now() / 1e3);
    const sig = o
      .sign(null, UdpDiscoveryTransport.canonicalBytes(t, n, ts), this.signer.privateKey)
      .toString(`base64`);
    return { t, v: `2.0`, d: n, ts, kid: this.signer.kid, sig };
  }

  /** Broadcasts a signed ANNOUNCE on every sender socket. */
  sendAnnouncement() {
    if (this.senders.length === 0) return;
    const envelope = this.signEnvelope(`ANNOUNCE`, { info: this.getServerInfo() });
    if (!envelope) {
      console.warn(
        "[Autodiscover UDP] no per-server keypair; skipping announcement. Run `bricks buttress bind` to register a key.",
      );
      return;
    }
    const packet = Buffer.from(JSON.stringify(envelope));
    for (const { broadcast, socket, address } of this.senders) {
      socket.send(packet, 0, packet.length, this.port, broadcast, (err) => {
        if (err) console.error(`[Autodiscover UDP] Announcement ${address}->${broadcast} error:`, err.message);
      });
    }
  }

  /** Sends a signed RESPONSE for request `e` back to the querying peer `t`. */
  sendResponse(e, t) {
    if (!this.receiver) return;
    const envelope = this.signEnvelope(`RESPONSE`, { request_id: e, info: this.getServerInfo() });
    if (!envelope) return;
    const packet = Buffer.from(JSON.stringify(envelope));
    this.receiver.send(packet, 0, packet.length, t.port, t.address, (err) => {
      if (err) console.error(`[Autodiscover UDP] Response error:`, err.message);
    });
  }
};

/** Owns the autodiscovery transports and starts/stops them together. */
var ba = class {
  transports = [];
  started = false;

  constructor(e, t, n) {
    this.config = e;
    this.getServerInfo = t;
    this.signer = n;
    if (e.udp?.announcements?.enabled || e.udp?.requests?.enabled) {
      this.transports.push(new ya(e.udp, t, n));
    }
  }

  /** Starts every transport once; individual failures are logged, not thrown. */
  async start() {
    if (this.started) return;
    const outcomes = await Promise.allSettled(this.transports.map((transport) => transport.start()));
    outcomes.forEach((outcome, index) => {
      if (outcome.status === `rejected`) {
        console.error(`[Autodiscover] Failed to start ${this.transports[index].name}:`, outcome.reason);
      }
    });
    this.started = true;
  }

  /** Stops every transport if the service was started. */
  async stop() {
    if (!this.started) return;
    await Promise.allSettled(this.transports.map((transport) => transport.stop()));
    this.started = false;
  }
};

/** State directory: `BRICKS_BUTTRESS_STATE_DIR` or `~/.bricks-cli/buttress`. */
const xa = () => process.env.BRICKS_BUTTRESS_STATE_DIR || l.join(x.homedir(), `.bricks-cli`, `buttress`);

/** Path of the persisted state file. */
const Sa = () => l.join(xa(), `state.json`);

/** Shape check for a persisted workspace binding. */
const Ca = (e) => {
  if (!e || typeof e !== `object`) return false;
  return [e.id, e.serverId, e.issuerPublicKey, e.kid].every((field) => typeof field === `string`);
};

/** Shape check for a persisted server key pair. */
const wa = (e) => {
  if (!e || typeof e !== `object`) return false;
  return [e.publicKeySpki, e.privateKeyPkcs8, e.kid].every((field) => typeof field === `string`);
};

/**
 * Loads `{workspace, serverKeyPair}` from the state file. Entries with an
 * unexpected shape become null. Read/parse failures yield two nulls and are
 * logged unless the file simply does not exist.
 */
const Ta = () => {
  const statePath = Sa();
  try {
    const parsed = JSON.parse(k.readFileSync(statePath, `utf8`));
    return {
      workspace: Ca(parsed?.workspace) ? parsed.workspace : null,
      serverKeyPair: wa(parsed?.serverKeyPair) ? parsed.serverKeyPair : null,
    };
  } catch (err) {
    if (err.code !== `ENOENT`) console.warn(`[Buttress] Failed to read workspace state:`, err.message);
    return { workspace: null, serverKeyPair: null };
  }
};

/** Package metadata as returned by `ti()`. */
const $ = ti();

/** Splits `repo:filename` into `{repoId, filename}`; missing parts are null. */
const Ea = (e) => {
  if (!e) return { repoId: null, filename: null };
  const [repoId, filename] = e.split(`:`);
  return { repoId, filename: filename || null };
};
Da({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
36
/** Registry of live generators keyed by `<type>:<model id>`. */
const Z = new Map();

/** Generator ids whose instance is currently being created, mapped to the creation promise. */
const pendingGeneratorStarts = new Map();

/** Shared bookkeeping for in-flight model downloads, keyed by local path. */
const Hr = {
  downloads: new Map(),
  getDownload(e) {
    return this.downloads.get(e) || null;
  },
  setDownload(e, t) {
    this.downloads.set(e, t);
  },
  deleteDownload(e) {
    this.downloads.delete(e);
  },
  isDownloading(e) {
    return this.downloads.has(e);
  },
  getActiveDownloads() {
    return Array.from(this.downloads.entries()).map(([localPath, promise]) => ({ localPath, promise }));
  },
};

/**
 * Looks up a registry entry.
 * @throws {Error} when the id is unknown
 */
const Ur = (e) => {
  const entry = Z.get(e);
  if (!entry) throw Error(`Unknown generator id "${e}"`);
  return entry;
};

/**
 * Returns the instance behind generator `e`, checking that it is of backend type `t`.
 * @throws {Error} when the id is unknown or the type differs
 */
const Q = (e, t) => {
  const entry = Ur(e);
  if (entry.type !== t) throw Error(`Generator "${e}" does not support ${t} backend`);
  return entry.instance;
};

/**
 * Starts a generator of backend type `e`, or re-uses the live one with the
 * same id and bumps its reference count.
 *
 * Calls that arrive while the same id is still being created wait for that
 * creation instead of creating a second instance; before, both callers
 * created one, the later instance replaced the earlier in the registry, and
 * the reference count stayed at 1.
 *
 * @param {string} e backend type (`ggml-llm`, `ggml-stt` or `mlx-llm`)
 * @param {object} t generator configuration
 * @returns {Promise<{id: string, info: *}>}
 * @throws {Error} for an unsupported type or a config without a model identifier
 */
async function Wr(e, t) {
  const factory = {
    "ggml-llm": { create: sn, getId: cn },
    "ggml-stt": { create: Yn, getId: Xn },
    "mlx-llm": { create: jr, getId: Mr },
  }[e];
  if (!factory) throw Error(`Unsupported backend type: ${e}`);

  const modelId = factory.getId(t);
  if (!modelId) throw Error(`Buttress generator config missing repo identifier`);
  const id = `${e}:${modelId}`;

  // Wait out any creation already running for this id. A failed creation is
  // reported to its own caller; waiters simply retry below.
  while (pendingGeneratorStarts.has(id)) {
    await pendingGeneratorStarts.get(id).catch(() => {});
  }

  const live = Z.get(id);
  if (live) {
    live.refCount += 1;
    live.instance.resetFinalized?.();
    return { id: live.id, info: live.instance.info };
  }

  const creation = (async () => factory.create(id, t, { globalDownloadManager: Hr }))();
  pendingGeneratorStarts.set(id, creation);
  try {
    const instance = await creation;
    Z.set(id, { id, type: instance.type, instance, refCount: 1 });
    return { id, info: instance.info };
  } finally {
    pendingGeneratorStarts.delete(id);
  }
}

/**
 * Releases one reference to generator `e`. When the count reaches zero the
 * instance is finalized and, unless it reports pending releases, removed
 * from the registry.
 *
 * @returns {Promise<boolean>} false when the id is unknown, true otherwise
 */
async function Gr(e) {
  const entry = Z.get(e);
  if (!entry) return false;
  entry.refCount -= 1;
  if (entry.refCount <= 0) {
    await entry.instance.finalize();
    const stillReleasing = entry.instance.hasPendingReleases?.() ?? false;
    if (!stillReleasing) Z.delete(e);
  }
  return true;
}
/**
 * Builds the facade for one backend type: every listed method forwards its
 * payload to the registered generator instance of that type (via `Q`), and
 * `releaseContext` additionally tolerates generators that are already gone.
 *
 * Replaces three copy-pasted object literals that differed only in the type
 * string and the method names.
 *
 * @param {string} type backend type the generator must have
 * @param {string[]} methods instance methods to forward
 * @returns {object} facade with the listed methods followed by `releaseContext`
 */
const createBackendFacade = (type, methods) => {
  const facade = {};
  for (const method of methods) {
    facade[method] = async (e, t) => Q(e, type)[method](t);
  }
  facade.releaseContext = async (e, t) => {
    const entry = Z.get(e);
    // The generator was finalized earlier, so there is nothing left to release.
    if (!entry) return { released: true, alreadyReleased: true };
    if (entry.type !== type) throw Error(`Generator "${e}" does not support ${type} backend`);
    return entry.instance.releaseContext(t);
  };
  return facade;
};

const LLM_FACADE_METHODS = [`initContext`, `completion`, `tokenize`, `detokenize`, `applyChatTemplate`];

/** Facade for `ggml-llm` generators. */
const Kr = createBackendFacade(`ggml-llm`, LLM_FACADE_METHODS);

/** Facade for `mlx-llm` generators. */
const qr = createBackendFacade(`mlx-llm`, LLM_FACADE_METHODS);

/** Facade for `ggml-stt` generators. */
const Jr = createBackendFacade(`ggml-stt`, [`initContext`, `transcribe`, `transcribeData`]);

/** Model identifier for a generator config of the given backend type, or null for unknown types. */
function Yr(e, t) {
  switch (e) {
    case `ggml-llm`:
      return cn(t);
    case `ggml-stt`:
      return Xn(t);
    case `mlx-llm`:
      return Mr(t);
    default:
      return null;
  }
}

/** Status accessors and trackers re-exported as one object. */
const Xr = {
  getFullStatus: () => at(Z),
  getGgmlLlmStatus: () => nt(Z),
  getGgmlSttStatus: () => rt(Z),
  getMlxLlmStatus: () => it(Z),
  subscribeToStatus: et,
  subscribeToStatusWithId: tt,
  llmStatusTracker: V,
  sttStatusTracker: H,
  statusEmitter: B,
};

/**
 * Starts a model download for the given backend type.
 * @returns {Promise<object>} the backend's result, or a `started: false`
 *   record with an error message for an unknown type
 */
async function Zr(e, t, n = {}) {
  const startDownload = { "ggml-llm": ln, "ggml-stt": Zn, "mlx-llm": Pr }[e];
  if (!startDownload) {
    return { started: false, localPath: null, repoId: null, error: `Unknown backend type: ${e}` };
  }
  return startDownload(t, Hr, n);
}

/** Cached result of `ei`. */
let Qr = null;

/** Package metadata used when package.json cannot be read. */
const $r = { name: `@fugood/buttress-server`, version: `0.0.0` };

/**
 * Returns `{name, version, description}` read from `../package.json`
 * (relative to this module), cached after the first call. Falls back to
 * `$r` when the file cannot be read or parsed.
 */
const ei = () => {
  if (Qr) return Qr;
  try {
    const manifestPath = te(new URL(`../package.json`, import.meta.url));
    const manifest = JSON.parse(k.readFileSync(manifestPath, `utf8`));
    Qr = {
      name: manifest.name || $r.name,
      version: manifest.version || $r.version,
      description: manifest.description,
    };
  } catch {
    Qr = $r;
  }
  return Qr;
};
/** Last imported issuer key, remembered together with the `kid` it belongs to. */
let oi = null;

/** Imports (and caches per `kid`) the workspace issuer's EdDSA public key. */
const si = async (e) => {
  if (oi && oi.kid === e.kid) return oi.key;
  const key = await N(e.issuerPublicKey, `EdDSA`);
  oi = { kid: e.kid, key };
  return key;
};

/** Matches `Bearer <token>` case-insensitively. */
const ci = /^Bearer\s+(.+)$/i;

/**
 * Extracts the access token from a request.
 *
 * Lookup order: the `Authorization` header (Bearer scheme, or the raw header
 * value when it is not a Bearer header), then the `x-api-key` header, then
 * the `access_token` / `token` query parameters. The `x-api-key` lookup is
 * new: the Anthropic-compatible router lists that header in its CORS
 * configuration, yet it was never read.
 *
 * @param {object|null} e request headers
 * @param {object|null} t query parameters
 * @returns {string|null}
 */
const li = (e, t) => {
  if (e) {
    const authorization = e.authorization || e.Authorization;
    if (authorization) {
      const bearer = authorization.match(ci);
      return bearer ? bearer[1].trim() : authorization.trim();
    }
    const apiKey = e[`x-api-key`] ?? e[`X-Api-Key`];
    if (typeof apiKey === `string` && apiKey.trim()) return apiKey.trim();
  }
  if (t) {
    const fromQuery = t.access_token ?? t.token;
    if (typeof fromQuery === `string` && fromQuery) return fromQuery;
  }
  return null;
};

/**
 * Verifies a workspace access token against the bound workspace.
 *
 * @param {string|null} e token
 * @param {object|null} t workspace binding (`id`, `kid`, `issuerPublicKey`)
 * @returns {Promise<object|null>} `{workspaceId, subjectType, subjectId, jti, exp}`,
 *   or null when the token is missing, fails verification, or carries
 *   unexpected claims
 */
const ui = async (e, t) => {
  if (!e || !t) return null;
  try {
    const { payload: claims } = await P(e, await si(t), { algorithms: [`EdDSA`] });
    const subjectTypeOk = claims.st === `ws` || claims.st === `dev`;
    if (claims.k !== `ba` || claims.w_id !== t.id || !subjectTypeOk || !claims.sid || !claims.exp) {
      return null;
    }
    return {
      workspaceId: claims.w_id,
      subjectType: claims.st,
      subjectId: claims.sid,
      jti: claims.jti,
      exp: claims.exp,
    };
  } catch {
    // Any verification failure is reported as "no valid token".
    return null;
  }
};

/**
 * Route guard: when a workspace is bound, requires a valid workspace token
 * and otherwise answers 401. Returns undefined to let the request through.
 */
const di = async ({ headers: e, query: t, set: n, store: r }) => {
  const workspace = r.workspaceState?.workspace;
  if (!workspace) return;
  if (await ui(li(e, t), workspace)) return;
  n.status = 401;
  if (n.headers) n.headers[`WWW-Authenticate`] = `Bearer`;
  return { error: { code: `UNAUTHORIZED`, message: `Invalid or missing workspace access token` } };
};

/** Reads the trimmed buttress session id header, or null. */
const fi = (e) => {
  if (!e) return null;
  const raw = e[`x-buttress-sess-id`] ?? e[`X-BUTTRESS-SESS-ID`];
  return raw ? raw.trim() : null;
};

/**
 * Route guard: requires a known session id and, when a workspace is bound
 * and the session has an identity, a token issued to that same identity.
 * Returns undefined to let the request through.
 */
const pi = async ({ headers: e, query: t, set: n, store: r }) => {
  const sessions = r.sessions;
  const sessionId = fi(e);
  if (!sessionId || !sessions) {
    n.status = 401;
    return { error: { code: `SESSION_REQUIRED`, message: `Missing buttress session id` } };
  }
  const session = sessions.get(sessionId);
  if (!session) {
    n.status = 401;
    return { error: { code: `SESSION_INVALID`, message: `Unknown or expired buttress session` } };
  }
  const workspace = r.workspaceState?.workspace;
  if (workspace && session.identity) {
    const claims = await ui(li(e, t), workspace);
    const sameIdentity =
      claims &&
      claims.subjectId === session.identity.subjectId &&
      claims.subjectType === session.identity.subjectType;
    if (!sameIdentity) {
      n.status = 403;
      return { error: { code: `SESSION_FORBIDDEN`, message: `Token identity mismatch for this session` } };
    }
  }
};

/** Resolves `{sessionId, session}` for a request, or null when there is none. */
const mi = (e, t) => {
  const sessions = e.sessions;
  const sessionId = fi(t);
  if (!sessionId || !sessions) return null;
  const session = sessions.get(sessionId);
  return session ? { sessionId, session } : null;
};

/**
 * Resolves a client-supplied file name inside a session's temp directory.
 *
 * @param {string} e temp root
 * @param {string} t session id
 * @param {string} n client-supplied file name
 * @returns {{sessionDir: string, filePath: string, safe: boolean}} `safe` is
 *   false when the resolved path is the session directory itself or lies
 *   outside it
 */
const hi = (e, t, n) => {
  const sessionDir = l.join(e, t);
  const filePath = l.join(sessionDir, n);
  const relative = l.relative(sessionDir, filePath);
  const safe = relative !== `` && !relative.startsWith(`..`) && !l.isAbsolute(relative);
  return { sessionDir, filePath, safe };
};
/** Generator types that can serve the HTTP chat endpoints. */
const Ci = [`ggml-llm`, `mlx-llm`];

/** Generators created for the HTTP endpoints, keyed by model repo id. */
const wi = new Map();

/** Generator start-ups still in progress, keyed like `wi`. */
const pendingLlmGenerators = new Map();

/** Picks the backend facade for an LLM generator type. */
function Ti(e, t) {
  return t === `mlx-llm` ? e.mlxLlm : e.ggmlLlm;
}

/**
 * Returns the ready generator for the requested model, creating and
 * initialising it on first use.
 *
 * The generator entry comes from the first configured LLM generator, or from
 * the one whose `model.repo_id` equals `n`. When `n` matches none, the first
 * entry is used with its repo id replaced by `n`.
 *
 * Requests that arrive while the same model is still starting now share that
 * start-up; before, each of them started the generator and initialised a
 * context again.
 *
 * @param {object} e backend (startGenerator plus ggmlLlm / mlxLlm facades)
 * @param {{generators?: object[], global?: object}} t server config
 * @param {string} [n] model name requested by the client
 * @param {string} [r] log prefix
 * @returns {Promise<{id: string, type: string, config: object, repoId: *, initialized: boolean}>}
 * @throws {Error} when no LLM generator is configured
 */
async function Ei(e, t, n, r = `[LLM]`) {
  const candidates = (t.generators || []).filter((generator) => Ci.includes(generator.type));
  if (candidates.length === 0) {
    throw Error(
      `No LLM generator configured. Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`,
    );
  }

  let chosen = candidates[0];
  let repoId = chosen.model?.repo_id;
  if (n) {
    repoId = n;
    chosen = candidates.find((generator) => generator.model?.repo_id === n) ?? chosen;
  }
  const type = chosen.type || `ggml-llm`;
  const cacheKey = repoId;

  const ready = wi.get(cacheKey);
  if (ready?.initialized) return ready;

  const inFlight = pendingLlmGenerators.get(cacheKey);
  if (inFlight) return inFlight;

  const startup = (async () => {
    const { generators: _generators, server: _server, ...globalSettings } = t.global || {};
    const config = { ...globalSettings, ...chosen, model: { ...chosen.model, repo_id: repoId } };
    console.log(`${r} Creating ${type} generator for ${cacheKey}`);
    const { id } = await e.startGenerator(type, config);
    const entry = { id, type, config, repoId, initialized: false };
    wi.set(cacheKey, entry);
    await Ti(e, type).initContext(id, {});
    entry.initialized = true;
    console.log(`${r} Generator ready: ${cacheKey}`);
    return entry;
  })();
  pendingLlmGenerators.set(cacheKey, startup);
  try {
    return await startup;
  } finally {
    pendingLlmGenerators.delete(cacheKey);
  }
}

/**
 * Extracts token counts from a completion result.
 *
 * `timings` values win. Without them the completion count falls back to the
 * `tokens_predicted` fields first; `tokens_evaluated` is kept only as a last
 * resort because it describes the prompt side (see the llama.cpp server
 * documentation), so preferring it misreported the completion size whenever
 * both fields were present.
 *
 * @param {object} e completion result
 * @returns {{promptTokens: number, cachedTokens: number, completionTokens: number}}
 */
function Di(e) {
  const timings = e.timings || {};
  const promptCount = timings.prompt_n ?? timings.promptN ?? 0;
  const cachedCount = timings.cache_n ?? timings.cacheN ?? 0;
  const predictedCount = timings.predicted_n ?? timings.predictedN ?? 0;
  return {
    promptTokens: promptCount || e.prompt_tokens || e.promptTokens || 0,
    cachedTokens: cachedCount,
    completionTokens:
      predictedCount ||
      e.tokens_predicted ||
      e.tokensPredicted ||
      e.tokens_evaluated ||
      e.tokensEvaluated ||
      0,
  };
}

/** OpenAI-style usage block; cached prompt tokens are counted as prompt tokens. */
function Oi(e) {
  const { promptTokens, cachedTokens, completionTokens } = Di(e);
  const promptTotal = promptTokens + cachedTokens;
  return {
    prompt_tokens: promptTotal,
    completion_tokens: completionTokens,
    total_tokens: promptTotal + completionTokens,
  };
}
/** Generates an id for a chat completion response. */
const ki = () => `chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;

/**
 * Drains a completion event stream into one non-streaming
 * `chat.completion` response object.
 *
 * @param {ReadableStream} e stream of `{event, data}` chunks
 * @param {string} t response id
 * @param {number} n creation timestamp copied into `created`
 * @param {string} r model name
 * @returns {Promise<object>} chat.completion object
 * @throws {Error} when the stream delivers an `error` event
 */
async function Ai(e, t, n, r) {
  const reader = e.getReader();
  let text = ``;
  let reasoning = null;
  let toolCalls = null;
  let finishReason = `stop`;
  let usage = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };

  try {
    for (;;) {
      const chunk = await reader.read();
      if (chunk.done) break;
      const { event, data } = chunk.value;

      switch (event) {
        case `token`:
          // Token events replace the text collected so far.
          if (data.content != null) text = data.content;
          if (data.reasoning_content != null) reasoning = data.reasoning_content;
          break;

        case `result`:
          if (data.content != null) {
            text = data.content;
          } else if (data.text) {
            text = data.text;
          }
          if (data.reasoning_content != null) reasoning = data.reasoning_content;
          if (data.tool_calls?.length > 0) {
            toolCalls = data.tool_calls.map((call, index) => ({
              id: call.id || `call_${t}_${index}`,
              type: `function`,
              function: {
                name: call.function?.name || ``,
                arguments: call.function?.arguments || ``,
              },
            }));
            finishReason = `tool_calls`;
          } else {
            finishReason = data.interrupted ? `length` : `stop`;
          }
          usage = Oi(data);
          break;

        case `error`:
          throw Error(data.message);

        default:
          break;
      }
    }
  } finally {
    reader.cancel().catch(() => {});
  }

  const message = { role: `assistant`, content: text || null };
  if (reasoning) message.reasoning_content = reasoning;
  if (toolCalls) message.tool_calls = toolCalls;

  return {
    id: t,
    object: `chat.completion`,
    created: n,
    model: r,
    choices: [{ index: 0, message, finish_reason: finishReason }],
    usage,
  };
}
empty`,type:`invalid_request_error`}};/* Resolve the LLM generator for the requested model, then build the completion options; optional request fields are copied only when they are not null/undefined, and a single stop string is wrapped in an array. */try{let e=await Ei(a,r,c,`[OpenAI]`),t=ki(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;/* x is the completion event stream; it is consumed through getReader() below. */let x=await Ti(a,e.type).completion(e.id,{options:b});/* Non-streaming request: Ai() drains the stream into one chat.completion object. */if(!s)return await Ai(x,t,n,y);/* Streaming state: w and T hold the content / reasoning text already sent; E maps a tool-call index to the argument text already sent; D maps it to the id announced for that call. */let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;/* token events: content, reasoning_content and tool-call arguments are treated as cumulative strings, so only the part beyond what was already sent goes into the delta. */if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];/* The first chunk for a tool-call index carries its id and type (and the name, when present); later chunks carry only new argument text. */o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}/* Empty deltas are not sent. */Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}/* result event: emits the closing chunk; finish_reason is tool_calls when any tool call was seen, length when the result is marked interrupted, stop otherwise. Usage is attached only when stream_options.include_usage is true. */else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=Oi(o)),yield i({data:JSON.stringify(r)})}/* error event: forwarded as an error payload; this branch does not leave the loop, so reading continues and the [DONE] marker is still sent afterwards. */else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield 
/** Generates an id for an Anthropic-style message response. */
const Mi = () => `msg_${Date.now()}${Math.random().toString(36).slice(2, 11)}`;

/**
 * Converts an Anthropic Messages request body into the chat-completion
 * options used by the backend (`messages`, `tools`, `tool_choice`, `stop`,
 * `max_tokens`, sampling fields, thinking flags).
 *
 * Content blocks map as follows: `text` and `image` become content parts,
 * `thinking` becomes `reasoning_content`, `tool_use` becomes an entry in
 * `tool_calls`, and each `tool_result` becomes its own `tool` message.
 *
 * For non-assistant turns the `tool` messages are emitted before the turn's
 * remaining content, so tool results directly follow the assistant message
 * that issued the calls; they used to be emitted after the user text.
 *
 * @param {object} e Anthropic request body
 * @returns {object} converted options
 * @throws {Error} when `messages` is not an array
 */
function Ni(e) {
  const converted = {};
  const messages = [];

  if (e.system != null) {
    let systemText = ``;
    if (typeof e.system === `string`) {
      systemText = e.system;
    } else if (Array.isArray(e.system)) {
      for (const block of e.system) {
        if (block?.type === `text` && typeof block.text === `string`) systemText += block.text;
      }
    }
    if (systemText) messages.push({ role: `system`, content: systemText });
  }

  if (!Array.isArray(e.messages)) throw Error(`'messages' is required and must be an array`);

  for (const message of e.messages) {
    const role = message?.role || `user`;

    if (message?.content == null) {
      // Assistant turns without content are dropped; others pass through as-is.
      if (role !== `assistant`) messages.push(message);
      continue;
    }
    if (typeof message.content === `string`) {
      messages.push({ role, content: message.content });
      continue;
    }
    if (!Array.isArray(message.content)) {
      messages.push(message);
      continue;
    }

    const toolCalls = [];
    const parts = [];
    const toolResults = [];
    let reasoning = ``;

    for (const block of message.content) {
      switch (block?.type || ``) {
        case `text`:
          parts.push({ type: `text`, text: block.text || `` });
          break;
        case `thinking`:
          reasoning += block.thinking || ``;
          break;
        case `image`: {
          const source = block.source || {};
          if (source.type === `base64`) {
            const mediaType = source.media_type || `image/jpeg`;
            const data = source.data || ``;
            parts.push({ type: `image_url`, image_url: { url: `data:${mediaType};base64,${data}` } });
          } else if (source.type === `url`) {
            parts.push({ type: `image_url`, image_url: { url: source.url || `` } });
          }
          break;
        }
        case `tool_use`:
          toolCalls.push({
            id: block.id || ``,
            type: `function`,
            function: { name: block.name || ``, arguments: JSON.stringify(block.input ?? {}) },
          });
          break;
        case `tool_result`: {
          let resultText = ``;
          if (typeof block.content === `string`) {
            resultText = block.content;
          } else if (Array.isArray(block.content)) {
            for (const part of block.content) {
              if (part?.type === `text`) resultText += part.text || ``;
            }
          }
          toolResults.push({ role: `tool`, tool_call_id: block.tool_use_id || ``, content: resultText });
          break;
        }
        default:
          break;
      }
    }

    const hasToolCalls = toolCalls.length > 0;
    let turn = null;
    if (parts.length > 0 || hasToolCalls || reasoning) {
      turn = { role };
      turn.content = parts.length > 0 ? parts : ``;
      if (hasToolCalls) turn.tool_calls = toolCalls;
      if (reasoning) turn.reasoning_content = reasoning;
    }

    if (role === `assistant`) {
      if (turn) messages.push(turn);
      messages.push(...toolResults);
    } else {
      messages.push(...toolResults);
      if (turn) messages.push(turn);
    }
  }
  converted.messages = messages;

  if (Array.isArray(e.tools)) {
    converted.tools = e.tools.map((tool) => ({
      type: `function`,
      function: {
        name: tool.name || ``,
        description: tool.description || ``,
        parameters: tool.input_schema || {},
      },
    }));
  }

  if (e.tool_choice && typeof e.tool_choice === `object`) {
    const choice = e.tool_choice.type;
    if (choice === `auto`) converted.tool_choice = `auto`;
    else if (choice === `any` || choice === `tool`) converted.tool_choice = `required`;
    else if (choice === `none`) converted.tool_choice = `none`;
  } else if (Array.isArray(converted.tools) && converted.tools.length > 0) {
    converted.tool_choice = `auto`;
  }

  if (e.stop_sequences != null) {
    converted.stop = Array.isArray(e.stop_sequences) ? e.stop_sequences : [e.stop_sequences];
  }
  converted.max_tokens = e.max_tokens ?? 4096;
  for (const field of [`temperature`, `top_p`, `top_k`, `stream`]) {
    if (e[field] != null) converted[field] = e[field];
  }

  if (e.thinking && typeof e.thinking === `object` && e.thinking.type === `enabled`) {
    converted.enable_thinking = true;
    if (e.thinking.budget_tokens != null) converted.thinking_budget_tokens = e.thinking.budget_tokens;
  }
  return converted;
}

/**
 * Maps a completion result to an Anthropic `stop_reason`.
 * @param {object} e completion result
 * @param {boolean} t whether the response contains tool calls
 */
function Pi(e, t) {
  if (t) return `tool_use`;
  if (e.stopping_word || e.stoppingWord) return `stop_sequence`;
  if (e.interrupted || e.truncated) return `max_tokens`;
  return `end_turn`;
}

/** Anthropic-style usage block built from the counts returned by `Di`. */
function Fi(e) {
  const { promptTokens, cachedTokens, completionTokens } = Di(e);
  return {
    cache_read_input_tokens: cachedTokens,
    input_tokens: promptTokens,
    output_tokens: completionTokens,
  };
}

/**
 * Drains a completion event stream into one non-streaming Anthropic
 * `message` object with thinking, text and tool_use blocks (in that order).
 *
 * @param {ReadableStream} e stream of `{event, data}` chunks
 * @param {string} t message id
 * @param {string} n model name
 * @returns {Promise<object>}
 * @throws {Error} when the stream delivers an `error` event
 */
async function Ii(e, t, n) {
  const reader = e.getReader();
  let text = ``;
  let thinking = ``;
  let toolCalls = [];
  let usage = { cache_read_input_tokens: 0, input_tokens: 0, output_tokens: 0 };
  let stopReason = `end_turn`;
  let stopSequence = null;

  try {
    for (;;) {
      const chunk = await reader.read();
      if (chunk.done) break;
      const { event, data } = chunk.value;

      if (event === `token`) {
        if (data.content != null) text = data.content;
        if (data.reasoning_content != null) thinking = data.reasoning_content;
      } else if (event === `result`) {
        if (data.content != null) {
          text = data.content;
        } else if (data.text && data.reasoning_content == null) {
          // Raw text is used only when the result carries no separate reasoning.
          text = data.text;
        }
        if (data.reasoning_content != null) thinking = data.reasoning_content;
        if (Array.isArray(data.tool_calls)) toolCalls = data.tool_calls;
        usage = Fi(data);
        stopSequence = data.stopping_word || data.stoppingWord || null;
        stopReason = Pi(data, toolCalls.length > 0);
      } else if (event === `error`) {
        throw Error(data.message || `completion error`);
      }
    }
  } finally {
    reader.cancel().catch(() => {});
  }

  const content = [];
  if (thinking) content.push({ type: `thinking`, thinking, signature: `` });
  if (text) content.push({ type: `text`, text });
  for (const call of toolCalls) {
    let input = {};
    try {
      input = JSON.parse(call.function?.arguments || `{}`);
    } catch {
      // Unparseable arguments are reported as an empty input object.
      input = {};
    }
    content.push({
      type: `tool_use`,
      id: call.id || `toolu_${Math.random().toString(36).slice(2, 11)}`,
      name: call.function?.name || ``,
      input,
    });
  }

  return {
    id: t,
    type: `message`,
    role: `assistant`,
    content,
    model: n,
    stop_reason: stopReason,
    stop_sequence: stopSequence,
    usage,
  };
}
// Streaming branch of POST /v1/messages: re-emits the completion stream as SSE events.
// State kept across chunks:
//   d      reader over the completion stream
//   f / p  text / reasoning forwarded so far; each chunk is compared by length and only the new suffix is sent
//   m, g   per tool-call position: arguments already sent, last known name (h stores ids; it is written but not read here)
//   _      tool-call positions whose content_block_start has been emitted
//   v / y  whether the thinking / text block has been opened; b is the index used for the text block
//   x      tool-call count seen so far (written below, not read in this handler)
//   S,C,w  usage, stop_reason and stop_sequence taken from the `result` event
//   T      whether message_start has been emitted
//   E      block index for a tool call = its position + number of thinking/text blocks open when E is called
// NOTE(review): the thinking block always uses index 0 and sets b=1 when it opens; if a text block had already
// opened at index 0, both share index 0 and later text deltas move to index 1 - confirm reasoning always precedes content.
// NOTE(review): E reads v/y at call time, so the index sent at content_block_stop can differ from the one sent at
// content_block_start if a thinking/text block opens in between - confirm tool calls always arrive last.
d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;
// `token` event: emit message_start once, then forward only the unseen suffix of reasoning, text and tool arguments.
if(n===`token`){if(!T){let e=Fi(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}
// Reasoning: open the thinking block on first growth, then send a thinking_delta.
if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}
// Text: open the text block on first growth, then send a text_delta.
if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}
// Tool calls: one tool_use block per position; the id falls back to `toolu_<message id>_<position>`.
if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}
// `result` event: record usage and stop information. Tool calls that appear only here are counted in x
// but no content block is emitted for them, and stop_reason only reflects tool calls already streamed (_.size).
else if(n===`result`){if(!T){let e=Fi(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Fi(r),w=r.stopping_word||r.stoppingWord||null,C=Pi(r,_.size>0)}
// `error` event: forward it as an SSE error and end the generator (the finally below still cancels the reader).
else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}
// Stream finished: close the thinking block (after an empty signature_delta), then the text block,
// then the tool blocks in position order, and finish with message_delta + message_stop.
v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));
for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}
// Any exception from the handler body: log it, set HTTP 500 and return an api_error payload.
catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},
// Request body schema for /v1/messages.
{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),
// POST /v1/messages/count_tokens: the handler continues past this point.
t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=Ni(a),t=await 
Ei(i,r,a.model,`[Anthropic]`),n=Ti(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any())})}),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>Ci.includes(e.type)).map(e=>{let t=e.model?.repo_id||e.type;return{id:t,type:`model`,display_name:t,created_at:new Date().toISOString()}});return n.length===0&&n.push({id:`ggml-llm`,type:`model`,display_name:`ggml-llm`,created_at:new Date().toISOString()}),{data:n,has_more:!1,first_id:n[0]?.id,last_id:n.at(-1)?.id}}),t}const Ri=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Ri(n[e]||{},t):n[e]=t}),n},zi=e=>e&&typeof e==`object`?structuredClone(e):null,Bi=(e,t)=>Ri(zi(e)||{},zi(t)||{}),Vi=(e,t)=>Ri(structuredClone(e.global),t||{}),Hi=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Vi(e,a)}}return Object.keys(e.global).length>0?Vi(e,{}):null},Ui={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},Wi=e=>e?e===!0?{...Ui}:Ri(Ui,e):null,Gi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return 
e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Ki=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,qi=6e4,Ji=1024*1024*50,Yi=e=>{let t=F.machineIdSync(),n=Ri({server:{id:`buttress-${t}`,name:`Buttress Server (${t.slice(-8)})`,port:2080,temp_file_dir:l.join(x.tmpdir(),`.buttress`),session_timeout:qi,max_body_size:Ji},autodiscover:!1},zi(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:Wi(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Ki(i.max_body_size,E.parse,Ji),session_timeout:Ki(i.session_timeout,I,qi)},global:s,generators:r}},Xi={getCapabilities:M.tuple([M.object({type:M.string().optional().default(`ggml-llm`),config:M.any().optional(),currentClientCapabilities:M.any().optional(),options:M.any().optional()}).nullable().optional()]),startGenerator:M.tuple([M.string(),M.any().optional()]),finalizeGenerator:M.tuple([M.string()])};var Zi={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=zi(i),c=Bi(Hi(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=zi(i),o=Bi(Hi(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize 
Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const Qi={initContext:M.tuple([M.string(),M.any().optional()]),completion:M.tuple([M.string(),M.any().optional()]),tokenize:M.tuple([M.string(),M.any()]),detokenize:M.tuple([M.string(),M.any()]),applyChatTemplate:M.tuple([M.string(),M.any()]),releaseContext:M.tuple([M.string()])};function $i(e){return function({backend:t,session:n},r,i){return new d({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function ea(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function ta(e,t){return{initContext:$i(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:ea(e,`${t}Release Context`)}}var na=ta(e=>e.ggmlLlm,``);const ra={initContext:M.tuple([M.string(),M.any().optional()]),transcribe:M.tuple([M.string(),M.string(),M.any().optional()]),transcribeData:M.tuple([M.string(),M.union([M.instanceof(Buffer),M.instanceof(Uint8Array)]),M.any().optional()]),releaseContext:M.tuple([M.string()])},ia=e=>e.ggmlStt,aa={common:Zi,ggmlLlm:na,ggmlStt:{initContext:$i(ia),async 
transcribe({backend:e,config:{server:t},peerId:n},r,i,a){console.log(`[Server] Transcribe:`,{id:r,audioPath:i,options:a});let{filePath:o,safe:s}=hi(t.temp_file_dir,n,i);if(!s)throw Error(`Invalid audioPath`);return e.ggmlStt.transcribe(r,{audioPath:o,options:a})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:ea(ia,`Release STT Context`)},mlxLlm:ta(e=>e.mlxLlm,`MLX `)},oa={common:Xi,ggmlLlm:Qi,ggmlStt:ra,mlxLlm:Qi};var sa=aa;const ca=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return L.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=L.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},la=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof L?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:L.from(t).toString(`base64`)}:t)}catch{return e}},ua={score:0,hasGpu:!1,usableBytes:0},da=e=>e?{score:Number(e.score)||0,hasGpu:!!e.hasGpu,usableBytes:Number(e.gpuUsableBytes||e.cpuUsableBytes||0)}:ua,fa=async e=>{let t=[];for(let n of e){let e=ua;try{e=da((await J(n.type,null,{}))?.buttress?.selected)}catch(e){console.warn(`[Caps] Failed to detect capabilities for "${n.type}":`,e instanceof Error?e.message:e)}t.push({...n,...e})}return t},pa=()=>{let e=x.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},ma=e=>{let t=e.split(`.`).map(Number);return t.length!==4||t.some(e=>Number.isNaN(e))?0:(t[0]<<24|t[1]<<16|t[2]<<8|t[3])>>>0},ha=e=>[e>>>24&255,e>>>16&255,e>>>8&255,e&255].join(`.`),ga=()=>{let e=[],t=new Set;for(let n of Object.values(x.networkInterfaces()))for(let r of 
n??[]){if(r.family!==`IPv4`||r.internal||!r.address||!r.netmask)continue;let n=ma(r.address),i=ma(r.netmask);if(!n||!i||i===4294967295)continue;let a=ha((n&i|~i>>>0)>>>0);if(a===r.address)continue;let o=`${r.address}->${a}`;t.has(o)||(t.add(o),e.push({address:r.address,broadcast:a}))}return e},
// The loop above (end of `ga`) collects one {address, broadcast} pair per external IPv4 interface.
// It skips entries whose address or netmask converts to 0, a 255.255.255.255 netmask,
// and interfaces whose computed broadcast address equals their own address; duplicates are dropped.

// True when a bind error has code ENOTSUP or its message matches /Failed to bind socket/i.
_a=e=>{if(!e)return!1;let t=e;return t.code===`ENOTSUP`||/Failed to bind socket/i.test(t.message??``)};
// UDP autodiscover transport: one receiver socket bound to the discovery port, plus one
// sender socket per interface returned by `ga` for broadcast announcements.
var va=class e{name=`udp`;receiver=null;senders=[];announcementTimer=null;config;getServerInfo;port;signer;
// e: transport config (port defaults to 8089), t: function returning the server info, n: signer used by signEnvelope.
constructor(e,t,n){this.config=e,this.getServerInfo=t,this.port=e.port??8089,this.signer=n}
// Bind the receiver (retrying without reusePort when `_a` accepts the error), attach the message/error
// handlers, create the senders, and if announcements are enabled announce now and then on an interval.
async start(){this.receiver=await this.bindReceiver(!0).catch(e=>{if(!_a(e))throw e;return console.warn(`[Autodiscover UDP] SO_REUSEPORT not supported by this runtime; falling back to REUSEADDR only (multiple buttress instances on one host will not coexist on the discovery port).`),this.bindReceiver(!1)}),this.receiver.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.receiver.on(`error`,e=>{console.error(`[Autodiscover UDP] Receiver error:`,e.message)}),this.receiver.setBroadcast(!0),this.senders=await this.createSenders();let e=this.senders.map(e=>`${e.address}->${e.broadcast}`).join(`, `)||`<none>`;if(console.log(`[Autodiscover UDP] Listening on port ${this.port}; announce interfaces: ${e}`),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}
// Clear the announcement timer, close every sender socket, then close the receiver.
// Each `&&=` resets its field to null only when the field was set.
async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),await Promise.all(this.senders.map(({socket:e})=>new Promise(t=>{e.close(()=>t())}))),this.senders=[],this.receiver&&=(await new Promise(e=>{this.receiver.close(()=>e())}),null)}
// Create a udp4 socket (reuseAddr always, reusePort when e is true) and bind it to this.port.
// Resolves with the socket once bound; on an error before that, closes the socket and rejects.
async bindReceiver(e){let t=ae.createSocket(e?{type:`udp4`,reuseAddr:!0,reusePort:!0}:{type:`udp4`,reuseAddr:!0});return new Promise((e,n)=>{let r=e=>{t.close(),n(e)};t.once(`error`,r),t.bind(this.port,()=>{t.off(`error`,r),e(t)})})}
// Create the per-interface sender sockets (body continues past this point).
async createSenders(){let 
e=ga();return(await Promise.all(e.map(async e=>{try{let t=ae.createSocket({type:`udp4`});return await new Promise((n,r)=>{let i=e=>{t.close(),r(e)};t.once(`error`,i),t.bind({port:0,address:e.address},()=>{t.off(`error`,i),t.setBroadcast(!0),n()})}),t.on(`error`,t=>{console.error(`[Autodiscover UDP] Sender ${e.address} error:`,t.message)}),{...e,socket:t}}catch(t){return console.warn(`[Autodiscover UDP] Failed to bind sender on ${e.address}:`,t.message),null}}))).filter(e=>e!==null)}handleMessage(e,t){try{let n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}static canonicalBytes(e,t,n){return Buffer.from(JSON.stringify({t:e,d:t,ts:n}),`utf8`)}signEnvelope(t,n){if(!this.signer)return null;let r=Math.floor(Date.now()/1e3),i=o.sign(null,e.canonicalBytes(t,n,r),this.signer.privateKey).toString(`base64`);return{t,v:`2.0`,d:n,ts:r,kid:this.signer.kid,sig:i}}sendAnnouncement(){if(this.senders.length===0)return;let e=this.getServerInfo(),t=this.signEnvelope(`ANNOUNCE`,{info:e});if(!t){console.warn("[Autodiscover UDP] no per-server keypair; skipping announcement. 
Run `bricks buttress bind` to register a key.");return}let n=Buffer.from(JSON.stringify(t));for(let{broadcast:e,socket:t,address:r}of this.senders)t.send(n,0,n.length,this.port,e,t=>{t&&console.error(`[Autodiscover UDP] Announcement ${r}->${e} error:`,t.message)})}sendResponse(e,t){if(!this.receiver)return;let n=this.getServerInfo(),r=this.signEnvelope(`RESPONSE`,{request_id:e,info:n});if(!r)return;let i=Buffer.from(JSON.stringify(r));this.receiver.send(i,0,i.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},ya=class{transports=[];started=!1;constructor(e,t,n){this.config=e,this.getServerInfo=t,this.signer=n,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new va(e.udp,t,n))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const ba=()=>process.env.BRICKS_BUTTRESS_STATE_DIR||l.join(x.homedir(),`.bricks-cli`,`buttress`),xa=()=>l.join(ba(),`state.json`),Sa=e=>{if(!e||typeof e!=`object`)return!1;let t=e;return typeof t.id==`string`&&typeof t.serverId==`string`&&typeof t.issuerPublicKey==`string`&&typeof t.kid==`string`},Ca=e=>{if(!e||typeof e!=`object`)return!1;let t=e;return typeof t.publicKeySpki==`string`&&typeof t.privateKeyPkcs8==`string`&&typeof t.kid==`string`},wa=()=>{let e=xa();try{let t=k.readFileSync(e,`utf8`),n=JSON.parse(t);return{workspace:Sa(n?.workspace)?n.workspace:null,serverKeyPair:Ca(n?.serverKeyPair)?n.serverKeyPair:null}}catch(e){return e.code!==`ENOENT`&&console.warn(`[Buttress] Failed to read workspace state:`,e.message),{workspace:null,serverKeyPair:null}}},$=ei(),Ta=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function 
Ea({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
37
37
  `),n.push(`${$.name} v${$.version}`),n.push(`## Model Capabilities Comparison
38
- `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});u.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let d=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),u.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=d(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=qe(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=d(c),u=t?.recurrentMemoryBytes||0,p=u>0?d(u):`-`,m=d(t?.modelBytes&&(c!=null||u>0)?t.modelBytes+(c||0)+u:t?.fit?.totalRequiredBytes),h=f(t?.fit?.fitsInGpu),g=f(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let 
e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=d(s),h=d(t?.modelBytes&&(s!=null||u>0)?t.modelBytes+(s||0)+u:t?.limitedFit?.totalRequiredBytes),g=f(t?.limitedFit?.fitsInGpu),_=f(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
38
+ `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});u.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let d=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),u.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=d(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=Je(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=d(c),u=t?.recurrentMemoryBytes||0,p=u>0?d(u):`-`,m=d(t?.modelBytes&&(c!=null||u>0)?t.modelBytes+(c||0)+u:t?.fit?.totalRequiredBytes),h=f(t?.fit?.fitsInGpu),g=f(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let 
e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=d(s),h=d(t?.modelBytes&&(s!=null||u>0)?t.modelBytes+(s||0)+u:t?.limitedFit?.totalRequiredBytes),g=f(t?.limitedFit?.fitsInGpu),_=f(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
39
39
  ---`),n.push(`
40
40
  ### System Information`);let p=null;if(process.platform!==`win32`)try{p=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(p?n.push(`- **System:** ${p}`):(n.push(`- **Hostname:** ${x.hostname()}`),n.push(`- **OS:** ${x.type()} ${x.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${x.cpus().length}`),n.push(`- **Total System Memory:** ${(x.totalmem()/1024/1024/1024).toFixed(2)} GB`),u.length>0){let e=u[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
41
41
  ### Command Used`);let m=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${m}\n\`\`\``),n.push(`
42
42
  ### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
43
43
  ### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let h=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,g=l.join(process.cwd(),h);k.writeFileSync(g,n.join(`
44
- `),`utf8`),console.log(`\nModel capabilities table saved to: ${g}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function Oa({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=a(n[e]||{},t):n[e]=t}),n},o=e=>a(structuredClone(i),e||{}),s=(e=>{if(Array.isArray(r)&&r.length>0){let t=r.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return o(n)}}return Object.keys(i).length>0?o({}):null})(e);e&&(s={...s||{},model:{...s?.model||{},repo_id:e}});let c=await J(`ggml-llm`,null,{config:s,includeBreakdown:!0}),l=c.buttress?.selected||null,u=c.modelConfig||null;if(e||u?.repoId){console.log(`
44
+ `),`utf8`),console.log(`\nModel capabilities table saved to: ${g}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function Da({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=a(n[e]||{},t):n[e]=t}),n},o=e=>a(structuredClone(i),e||{}),s=(e=>{if(Array.isArray(r)&&r.length>0){let t=r.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return o(n)}}return Object.keys(i).length>0?o({}):null})(e);e&&(s={...s||{},model:{...s?.model||{},repo_id:e}});let c=await J(`ggml-llm`,null,{config:s,includeBreakdown:!0}),l=c.buttress?.selected||null,u=c.modelConfig||null;if(e||u?.repoId){console.log(`
45
45
  === Model Information ===`);let t=e||u?.repoId;if(console.log(`Repository ID: ${t}`),u?.quantization&&console.log(`Quantization: ${u.quantization}`),u?.nCtx&&console.log(`Context Length: ${u.nCtx}`),l?.quantization){let{fileType:e}=l.quantization;e!=null&&console.log(`Model File Type (GGUF): ${e}`)}let n=u?.cache_type_k||`f16`,r=u?.cache_type_v||`f16`;if(console.log(`KV Cache Type: K=${n}, V=${r}`),l?.modelBytes&&l?.kvCacheBytes!=null){console.log(`Model Size: ${(l.modelBytes/1024/1024/1024).toFixed(2)} GB`),l.kvInfo?console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(l.kvInfo)})`):console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);let e=l.recurrentMemoryBytes||0;e>0&&console.log(`Recurrent Memory: ${(e/1024/1024/1024).toFixed(2)} GB`);let t=l.modelBytes+l.kvCacheBytes+e;if(console.log(`Total Required Memory: ${(t/1024/1024/1024).toFixed(2)} GB`),l.memoryLimitedCtx!=null){let e=l.memoryLimitedCtx,t=l.kvInfo?.nCtxTrain;t?console.log(`\nMemory-Limited Context: ${e} (Train: ${t})`):console.log(`\nMemory-Limited Context: ${e}`),l.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(l.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(c.buttress?.selected?.fit){let{totalRequiredBytes:e}=c.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}}if(c.buttress?.selected){let{selected:e}=c.buttress;console.log(`
46
46
  === Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${x.hostname()}`),console.log(`OS: ${x.type()} ${x.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${x.cpus().length}`),console.log(`Total System Memory: ${(x.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
47
47
  --- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
48
48
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),e.limitedFit&&(console.log(`
49
49
  --- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU (Limited): ${e.limitedFit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
50
- === Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function ka({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating STT model capabilities comparison...
50
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function Oa({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating STT model capabilities comparison...
51
51
  `),n.push(`${$.name} v${$.version}`),n.push(`## STT Model Capabilities Comparison
52
- `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=Ea(n);console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let a=c(r);a={...a||{},model:{...a?.model||{},repo_id:r,...i&&{filename:i}}};let o=await J(`ggml-stt`,null,{config:a,includeBreakdown:!0});u.push({modelId:n,repoId:r,filename:i,capabilities:o,modelInfo:o.buttress?.selected||null,modelConfig:o.modelConfig||null})}let d=e=>e?(e/1024/1024).toFixed(1):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |`),n.push(`|-------|-----------|------------------------|---------------------|----------|----------|`),u.forEach(({modelId:e,modelInfo:t})=>{let r=d(t?.modelBytes),i=d(t?.processingBytes||t?.kvCacheBytes),a=d(t?.fit?.totalRequiredBytes),o=f(t?.fit?.fitsInGpu),s=f(t?.fit?.fitsInCpu);n.push(`| ${e} | ${r} | ${i} | ${a} | ${o} | ${s} |`)}),n.push(`
52
+ `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=Ta(n);console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let a=c(r);a={...a||{},model:{...a?.model||{},repo_id:r,...i&&{filename:i}}};let o=await J(`ggml-stt`,null,{config:a,includeBreakdown:!0});u.push({modelId:n,repoId:r,filename:i,capabilities:o,modelInfo:o.buttress?.selected||null,modelConfig:o.modelConfig||null})}let d=e=>e?(e/1024/1024).toFixed(1):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |`),n.push(`|-------|-----------|------------------------|---------------------|----------|----------|`),u.forEach(({modelId:e,modelInfo:t})=>{let r=d(t?.modelBytes),i=d(t?.processingBytes||t?.kvCacheBytes),a=d(t?.fit?.totalRequiredBytes),o=f(t?.fit?.fitsInGpu),s=f(t?.fit?.fitsInCpu);n.push(`| ${e} | ${r} | ${i} | ${a} | ${o} | ${s} |`)}),n.push(`
53
53
  ---`),n.push(`
54
54
  ### System Information`);let p=null;if(process.platform!==`win32`)try{p=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(p?n.push(`- **System:** ${p}`):(n.push(`- **Hostname:** ${x.hostname()}`),n.push(`- **OS:** ${x.type()} ${x.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${x.cpus().length}`),n.push(`- **Total System Memory:** ${(x.totalmem()/1024/1024/1024).toFixed(2)} GB`),u.length>0){let e=u[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
55
55
  ### Command Used`);let m=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${m}\n\`\`\``),n.push(`
56
56
  ### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
57
57
  ### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let h=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,g=l.join(process.cwd(),h);k.writeFileSync(g,n.join(`
58
- `),`utf8`),console.log(`\nSTT model capabilities table saved to: ${g}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function Aa({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating MLX model capabilities comparison...
58
+ `),`utf8`),console.log(`\nSTT model capabilities table saved to: ${g}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function ka({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating MLX model capabilities comparison...
59
59
  `),n.push(`${$.name} v${$.version}`),n.push(`## MLX Model Capabilities Comparison
60
60
  `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`mlx-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},u=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...r?.model||{},repo_id:n}};let i=await J(`mlx-llm`,null,{config:r,includeBreakdown:!0});u.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let d=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,f=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quant | Size (GB) | Context | KV Cache (GB) | Total Required (GB) | Fits Unified Memory |`),n.push(`|----------|-------|-----------|---------|---------------|---------------------|---------------------|`),u.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.bits||r?.quantBits||`N/A`,a=typeof i==`number`?`${i}bit`:i,o=d(t?.modelBytes),s=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,c=d(t?.kvCacheBytes),l=d(t?.fit?.totalRequiredBytes),u=f(t?.fit?.fitsInGpu);n.push(`| ${e} | ${a} | ${o} | ${s} | ${c} | ${l} | ${u} |`);let p=t?.limitedFit!=null&&t?.memoryLimitedCtx!=null,m=!t?.fit?.fitsInGpu;if(p&&m){let e=t.memoryLimitedCtx,r=d(t.limitedKvCacheBytes),i=d(t.limitedFit.totalRequiredBytes),s=f(t.limitedFit.fitsInGpu);n.push(`| ↳ Limited | ${a} | ${o} | ${e} | ${r} | ${i} | ${s} |`)}}),n.push(`
61
61
  ---`),n.push(`
@@ -63,20 +63,20 @@ print(path)
63
63
  ### Command Used`);let m=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${m}\n\`\`\``),n.push(`
64
64
  ### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
65
65
  ### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=A.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let h=`mlx-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,g=l.join(process.cwd(),h);k.writeFileSync(g,n.join(`
66
- `),`utf8`),console.log(`\nMLX model capabilities table saved to: ${g}`),process.exit(0)}catch(e){console.error(`Failed to generate MLX model table:`,e.message),process.exit(1)}}async function ja({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: mlx-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=Object.keys(i).length>0?{...i}:null;e&&(a={...a||{},model:{...a?.model||{},repo_id:e}});let o=await J(`mlx-llm`,null,{config:a,includeBreakdown:!0});console.log(`
66
+ `),`utf8`),console.log(`\nMLX model capabilities table saved to: ${g}`),process.exit(0)}catch(e){console.error(`Failed to generate MLX model table:`,e.message),process.exit(1)}}async function Aa({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: mlx-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=Object.keys(i).length>0?{...i}:null;e&&(a={...a||{},model:{...a?.model||{},repo_id:e}});let o=await J(`mlx-llm`,null,{config:a,includeBreakdown:!0});console.log(`
67
67
  === Platform Information ===`),console.log(`Available: ${o.available?`Yes`:`No`}`),console.log(`OS: ${o.platform.os} (${o.platform.arch})`),o.platform.errors?.length>0&&console.log(`Errors: ${o.platform.errors.join(`; `)}`),console.log(`Python3: ${o.python.available?`Available`:`Not found`}`),console.log(`MLX (system): ${o.mlx.systemAvailable?`Available`:`Not installed (will use venv)`}`);let s=o.buttress?.selected,c=o.modelConfig;if(e||c?.repoId){console.log(`
68
68
  === Model Information ===`);let t=e||c?.repoId;if(console.log(`Repository ID: ${t}`),c?.architecture&&console.log(`Architecture: ${c.architecture}`),c?.quantBits&&console.log(`Quantization: ${c.quantBits}bit (group_size=${c.quantGroupSize||`N/A`})`),c?.nCtx&&console.log(`Max Context Length: ${c.nCtx}`),s?.modelBytes&&console.log(`Model Size: ${(s.modelBytes/1024/1024/1024).toFixed(2)} GB`),s?.kvCacheBytes!=null&&console.log(`KV Cache Size (full context): ${(s.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`),s?.kvInfo&&console.log(`KV Info: ${s.kvInfo.nLayer} layers, ${s.kvInfo.nHeadKv} KV heads, ${s.kvInfo.headDim} head dim`),s?.modelBytes&&s?.kvCacheBytes!=null){let e=s.modelBytes+s.kvCacheBytes;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}s?.memoryLimitedCtx!=null&&(console.log(`\nMemory-Limited Context: ${s.memoryLimitedCtx}`),s.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(s.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`))}if(o.buttress?.selected){let{selected:e}=o.buttress;console.log(`
69
69
  === Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${x.hostname()}`),console.log(`OS: ${x.type()} ${x.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${x.cpus().length}`),console.log(`Total System Memory: ${(x.totalmem()/1024/1024/1024).toFixed(2)} GB`),console.log(`
70
70
  --- Unified Memory (Metal) ---`),e.devices?.length>0&&console.log(`Device: ${e.devices[0].deviceName}`),console.log(`Total Memory: ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}%)`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
71
71
  --- Model Fit Analysis ---`),console.log(`Fits in Unified Memory: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),console.log(`Total Required: ${(e.fit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),e.limitedFit&&(console.log(`
72
72
  --- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
73
- === Full Capabilities JSON ===`),console.log(JSON.stringify(o,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function Ma({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-stt`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{repoId:n,filename:r}=Ea(e),{server:i,generators:a=[],...o}=t||{},s=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=s(n[e]||{},t):n[e]=t}),n},c=e=>s(structuredClone(o),e||{}),l=(e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return c(n)}}return Object.keys(o).length>0?c({}):null})(n);n&&(l={...l||{},model:{...o.runtime,...l?.model||{},repo_id:n,...r&&{filename:r}}});let u=await J(`ggml-stt`,null,{config:l,includeBreakdown:!0}),d=u.buttress?.selected||null,f=u.modelConfig||null;if(n||f?.repoId){console.log(`
73
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(o,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function ja({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-stt`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{repoId:n,filename:r}=Ta(e),{server:i,generators:a=[],...o}=t||{},s=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=s(n[e]||{},t):n[e]=t}),n},c=e=>s(structuredClone(o),e||{}),l=(e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return c(n)}}return Object.keys(o).length>0?c({}):null})(n);n&&(l={...l||{},model:{...o.runtime,...l?.model||{},repo_id:n,...r&&{filename:r}}});let u=await J(`ggml-stt`,null,{config:l,includeBreakdown:!0}),d=u.buttress?.selected||null,f=u.modelConfig||null;if(n||f?.repoId){console.log(`
74
74
  === Model Information ===`);let e=n||f?.repoId;console.log(`Repository ID: ${e}`),r&&console.log(`Filename: ${r}`),d?.modelBytes&&console.log(`Model Size: ${(d.modelBytes/1024/1024).toFixed(1)} MB`);let t=d?.processingBytes||d?.kvCacheBytes;if(t&&console.log(`Processing Buffer: ${(t/1024/1024).toFixed(1)} MB`),d?.modelBytes&&t)console.log(`Total Required Memory: ${((d.modelBytes+t)/1024/1024).toFixed(1)} MB`);else if(u.buttress?.selected?.fit){let{totalRequiredBytes:e}=u.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024).toFixed(1)} MB`)}}if(u.buttress?.selected){let{selected:e}=u.buttress;console.log(`
75
75
  === Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${x.hostname()}`),console.log(`OS: ${x.type()} ${x.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${x.cpus().length}`),console.log(`Total System Memory: ${(x.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
76
76
  --- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
77
77
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
78
- === Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}const Na=ti();var Pa=async()=>{(process.argv.includes(`--version`)||process.argv.includes(`-v`))&&(console.log(Na.version),process.exit(0)),(process.argv.includes(`--help`)||process.argv.includes(`-h`))&&(console.log(`
79
- bricks-buttress v${Na.version}
78
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}const Ma=ei();var Na=async()=>{(process.argv.includes(`--version`)||process.argv.includes(`-v`))&&(console.log(Ma.version),process.exit(0)),(process.argv.includes(`--help`)||process.argv.includes(`-h`))&&(console.log(`
79
+ bricks-buttress v${Ma.version}
80
80
 
81
81
  Buttress server for remote inference with GGML backends.
82
82
 
@@ -109,4 +109,4 @@ Examples:
109
109
  bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
110
110
  bricks-buttress --test-caps mlx-llm --test-models-default
111
111
  `),process.exit(0));let e=process.argv.findIndex(e=>e===`--port`||e===`-p`),t=e>=0?Number(process.argv[e+1]):void 0,n=process.argv.findIndex(e=>e===`--config`||e===`-c`),r=n>=0?process.argv[n+1]:null,i=null;if(r){let e;if(r.includes(`
112
- `))e=r;else{let t=l.resolve(r);try{e=k.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=A.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||t!==`ggml-llm`&&t!==`ggml-stt`&&t!==`mlx-llm`?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),Zr(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/Qwen3.5-27B-GGUF`,`unsloth/gemma-4-26B-A4B-it-GGUF`,`unsloth/gemma-4-31B-it-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=[`mlx-community/Qwen3.5-27B-8bit`,`mlx-community/Qwen3.5-27B-4bit`,`mlx-community/Qwen3.5-35B-A3B-8bit`,`mlx-community/Qwen3.5-35B-A3B-4bit`,`mlx-community/Qwen3-235B-A22B-8bit`,`mlx-community/Qwen3-235B-A22B-4bit`,`mlx-community/GLM-4.7-Flash-8bit`,`mlx-community/GLM-4.7-Flash-4bit`,`mlx-community/MiniMax-M2.5-4bit`,`mlx-community/gpt-oss-120b-4bit`,`mlx-community/gemma-4-26b-a4b-it-8bit`,`mlx-community/gemma-4-26b-a4b-it-4bit`,`mlx-community/gemma-4-31b-it-8bit`,`mlx-community/gemma-4-31b-it-4bit`],u=process.argv.findIndex(e=>e===`--test-caps`);if(u>=0){let e=process.argv[u+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&e!==`mlx-llm`&&(console.error(`Only ggml-llm, ggml-stt, and mlx-llm backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`mlx-llm`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model 
IDs`),process.exit(1)),await Aa({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Aa({modelIds:c,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await ja({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await ka({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await ka({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Ma({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Da({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Da({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Oa({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let d=Xi(i);t&&(d.server.port=t),d.server.port||(d.server.port=2080),Ua({config:d,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`||d.global.openai_compat?.enabled===!0,enableAnthropicMessages:process.env.ENABLE_ANTHROPIC_MESSAGES_ENDPOINT===`1`||d.global.anthropic_messages?.enabled===!0}).then(async({app:e,port:t,openaiEnabled:n,anthropicMessagesEnabled:r,autoDiscover:o})=>{let s=ma();console.log(`Buttress server listening on port ${t}`),console.log(`--------------------------------`),await Ba(),console.log();let c=e.store.workspaceState.workspace;if(c){let e=c.name?`${c.name} (${c.id})`:c.id;console.log(`Workspace: ${e}`),console.log(`- Server ID: ${c.serverId}`),console.log(`- Issuer kid: ${c.kid}`),console.log(`- Bound at: ${c.boundAt}`)}else console.log(`Workspace: not bound`),console.log(`- State file: ${Sa()}`),console.log("- Run `bricks buttress bind` from a workspace-authed CLI to 
pair.");console.log();let l={"ggml-llm":`LLM (GGML)`,"ggml-stt":`STT (GGML)`,"mlx-llm":`LLM (MLX)`};console.log(`Current supported Generators:`);let u=new Set((d?.generators||[]).map(e=>e.type).filter(Boolean));if(u.size===0)console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`);else for(let e of u)console.log(`- ${l[e]||e}`);console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${s}:${t} to connect to this server via LAN.`),console.log(`- Visit http://${s}:${t}/status to see status via LAN.`),console.log(),n?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/oai-compat/v1`),console.log(`- Chat completions: POST http://${s}:${t}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${s}:${t}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set [openai_compat] enabled = true in config to enable`),console.log()),r?(console.log(`Anthropic Messages API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/anthropic-messages`),console.log(`- Messages: POST http://${s}:${t}/anthropic-messages/v1/messages`),console.log(`- Count tokens: POST http://${s}:${t}/anthropic-messages/v1/messages/count_tokens`),console.log()):(console.log(`Anthropic Messages API [EXPERIMENTAL]: disabled`),console.log(` Set [anthropic_messages] enabled = true in config to enable`),console.log()),o&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};const{version:Fa,name:Ia}=ti(),La=async()=>{let e=`https://registry.npmjs.org/${Ia}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await r.json()).version||null}catch{return 
null}},Ra=(e,t)=>{if(!t)return!1;let n=e.split(/[.-]/),r=t.split(/[.-]/);for(let e=0;e<Math.max(n.length,r.length);e+=1){let t=parseInt(n[e])||0,i=parseInt(r[e])||0;if(i>t)return!0;if(i<t)return!1}return!1},za=e=>{console.log(``),console.log(`\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,Fa.padEnd(12),e.padEnd(12),`\x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`,Ia.padEnd(27)),console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`),console.log(``)},Ba=async()=>{try{let e=await La();e&&Ra(Fa,e)&&za(e)}catch{}},Va=!(`Bun`in globalThis),Ha=async({backend:e,router:r,config:i,enableOpenAICompat:o,enableAnthropicMessages:s})=>{try{await f.mkdir(i.server.temp_file_dir,{recursive:!0})}catch{}let u=ma()||`0.0.0.0`,p=Ta(),m=p.workspace!=null,h=await pa(Ki(i,i.generators.map(e=>e.type))),g={id:i.server.id,name:i.server.name,version:Fa,address:u,port:i.server.port,url:`http://${u}:${i.server.port}`,generators:h,authentication:m?{required:!0,type:`workspace-jwt`,kid:p.workspace.kid,bound:!0}:{required:!1,type:`workspace-jwt`,bound:!1},...m?{workspace:{id:p.workspace.id,name:p.workspace.name}}:{}},_=new n({serve:{maxRequestBodySize:i.server.max_body_size},websocket:{idleTimeout:Math.ceil(i.server.session_timeout/1e3)},adapter:Va?t():void 0}).state({sessions:new Map,backend:e||Vr,config:i,serverInfo:g,workspaceState:p});r&&_.use(r),i.autodiscover?.http?.enabled&&_.use(oi(i)),_.use(vi),_.use(Ci),o&&_.use(Mi(i)),s&&_.use(Ri(i));let v=(e,t)=>di(ui(e,t),p.workspace),y={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603},b=e=>e.id??e.raw?.id??e.remoteAddress,x=new Map,S=new Map;return _.ws(`/buttress/rpc`,{parse:(e,t)=>{if(typeof 
t==`string`)try{return JSON.parse(t)}catch{return e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Invalid request`},id:null})),null}return t},body:a.Object({jsonrpc:a.String(),method:a.String(),params:a.String(),id:a.String()}),async open(e){let t=b(e),n=e.data?.headers,r=e.data?.query,i,a=new Promise(e=>{i=e});if(S.set(t,{identity:null,ready:a}),!m){console.log(`[Request] New connection: ${t} (unbound, no auth)`),i(!0);return}let o=await v(n||{},r);if(!o){console.warn(`[Auth] Rejecting WS ${t}: invalid or missing workspace-access token`),S.delete(t),i(!1),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:null})),e.close(1008,`UNAUTHORIZED`);return}console.log(`[Request] New connection: ${t} (subject=${o.subjectType}:${o.subjectId})`);let s=S.get(t);s&&(s.identity=o),i(!0)},async message(e,{id:t,method:n,params:r}){let{sessions:i}=e.data.store,a=b(e),o=S.get(a);if(o?.ready&&!await o.ready){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}let s=S.get(a)?.identity??null;if(m&&!s){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}if(n===`init`){let[n]=la(r)??[],o,l=!1,u=n&&i.has(n)?i.get(n):null,d=!u?.identity||!!s&&s.subjectId===u.identity.subjectId&&s.subjectType===u.identity.subjectType;if(u&&d)u.timeout&&=(clearTimeout(u.timeout),null),u.identity=s,u.currentPeerId=a,o=n,l=!0,console.log(`[Request] Session restored: ${o}`);else{u&&console.warn(`[Request] Refused to restore session ${n}: identity mismatch (session=${u.identity?.subjectType}:${u.identity?.subjectId} peer=${s?.subjectType}:${s?.subjectId})`),o=c();let e={streams:new Map,streamReaders:new Map,generators:new Set,initializedContexts:new Set,timeout:null,identity:s,currentPeerId:a};i.set(o,e),console.log(`[Request] New session: 
${o}`)}x.set(a,o),e.send(JSON.stringify({jsonrpc:`2.0`,result:ua({sessionId:o,restored:l}),id:t}));return}let l=x.get(a);if(!l){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}let u=i.get(l);if(!u){x.delete(a),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}console.log(`[Request] Received request from ${l}: ${n}`);let[f,p]=n.split(`.`),h=ca[f]?.[p];if(!h&&n!==`cancel`&&n!==`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.METHOD_NOT_FOUND,message:`Method not found`},id:t}));return}try{if(n===`cancel`){let e=u.streamReaders.get(t);e&&(e.reader.cancel(),u.streamReaders.delete(t));return}if(n===`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,result:`pong`,id:t}));return}let i=la(r),o=sa[f]?.[p],s=o?o.parse(i):i,c=await h({...e.data.store,peerId:l,session:u},...s);if(c instanceof d){let r=c.getReader();u.streamReaders.set(t,{reader:r,peerId:a}),e.send(JSON.stringify({jsonrpc:`2.0`,result:{type:`stream`},id:t}));try{for(;;){let{value:n,done:i}=await r.read();if(i)break;let{event:a,data:o}=n;e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/${a}`,params:ua(o),id:t}))}e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_end`,id:t}))}catch(r){console.error(`[RPC] Stream error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_error`,params:ua(r),id:t}))}u.streamReaders.delete(t)}else e.send(JSON.stringify({jsonrpc:`2.0`,result:ua(c),id:t}))}catch(r){if(r instanceof j){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_PARAMS,message:`Invalid params`,data:r.issues},id:t}));return}console.error(`[RPC] Handler error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INTERNAL_ERROR,message:String(r)},id:t}))}},async close(e){let t=b(e),n=x.get(t);if(x.delete(t),S.delete(t),!n){console.log(`[Request] Connection closed (no session)`);return}console.log(`[Request] Connection 
closed: ${n}`);let{backend:r,sessions:a}=e.data.store,o=a.get(n);if(o){for(let[e,n]of o.streamReaders)n.peerId===t&&(n.reader.cancel().catch(()=>{}),o.streamReaders.delete(e));if(o.currentPeerId!==t){console.log(`[Request] Session ${n} already adopted by another peer; skip arm`);return}o.timeout=setTimeout(()=>{if(o.currentPeerId!==t)return;a.delete(n),console.log(`[Request] Session timed out: ${n}`);let{generators:e}=o;e.forEach(e=>{r.finalizeGenerator(e)}),f.rm(l.join(i.server.temp_file_dir,n),{recursive:!0,force:!0}).catch(()=>{})},i.server.session_timeout)}}}),{app:_,config:i}},Ua=async({backend:e,router:t,config:n,enableOpenAICompat:r=!1,enableAnthropicMessages:i=!1})=>{let{app:a,config:s}=await Ha({backend:e,router:t,config:n,enableOpenAICompat:r,enableAnthropicMessages:i}),{server:{port:c}}=s,l=[new Promise(e=>a.listen(c,e))],u=null;if(s.autodiscover){let{workspace:e,serverKeyPair:t}=a.store.workspaceState;if(e&&t){let e={kid:t.kid,privateKey:o.createPrivateKey({key:Buffer.from(t.privateKeyPkcs8,`base64`),format:`der`,type:`pkcs8`})};u=new ba(s.autodiscover,()=>a.store.serverInfo,e),l.push(u.start())}else e?console.warn("[Autodiscover] disabled: bound to a workspace but state.json is missing serverKeyPair. Re-run `bricks buttress bind` to register a per-server announce key (required for v2.0 signed UDP discovery)."):console.warn("[Autodiscover] disabled: buttress-server is not bound to a workspace. Run `bricks buttress bind` from a workspace-authed CLI to pair.")}return await Promise.all(l),{app:a,port:c,openaiEnabled:r,anthropicMessagesEnabled:i,autoDiscover:u}},Wa=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];(process.argv[1]?.endsWith(`/bricks-buttress`)||Wa.includes(process.argv[1]))&&await Pa();export{Ba as checkAndNotifyUpdates,La as checkForUpdates,Ra as compareVersions,Ha as createServer,za as logUpdateMessage,Xi as processConfig,Zr as startModelDownload,Ua as startServer};
112
+ `))e=r;else{let t=l.resolve(r);try{e=k.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=A.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||t!==`ggml-llm`&&t!==`ggml-stt`&&t!==`mlx-llm`?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),Zr(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/Qwen3.5-27B-GGUF`,`unsloth/gemma-4-26B-A4B-it-GGUF`,`unsloth/gemma-4-31B-it-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=[`mlx-community/Qwen3.5-27B-8bit`,`mlx-community/Qwen3.5-27B-4bit`,`mlx-community/Qwen3.5-35B-A3B-8bit`,`mlx-community/Qwen3.5-35B-A3B-4bit`,`mlx-community/Qwen3-235B-A22B-8bit`,`mlx-community/Qwen3-235B-A22B-4bit`,`mlx-community/GLM-4.7-Flash-8bit`,`mlx-community/GLM-4.7-Flash-4bit`,`mlx-community/MiniMax-M2.5-4bit`,`mlx-community/gpt-oss-120b-4bit`,`mlx-community/gemma-4-26b-a4b-it-8bit`,`mlx-community/gemma-4-26b-a4b-it-4bit`,`mlx-community/gemma-4-31b-it-8bit`,`mlx-community/gemma-4-31b-it-4bit`],u=process.argv.findIndex(e=>e===`--test-caps`);if(u>=0){let e=process.argv[u+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&e!==`mlx-llm`&&(console.error(`Only ggml-llm, ggml-stt, and mlx-llm backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`mlx-llm`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model 
IDs`),process.exit(1)),await ka({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await ka({modelIds:c,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Aa({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Oa({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Oa({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await ja({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Ea({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Ea({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Da({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let d=Yi(i);t&&(d.server.port=t),d.server.port||(d.server.port=2080),Ha({config:d,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`||d.global.openai_compat?.enabled===!0,enableAnthropicMessages:process.env.ENABLE_ANTHROPIC_MESSAGES_ENDPOINT===`1`||d.global.anthropic_messages?.enabled===!0}).then(async({app:e,port:t,openaiEnabled:n,anthropicMessagesEnabled:r,autoDiscover:o})=>{let s=pa();console.log(`Buttress server listening on port ${t}`),console.log(`--------------------------------`),await za(),console.log();let c=e.store.workspaceState.workspace;if(c){let e=c.name?`${c.name} (${c.id})`:c.id;console.log(`Workspace: ${e}`),console.log(`- Server ID: ${c.serverId}`),console.log(`- Issuer kid: ${c.kid}`),console.log(`- Bound at: ${c.boundAt}`)}else console.log(`Workspace: not bound`),console.log(`- State file: ${xa()}`),console.log("- Run `bricks buttress bind` from a workspace-authed CLI to 
pair.");console.log();let l={"ggml-llm":`LLM (GGML)`,"ggml-stt":`STT (GGML)`,"mlx-llm":`LLM (MLX)`};console.log(`Current supported Generators:`);let u=new Set((d?.generators||[]).map(e=>e.type).filter(Boolean));if(u.size===0)console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`);else for(let e of u)console.log(`- ${l[e]||e}`);console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${s}:${t} to connect to this server via LAN.`),console.log(`- Visit http://${s}:${t}/status to see status via LAN.`),console.log(),n?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/oai-compat/v1`),console.log(`- Chat completions: POST http://${s}:${t}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${s}:${t}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set [openai_compat] enabled = true in config to enable`),console.log()),r?(console.log(`Anthropic Messages API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/anthropic-messages`),console.log(`- Messages: POST http://${s}:${t}/anthropic-messages/v1/messages`),console.log(`- Count tokens: POST http://${s}:${t}/anthropic-messages/v1/messages/count_tokens`),console.log()):(console.log(`Anthropic Messages API [EXPERIMENTAL]: disabled`),console.log(` Set [anthropic_messages] enabled = true in config to enable`),console.log()),o&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};
/* Installed package version (Pa) and package name (Fa), taken from ei(). ei is defined outside this view — presumably it reads package.json; confirm. */
const{version:Pa,name:Fa}=ei(),
/**
 * checkForUpdates (exported below as `checkForUpdates`).
 * GETs the package's `latest` document from registry.npmjs.org, aborting the request
 * after 3000 ms. Resolves to the document's `version` field, or to null when the response
 * is not ok, carries no version, or the request throws (including the abort).
 * NOTE(review): the abort timer is cleared only on the success path; after an early fetch
 * failure it appears to stay pending until it fires — confirm whether that matters.
 */
Ia=async()=>{let e=`https://registry.npmjs.org/${Fa}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await r.json()).version||null}catch{return 
null}},La=(e,t)=>{if(!t)return!1;let n=e.split(/[.-]/),r=t.split(/[.-]/);for(let e=0;e<Math.max(n.length,r.length);e+=1){let t=parseInt(n[e])||0,i=parseInt(r[e])||0;if(i>t)return!0;if(i<t)return!1}return!1},Ra=e=>{console.log(``),console.log(`\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,Pa.padEnd(12),e.padEnd(12),`\x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`,Fa.padEnd(27)),console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`),console.log(``)},za=async()=>{try{let e=await Ia();e&&La(Pa,e)&&Ra(e)}catch{}},Ba=!(`Bun`in globalThis),Va=async({backend:e,router:r,config:i,enableOpenAICompat:o,enableAnthropicMessages:s})=>{try{await f.mkdir(i.server.temp_file_dir,{recursive:!0})}catch{}let u=pa()||`0.0.0.0`,p=wa(),m=p.workspace!=null,h=await fa(Gi(i,i.generators.map(e=>e.type))),g={id:i.server.id,name:i.server.name,version:Pa,address:u,port:i.server.port,url:`http://${u}:${i.server.port}`,generators:h,authentication:m?{required:!0,type:`workspace-jwt`,kid:p.workspace.kid,bound:!0}:{required:!1,type:`workspace-jwt`,bound:!1},...m?{workspace:{id:p.workspace.id,name:p.workspace.name}}:{}},_=new n({serve:{maxRequestBodySize:i.server.max_body_size},websocket:{idleTimeout:Math.ceil(i.server.session_timeout/1e3)},adapter:Ba?t():void 0}).state({sessions:new Map,backend:e||Vr,config:i,serverInfo:g,workspaceState:p});r&&_.use(r),i.autodiscover?.http?.enabled&&_.use(ai(i)),_.use(_i),_.use(Si),o&&_.use(ji(i)),s&&_.use(Li(i));let v=(e,t)=>ui(li(e,t),p.workspace),y={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603},b=e=>e.id??e.raw?.id??e.remoteAddress,x=new Map,S=new Map;return _.ws(`/buttress/rpc`,{parse:(e,t)=>{if(typeof 
t==`string`)try{return JSON.parse(t)}catch{return e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Invalid request`},id:null})),null}return t},
/* Frame schema: jsonrpc, method, params and id are all declared as strings; params is passed through ca() before use. */
body:a.Object({jsonrpc:a.String(),method:a.String(),params:a.String(),id:a.String()}),
/* open: records the connection in S with a pending `ready` promise. When the server is not
   bound to a workspace (m is false) the promise resolves true at once. Otherwise v(headers, query)
   must yield an identity; if it does not, the entry is removed, `ready` resolves false, an
   Unauthorized error frame is sent and the socket is closed with code 1008. */
async open(e){let t=b(e),n=e.data?.headers,r=e.data?.query,i,a=new Promise(e=>{i=e});if(S.set(t,{identity:null,ready:a}),!m){console.log(`[Request] New connection: ${t} (unbound, no auth)`),i(!0);return}let o=await v(n||{},r);if(!o){console.warn(`[Auth] Rejecting WS ${t}: invalid or missing workspace-access token`),S.delete(t),i(!1),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:null})),e.close(1008,`UNAUTHORIZED`);return}console.log(`[Request] New connection: ${t} (subject=${o.subjectType}:${o.subjectId})`);let s=S.get(t);s&&(s.identity=o),i(!0)},
/* message: waits for the open() result and rejects peers without an identity when workspace-bound.
   "init" restores an existing session when the stored identity is absent or matches the peer
   (clearing any pending expiry timer), otherwise it creates a new session keyed by c().
   Every other call needs an initialised session and is dispatched as "namespace.method" through
   the sa table, with params optionally validated by the matching oa schema. "cancel" and "ping"
   are handled inline. A ReadableStream result is forwarded as notification/EVENT frames followed
   by notification/_end, or notification/_error if reading throws. A ZodError maps to
   INVALID_PARAMS and any other throw to INTERNAL_ERROR.
   NOTE(review): "cancel" returns without sending a reply and does not catch a rejected
   reader.cancel(), unlike close() below — confirm this is intended. */
async message(e,{id:t,method:n,params:r}){let{sessions:i}=e.data.store,a=b(e),o=S.get(a);if(o?.ready&&!await o.ready){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}let s=S.get(a)?.identity??null;if(m&&!s){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}if(n===`init`){let[n]=ca(r)??[],o,l=!1,u=n&&i.has(n)?i.get(n):null,d=!u?.identity||!!s&&s.subjectId===u.identity.subjectId&&s.subjectType===u.identity.subjectType;if(u&&d)u.timeout&&=(clearTimeout(u.timeout),null),u.identity=s,u.currentPeerId=a,o=n,l=!0,console.log(`[Request] Session restored: ${o}`);else{u&&console.warn(`[Request] Refused to restore session ${n}: identity mismatch (session=${u.identity?.subjectType}:${u.identity?.subjectId} peer=${s?.subjectType}:${s?.subjectId})`),o=c();let e={streams:new Map,streamReaders:new Map,generators:new Set,initializedContexts:new Set,timeout:null,identity:s,currentPeerId:a};i.set(o,e),console.log(`[Request] New session: 
${o}`)}x.set(a,o),e.send(JSON.stringify({jsonrpc:`2.0`,result:la({sessionId:o,restored:l}),id:t}));return}let l=x.get(a);if(!l){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}let u=i.get(l);if(!u){x.delete(a),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}console.log(`[Request] Received request from ${l}: ${n}`);let[f,p]=n.split(`.`),h=sa[f]?.[p];if(!h&&n!==`cancel`&&n!==`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.METHOD_NOT_FOUND,message:`Method not found`},id:t}));return}try{if(n===`cancel`){let e=u.streamReaders.get(t);e&&(e.reader.cancel(),u.streamReaders.delete(t));return}if(n===`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,result:`pong`,id:t}));return}let i=ca(r),o=oa[f]?.[p],s=o?o.parse(i):i,c=await h({...e.data.store,peerId:l,session:u},...s);if(c instanceof d){let r=c.getReader();u.streamReaders.set(t,{reader:r,peerId:a}),e.send(JSON.stringify({jsonrpc:`2.0`,result:{type:`stream`},id:t}));try{for(;;){let{value:n,done:i}=await r.read();if(i)break;let{event:a,data:o}=n;e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/${a}`,params:la(o),id:t}))}e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_end`,id:t}))}catch(r){console.error(`[RPC] Stream error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_error`,params:la(r),id:t}))}u.streamReaders.delete(t)}else e.send(JSON.stringify({jsonrpc:`2.0`,result:la(c),id:t}))}catch(r){if(r instanceof j){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_PARAMS,message:`Invalid params`,data:r.issues},id:t}));return}console.error(`[RPC] Handler error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INTERNAL_ERROR,message:String(r)},id:t}))}},
/* close: drops the peer's session mapping and auth entry, then cancels the stream readers this
   peer owns. If the peer still owns the session, it arms a timer of config.server.session_timeout
   that deletes the session, finalizes its generators and removes its temp directory. The timer is
   skipped, both when arming and when firing, if another peer has adopted the session. */
async close(e){let t=b(e),n=x.get(t);if(x.delete(t),S.delete(t),!n){console.log(`[Request] Connection closed (no session)`);return}console.log(`[Request] Connection 
closed: ${n}`);let{backend:r,sessions:a}=e.data.store,o=a.get(n);if(o){for(let[e,n]of o.streamReaders)n.peerId===t&&(n.reader.cancel().catch(()=>{}),o.streamReaders.delete(e));if(o.currentPeerId!==t){console.log(`[Request] Session ${n} already adopted by another peer; skip arm`);return}o.timeout=setTimeout(()=>{if(o.currentPeerId!==t)return;a.delete(n),console.log(`[Request] Session timed out: ${n}`);let{generators:e}=o;e.forEach(e=>{r.finalizeGenerator(e)}),f.rm(l.join(i.server.temp_file_dir,n),{recursive:!0,force:!0}).catch(()=>{})},i.server.session_timeout)}}}),{app:_,config:i}},
/**
 * startServer (exported below as `startServer`).
 * Awaits Va(...) to obtain {app, config}, starts listening on config.server.port and,
 * when config.autodiscover is set, starts a `ya` announcer. The announcer is only created
 * when workspaceState has both a workspace and a serverKeyPair; it receives {kid, privateKey}
 * built from the base64 DER/PKCS#8 key in serverKeyPair. Otherwise a warning is logged and
 * autoDiscover stays null.
 * Resolves, after listen (and the announcer's start(), if any) complete, to
 * {app, port, openaiEnabled, anthropicMessagesEnabled, autoDiscover}.
 * Both enable* flags default to false.
 */
Ha=async({backend:e,router:t,config:n,enableOpenAICompat:r=!1,enableAnthropicMessages:i=!1})=>{let{app:a,config:s}=await Va({backend:e,router:t,config:n,enableOpenAICompat:r,enableAnthropicMessages:i}),{server:{port:c}}=s,l=[new Promise(e=>a.listen(c,e))],u=null;if(s.autodiscover){let{workspace:e,serverKeyPair:t}=a.store.workspaceState;if(e&&t){let e={kid:t.kid,privateKey:o.createPrivateKey({key:Buffer.from(t.privateKeyPkcs8,`base64`),format:`der`,type:`pkcs8`})};u=new ya(s.autodiscover,()=>a.store.serverInfo,e),l.push(u.start())}else e?console.warn("[Autodiscover] disabled: bound to a workspace but state.json is missing serverKeyPair. Re-run `bricks buttress bind` to register a per-server announce key (required for v2.0 signed UDP discovery)."):console.warn("[Autodiscover] disabled: buttress-server is not bound to a workspace. Run `bricks buttress bind` from a workspace-authed CLI to pair.")}return await Promise.all(l),{app:a,port:c,openaiEnabled:r,anthropicMessagesEnabled:i,autoDiscover:u}},
/* Paths this module may have when executed directly: index.mjs or index.ts resolved against import.meta.url. */
/* NOTE(review): URL#pathname is compared with process.argv[1]; on Windows the pathname form differs from a filesystem path — confirm whether fileURLToPath is needed. */
Ua=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];
/* Run the CLI main (Na) only when invoked as the `bricks-buttress` binary or as this file itself. */
(process.argv[1]?.endsWith(`/bricks-buttress`)||Ua.includes(process.argv[1]))&&await Na();
/* Public API of the bundle. */
export{za as checkAndNotifyUpdates,Ia as checkForUpdates,La as compareVersions,Va as createServer,Ra as logUpdateMessage,Yi as processConfig,Zr as startModelDownload,Ha as startServer};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fugood/buttress-server",
3
- "version": "2.25.0-beta.16",
3
+ "version": "2.25.0-beta.19",
4
4
  "main": "lib/index.mjs",
5
5
  "types": "lib/index.d.mts",
6
6
  "type": "module",
@@ -45,5 +45,5 @@
45
45
  "tsdown": "^0.20.1",
46
46
  "typescript": "^5.9.3"
47
47
  },
48
- "gitHead": "f38cee540522e4cf4616f7664a2a03584cd42d30"
48
+ "gitHead": "c275ff266863070f0a4d0c2cc6cb7acb8fdce3d4"
49
49
  }