@fugood/buttress-server 2.25.0-beta.26 → 2.25.0-beta.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.d.mts +117 -4
- package/lib/index.mjs +43 -43
- package/package.json +4 -2
- package/public/status.html +162 -1
package/lib/index.mjs
CHANGED
|
@@ -1,82 +1,82 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import o,{createHash as s,randomUUID as c}from"node:crypto";import l from"node:path";import*as u from"node:stream/web";import{ReadableStream as d}from"node:stream/web";import f,{mkdir as p,open as m,readFile as h,readdir as g,rename as _,stat as v,unlink as y,writeFile as b}from"node:fs/promises";import x from"node:os";import{gguf as S}from"@huggingface/gguf";import{getBackendDevicesInfo as C,isLibVariantAvailable as w,loadModel as T}from"@fugood/llama.node";import E from"bytes";import{EventEmitter as D}from"node:events";import{initWhisper as ee}from"@fugood/whisper.node";import{fileURLToPath as te}from"node:url";import{execFile as ne,execSync as O,spawn as re}from"node:child_process";import k from"node:fs";import A from"@iarna/toml";import{ZodError as j,z as M}from"zod";import{importSPKI as N,jwtVerify as P}from"jose";import{cors as ie}from"@elysiajs/cors";import F from"node-machine-id";import I from"ms";import{Buffer as L}from"node:buffer";import R from"node:dgram";const z=1024**3,ae=(e,t,n)=>Math.min(Math.max(e,t),n),oe=e=>e?40:0,se=(e=0)=>e?ae(e/(12*z)*20,0,20):0,ce=(e=0)=>e?ae(e/(32*z)*10,0,10):0,le=e=>e?10:0,B=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},ue=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=oe(n)+B(t,e)+se(r),s=ce(i),c=le(a);return 
Math.min(100,Math.round(o+s+c))},de=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:oe(n),variant:B(t,e),gpuMemory:se(r),cpuMemory:ce(i),availability:le(a)}),fe=[`cuda`,`vulkan`,`snapdragon`,`default`],pe=.85,me=.5,he=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],ge=e=>e&&String(e).trim().toLowerCase()||null,_e=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=fe}={})=>{let i=[];e&&i.push(e),i.push(...he(t)),i.push(...he(n)),i.push(...r);let a=new Set;for(let e of i){let t=ge(e);t&&a.add(t)}return Array.from(a)},ve=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},ye=e=>Array.isArray(e)?e.map(e=>({...e})):[],be=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,xe=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=ye(be(n,r)),l=c.some(ve),u=c.filter(e=>ve(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:ue(m),breakdown:o?de(m):null,error:s,timestamp:new Date().toISOString()}},Se=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let 
r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},Ce=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=pe,cpuMemoryFraction:a=me,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=fe}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=_e({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(xe({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(xe({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=Se({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=Se({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return 
v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},we=`ggml-llm`,Te=[`cuda`,`vulkan`,`default`],Ee=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=pe,cpuMemoryFraction:a=me,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>Ce({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:Te,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:Te}),De=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=ue(d),p=l?de(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=Se({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=Se({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},Oe=new Map([[we,Ce],[`ggml-stt`,Ee],[`mlx-llm`,De]]),ke=async({platform:e,totalMemoryInBytes:t,backend:n=we,dependencies:r,...i}={})=>{let a=Oe.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await 
a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},Ae={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},je=e=>Ae[e?String(e).toLowerCase():`f16`]||Ae.f16,Me=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=je(a.k),h=je(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},Ne=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},Pe=e=>e?String(e).trim().toLowerCase():null,Fe=(e={},t=null)=>{if(!e)return null;let n=Pe(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Ie=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return 
o*a+c},Le=({arch:e,nLayer:t=0})=>({arch:Pe(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Re=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),ze=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=Pe(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=Fe(t,r),o=r?Re.get(r):null;if(!o)return Le({arch:r,nLayer:n});let 
s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return Le({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Ie(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Be=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),Ve=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),He=e=>e?String(e).trim().toLowerCase():null,Ue=e=>{let t=He(e);return t?Be.has(t):!1},We=e=>{let t=He(e);return t?Ve.has(t):!1},Ge=e=>Ue(e)?`recurrent`:We(e)?`hybrid`:`transformer`,Ke=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let 
d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},qe=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ge(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Me(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},Je=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ge(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let 
e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Ye=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new D;V.setMaxListeners(100);const Xe=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var Ze=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Xe(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Xe(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Xe(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new Ze,U=new Ze;let Qe=0;function $e(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function et(e){return Qe+=1,{subscriberId:Qe,unsubscribe:$e(e)}}function tt(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return 
i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function nt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function rt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function it(e){return{timestamp:new Date().toISOString(),ggmlLlm:tt(e),ggmlStt:nt(e),mlxLlm:rt(e)}}const{ReadableStream:at,WritableStream:ot}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,st=(e,t)=>Object.prototype.hasOwnProperty.call(e||{},t),ct=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),ct(e[t],n)):e[t]=n}),e),lt=`https://huggingface.co`,ut=`https://huggingface.co/api`,W=l.join(x.homedir(),`.buttress`,`models`),dt=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],ft=.5,pt=[`speculative`,`spec_type`,`spec_draft_n_max`,`spec_draft_n_min`,`spec_draft_p_min`,`spec_draft_p_split`],mt=(e,t)=>{if(!(t==null||t===``)){if(e===`spec_draft_n_max`||e===`spec_draft_n_min`){let e=Number(t);return Number.isFinite(e)?Math.max(0,Math.floor(e)):void 0}if(e===`spec_draft_p_min`||e===`spec_draft_p_split`){let e=Number(t);return Number.isFinite(e)?Math.max(0,e):void 0}return t}},ht=e=>{let t={};for(let n of pt){let r=mt(n,e.model[n]??e.runtime[n]);r!==void 0&&(t[n]=r)}return t},gt=(e={})=>{let t={};for(let n of pt){let r=mt(n,e[n]);r!==void 0&&(t[n]=r)}return t},_t=(e={},t={})=>{let n=e||{},r=gt(t);return!Object.keys(r).length||pt.some(e=>st(n,e))?n:{...r,...n}},vt={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:ft},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:ut,base_url:lt,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1,speculative:null,spec_type:null,spec_draft_n_max:null,spec_draft_n_min:null,spec_draft_p_min:null,spec_draft_p_split:null},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},yt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],bt=e=>{if(!e)return null;let 
t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},xt=(e={})=>{let t=structuredClone(vt);if(ct(t,e),t.backend.variant=bt(t.backend.variant),t.backend.variant_preference=Array.from(new Set(yt(t.backend.variant_preference).flatMap(e=>{let t=bt(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(yt(t.runtime.prefer_variants).flatMap(e=>{let t=bt(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(yt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||lt,t.model.api_base=t.model.api_base||ut,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...vt.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||vt.runtime.context_release_delay_ms),t},St=e=>{let t=e.toLowerCase();return dt.find(e=>t.includes(e))||null},Ct=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=bt(e);return t?[t]:[]})))},G=async e=>{await 
p(e,{recursive:!0})},wt=(e=W)=>l.join(e,`.metadata-cache`),Tt=(e,t,n=W)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(wt(n),t,`${r}.json`)},Et=async(e,t,n=W)=>{try{let r=Tt(e,t,n),i=await h(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,l.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},Dt=async(e,t,n,r=W)=>{try{let i=Tt(e,t,r);await G(l.dirname(i)),await b(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,l.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} cache:`,e.message)}},Ot=(e=W)=>l.join(e,`.session-state-cache`),kt=(e=W)=>l.join(Ot(e),`cache-map.json`),At=(e=W)=>l.join(Ot(e),`temp`),jt=(e=W)=>l.join(Ot(e),`states`),Mt=()=>({version:1,entries:{},totalSize:0}),Nt=async(e=W)=>{try{let t=await h(kt(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?Mt():n}catch{return Mt()}},Pt=async(e,t=W)=>{let n=kt(t),r=`${n}.tmp.${Date.now()}`;try{await G(l.dirname(n)),await b(r,JSON.stringify(e,null,2),`utf-8`),await _(r,n)}catch(e){throw await y(r).catch(()=>{}),e}},Ft=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return s(`sha256`).update(n).digest(`hex`).slice(0,24)},It=(e,t=W)=>l.join(jt(t),`${e}.bin`),Lt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return l.join(At(e),`${t}.bin`)},Rt=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,zt=(e,t)=>{let 
n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},Bt=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Rt(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=zt(e,n.fullText);return r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},Vt=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},Ht=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Rt(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Ut=async(e=W)=>{let t=At(e);try{let e=await g(t),n=Date.now();await 
Promise.all(e.map(async e=>{let r=l.join(t,e),i=await v(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await y(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Wt=async e=>{try{return await v(e),!0}catch{return!1}},Gt=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t;var Kt=class e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Gt(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}async persistCacheMap(){try{await Pt(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await 
e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Wt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await G(Ot(this.baseDir)),await G(At(this.baseDir)),await G(jt(this.baseDir)),this.cacheMap=await Nt(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await y(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await y(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Rt(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=Bt(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Wt(i.stateFilePath)?(i.lastAccessedAt=new 
Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=Ft(a,this.metadata),s=()=>{n&&y(n).catch(()=>{}),i&&y(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=It(o,this.baseDir),u=i?It(`${o}-prompt`,this.baseDir):null;try{await G(l.dirname(c)),await _(n,c);let s=await v(c),d=0;if(i&&u)try{await _(i,u),d=(await v(u)).size,console.log(`[SessionCache] Saved prompt state: ${u}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let f={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:u||null,promptStateSize:d,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=f,this.cacheMap.totalSize+=s.size+d,this.requiresExactMatch()||await Ht(this.cacheMap,e,o,this.metadata),await Vt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await Pt(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),f}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async 
generateTempStatePath(){return await G(At(this.baseDir)),Lt(this.baseDir)}async cleanup(){await Ut(this.baseDir)}};const qt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Jt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Yt=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await Et(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await S(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await Dt(r,`range-metadata`,o,n),o},Xt=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},K=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},Zt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new ot({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Qt=async e=>{let 
t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await Et(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Jt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await Dt(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await qt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:dt,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=St(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||St(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await qt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await 
Jt(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await Dt(i,`artifact-info`,y,r),y},$t=/^mmproj-.*\.gguf$/i,en=async(e,t)=>{if(!e.model.enable_mtmd)return null;let n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await Et(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Jt(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await Dt(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:l.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:l.resolve(e.model.mmproj_local_path)};return await Dt(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await qt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),u=c?.siblings||c?.files||[],d=u.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),f=e.model.mmproj_filename;if(f){if(!d.includes(f))throw Error(`mmproj file "${f}" not found in repo ${i}`)}else{let e=d.filter(e=>$t.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let 
n=t?.quantization&&String(t.quantization).toLowerCase();f=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let p=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${f}`,m=u.find(e=>(e.rfilename||e.path||e.filename)===f),h=Number(m?.size);if(!Number.isFinite(h)||h<=0){let e=await Jt(p,{headers:r});h=Number(e.headers.get(`content-length`))||null}let g={repoId:i,revision:a,filename:f,url:p,size:h,headers:r};return await Dt(o,`artifact-info`,g,n),g},tn=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},nn=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=Ct(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?vt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ke({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},rn=async e=>{let t=await Qt(e),n=await en(e,t),r=await Yt(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift,...ht(e)},j=A.n_ctx?Number(A.n_ctx):null,M=j||a||4096,N=[],P=[],ie=!0;if(j&&a&&j>a){ie=!1;let e=`Requested context length (${j}) exceeds model training context 
(${a})`;N.push(e),P.push(e),M=a}j&&!a&&N.push(`Model metadata missing training context length, using requested value`);let F={k:A.cache_type_k,v:A.cache_type_v},I=t.size>0?t.size:0,L=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:F,swaConfig:O,kvUnified:A.kv_unified,nParallel:A.n_parallel,swaFull:A.swa_full,arch:i,attentionLayerCount:m}),R=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:A.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),z=await nn(e,{modelBytes:I,kvCacheBytes:L(M)+R}),ae=z.selected.totalMemory||0,oe=ae*(e.backend.gpu_memory_fraction||1),se=e.backend.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),ce=Math.max(0,x.totalmem()*se),le=z.selected.hasGpu?oe:ce,B=Ye({maxCtx:M,availableMemory:le,modelBytes:I,kvBytesForCtx:L});if(!j&&B){let e=a?Math.min(B,a):B,t=Math.max(32,e);t<M&&N.push(`Context length capped to ${t} by memory limits`),M=t}M>B&&(M=B);let ue=Math.floor(B);console.log(`[buttress] Memory-limited context length: ${ue}`);let de=L(M),fe=I+de+R,pe=w?I/(w+1):I,me=0;z.selected.hasGpu&&pe>0&&(me=Math.min(w+1,Math.max(0,Math.floor(oe/pe)))),console.log(`[buttress] Auto GPU layer capacity (${z.selected.variant}): ${me}/${w+1}`);let he;he=A.n_gpu_layers===`auto`||A.n_gpu_layers==null?me:Math.max(0,Math.min(Number(A.n_gpu_layers)||0,w+1));let ge=(()=>{let e=A.flash_attn_type&&String(A.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:z.selected.hasGpu?`auto`:`off`})(),_e=e.runtime.cache_dir,ve=Xt(e,t),ye=await K(ve,t.size),be=tn(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:ie,backend:`ggml-llm`,warnings:N,errors:P,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,dense_first:O.denseFirst,type:O.type,layers:O.swaLayers}:null}},runtime:{...A,variant:z.selected.variant,n_ctx:M,requested_ctx:j,n_gpu_layers:he,auto_gpu_layers:me,flash_attn_type:ge,cache_type_k:F.k,cache_type_v:F.v,estimated_max_n_ctx:ue},resources:{modelBytes:I,kvCacheBytes:de,recurrentMemoryBytes:R,totalEstimatedBytes:fe,gpuCapacityBytes:ae,gpuUsableBytes:oe,cpuUsableBytes:ce,fit:z.selected.fit},devices:{selected:z.selected,attempts:z.attempts},download:{cacheDir:_e,localPath:ve,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:z,cacheTypes:F,localPath:ve,localExists:ye}},an=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new at({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},on=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&y(i).catch(()=>{}),c&&y(c).catch(()=>{})};return new at({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},sn=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return s(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},cn=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),u=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),d=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=l.join(t,u);await K(f)||await Zt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Zt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},ln=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Zt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},un=async(e,t)=>{let n=sn(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await cn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};for(let t of pt){let n=e.plan.info.runtime[t];n!=null&&(o[t]=n)}e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await T(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await ln(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},dn=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},fn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return dn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?dn(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await dn(e,t)},i),!0)};async function pn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=xt(t),a=await rn(i),o=new Kt(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await un(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=_t(t,s.plan.info.runtime),i=sn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);await a.ready;let o=r.prompt||``,c=null,l=null;if(!o&&r.messages){({messages:c}=r),l={chatTemplate:r.chat_template||r.chatTemplate,jinja:r.jinja??!0,tools:r.tools,parallel_tool_calls:r.parallel_tool_calls,tool_choice:r.tool_choice,reasoning_format:r.reasoning_format,enable_thinking:r.enable_thinking,add_generation_prompt:r.add_generation_prompt,now:r.now,chat_template_kwargs:r.chat_template_kwargs,force_pure_content:r.force_pure_content};let e=await a.context.getFormattedChat(c,l.chatTemplate,l);o=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&o){let{options:e}=await s.sessionCache.prepareCompletionOptions(r,o,a.context),t=await s.sessionCache.generateTempStatePath(),n=(await a.context.tokenize(o))?.tokens?.length||0,i={...e,save_state_path:t},c=s.sessionCache.requiresExactMatch(),l=!!i.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),i.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return on(a.context,i,s.sessionCache,o,t,n,s.id,d,u)}let u={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return an(a.context,r,s.id,u)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=sn(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let 
a=await i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=sn(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=sn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=sn(s),t=s.contexts.get(e);return t?fn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):dn(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const mn=e=>{let t=xt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function 
hn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=xt(e),o=await Qt(n),s=Xt(n,o),{repoId:c}=o,u=await en(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),d=tn(n,u),f=async()=>{if(!u||!d||u.localPath)return;if(await K(d,u.size)){console.log(`[Download] mmproj already exists: ${d}`);return}let e=t.getDownload(d);if(e){await e;return}let n=(async()=>{try{await Zt(u.url,u.headers,d,u.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(d)}})();t.setDownload(d,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await f().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let p=t.getDownload(s);if(p)return console.log(`[Download] Already downloading: ${c}`),p.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let m=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=l.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let u=String(s).padStart(5,`0`),d=o.filename.replace(e,`-${u}-of-${String(i).padStart(5,`0`)}.gguf`),f=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${d}`,p=l.join(t,d);await K(p)||await Zt(f,o.headers,p,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Zt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await f(),console.log(`[Download] Completed: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,m),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function gn(e){let t=xt(e),n=await Qt(t),r=await Yt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ke(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=ze({arch:i,metadata:r,nLayer:w}),re=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,k=Math.max(0,Math.floor(Number(re)||0)),A=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,j={k:t.model.cache_type_k,v:t.model.cache_type_v},M=n.size>0?n.size:0,N=t.model.n_parallel||4,P=qe({layerCount:k,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:j,swaConfig:O,kvUnified:t.model.kv_unified,nParallel:N,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),ie=Je({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:N,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),F=t.backend?.gpu_memory_fraction==null?vt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),I=t.backend?.cpu_memory_fraction==null?ft:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),L=await 
nn(t,{modelBytes:M,kvCacheBytes:P(A)}),R=(L.selected.totalMemory||0)*F,z=Math.max(0,x.totalmem()*I),ae=Ye({maxCtx:A,availableMemory:L.selected.hasGpu?R:z,modelBytes:M,kvBytesForCtx:P}),oe=P(A),se=P(ae);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:te,nEmbdHeadV:ne,nHeadCount:E,nHeadKvCount:D,kvLayerCount:k,swa:O?.enabled?{window:O.window,pattern:O.pattern,denseFirst:O.denseFirst,type:O.type,layers:O.swaLayers}:null},modelBytes:M,kvCacheBytes:oe,limitedKvCacheBytes:se,memoryLimitedCtx:ae,recurrentMemoryBytes:ie,quantization:{name:n.quantization||null,fileType:p,version:f}}}const _n=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function vn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await gn(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await ke({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=_n(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let v=null,y=null;if(e){let t=_n(e);y={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=y.fit,a=y.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}v={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!v&&(v={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let b=null;return i&&(b={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:y,comparison:v,modelConfig:b}}const{WritableStream:yn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,bn=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),bn(e[t],n)):e[t]=n}),e),xn=`https://huggingface.co`,Sn=`https://huggingface.co/api`,Cn=l.join(x.homedir(),`.buttress`,`models`),wn=[`cuda`,`vulkan`,`default`],Tn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],En=`fp16`,Dn=.5,On=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],kn=e=>{if(!e)return null;let t=e.toLowerCase();return 
On.find(e=>t.includes(e))||null},An={backend:{type:`ggml-stt`,variant:null,variant_preference:wn,gpu_memory_fraction:.85,cpu_memory_fraction:Dn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,En,`q5_1`],allow_local_file:!1,local_path:null,api_base:Sn,base_url:xn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:Cn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},jn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],Mn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Nn=(e={})=>{let t=structuredClone(An);if(bn(t,e),t.backend.variant=Mn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(jn(t.backend.variant_preference||wn).flatMap(e=>{let t=Mn(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...wn]),t.runtime.prefer_variants=Array.from(new Set(jn(t.runtime.prefer_variants).flatMap(e=>{let t=Mn(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(jn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||xn,t.model.api_base=t.model.api_base||Sn,t.runtime.cache_dir=t.runtime.cache_dir?l.resolve(t.runtime.cache_dir):Cn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||An.runtime.context_release_delay_ms),t},Pn=e=>{let t=e.toLowerCase();return Tn.find(e=>t.includes(e))||null},Fn=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=Mn(e);t&&n.add(t)}return Array.from(n)},In=async e=>{await p(e,{recursive:!0})},Ln=(e=Cn)=>l.join(e,`.metadata-cache`),Rn=(e,t,n=Cn)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(Ln(n),t,`${r}.json`)},zn=async(e,t,n=Cn)=>{try{let r=await h(Rn(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},Bn=async(e,t,n,r=Cn)=>{try{let i=Rn(e,t,r);await In(l.dirname(i)),await b(i,JSON.stringify(n),`utf-8`)}catch{}},Vn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Hn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Un=(e,t)=>{if(e.model.local_path)return l.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=l.join(e.runtime.cache_dir,...n,t.revision);return l.join(r,t.filename)},Wn=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},Gn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await In(l.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await m(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,u=.05;try{await a.body.pipeTo(new yn({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=u;)i(u),u+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Kn=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await zn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Hn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:Pn(c||``),headers:o};return await Bn(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Vn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:Tn,f=()=>{for(let e of d)if(e===En){let e=u.find(e=>{let t=e.toLowerCase();return!Tn.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=Pn(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||Pn(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
Hn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await Bn(i,`artifact-info`,g,r),g},qn=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=Fn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?An.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?Dn:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ke({platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Jn=async e=>{let t=await Kn(e),n=Ne({modelBytes:t.size>0?t.size:0}),r=await qn(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Un(e,t),l=await Wn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:kn(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},Yn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await Wn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await Wn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Gn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
Xn=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Zn=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Qn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await Yn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
ee({filePath:i,useFlashAttn:!!e.plan.info.runtime.use_flash_attn,useGpu:!!e.plan.info.runtime.use_gpu,nThreads:e.plan.info.runtime.max_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},$n=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),er=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return $n(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?$n(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await $n(e,t)},i),!0)};async function tr(e,t,n={}){let{globalDownloadManager:r=null}=n,i=Nn(t),a=await Jn(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
Xn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await $n(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Qn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},u=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?er(o,e):!1},d=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Zn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=l.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>d(e),transcribeData:async(e={})=>d(e),releaseContext:u,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const nr=e=>{let t=Nn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=kn(t.model.filename);return r?`${n}:${r}`:n};async function rr(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=Nn(e),o=await Kn(n),s=Un(n,o),{repoId:c}=o;if(await Wn(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await Gn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const ir=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ue(e):0;async function ar(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await Kn(Nn(i));o=e.size??null,{processingBufferBytes:s}=Ne({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await ke({...a,platform:process.platform,totalMemoryInBytes:x.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:C,isLibVariantAvailable:w},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=ir(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=ir(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:or}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:u,sr=te(import.meta.url),cr=l.dirname(sr),lr=l.join(cr,`mlx-bridge.py`),ur=`mlx-vlm==0.4.0`,dr=`mlx-lm==0.31.1`,fr=l.join(x.homedir(),`.buttress`,`models`),pr={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:fr,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:dr,mlx_vlm_package:ur,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},mr=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?E.parse(e)??t:t,hr=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),hr(e[t],n)):e[t]=n}),e),gr=(e={})=>{let t=structuredClone(pr);return hr(t,e),t},_r=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},vr=async e=>{await p(e,{recursive:!0})},yr=(e,t,n)=>{let r=s(`sha256`).update(e).digest(`hex`);return l.join(n,`.metadata-cache`,t,`${r}.json`)},br=async(e,t,n)=>{try{let r=await h(yr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},xr=async(e,t,n,r)=>{try{let i=yr(e,t,r);await vr(l.dirname(i)),await 
b(i,JSON.stringify(n),`utf-8`)}catch{}};async function Sr(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await br(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await _r(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await _r(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await xr(i,`mlx-model-metadata`,D,n),D}function Cr({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const wr=async e=>{try{return await v(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{ne(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),Tr=new Map;async function Er({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=l.resolve(e),a=Tr.get(i);if(a){let 
e=await a;return r?.(1),e}let o=Or({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});Tr.set(i,o);try{return await o}finally{Tr.delete(i)}}const Dr=[3,10];async function Or({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=l.join(e,`bin`,`python3`),a=l.join(e,`bin`,`pip`);if(await wr(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await wr(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<Dr[0]||t===Dr[0]&&n<Dr[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${Dr.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await p(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var kr=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=re(e,[lr],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
|
|
2
|
+
import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import o,{createHash as s,randomUUID as c}from"node:crypto";import*as l from"node:stream/web";import{ReadableStream as u}from"node:stream/web";import d,{mkdir as f,open as p,readFile as m,readdir as h,rename as g,rm as _,stat as v,unlink as y,writeFile as b}from"node:fs/promises";import x from"node:path";import S from"node:os";import{gguf as C}from"@huggingface/gguf";import{getBackendDevicesInfo as w,isLibVariantAvailable as T,loadModel as E}from"@fugood/llama.node";import D from"bytes";import{EventEmitter as ee}from"node:events";import{initWhisper as te}from"@fugood/whisper.node";import{fileURLToPath as ne}from"node:url";import{execFile as O,execSync as k,spawn as re}from"node:child_process";import{AutoModel as A,WhisperTextStreamer as j,pipeline as M}from"@fugood/bricks-transformers";import{listSupportedBackends as N}from"onnxruntime-node";import P,{createWriteStream as ie}from"node:fs";import F from"@iarna/toml";import{ZodError as I,z as L}from"zod";import{importSPKI as ae,jwtVerify as R}from"jose";import{cors as oe}from"@elysiajs/cors";import se from"node-machine-id";import ce from"ms";import{Buffer as z}from"node:buffer";import le from"node:dgram";import{Readable as ue}from"node:stream";import{pipeline as de}from"node:stream/promises";const fe=1024**3,pe=(e,t,n)=>Math.min(Math.max(e,t),n),me=e=>e?40:0,he=(e=0)=>e?pe(e/(12*fe)*20,0,20):0,ge=(e=0)=>e?pe(e/(32*fe)*10,0,10):0,_e=e=>e?10:0,ve=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},B=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=me(n)+ve(t,e)+he(r),s=ge(i),c=_e(a);return 
Math.min(100,Math.round(o+s+c))},ye=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:me(n),variant:ve(t,e),gpuMemory:he(r),cpuMemory:ge(i),availability:_e(a)}),be=[`cuda`,`vulkan`,`snapdragon`,`default`],xe=.85,Se=.5,Ce=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],we=e=>e&&String(e).trim().toLowerCase()||null,Te=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=be}={})=>{let i=[];e&&i.push(e),i.push(...Ce(t)),i.push(...Ce(n)),i.push(...r);let a=new Set;for(let e of i){let t=we(e);t&&a.add(t)}return Array.from(a)},Ee=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},De=e=>Array.isArray(e)?e.map(e=>({...e})):[],Oe=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,ke=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=De(Oe(n,r)),l=c.some(Ee),u=c.filter(e=>Ee(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:B(m),breakdown:o?ye(m):null,error:s,timestamp:new Date().toISOString()}},Ae=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let 
r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},je=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=xe,cpuMemoryFraction:a=Se,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=be}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=Te({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(ke({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(ke({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=Ae({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=Ae({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return 
v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},Me=`ggml-llm`,Ne=[`cuda`,`vulkan`,`default`],Pe=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=xe,cpuMemoryFraction:a=Se,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>je({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:Ne,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:Ne}),Fe=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=B(d),p=l?ye(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=Ae({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=Ae({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},Ie=[`cuda`,`coreml`,`qnn`,`dml`,`webgpu`,`cpu`],Le={cuda:100,coreml:90,qnn:80,dml:80,webgpu:70,cpu:10},Re=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],ze=e=>e&&String(e).trim().toLowerCase()||null,Be=({provider:e,preferProviders:t=[],providerPreference:n=[]}={})=>{let 
r=[];e&&r.push(e),r.push(...Re(t)),r.push(...Re(n)),r.push(...Ie);let i=r.map(ze).filter(Boolean);return Array.from(new Set(i))},Ve=new Set([`cuda`,`coreml`,`dml`,`webgpu`]),He=(e=``)=>Ve.has(String(e).toLowerCase()),Ue=e=>Array.isArray(e)?e.map(e=>typeof e==`string`?{name:e,available:!0}:{name:e.name,available:!0}):[],We=({platform:e,totalMemoryInBytes:t,provider:n,available:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=He(n),l=0;c&&r&&(n===`cuda`?l=8*1024*1024*1024:n===`coreml`?l=Math.floor(t*.5):n===`dml`&&(l=6*1024*1024*1024));let u=t,d=c?Math.floor(l*i):0,f=u?Math.floor(u*a):0,p={platform:e,variant:n,hasGpu:c,gpuUsableBytes:d,cpuUsableBytes:f,ok:o},m=B(p),h=o?ye(p):null;return{platform:e,ok:o,provider:n,hasGpu:c,available:r,gpuTotalBytes:l,gpuUsableBytes:d,cpuTotalBytes:u,cpuUsableBytes:f,score:m,breakdown:h,error:s,timestamp:new Date().toISOString()}},Ge=({device:e,modelBytes:t=0,workingMemory:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},Ke=async({platform:e,provider:t=null,preferProviders:n=[],providerPreference:r=[],gpuMemoryFraction:i=.85,cpuMemoryFraction:a=.5,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,workingMemory:l=null,dependencies:u={}}={})=>{let{listSupportedBackends:d}=u;if(typeof d!=`function`)throw TypeError(`ONNX capability detection requires listSupportedBackends function`);let f=Be({provider:t,preferProviders:n,providerPreference:r}),p=[],m=[];try{m=Ue(await d())}catch{m=f.map(e=>({name:e,available:!1}))}let h=new Set(m.filter(e=>e.available).map(e=>e.name.toLowerCase()));for(let t of 
// Tail of `Ke` (loop header is on the previous line): record one attempt per candidate provider via `We` -- `ok:false` with an error message when the provider is not in the available set `h`, `ok:true` otherwise.
f){if(!h.has(t)){p.push(We({platform:e,totalMemoryInBytes:s,provider:t,available:!1,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:`Provider ${t} not available on this platform`}));continue}p.push(We({platform:e,totalMemoryInBytes:s,provider:t,available:!0,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}
// Selection order: the explicitly requested provider if usable, else the first GPU-backed usable attempt, else the usable attempt with the highest `Le` priority (note: `sort` reorders `g` in place). A warning is collected for each requested/preferred provider that is not available.
let g=p.filter(e=>e.ok),_=(t?g.find(e=>e.provider===ze(t)):null)||g.find(e=>e.hasGpu)||g.sort((e,t)=>{let n=Le[e.provider]||0;return(Le[t.provider]||0)-n})[0]||null,v=[],y=[t,...n].filter(Boolean).map(ze);for(let e of y)e&&!h.has(e)&&v.push(`Requested provider "${e}" is not available (installed: ${[...h].join(`, `)||`none`})`);
// The selected record keeps `breakdown` only when `includeBreakdown` is set. When `modelBytes` or `workingMemory` is truthy, the selected record and every attempt gain a `fit` computed by `Ge`.
let b={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:p,warnings:v};if(!o&&b.selected&&delete b.selected.breakdown,!b||!c&&!l)return b;let x=e=>{if(!e)return e;let t=Ge({device:e,modelBytes:c||0,workingMemory:l||0});return{...e,fit:t}};return b.selected=x(b.selected),b.attempts=Array.isArray(b.attempts)?b.attempts.map(x):b.attempts,b},
// Backend ids for the two ONNX backends.
qe=`onnx-stt`,
Je=`onnx-tts`,
// Backend id -> capability detector.
Ye=new Map([[Me,je],[`ggml-stt`,Pe],[`mlx-llm`,Fe],[qe,Ke],[Je,Ke]]),
// Dispatches capability detection to the detector registered for `backend` (default `Me`); throws Error when none is registered. Remaining options are passed through unchanged.
V=async({platform:e,totalMemoryInBytes:t,backend:n=Me,dependencies:r,...i}={})=>{let a=Ye.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},
// Per-element size multipliers keyed by cache type name; consumed by `Qe` through `Ze`.
Xe={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},
// Looks up the multiplier for a cache type (case-insensitive); a missing or unknown type falls back to the `f16` value.
Ze=e=>Xe[e?String(e).toLowerCase():`f16`]||Xe.f16,
// Estimates KV-cache bytes. Positional args as passed by `pt`: layer count, context length, KV head count, K head size, V head size, cache types `{k, v}`. Returns 0 when any of the first five is falsy. The body continues on the next line.
Qe=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=Ze(a.k),h=Ze(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let 
_=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},$e=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},et=e=>e?String(e).trim().toLowerCase():null,tt=(e={},t=null)=>{if(!e)return null;let n=et(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},nt=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return o*a+c},rt=({arch:e,nLayer:t=0})=>({arch:et(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),it=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return 
// Tail of the `gemma2` entry of `it` (its `return` keyword is on the previous line), followed by the remaining per-architecture sliding-window rules. Most entries disable the window when `nSwa` is missing or <= 0 and otherwise use `nSwa` as the window with a fixed `pattern`.
t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],
[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],
// gemma3n additionally caps `kvLayers` at 20.
[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],
[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],
[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],
[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],
// exaone4 falls back to a 4096 window when `nSwa` is absent and the model has at least 64 layers.
[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],
[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],
// gemma4 subtracts `gemma4.attention.shared_kv_layers` from the layer count; when `gemma4.attention.sliding_window_pattern` is an array, the sliding-window layer count is the number of positive entries among the first `kvLayers` items, otherwise pattern 6 is used.
[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],
// smallthinker always reports a 4096 window (the `nSwa` value only gates whether it is enabled) and sets `denseFirst`.
[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),
// Resolves the sliding-window configuration for a model: the architecture comes from `arch` or `metadata["general.architecture"]`; unknown architectures and disabled/invalid rule results yield the disabled config from `rt`. The body continues on the next line.
at=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=et(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=tt(t,r),o=r?it.get(r):null;if(!o)return rt({arch:r,nLayer:n});let s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return rt({arch:r,nLayer:n});let 
// Tail of `at`: normalises `pattern` and `kvLayers` (defaulting to the layer count), takes an explicit `swaLayers` from the rule when finite, otherwise derives it with `nt`. The result is `enabled` only when at least one sliding-window layer remains.
c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):nt(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},
// Architecture names that `dt` classifies as `recurrent`.
ot=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),
// Architecture names that `dt` classifies as `hybrid`.
st=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),
// Trimmed, lower-cased architecture name, or null for falsy input.
ct=e=>e?String(e).trim().toLowerCase():null,
// Membership tests against `ot` / `st` after normalisation.
lt=e=>{let t=ct(e);return t?ot.has(t):!1},
ut=e=>{let t=ct(e);return t?st.has(t):!1},
// Classifies an architecture as `recurrent`, `hybrid` or (default) `transformer`.
dt=e=>lt(e)?`recurrent`:ut(e)?`hybrid`:`transformer`,
// Extracts model hyper-parameters from a metadata object keyed by `<arch>.*` names, falling back to the `llama.*` key for several fields. The inner `n` reads a finite number or a fallback; the inner `r` does the same but passes arrays through. When `head_count_kv` is an array, `l` becomes its largest positive entry and `u` the count of positive entries. The body continues on the next line.
ft=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let 
// Tail of `ft`: reads the remaining attention, SSM and RWKV fields (all null when no architecture is set, except `sharedKvLayers` which defaults to 0) and returns them under descriptive names. `recurrentLayerCount` is `nLayer - attentionLayerCount` when both are known.
d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},
// Returns a function mapping a context length to estimated KV-cache bytes. Recurrent architectures always yield 0; hybrid ones use `attentionLayerCount` (when given) as the layer count. The per-context estimate from `Qe` is multiplied by `nParallel` unless `kvUnified` is set; with `kvUnified` and a sliding window, `nParallel` is instead passed as `swaContextMultiplier`.
pt=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=dt(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Qe(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},
// Estimates recurrent-state bytes. Returns 0 for transformer architectures and when the recurrent layer count (explicit `recurrentLayerCount`, else `nLayer`) is 0. The RWKV branch is taken when `rwkvHeadSize` and `nEmbd` are positive; otherwise the SSM branch needs `ssmDState` and `ssmDInner`. The body continues on the next line.
mt=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(dt(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let 
// Tail of `mt` (SSM branch): two different element-count formulas are used depending on whether `ssmDtRank` is positive; with neither RWKV nor SSM parameters the estimate is 0. The final size is (m + h) * sequences * layers * 4.
// NOTE(review): the factor 4 is presumably bytes per f32 element -- confirm.
e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},
// Binary-searches the largest context length in [1, maxCtx] for which modelBytes + kvBytesForCtx(ctx) fits in availableMemory. Returns maxCtx unchanged when no estimator is given or when availableMemory <= modelBytes.
// NOTE(review): the result is initialised to maxCtx, so maxCtx is also returned when no candidate fits -- confirm this fallback is intended.
ht=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},
// dtype name -> ONNX file-name suffix, used by `St`.
gt={fp32:``,fp16:`_fp16`,int8:`_int8`,uint8:`_uint8`,q8:`_quantized`,q4:`_q4`,q4f16:`_q4f16`,bnb4:`_bnb4`},
// Per-provider multiplier that `Ct` applies to model bytes.
_t={cuda:2,coreml:1.5,dml:1.8,webgpu:2,qnn:1.5,cpu:1.3},
// Model type -> ONNX component file base names.
vt={whisper:[`encoder_model`,`decoder_model_merged`],speech_to_text:[`encoder_model`,`decoder_model_merged`],speecht5:[`encoder_model`,`decoder_model_merged`],vits:[`model`],style_text_to_speech_2:[`model`],bert_vits2:[`model`],llama:[`model`],gpt2:[`model`],qwen2:[`model`],phi:[`model`],mistral:[`model`],gemma:[`model`]},
// Component list used for model types missing from `vt`.
yt=[`model`],
// Resolves the dtype for one component: the explicit `dtype` argument (string, or per-component object) wins, then `tjConfig.dtype` in the same two shapes; the value `auto` is skipped at every step and the final fallback is `fp32`.
bt=(e,t,n)=>{if(typeof t==`object`&&t){let n=t[e];if(typeof n==`string`&&n!==`auto`)return n}else if(typeof t==`string`&&t!==`auto`)return t;let r=n?.dtype;if(typeof r==`object`&&r){let t=r[e];if(typeof t==`string`&&t!==`auto`)return t}else if(typeof r==`string`&&r!==`auto`)return r;return`fp32`},
// Component file base names for a model type.
xt=e=>vt[e]||yt,
// Sums the sizes of the ONNX files (and their `_data` companions) for each component of a model, matching `rfilename` in `siblings`. A component with no matching file adds a warning and contributes 0 bytes.
St=({siblings:e,modelType:t,dtype:n,tjConfig:r,subfolder:i=`onnx`})=>{let a=xt(t),o=[],s=[],c=0;for(let t of a){let a=bt(t,n,r),l=`${i}/${t}${gt[a]??``}.onnx`,u=`${l}_data`,d=0;for(let t of e)(t.rfilename===l||t.rfilename===u)&&(d+=t.size||0);d===0&&o.push(`File not found: ${t} (dtype=${a})`),s.push({name:t,dtype:a,bytes:d}),c+=d}return{totalBytes:c,files:s,warnings:o}},
// Model bytes scaled by the provider multiplier from `_t` (unknown providers use the `cpu` value), rounded up.
Ct=(e,t=`cpu`)=>{let n=_t[t]||_t.cpu;return Math.ceil(e*n)},
// Shared emitter for status events; its listener limit is raised to 100.
// NOTE(review): `ee` is bound outside this chunk; the `on`/`off`/`emit`/`setMaxListeners` usage suggests an EventEmitter -- confirm.
H=new ee;H.setMaxListeners(100);
// Appends a copy of `t` to the list `e`, adding an ISO timestamp when the entry has none, and drops the oldest item once the list grows past `n`.
const wt=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var 
Tt=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){wt(this.modelLoads,e,this.maxEntries),H.emit(`status:modelLoad`,e),H.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){wt(this.completions,e,this.maxEntries),H.emit(`status:completion`,e),H.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){wt(this.transcriptions,e,this.maxEntries),H.emit(`status:transcription`,e),H.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const U=new Tt,Et=new Tt,Dt=new Tt,Ot=new Tt;let kt=0;function At(e){let t=t=>e(t);return H.on(`status:change`,t),()=>H.off(`status:change`,t)}function jt(e){return kt+=1,{subscriberId:kt,unsubscribe:At(e)}}function Mt(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:U.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:U.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function 
// Status view for `ggml-stt` generators (the `function` keyword is on the previous line). Reads optional `getStatus()` output from each instance; a missing `queueStatus` defaults to `{processing:false, queuedCount:0}`. History comes from the store `Et`.
Nt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:Et.getModelLoadHistory(),transcriptions:Et.getTranscriptionHistory()}}}
// Status view for `mlx-llm` generators. Values from `getStatus()` take precedence over `instance.info`; history is the `mlx`-variant portion of the shared store `U`.
function Pt(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:U.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:U.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}
// Status view for `onnx-stt` generators; `pipelines` is `getStatus().pipelineCount` or 0. History comes from the store `Dt`.
function Ft(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`onnx-stt`).map(([e,t])=>{let{instance:n}=t;return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,dtype:n.info?.model?.dtype||null,provider:n.info?.runtime?.provider||null,device:n.info?.runtime?.device||null,modelBytes:n.info?.model?.modelBytes||0,pipelines:n.getStatus?.()?.pipelineCount??0}}),history:{modelLoads:Dt.getModelLoadHistory(),transcriptions:Dt.getTranscriptionHistory()}}}
function 
It(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`onnx-tts`).map(([e,t])=>{let{instance:n}=t;return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,dtype:n.info?.model?.dtype||null,provider:n.info?.runtime?.provider||null,device:n.info?.runtime?.device||null,modelBytes:n.info?.model?.modelBytes||0,vocoderRepoId:n.info?.model?.vocoderRepoId||null,pipelines:n.getStatus?.()?.pipelineCount??0}}),history:{modelLoads:Ot.getModelLoadHistory(),syntheses:Ot.getTranscriptionHistory()}}}function Lt(e){return{timestamp:new Date().toISOString(),ggmlLlm:Mt(e),ggmlStt:Nt(e),mlxLlm:Pt(e),onnxStt:Ft(e),onnxTts:It(e)}}const{ReadableStream:Rt,WritableStream:zt}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:l,Bt=(e,t)=>Object.prototype.hasOwnProperty.call(e||{},t),Vt=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),Vt(e[t],n)):e[t]=n}),e),Ht=`https://huggingface.co`,Ut=`https://huggingface.co/api`,W=x.join(S.homedir(),`.buttress`,`models`),Wt=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],Gt=.5,Kt=[`speculative`,`spec_type`,`spec_draft_n_max`,`spec_draft_n_min`,`spec_draft_p_min`,`spec_draft_p_split`],qt=(e,t)=>{if(!(t==null||t===``)){if(e===`spec_draft_n_max`||e===`spec_draft_n_min`){let e=Number(t);return Number.isFinite(e)?Math.max(0,Math.floor(e)):void 0}if(e===`spec_draft_p_min`||e===`spec_draft_p_split`){let e=Number(t);return Number.isFinite(e)?Math.max(0,e):void 0}return t}},Jt=e=>{let t={};for(let n of Kt){let r=qt(n,e.model[n]??e.runtime[n]);r!==void 0&&(t[n]=r)}return t},Yt=(e={})=>{let t={};for(let n of Kt){let r=qt(n,e[n]);r!==void 0&&(t[n]=r)}return t},Xt=(e={},t={})=>{let 
// Tail of `Xt`: returns the request options untouched when there are no default speculative options, or when the request already has any of the `Kt` keys as an own property; otherwise the defaults are merged underneath the request options.
n=e||{},r=Yt(t);return!Object.keys(r).length||Kt.some(e=>Bt(n,e))?n:{...r,...n}},
// Default backend configuration; `en` deep-clones it before merging user settings.
Zt={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:Gt},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:Ut,base_url:Ht,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1,speculative:null,spec_type:null,spec_draft_n_max:null,spec_draft_n_min:null,spec_draft_p_min:null,spec_draft_p_split:null},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},
// Normalises a value to an array: falsy input other than 0 gives a copy of the fallback `t`, arrays lose their null/undefined members, anything else is wrapped.
Qt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],
// Lower-cased variant name when it is one of cuda/vulkan/snapdragon/default, otherwise null.
$t=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},
// Builds a normalised config: clones `Zt`, merges the user config with `Vt`, validates and de-duplicates the variant lists (an empty preference list falls back to the full default order), lower-cases and de-duplicates `preferred_quantizations` (also accepting `model.quantizations`), and puts an explicit `model.quantization` at the front of that list. The body continues on the next line.
en=(e={})=>{let t=structuredClone(Zt);if(Vt(t,e),t.backend.variant=$t(t.backend.variant),t.backend.variant_preference=Array.from(new Set(Qt(t.backend.variant_preference).flatMap(e=>{let t=$t(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(Qt(t.runtime.prefer_variants).flatMap(e=>{let t=$t(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(Qt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 
// Tail of `en`: `n_batch` defaults to 512 (minimum 1), endpoints fall back to `Ht`/`Ut`, `enable_mtmd` is coerced to boolean, the mmproj token limits become floored numbers (or -1 when null/not finite), `cache_dir` is resolved to an absolute path, `session_cache` is merged over the defaults, and `context_release_delay_ms` is clamped to >= 0.
// NOTE(review): because of `Number(x)||default`, a configured `context_release_delay_ms` of 0 is replaced by the default -- confirm whether 0 should be allowed.
0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||Ht,t.model.api_base=t.model.api_base||Ut,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?x.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...Zt.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||Zt.runtime.context_release_delay_ms),t},
// First tag from `Wt` that occurs (case-insensitively) in a file name, or null.
tn=e=>{let t=e.toLowerCase();return Wt.find(e=>t.includes(e))||null},
// Ordered, de-duplicated list of variants to try: explicit variant, runtime preferences, backend preference, then `default`.
nn=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=$t(e);return t?[t]:[]})))},
// Creates a directory including missing parents. NOTE(review): `f` is bound outside this chunk; the `{recursive:true}` option suggests a promise-based mkdir -- confirm.
G=async e=>{await f(e,{recursive:!0})},
// Directory of the metadata cache under a base directory.
rn=(e=W)=>x.join(e,`.metadata-cache`),
// Path of one metadata cache record: <base>/.metadata-cache/<kind>/<sha256 of key>.json.
an=(e,t,n=W)=>{let r=s(`sha256`).update(e).digest(`hex`);return x.join(rn(n),t,`${r}.json`)},
// Reads a cached record, reviving strings prefixed with `__bigint__` into BigInt values. Any failure (including a missing file) yields null.
on=async(e,t,n=W)=>{try{let r=an(e,t,n),i=await m(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,x.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},
// Writes a cache record, encoding BigInt values as `__bigint__<digits>` strings. Failures are logged as warnings and not rethrown.
sn=async(e,t,n,r=W)=>{try{let i=an(e,t,r);await G(x.dirname(i)),await b(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,x.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} cache:`,e.message)}},
// Session-state cache layout under a base directory: root, index file, temp directory, state directory.
cn=(e=W)=>x.join(e,`.session-state-cache`),
ln=(e=W)=>x.join(cn(e),`cache-map.json`),
un=(e=W)=>x.join(cn(e),`temp`),
dn=(e=W)=>x.join(cn(e),`states`),
// A fresh, empty cache index.
fn=()=>({version:1,entries:{},totalSize:0}),
// Loads the cache index; an unreadable file or one without an `entries` object yields a fresh index. The body continues on the next line.
pn=async(e=W)=>{try{let t=await m(ln(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof 
// Tail of `pn`.
n.entries!=`object`?fn():n}catch{return fn()}},
// Persists the cache index by writing a `<index>.tmp.<timestamp>` file and then moving it over the index; on failure the temp file is removed (best effort) and the error is rethrown.
// NOTE(review): `b`, `g` and `y` are bound outside this chunk; their usage suggests writeFile, rename and unlink -- confirm.
mn=async(e,t=W)=>{let n=ln(t),r=`${n}.tmp.${Date.now()}`;try{await G(x.dirname(n)),await b(r,JSON.stringify(e,null,2),`utf-8`),await g(r,n)}catch(e){throw await y(r).catch(()=>{}),e}},
// Entry id: first 24 hex characters of the SHA-256 over the text plus the model/runtime settings that affect saved state.
hn=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return s(`sha256`).update(n).digest(`hex`).slice(0,24)},
// Path of the persisted state file for an entry id.
gn=(e,t=W)=>x.join(dn(t),`${e}.bin`),
// Fresh path in the temp directory, named from the current time and a random suffix.
_n=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return x.join(un(e),`${t}.bin`)},
// True when a cached entry's metadata `e` is usable for the current settings `t`: all fields must be equal, except that the cached `n_ctx` may be larger.
vn=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,
// Length of the common prefix of two strings (or arrays).
yn=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},
// Finds the best cached entry for a prompt among metadata-compatible entries. With `exactMatch`, only an entry whose `fullText` equals the prompt qualifies. Otherwise the entry sharing the longest common prefix wins, ties going to the longer `fullText`.
// NOTE(review): in prefix mode an entry sharing zero characters can still be returned (prefixLength 0), because a tie at 0 is broken by text length -- confirm callers handle that.
bn=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>vn(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=yn(e,n.fullText);return r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},
// Evicts least-recently-accessed entries until the index is within the size limit `t` and the entry limit `n`. The body continues on the next line.
xn=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new 
// Tail of `xn`: walks the entries oldest-access first, selecting entries for eviction while either limit is still exceeded; their files are deleted (errors ignored), they are removed from the index, `totalSize` is updated (never below 0), and the evicted ids are returned.
Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},
// Evicts metadata-compatible entries (other than id `n`) whose `fullText` is a strictly shorter prefix of the text `t`; returns the evicted ids.
Sn=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&vn(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await y(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await y(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},
// Deletes files in the temp directory whose modification time is more than one hour (36e5 ms) old. All errors are ignored.
Cn=async(e=W)=>{let t=un(e);try{let e=await h(t),n=Date.now();await Promise.all(e.map(async e=>{let r=x.join(t,e),i=await v(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await y(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},
// True when the path can be stat'ed.
wn=async e=>{try{return await v(e),!0}catch{return!1}},
// Resolves a size setting: null/undefined gives the fallback, numbers pass through, strings go through `D.parse` (fallback when that returns null/undefined), anything else gives the fallback.
// NOTE(review): `D` is bound outside this chunk; presumably a byte-size parser -- confirm.
Tn=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?D.parse(e)??t:t;
// Session-state cache bound to one config and load plan (class body follows on the next line).
var En=class 
// Class name `e` is the internal alias used for static calls. The constructor captures limits from `config.runtime.session_cache` (the cache is enabled unless `enabled` is exactly false) and the runtime settings that saved state depends on; the index stays null until `initialize()` runs.
e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Tn(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}
// Records whether the loaded model is recurrent or hybrid (from `is_recurrent` / `is_hybrid`); a falsy argument is ignored.
updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}
// Recurrent and hybrid models only accept exact matches.
requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}
// Writes the index to disk; failures are logged as warnings and not rethrown.
async persistCacheMap(){try{await mn(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}
// True when token array `e` is a prefix of token array `t`.
static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}
// Tokenizes text with the given context and returns its `tokens` as a plain array (empty when missing).
static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}
// Token-level matching for recurrent/hybrid models: for each candidate entry, checks whether the tokens of its `fullText` -- or, failing that, of its `promptText` when a prompt state exists -- form a prefix of the prompt's tokens. Entries that fail to tokenize are logged and skipped. The body continues on the next line.
async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await 
// Tail of `findFormattedMatchForRecurrent`: takes the first candidate (in input order, not the longest) that matched. If its state file still exists, the access time is refreshed and persisted; otherwise the stale entry is removed and null is returned.
Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await wn(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}
// Creates the cache directories and loads the index once. Does nothing when disabled or already initialized; any failure is logged and disables the cache.
async initialize(){if(!(!this.enabled||this.initialized))try{await G(cn(this.baseDir)),await G(un(this.baseDir)),await G(dn(this.baseDir)),this.cacheMap=await pn(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}
// Deletes an entry's files (errors ignored), removes it from the index, subtracts its sizes from `totalSize`, and persists the index.
async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await y(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await y(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}
// Looks up a cached entry for a prompt. Returns null when disabled or not initialized. For exact-match models with a tokenizer context `t`, matching is done on tokens; otherwise on text via `bn`. A hit whose state file is missing is removed and reported as a miss.
async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>vn(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=bn(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await wn(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}
// Returns completion options augmented with `load_state_path` when a cached entry matches; otherwise the options are returned unchanged with null `cacheEntry`/`promptPrefix`. The body continues on the next line.
async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return 
console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=hn(a,this.metadata),s=()=>{n&&y(n).catch(()=>{}),i&&y(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=gn(o,this.baseDir),l=i?gn(`${o}-prompt`,this.baseDir):null;try{await G(x.dirname(c)),await g(n,c);let s=await v(c),u=0;if(i&&l)try{await g(i,l),u=(await v(l)).size,console.log(`[SessionCache] Saved prompt state: ${l}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let d={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:l||null,promptStateSize:u,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=d,this.cacheMap.totalSize+=s.size+u,this.requiresExactMatch()||await Sn(this.cacheMap,e,o,this.metadata),await xn(this.cacheMap,this.maxSizeBytes,this.maxEntries),await mn(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),d}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await G(un(this.baseDir)),_n(this.baseDir)}async cleanup(){await Cn(this.baseDir)}};const Dn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return 
// Tail of `Dn` (its `return` keyword is on the previous line).
n.json()},
// Issues a HEAD request and returns the response object; throws when global `fetch` is missing or the response is not ok.
On=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},
// Returns the `metadata` that `C` extracts from a model file at a URL or local path, cached under `range-metadata` with the URL and headers as key. Anything not starting with http(s) is treated as a local file.
// NOTE(review): `C` is bound outside this chunk; presumably the GGUF header parser -- confirm.
kn=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await on(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await C(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await sn(r,`range-metadata`,o,n),o},
// Local path of a model artifact: `model.local_path` when set, else <cache_dir>/<repo id segments>/<revision>/<filename>.
An=(e,t)=>{if(e.model.local_path)return x.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=x.join(e.runtime.cache_dir,...n,t.revision);return x.join(r,t.filename)},
// True when the file exists and, if an expected size is given (truthy), has exactly that size.
K=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},
// Downloads `e` to the path `n`, streaming the body into a file handle. The progress callback `i` is invoked at 5% steps while a total size is known and with 1 on completion. On any stream error the handle is closed, the partial file removed, and the error rethrown; a final size mismatch against `r` also removes the file and throws.
jn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(x.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await p(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new zt({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},
// Resolves which model file to use for a config. The repo id is taken from `model.repo_id`, `model.repository` or `model.model` and is required. Results are cached under `artifact-info`, keyed by repo, revision, filename, url and quantization settings. The body continues on the next line.
Mn=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await 
on(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await On(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await sn(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Dn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:Wt,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=tn(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||tn(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await Dn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await On(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await sn(i,`artifact-info`,y,r),y},Nn=/^mmproj-.*\.gguf$/i,Pn=async(e,t)=>{if(!e.model.enable_mtmd)return null;let 
n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await on(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await On(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await sn(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:x.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:x.resolve(e.model.mmproj_local_path)};return await sn(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await Dn(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),l=c?.siblings||c?.files||[],u=l.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),d=e.model.mmproj_filename;if(d){if(!u.includes(d))throw Error(`mmproj file "${d}" not found in repo ${i}`)}else{let e=u.filter(e=>Nn.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let n=t?.quantization&&String(t.quantization).toLowerCase();d=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let f=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${d}`,p=l.find(e=>(e.rfilename||e.path||e.filename)===d),m=Number(p?.size);if(!Number.isFinite(m)||m<=0){let e=await On(f,{headers:r});m=Number(e.headers.get(`content-length`))||null}let 
h={repoId:i,revision:a,filename:d,url:f,size:m,headers:r};return await sn(o,`artifact-info`,h,n),h},Fn=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=x.join(e.runtime.cache_dir,...n,t.revision);return x.join(r,t.filename)},In=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=nn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?Zt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?Gt:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await V({platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:w,isLibVariantAvailable:T},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Ln=async e=>{let t=await Mn(e),n=await Pn(e,t),r=await kn(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:C}=ft(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=at({arch:i,metadata:r,nLayer:w}),k=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,re=Math.max(0,Math.floor(Number(k)||0)),A={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift,...Jt(e)},j=A.n_ctx?Number(A.n_ctx):null,M=j||a||4096,N=[],P=[],ie=!0;if(j&&a&&j>a){ie=!1;let e=`Requested context length (${j}) exceeds model training context 
(${a})`;N.push(e),P.push(e),M=a}j&&!a&&N.push(`Model metadata missing training context length, using requested value`);let F={k:A.cache_type_k,v:A.cache_type_v},I=t.size>0?t.size:0,L=pt({layerCount:re,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:F,swaConfig:O,kvUnified:A.kv_unified,nParallel:A.n_parallel,swaFull:A.swa_full,arch:i,attentionLayerCount:m}),ae=mt({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:A.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:C,arch:i}),R=await In(e,{modelBytes:I,kvCacheBytes:L(M)+ae}),oe=R.selected.totalMemory||0,se=oe*(e.backend.gpu_memory_fraction||1),ce=e.backend.cpu_memory_fraction==null?Gt:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),z=Math.max(0,S.totalmem()*ce),le=R.selected.hasGpu?se:z,ue=ht({maxCtx:M,availableMemory:le,modelBytes:I,kvBytesForCtx:L});if(!j&&ue){let e=a?Math.min(ue,a):ue,t=Math.max(32,e);t<M&&N.push(`Context length capped to ${t} by memory limits`),M=t}M>ue&&(M=ue);let de=Math.floor(ue);console.log(`[buttress] Memory-limited context length: ${de}`);let fe=L(M),pe=I+fe+ae,me=w?I/(w+1):I,he=0;R.selected.hasGpu&&me>0&&(he=Math.min(w+1,Math.max(0,Math.floor(se/me)))),console.log(`[buttress] Auto GPU layer capacity (${R.selected.variant}): ${he}/${w+1}`);let ge;ge=A.n_gpu_layers===`auto`||A.n_gpu_layers==null?he:Math.max(0,Math.min(Number(A.n_gpu_layers)||0,w+1));let _e=(()=>{let e=A.flash_attn_type&&String(A.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:R.selected.hasGpu?`auto`:`off`})(),ve=e.runtime.cache_dir,B=An(e,t),ye=await K(B,t.size),be=Fn(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:ie,backend:`ggml-llm`,warnings:N,errors:P,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:re,swa:O?.enabled?{window:O.window,pattern:O.pattern,dense_first:O.denseFirst,type:O.type,layers:O.swaLayers}:null}},runtime:{...A,variant:R.selected.variant,n_ctx:M,requested_ctx:j,n_gpu_layers:ge,auto_gpu_layers:he,flash_attn_type:_e,cache_type_k:F.k,cache_type_v:F.v,estimated_max_n_ctx:de},resources:{modelBytes:I,kvCacheBytes:fe,recurrentMemoryBytes:ae,totalEstimatedBytes:pe,gpuCapacityBytes:oe,gpuUsableBytes:se,cpuUsableBytes:z,fit:R.selected.fit},devices:{selected:R.selected,attempts:R.attempts},download:{cacheDir:ve,localPath:B,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:R,cacheTypes:F,localPath:B,localExists:ye}},Rn=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new Rt({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};U.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),U.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},zn=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&y(i).catch(()=>{}),c&&y(c).catch(()=>{})};return new Rt({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};U.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),U.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},Bn=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return s(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},Vn=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=x.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),l=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),u=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${l}`,d=x.join(t,l);await K(d)||await jn(u,o.headers,d,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await jn(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},Hn=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await jn(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},Un=async(e,t)=>{let n=Bn(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await Vn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};for(let t of Kt){let n=e.plan.info.runtime[t];n!=null&&(o[t]=n)}e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await E(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await Hn(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),U.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(U.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},Wn=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},Gn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Wn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Wn(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await Wn(e,t)},i),!0)};async function Kn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=en(t),a=await Ln(i),o=new En(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await Un(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=Xt(t,s.plan.info.runtime),i=Bn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);await a.ready;let o=r.prompt||``,c=null,l=null;if(!o&&r.messages){({messages:c}=r),l={chatTemplate:r.chat_template||r.chatTemplate,jinja:r.jinja??!0,tools:r.tools,parallel_tool_calls:r.parallel_tool_calls,tool_choice:r.tool_choice,reasoning_format:r.reasoning_format,enable_thinking:r.enable_thinking,add_generation_prompt:r.add_generation_prompt,now:r.now,chat_template_kwargs:r.chat_template_kwargs,force_pure_content:r.force_pure_content};let e=await a.context.getFormattedChat(c,l.chatTemplate,l);o=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&o){let{options:e}=await s.sessionCache.prepareCompletionOptions(r,o,a.context),t=await s.sessionCache.generateTempStatePath(),n=(await a.context.tokenize(o))?.tokens?.length||0,i={...e,save_state_path:t},c=s.sessionCache.requiresExactMatch(),l=!!i.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),i.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return zn(a.context,i,s.sessionCache,o,t,n,s.id,d,u)}let u={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Rn(a.context,r,s.id,u)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=Bn(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let 
a=await i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=Bn(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=Bn(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=Bn(s),t=s.contexts.get(e);return t?Gn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):Wn(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const qn=e=>{let t=en(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function 
Jn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=en(e),o=await Mn(n),s=An(n,o),{repoId:c}=o,l=await Pn(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),u=Fn(n,l),d=async()=>{if(!l||!u||l.localPath)return;if(await K(u,l.size)){console.log(`[Download] mmproj already exists: ${u}`);return}let e=t.getDownload(u);if(e){await e;return}let n=(async()=>{try{await jn(l.url,l.headers,u,l.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(u)}})();t.setDownload(u,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await d().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let f=t.getDownload(s);if(f)return console.log(`[Download] Already downloading: ${c}`),f.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let p=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=x.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let l=String(s).padStart(5,`0`),u=o.filename.replace(e,`-${l}-of-${String(i).padStart(5,`0`)}.gguf`),d=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=x.join(t,u);await K(f)||await jn(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await jn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await d(),console.log(`[Download] Completed: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,p),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function Yn(e){let t=en(e),n=await Mn(t),r=await kn(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:C}=ft(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,te=u!=null&&Number.isFinite(Number(u))?Number(u):ee,ne=d!=null&&Number.isFinite(Number(d))?Number(d):ee,O=at({arch:i,metadata:r,nLayer:w}),k=O&&Number.isFinite(Number(O.kvLayers))?Number(O.kvLayers):w,re=Math.max(0,Math.floor(Number(k)||0)),A=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,j={k:t.model.cache_type_k,v:t.model.cache_type_v},M=n.size>0?n.size:0,N=t.model.n_parallel||4,P=pt({layerCount:re,headKvCount:D,embdHeadKCount:te,embdHeadVCount:ne,cacheTypes:j,swaConfig:O,kvUnified:t.model.kv_unified,nParallel:N,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),ie=mt({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:N,ssmDConv:g,ssmDState:_,ssmDInner:v,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:C,arch:i}),F=t.backend?.gpu_memory_fraction==null?Zt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),I=t.backend?.cpu_memory_fraction==null?Gt:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),L=await 
In(t,{modelBytes:M,kvCacheBytes:P(A)}),ae=(L.selected.totalMemory||0)*F,R=Math.max(0,S.totalmem()*I),oe=ht({maxCtx:A,availableMemory:L.selected.hasGpu?ae:R,modelBytes:M,kvBytesForCtx:P}),se=P(A),ce=P(oe);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:te,nEmbdHeadV:ne,nHeadCount:E,nHeadKvCount:D,kvLayerCount:re,swa:O?.enabled?{window:O.window,pattern:O.pattern,denseFirst:O.denseFirst,type:O.type,layers:O.swaLayers}:null},modelBytes:M,kvCacheBytes:se,limitedKvCacheBytes:ce,memoryLimitedCtx:oe,recurrentMemoryBytes:ie,quantization:{name:n.quantization||null,fileType:p,version:f}}}const Xn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):B(e):0;async function Zn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await Yn(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await V({...a,platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:w,isLibVariantAvailable:T},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=Xn(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let v=null,y=null;if(e){let t=Xn(e);y={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=y.fit,a=y.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}v={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!v&&(v={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let b=null;return i&&(b={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:y,comparison:v,modelConfig:b}}const{WritableStream:Qn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:l,$n=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),$n(e[t],n)):e[t]=n}),e),er=`https://huggingface.co`,tr=`https://huggingface.co/api`,nr=x.join(S.homedir(),`.buttress`,`models`),rr=[`cuda`,`vulkan`,`default`],ir=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],ar=`fp16`,or=.5,sr=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],cr=e=>{if(!e)return null;let t=e.toLowerCase();return 
sr.find(e=>t.includes(e))||null},lr={backend:{type:`ggml-stt`,variant:null,variant_preference:rr,gpu_memory_fraction:.85,cpu_memory_fraction:or},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,ar,`q5_1`],allow_local_file:!1,local_path:null,api_base:tr,base_url:er,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:nr,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},ur=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],dr=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},fr=(e={})=>{let t=structuredClone(lr);if($n(t,e),t.backend.variant=dr(t.backend.variant),t.backend.variant_preference=Array.from(new Set(ur(t.backend.variant_preference||rr).flatMap(e=>{let t=dr(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...rr]),t.runtime.prefer_variants=Array.from(new Set(ur(t.runtime.prefer_variants).flatMap(e=>{let t=dr(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(ur(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||er,t.model.api_base=t.model.api_base||tr,t.runtime.cache_dir=t.runtime.cache_dir?x.resolve(t.runtime.cache_dir):nr,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||lr.runtime.context_release_delay_ms),t},pr=e=>{let t=e.toLowerCase();return ir.find(e=>t.includes(e))||null},mr=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=dr(e);t&&n.add(t)}return Array.from(n)},hr=async e=>{await f(e,{recursive:!0})},gr=(e=nr)=>x.join(e,`.metadata-cache`),_r=(e,t,n=nr)=>{let r=s(`sha256`).update(e).digest(`hex`);return x.join(gr(n),t,`${r}.json`)},vr=async(e,t,n=nr)=>{try{let r=await m(_r(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},yr=async(e,t,n,r=nr)=>{try{let i=_r(e,t,r);await hr(x.dirname(i)),await b(i,JSON.stringify(n),`utf-8`)}catch{}},br=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},xr=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Sr=(e,t)=>{if(e.model.local_path)return x.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=x.join(e.runtime.cache_dir,...n,t.revision);return x.join(r,t.filename)},Cr=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},wr=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await hr(x.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await p(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new Qn({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Tr=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await vr(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await xr(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:pr(c||``),headers:o};return await yr(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await br(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:ir,f=()=>{for(let e of d)if(e===ar){let e=u.find(e=>{let t=e.toLowerCase();return!ir.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=pr(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||pr(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
xr(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await yr(i,`artifact-info`,g,r),g},Er=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=mr(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?lr.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?or:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await V({platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:w,isLibVariantAvailable:T},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Dr=async e=>{let t=await Tr(e),n=$e({modelBytes:t.size>0?t.size:0}),r=await Er(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Sr(e,t),l=await Cr(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:cr(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},Or=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await Cr(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await Cr(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await wr(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
kr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Ar=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},jr=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await Or(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
te({filePath:i,useFlashAttn:!!e.plan.info.runtime.use_flash_attn,useGpu:!!e.plan.info.runtime.use_gpu,nThreads:e.plan.info.runtime.max_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return Et.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw Et.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},Mr=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),Nr=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Mr(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Mr(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await Mr(e,t)},i),!0)};async function Pr(e,t,n={}){let{globalDownloadManager:r=null}=n,i=fr(t),a=await Dr(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
kr,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await Mr(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await jr(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},l=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Nr(o,e):!1},u=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Ar(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=x.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return Et.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
Et.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>u(e),transcribeData:async(e={})=>u(e),releaseContext:l,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const Fr=e=>{let t=fr(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=cr(t.model.filename);return r?`${n}:${r}`:n};async function Ir(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=fr(e),o=await Tr(n),s=Sr(n,o),{repoId:c}=o;if(await Cr(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await wr(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const Lr=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):B(e):0;async function Rr(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await Tr(fr(i));o=e.size??null,{processingBufferBytes:s}=$e({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await V({...a,platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:w,isLibVariantAvailable:T},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=Lr(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=Lr(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:zr}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:l,Br=ne(import.meta.url),Vr=x.dirname(Br),Hr=x.join(Vr,`mlx-bridge.py`),Ur=`mlx-vlm==0.4.0`,Wr=`mlx-lm==0.31.1`,Gr=x.join(S.homedir(),`.buttress`,`models`),Kr={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:Gr,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:Wr,mlx_vlm_package:Ur,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},qr=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?D.parse(e)??t:t,Jr=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),Jr(e[t],n)):e[t]=n}),e),Yr=(e={})=>{let t=structuredClone(Kr);return Jr(t,e),t},Xr=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},Zr=async e=>{await f(e,{recursive:!0})},Qr=(e,t,n)=>{let r=s(`sha256`).update(e).digest(`hex`);return x.join(n,`.metadata-cache`,t,`${r}.json`)},$r=async(e,t,n)=>{try{let r=await m(Qr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},ei=async(e,t,n,r)=>{try{let i=Qr(e,t,r);await Zr(x.dirname(i)),await 
b(i,JSON.stringify(n),`utf-8`)}catch{}};async function ti(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await $r(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await Xr(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await Xr(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await ei(i,`mlx-model-metadata`,D,n),D}function ni({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const ri=async e=>{try{return await v(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{O(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),ii=new Map;async function ai({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=x.resolve(e),a=ii.get(i);if(a){let e=await 
a;return r?.(1),e}let o=si({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});ii.set(i,o);try{return await o}finally{ii.delete(i)}}const oi=[3,10];async function si({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=x.join(e,`bin`,`python3`),a=x.join(e,`bin`,`pip`);if(await ri(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await ri(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<oi[0]||t===oi[0]&&n<oi[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${oi.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await f(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var ci=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=re(e,[Hr],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
|
|
3
3
|
`);this.buffer=t.pop();for(let e of t)if(e.trim())try{this.handleMessage(JSON.parse(e))}catch(t){console.error(`[mlx-llm] Failed to parse bridge message:`,e,t)}}),this.readyPromise=new Promise((e,t)=>{this.pendingRequests.set(`__init__`,{resolve:()=>e(),reject:t}),setTimeout(()=>t(Error(`Bridge startup timeout`)),3e4)}),this.readyPromise}handleMessage(e){let t=this.pendingRequests.get(e.id);t&&(e.error?(t.reject(Error(e.error.message)),this.pendingRequests.delete(e.id)):e.event?e.event===`result`?(t.resolve(e.data),this.pendingRequests.delete(e.id)):t.onEvent?.(e.event,e.data):e.result!==void 0&&(t.resolve(e.result),this.pendingRequests.delete(e.id)))}async call(e,t={}){if(!this.process)throw Error(`Bridge not running`);let n=String(++this.requestCounter);return new Promise((r,i)=>{this.pendingRequests.set(n,{resolve:r,reject:i}),this.write({id:n,method:e,params:t})})}stream(e,t,n){if(!this.process)throw Error(`Bridge not running`);let r=String(++this.requestCounter);return{id:r,promise:new Promise((i,a)=>{this.pendingRequests.set(r,{resolve:i,reject:a,onEvent:n}),this.write({id:r,method:e,params:t})})}}cancel(e){this.process&&this.write({id:`cancel-${e}`,method:`cancel`,params:{request_id:e}})}write(e){this.process?.stdin?.write(JSON.stringify(e)+`
|
|
4
|
-
`)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function Ar(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),x.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function jr(e){let t=Ar();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const Mr=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Nr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Pr=`</think>`;function Fr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(Pr);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(Pr.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Ir(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new or({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let 
e=Fr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=Fr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),H.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function Lr(e,t,n={}){let r=gr(t),i=jr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Nr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||fr,n=await 
Er({envDir:r.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||dr,mlxVlmPackage:r.runtime.mlx_vlm_package||ur,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new kr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.model.tokenizer_config&&(o.tokenizer_config=r.model.tokenizer_config),r.model.model_config&&(o.model_config=r.model.model_config),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let u=r.runtime.session_cache;u?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:l.join(e,`mlx-session-cache`),max_entries:u?.max_entries||100,max_size_bytes:mr(u?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=Mr(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new or({start(e){a.queue.enqueue(async()=>{let n=Ir(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Rr=e=>gr(e).model.repo_id||null;async function zr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=Ar(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await De({platform:process.platform,arch:x.arch(),totalMemoryInBytes:x.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=gr(r),t=e.model.repo_id;if(t)try{l=await Sr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||fr,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=Cr({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=Cr({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:x.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Br(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=gr(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=Ar();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let u=`mlx:${s}`;if(t?.isDownloading(u))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let d=(async()=>{try{let e=o.runtime.cache_dir||fr,t=await Er({envDir:o.runtime.mlx_env_dir||l.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||dr,mlxVlmPackage:o.runtime.mlx_vlm_package||ur,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
|
|
4
|
+
`)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function li(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),S.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function ui(e){let t=li();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const di=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var fi=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const pi=`</think>`;function mi(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(pi);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(pi.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function hi(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new zr({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let 
e=mi(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=mi(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),U.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),U.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function gi(e,t,n={}){let r=Yr(t),i=ui(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new fi,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||Gr,n=await 
ai({envDir:r.runtime.mlx_env_dir||x.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||Wr,mlxVlmPackage:r.runtime.mlx_vlm_package||Ur,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new ci,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.model.tokenizer_config&&(o.tokenizer_config=r.model.tokenizer_config),r.model.model_config&&(o.model_config=r.model.model_config),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let l=r.runtime.session_cache;l?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:x.join(e,`mlx-session-cache`),max_entries:l?.max_entries||100,max_size_bytes:qr(l?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),U.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),U.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=di(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new zr({start(e){a.queue.enqueue(async()=>{let n=hi(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const _i=e=>Yr(e).model.repo_id||null;async function vi(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=li(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await Fe({platform:process.platform,arch:S.arch(),totalMemoryInBytes:S.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=Yr(r),t=e.model.repo_id;if(t)try{l=await ti(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||Gr,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=ni({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=ni({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:S.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function yi(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=Yr(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=li();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let l=`mlx:${s}`;if(t?.isDownloading(l))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let u=(async()=>{try{let e=o.runtime.cache_dir||Gr,t=await ai({envDir:o.runtime.mlx_env_dir||x.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||Wr,mlxVlmPackage:o.runtime.mlx_vlm_package||Ur,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
|
|
5
5
|
from huggingface_hub import snapshot_download
|
|
6
6
|
path = snapshot_download("${s}", revision="${o.model.revision||`main`}")
|
|
7
7
|
print(path)
|
|
8
|
-
`.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let
|
|
9
|
-
`).pop();i?.({localPath:u,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(u)}})();return t?.setDownload(u,d),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return vn(t,n);if(e===`ggml-stt`)return ar(t,n);if(e===`mlx-llm`)return zr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.25.0-beta.26`,Vr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.25.0-beta.26`,"@fugood/llama.node":`^1.7.4`,"@fugood/whisper.node":`^1.0.19`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Hr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Ur({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
|
|
8
|
+
`.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let l=c.stdout.trim().split(`
|
|
9
|
+
`).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}const bi=async e=>{await f(e,{recursive:!0})},xi=async(e,t)=>{try{let n=await v(e);return t?n.size===t:!0}catch{return!1}},Si=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await bi(x.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await p(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new WritableStream({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await y(n).catch(()=>{}),e}if(r){let e=await v(n);if(e.size!==r)throw await y(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Ci=`https://huggingface.co`,wi=(e,t,n=`main`)=>n===`main`?x.join(e,...t.split(`/`)):x.join(e,...t.split(`/`),n),Ti=async e=>{try{let t=await h(e,{recursive:!0,withFileTypes:!0}),n=[];for(let r of t){if(!r.isFile())continue;let t=x.relative(e,x.join(r.parentPath??r.path,r.name)),i=await v(x.join(e,t));n.push({rfilename:t.split(x.sep).join(`/`),size:i.size})}return n.length>0?n:null}catch{return null}},Ei=async e=>{try{let t=await m(x.join(e,`config.json`),`utf-8`);return JSON.parse(t)}catch{return null}},Di=async(e,t,n,r={})=>{let i=`${e}/api/models/${t}?revision=${encodeURIComponent(n)}&blobs=true`,a=await fetch(i,{headers:r.headers});if(!a.ok)throw Error(`HF API error: ${a.status} ${a.statusText}`);return(await a.json()).siblings||[]},Oi=async(e,t,n,r={})=>{try{let i=`${e}/${t}/raw/${encodeURIComponent(n)}/config.json`,a=await 
fetch(i,{headers:r.headers});if(a.ok)return await a.json()}catch{}return null},ki=async({repoId:e,revision:t=`main`,modelType:n,dtype:r,cacheDir:i,baseUrl:a=Ci,subfolder:o=`onnx`,headers:s,configJson:c})=>{let l=a.replace(/\/$/,``);if(i){let a=wi(i,e,t),s=await Ti(a);if(s){let e=c??await Ei(a);return{...St({siblings:s,modelType:n||e?.model_type,dtype:r,tjConfig:e?.[`transformers.js_config`],subfolder:o}),source:`local`}}}let u=c??await Oi(l,e,t,{headers:s});return{...St({siblings:await Di(l,e,t,{headers:s}),modelType:n||u?.model_type,dtype:r,tjConfig:u?.[`transformers.js_config`],subfolder:o}),source:`remote`}},Ai=async({repoId:e,revision:t=`main`,modelType:n,dtype:r,cacheDir:i,baseUrl:a=Ci,subfolder:o=`onnx`,headers:s,configJson:c})=>{let l=a.replace(/\/$/,``),u=wi(i,e,t),d=c??await Ei(u)??await Oi(l,e,t,{headers:s}),f=await Di(l,e,t,{headers:s}),p=St({siblings:f,modelType:n||d?.model_type,dtype:r,tjConfig:d?.[`transformers.js_config`],subfolder:o}),m=new Set;for(let e of p.files){let t=gt[e.dtype]??``;m.add(`${o}/${e.name}${t}.onnx`),m.add(`${o}/${e.name}${t}.onnx_data`)}let h=[/^\./,/^README/i],g=e=>e.endsWith(`.onnx`)||e.endsWith(`.onnx_data`),_=[];for(let n of f)h.some(e=>e.test(n.rfilename))||g(n.rfilename)&&!m.has(n.rfilename)||_.push({rfilename:n.rfilename,url:`${l}/${e}/resolve/${encodeURIComponent(t)}/${n.rfilename}`,localPath:x.join(u,...n.rfilename.split(`/`)),size:n.size||0});return{modelDir:u,files:_,config:d}},ji=async(e,t,n={})=>{let{onProgress:r,onComplete:i,onError:a}=n,o=e.model?.repo_id;if(!o)return{started:!1,localPath:null,repoId:null,error:`Missing model.repo_id`};try{let n=e.runtime?.huggingface_token?{Authorization:`Bearer ${e.runtime.huggingface_token}`}:void 0,{modelDir:s,files:c}=await Ai({repoId:o,revision:e.model?.revision,dtype:e.model?.dtype,cacheDir:e.runtime?.cache_dir,baseUrl:e.model?.base_url,headers:n}),l=[];for(let e of c)await xi(e.localPath,e.size||void 0)||l.push(e);if(l.length===0)return typeof 
i==`function`&&i({localPath:s,repoId:o,alreadyExists:!0}),{started:!1,localPath:s,repoId:o,alreadyExists:!0};let u=e.model?.dtype,d=typeof u==`object`?JSON.stringify(u):u||`auto`,f=`onnx:${o}:${e.model?.revision||`main`}:${d}`,p=t?.getDownload(f);if(p)return p.then(()=>{typeof i==`function`&&i({localPath:s,repoId:o,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:o,alreadyDownloading:!0};console.log(`[Download] ONNX ${o}: ${l.length} files to download`);let m=(async()=>{let e=0,t=l.reduce((e,t)=>e+t.size,0);for(let i of l){let a=e;await Si(i.url,n,i.localPath,i.size||void 0,e=>{if(typeof r==`function`&&t>0){let n=(a+i.size*e)/t;r(Math.min(1,n))}}),e+=i.size}})();return t&&(t.setDownload(f,m),m.finally(()=>t.deleteDownload(f))),m.then(()=>{typeof i==`function`&&i({localPath:s,repoId:o})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!0,localPath:s,repoId:o}}catch(e){return typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:o,error:e.message}}},Mi=x.join(S.homedir(),`.buttress`,`models`),Ni={backend:{type:`onnx-stt`,provider:null,provider_preference:[`cuda`,`coreml`,`dml`,`webgpu`,`cpu`],gpu_memory_fraction:.85,cpu_memory_fraction:.5},model:{repo_id:null,revision:`main`,task:`automatic-speech-recognition`,device:null,dtype:`auto`,base_url:`https://huggingface.co`},runtime:{cache_dir:Mi,prefer_providers:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{}}},Pi=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),Pi(e[t],n)):e[t]=n}),e),Fi=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`coreml`,`dml`,`webgpu`,`qnn`,`cpu`].includes(t)?t:null},Ii=(e={})=>{let t=JSON.parse(JSON.stringify(Ni));return Pi(t,e),t.backend.provider=Fi(t.backend.provider),t.backend.provider_preference=Array.from(new 
Set((Array.isArray(t.backend.provider_preference)?t.backend.provider_preference:[]).map(Fi).filter(Boolean))),t.backend.provider_preference.length===0&&(t.backend.provider_preference=[`cuda`,`coreml`,`dml`,`webgpu`,`cpu`]),t.runtime.prefer_providers=Array.from(new Set((Array.isArray(t.runtime.prefer_providers)?t.runtime.prefer_providers:[]).map(Fi).filter(Boolean))),t.runtime.cache_dir=t.runtime.cache_dir?x.resolve(t.runtime.cache_dir):Mi,t},Li=async e=>{let t=[];e.backend.provider&&t.push(e.backend.provider),e.runtime.prefer_providers.length>0&&t.push(...e.runtime.prefer_providers),t.push(...e.backend.provider_preference);let n=e.backend?.gpu_memory_fraction==null?.85:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),r=e.backend?.cpu_memory_fraction==null?.5:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),i=await V({platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:qe,provider:t[0]||null,preferProviders:t.slice(1),gpuMemoryFraction:n,cpuMemoryFraction:r,dependencies:{listSupportedBackends:N}});if(!i.ok||!i.selected){let e=(i.attempts||[]).map(e=>`${e.provider}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any ONNX provider (${t.join(`, `)}). 
Errors: ${e}`)}return{selected:i.selected,attempts:i.attempts}},Ri=async e=>{let t=e.model.repo_id;if(!t)throw Error("`model.repo_id` is required in Buttress ONNX-STT backend config");let n=await Li(e),r=n.selected?.provider||`cpu`,i=e.runtime.huggingface_token?{Authorization:`Bearer ${e.runtime.huggingface_token}`}:void 0,a=[],o=0,s=0;try{let n=await ki({repoId:t,revision:e.model.revision,dtype:e.model.dtype,cacheDir:e.runtime.cache_dir,baseUrl:e.model.base_url,headers:i});o=n.totalBytes,s=Ct(o,r),a.push(...n.warnings)}catch(e){a.push(`Model size estimation failed: ${e.message}`)}if(s>0&&n.selected){let e=n.selected.hasGpu?n.selected.gpuUsableBytes:n.selected.cpuUsableBytes;e>0&&s>e&&a.push(`Estimated runtime memory (${(s/1024/1024).toFixed(0)} MB) exceeds available ${n.selected.hasGpu?`GPU`:`CPU`} memory (${(e/1024/1024).toFixed(0)} MB)`)}return{config:e,info:{ok:!0,backend:`onnx-stt`,warnings:a,errors:[],model:{repoId:t,revision:e.model.revision,task:e.model.task,dtype:e.model.dtype,modelBytes:o,runtimeBytes:s},runtime:{provider:r,device:e.model.device||(n.selected?.hasGpu?`gpu`:`cpu`),cache_dir:e.runtime.cache_dir},providers:n,timestamp:new Date().toISOString()},providers:n}};async function zi(e,t,n={}){let r=await Ri(Ii(t)),i={id:e,type:`onnx-stt`,config:t,plan:r,info:r.info,pipelines:new Map};return{id:e,type:`onnx-stt`,info:r.info,initContext:async(e={})=>{let{onProgress:t}=e,n=`${i.plan.info.model.repoId}:${i.plan.info.runtime.provider}`,r=i.pipelines.get(n);if(r)return r.refCount+=1,await r.ready,typeof t==`function`&&t(1),{modelInfo:r.modelInfo,runtime:{...i.plan.info.runtime}};r={key:n,refCount:1};let a=Date.now();r.ready=(async()=>{typeof t==`function`&&t(.1);let e=new Map,n=()=>{if(typeof t!=`function`)return;let 
n=Array.from(e.values());n.length!==0&&t(.1+n.reduce((e,t)=>e+t,0)/n.length/100*.8)},a={cache_dir:i.plan.info.runtime.cache_dir,device:i.plan.info.runtime.device,dtype:i.plan.info.model.dtype,revision:i.plan.info.model.revision,progress_callback:t=>{t.status===`initiate`?e.set(t.file,0):t.status===`progress`?(e.set(t.file,t.progress),n()):t.status===`done`&&(e.set(t.file,100),n())}};i.plan.info.runtime.provider!==`cpu`&&(a.session_options={executionProviders:[{name:i.plan.info.runtime.provider}]});let o=await M(i.plan.info.model.task,i.plan.info.model.repoId,a);return typeof t==`function`&&t(1),r.pipeline=o,r.modelInfo={task:i.plan.info.model.task,repoId:i.plan.info.model.repoId,provider:i.plan.info.runtime.provider},r})(),i.pipelines.set(n,r);try{return await r.ready,Dt.addModelLoad({id:i.id,repoId:i.plan.info.model.repoId,dtype:i.plan.info.model.dtype,provider:i.plan.info.runtime.provider,durationMs:Date.now()-a,success:!0}),{modelInfo:r.modelInfo,runtime:{...i.plan.info.runtime}}}catch(e){throw Dt.addModelLoad({id:i.id,repoId:i.plan.info.model.repoId,dtype:i.plan.info.model.dtype,provider:i.plan.info.runtime.provider,durationMs:Date.now()-a,success:!1,error:e?.message||String(e)}),i.pipelines.delete(n),e}},transcribe:(e={})=>{let{audio:t,options:n={}}=e;if(!t)throw Error(`Audio input is required for transcription`);let r=`${i.plan.info.model.repoId}:${i.plan.info.runtime.provider}`,a=i.pipelines.get(r);if(!a)throw Error(`Pipeline "${r}" not initialized`);return new u({async start(e){await a.ready;let r=Date.now();try{let o=new j(a.pipeline.tokenizer,{skip_prompt:!0,skip_special_tokens:!0,callback_function:t=>{e.enqueue({event:`partial`,data:{text:t}})}}),s=await 
a.pipeline(t,{...n,streamer:o});Dt.addTranscription({id:i.id,repoId:i.plan.info.model.repoId,provider:i.plan.info.runtime.provider,durationMs:Date.now()-r,segmentCount:s?.segments?.length||0,textLength:s?.text?.length||0,success:!0}),e.enqueue({event:`result`,data:s}),e.close()}catch(t){Dt.addTranscription({id:i.id,repoId:i.plan.info.model.repoId,provider:i.plan.info.runtime.provider,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),e.error(t)}}})},transcribeData:async(e={})=>{let{audioData:t,options:n={}}=e;if(!t)throw Error(`audioData is required for transcription`);let r=`${i.plan.info.model.repoId}:${i.plan.info.runtime.provider}`,a=i.pipelines.get(r);if(!a)throw Error(`Pipeline "${r}" not initialized`);await a.ready;let o=t;if(Buffer.isBuffer(t)){let e=new Int16Array(t.buffer,t.byteOffset,t.byteLength/2);o=new Float32Array(e.length);for(let t=0;t<e.length;t+=1)o[t]=e[t]/32768}else if(t instanceof ArrayBuffer){let e=new Int16Array(t);o=new Float32Array(e.length);for(let t=0;t<e.length;t+=1)o[t]=e[t]/32768}return a.pipeline(o,n)},releaseContext:async(e={})=>{let t=`${i.plan.info.model.repoId}:${i.plan.info.runtime.provider}`,n=i.pipelines.get(t);if(!n)return!1;if(e.force)n.refCount=0;else if(n.refCount=Math.max(0,n.refCount-1),n.refCount>0)return!1;i.pipelines.delete(t);try{await n.pipeline?.dispose?.()}catch{}return!0},finalize:async()=>{let e=Array.from(i.pipelines.values()).map(async e=>{try{await e.pipeline?.dispose?.()}catch{}});await Promise.allSettled(e),i.pipelines.clear()}}}const Bi=e=>{let t=Ii(e);return t.model.repo_id?`${t.model.repo_id}:${t.model.revision||`main`}`:null};async function Vi(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,dependencies:a,...o}=t;if(!a?.listSupportedBackends)throw TypeError(`getCapabilities requires listSupportedBackends in dependencies`);let 
s=Ii(i||{}),c=[];s.backend.provider&&c.push(s.backend.provider),s.runtime.prefer_providers.length>0&&c.push(...s.runtime.prefer_providers),c.push(...s.backend.provider_preference);let l=s.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(s.backend.gpu_memory_fraction))),u=s.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(s.backend.cpu_memory_fraction))),d=await V({...o,platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:qe,includeBreakdown:r,provider:c[0]||null,preferProviders:c.slice(1),gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{listSupportedBackends:a.listSupportedBackends}}),f=d.selected?.score||0,p=null,m=null;if(e){let t=e.score||0;m={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;d.ok?!t&&t!==0?(r=`buttress`,i=`missing-client-score`):t>f*n?(r=`local`,i=`client-better`):f>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`):(r=`local`,i=`buttress-unavailable`),p={buttressScore:f,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!p&&(p={buttressScore:f,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let h=null;return i&&(h={repoId:i.model?.repo_id||null,task:i.model?.task||`automatic-speech-recognition`,dtype:i.model?.dtype||`auto`}),{type:`onnx-stt`,timestamp:new Date().toISOString(),buttress:d,client:m,comparison:p,modelConfig:h}}const 
Hi=x.join(S.homedir(),`.buttress`,`models`),Ui={backend:{type:`onnx-tts`,provider:null,provider_preference:[`cuda`,`coreml`,`dml`,`webgpu`,`cpu`],gpu_memory_fraction:.85,cpu_memory_fraction:.5},model:{repo_id:null,revision:`main`,task:`text-to-speech`,device:null,dtype:`auto`,vocoder_repo_id:null,base_url:`https://huggingface.co`},runtime:{cache_dir:Hi,prefer_providers:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},output_cache:{enabled:!0,max_size_bytes:2*1024*1024*1024,max_entries:5e3}}},Wi=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),Wi(e[t],n)):e[t]=n}),e),Gi=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`coreml`,`dml`,`webgpu`,`qnn`,`cpu`].includes(t)?t:null},Ki=(e={})=>{let t=JSON.parse(JSON.stringify(Ui));return Wi(t,e),t.backend.provider=Gi(t.backend.provider),t.backend.provider_preference=Array.from(new Set((Array.isArray(t.backend.provider_preference)?t.backend.provider_preference:[]).map(Gi).filter(Boolean))),t.backend.provider_preference.length===0&&(t.backend.provider_preference=[`cuda`,`coreml`,`dml`,`webgpu`,`cpu`]),t.runtime.prefer_providers=Array.from(new Set((Array.isArray(t.runtime.prefer_providers)?t.runtime.prefer_providers:[]).map(Gi).filter(Boolean))),t.runtime.cache_dir=t.runtime.cache_dir?x.resolve(t.runtime.cache_dir):Hi,t},qi=e=>x.join(e,`.tts-cache`),Ji=e=>x.join(qi(e),`cache-map.json`),Yi=e=>x.join(qi(e),`audio`),Xi=(e,t,n)=>{let r=JSON.stringify({text:e,model:t,options:n});return s(`sha256`).update(r).digest(`hex`).slice(0,24)};var Zi=class{constructor(e){this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.output_cache?.enabled!==!1,this.maxSizeBytes=e.runtime.output_cache?.max_size_bytes??2*1024*1024*1024,this.maxEntries=e.runtime.output_cache?.max_entries??5e3,this.cacheMap=null}async load(){if(this.enabled)try{let e=await 
m(Ji(this.baseDir),`utf-8`);this.cacheMap=JSON.parse(e),this.cacheMap?.entries||(this.cacheMap={entries:{},totalSize:0})}catch{this.cacheMap={entries:{},totalSize:0}}}async save(){if(!this.cacheMap)return;let e=Ji(this.baseDir);await f(x.dirname(e),{recursive:!0}),await b(e,JSON.stringify(this.cacheMap,null,2),`utf-8`)}async lookup(e,t,n){if(!this.enabled||!this.cacheMap)return null;let r=Xi(e,t,n),i=this.cacheMap.entries[r];if(!i)return null;try{await v(i.filePath)}catch{return delete this.cacheMap.entries[r],this.cacheMap.totalSize-=i.fileSize||0,await this.save(),null}return i.lastAccessedAt=new Date().toISOString(),await this.save(),i}async store(e,t,n,r,i,a){if(!this.enabled||!this.cacheMap)return null;let o=Xi(e,t,n),s=Yi(this.baseDir);await f(s,{recursive:!0});let c=x.join(s,`${o}.wav`),l=Buffer.from(r);await b(c,l);let u=l.length,d=this.cacheMap.entries[o];return d&&(this.cacheMap.totalSize-=d.fileSize||0),this.cacheMap.entries[o]={id:o,filePath:c,fileSize:u,samplingRate:i,channels:a,text:e.slice(0,200),createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()},this.cacheMap.totalSize+=u,await this.evict(),await this.save(),this.cacheMap.entries[o]}async evict(){if(!this.cacheMap)return;let e=()=>Object.keys(this.cacheMap.entries).length;if(e()<=this.maxEntries&&this.cacheMap.totalSize<=this.maxSizeBytes)return;let t=Object.values(this.cacheMap.entries).sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt));for(let n of t){if(e()<=this.maxEntries&&this.cacheMap.totalSize<=this.maxSizeBytes)break;await y(n.filePath).catch(()=>{}),this.cacheMap.totalSize-=n.fileSize||0,delete this.cacheMap.entries[n.id]}}};const Qi=async e=>{let t=[];e.backend.provider&&t.push(e.backend.provider),e.runtime.prefer_providers.length>0&&t.push(...e.runtime.prefer_providers),t.push(...e.backend.provider_preference);let 
n=e.backend?.gpu_memory_fraction==null?.85:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),r=e.backend?.cpu_memory_fraction==null?.5:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),i=await V({platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:Je,provider:t[0]||null,preferProviders:t.slice(1),gpuMemoryFraction:n,cpuMemoryFraction:r,dependencies:{listSupportedBackends:N}});if(!i.ok||!i.selected){let e=(i.attempts||[]).map(e=>`${e.provider}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any ONNX provider (${t.join(`, `)}). Errors: ${e}`)}return{selected:i.selected,attempts:i.attempts}},$i=async e=>{let t=e.model.repo_id;if(!t)throw Error("`model.repo_id` is required in Buttress ONNX-TTS backend config");let n=await Qi(e),r=n.selected?.provider||`cpu`,i=[],a=0,o=0,s=e.runtime.huggingface_token?{Authorization:`Bearer ${e.runtime.huggingface_token}`}:void 0;try{let n=await ki({repoId:t,revision:e.model.revision,dtype:e.model.dtype,cacheDir:e.runtime.cache_dir,baseUrl:e.model.base_url,headers:s});if(a=n.totalBytes,i.push(...n.warnings),e.model.vocoder_repo_id){let t=await ki({repoId:e.model.vocoder_repo_id,revision:e.model.revision,dtype:e.model.dtype,cacheDir:e.runtime.cache_dir,baseUrl:e.model.base_url,headers:s});a+=t.totalBytes,i.push(...t.warnings)}o=Ct(a,r)}catch(e){i.push(`Model size estimation failed: ${e.message}`)}if(o>0&&n.selected){let e=n.selected.hasGpu?n.selected.gpuUsableBytes:n.selected.cpuUsableBytes;e>0&&o>e&&i.push(`Estimated runtime memory (${(o/1024/1024).toFixed(0)} MB) exceeds available ${n.selected.hasGpu?`GPU`:`CPU`} memory (${(e/1024/1024).toFixed(0)} 
MB)`)}return{config:e,info:{ok:!0,backend:`onnx-tts`,warnings:i,errors:[],model:{repoId:t,revision:e.model.revision,task:e.model.task,dtype:e.model.dtype,vocoderRepoId:e.model.vocoder_repo_id,modelBytes:a,runtimeBytes:o},runtime:{provider:r,device:e.model.device||(n.selected?.hasGpu?`gpu`:`cpu`),cache_dir:e.runtime.cache_dir},providers:n,timestamp:new Date().toISOString()},providers:n}};async function ea(e,t,n={}){let r=Ki(t),i=await $i(r),a=new Zi(r);await a.load();let o={id:e,type:`onnx-tts`,config:t,plan:i,info:i.info,pipelines:new Map,speakers:new Map,outputCache:a};return{id:e,type:`onnx-tts`,info:i.info,initContext:async(e={})=>{let{onProgress:t}=e,n=`${o.plan.info.model.repoId}:${o.plan.info.runtime.provider}`,r=o.pipelines.get(n);if(r)return r.refCount+=1,await r.ready,typeof t==`function`&&t(1),{modelInfo:r.modelInfo,runtime:{...o.plan.info.runtime}};r={key:n,refCount:1};let i=Date.now();r.ready=(async()=>{typeof t==`function`&&t(.1);let e=new Map,n=(n,r)=>{if(typeof t!=`function`)return;let i=Array.from(e.values());i.length!==0&&t(n+i.reduce((e,t)=>e+t,0)/i.length/100*(r-n))},i={cache_dir:o.plan.info.runtime.cache_dir,device:o.plan.info.runtime.device,dtype:o.plan.info.model.dtype,revision:o.plan.info.model.revision,progress_callback:t=>{t.status===`initiate`?e.set(t.file,0):t.status===`progress`?(e.set(t.file,t.progress),n(.1,.8)):t.status===`done`&&(e.set(t.file,100),n(.1,.8))}};o.plan.info.runtime.provider!==`cpu`&&(i.session_options={executionProviders:[{name:o.plan.info.runtime.provider}]});let a=await M(o.plan.info.model.task,o.plan.info.model.repoId,i),s=null;if(o.plan.info.model.vocoderRepoId){typeof t==`function`&&t(.8),e.clear();let 
r={dtype:o.plan.info.model.dtype,progress_callback:t=>{t.status===`initiate`?e.set(t.file,0):t.status===`progress`?(e.set(t.file,t.progress),n(.8,1)):t.status===`done`&&(e.set(t.file,100),n(.8,1))}};o.plan.info.runtime.provider!==`cpu`&&(r.session_options={executionProviders:[{name:o.plan.info.runtime.provider}]}),s=await A.from_pretrained(o.plan.info.model.vocoderRepoId,r)}return typeof t==`function`&&t(1),r.pipeline=a,r.vocoder=s,r.modelInfo={task:o.plan.info.model.task,repoId:o.plan.info.model.repoId,vocoderRepoId:o.plan.info.model.vocoderRepoId,provider:o.plan.info.runtime.provider},r})(),o.pipelines.set(n,r);try{return await r.ready,Ot.addModelLoad({id:o.id,repoId:o.plan.info.model.repoId,dtype:o.plan.info.model.dtype,provider:o.plan.info.runtime.provider,vocoderRepoId:o.plan.info.model.vocoderRepoId||null,durationMs:Date.now()-i,success:!0}),{modelInfo:r.modelInfo,runtime:{...o.plan.info.runtime}}}catch(e){throw Ot.addModelLoad({id:o.id,repoId:o.plan.info.model.repoId,dtype:o.plan.info.model.dtype,provider:o.plan.info.runtime.provider,durationMs:Date.now()-i,success:!1,error:e?.message||String(e)}),o.pipelines.delete(n),e}},addSpeaker:e=>{if(e==null)throw Error(`Speaker is required`);if(e instanceof Float32Array){let t=s(`sha256`).update(Buffer.from(e.buffer,e.byteOffset,e.byteLength)).digest(`hex`).slice(0,16);return o.speakers.set(t,{type:`embed`,data:e}),{speakerId:t,type:`embed`}}if(typeof e==`object`&&!Array.isArray(e)){let t=s(`sha256`).update(JSON.stringify(e)).digest(`hex`).slice(0,16);return o.speakers.set(t,{type:`config`,data:e}),{speakerId:t,type:`config`}}throw Error(`Speaker must be a Float32Array (embedding) or a plain object (config)`)},synthesize:async(e={})=>{let{text:t,options:n={}}=e;if(!t)throw Error(`Text input is required for speech synthesis`);let i={repoId:o.plan.info.model.repoId,provider:o.plan.info.runtime.provider,dtype:o.plan.info.model.dtype,vocoderRepoId:o.plan.info.model.vocoderRepoId||null},a=await 
o.outputCache.lookup(t,i,n);if(a)return Ot.addTranscription({id:o.id,repoId:i.repoId,provider:i.provider,textLength:t.length,durationMs:0,cached:!0,success:!0}),{cachedId:a.id,cachedFile:a.filePath,sampling_rate:a.samplingRate,channels:a.channels};let s=`${i.repoId}:${i.provider}`,c=o.pipelines.get(s);if(!c)throw Error(`Pipeline "${s}" not initialized`);await c.ready;let{speaker:l,...u}=n,d={...u};if(c.vocoder&&(d.vocoder=c.vocoder),typeof l==`string`){let e=o.speakers.get(l);if(!e)throw Error(`Unknown speaker id "${l}"`);e.type===`embed`?d.speaker_embeddings=e.data:e.type===`config`&&(d.speaker=e.data)}let p=Date.now(),m=await c.pipeline(t,d);if(Array.isArray(m)&&(m=m[0]),!m?.audio)throw Error(`Pipeline returned no audio`);Ot.addTranscription({id:o.id,repoId:i.repoId,provider:i.provider,textLength:t.length,audioLength:m.audio.length,samplingRate:m.sampling_rate,durationMs:Date.now()-p,cached:!1,success:!0});let h=typeof m.toWav==`function`?m.toWav():m.audio,g=await o.outputCache.store(t,i,n,h,m.sampling_rate,m.channels||1);if(!g){let e=Xi(t,i,n),a=Yi(r.runtime.cache_dir);await f(a,{recursive:!0});let o=x.join(a,`tmp-${e}.wav`),s=typeof m.toWav==`function`?m.toWav():m.audio;return await b(o,Buffer.from(s)),{cachedId:e,cachedFile:o,sampling_rate:m.sampling_rate,channels:m.channels||1}}return{cachedId:g.id,cachedFile:g.filePath,sampling_rate:m.sampling_rate,channels:m.channels||1}},releaseContext:async(e={})=>{let t=`${o.plan.info.model.repoId}:${o.plan.info.runtime.provider}`,n=o.pipelines.get(t);if(!n)return!1;if(e.force)n.refCount=0;else if(n.refCount=Math.max(0,n.refCount-1),n.refCount>0)return!1;o.pipelines.delete(t);try{await n.pipeline?.dispose?.(),n.vocoder&&await n.vocoder?.dispose?.()}catch{}return!0},finalize:async()=>{let e=Array.from(o.pipelines.values()).map(async e=>{try{await e.pipeline?.dispose?.(),e.vocoder&&await e.vocoder?.dispose?.()}catch{}});await Promise.allSettled(e),o.pipelines.clear()}}}const ta=e=>{let t=Ki(e);if(!t.model.repo_id)return 
null;let n=[t.model.repo_id,t.model.revision||`main`];return t.model.vocoder_repo_id&&n.push(t.model.vocoder_repo_id),n.join(`:`)};async function na(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,dependencies:a,...o}=t;if(!a?.listSupportedBackends)throw TypeError(`getCapabilities requires listSupportedBackends in dependencies`);let s=Ki(i||{}),c=[];s.backend.provider&&c.push(s.backend.provider),s.runtime.prefer_providers.length>0&&c.push(...s.runtime.prefer_providers),c.push(...s.backend.provider_preference);let l=s.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(s.backend.gpu_memory_fraction))),u=s.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(s.backend.cpu_memory_fraction))),d=await V({...o,platform:process.platform,totalMemoryInBytes:S.totalmem(),backend:Je,includeBreakdown:r,provider:c[0]||null,preferProviders:c.slice(1),gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{listSupportedBackends:a.listSupportedBackends}}),f=d.selected?.score||0,p=null,m=null;if(e){let t=e.score||0;m={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;d.ok?!t&&t!==0?(r=`buttress`,i=`missing-client-score`):t>f*n?(r=`local`,i=`client-better`):f>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`):(r=`local`,i=`buttress-unavailable`),p={buttressScore:f,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!p&&(p={buttressScore:f,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let h=null;return i&&(h={repoId:i.model?.repo_id||null,task:i.model?.task||`text-to-speech`,dtype:i.model?.dtype||`auto`,vocoderRepoId:i.model?.vocoder_repo_id||null}),{type:`onnx-tts`,timestamp:new Date().toISOString(),buttress:d,client:m,comparison:p,modelConfig:h}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return Zn(t,n);if(e===`ggml-stt`)return Rr(t,n);if(e===`mlx-llm`)return vi(t,n);if(e===`onnx-stt`)return 
Vi(t,{...n,dependencies:{...n.dependencies,listSupportedBackends:N}});if(e===`onnx-tts`)return na(t,{...n,dependencies:{...n.dependencies,listSupportedBackends:N}});throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.25.0-beta.31`,ra={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/bricks-transformers":`^2.25.0-beta.31`,"@fugood/buttress-hardware-guardrails":`^2.25.0-beta.31`,"@fugood/llama.node":`^1.7.4`,"@fugood/whisper.node":`^1.0.19`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,"onnxruntime-node":`1.24.3`}};const ia=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function aa({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
|
|
10
10
|
`),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
|
|
11
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},
|
|
11
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=u(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=pt(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,f=u(l),p=u(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=d(t?.fit?.fitsInGpu),h=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${f} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=u(i),l=u(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=d(t?.limitedFit?.fitsInGpu),h=d(t?.limitedFit?.fitsInCpu);(e!==o||c!==f||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
|
|
12
12
|
---`),n.push(`
|
|
13
|
-
### System Information`);let
|
|
14
|
-
### Command Used`);let
|
|
15
|
-
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),
|
|
16
|
-
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=
|
|
17
|
-
`),`utf8`),console.log(`\nModel capabilities table saved to: ${
|
|
13
|
+
### System Information`);let f=null;if(process.platform!==`win32`)try{f=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${S.hostname()}`),n.push(`- **OS:** ${S.type()} ${S.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${S.cpus().length}`),n.push(`- **Total System Memory:** ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
14
|
+
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
15
|
+
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),ra.description&&n.push(`- **Description:** ${ra.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
16
|
+
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=F.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=x.join(process.cwd(),m);P.writeFileSync(h,n.join(`
|
|
17
|
+
`),`utf8`),console.log(`\nModel capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function oa({modelId:e=null,defaultConfig:t=null}={}){console.log(`${Y} v${X}`),console.log(`Testing capabilities for backend: ggml-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=a(n[e]||{},t):n[e]=t}),n},o=e=>a(structuredClone(i),e||{}),s=(e=>{if(Array.isArray(r)&&r.length>0){let t=r.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return o(n)}}return Object.keys(i).length>0?o({}):null})(e);e&&(s={...s||{},model:{...s?.model||{},repo_id:e}});let c=await J(`ggml-llm`,null,{config:s,includeBreakdown:!0}),l=c.buttress?.selected||null,u=c.modelConfig||null;if(e||u?.repoId){console.log(`
|
|
18
18
|
=== Model Information ===`);let t=e||u?.repoId;console.log(`Repository ID: ${t}`),u?.quantization&&console.log(`Quantization: ${u.quantization}`),u?.nCtx&&console.log(`Context Length: ${u.nCtx}`),l?.quantization?.name&&console.log(`Model Quantization: ${l.quantization.name.toUpperCase()}`);let n=u?.cache_type_k||`f16`,r=u?.cache_type_v||`f16`;if(console.log(`KV Cache Type: K=${n}, V=${r}`),l?.modelBytes&&l?.kvCacheBytes){if(console.log(`Model Size: ${(l.modelBytes/1024/1024/1024).toFixed(2)} GB`),l.kvInfo?console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(l.kvInfo)})`):console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Total Required Memory: ${((l.modelBytes+l.kvCacheBytes)/1024/1024/1024).toFixed(2)} GB`),l.memoryLimitedCtx!=null){let e=l.memoryLimitedCtx,t=l.kvInfo?.nCtxTrain;t?console.log(`\nMemory-Limited Context: ${e} (Train: ${t})`):console.log(`\nMemory-Limited Context: ${e}`),l.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(l.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(c.buttress?.selected?.fit){let{totalRequiredBytes:e}=c.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}}if(c.buttress?.selected){let{selected:e}=c.buttress;console.log(`
|
|
19
|
-
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=
|
|
19
|
+
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${S.hostname()}`),console.log(`OS: ${S.type()} ${S.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${S.cpus().length}`),console.log(`Total System Memory: ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
20
20
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
21
21
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),e.limitedFit&&(console.log(`
|
|
22
22
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU (Limited): ${e.limitedFit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
|
|
23
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function
|
|
23
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function sa({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating STT model capabilities comparison...
|
|
24
24
|
`),n.push(`${Y} v${X}`),n.push(`## STT Model Capabilities Comparison
|
|
25
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},
|
|
25
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=ia(n);console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let a=c(r);a={...a||{},model:{...a?.model||{},repo_id:r,...i&&{filename:i}}};let o=await J(`ggml-stt`,null,{config:a,includeBreakdown:!0});l.push({modelId:n,repoId:r,filename:i,capabilities:o,modelInfo:o.buttress?.selected||null,modelConfig:o.modelConfig||null})}let u=e=>e?(e/1024/1024).toFixed(1):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |`),n.push(`|-------|-----------|------------------------|---------------------|----------|----------|`),l.forEach(({modelId:e,modelInfo:t})=>{let r=u(t?.modelBytes),i=u(t?.processingBytes||t?.kvCacheBytes),a=u(t?.fit?.totalRequiredBytes),o=d(t?.fit?.fitsInGpu),s=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${r} | ${i} | ${a} | ${o} | ${s} |`)}),n.push(`
|
|
26
26
|
---`),n.push(`
|
|
27
|
-
### System Information`);let
|
|
28
|
-
### Command Used`);let
|
|
29
|
-
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),
|
|
30
|
-
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=
|
|
31
|
-
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${
|
|
27
|
+
### System Information`);let f=null;if(process.platform!==`win32`)try{f=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${S.hostname()}`),n.push(`- **OS:** ${S.type()} ${S.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${S.cpus().length}`),n.push(`- **Total System Memory:** ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
28
|
+
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
29
|
+
### Package Information`),n.push(`- **Name:** ${Y}`),n.push(`- **Version:** ${X}`),ra.description&&n.push(`- **Description:** ${ra.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
30
|
+
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=F.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=x.join(process.cwd(),m);P.writeFileSync(h,n.join(`
|
|
31
|
+
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function ca({modelId:e=null,defaultConfig:t=null}={}){console.log(`${Y} v${X}`),console.log(`Testing capabilities for backend: ggml-stt`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{repoId:n,filename:r}=ia(e),{server:i,generators:a=[],...o}=t||{},s=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=s(n[e]||{},t):n[e]=t}),n},c=e=>s(structuredClone(o),e||{}),l=(e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return c(n)}}return Object.keys(o).length>0?c({}):null})(n);n&&(l={...l||{},model:{...o.runtime,...l?.model||{},repo_id:n,...r&&{filename:r}}});let u=await J(`ggml-stt`,null,{config:l,includeBreakdown:!0}),d=u.buttress?.selected||null,f=u.modelConfig||null;if(n||f?.repoId){console.log(`
|
|
32
32
|
=== Model Information ===`);let e=n||f?.repoId;console.log(`Repository ID: ${e}`),r&&console.log(`Filename: ${r}`),d?.modelBytes&&console.log(`Model Size: ${(d.modelBytes/1024/1024).toFixed(1)} MB`);let t=d?.processingBytes||d?.kvCacheBytes;if(t&&console.log(`Processing Buffer: ${(t/1024/1024).toFixed(1)} MB`),d?.modelBytes&&t)console.log(`Total Required Memory: ${((d.modelBytes+t)/1024/1024).toFixed(1)} MB`);else if(u.buttress?.selected?.fit){let{totalRequiredBytes:e}=u.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024).toFixed(1)} MB`)}}if(u.buttress?.selected){let{selected:e}=u.buttress;console.log(`
|
|
33
|
-
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=
|
|
33
|
+
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${S.hostname()}`),console.log(`OS: ${S.type()} ${S.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${S.cpus().length}`),console.log(`Total System Memory: ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
34
34
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
35
35
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
|
|
36
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var qr=e({finalizeGenerator:()=>Zr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>ti,ggmlLlm:()=>Qr,ggmlStt:()=>ei,globalDownloadManager:()=>Jr,mlxLlm:()=>$r,showModelsTable:()=>Ur,showSttModelsTable:()=>Gr,startGenerator:()=>Xr,startModelDownload:()=>ri,status:()=>ni,testGgmlLlmCapabilities:()=>Wr,testGgmlSttCapabilities:()=>Kr});const Z=new Map,Jr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},Yr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=Yr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Xr(e,t){let n={"ggml-llm":{create:pn,getId:mn},"ggml-stt":{create:tr,getId:nr},"mlx-llm":{create:Lr,getId:Rr}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Jr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Zr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Qr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},$r={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},ei={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function ti(e,t){return e===`ggml-llm`?mn(t):e===`ggml-stt`?nr(t):e===`mlx-llm`?Rr(t):null}const ni={getFullStatus:()=>it(Z),getGgmlLlmStatus:()=>tt(Z),getGgmlSttStatus:()=>nt(Z),getMlxLlmStatus:()=>rt(Z),subscribeToStatus:$e,subscribeToStatusWithId:et,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};async function ri(e,t,n={}){let r={"ggml-llm":hn,"ggml-stt":rr,"mlx-llm":Br}[e];return r?r(t,Jr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}let ii=null;const ai={name:`@fugood/buttress-server`,version:`0.0.0`},oi=()=>{if(ii)return ii;try{let e=te(new URL(`../package.json`,import.meta.url)),t=JSON.parse(k.readFileSync(e,`utf8`));ii={name:t.name||ai.name,version:t.version||ai.version,description:t.description}}catch{ii=ai}return ii},si=!(`Bun`in 
globalThis),ci=e=>new n({adapter:si?t():void 0,...e}),li=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String(),score:a.Optional(a.Number()),hasGpu:a.Optional(a.Boolean()),usableBytes:a.Optional(a.Number())})),authentication:a.Object({required:a.Boolean(),type:a.String(),kid:a.Optional(a.String()),bound:a.Optional(a.Boolean())}),workspace:a.Optional(a.Object({id:a.String(),name:a.Optional(a.String())}))}),ui=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication,workspace:e.workspace});var di=e=>{let t=ci(),n=e.autodiscover?.http?.path??`/buttress/info`;return t.get(n,ui,{response:li}),t};let fi=null;const pi=async e=>{if(fi&&fi.kid===e.kid)return fi.key;let t=await N(e.issuerPublicKey,`EdDSA`);return fi={kid:e.kid,key:t},t},mi=/^Bearer\s+(.+)$/i,hi=(e,t)=>{if(e){let t=e.authorization||e.Authorization;if(t){let e=t.match(mi);return e?e[1].trim():t.trim()}}if(t){let e=t.access_token??t.token;if(typeof e==`string`&&e)return e}return null},gi=async(e,t)=>{if(!e||!t)return null;try{let{payload:n}=await P(e,await pi(t),{algorithms:[`EdDSA`]}),r=n;return r.k!==`ba`||r.w_id!==t.id||r.st!==`ws`&&r.st!==`dev`||!r.sid||!r.exp?null:{workspaceId:r.w_id,subjectType:r.st,subjectId:r.sid,jti:r.jti,exp:r.exp}}catch{return null}},_i=async({headers:e,query:t,set:n,store:r})=>{let i=r.workspaceState?.workspace;if(i&&!await gi(hi(e,t),i))return n.status=401,n.headers&&(n.headers[`WWW-Authenticate`]=`Bearer`),{error:{code:`UNAUTHORIZED`,message:`Invalid or missing workspace access token`}}},vi=e=>{if(!e)return null;let t=e[`x-buttress-sess-id`]??e[`X-BUTTRESS-SESS-ID`];return t?t.trim():null},yi=async({headers:e,query:t,set:n,store:r})=>{let i=r.sessions,a=vi(e);if(!a||!i)return n.status=401,{error:{code:`SESSION_REQUIRED`,message:`Missing buttress session id`}};let o=i.get(a);if(!o)return n.status=401,{error:{code:`SESSION_INVALID`,message:`Unknown or expired buttress 
session`}};let s=r.workspaceState?.workspace;if(s&&o.identity){let r=await gi(hi(e,t),s);if(!r||r.subjectId!==o.identity.subjectId||r.subjectType!==o.identity.subjectType)return n.status=403,{error:{code:`SESSION_FORBIDDEN`,message:`Token identity mismatch for this session`}}}},bi=(e,t)=>{let n=e.sessions,r=vi(t);if(!r||!n)return null;let i=n.get(r);return i?{sessionId:r,session:i}:null},xi=(e,t,n)=>{let r=l.join(e,t),i=l.join(r,n),a=l.relative(r,i);return{sessionDir:r,filePath:i,safe:a!==``&&!a.startsWith(`..`)&&!l.isAbsolute(a)}},Si=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var Ci=ci().onBeforeHandle(_i).onBeforeHandle(yi).post(`/buttress/upload`,async({body:{file:e},headers:t,store:n})=>{let r=bi(n,t);if(!r)return{ok:!1,error:`Session guard mis-wired`};let{sessionId:i}=r,{config:a}=n,o=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,{sessionDir:s,filePath:c,safe:l}=xi(a.server.temp_file_dir,i,o);if(!l)return{ok:!1,error:`Invalid file path`};try{return await p(s,{recursive:!0}),Si?await b(c,await e.stream()):await b(c,await e.arrayBuffer()),{ok:!0,filename:o}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},headers:t,store:n,status:i})=>{let a=bi(n,t);if(!a)return i(500),`Session guard mis-wired`;let{sessionId:o}=a,{config:s}=n,{filePath:c,safe:l}=xi(s.server.temp_file_dir,o,e);return l?r(c):(i(400),`Invalid file path`)},{params:a.Object({filename:a.String()})});const wi=l.dirname(te(import.meta.url)),Ti=async()=>{let e=[l.join(wi,`..`,`public`,`status.html`),l.join(wi,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>f.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},Ei=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new 
Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},Di=async()=>{let e=await Ti();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await f.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var Oi=ci().get(`/status`,Di).get(`/status/`,Di).get(`/buttress/status`,({store:{backend:e}})=>Ei(e));const ki=[`ggml-llm`,`mlx-llm`],Ai=new Map;function ji(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function Mi(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>ki.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let a=i[0],o=n||a.model?.repo_id;if(n){let e=i.find(e=>e.model?.repo_id===n);e&&(a=e)}else o=a.model?.repo_id;let s=a.type||`ggml-llm`,c=o,l=Ai.get(c);if(l?.initialized)return l;let{generators:u,server:d,...f}=t.global||{},p={...f,...a,model:{...a.model,repo_id:o}};console.log(`${r} Creating ${s} generator for ${c}`);let{id:m}=await e.startGenerator(s,p),h={id:m,type:s,config:p,repoId:o,initialized:!1};return Ai.set(c,h),await ji(e,s).initContext(m,{}),h.initialized=!0,console.log(`${r} Generator ready: ${c}`),h}function Ni(e){let t=e.timings||{},n=t.prompt_n??t.promptN??0,r=t.cache_n??t.cacheN??0,i=t.predicted_n??t.predictedN??0;return{promptTokens:n||e.prompt_tokens||e.promptTokens||0,cachedTokens:r,completionTokens:i||e.tokens_evaluated||e.tokensEvaluated||e.tokens_predicted||e.tokensPredicted||0}}function Pi(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=Ni(e),i=t+n;return{prompt_tokens:i,completion_tokens:r,total_tokens:i+r}}const 
Fi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`;async function Ii(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=Pi(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function Li({global:e}){let t=ci({prefix:`/oai-compat`});return t.use(ie({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.onBeforeHandle(_i),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ki.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be 
empty`,type:`invalid_request_error`}};try{let e=await Mi(a,r,c,`[OpenAI]`),t=Fi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await ji(a,e.type).completion(e.id,{options:b});if(!s)return await Ii(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=Pi(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield 
i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const Ri=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function zi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let 
t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}function Bi(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}function Vi(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=Ni(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}async function Hi(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else 
if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Vi(u),l=u.stopping_word||u.stoppingWord||null,c=Bi(u,o.length>0);else if(n===`error`)throw Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}function Ui({global:e}){let t=ci({prefix:`/anthropic-messages`});return t.use(ie({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.onBeforeHandle(_i),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=zi(o),n=await Mi(a,r,o.model,`[Anthropic]`),s=Ri(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await ji(a,n.type).completion(n.id,{options:l});if(!o.stream)return await Hi(u,s,c);let 
d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Vi(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Vi(r);yield 
i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Vi(r),w=r.stopping_word||r.stoppingWord||null,C=Bi(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=zi(a),t=await 
Mi(i,r,a.model,`[Anthropic]`),n=ji(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any())})}),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ki.includes(e.type)).map(e=>{let t=e.model?.repo_id||e.type;return{id:t,type:`model`,display_name:t,created_at:new Date().toISOString()}});return n.length===0&&n.push({id:`ggml-llm`,type:`model`,display_name:`ggml-llm`,created_at:new Date().toISOString()}),{data:n,has_more:!1,first_id:n[0]?.id,last_id:n.at(-1)?.id}}),t}const Wi=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Wi(n[e]||{},t):n[e]=t}),n},Gi=e=>e&&typeof e==`object`?structuredClone(e):null,Ki=(e,t)=>Wi(Gi(e)||{},Gi(t)||{}),qi=(e,t)=>Wi(structuredClone(e.global),t||{}),Ji=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return qi(e,a)}}return Object.keys(e.global).length>0?qi(e,{}):null},Yi={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},Xi=e=>e?e===!0?{...Yi}:Wi(Yi,e):null,Zi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return 
e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Qi=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,$i=6e4,ea=1024*1024*50,ta=e=>{let t=F.machineIdSync(),n=Wi({server:{id:`buttress-${t}`,name:`Buttress Server (${t.slice(-8)})`,port:2080,temp_file_dir:l.join(x.tmpdir(),`.buttress`),session_timeout:$i,max_body_size:ea},autodiscover:!1},Gi(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:Xi(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Qi(i.max_body_size,E.parse,ea),session_timeout:Qi(i.session_timeout,I,$i)},global:s,generators:r}},na={getCapabilities:M.tuple([M.object({type:M.string().optional().default(`ggml-llm`),config:M.any().optional(),currentClientCapabilities:M.any().optional(),options:M.any().optional()}).nullable().optional()]),startGenerator:M.tuple([M.string(),M.any().optional()]),finalizeGenerator:M.tuple([M.string()])};var ra={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=Gi(i),c=Ki(Ji(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=Gi(i),o=Ki(Ji(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize 
Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const ia={initContext:M.tuple([M.string(),M.any().optional()]),completion:M.tuple([M.string(),M.any().optional()]),tokenize:M.tuple([M.string(),M.any()]),detokenize:M.tuple([M.string(),M.any()]),applyChatTemplate:M.tuple([M.string(),M.any()]),releaseContext:M.tuple([M.string()])};function aa(e){return function({backend:t,session:n},r,i){return new d({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function oa(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function sa(e,t){return{initContext:aa(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:oa(e,`${t}Release Context`)}}var ca=sa(e=>e.ggmlLlm,``);const la={initContext:M.tuple([M.string(),M.any().optional()]),transcribe:M.tuple([M.string(),M.string(),M.any().optional()]),transcribeData:M.tuple([M.string(),M.union([M.instanceof(Buffer),M.instanceof(Uint8Array)]),M.any().optional()]),releaseContext:M.tuple([M.string()])},ua=e=>e.ggmlStt,da={common:ra,ggmlLlm:ca,ggmlStt:{initContext:aa(ua),async 
transcribe({backend:e,config:{server:t},peerId:n},r,i,a){console.log(`[Server] Transcribe:`,{id:r,audioPath:i,options:a});let{filePath:o,safe:s}=xi(t.temp_file_dir,n,i);if(!s)throw Error(`Invalid audioPath`);return e.ggmlStt.transcribe(r,{audioPath:o,options:a})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:oa(ua,`Release STT Context`)},mlxLlm:sa(e=>e.mlxLlm,`MLX `)},fa={common:na,ggmlLlm:ia,ggmlStt:la,mlxLlm:ia};var pa=da;const ma=e=>{try{return JSON.parse(e,(e,t)=>t&&(t?.type===`Buffer`&&t?.data||t?.type===`Uint8Array`&&t?.data?L.from(t.data,`base64`):t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t))}catch{return e}},ha=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof L?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:L.from(t).toString(`base64`)}:t)}catch{return e}},ga={score:0,hasGpu:!1,usableBytes:0},_a=e=>e?{score:Number(e.score)||0,hasGpu:!!e.hasGpu,usableBytes:Number(e.gpuUsableBytes||e.cpuUsableBytes||0)}:ga,va=async e=>{let t=[];for(let n of e){let e=ga;try{e=_a((await J(n.type,null,{}))?.buttress?.selected)}catch(e){console.warn(`[Caps] Failed to detect capabilities for "${n.type}":`,e instanceof Error?e.message:e)}t.push({...n,...e})}return t},ya=()=>{let e=x.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},ba=e=>{let t=e.split(`.`).map(Number);return t.length!==4||t.some(e=>Number.isNaN(e))?0:(t[0]<<24|t[1]<<16|t[2]<<8|t[3])>>>0},xa=e=>[e>>>24&255,e>>>16&255,e>>>8&255,e&255].join(`.`),Sa=()=>{let e=[],t=new Set;for(let n of Object.values(x.networkInterfaces()))for(let r of n??[]){if(r.family!==`IPv4`||r.internal||!r.address||!r.netmask)continue;let 
n=ba(r.address),i=ba(r.netmask);if(!n||!i||i===4294967295)continue;let a=xa((n&i|~i>>>0)>>>0);if(a===r.address)continue;let o=`${r.address}->${a}`;t.has(o)||(t.add(o),e.push({address:r.address,broadcast:a}))}return e},Ca=e=>{if(!e)return!1;let t=e;return t.code===`ENOTSUP`||/Failed to bind socket/i.test(t.message??``)};var wa=class e{name=`udp`;receiver=null;senders=[];announcementTimer=null;config;getServerInfo;port;signer;constructor(e,t,n){this.config=e,this.getServerInfo=t,this.port=e.port??8089,this.signer=n}async start(){this.receiver=await this.bindReceiver(!0).catch(e=>{if(!Ca(e))throw e;return console.warn(`[Autodiscover UDP] SO_REUSEPORT not supported by this runtime; falling back to REUSEADDR only (multiple buttress instances on one host will not coexist on the discovery port).`),this.bindReceiver(!1)}),this.receiver.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.receiver.on(`error`,e=>{console.error(`[Autodiscover UDP] Receiver error:`,e.message)}),this.receiver.setBroadcast(!0),this.senders=await this.createSenders();let e=this.senders.map(e=>`${e.address}->${e.broadcast}`).join(`, `)||`<none>`;if(console.log(`[Autodiscover UDP] Listening on port ${this.port}; announce interfaces: ${e}`),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),await Promise.all(this.senders.map(({socket:e})=>new Promise(t=>{e.close(()=>t())}))),this.senders=[],this.receiver&&=(await new Promise(e=>{this.receiver.close(()=>e())}),null)}async bindReceiver(e){let t=R.createSocket(e?{type:`udp4`,reuseAddr:!0,reusePort:!0}:{type:`udp4`,reuseAddr:!0});return new Promise((e,n)=>{let r=e=>{t.close(),n(e)};t.once(`error`,r),t.bind(this.port,()=>{t.off(`error`,r),e(t)})})}async createSenders(){let e=Sa();return(await Promise.all(e.map(async e=>{try{let 
t=R.createSocket({type:`udp4`});return await new Promise((n,r)=>{let i=e=>{t.close(),r(e)};t.once(`error`,i),t.bind({port:0,address:e.address},()=>{t.off(`error`,i),t.setBroadcast(!0),n()})}),t.on(`error`,t=>{console.error(`[Autodiscover UDP] Sender ${e.address} error:`,t.message)}),{...e,socket:t}}catch(t){return console.warn(`[Autodiscover UDP] Failed to bind sender on ${e.address}:`,t.message),null}}))).filter(e=>e!==null)}handleMessage(e,t){try{let n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}static canonicalBytes(e,t,n){return Buffer.from(JSON.stringify({t:e,d:t,ts:n}),`utf8`)}signEnvelope(t,n){if(!this.signer)return null;let r=Math.floor(Date.now()/1e3),i=o.sign(null,e.canonicalBytes(t,n,r),this.signer.privateKey).toString(`base64`);return{t,v:`2.0`,d:n,ts:r,kid:this.signer.kid,sig:i}}sendAnnouncement(){if(this.senders.length===0)return;let e=this.getServerInfo(),t=this.signEnvelope(`ANNOUNCE`,{info:e});if(!t){console.warn("[Autodiscover UDP] no per-server keypair; skipping announcement. 
Run `bricks buttress bind` to register a key.");return}let n=Buffer.from(JSON.stringify(t));for(let{broadcast:e,socket:t,address:r}of this.senders)t.send(n,0,n.length,this.port,e,t=>{t&&console.error(`[Autodiscover UDP] Announcement ${r}->${e} error:`,t.message)})}sendResponse(e,t){if(!this.receiver)return;let n=this.getServerInfo(),r=this.signEnvelope(`RESPONSE`,{request_id:e,info:n});if(!r)return;let i=Buffer.from(JSON.stringify(r));this.receiver.send(i,0,i.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Ta=class{transports=[];started=!1;constructor(e,t,n){this.config=e,this.getServerInfo=t,this.signer=n,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new wa(e.udp,t,n))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const Ea=()=>process.env.BRICKS_BUTTRESS_STATE_DIR||l.join(x.homedir(),`.bricks-cli`,`buttress`),Da=()=>l.join(Ea(),`state.json`),Oa=e=>{if(!e||typeof e!=`object`)return!1;let t=e;return typeof t.id==`string`&&typeof t.serverId==`string`&&typeof t.issuerPublicKey==`string`&&typeof t.kid==`string`},ka=e=>{if(!e||typeof e!=`object`)return!1;let t=e;return typeof t.publicKeySpki==`string`&&typeof t.privateKeyPkcs8==`string`&&typeof t.kid==`string`},Aa=()=>{let e=Da();try{let t=k.readFileSync(e,`utf8`),n=JSON.parse(t);return{workspace:Oa(n?.workspace)?n.workspace:null,serverKeyPair:ka(n?.serverKeyPair)?n.serverKeyPair:null}}catch(e){return e.code!==`ENOENT`&&console.warn(`[Buttress] Failed to read workspace state:`,e.message),{workspace:null,serverKeyPair:null}}},$=oi(),ja=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function 
Ma({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
36
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var la=e({estimateOnnxModelSize:()=>ki,estimateRuntimeMemory:()=>Ct,finalizeGenerator:()=>pa,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>ya,ggmlLlm:()=>ma,ggmlStt:()=>ga,globalDownloadManager:()=>ua,mlxLlm:()=>ha,onnxStt:()=>_a,onnxTts:()=>va,resolveModelCacheDir:()=>wi,resolveOnnxDownloadManifest:()=>Ai,showModelsTable:()=>aa,showSttModelsTable:()=>sa,startGenerator:()=>fa,startModelDownload:()=>xa,startOnnxModelDownload:()=>ji,status:()=>ba,testGgmlLlmCapabilities:()=>oa,testGgmlSttCapabilities:()=>ca});const Z=new Map,ua={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},da=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=da(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function fa(e,t){let n={"ggml-llm":{create:Kn,getId:qn},"ggml-stt":{create:Pr,getId:Fr},"mlx-llm":{create:gi,getId:_i},"onnx-stt":{create:zi,getId:Bi},"onnx-tts":{create:ea,getId:ta}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:ua}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function pa(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const ma={async 
/**
 * Facade over generators registered with the `mlx-llm` backend type.
 *
 * Every method takes the generator id first. All calls except `releaseContext`
 * go through `Q`, which throws for unknown ids or a backend-type mismatch.
 */
const ha = {
  async initContext(id, params) {
    const generator = Q(id, `mlx-llm`);
    return generator.initContext(params);
  },

  async completion(id, params) {
    const generator = Q(id, `mlx-llm`);
    return generator.completion(params);
  },

  async tokenize(id, params) {
    const generator = Q(id, `mlx-llm`);
    return generator.tokenize(params);
  },

  async detokenize(id, params) {
    const generator = Q(id, `mlx-llm`);
    return generator.detokenize(params);
  },

  async applyChatTemplate(id, params) {
    const generator = Q(id, `mlx-llm`);
    return generator.applyChatTemplate(params);
  },

  /**
   * Releasing an id that is no longer registered is reported as already
   * released instead of throwing; a registered id of another backend type
   * is still an error.
   */
  async releaseContext(id, params) {
    const entry = Z.get(id);
    if (entry) {
      if (entry.type !== `mlx-llm`) {
        throw Error(`Generator "${id}" does not support mlx-llm backend`);
      }
      return entry.instance.releaseContext(params);
    }
    return { released: true, alreadyReleased: true };
  },
};
/**
 * Starts a model download for the given backend type.
 *
 * @param {string} e - Backend type ("ggml-llm", "ggml-stt", "mlx-llm", "onnx-stt", "onnx-tts").
 * @param {*} t - Generator configuration handed to the backend's download starter.
 * @param {object} [n] - Extra options forwarded unchanged to the starter.
 * @returns {Promise<object>} Whatever the starter returns, or a
 *   `{ started: false, localPath: null, repoId: null, error }` record for an
 *   unknown backend type.
 */
async function xa(e, t, n = {}) {
  const starters = {
    "ggml-llm": Jn,
    "ggml-stt": Ir,
    "mlx-llm": yi,
    "onnx-stt": (config, manager, options) => ji(config, manager, options),
    "onnx-tts": (config, manager, options) => ji(config, manager, options),
  };
  // Own-key lookup only: a plain `starters[e]` would resolve names such as
  // "constructor", "toString" or "__proto__" to inherited Object.prototype
  // members, which are truthy and would be invoked (or crash) instead of
  // producing the "Unknown backend type" result.
  const start = Object.hasOwn(starters, e) ? starters[e] : null;
  if (!start) {
    return { started: false, localPath: null, repoId: null, error: `Unknown backend type: ${e}` };
  }
  return start(t, ua, n);
}
/** Matches an `Authorization` header value of the form `Bearer <token>` (case-insensitive). */
const Ma = /^Bearer\s+(.+)$/i;

/**
 * Extracts an access token from request headers or, failing that, the query.
 *
 * @param {object|null|undefined} e - Request headers (`authorization` / `Authorization`).
 * @param {object|null|undefined} t - Query parameters (`access_token`, then `token`).
 * @returns {string|null} The trimmed header token (the whole header value when
 *   it has no Bearer prefix), a non-empty string query token, or null.
 */
const Na = (e, t) => {
  const header = e ? (e.authorization || e.Authorization) : undefined;
  if (header) {
    // A present header always wins, even when it has no "Bearer " prefix.
    const bearer = header.match(Ma);
    return (bearer ? bearer[1] : header).trim();
  }
  const candidate = t ? (t.access_token ?? t.token) : undefined;
  return typeof candidate === `string` && candidate ? candidate : null;
};
/**
 * Produces the status payload for the status endpoint.
 *
 * @param {object} e - Backend object; its optional `status.getFullStatus()` is
 *   used when present.
 * @returns {*} The tracker's full status, or an empty snapshot (current ISO
 *   timestamp plus empty `ggmlLlm`, `ggmlStt`, `onnxStt` and `onnxTts`
 *   sections) when no tracker is available.
 */
const Ua = (e) => {
  const tracker = e.status;
  if (tracker?.getFullStatus) {
    return tracker.getFullStatus();
  }
  const emptySection = () => ({ generators: [], history: {} });
  return {
    timestamp: new Date().toISOString(),
    ggmlLlm: emptySection(),
    ggmlStt: emptySection(),
    onnxStt: emptySection(),
    onnxTts: emptySection(),
  };
};
/**
 * Normalizes token counters from a completion result.
 *
 * `timings` (snake_case or camelCase) is preferred. When it is missing or
 * zero, the result's top-level counters are used as a fallback.
 *
 * @param {object} e - Completion result.
 * @returns {{promptTokens: number, cachedTokens: number, completionTokens: number}}
 */
function Xa(e) {
  const timings = e.timings || {};
  const promptFromTimings = timings.prompt_n ?? timings.promptN ?? 0;
  const cachedTokens = timings.cache_n ?? timings.cacheN ?? 0;
  const predictedFromTimings = timings.predicted_n ?? timings.predictedN ?? 0;
  return {
    // `tokens_evaluated` is the prompt-side counter in llama.cpp-style results,
    // so it backs up the prompt count and is never used as a completion count
    // (previously it shadowed `tokens_predicted` in the completion fallback).
    promptTokens:
      promptFromTimings ||
      e.prompt_tokens ||
      e.promptTokens ||
      e.tokens_evaluated ||
      e.tokensEvaluated ||
      0,
    cachedTokens,
    completionTokens: predictedFromTimings || e.tokens_predicted || e.tokensPredicted || 0,
  };
}

/**
 * Builds an OpenAI-style `usage` object from a completion result.
 * Cached prompt tokens are counted as part of `prompt_tokens`.
 *
 * @param {object} e - Completion result.
 * @returns {{prompt_tokens: number, completion_tokens: number, total_tokens: number}}
 */
function Za(e) {
  const { promptTokens, cachedTokens, completionTokens } = Xa(e);
  const promptTotal = promptTokens + cachedTokens;
  return {
    prompt_tokens: promptTotal,
    completion_tokens: completionTokens,
    total_tokens: promptTotal + completionTokens,
  };
}
o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function eo({global:e}){let t=Ea({prefix:`/oai-compat`});return t.use(oe({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.onBeforeHandle(Fa),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>Ka.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await Ya(a,r,c,`[OpenAI]`),t=Qa(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await Ja(a,e.type).completion(e.id,{options:b});if(!s)return await $a(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let 
/**
 * Converts an Anthropic Messages request body into the OpenAI-style chat
 * request used by the completion backends.
 *
 * - `system` (string or array of text blocks) becomes a leading system message.
 * - String content is passed through; block content is split into text/image
 *   parts, `tool_calls` (from `tool_use`), `reasoning_content` (from
 *   `thinking`) and separate `tool` messages (from `tool_result`).
 * - `tools`, `tool_choice`, `stop_sequences`, `max_tokens` (default 4096),
 *   sampling fields and `thinking` are mapped to their chat equivalents.
 *
 * @param {object} e - Anthropic request body.
 * @returns {object} Chat request with `messages` and the mapped options.
 * @throws {Error} When `e.messages` is not an array.
 */
function no(e) {
  const request = {};
  const messages = [];

  if (e.system != null) {
    let systemText = ``;
    if (typeof e.system === `string`) {
      systemText = e.system;
    } else if (Array.isArray(e.system)) {
      for (const part of e.system) {
        if (part?.type === `text` && typeof part.text === `string`) systemText += part.text;
      }
    }
    if (systemText) messages.push({ role: `system`, content: systemText });
  }

  if (!Array.isArray(e.messages)) throw Error(`'messages' is required and must be an array`);

  for (const message of e.messages) {
    const role = message?.role || `user`;

    if (message?.content == null) {
      // Content-less assistant turns carry nothing to replay; anything else is passed through.
      if (role !== `assistant`) messages.push(message);
      continue;
    }
    if (typeof message.content === `string`) {
      messages.push({ role, content: message.content });
      continue;
    }
    if (!Array.isArray(message.content)) {
      messages.push(message);
      continue;
    }

    const toolCalls = [];
    const parts = [];
    const toolResults = [];
    let reasoning = ``;
    let hasToolUse = false;

    for (const block of message.content) {
      const kind = block?.type || ``;
      if (kind === `text`) {
        parts.push({ type: `text`, text: block.text || `` });
      } else if (kind === `thinking`) {
        reasoning += block.thinking || ``;
      } else if (kind === `image`) {
        const source = block.source || {};
        if (source.type === `base64`) {
          const mediaType = source.media_type || `image/jpeg`;
          const data = source.data || ``;
          parts.push({ type: `image_url`, image_url: { url: `data:${mediaType};base64,${data}` } });
        } else if (source.type === `url`) {
          parts.push({ type: `image_url`, image_url: { url: source.url || `` } });
        }
      } else if (kind === `tool_use`) {
        toolCalls.push({
          id: block.id || ``,
          type: `function`,
          function: { name: block.name || ``, arguments: JSON.stringify(block.input ?? {}) },
        });
        hasToolUse = true;
      } else if (kind === `tool_result`) {
        const resultContent = block.content;
        let resultText = ``;
        if (typeof resultContent === `string`) {
          resultText = resultContent;
        } else if (Array.isArray(resultContent)) {
          for (const piece of resultContent) {
            if (piece?.type === `text`) resultText += piece.text || ``;
          }
        }
        toolResults.push({ role: `tool`, tool_call_id: block.tool_use_id || ``, content: resultText });
      }
    }

    // Tool results must directly follow the assistant `tool_calls` message they
    // answer, so they are emitted before any text/image content of the same
    // turn (previously the user text was inserted in between).
    for (const toolMessage of toolResults) messages.push(toolMessage);

    if (parts.length > 0 || hasToolUse || reasoning) {
      const converted = { role };
      // Without text/image parts the message still needs a content field.
      converted.content = parts.length > 0 ? parts : ``;
      if (toolCalls.length > 0) converted.tool_calls = toolCalls;
      if (reasoning) converted.reasoning_content = reasoning;
      messages.push(converted);
    }
  }

  request.messages = messages;

  if (Array.isArray(e.tools)) {
    request.tools = e.tools.map((tool) => ({
      type: `function`,
      function: {
        name: tool.name || ``,
        description: tool.description || ``,
        parameters: tool.input_schema || {},
      },
    }));
  }

  if (e.tool_choice && typeof e.tool_choice === `object`) {
    const choice = e.tool_choice.type;
    if (choice === `auto`) request.tool_choice = `auto`;
    else if (choice === `any` || choice === `tool`) request.tool_choice = `required`;
    else if (choice === `none`) request.tool_choice = `none`;
  } else if (Array.isArray(request.tools) && request.tools.length > 0) {
    request.tool_choice = `auto`;
  }

  if (e.stop_sequences != null) {
    request.stop = Array.isArray(e.stop_sequences) ? e.stop_sequences : [e.stop_sequences];
  }
  request.max_tokens = e.max_tokens ?? 4096;

  for (const key of [`temperature`, `top_p`, `top_k`, `stream`]) {
    if (e[key] != null) request[key] = e[key];
  }

  if (e.thinking && typeof e.thinking === `object` && e.thinking.type === `enabled`) {
    request.enable_thinking = true;
    if (e.thinking.budget_tokens != null) request.thinking_budget_tokens = e.thinking.budget_tokens;
  }
  return request;
}
t.use(oe({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.onBeforeHandle(Fa),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=no(o),n=await Ya(a,r,o.model,`[Anthropic]`),s=to(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await Ja(a,n.type).completion(n.id,{options:l});if(!o.stream)return await ao(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=io(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield 
i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=io(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=io(r),w=r.stopping_word||r.stoppingWord||null,C=ro(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield 
i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=no(a),t=await Ya(i,r,a.model,`[Anthropic]`),n=Ja(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server 
/**
 * Recursively merges `t` into a shallow copy of `e`.
 *
 * Plain-object values are merged key by key; arrays, primitives and null
 * replace the existing value. Neither argument is mutated at the top level.
 *
 * @param {object|Array} [e] - Base value.
 * @param {object} [t] - Overrides.
 * @returns {object|Array} The merged copy.
 */
const so = (e = {}, t = {}) => {
  const merged = Array.isArray(e) ? [...e] : { ...e };
  for (const [key, value] of Object.entries(t || {})) {
    // Overrides can come from client-supplied JSON, where "__proto__" is an
    // ordinary own key. Assigning through it would swap the prototype of the
    // merged object and let the client inject inherited properties.
    if (key === `__proto__`) continue;
    const isNestedObject = value && typeof value === `object` && !Array.isArray(value);
    merged[key] = isNestedObject ? so(merged[key] || {}, value) : value;
  }
  return merged;
};

/** Deep-clones objects (including arrays); any non-object input yields null. */
const co = (e) => (e && typeof e === `object` ? structuredClone(e) : null);

/** Deep-merges clones of `e` and `t`, treating non-object inputs as empty. */
const lo = (e, t) => so(co(e) || {}, co(t) || {});

/** Merges `t` over a clone of the config's `global` section. */
const uo = (e, t) => so(structuredClone(e.global), t || {});

/**
 * Resolves the server-side base configuration for a generator request.
 *
 * @param {object} e - Server config with `global` and `generators`.
 * @param {object} t - Backend exposing `getModelIdentifier(type, generator)`.
 * @param {string} n - Requested backend type.
 * @param {*} r - Requested model identifier.
 * @returns {object|null} Global settings overlaid with the configured generator
 *   of type `n` whose identifier equals `r`; the global settings alone when no
 *   generator matches; null when `global` is empty as well.
 */
const fo = (e, t, n, r) => {
  if (e.generators.length > 0) {
    const sameType = e.generators.filter((generator) => generator?.type === n);
    if (sameType.length > 0 && r) {
      const match = sameType.find((generator) => t.getModelIdentifier(n, generator) === r);
      if (match) return uo(e, match);
    }
  }
  return Object.keys(e.global).length > 0 ? uo(e, {}) : null;
};
/**
 * RPC handlers shared by all backends: capability probing and generator
 * lifecycle. Each handler receives the call context first, followed by the
 * positional RPC arguments.
 */
var xo = {
  /**
   * Resolves the effective generator config (server config overlaid with the
   * client-supplied one) and asks the backend for its capabilities.
   */
  async getCapabilities({ backend, config }, request = null) {
    console.log(`[Server] Get Capabilities:`, request);
    const {
      type = `ggml-llm`,
      config: requestedConfig,
      currentClientCapabilities = null,
      options = {},
    } = request || { type: `ggml-llm` };

    const requestedClone = co(requestedConfig);
    const identifier = backend.getModelIdentifier(type, requestedClone);
    const merged = lo(fo(config, backend, type, identifier), requestedConfig);
    if (Object.keys(merged).length === 0) {
      throw Error(`Buttress server missing generator configuration`);
    }
    merged.backend ||= {};
    merged.backend.type ||= type;
    return backend.getCapabilities(type, currentClientCapabilities, { ...options, config: merged });
  },

  /**
   * Starts (or re-references) a generator and records its id on the session.
   */
  async startGenerator({ backend, config, session }, type, requestedConfig) {
    console.log(`[Server] Start Generator:`, type, requestedConfig);
    const requestedClone = co(requestedConfig);
    const identifier = backend.getModelIdentifier(type, requestedClone);
    const merged = lo(fo(config, backend, type, identifier), requestedConfig);
    if (Object.keys(merged).length === 0) {
      throw Error(`Buttress server missing generator configuration`);
    }
    merged.backend ||= {};
    merged.backend.type ||= type;

    const started = await backend.startGenerator(type, merged);
    session.generators.add(started.id);
    return started;
  },

  /** Forgets the generator id on the session and finalizes it on the backend. */
  async finalizeGenerator({ backend, session }, id) {
    console.log(`[Server] Finalize Generator:`, id);
    session.generators.delete(id);
    return backend.finalizeGenerator(id);
  },
};
/**
 * Builds a `releaseContext` RPC handler that only releases contexts the
 * calling session initialized itself.
 *
 * @param {Function} e - Selector mapping the backend object to the API to use.
 * @param {string} t - Label used in log lines.
 * @returns {Function} Async handler `({ backend, session }, id, force)`. It
 *   resolves to the backend's release result, or to
 *   `{ released: false, skipped: true }` when the session does not own the id.
 */
function wo(e, t) {
  return async function ({ backend, session }, id, force) {
    console.log(`[Server] ${t}:`, { id, force });

    if (!session.initializedContexts.has(id)) {
      console.log(`[Server] ${t} skipped - not initialized by this session:`, { id });
      return { released: false, skipped: true };
    }

    // Forget the ownership first so a repeated call is reported as skipped.
    session.initializedContexts.delete(id);
    return e(backend).releaseContext(id, { force });
  };
}
/**
 * RPC handlers for the ONNX speech-to-text backend (`backend.onnxStt`).
 */
var Po = {
  /**
   * Initializes a context and streams `progress` events followed by a single
   * `result` event. A failure is surfaced as a stream error.
   */
  initContext({ backend }, id, params) {
    return new u({
      async start(controller) {
        try {
          const reportProgress = (progress) => {
            controller.enqueue({ event: `progress`, data: { progress } });
          };
          const result = await backend.onnxStt.initContext(id, { ...params, onProgress: reportProgress });
          // The result event is held back for one second after initialization completes.
          await new Promise((resolve) => setTimeout(resolve, 1e3));
          controller.enqueue({ event: `result`, data: { result } });
          controller.close();
        } catch (err) {
          controller.error(err);
        }
      },
    });
  },

  /**
   * Transcribes a file previously registered with the session's file manager.
   * Throws when the name does not resolve to a file.
   */
  transcribe({ backend, session }, id, request) {
    const { audio, options } = request;
    console.log(`[Server] ONNX STT Transcribe:`, { id, audio, options });
    const resolvedPath = session.fileManager.resolve(audio);
    if (!resolvedPath) throw Error(`Audio file not found`);
    return backend.onnxStt.transcribe(id, { audio: resolvedPath, options });
  },

  /** Transcribes raw audio bytes passed inline. */
  async transcribeData({ backend }, id, audioData, options) {
    console.log(`[Server] ONNX STT Transcribe Data:`, {
      id,
      audioDataLength: audioData?.length || 0,
      options,
    });
    return backend.onnxStt.transcribeData(id, { audioData, options });
  },

  /** Releases the context on the backend. */
  async releaseContext({ backend }, id) {
    return backend.onnxStt.releaseContext(id, {});
  },
};
/** Copies exactly the bytes a typed-array/Buffer view covers into a standalone ArrayBuffer. */
const Bo = (e) => e.buffer.slice(e.byteOffset, e.byteOffset + e.byteLength);

/**
 * Parses an RPC payload, reviving tagged binary and error values.
 *
 * Recognized tags (objects with a `type` field):
 * - `Buffer` / `Uint8Array` with `data` -> Buffer
 * - `Float32Array` with `data`          -> Float32Array
 * - `Error` with `name` and `message`   -> Error
 * `data` is base64 text; an array of byte values is accepted as well.
 *
 * @param {string} e - JSON text.
 * @returns {*} The parsed value, or `e` itself when parsing/reviving fails.
 */
const Vo = (e) => {
  try {
    return JSON.parse(e, (key, value) => {
      if (!value) return value;
      const { type, data } = value;
      // Test the shape of `data`, not its truthiness: an empty typed array
      // serializes to an empty base64 string and must still be revived.
      const hasBinary = typeof data === `string` || Array.isArray(data);
      if ((type === `Buffer` || type === `Uint8Array`) && hasBinary) {
        return z.from(data, `base64`);
      }
      if (type === `Float32Array` && hasBinary) {
        // Copy into a fresh ArrayBuffer so the float view starts at offset 0.
        return new Float32Array(Bo(z.from(data, `base64`)));
      }
      if (type === `Error` && value.name && value.message) {
        // Error's first argument is the message; the name must be set
        // separately (Error(name, message) lost the original message).
        const revived = new Error(value.message);
        revived.name = value.name;
        return revived;
      }
      return value;
    });
  } catch {
    // Not JSON (or a malformed tagged value): hand the raw input back.
    return e;
  }
};
/**
 * UDP autodiscovery transport.
 *
 * Binds one receiver socket on the discovery port, answers `QUERY` datagrams
 * with a signed `RESPONSE`, and periodically sends a signed `ANNOUNCE` from
 * one sender socket per interface returned by `Yo()`. Nothing is sent without
 * a signer (`{ privateKey, kid }`).
 */
var Zo = class UdpDiscoveryTransport {
  name = `udp`;
  receiver = null;
  senders = [];
  announcementTimer = null;
  config;
  getServerInfo;
  port;
  signer;

  /**
   * @param {object} e - UDP config (`port`, `announcements`, `requests`).
   * @param {Function} t - Returns the server info object to advertise.
   * @param {object|null} n - Signer `{ privateKey, kid }`, or null.
   */
  constructor(e, t, n) {
    this.config = e;
    this.getServerInfo = t;
    this.port = e.port ?? 8089;
    this.signer = n;
  }

  /** Binds the sockets and, when enabled, starts periodic announcements. */
  async start() {
    this.receiver = await this.bindReceiver(true).catch((err) => {
      if (!Xo(err)) throw err;
      console.warn(`[Autodiscover UDP] SO_REUSEPORT not supported by this runtime; falling back to REUSEADDR only (multiple buttress instances on one host will not coexist on the discovery port).`);
      return this.bindReceiver(false);
    });
    this.receiver.on(`message`, (message, remote) => {
      this.handleMessage(message, remote);
    });
    this.receiver.on(`error`, (err) => {
      console.error(`[Autodiscover UDP] Receiver error:`, err.message);
    });
    this.receiver.setBroadcast(true);

    this.senders = await this.createSenders();
    const interfaces = this.senders.map((sender) => `${sender.address}->${sender.broadcast}`).join(`, `) || `<none>`;
    console.log(`[Autodiscover UDP] Listening on port ${this.port}; announce interfaces: ${interfaces}`);

    if (this.config.announcements.enabled) {
      const interval = this.config.announcements.interval ?? 5e3;
      this.announcementTimer = setInterval(() => {
        this.sendAnnouncement();
      }, interval);
      this.sendAnnouncement();
    }
  }

  /** Stops announcements and closes every socket. */
  async stop() {
    if (this.announcementTimer) {
      clearInterval(this.announcementTimer);
      this.announcementTimer = null;
    }
    await Promise.all(
      this.senders.map(({ socket }) => new Promise((resolve) => {
        socket.close(() => resolve());
      })),
    );
    this.senders = [];
    if (this.receiver) {
      await new Promise((resolve) => {
        this.receiver.close(() => resolve());
      });
      this.receiver = null;
    }
  }

  /**
   * Binds the receiver socket on the discovery port.
   * @param {boolean} e - Whether to request `reusePort` in addition to `reuseAddr`.
   * @returns {Promise<object>} The bound socket; rejects (after closing it) on a bind error.
   */
  async bindReceiver(e) {
    const socket = le.createSocket(
      e ? { type: `udp4`, reuseAddr: true, reusePort: true } : { type: `udp4`, reuseAddr: true },
    );
    return new Promise((resolve, reject) => {
      const onBindError = (err) => {
        socket.close();
        reject(err);
      };
      socket.once(`error`, onBindError);
      socket.bind(this.port, () => {
        socket.off(`error`, onBindError);
        resolve(socket);
      });
    });
  }

  /**
   * Creates one broadcast-enabled sender socket per announce interface.
   * Interfaces that fail to bind are logged and left out.
   */
  async createSenders() {
    const interfaces = Yo();
    const bound = await Promise.all(
      interfaces.map(async (iface) => {
        try {
          const socket = le.createSocket({ type: `udp4` });
          await new Promise((resolve, reject) => {
            const onBindError = (err) => {
              socket.close();
              reject(err);
            };
            socket.once(`error`, onBindError);
            socket.bind({ port: 0, address: iface.address }, () => {
              socket.off(`error`, onBindError);
              socket.setBroadcast(true);
              resolve();
            });
          });
          socket.on(`error`, (err) => {
            console.error(`[Autodiscover UDP] Sender ${iface.address} error:`, err.message);
          });
          return { ...iface, socket };
        } catch (err) {
          console.warn(`[Autodiscover UDP] Failed to bind sender on ${iface.address}:`, err.message);
          return null;
        }
      }),
    );
    return bound.filter((sender) => sender !== null);
  }

  /**
   * Handles one incoming datagram. Only `QUERY` envelopes are acted on, and
   * only when request handling is enabled; the reply is delayed by a random
   * amount up to `requests.responseDelay` milliseconds.
   *
   * @param {Buffer} e - Raw datagram.
   * @param {{address: string, port: number}} t - Sender address info.
   */
  handleMessage(e, t) {
    try {
      const envelope = JSON.parse(e.toString());
      if (envelope.t === `QUERY` && this.config.requests.enabled) {
        // Datagrams are untrusted: read the id defensively now. Dereferencing
        // a missing `d` inside the timer callback would throw outside this
        // try block and surface as an uncaught exception.
        const requestId = envelope.d?.id;
        const maxDelay = this.config.requests.responseDelay ?? 0;
        const delay = maxDelay > 0 ? Math.random() * maxDelay : 0;
        setTimeout(() => {
          try {
            this.sendResponse(requestId, t);
          } catch (err) {
            console.error(`[Autodiscover UDP] Response error:`, err instanceof Error ? err.message : err);
          }
        }, delay);
      }
    } catch {
      // Best effort: anything that is not a well-formed envelope is ignored.
    }
  }

  /** Bytes covered by the signature: the JSON of `{ t, d, ts }` in that key order. */
  static canonicalBytes(e, t, n) {
    return Buffer.from(JSON.stringify({ t: e, d: t, ts: n }), `utf8`);
  }

  /**
   * Wraps a payload in a signed envelope.
   * @param {string} t - Envelope type.
   * @param {object} n - Payload.
   * @returns {object|null} `{ t, v, d, ts, kid, sig }`, or null without a signer.
   */
  signEnvelope(t, n) {
    if (!this.signer) return null;
    const ts = Math.floor(Date.now() / 1e3);
    const sig = o
      .sign(null, UdpDiscoveryTransport.canonicalBytes(t, n, ts), this.signer.privateKey)
      .toString(`base64`);
    return { t, v: `2.0`, d: n, ts, kid: this.signer.kid, sig };
  }

  /** Sends a signed `ANNOUNCE` from every sender socket to its broadcast address. */
  sendAnnouncement() {
    if (this.senders.length === 0) return;
    const info = this.getServerInfo();
    const envelope = this.signEnvelope(`ANNOUNCE`, { info });
    if (!envelope) {
      console.warn("[Autodiscover UDP] no per-server keypair; skipping announcement. Run `bricks buttress bind` to register a key.");
      return;
    }
    const payload = Buffer.from(JSON.stringify(envelope));
    for (const { broadcast, socket, address } of this.senders) {
      socket.send(payload, 0, payload.length, this.port, broadcast, (err) => {
        if (err) console.error(`[Autodiscover UDP] Announcement ${address}->${broadcast} error:`, err.message);
      });
    }
  }

  /**
   * Sends a signed `RESPONSE` for a query back to its sender. Does nothing
   * when the receiver is closed or no signer is configured.
   */
  sendResponse(e, t) {
    if (!this.receiver) return;
    const info = this.getServerInfo();
    const envelope = this.signEnvelope(`RESPONSE`, { request_id: e, info });
    if (!envelope) return;
    const payload = Buffer.from(JSON.stringify(envelope));
    this.receiver.send(payload, 0, payload.length, t.port, t.address, (err) => {
      if (err) console.error(`[Autodiscover UDP] Response error:`, err.message);
    });
  }
};
_(this.sessionDir,{recursive:!0,force:!0}).catch(()=>{}),this.registry.clear()}};const es=()=>process.env.BRICKS_BUTTRESS_STATE_DIR||x.join(S.homedir(),`.bricks-cli`,`buttress`),ts=()=>x.join(es(),`state.json`),ns=e=>{if(!e||typeof e!=`object`)return!1;let t=e;return typeof t.id==`string`&&typeof t.serverId==`string`&&typeof t.issuerPublicKey==`string`&&typeof t.kid==`string`},rs=e=>{if(!e||typeof e!=`object`)return!1;let t=e;return typeof t.publicKeySpki==`string`&&typeof t.privateKeyPkcs8==`string`&&typeof t.kid==`string`},is=()=>{let e=ts();try{let t=P.readFileSync(e,`utf8`),n=JSON.parse(t);return{workspace:ns(n?.workspace)?n.workspace:null,serverKeyPair:rs(n?.serverKeyPair)?n.serverKeyPair:null}}catch(e){return e.code!==`ENOENT`&&console.warn(`[Buttress] Failed to read workspace state:`,e.message),{workspace:null,serverKeyPair:null}}},$=wa(),as=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function os({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
37
37
|
`),n.push(`${$.name} v${$.version}`),n.push(`## Model Capabilities Comparison
|
|
38
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},
|
|
38
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=u(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=pt(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=u(c),f=t?.recurrentMemoryBytes||0,p=f>0?u(f):`-`,m=u(t?.modelBytes&&(c!=null||f>0)?t.modelBytes+(c||0)+f:t?.fit?.totalRequiredBytes),h=d(t?.fit?.fitsInGpu),g=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let 
e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=u(s),h=u(t?.modelBytes&&(s!=null||f>0)?t.modelBytes+(s||0)+f:t?.limitedFit?.totalRequiredBytes),g=d(t?.limitedFit?.fitsInGpu),_=d(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
|
|
39
39
|
---`),n.push(`
|
|
40
|
-
### System Information`);let
|
|
41
|
-
### Command Used`);let
|
|
40
|
+
### System Information`);let f=null;if(process.platform!==`win32`)try{f=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${S.hostname()}`),n.push(`- **OS:** ${S.type()} ${S.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${S.cpus().length}`),n.push(`- **Total System Memory:** ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
41
|
+
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
42
42
|
### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
43
|
-
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=
|
|
44
|
-
`),`utf8`),console.log(`\nModel capabilities table saved to: ${
|
|
43
|
+
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=F.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=x.join(process.cwd(),m);P.writeFileSync(h,n.join(`
|
|
44
|
+
`),`utf8`),console.log(`\nModel capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate model table:`,e.message),process.exit(1)}}async function ss({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=a(n[e]||{},t):n[e]=t}),n},o=e=>a(structuredClone(i),e||{}),s=(e=>{if(Array.isArray(r)&&r.length>0){let t=r.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return o(n)}}return Object.keys(i).length>0?o({}):null})(e);e&&(s={...s||{},model:{...s?.model||{},repo_id:e}});let c=await J(`ggml-llm`,null,{config:s,includeBreakdown:!0}),l=c.buttress?.selected||null,u=c.modelConfig||null;if(e||u?.repoId){console.log(`
|
|
45
45
|
=== Model Information ===`);let t=e||u?.repoId;if(console.log(`Repository ID: ${t}`),u?.quantization&&console.log(`Quantization: ${u.quantization}`),u?.nCtx&&console.log(`Context Length: ${u.nCtx}`),l?.quantization){let{fileType:e}=l.quantization;e!=null&&console.log(`Model File Type (GGUF): ${e}`)}let n=u?.cache_type_k||`f16`,r=u?.cache_type_v||`f16`;if(console.log(`KV Cache Type: K=${n}, V=${r}`),l?.modelBytes&&l?.kvCacheBytes!=null){console.log(`Model Size: ${(l.modelBytes/1024/1024/1024).toFixed(2)} GB`),l.kvInfo?console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(l.kvInfo)})`):console.log(`KV Cache Size: ${(l.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);let e=l.recurrentMemoryBytes||0;e>0&&console.log(`Recurrent Memory: ${(e/1024/1024/1024).toFixed(2)} GB`);let t=l.modelBytes+l.kvCacheBytes+e;if(console.log(`Total Required Memory: ${(t/1024/1024/1024).toFixed(2)} GB`),l.memoryLimitedCtx!=null){let e=l.memoryLimitedCtx,t=l.kvInfo?.nCtxTrain;t?console.log(`\nMemory-Limited Context: ${e} (Train: ${t})`):console.log(`\nMemory-Limited Context: ${e}`),l.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(l.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(c.buttress?.selected?.fit){let{totalRequiredBytes:e}=c.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}}if(c.buttress?.selected){let{selected:e}=c.buttress;console.log(`
|
|
46
|
-
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=
|
|
46
|
+
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${S.hostname()}`),console.log(`OS: ${S.type()} ${S.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${S.cpus().length}`),console.log(`Total System Memory: ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
47
47
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
48
48
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),e.limitedFit&&(console.log(`
|
|
49
49
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU (Limited): ${e.limitedFit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
|
|
50
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function
|
|
50
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(c,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function cs({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating STT model capabilities comparison...
|
|
51
51
|
`),n.push(`${$.name} v${$.version}`),n.push(`## STT Model Capabilities Comparison
|
|
52
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},
|
|
52
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t],{repoId:r,filename:i}=as(n);console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let a=c(r);a={...a||{},model:{...a?.model||{},repo_id:r,...i&&{filename:i}}};let o=await J(`ggml-stt`,null,{config:a,includeBreakdown:!0});l.push({modelId:n,repoId:r,filename:i,capabilities:o,modelInfo:o.buttress?.selected||null,modelConfig:o.modelConfig||null})}let u=e=>e?(e/1024/1024).toFixed(1):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |`),n.push(`|-------|-----------|------------------------|---------------------|----------|----------|`),l.forEach(({modelId:e,modelInfo:t})=>{let r=u(t?.modelBytes),i=u(t?.processingBytes||t?.kvCacheBytes),a=u(t?.fit?.totalRequiredBytes),o=d(t?.fit?.fitsInGpu),s=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${r} | ${i} | ${a} | ${o} | ${s} |`)}),n.push(`
|
|
53
53
|
---`),n.push(`
|
|
54
|
-
### System Information`);let
|
|
55
|
-
### Command Used`);let
|
|
54
|
+
### System Information`);let f=null;if(process.platform!==`win32`)try{f=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${S.hostname()}`),n.push(`- **OS:** ${S.type()} ${S.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **CPU Cores:** ${S.cpus().length}`),n.push(`- **Total System Memory:** ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;if(e){let t=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;if(n.push(`- **Usable CPU Memory:** ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${t}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu){let t=e.devices.filter(e=>e.type===`gpu`);if(t.length>0){let r=t[0];n.push(`- **GPU Backend:** ${r.backend}`),n.push(`- **GPU Name:** ${r.deviceName}`),n.push(`- **GPU Total Memory:** ${(r.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let i=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;n.push(`- **GPU Usable Memory:** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${i}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else n.push(`- **GPU:** Not available`)}}n.push(`
|
|
55
|
+
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
56
56
|
### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
57
|
-
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=
|
|
58
|
-
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${
|
|
57
|
+
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=F.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=x.join(process.cwd(),m);P.writeFileSync(h,n.join(`
|
|
58
|
+
`),`utf8`),console.log(`\nSTT model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate STT model table:`,e.message),process.exit(1)}}async function ls({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating MLX model capabilities comparison...
|
|
59
59
|
`),n.push(`${$.name} v${$.version}`),n.push(`## MLX Model Capabilities Comparison
|
|
60
|
-
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`mlx-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},
|
|
60
|
+
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`mlx-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...r?.model||{},repo_id:n}};let i=await J(`mlx-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quant | Size (GB) | Context | KV Cache (GB) | Total Required (GB) | Fits Unified Memory |`),n.push(`|----------|-------|-----------|---------|---------------|---------------------|---------------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.bits||r?.quantBits||`N/A`,a=typeof i==`number`?`${i}bit`:i,o=u(t?.modelBytes),s=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,c=u(t?.kvCacheBytes),l=u(t?.fit?.totalRequiredBytes),f=d(t?.fit?.fitsInGpu);n.push(`| ${e} | ${a} | ${o} | ${s} | ${c} | ${l} | ${f} |`);let p=t?.limitedFit!=null&&t?.memoryLimitedCtx!=null,m=!t?.fit?.fitsInGpu;if(p&&m){let e=t.memoryLimitedCtx,r=u(t.limitedKvCacheBytes),i=u(t.limitedFit.totalRequiredBytes),s=d(t.limitedFit.fitsInGpu);n.push(`| ↳ Limited | ${a} | ${o} | ${e} | ${r} | ${i} | ${s} |`)}}),n.push(`
|
|
61
61
|
---`),n.push(`
|
|
62
|
-
### System Information`);let
|
|
63
|
-
### Command Used`);let
|
|
62
|
+
### System Information`);let f=null;if(process.platform!==`win32`)try{f=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}if(f?n.push(`- **System:** ${f}`):(n.push(`- **Hostname:** ${S.hostname()}`),n.push(`- **OS:** ${S.type()} ${S.release()}`)),n.push(`- **Platform:** ${process.platform}`),n.push(`- **Architecture:** ${S.arch()}`),n.push(`- **CPU Cores:** ${S.cpus().length}`),n.push(`- **Total System Memory:** ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`),l.length>0){let e=l[0].capabilities.buttress?.selected;e&&(n.push(`- **Unified Memory (Usable):** ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0)}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.devices?.length>0&&n.push(`- **Metal Device:** ${e.devices[0].deviceName}`),n.push(`- **Performance Score:** ${e.score}`))}if(l.length>0){let e=l[0].capabilities;n.push(`- **Python3:** ${e.python?.available?`Available`:`Not found`}`),n.push(`- **MLX (system):** ${e.mlx?.systemAvailable?`Available`:`Not installed (will use venv)`}`)}n.push(`
|
|
63
|
+
### Command Used`);let p=process.argv.slice(2).join(` `);if(n.push(`\`\`\`bash\n${process.argv[0]} ${process.argv[1]} ${p}\n\`\`\``),n.push(`
|
|
64
64
|
### Package Information`),n.push(`- **Name:** ${$.name}`),n.push(`- **Version:** ${$.version}`),$.description&&n.push(`- **Description:** ${$.description}`),t&&Object.keys(t).length>0){n.push(`
|
|
65
|
-
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=
|
|
66
|
-
`),`utf8`),console.log(`\nMLX model capabilities table saved to: ${
|
|
65
|
+
### Configuration`),n.push(`<details>`),n.push(`<summary>Click to expand TOML configuration</summary>`),n.push("\n```toml");try{let e=F.stringify(t);n.push(e)}catch{n.push(`# Error serializing config`),n.push(JSON.stringify(t,null,2))}n.push("```"),n.push(`</details>`)}let m=`mlx-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,`-`).split(`T`)[0]}.md`,h=x.join(process.cwd(),m);P.writeFileSync(h,n.join(`
|
|
66
|
+
`),`utf8`),console.log(`\nMLX model capabilities table saved to: ${h}`),process.exit(0)}catch(e){console.error(`Failed to generate MLX model table:`,e.message),process.exit(1)}}async function us({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: mlx-llm`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{server:n,generators:r=[],...i}=t||{},a=Object.keys(i).length>0?{...i}:null;e&&(a={...a||{},model:{...a?.model||{},repo_id:e}});let o=await J(`mlx-llm`,null,{config:a,includeBreakdown:!0});console.log(`
|
|
67
67
|
=== Platform Information ===`),console.log(`Available: ${o.available?`Yes`:`No`}`),console.log(`OS: ${o.platform.os} (${o.platform.arch})`),o.platform.errors?.length>0&&console.log(`Errors: ${o.platform.errors.join(`; `)}`),console.log(`Python3: ${o.python.available?`Available`:`Not found`}`),console.log(`MLX (system): ${o.mlx.systemAvailable?`Available`:`Not installed (will use venv)`}`);let s=o.buttress?.selected,c=o.modelConfig;if(e||c?.repoId){console.log(`
|
|
68
68
|
=== Model Information ===`);let t=e||c?.repoId;if(console.log(`Repository ID: ${t}`),c?.architecture&&console.log(`Architecture: ${c.architecture}`),c?.quantBits&&console.log(`Quantization: ${c.quantBits}bit (group_size=${c.quantGroupSize||`N/A`})`),c?.nCtx&&console.log(`Max Context Length: ${c.nCtx}`),s?.modelBytes&&console.log(`Model Size: ${(s.modelBytes/1024/1024/1024).toFixed(2)} GB`),s?.kvCacheBytes!=null&&console.log(`KV Cache Size (full context): ${(s.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`),s?.kvInfo&&console.log(`KV Info: ${s.kvInfo.nLayer} layers, ${s.kvInfo.nHeadKv} KV heads, ${s.kvInfo.headDim} head dim`),s?.modelBytes&&s?.kvCacheBytes!=null){let e=s.modelBytes+s.kvCacheBytes;console.log(`Total Required Memory: ${(e/1024/1024/1024).toFixed(2)} GB`)}s?.memoryLimitedCtx!=null&&(console.log(`\nMemory-Limited Context: ${s.memoryLimitedCtx}`),s.limitedKvCacheBytes!=null&&console.log(`Limited KV Cache Size: ${(s.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`))}if(o.buttress?.selected){let{selected:e}=o.buttress;console.log(`
|
|
69
|
-
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=
|
|
69
|
+
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${S.hostname()}`),console.log(`OS: ${S.type()} ${S.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${S.cpus().length}`),console.log(`Total System Memory: ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`),console.log(`
|
|
70
70
|
--- Unified Memory (Metal) ---`),e.devices?.length>0&&console.log(`Device: ${e.devices[0].deviceName}`),console.log(`Total Memory: ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}%)`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
71
71
|
--- Model Fit Analysis ---`),console.log(`Fits in Unified Memory: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`),console.log(`Total Required: ${(e.fit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),e.limitedFit&&(console.log(`
|
|
72
72
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(e.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits (Limited): ${e.limitedFit.fitsInGpu?`Yes`:`No`}`),console.log(`Limiting Factor (Limited): ${e.limitedFit.limiting}`)))}console.log(`
|
|
73
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(o,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function
|
|
73
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(o,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}async function ds({modelId:e=null,defaultConfig:t=null}={}){console.log(`${$.name} v${$.version}`),console.log(`Testing capabilities for backend: ggml-stt`),e&&console.log(`Model: ${e}`),console.log(`--------------------------------`);try{let{repoId:n,filename:r}=as(e),{server:i,generators:a=[],...o}=t||{},s=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=s(n[e]||{},t):n[e]=t}),n},c=e=>s(structuredClone(o),e||{}),l=(e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-stt`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return c(n)}}return Object.keys(o).length>0?c({}):null})(n);n&&(l={...l||{},model:{...o.runtime,...l?.model||{},repo_id:n,...r&&{filename:r}}});let u=await J(`ggml-stt`,null,{config:l,includeBreakdown:!0}),d=u.buttress?.selected||null,f=u.modelConfig||null;if(n||f?.repoId){console.log(`
|
|
74
74
|
=== Model Information ===`);let e=n||f?.repoId;console.log(`Repository ID: ${e}`),r&&console.log(`Filename: ${r}`),d?.modelBytes&&console.log(`Model Size: ${(d.modelBytes/1024/1024).toFixed(1)} MB`);let t=d?.processingBytes||d?.kvCacheBytes;if(t&&console.log(`Processing Buffer: ${(t/1024/1024).toFixed(1)} MB`),d?.modelBytes&&t)console.log(`Total Required Memory: ${((d.modelBytes+t)/1024/1024).toFixed(1)} MB`);else if(u.buttress?.selected?.fit){let{totalRequiredBytes:e}=u.buttress.selected.fit;console.log(`Total Required Memory: ${(e/1024/1024).toFixed(1)} MB`)}}if(u.buttress?.selected){let{selected:e}=u.buttress;console.log(`
|
|
75
|
-
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=
|
|
75
|
+
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=k(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${S.hostname()}`),console.log(`OS: ${S.type()} ${S.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${S.cpus().length}`),console.log(`Total System Memory: ${(S.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
76
76
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
77
77
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
|
|
78
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}const
|
|
79
|
-
bricks-buttress v${
|
|
78
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}const fs=wa();var ps=async()=>{(process.argv.includes(`--version`)||process.argv.includes(`-v`))&&(console.log(fs.version),process.exit(0)),(process.argv.includes(`--help`)||process.argv.includes(`-h`))&&(console.log(`
|
|
79
|
+
bricks-buttress v${fs.version}
|
|
80
80
|
|
|
81
81
|
Buttress server for remote inference with GGML backends.
|
|
82
82
|
|
|
@@ -109,4 +109,4 @@ Examples:
|
|
|
109
109
|
bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
|
|
110
110
|
bricks-buttress --test-caps mlx-llm --test-models-default
|
|
111
111
|
`),process.exit(0));let e=process.argv.findIndex(e=>e===`--port`||e===`-p`),t=e>=0?Number(process.argv[e+1]):void 0,n=process.argv.findIndex(e=>e===`--config`||e===`-c`),r=n>=0?process.argv[n+1]:null,i=null;if(r){let e;if(r.includes(`
|
|
112
|
-
`))e=r;else{let t=l.resolve(r);try{e=k.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=A.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||t!==`ggml-llm`&&t!==`ggml-stt`&&t!==`mlx-llm`?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),ri(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/Qwen3.5-27B-GGUF`,`unsloth/gemma-4-26B-A4B-it-GGUF`,`unsloth/gemma-4-31B-it-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=[`mlx-community/Qwen3.5-27B-8bit`,`mlx-community/Qwen3.5-27B-4bit`,`mlx-community/Qwen3.5-35B-A3B-8bit`,`mlx-community/Qwen3.5-35B-A3B-4bit`,`mlx-community/Qwen3-235B-A22B-8bit`,`mlx-community/Qwen3-235B-A22B-4bit`,`mlx-community/GLM-4.7-Flash-8bit`,`mlx-community/GLM-4.7-Flash-4bit`,`mlx-community/MiniMax-M2.5-4bit`,`mlx-community/gpt-oss-120b-4bit`,`mlx-community/gemma-4-26b-a4b-it-8bit`,`mlx-community/gemma-4-26b-a4b-it-4bit`,`mlx-community/gemma-4-31b-it-8bit`,`mlx-community/gemma-4-31b-it-4bit`],u=process.argv.findIndex(e=>e===`--test-caps`);if(u>=0){let e=process.argv[u+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&e!==`mlx-llm`&&(console.error(`Only ggml-llm, ggml-stt, and mlx-llm backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`mlx-llm`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model 
IDs`),process.exit(1)),await Fa({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Fa({modelIds:c,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Ia({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Pa({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Pa({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await La({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Ma({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Ma({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Na({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let d=ta(i);t&&(d.server.port=t),d.server.port||(d.server.port=2080),Ja({config:d,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`||d.global.openai_compat?.enabled===!0,enableAnthropicMessages:process.env.ENABLE_ANTHROPIC_MESSAGES_ENDPOINT===`1`||d.global.anthropic_messages?.enabled===!0}).then(async({app:e,port:t,openaiEnabled:n,anthropicMessagesEnabled:r,autoDiscover:o})=>{let s=ya();console.log(`Buttress server listening on port ${t}`),console.log(`--------------------------------`),await Ga(),console.log();let c=e.store.workspaceState.workspace;if(c){let e=c.name?`${c.name} (${c.id})`:c.id;console.log(`Workspace: ${e}`),console.log(`- Server ID: ${c.serverId}`),console.log(`- Issuer kid: ${c.kid}`),console.log(`- Bound at: ${c.boundAt}`)}else console.log(`Workspace: not bound`),console.log(`- State file: ${Da()}`),console.log("- Run `bricks buttress bind` from a workspace-authed CLI to 
pair.");console.log();let l={"ggml-llm":`LLM (GGML)`,"ggml-stt":`STT (GGML)`,"mlx-llm":`LLM (MLX)`};console.log(`Current supported Generators:`);let u=new Set((d?.generators||[]).map(e=>e.type).filter(Boolean));if(u.size===0)console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`);else for(let e of u)console.log(`- ${l[e]||e}`);console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${s}:${t} to connect to this server via LAN.`),console.log(`- Visit http://${s}:${t}/status to see status via LAN.`),console.log(),n?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/oai-compat/v1`),console.log(`- Chat completions: POST http://${s}:${t}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${s}:${t}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set [openai_compat] enabled = true in config to enable`),console.log()),r?(console.log(`Anthropic Messages API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/anthropic-messages`),console.log(`- Messages: POST http://${s}:${t}/anthropic-messages/v1/messages`),console.log(`- Count tokens: POST http://${s}:${t}/anthropic-messages/v1/messages/count_tokens`),console.log()):(console.log(`Anthropic Messages API [EXPERIMENTAL]: disabled`),console.log(` Set [anthropic_messages] enabled = true in config to enable`),console.log()),o&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};const{version:Ba,name:Va}=oi(),Ha=async()=>{let e=`https://registry.npmjs.org/${Va}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await r.json()).version||null}catch{return 
null}},Ua=(e,t)=>{if(!t)return!1;let n=e.split(/[.-]/),r=t.split(/[.-]/);for(let e=0;e<Math.max(n.length,r.length);e+=1){let t=parseInt(n[e])||0,i=parseInt(r[e])||0;if(i>t)return!0;if(i<t)return!1}return!1},Wa=e=>{console.log(``),console.log(`\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,Ba.padEnd(12),e.padEnd(12),`\x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`,Va.padEnd(27)),console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`),console.log(``)},Ga=async()=>{try{let e=await Ha();e&&Ua(Ba,e)&&Wa(e)}catch{}},Ka=!(`Bun`in globalThis),qa=async({backend:e,router:r,config:i,enableOpenAICompat:o,enableAnthropicMessages:s})=>{try{await f.mkdir(i.server.temp_file_dir,{recursive:!0})}catch{}let u=ya()||`0.0.0.0`,p=Aa(),m=p.workspace!=null,h=await va(Zi(i,i.generators.map(e=>e.type))),g={id:i.server.id,name:i.server.name,version:Ba,address:u,port:i.server.port,url:`http://${u}:${i.server.port}`,generators:h,authentication:m?{required:!0,type:`workspace-jwt`,kid:p.workspace.kid,bound:!0}:{required:!1,type:`workspace-jwt`,bound:!1},...m?{workspace:{id:p.workspace.id,name:p.workspace.name}}:{}},_=new n({serve:{maxRequestBodySize:i.server.max_body_size},websocket:{idleTimeout:Math.ceil(i.server.session_timeout/1e3)},adapter:Ka?t():void 0}).state({sessions:new Map,backend:e||qr,config:i,serverInfo:g,workspaceState:p});r&&_.use(r),i.autodiscover?.http?.enabled&&_.use(di(i)),_.use(Ci),_.use(Oi),o&&_.use(Li(i)),s&&_.use(Ui(i));let v=(e,t)=>gi(hi(e,t),p.workspace),y={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603},b=e=>e.id??e.raw?.id??e.remoteAddress,x=new Map,S=new Map;return _.ws(`/buttress/rpc`,{parse:(e,t)=>{if(typeof 
t==`string`)try{return JSON.parse(t)}catch{return e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Invalid request`},id:null})),null}return t},body:a.Object({jsonrpc:a.String(),method:a.String(),params:a.String(),id:a.String()}),async open(e){let t=b(e),n=e.data?.headers,r=e.data?.query,i,a=new Promise(e=>{i=e});if(S.set(t,{identity:null,ready:a}),!m){console.log(`[Request] New connection: ${t} (unbound, no auth)`),i(!0);return}let o=await v(n||{},r);if(!o){console.warn(`[Auth] Rejecting WS ${t}: invalid or missing workspace-access token`),S.delete(t),i(!1),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:null})),e.close(1008,`UNAUTHORIZED`);return}console.log(`[Request] New connection: ${t} (subject=${o.subjectType}:${o.subjectId})`);let s=S.get(t);s&&(s.identity=o),i(!0)},async message(e,{id:t,method:n,params:r}){let{sessions:i}=e.data.store,a=b(e),o=S.get(a);if(o?.ready&&!await o.ready){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}let s=S.get(a)?.identity??null;if(m&&!s){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}if(n===`init`){let[n]=ma(r)??[],o,l=!1,u=n&&i.has(n)?i.get(n):null,d=!u?.identity||!!s&&s.subjectId===u.identity.subjectId&&s.subjectType===u.identity.subjectType;if(u&&d)u.timeout&&=(clearTimeout(u.timeout),null),u.identity=s,u.currentPeerId=a,o=n,l=!0,console.log(`[Request] Session restored: ${o}`);else{u&&console.warn(`[Request] Refused to restore session ${n}: identity mismatch (session=${u.identity?.subjectType}:${u.identity?.subjectId} peer=${s?.subjectType}:${s?.subjectId})`),o=c();let e={streams:new Map,streamReaders:new Map,generators:new Set,initializedContexts:new Set,timeout:null,identity:s,currentPeerId:a};i.set(o,e),console.log(`[Request] New session: 
${o}`)}x.set(a,o),e.send(JSON.stringify({jsonrpc:`2.0`,result:ha({sessionId:o,restored:l}),id:t}));return}let l=x.get(a);if(!l){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}let u=i.get(l);if(!u){x.delete(a),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}console.log(`[Request] Received request from ${l}: ${n}`);let[f,p]=n.split(`.`),h=pa[f]?.[p];if(!h&&n!==`cancel`&&n!==`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.METHOD_NOT_FOUND,message:`Method not found`},id:t}));return}try{if(n===`cancel`){let e=u.streamReaders.get(t);e&&(e.reader.cancel(),u.streamReaders.delete(t));return}if(n===`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,result:`pong`,id:t}));return}let i=ma(r),o=fa[f]?.[p],s=o?o.parse(i):i,c=await h({...e.data.store,peerId:l,session:u},...s);if(c instanceof d){let r=c.getReader();u.streamReaders.set(t,{reader:r,peerId:a}),e.send(JSON.stringify({jsonrpc:`2.0`,result:{type:`stream`},id:t}));try{for(;;){let{value:n,done:i}=await r.read();if(i)break;let{event:a,data:o}=n;e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/${a}`,params:ha(o),id:t}))}e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_end`,id:t}))}catch(r){console.error(`[RPC] Stream error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_error`,params:ha(r),id:t}))}u.streamReaders.delete(t)}else e.send(JSON.stringify({jsonrpc:`2.0`,result:ha(c),id:t}))}catch(r){if(r instanceof j){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INVALID_PARAMS,message:`Invalid params`,data:r.issues},id:t}));return}console.error(`[RPC] Handler error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:y.INTERNAL_ERROR,message:String(r)},id:t}))}},async close(e){let t=b(e),n=x.get(t);if(x.delete(t),S.delete(t),!n){console.log(`[Request] Connection closed (no session)`);return}console.log(`[Request] Connection 
closed: ${n}`);let{backend:r,sessions:a}=e.data.store,o=a.get(n);if(o){for(let[e,n]of o.streamReaders)n.peerId===t&&(n.reader.cancel().catch(()=>{}),o.streamReaders.delete(e));if(o.currentPeerId!==t){console.log(`[Request] Session ${n} already adopted by another peer; skip arm`);return}o.timeout=setTimeout(()=>{if(o.currentPeerId!==t)return;a.delete(n),console.log(`[Request] Session timed out: ${n}`);let{generators:e}=o;e.forEach(e=>{r.finalizeGenerator(e)}),f.rm(l.join(i.server.temp_file_dir,n),{recursive:!0,force:!0}).catch(()=>{})},i.server.session_timeout)}}}),{app:_,config:i}},Ja=async({backend:e,router:t,config:n,enableOpenAICompat:r=!1,enableAnthropicMessages:i=!1})=>{let{app:a,config:s}=await qa({backend:e,router:t,config:n,enableOpenAICompat:r,enableAnthropicMessages:i}),{server:{port:c}}=s,l=[new Promise(e=>a.listen(c,e))],u=null;if(s.autodiscover){let{workspace:e,serverKeyPair:t}=a.store.workspaceState;if(e&&t){let e={kid:t.kid,privateKey:o.createPrivateKey({key:Buffer.from(t.privateKeyPkcs8,`base64`),format:`der`,type:`pkcs8`})};u=new Ta(s.autodiscover,()=>a.store.serverInfo,e),l.push(u.start())}else e?console.warn("[Autodiscover] disabled: bound to a workspace but state.json is missing serverKeyPair. Re-run `bricks buttress bind` to register a per-server announce key (required for v2.0 signed UDP discovery)."):console.warn("[Autodiscover] disabled: buttress-server is not bound to a workspace. Run `bricks buttress bind` from a workspace-authed CLI to pair.")}return await Promise.all(l),{app:a,port:c,openaiEnabled:r,anthropicMessagesEnabled:i,autoDiscover:u}},Ya=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];(process.argv[1]?.endsWith(`/bricks-buttress`)||Ya.includes(process.argv[1]))&&await za();export{Ga as checkAndNotifyUpdates,Ha as checkForUpdates,Ua as compareVersions,qa as createServer,Wa as logUpdateMessage,ta as processConfig,ri as startModelDownload,Ja as startServer};
|
|
112
|
+
`))e=r;else{let t=x.resolve(r);try{e=P.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=F.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||![`ggml-llm`,`ggml-stt`,`mlx-llm`,`onnx-stt`,`onnx-tts`].includes(t)?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),xa(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/Qwen3.5-27B-GGUF`,`unsloth/gemma-4-26B-A4B-it-GGUF`,`unsloth/gemma-4-31B-it-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=[`mlx-community/Qwen3.5-27B-8bit`,`mlx-community/Qwen3.5-27B-4bit`,`mlx-community/Qwen3.5-35B-A3B-8bit`,`mlx-community/Qwen3.5-35B-A3B-4bit`,`mlx-community/Qwen3-235B-A22B-8bit`,`mlx-community/Qwen3-235B-A22B-4bit`,`mlx-community/GLM-4.7-Flash-8bit`,`mlx-community/GLM-4.7-Flash-4bit`,`mlx-community/MiniMax-M2.5-4bit`,`mlx-community/gpt-oss-120b-4bit`,`mlx-community/gemma-4-26b-a4b-it-8bit`,`mlx-community/gemma-4-26b-a4b-it-4bit`,`mlx-community/gemma-4-31b-it-8bit`,`mlx-community/gemma-4-31b-it-4bit`],l=process.argv.findIndex(e=>e===`--test-caps`);if(l>=0){let e=process.argv[l+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&e!==`mlx-llm`&&(console.error(`Only ggml-llm, ggml-stt, and mlx-llm backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`mlx-llm`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model 
IDs`),process.exit(1)),await ls({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await ls({modelIds:c,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await us({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await cs({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await cs({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await ds({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await os({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await os({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await ss({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let u=yo(i);t&&(u.server.port=t),u.server.port||(u.server.port=2080),Ss({config:u,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`||u.global.openai_compat?.enabled===!0,enableAnthropicMessages:process.env.ENABLE_ANTHROPIC_MESSAGES_ENDPOINT===`1`||u.global.anthropic_messages?.enabled===!0}).then(async({app:e,port:t,openaiEnabled:n,anthropicMessagesEnabled:r,autoDiscover:o})=>{let s=Ko();console.log(`Buttress server listening on port ${t}`),console.log(`--------------------------------`),await ys(),console.log();let c=e.store.workspaceState.workspace;if(c){let e=c.name?`${c.name} (${c.id})`:c.id;console.log(`Workspace: ${e}`),console.log(`- Server ID: ${c.serverId}`),console.log(`- Issuer kid: ${c.kid}`),console.log(`- Bound at: ${c.boundAt}`)}else console.log(`Workspace: not bound`),console.log(`- State file: ${ts()}`),console.log("- Run `bricks buttress bind` from a workspace-authed CLI to 
pair.");console.log();let l={"ggml-llm":`LLM (GGML)`,"ggml-stt":`STT (GGML)`,"mlx-llm":`LLM (MLX)`,"onnx-stt":`STT (ONNX)`,"onnx-tts":`TTS (ONNX)`};console.log(`Current supported Generators:`);let d=new Set((u?.generators||[]).map(e=>e.type).filter(Boolean));if(d.size===0)console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`);else for(let e of d)console.log(`- ${l[e]||e}`);console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${s}:${t} to connect to this server via LAN.`),console.log(`- Visit http://${s}:${t}/status to see status via LAN.`),console.log(),n?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/oai-compat/v1`),console.log(`- Chat completions: POST http://${s}:${t}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${s}:${t}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set [openai_compat] enabled = true in config to enable`),console.log()),r?(console.log(`Anthropic Messages API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${s}:${t}/anthropic-messages`),console.log(`- Messages: POST http://${s}:${t}/anthropic-messages/v1/messages`),console.log(`- Count tokens: POST http://${s}:${t}/anthropic-messages/v1/messages/count_tokens`),console.log()):(console.log(`Anthropic Messages API [EXPERIMENTAL]: disabled`),console.log(` Set [anthropic_messages] enabled = true in config to enable`),console.log()),o&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};const{version:ms,name:hs}=wa(),gs=async()=>{let e=`https://registry.npmjs.org/${hs}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await 
r.json()).version||null}catch{return null}},_s=(e,t)=>{if(!t)return!1;let n=e.split(/[.-]/),r=t.split(/[.-]/);for(let e=0;e<Math.max(n.length,r.length);e+=1){let t=parseInt(n[e])||0,i=parseInt(r[e])||0;if(i>t)return!0;if(i<t)return!1}return!1},vs=e=>{console.log(``),console.log(`\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,ms.padEnd(12),e.padEnd(12),`\x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`,hs.padEnd(27)),console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`),console.log(``)},ys=async()=>{try{let e=await gs();e&&_s(ms,e)&&vs(e)}catch{}},bs=!(`Bun`in globalThis),xs=async({backend:e,router:r,config:i,enableOpenAICompat:o,enableAnthropicMessages:s})=>{try{await d.mkdir(i.server.temp_file_dir,{recursive:!0})}catch{}let l=Ko()||`0.0.0.0`,f=is(),p=f.workspace!=null,m=await Go(ho(i,i.generators.map(e=>e.type))),h={id:i.server.id,name:i.server.name,version:ms,address:l,port:i.server.port,url:`http://${l}:${i.server.port}`,generators:m,authentication:p?{required:!0,type:`workspace-jwt`,kid:f.workspace.kid,bound:!0}:{required:!1,type:`workspace-jwt`,bound:!1},...p?{workspace:{id:f.workspace.id,name:f.workspace.name}}:{}},g=new n({serve:{maxRequestBodySize:i.server.max_body_size},websocket:{idleTimeout:Math.ceil(i.server.session_timeout/1e3)},adapter:bs?t():void 0}).state({sessions:new Map,backend:e||la,config:i,serverInfo:h,workspaceState:f});r&&g.use(r),i.autodiscover?.http?.enabled&&g.use(ka(i)),g.use(Ba),g.use(Ga),o&&g.use(eo(i)),s&&g.use(oo(i));let _=(e,t)=>Pa(Na(e,t),f.workspace),v={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603},y=e=>e.id??e.raw?.id??e.remoteAddress,b=new Map,x=new Map;return 
g.ws(`/buttress/rpc`,{parse:(e,t)=>{if(typeof t==`string`)try{return JSON.parse(t)}catch{return e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INVALID_REQUEST,message:`Invalid request`},id:null})),null}return t},body:a.Object({jsonrpc:a.String(),method:a.String(),params:a.String(),id:a.String()}),async open(e){let t=y(e),n=e.data?.headers,r=e.data?.query,i,a=new Promise(e=>{i=e});if(x.set(t,{identity:null,ready:a}),!p){console.log(`[Request] New connection: ${t} (unbound, no auth)`),i(!0);return}let o=await _(n||{},r);if(!o){console.warn(`[Auth] Rejecting WS ${t}: invalid or missing workspace-access token`),x.delete(t),i(!1),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INVALID_REQUEST,message:`Unauthorized`},id:null})),e.close(1008,`UNAUTHORIZED`);return}console.log(`[Request] New connection: ${t} (subject=${o.subjectType}:${o.subjectId})`);let s=x.get(t);s&&(s.identity=o),i(!0)},async message(e,{id:t,method:n,params:r}){let{sessions:a}=e.data.store,o=y(e),s=x.get(o);if(s?.ready&&!await s.ready){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}let l=x.get(o)?.identity??null;if(p&&!l){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INVALID_REQUEST,message:`Unauthorized`},id:t}));return}if(n===`init`){let[n]=Vo(r)??[],s,u=!1,d=n&&a.has(n)?a.get(n):null,f=!d?.identity||!!l&&l.subjectId===d.identity.subjectId&&l.subjectType===d.identity.subjectType;if(d&&f)d.timeout&&=(clearTimeout(d.timeout),null),d.identity=l,d.currentPeerId=o,s=n,u=!0,console.log(`[Request] Session restored: ${s}`);else{d&&console.warn(`[Request] Refused to restore session ${n}: identity mismatch (session=${d.identity?.subjectType}:${d.identity?.subjectId} peer=${l?.subjectType}:${l?.subjectId})`),s=c();let e={streams:new Map,streamReaders:new Map,generators:new Set,initializedContexts:new Set,fileManager:new $o(s,i.server.temp_file_dir),timeout:null,identity:l,currentPeerId:o};a.set(s,e),console.log(`[Request] New session: 
${s}`)}b.set(o,s),e.send(JSON.stringify({jsonrpc:`2.0`,result:Ho({sessionId:s,restored:u}),id:t}));return}let d=b.get(o);if(!d){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}let f=a.get(d);if(!f){b.delete(o),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INVALID_REQUEST,message:`Session not initialized`},id:t}));return}console.log(`[Request] Received request from ${d}: ${n}`);let[m,h]=n.split(`.`),g=zo[m]?.[h];if(!g&&n!==`cancel`&&n!==`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.METHOD_NOT_FOUND,message:`Method not found`},id:t}));return}try{if(n===`cancel`){let e=f.streamReaders.get(t);e&&(e.reader.cancel(),f.streamReaders.delete(t));return}if(n===`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,result:`pong`,id:t}));return}let i=Vo(r),a=Ro[m]?.[h],s=a?a.parse(i):i,c=await g({...e.data.store,peerId:d,session:f},...s);if(c instanceof u){let r=c.getReader();f.streamReaders.set(t,{reader:r,peerId:o}),e.send(JSON.stringify({jsonrpc:`2.0`,result:{type:`stream`},id:t}));try{for(;;){let{value:n,done:i}=await r.read();if(i)break;let{event:a,data:o}=n;e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/${a}`,params:Ho(o),id:t}))}e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_end`,id:t}))}catch(r){console.error(`[RPC] Stream error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_error`,params:Ho(r),id:t}))}f.streamReaders.delete(t)}else e.send(JSON.stringify({jsonrpc:`2.0`,result:Ho(c),id:t}))}catch(r){if(r instanceof I){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INVALID_PARAMS,message:`Invalid params`,data:r.issues},id:t}));return}console.error(`[RPC] Handler error for ${n}:`,r),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:v.INTERNAL_ERROR,message:String(r)},id:t}))}},async close(e){let t=y(e),n=b.get(t);if(b.delete(t),x.delete(t),!n){console.log(`[Request] Connection closed (no session)`);return}console.log(`[Request] Connection 
closed: ${n}`);let{backend:r,sessions:a}=e.data.store,o=a.get(n);if(o){for(let[e,n]of o.streamReaders)n.peerId===t&&(n.reader.cancel().catch(()=>{}),o.streamReaders.delete(e));if(o.currentPeerId!==t){console.log(`[Request] Session ${n} already adopted by another peer; skip arm`);return}o.timeout=setTimeout(()=>{if(o.currentPeerId!==t)return;a.delete(n),console.log(`[Request] Session timed out: ${n}`);let{generators:e}=o;e.forEach(e=>{r.finalizeGenerator(e)}),o.fileManager.destroy().catch(()=>{})},i.server.session_timeout)}}}),{app:g,config:i}},Ss=async({backend:e,router:t,config:n,enableOpenAICompat:r=!1,enableAnthropicMessages:i=!1})=>{let{app:a,config:s}=await xs({backend:e,router:t,config:n,enableOpenAICompat:r,enableAnthropicMessages:i}),{server:{port:c}}=s,l=[new Promise(e=>a.listen(c,e))],u=null;if(s.autodiscover){let{workspace:e,serverKeyPair:t}=a.store.workspaceState;if(e&&t){let e={kid:t.kid,privateKey:o.createPrivateKey({key:Buffer.from(t.privateKeyPkcs8,`base64`),format:`der`,type:`pkcs8`})};u=new Qo(s.autodiscover,()=>a.store.serverInfo,e),l.push(u.start())}else e?console.warn("[Autodiscover] disabled: bound to a workspace but state.json is missing serverKeyPair. Re-run `bricks buttress bind` to register a per-server announce key (required for v2.0 signed UDP discovery)."):console.warn("[Autodiscover] disabled: buttress-server is not bound to a workspace. Run `bricks buttress bind` from a workspace-authed CLI to pair.")}return await Promise.all(l),{app:a,port:c,openaiEnabled:r,anthropicMessagesEnabled:i,autoDiscover:u}},Cs=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];(process.argv[1]?.endsWith(`/bricks-buttress`)||Cs.includes(process.argv[1]))&&await ps();export{ys as checkAndNotifyUpdates,gs as checkForUpdates,_s as compareVersions,xs as createServer,vs as logUpdateMessage,yo as processConfig,xa as startModelDownload,Ss as startServer};
|