@fugood/buttress-server 2.24.1-beta.0 → 2.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.mjs CHANGED
@@ -1,12 +1,12 @@
1
1
  #!/usr/bin/env node
2
- import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import*as o from"node:stream/web";import{ReadableStream as s}from"node:stream/web";import c,{mkdir as l,open as u,readFile as d,readdir as f,rename as p,stat as m,unlink as h,writeFile as g}from"node:fs/promises";import _ from"node:path";import v from"node:os";import{createHash as y}from"node:crypto";import{gguf as b}from"@huggingface/gguf";import{getBackendDevicesInfo as x,isLibVariantAvailable as S,loadModel as C}from"@fugood/llama.node";import w from"bytes";import{EventEmitter as T}from"node:events";import{initWhisper as E}from"@fugood/whisper.node";import{fileURLToPath as D}from"node:url";import{execFile as ee,execSync as O,spawn as te}from"node:child_process";import k from"node:fs";import A from"@iarna/toml";import{ZodError as ne,z as j}from"zod";import{cors as M}from"@elysiajs/cors";import N from"node-machine-id";import P from"ms";import{Buffer as F}from"node:buffer";import re from"node:dgram";const I=1024**3,L=(e,t,n)=>Math.min(Math.max(e,t),n),ie=e=>e?40:0,ae=(e=0)=>e?L(e/(12*I)*20,0,20):0,R=(e=0)=>e?L(e/(32*I)*10,0,10):0,oe=e=>e?10:0,se=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},ce=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=ie(n)+se(t,e)+ae(r),s=R(i),c=oe(a);return Math.min(100,Math.round(o+s+c))},le=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:ie(n),variant:se(t,e),gpuMemory:ae(r),cpuMemory:R(i),availability:oe(a)}),ue=[`cuda`,`vulkan`,`snapdragon`,`default`],z=.85,de=.5,fe=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],pe=e=>e&&String(e).trim().toLowerCase()||null,me=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=ue}={})=>{let 
i=[];e&&i.push(e),i.push(...fe(t)),i.push(...fe(n)),i.push(...r);let a=new Set;for(let e of i){let t=pe(e);t&&a.add(t)}return Array.from(a)},he=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},ge=e=>Array.isArray(e)?e.map(e=>({...e})):[],_e=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,ve=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=ge(_e(n,r)),l=c.some(he),u=c.filter(e=>he(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:ce(m),breakdown:o?le(m):null,error:s,timestamp:new Date().toISOString()}},B=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},ye=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=z,cpuMemoryFraction:a=de,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=ue}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=me({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t 
of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(ve({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(ve({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=B({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=B({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},be=`ggml-llm`,xe=[`cuda`,`vulkan`,`default`],Se=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=z,cpuMemoryFraction:a=de,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>ye({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:xe,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:xe}),Ce=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=ce(d),p=l?le(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified 
Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=B({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=B({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},we=new Map([[be,ye],[`ggml-stt`,Se],[`mlx-llm`,Ce]]),Te=async({platform:e,totalMemoryInBytes:t,backend:n=be,dependencies:r,...i}={})=>{let a=we.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},Ee={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},De=e=>Ee[e?String(e).toLowerCase():`f16`]||Ee.f16,Oe=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=De(a.k),h=De(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},ke=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let 
u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},Ae=e=>e?String(e).trim().toLowerCase():null,je=(e={},t=null)=>{if(!e)return null;let n=Ae(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Me=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return o*a+c},Ne=({arch:e,nLayer:t=0})=>({arch:Ae(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Pe=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let 
r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),Fe=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=Ae(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=je(t,r),o=r?Pe.get(r):null;if(!o)return Ne({arch:r,nLayer:n});let s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return Ne({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Me(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Ie=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),Le=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),Re=e=>e?String(e).trim().toLowerCase():null,ze=e=>{let t=Re(e);return t?Ie.has(t):!1},Be=e=>{let t=Re(e);return t?Le.has(t):!1},Ve=e=>ze(e)?`recurrent`:Be(e)?`hybrid`:`transformer`,He=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return 
Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},Ue=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ve(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return 
e=>Oe(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},We=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ve(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Ge=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new T;V.setMaxListeners(100);const Ke=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var 
qe=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Ke(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Ke(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Ke(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new qe,U=new qe;let Je=0;function Ye(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function Xe(e){return Je+=1,{subscriberId:Je,unsubscribe:Ye(e)}}function Ze(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function 
Qe(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function $e(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function et(e){return{timestamp:new Date().toISOString(),ggmlLlm:Ze(e),ggmlStt:Qe(e),mlxLlm:$e(e)}}const{ReadableStream:tt,WritableStream:nt}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,rt=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),rt(e[t],n)):e[t]=n}),e),it=`https://huggingface.co`,at=`https://huggingface.co/api`,W=_.join(v.homedir(),`.buttress`,`models`),ot=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],st=.5,ct={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:st},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:at,base_url:it,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},lt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],ut=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},dt=(e={})=>{let t=structuredClone(ct);if(rt(t,e),t.backend.variant=ut(t.backend.variant),t.backend.variant_preference=Array.from(new Set(lt(t.backend.variant_preference).flatMap(e=>{let t=ut(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(lt(t.runtime.prefer_variants).flatMap(e=>{let t=ut(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(lt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let 
e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||it,t.model.api_base=t.model.api_base||at,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...ct.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||ct.runtime.context_release_delay_ms),t},ft=e=>{let t=e.toLowerCase();return ot.find(e=>t.includes(e))||null},pt=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=ut(e);return t?[t]:[]})))},G=async e=>{await l(e,{recursive:!0})},mt=(e=W)=>_.join(e,`.metadata-cache`),ht=(e,t,n=W)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(mt(n),t,`${r}.json`)},gt=async(e,t,n=W)=>{try{let r=ht(e,t,n),i=await d(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,_.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},_t=async(e,t,n,r=W)=>{try{let i=ht(e,t,r);await G(_.dirname(i)),await g(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,_.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} 
cache:`,e.message)}},vt=(e=W)=>_.join(e,`.session-state-cache`),yt=(e=W)=>_.join(vt(e),`cache-map.json`),bt=(e=W)=>_.join(vt(e),`temp`),xt=(e=W)=>_.join(vt(e),`states`),St=()=>({version:1,entries:{},totalSize:0}),Ct=async(e=W)=>{try{let t=await d(yt(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?St():n}catch{return St()}},wt=async(e,t=W)=>{let n=yt(t),r=`${n}.tmp.${Date.now()}`;try{await G(_.dirname(n)),await g(r,JSON.stringify(e,null,2),`utf-8`),await p(r,n)}catch(e){throw await h(r).catch(()=>{}),e}},Tt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return y(`sha256`).update(n).digest(`hex`).slice(0,24)},Et=(e,t=W)=>_.join(xt(t),`${e}.bin`),Dt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return _.join(bt(e),`${t}.bin`)},Ot=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,kt=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},At=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Ot(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=kt(e,n.fullText);return 
r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},jt=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},Mt=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Ot(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Nt=async(e=W)=>{let t=bt(e);try{let e=await f(t),n=Date.now();await Promise.all(e.map(async e=>{let r=_.join(t,e),i=await m(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await h(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Pt=async e=>{try{return await m(e),!0}catch{return!1}},Ft=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t;var It=class 
e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Ft(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid 
model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Pt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await wt(this.cacheMap,this.baseDir).catch(()=>{}),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await G(vt(this.baseDir)),await G(bt(this.baseDir)),await G(xt(this.baseDir)),this.cacheMap=await Ct(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await h(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await h(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await wt(this.cacheMap,this.baseDir).catch(()=>{})}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Ot(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=At(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Pt(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await wt(this.cacheMap,this.baseDir).catch(()=>{}),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} 
chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=Tt(a,this.metadata),s=()=>{n&&h(n).catch(()=>{}),i&&h(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await wt(this.cacheMap,this.baseDir).catch(()=>{}),s(),e}let c=Et(o,this.baseDir),l=i?Et(`${o}-prompt`,this.baseDir):null;try{await G(_.dirname(c)),await p(n,c);let s=await m(c),u=0;if(i&&l)try{await p(i,l),u=(await m(l)).size,console.log(`[SessionCache] Saved prompt state: ${l}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let d={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:l||null,promptStateSize:u,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=d,this.cacheMap.totalSize+=s.size+u,this.requiresExactMatch()||await Mt(this.cacheMap,e,o,this.metadata),await jt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await wt(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),d}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await G(bt(this.baseDir)),Dt(this.baseDir)}async cleanup(){await Nt(this.baseDir)}};const Lt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Rt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global 
fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},zt=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await gt(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await b(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await _t(r,`range-metadata`,o,n),o},Bt=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},K=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Vt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new nt({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Ht=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await gt(i,`artifact-info`,r);if(a)return a;let 
o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Rt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await _t(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Lt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:ot,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=ft(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||ft(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await Lt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await Rt(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await _t(i,`artifact-info`,y,r),y},Ut=/^mmproj-.*\.gguf$/i,Wt=async(e,t)=>{if(!e.model.enable_mtmd)return null;let n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer 
${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await gt(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Rt(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await _t(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:_.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:_.resolve(e.model.mmproj_local_path)};return await _t(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await Lt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),l=c?.siblings||c?.files||[],u=l.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),d=e.model.mmproj_filename;if(d){if(!u.includes(d))throw Error(`mmproj file "${d}" not found in repo ${i}`)}else{let e=u.filter(e=>Ut.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let n=t?.quantization&&String(t.quantization).toLowerCase();d=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let f=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${d}`,p=l.find(e=>(e.rfilename||e.path||e.filename)===d),m=Number(p?.size);if(!Number.isFinite(m)||m<=0){let e=await Rt(f,{headers:r});m=Number(e.headers.get(`content-length`))||null}let h={repoId:i,revision:a,filename:d,url:f,size:m,headers:r};return await _t(o,`artifact-info`,h,n),h},Gt=(e,t)=>{if(t?.localPath)return 
t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Kt=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=pt(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?ct.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Te({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},qt=async e=>{let t=await Ht(e),n=await Wt(e,t),r=await zt(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=He(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Fe({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),j={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},M=j.n_ctx?Number(j.n_ctx):null,N=M||a||4096,P=[],F=[],re=!0;if(M&&a&&M>a){re=!1;let e=`Requested context length (${M}) exceeds model training context 
(${a})`;P.push(e),F.push(e),N=a}M&&!a&&P.push(`Model metadata missing training context length, using requested value`);let I={k:j.cache_type_k,v:j.cache_type_v},L=t.size>0?t.size:0,ie=Ue({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:I,swaConfig:k,kvUnified:j.kv_unified,nParallel:j.n_parallel,swaFull:j.swa_full,arch:i,attentionLayerCount:m}),ae=We({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:j.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=await Kt(e,{modelBytes:L,kvCacheBytes:ie(N)+ae}),oe=R.selected.totalMemory||0,se=oe*(e.backend.gpu_memory_fraction||1),ce=e.backend.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),le=Math.max(0,v.totalmem()*ce),ue=R.selected.hasGpu?se:le,z=Ge({maxCtx:N,availableMemory:ue,modelBytes:L,kvBytesForCtx:ie});if(!M&&z){let e=a?Math.min(z,a):z,t=Math.max(32,e);t<N&&P.push(`Context length capped to ${t} by memory limits`),N=t}N>z&&(N=z);let de=Math.floor(z);console.log(`[buttress] Memory-limited context length: ${de}`);let fe=ie(N),pe=L+fe+ae,me=w?L/(w+1):L,he=0;R.selected.hasGpu&&me>0&&(he=Math.min(w+1,Math.max(0,Math.floor(se/me)))),console.log(`[buttress] Auto GPU layer capacity (${R.selected.variant}): ${he}/${w+1}`);let ge;ge=j.n_gpu_layers===`auto`||j.n_gpu_layers==null?he:Math.max(0,Math.min(Number(j.n_gpu_layers)||0,w+1));let _e=(()=>{let e=j.flash_attn_type&&String(j.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:R.selected.hasGpu?`auto`:`off`})(),ve=e.runtime.cache_dir,B=Bt(e,t),ye=await K(B,t.size),be=Gt(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:re,backend:`ggml-llm`,warnings:P,errors:F,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,dense_first:k.denseFirst,type:k.type,layers:k.swaLayers}:null}},runtime:{...j,variant:R.selected.variant,n_ctx:N,requested_ctx:M,n_gpu_layers:ge,auto_gpu_layers:he,flash_attn_type:_e,cache_type_k:I.k,cache_type_v:I.v,estimated_max_n_ctx:de},resources:{modelBytes:L,kvCacheBytes:fe,recurrentMemoryBytes:ae,totalEstimatedBytes:pe,gpuCapacityBytes:oe,gpuUsableBytes:se,cpuUsableBytes:le,fit:R.selected.fit},devices:{selected:R.selected,attempts:R.attempts},download:{cacheDir:ve,localPath:B,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:R,cacheTypes:I,localPath:B,localExists:ye}},Jt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new tt({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},Yt=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&h(i).catch(()=>{}),c&&h(c).catch(()=>{})};return new tt({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},Xt=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return y(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},Zt=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),l=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),u=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${l}`,d=_.join(t,l);await K(d)||await Vt(u,o.headers,d,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Vt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},Qt=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Vt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},$t=async(e,t)=>{let n=Xt(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await Zt(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await C(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await Qt(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},en=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},tn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return en(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?en(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await en(e,t)},i),!0)};async function nn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=dt(t),a=await qt(i),o=new It(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await $t(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=Xt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=t.prompt||``,o=null,c=null;if(!a&&t.messages){({messages:o}=t),c={chatTemplate:t.chat_template||t.chatTemplate,jinja:t.jinja??!0,tools:t.tools,parallel_tool_calls:t.parallel_tool_calls,tool_choice:t.tool_choice,reasoning_format:t.reasoning_format,enable_thinking:t.enable_thinking,add_generation_prompt:t.add_generation_prompt,now:t.now,chat_template_kwargs:t.chat_template_kwargs,force_pure_content:t.force_pure_content};let e=await i.context.getFormattedChat(o,c.chatTemplate,c);a=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&a){let{options:e}=await s.sessionCache.prepareCompletionOptions(t,a,i.context),n=await s.sessionCache.generateTempStatePath(),r=(await i.context.tokenize(a))?.tokens?.length||0,o={...e,save_state_path:n},c=s.sessionCache.requiresExactMatch(),l=!!o.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),o.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Yt(i.context,o,s.sessionCache,a,n,r,s.id,d,u)}let l={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Jt(i.context,t,s.id,l)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=Xt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=await 
i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=Xt(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=Xt(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=Xt(s),t=s.contexts.get(e);return t?tn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):en(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const rn=e=>{let t=dt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function an(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let 
n=dt(e),o=await Ht(n),s=Bt(n,o),{repoId:c}=o,l=await Wt(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),u=Gt(n,l),d=async()=>{if(!l||!u||l.localPath)return;if(await K(u,l.size)){console.log(`[Download] mmproj already exists: ${u}`);return}let e=t.getDownload(u);if(e){await e;return}let n=(async()=>{try{await Vt(l.url,l.headers,u,l.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(u)}})();t.setDownload(u,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await d().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let f=t.getDownload(s);if(f)return console.log(`[Download] Already downloading: ${c}`),f.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let p=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let l=String(s).padStart(5,`0`),u=o.filename.replace(e,`-${l}-of-${String(i).padStart(5,`0`)}.gguf`),d=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=_.join(t,u);await K(f)||await Vt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Vt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await d(),console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: 
${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,p),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function on(e){let t=dt(e),n=await Ht(t),r=await zt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=He(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Fe({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),j=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,M={k:t.model.cache_type_k,v:t.model.cache_type_v},N=n.size>0?n.size:0,P=t.model.n_parallel||4,F=Ue({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:M,swaConfig:k,kvUnified:t.model.kv_unified,nParallel:P,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),re=We({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:P,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),I=t.backend?.gpu_memory_fraction==null?ct.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),L=t.backend?.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),ie=await 
Kt(t,{modelBytes:N,kvCacheBytes:F(j)}),ae=(ie.selected.totalMemory||0)*I,R=Math.max(0,v.totalmem()*L),oe=Ge({maxCtx:j,availableMemory:ie.selected.hasGpu?ae:R,modelBytes:N,kvBytesForCtx:F}),se=F(j),ce=F(oe);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:O,nEmbdHeadV:te,nHeadCount:E,nHeadKvCount:D,kvLayerCount:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,denseFirst:k.denseFirst,type:k.type,layers:k.swaLayers}:null},modelBytes:N,kvCacheBytes:se,limitedKvCacheBytes:ce,memoryLimitedCtx:oe,recurrentMemoryBytes:re,quantization:{name:n.quantization||null,fileType:p,version:f}}}const sn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ce(e):0;async function cn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await on(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await Te({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=sn(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let y=null,b=null;if(e){let t=sn(e);b={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=b.fit,a=b.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}y={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!y&&(y={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let C=null;return i&&(C={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:b,comparison:y,modelConfig:C}}const{WritableStream:ln}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,un=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),un(e[t],n)):e[t]=n}),e),dn=`https://huggingface.co`,fn=`https://huggingface.co/api`,pn=_.join(v.homedir(),`.buttress`,`models`),mn=[`cuda`,`vulkan`,`default`],hn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],gn=`fp16`,_n=.5,vn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],yn=e=>{if(!e)return null;let t=e.toLowerCase();return 
vn.find(e=>t.includes(e))||null},bn={backend:{type:`ggml-stt`,variant:null,variant_preference:mn,gpu_memory_fraction:.85,cpu_memory_fraction:_n},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,gn,`q5_1`],allow_local_file:!1,local_path:null,api_base:fn,base_url:dn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:pn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},xn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],Sn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Cn=(e={})=>{let t=structuredClone(bn);if(un(t,e),t.backend.variant=Sn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(xn(t.backend.variant_preference||mn).flatMap(e=>{let t=Sn(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...mn]),t.runtime.prefer_variants=Array.from(new Set(xn(t.runtime.prefer_variants).flatMap(e=>{let t=Sn(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(xn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||dn,t.model.api_base=t.model.api_base||fn,t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):pn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||bn.runtime.context_release_delay_ms),t},wn=e=>{let t=e.toLowerCase();return hn.find(e=>t.includes(e))||null},Tn=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=Sn(e);t&&n.add(t)}return Array.from(n)},En=async e=>{await l(e,{recursive:!0})},Dn=(e=pn)=>_.join(e,`.metadata-cache`),On=(e,t,n=pn)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(Dn(n),t,`${r}.json`)},kn=async(e,t,n=pn)=>{try{let r=await d(On(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},An=async(e,t,n,r=pn)=>{try{let i=On(e,t,r);await En(_.dirname(i)),await g(i,JSON.stringify(n),`utf-8`)}catch{}},jn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Mn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Nn=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Pn=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Fn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await En(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new ln({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},In=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await kn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Mn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:wn(c||``),headers:o};return await An(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await jn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:hn,f=()=>{for(let e of d)if(e===gn){let e=u.find(e=>{let t=e.toLowerCase();return!hn.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=wn(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||wn(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
Mn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await An(i,`artifact-info`,g,r),g},Ln=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=Tn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?bn.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?_n:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Te({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Rn=async e=>{let t=await In(e),n=ke({modelBytes:t.size>0?t.size:0}),r=await Ln(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Nn(e,t),l=await Pn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:yn(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},zn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await Pn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await Pn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Fn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
Bn=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Vn=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Hn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await zn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
E({filePath:i,useFlashAttn:e.plan.info.runtime.flash_attn_type===`on`,useGpu:e.plan.info.runtime.n_gpu_layers>0,nThreads:e.plan.info.runtime.n_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},Un=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),Wn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Un(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Un(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await Un(e,t)},i),!0)};async function Gn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=Cn(t),a=await Rn(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
Bn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await Un(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Hn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},l=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Wn(o,e):!1},u=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Vn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=_.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>u(e),transcribeData:async(e={})=>u(e),releaseContext:l,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const Kn=e=>{let t=Cn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=yn(t.model.filename);return r?`${n}:${r}`:n};async function qn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=Cn(e),o=await In(n),s=Nn(n,o),{repoId:c}=o;if(await Pn(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await Fn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const Jn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ce(e):0;async function Yn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await In(Cn(i));o=e.size??null,{processingBufferBytes:s}=ke({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await Te({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=Jn(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=Jn(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:Xn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,Zn=D(import.meta.url),Qn=_.dirname(Zn),$n=_.join(Qn,`mlx-bridge.py`),er=`mlx-vlm==0.4.0`,tr=`mlx-lm==0.31.1`,nr=_.join(v.homedir(),`.buttress`,`models`),rr={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:nr,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:tr,mlx_vlm_package:er,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},ir=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t,ar=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),ar(e[t],n)):e[t]=n}),e),or=(e={})=>{let t=structuredClone(rr);return ar(t,e),t},sr=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},cr=async e=>{await l(e,{recursive:!0})},lr=(e,t,n)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(n,`.metadata-cache`,t,`${r}.json`)},ur=async(e,t,n)=>{try{let r=await d(lr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},dr=async(e,t,n,r)=>{try{let i=lr(e,t,r);await cr(_.dirname(i)),await 
g(i,JSON.stringify(n),`utf-8`)}catch{}};async function fr(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await ur(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await sr(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await sr(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await dr(i,`mlx-model-metadata`,D,n),D}function pr({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const mr=async e=>{try{return await m(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{ee(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),hr=new Map;async function gr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.resolve(e),a=hr.get(i);if(a){let 
e=await a;return r?.(1),e}let o=vr({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});hr.set(i,o);try{return await o}finally{hr.delete(i)}}const _r=[3,10];async function vr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.join(e,`bin`,`python3`),a=_.join(e,`bin`,`pip`);if(await mr(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await mr(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<_r[0]||t===_r[0]&&n<_r[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${_r.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await l(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var yr=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=te(e,[$n],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
2
+ import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import*as o from"node:stream/web";import{ReadableStream as s}from"node:stream/web";import c,{mkdir as l,open as u,readFile as d,readdir as f,rename as p,stat as m,unlink as h,writeFile as g}from"node:fs/promises";import _ from"node:path";import v from"node:os";import{createHash as y}from"node:crypto";import{gguf as b}from"@huggingface/gguf";import{getBackendDevicesInfo as x,isLibVariantAvailable as S,loadModel as C}from"@fugood/llama.node";import w from"bytes";import{EventEmitter as T}from"node:events";import{initWhisper as E}from"@fugood/whisper.node";import{fileURLToPath as D}from"node:url";import{execFile as ee,execSync as O,spawn as te}from"node:child_process";import k from"node:fs";import A from"@iarna/toml";import{ZodError as ne,z as j}from"zod";import{cors as M}from"@elysiajs/cors";import N from"node-machine-id";import P from"ms";import{Buffer as F}from"node:buffer";import re from"node:dgram";const I=1024**3,L=(e,t,n)=>Math.min(Math.max(e,t),n),ie=e=>e?40:0,ae=(e=0)=>e?L(e/(12*I)*20,0,20):0,R=(e=0)=>e?L(e/(32*I)*10,0,10):0,oe=e=>e?10:0,se=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},ce=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=ie(n)+se(t,e)+ae(r),s=R(i),c=oe(a);return Math.min(100,Math.round(o+s+c))},le=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:ie(n),variant:se(t,e),gpuMemory:ae(r),cpuMemory:R(i),availability:oe(a)}),ue=[`cuda`,`vulkan`,`snapdragon`,`default`],z=.85,de=.5,fe=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],pe=e=>e&&String(e).trim().toLowerCase()||null,me=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=ue}={})=>{let 
i=[];e&&i.push(e),i.push(...fe(t)),i.push(...fe(n)),i.push(...r);let a=new Set;for(let e of i){let t=pe(e);t&&a.add(t)}return Array.from(a)},he=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},ge=e=>Array.isArray(e)?e.map(e=>({...e})):[],_e=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,ve=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=ge(_e(n,r)),l=c.some(he),u=c.filter(e=>he(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:ce(m),breakdown:o?le(m):null,error:s,timestamp:new Date().toISOString()}},B=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},ye=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=z,cpuMemoryFraction:a=de,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=ue}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=me({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t 
of h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(ve({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(ve({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=B({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=B({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},be=`ggml-llm`,xe=[`cuda`,`vulkan`,`default`],Se=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=z,cpuMemoryFraction:a=de,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>ye({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:xe,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:xe}),Ce=async({platform:e,arch:t=null,unifiedMemoryFraction:n=.85,includeBreakdown:r=!1,totalMemoryInBytes:i,modelBytes:a=null,kvCacheBytes:o=null,limitedKvCacheBytes:s=null}={})=>{let c=[];e!==`darwin`&&c.push(`MLX requires macOS`),t&&t!==`arm64`&&c.push(`MLX requires Apple Silicon (arm64)`);let l=c.length===0,u=l?Math.floor(i*n):0,d={platform:e,variant:`mlx`,hasGpu:l,gpuUsableBytes:u,cpuUsableBytes:u,ok:l},f=ce(d),p=l?le(d):null,m={platform:e,ok:l,variant:`mlx`,hasGpu:l,unifiedMemory:!0,devices:l?[{type:`metal`,deviceName:`Apple Silicon (Unified 
Memory)`,maxMemorySize:i}]:[],gpuTotalBytes:l?i:0,gpuUsableBytes:u,cpuTotalBytes:i,cpuUsableBytes:u,score:f,breakdown:r?p:void 0,error:l?void 0:c.join(`; `),timestamp:new Date().toISOString()};r||delete m.breakdown;let h={ok:l,selected:l?m:null,attempts:[m],errors:l?[]:c};if(!a&&!o)return h;let g=e=>{if(!e)return e;let t=B({device:e,modelBytes:a||0,kvCacheBytes:o||0}),n=null;return s!=null&&s!==o&&(n=B({device:e,modelBytes:a||0,kvCacheBytes:s})),{...e,fit:t,...n&&{limitedFit:n}}};return h.selected=g(h.selected),h.attempts=h.attempts.map(g),h},we=new Map([[be,ye],[`ggml-stt`,Se],[`mlx-llm`,Ce]]),Te=async({platform:e,totalMemoryInBytes:t,backend:n=be,dependencies:r,...i}={})=>{let a=we.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},Ee={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},De=e=>Ee[e?String(e).toLowerCase():`f16`]||Ee.f16,Oe=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=De(a.k),h=De(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},ke=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let 
u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},Ae=e=>e?String(e).trim().toLowerCase():null,je=(e={},t=null)=>{if(!e)return null;let n=Ae(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Me=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return o*a+c},Ne=({arch:e,nLayer:t=0})=>({arch:Ae(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Pe=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`gemma4`,({nLayer:e,nSwa:t,metadata:n})=>{if(!t||t<=0)return{enabled:!1};let 
r=Number(n?.[`gemma4.attention.shared_kv_layers`])||0,i=Math.max(0,e-r),a=n?.[`gemma4.attention.sliding_window_pattern`];return Array.isArray(a)?{enabled:!0,window:t,type:`standard`,swaLayers:a.slice(0,i).filter(e=>Number(e)>0).length,kvLayers:i}:{enabled:!0,window:t,pattern:6,type:`standard`,kvLayers:i}}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),Fe=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=Ae(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=je(t,r),o=r?Pe.get(r):null;if(!o)return Ne({arch:r,nLayer:n});let s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return Ne({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=s.swaLayers!=null&&Number.isFinite(Number(s.swaLayers))?Math.max(0,Math.floor(Number(s.swaLayers))):Me(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Ie=new Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),Le=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),Re=e=>e?String(e).trim().toLowerCase():null,ze=e=>{let t=Re(e);return t?Ie.has(t):!1},Be=e=>{let t=Re(e);return t?Le.has(t):!1},Ve=e=>ze(e)?`recurrent`:Be(e)?`hybrid`:`transformer`,He=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return 
Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=t?n(`${t}.attention.shared_kv_layers`,0):0,C=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:C,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x,sharedKvLayers:S}},Ue=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let d=Ve(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return 
e=>Oe(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},We=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Ve(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},Ge=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},V=new T;V.setMaxListeners(100);const Ke=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var 
qe=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Ke(this.modelLoads,e,this.maxEntries),V.emit(`status:modelLoad`,e),V.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Ke(this.completions,e,this.maxEntries),V.emit(`status:completion`,e),V.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Ke(this.transcriptions,e,this.maxEntries),V.emit(`status:transcription`,e),V.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const H=new qe,U=new qe;let Je=0;function Ye(e){let t=t=>e(t);return V.on(`status:change`,t),()=>V.off(`status:change`,t)}function Xe(e){return Je+=1,{subscriberId:Je,unsubscribe:Ye(e)}}function Ze(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant!==`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant!==`mlx`)}}}function 
Qe(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:U.getModelLoadHistory(),transcriptions:U.getTranscriptionHistory()}}}function $e(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`mlx-llm`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{};return{id:e,type:t.type,refCount:t.refCount,repoId:r.repoId||n.info?.model?.repoId||null,variant:r.variant||`mlx`,contexts:r.contexts||[]}}),history:{modelLoads:H.getModelLoadHistory().filter(e=>e.variant===`mlx`),completions:H.getCompletionHistory().filter(e=>e.variant===`mlx`)}}}function et(e){return{timestamp:new Date().toISOString(),ggmlLlm:Ze(e),ggmlStt:Qe(e),mlxLlm:$e(e)}}const{ReadableStream:tt,WritableStream:nt}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,rt=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof 
e[t]!=`object`)&&(e[t]={}),rt(e[t],n)):e[t]=n}),e),it=`https://huggingface.co`,at=`https://huggingface.co/api`,W=_.join(v.homedir(),`.buttress`,`models`),ot=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],st=.5,ct={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:st},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:at,base_url:it,enable_mtmd:!1,mmproj_filename:null,mmproj_url:null,mmproj_local_path:null,mmproj_use_gpu:null,mmproj_image_min_tokens:-1,mmproj_image_max_tokens:-1},runtime:{cache_dir:W,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},lt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],ut=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},dt=(e={})=>{let t=structuredClone(ct);if(rt(t,e),t.backend.variant=ut(t.backend.variant),t.backend.variant_preference=Array.from(new Set(lt(t.backend.variant_preference).flatMap(e=>{let t=ut(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(lt(t.runtime.prefer_variants).flatMap(e=>{let t=ut(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(lt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let 
e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}t.model.n_parallel=t.model.n_parallel?Math.max(1,Number(t.model.n_parallel)):void 0,t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||it,t.model.api_base=t.model.api_base||at,t.model.enable_mtmd=!!t.model.enable_mtmd;let n=e=>{if(e==null)return-1;let t=Number(e);return Number.isFinite(t)?Math.floor(t):-1};return t.model.mmproj_image_min_tokens=n(t.model.mmproj_image_min_tokens),t.model.mmproj_image_max_tokens=n(t.model.mmproj_image_max_tokens),t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):W,t.runtime.session_cache={...ct.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||ct.runtime.context_release_delay_ms),t},ft=e=>{let t=e.toLowerCase();return ot.find(e=>t.includes(e))||null},pt=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.flatMap(e=>{let t=ut(e);return t?[t]:[]})))},G=async e=>{await l(e,{recursive:!0})},mt=(e=W)=>_.join(e,`.metadata-cache`),ht=(e,t,n=W)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(mt(n),t,`${r}.json`)},gt=async(e,t,n=W)=>{try{let r=ht(e,t,n),i=await d(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,_.basename(r)),JSON.parse(i,(e,t)=>typeof t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},_t=async(e,t,n,r=W)=>{try{let i=ht(e,t,r);await G(_.dirname(i)),await g(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,_.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} 
cache:`,e.message)}},vt=(e=W)=>_.join(e,`.session-state-cache`),yt=(e=W)=>_.join(vt(e),`cache-map.json`),bt=(e=W)=>_.join(vt(e),`temp`),xt=(e=W)=>_.join(vt(e),`states`),St=()=>({version:1,entries:{},totalSize:0}),Ct=async(e=W)=>{try{let t=await d(yt(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?St():n}catch{return St()}},wt=async(e,t=W)=>{let n=yt(t),r=`${n}.tmp.${Date.now()}`;try{await G(_.dirname(n)),await g(r,JSON.stringify(e,null,2),`utf-8`),await p(r,n)}catch(e){throw await h(r).catch(()=>{}),e}},Tt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return y(`sha256`).update(n).digest(`hex`).slice(0,24)},Et=(e,t=W)=>_.join(xt(t),`${e}.bin`),Dt=(e=W)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return _.join(bt(e),`${t}.bin`)},Ot=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,kt=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},At=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>Ot(e.metadata,t));if(r){let t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=kt(e,n.fullText);return 
r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},jt=async(e,t,n)=>{let r=Object.values(e.entries),i=r.sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),a=e.totalSize,o=r.length,s=i.filter(e=>!(a>t)&&!(o>n)?!1:(a-=(e.stateFileSize||0)+(e.promptStateSize||0),--o,!0));return await Promise.all(s.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,a),s.map(e=>e.id)},Mt=async(e,t,n,r)=>{let i=[];for(let[a,o]of Object.entries(e.entries))a!==n&&Ot(o.metadata,r)&&t.startsWith(o.fullText)&&o.fullText.length<t.length&&i.push(o);return await Promise.all(i.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Nt=async(e=W)=>{let t=bt(e);try{let e=await f(t),n=Date.now();await Promise.all(e.map(async e=>{let r=_.join(t,e),i=await m(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await h(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Pt=async e=>{try{return await m(e),!0}catch{return!1}},Ft=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t;var It=class 
e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Ft(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}async persistCacheMap(){try{await wt(this.cacheMap,this.baseDir)}catch(e){console.warn(`[SessionCache] Failed to persist cache map: ${e?.message||e}`)}}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await 
Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Pt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await G(vt(this.baseDir)),await G(bt(this.baseDir)),await G(xt(this.baseDir)),this.cacheMap=await Ct(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await h(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await h(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await this.persistCacheMap()}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>Ot(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=At(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Pt(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await this.persistCacheMap(),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return 
console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=Tt(a,this.metadata),s=()=>{n&&h(n).catch(()=>{}),i&&h(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await this.persistCacheMap(),s(),e}let c=Et(o,this.baseDir),l=i?Et(`${o}-prompt`,this.baseDir):null;try{await G(_.dirname(c)),await p(n,c);let s=await m(c),u=0;if(i&&l)try{await p(i,l),u=(await m(l)).size,console.log(`[SessionCache] Saved prompt state: ${l}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let d={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:l||null,promptStateSize:u,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=d,this.cacheMap.totalSize+=s.size+u,this.requiresExactMatch()||await Mt(this.cacheMap,e,o,this.metadata),await jt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await wt(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),d}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await G(bt(this.baseDir)),Dt(this.baseDir)}async cleanup(){await Nt(this.baseDir)}};const Lt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return 
n.json()},Rt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},zt=async(e,t,n=W)=>{let r=JSON.stringify({url:e,headers:t}),i=await gt(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await b(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await _t(r,`range-metadata`,o,n),o},Bt=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},K=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Vt=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await G(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new nt({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},Ht=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await 
gt(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Rt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await _t(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await Lt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=l?.siblings||l?.files||[],d=[];for(let e of u){let t=e.rfilename||e.path||e.filename;typeof t==`string`&&t.endsWith(`.gguf`)&&d.push(t)}if(d.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let f=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:ot,p=d.map(e=>e.toLowerCase()),m=()=>{for(let e of f){let t=p.findIndex(t=>t.includes(e));if(t!==-1)return{filename:d[t],quantization:e}}return null};if(s)c||=ft(s);else{let{filename:e,quantization:t}=m()||{filename:d[0],quantization:null};s=e,c=t||ft(s)}let h=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,g=/-(\d{5})-of-(\d{5})\.gguf$/,_=s.match(g),v=null;if(_){let[,,r]=_,i=await Lt(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);v=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(g,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(v+=o)}}else{let e=await Rt(h,{headers:o});v=Number(e.headers.get(`content-length`))||null}let y={repoId:t,revision:n,filename:s,url:h,size:v,quantization:c,headers:o,isSplit:!!_,splitCount:_?Number(_[2]):0};return await _t(i,`artifact-info`,y,r),y},Ut=/^mmproj-.*\.gguf$/i,Wt=async(e,t)=>{if(!e.model.enable_mtmd)return null;let 
n=e.runtime.cache_dir,r={...e.runtime.http_headers||{}};e.runtime.huggingface_token&&(r.Authorization=`Bearer ${e.runtime.huggingface_token}`);let i=t?.repoId||e.model.repo_id,a=t?.revision||e.model.revision||`main`,o=JSON.stringify({kind:`mmproj`,repoId:i,revision:a,mmproj_filename:e.model.mmproj_filename,mmproj_url:e.model.mmproj_url,mmproj_local_path:e.model.mmproj_local_path}),s=await gt(o,`artifact-info`,n);if(s)return s;if(e.model.mmproj_url){let t=await Rt(e.model.mmproj_url,{headers:r}),s=Number(t.headers.get(`content-length`))||null,c={repoId:i,revision:a,filename:e.model.mmproj_filename||e.model.mmproj_url.split(`/`).pop(),url:e.model.mmproj_url,size:s,headers:r};return await _t(o,`artifact-info`,c,n),c}if(e.model.mmproj_local_path){if(!e.model.allow_local_file)throw Error("`model.mmproj_local_path` requires `model.allow_local_file = true`");let t={repoId:i,revision:a,filename:_.basename(e.model.mmproj_local_path),url:null,size:null,headers:r,localPath:_.resolve(e.model.mmproj_local_path)};return await _t(o,`artifact-info`,t,n),t}if(!i)throw Error("Cannot derive mmproj artifact without `model.repo_id`");let c=await Lt(`${e.model.api_base}/models/${i}?revision=${a}&blobs=true`,{headers:r}),l=c?.siblings||c?.files||[],u=l.map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`),d=e.model.mmproj_filename;if(d){if(!u.includes(d))throw Error(`mmproj file "${d}" not found in repo ${i}`)}else{let e=u.filter(e=>Ut.test(e));if(e.length===0)return console.warn(`[buttress] enable_mtmd set but no mmproj file found in ${i}; skipping multimodal load`),null;let n=t?.quantization&&String(t.quantization).toLowerCase();d=n&&e.find(e=>e.toLowerCase().includes(n))||e[0]}let f=`${e.model.base_url.replace(/\/+$/,``)}/${i}/resolve/${a}/${d}`,p=l.find(e=>(e.rfilename||e.path||e.filename)===d),m=Number(p?.size);if(!Number.isFinite(m)||m<=0){let e=await Rt(f,{headers:r});m=Number(e.headers.get(`content-length`))||null}let 
h={repoId:i,revision:a,filename:d,url:f,size:m,headers:r};return await _t(o,`artifact-info`,h,n),h},Gt=(e,t)=>{if(t?.localPath)return t.localPath;if(!t)return null;let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Kt=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let r=pt(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?ct.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Te({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},qt=async e=>{let t=await Ht(e),n=await Wt(e,t),r=await zt(t.url,t.headers,e.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=He(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Fe({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),j={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,no_extra_bufts:e.model.no_extra_bufts??e.runtime.no_extra_bufts,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},M=j.n_ctx?Number(j.n_ctx):null,N=M||a||4096,P=[],F=[],re=!0;if(M&&a&&M>a){re=!1;let e=`Requested context length (${M}) exceeds model training context 
(${a})`;P.push(e),F.push(e),N=a}M&&!a&&P.push(`Model metadata missing training context length, using requested value`);let I={k:j.cache_type_k,v:j.cache_type_v},L=t.size>0?t.size:0,ie=Ue({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:I,swaConfig:k,kvUnified:j.kv_unified,nParallel:j.n_parallel,swaFull:j.swa_full,arch:i,attentionLayerCount:m}),ae=We({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:j.n_parallel||4,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=await Kt(e,{modelBytes:L,kvCacheBytes:ie(N)+ae}),oe=R.selected.totalMemory||0,se=oe*(e.backend.gpu_memory_fraction||1),ce=e.backend.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),le=Math.max(0,v.totalmem()*ce),ue=R.selected.hasGpu?se:le,z=Ge({maxCtx:N,availableMemory:ue,modelBytes:L,kvBytesForCtx:ie});if(!M&&z){let e=a?Math.min(z,a):z,t=Math.max(32,e);t<N&&P.push(`Context length capped to ${t} by memory limits`),N=t}N>z&&(N=z);let de=Math.floor(z);console.log(`[buttress] Memory-limited context length: ${de}`);let fe=ie(N),pe=L+fe+ae,me=w?L/(w+1):L,he=0;R.selected.hasGpu&&me>0&&(he=Math.min(w+1,Math.max(0,Math.floor(se/me)))),console.log(`[buttress] Auto GPU layer capacity (${R.selected.variant}): ${he}/${w+1}`);let ge;ge=j.n_gpu_layers===`auto`||j.n_gpu_layers==null?he:Math.max(0,Math.min(Number(j.n_gpu_layers)||0,w+1));let _e=(()=>{let e=j.flash_attn_type&&String(j.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:R.selected.hasGpu?`auto`:`off`})(),ve=e.runtime.cache_dir,B=Bt(e,t),ye=await K(B,t.size),be=Gt(e,n),xe=be?await 
K(be,n?.size):!1,Se=n?{enabled:!0,initialized:!1,filename:n.filename,url:n.url,sizeBytes:n.size,localPath:be,exists:xe,useGpu:e.model.mmproj_use_gpu,imageMinTokens:e.model.mmproj_image_min_tokens,imageMaxTokens:e.model.mmproj_image_max_tokens}:{enabled:!1,requested:!!e.model.enable_mtmd};return{config:e,info:{ok:re,backend:`ggml-llm`,warnings:P,errors:F,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:i,n_ctx_train:a,n_layer:w,n_embd:T,quantization_version:f,file_type:p,kv_layer_count:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,dense_first:k.denseFirst,type:k.type,layers:k.swaLayers}:null}},runtime:{...j,variant:R.selected.variant,n_ctx:N,requested_ctx:M,n_gpu_layers:ge,auto_gpu_layers:he,flash_attn_type:_e,cache_type_k:I.k,cache_type_v:I.v,estimated_max_n_ctx:de},resources:{modelBytes:L,kvCacheBytes:fe,recurrentMemoryBytes:ae,totalEstimatedBytes:pe,gpuCapacityBytes:oe,gpuUsableBytes:se,cpuUsableBytes:le,fit:R.selected.fit},devices:{selected:R.selected,attempts:R.attempts},download:{cacheDir:ve,localPath:B,exists:ye},multimodal:Se,timestamp:new Date().toISOString()},artifact:t,mmprojArtifact:n,mmprojLocalPath:be,mmprojLocalExists:xe,metadata:{arch:i,nCtxTrain:a,nLayer:w,nEmbd:T},devices:R,cacheTypes:I,localPath:B,localExists:ye}},Jt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new tt({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};H.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},Yt=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&h(i).catch(()=>{}),c&&h(c).catch(()=>{})};return new tt({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};H.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},Xt=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return y(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},Zt=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await K(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),l=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),u=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${l}`,d=_.join(t,l);await K(d)||await Vt(u,o.headers,d,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Vt(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},Qt=async(e,t,n,r=null)=>{let{mmprojArtifact:i,mmprojLocalPath:a}=e;if(!i||!a)return null;if(i.localPath){if(!await K(a))throw Error(`mmproj local file not found: ${a}`);return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}if(e.mmprojLocalExists&&!t.has(a))return e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a;let o=a;if(r){let t=r.getDownload(o);if(t)try{if(await t,await K(a,i.size))return e.mmprojLocalExists=!0,e.info.multimodal.exists=!0,typeof n==`function`&&n(1),a}catch(e){console.warn(`[ensureMmprojFile] Global download failed, will retry: ${e.message}`)}}t.has(o)||t.set(o,(async()=>{console.log(`Downloading mmproj: 0%`),await Vt(i.url,i.headers,a,i.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading mmproj: ${Math.min(100,t)}%`),typeof n==`function`&&n(e)}}),e.mmprojLocalExists=!0,e.info.multimodal.exists=!0})());try{await t.get(o)}finally{t.delete(o)}return a},$t=async(e,t)=>{let n=Xt(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await Zt(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let 
o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,no_extra_bufts:e.plan.info.runtime.no_extra_bufts,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await C(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);if(e.plan.mmprojArtifact){let t=await Qt(e.plan,e.downloads,null,e.globalDownloadManager);if(t){let n=e.config.model.mmproj_use_gpu,r={path:t,use_gpu:n==null?(e.plan.info.runtime.n_gpu_layers||0)>0:!!n,image_min_tokens:e.config.model.mmproj_image_min_tokens,image_max_tokens:e.config.model.mmproj_image_max_tokens};console.log(`[Context] initMultimodal:`,r),await s.initMultimodal(r)?e.plan.info.multimodal.initialized=!0:console.warn(`[Context] initMultimodal returned false; multimodal disabled`)}}return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(H.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},en=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},tn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return en(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?en(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await en(e,t)},i),!0)};async function nn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=dt(t),a=await qt(i),o=new It(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await $t(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download},multimodal:s.plan.info.multimodal?{...s.plan.info.multimodal}:null}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=Xt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=t.prompt||``,o=null,c=null;if(!a&&t.messages){({messages:o}=t),c={chatTemplate:t.chat_template||t.chatTemplate,jinja:t.jinja??!0,tools:t.tools,parallel_tool_calls:t.parallel_tool_calls,tool_choice:t.tool_choice,reasoning_format:t.reasoning_format,enable_thinking:t.enable_thinking,add_generation_prompt:t.add_generation_prompt,now:t.now,chat_template_kwargs:t.chat_template_kwargs,force_pure_content:t.force_pure_content};let e=await i.context.getFormattedChat(o,c.chatTemplate,c);a=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&a){let{options:e}=await s.sessionCache.prepareCompletionOptions(t,a,i.context),n=await s.sessionCache.generateTempStatePath(),r=(await i.context.tokenize(a))?.tokens?.length||0,o={...e,save_state_path:n},c=s.sessionCache.requiresExactMatch(),l=!!o.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),o.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Yt(i.context,o,s.sessionCache,a,n,r,s.id,d,u)}let l={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Jt(i.context,t,s.id,l)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=Xt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=await 
i.context.tokenize(t,n);if(!a)return{tokens:[]};let o=Array.from(a.tokens??[],Number);return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=Xt(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=Xt(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=Xt(s),t=s.contexts.get(e);return t?tn(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):en(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const rn=e=>{let t=dt(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function an(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let 
n=dt(e),o=await Ht(n),s=Bt(n,o),{repoId:c}=o,l=await Wt(n,o).catch(e=>(console.warn(`[Download] Failed to derive mmproj artifact: ${e.message}`),null)),u=Gt(n,l),d=async()=>{if(!l||!u||l.localPath)return;if(await K(u,l.size)){console.log(`[Download] mmproj already exists: ${u}`);return}let e=t.getDownload(u);if(e){await e;return}let n=(async()=>{try{await Vt(l.url,l.headers,u,l.size,e=>{e>=0&&Number.isFinite(e)&&console.log(`[Download] mmproj ${c}: ${Math.round(e*100)}%`)})}finally{t.deleteDownload(u)}})();t.setDownload(u,n),await n};if(await K(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),await d().catch(e=>{console.error(`[Download] mmproj download failed: ${e.message}`),typeof a==`function`&&a(e)}),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let f=t.getDownload(s);if(f)return console.log(`[Download] Already downloading: ${c}`),f.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let p=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let l=String(s).padStart(5,`0`),u=o.filename.replace(e,`-${l}-of-${String(i).padStart(5,`0`)}.gguf`),d=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=_.join(t,u);await K(f)||await Vt(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Vt(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});await d(),console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: 
${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,p),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function on(e){let t=dt(e),n=await Ht(t),r=await zt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=He(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,ee=E>0&&T>0?T/E:128,O=u!=null&&Number.isFinite(Number(u))?Number(u):ee,te=d!=null&&Number.isFinite(Number(d))?Number(d):ee,k=Fe({arch:i,metadata:r,nLayer:w}),A=k&&Number.isFinite(Number(k.kvLayers))?Number(k.kvLayers):w,ne=Math.max(0,Math.floor(Number(A)||0)),j=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,M={k:t.model.cache_type_k,v:t.model.cache_type_v},N=n.size>0?n.size:0,P=t.model.n_parallel||4,F=Ue({layerCount:ne,headKvCount:D,embdHeadKCount:O,embdHeadVCount:te,cacheTypes:M,swaConfig:k,kvUnified:t.model.kv_unified,nParallel:P,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),re=We({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:P,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),I=t.backend?.gpu_memory_fraction==null?ct.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),L=t.backend?.cpu_memory_fraction==null?st:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),ie=await 
Kt(t,{modelBytes:N,kvCacheBytes:F(j)}),ae=(ie.selected.totalMemory||0)*I,R=Math.max(0,v.totalmem()*L),oe=Ge({maxCtx:j,availableMemory:ie.selected.hasGpu?ae:R,modelBytes:N,kvBytesForCtx:F}),se=F(j),ce=F(oe);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:O,nEmbdHeadV:te,nHeadCount:E,nHeadKvCount:D,kvLayerCount:ne,swa:k?.enabled?{window:k.window,pattern:k.pattern,denseFirst:k.denseFirst,type:k.type,layers:k.swaLayers}:null},modelBytes:N,kvCacheBytes:se,limitedKvCacheBytes:ce,memoryLimitedCtx:oe,recurrentMemoryBytes:re,quantization:{name:n.quantization||null,fileType:p,version:f}}}const sn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ce(e):0;async function cn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await on(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await Te({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=sn(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let y=null,b=null;if(e){let t=sn(e);b={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=b.fit,a=b.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}y={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!y&&(y={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let C=null;return i&&(C={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:b,comparison:y,modelConfig:C}}const{WritableStream:ln}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,un=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),un(e[t],n)):e[t]=n}),e),dn=`https://huggingface.co`,fn=`https://huggingface.co/api`,pn=_.join(v.homedir(),`.buttress`,`models`),mn=[`cuda`,`vulkan`,`default`],hn=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],gn=`fp16`,_n=.5,vn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],yn=e=>{if(!e)return null;let t=e.toLowerCase();return 
vn.find(e=>t.includes(e))||null},bn={backend:{type:`ggml-stt`,variant:null,variant_preference:mn,gpu_memory_fraction:.85,cpu_memory_fraction:_n},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,gn,`q5_1`],allow_local_file:!1,local_path:null,api_base:fn,base_url:dn,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:pn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},xn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],Sn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},Cn=(e={})=>{let t=structuredClone(bn);if(un(t,e),t.backend.variant=Sn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(xn(t.backend.variant_preference||mn).flatMap(e=>{let t=Sn(e);return t?[t]:[]}))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...mn]),t.runtime.prefer_variants=Array.from(new Set(xn(t.runtime.prefer_variants).flatMap(e=>{let t=Sn(e);return t?[t]:[]}))),t.model.preferred_quantizations=Array.from(new Set(xn(t.model.preferred_quantizations||t.model.quantizations).flatMap(e=>{let t=e?String(e).toLowerCase():null;return t?[t]:[]}))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||dn,t.model.api_base=t.model.api_base||fn,t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):pn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||bn.runtime.context_release_delay_ms),t},wn=e=>{let t=e.toLowerCase();return hn.find(e=>t.includes(e))||null},Tn=e=>{let 
t=[];e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`);let n=new Set;for(let e of t){let t=Sn(e);t&&n.add(t)}return Array.from(n)},En=async e=>{await l(e,{recursive:!0})},Dn=(e=pn)=>_.join(e,`.metadata-cache`),On=(e,t,n=pn)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(Dn(n),t,`${r}.json`)},kn=async(e,t,n=pn)=>{try{let r=await d(On(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},An=async(e,t,n,r=pn)=>{try{let i=On(e,t,r);await En(_.dirname(i)),await g(i,JSON.stringify(n),`utf-8`)}catch{}},jn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Mn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Nn=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Pn=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Fn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await En(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new ln({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await 
o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},In=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await kn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Mn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:wn(c||``),headers:o};return await An(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await jn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:hn,f=()=>{for(let e of d)if(e===gn){let e=u.find(e=>{let t=e.toLowerCase();return!hn.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=wn(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||wn(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await 
Mn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await An(i,`artifact-info`,g,r),g},Ln=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let r=Tn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?bn.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?_n:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await Te({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). 
Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Rn=async e=>{let t=await In(e),n=ke({modelBytes:t.size>0?t.size:0}),r=await Ln(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Nn(e,t),l=await Pn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:yn(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},zn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await Pn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await Pn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Fn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var 
Bn=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const Vn=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Hn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await zn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await 
E({filePath:i,useFlashAttn:e.plan.info.runtime.flash_attn_type===`on`,useGpu:e.plan.info.runtime.n_gpu_layers>0,nThreads:e.plan.info.runtime.n_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw U.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},Un=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),Wn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Un(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Un(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await Un(e,t)},i),!0)};async function Gn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=Cn(t),a=await Rn(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new 
Bn,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await Un(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await Hn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},l=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Wn(o,e):!1},u=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=Vn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=_.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
U.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>u(e),transcribeData:async(e={})=>u(e),releaseContext:l,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const Kn=e=>{let t=Cn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=yn(t.model.filename);return r?`${n}:${r}`:n};async function qn(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=Cn(e),o=await In(n),s=Nn(n,o),{repoId:c}=o;if(await Pn(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await Fn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const Jn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):ce(e):0;async function Yn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await In(Cn(i));o=e.size??null,{processingBufferBytes:s}=ke({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await Te({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=Jn(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=Jn(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}const{ReadableStream:Xn}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,Zn=D(import.meta.url),Qn=_.dirname(Zn),$n=_.join(Qn,`mlx-bridge.py`),er=`mlx-vlm==0.4.0`,tr=`mlx-lm==0.31.1`,nr=_.join(v.homedir(),`.buttress`,`models`),rr={backend:{type:`mlx-llm`},model:{repo_id:null,revision:`main`,adapter_path:null,tokenizer_config:null,model_config:null,vlm:`auto`},runtime:{cache_dir:nr,huggingface_token:process.env.HUGGINGFACE_TOKEN||null,mlx_env_dir:null,mlx_lm_package:tr,mlx_vlm_package:er,context_release_delay_ms:1e4,session_cache:{enabled:!0,max_size_bytes:5*1024*1024*1024,max_entries:100}}},ir=(e,t)=>e==null?t:typeof e==`number`?e:typeof e==`string`?w.parse(e)??t:t,ar=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),ar(e[t],n)):e[t]=n}),e),or=(e={})=>{let t=structuredClone(rr);return ar(t,e),t},sr=async(e,t={})=>{let n=await fetch(e,t);if(!n.ok)throw Error(`HTTP ${n.status}: ${e}`);return n.json()},cr=async e=>{await l(e,{recursive:!0})},lr=(e,t,n)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(n,`.metadata-cache`,t,`${r}.json`)},ur=async(e,t,n)=>{try{let r=await d(lr(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},dr=async(e,t,n,r)=>{try{let i=lr(e,t,r);await cr(_.dirname(i)),await 
g(i,JSON.stringify(n),`utf-8`)}catch{}};async function fr(e,{revision:t=`main`,cacheDir:n,token:r}={}){let i=JSON.stringify({repoId:e,revision:t,type:`mlx-model-metadata`});if(n){let e=await ur(i,`mlx-model-metadata`,n);if(e)return e}let a={};r&&(a.Authorization=`Bearer ${r}`);let o=(await sr(`https://huggingface.co/api/models/${e}?revision=${t}&blobs=true`,{headers:a}))?.siblings||[],s=0;for(let e of o){let t=e.rfilename||e.path||e.filename||``;/\.(safetensors|npz)$/.test(t)&&(s+=Number(e.size)||0)}let c=null;try{c=await sr(`https://huggingface.co/${e}/raw/${t}/config.json`,{headers:a})}catch{}let l=c?.text_config||c||{},u=c||{},d=u.model_type||u.architectures?.[0]||null,f=l.hidden_size||l.dim||0,p=l.num_hidden_layers||l.n_layers||0,m=l.num_attention_heads||l.n_heads||0,h=l.num_key_value_heads??m,g=l.vocab_size||0,_=l.max_position_embeddings||0,v=l.intermediate_size||0,y=l.head_dim||l.v_head_dim||(m>0&&f>0&&Number.isInteger(f/m)?f/m:0),b=l.kv_lora_rank||0,x=l.qk_rope_head_dim||0,S=b>0,C=u.quantization||u.quantization_config||null,w=C?.bits||null,T=C?.group_size||null,E=l.dtype||u.torch_dtype||(w?`${w}bit`:null),D={repoId:e,revision:t,modelBytes:s,arch:d,hiddenSize:f,numLayers:p,numHeads:m,numKvHeads:h,headDim:y,vocabSize:g,maxCtx:_,intermediateSize:v,quantBits:w,quantGroupSize:T,dtype:E,isMLA:S,kvLoraRank:b,qkRopeHeadDim:x,fileCount:o.length,config:c};return n&&await dr(i,`mlx-model-metadata`,D,n),D}function pr({numLayers:e,numKvHeads:t,headDim:n,contextLength:r,isMLA:i,kvLoraRank:a,qkRopeHeadDim:o}){return!e||!r?0:i&&a>0?e*(a+(o||0))*r*2:!t||!n?0:e*t*n*r*2*2}const mr=async e=>{try{return await m(e),!0}catch{return!1}},q=(e,t,n={})=>new Promise((r,i)=>{ee(e,t,{timeout:n.timeout||3e5,...n},(t,n,a)=>{if(t){let n=a?.toString().trim()||t.message;i(Error(`${e} failed: ${n}`))}else r({stdout:n?.toString()||``,stderr:a?.toString()||``})})}),hr=new Map;async function gr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.resolve(e),a=hr.get(i);if(a){let 
e=await a;return r?.(1),e}let o=vr({envDir:i,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r});hr.set(i,o);try{return await o}finally{hr.delete(i)}}const _r=[3,10];async function vr({envDir:e,mlxLmPackage:t,mlxVlmPackage:n,onProgress:r}){let i=_.join(e,`bin`,`python3`),a=_.join(e,`bin`,`pip`);if(await mr(i))try{return await q(i,[`-c`,`import mlx_vlm; import torch`],{timeout:1e4}),r?.(1),i}catch{}if(!await mr(i)){r?.(.1);try{let{stdout:e}=await q(`python3`,[`-c`,`import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")`],{timeout:5e3}),[t,n]=e.trim().split(`.`).map(Number);(t<_r[0]||t===_r[0]&&n<_r[1])&&console.warn(`[mlx-llm] WARNING: System Python is ${t}.${n}, but mlx-vlm requires >= ${_r.join(`.`)}. You may get an older mlx-vlm version with reduced functionality. Consider installing Python >= 3.10 (e.g. via Homebrew).`)}catch{}console.log(`[mlx-llm] Creating venv at ${e}`),await l(e,{recursive:!0}),await q(`python3`,[`-m`,`venv`,e],{timeout:6e4}),r?.(.3)}return console.log(`[mlx-llm] Installing ${n}`),r?.(.4),await q(a,[`install`,t,n,`torch`,`torchvision`],{timeout:6e5,env:{...process.env}}),r?.(.9),await q(i,[`-c`,`import mlx_vlm; import torch; print(mlx_vlm.__version__)`],{timeout:15e3}),r?.(1),console.log(`[mlx-llm] mlx-vlm installed successfully`),i}var yr=class{constructor(){this.process=null,this.pendingRequests=new Map,this.requestCounter=0,this.readyPromise=null,this.buffer=``}spawn(e){return this.process=te(e,[$n],{stdio:[`pipe`,`pipe`,`pipe`],env:{...process.env,PYTHONUNBUFFERED:`1`}}),this.process.stderr.on(`data`,e=>{let t=e.toString().trim();t&&console.log(t)}),this.process.on(`exit`,e=>{console.log(`[mlx-llm] Bridge process exited with code ${e}`);for(let[t,n]of this.pendingRequests)n.reject(Error(`Bridge process exited (code ${e})`)),this.pendingRequests.delete(t);this.process=null}),this.process.stdout.on(`data`,e=>{this.buffer+=e.toString();let t=this.buffer.split(`
3
3
  `);this.buffer=t.pop();for(let e of t)if(e.trim())try{this.handleMessage(JSON.parse(e))}catch(t){console.error(`[mlx-llm] Failed to parse bridge message:`,e,t)}}),this.readyPromise=new Promise((e,t)=>{this.pendingRequests.set(`__init__`,{resolve:()=>e(),reject:t}),setTimeout(()=>t(Error(`Bridge startup timeout`)),3e4)}),this.readyPromise}handleMessage(e){let t=this.pendingRequests.get(e.id);t&&(e.error?(t.reject(Error(e.error.message)),this.pendingRequests.delete(e.id)):e.event?e.event===`result`?(t.resolve(e.data),this.pendingRequests.delete(e.id)):t.onEvent?.(e.event,e.data):e.result!==void 0&&(t.resolve(e.result),this.pendingRequests.delete(e.id)))}async call(e,t={}){if(!this.process)throw Error(`Bridge not running`);let n=String(++this.requestCounter);return new Promise((r,i)=>{this.pendingRequests.set(n,{resolve:r,reject:i}),this.write({id:n,method:e,params:t})})}stream(e,t,n){if(!this.process)throw Error(`Bridge not running`);let r=String(++this.requestCounter);return{id:r,promise:new Promise((i,a)=>{this.pendingRequests.set(r,{resolve:i,reject:a,onEvent:n}),this.write({id:r,method:e,params:t})})}}cancel(e){this.process&&this.write({id:`cancel-${e}`,method:`cancel`,params:{request_id:e}})}write(e){this.process?.stdin?.write(JSON.stringify(e)+`
4
4
  `)}kill(){this.process&&=(this.process.kill(),null),this.pendingRequests.clear()}get alive(){return this.process!=null&&!this.process.killed}};function br(){let e=[];return process.platform!==`darwin`&&e.push(`MLX requires macOS (Apple Silicon)`),v.arch()!==`arm64`&&e.push(`MLX requires Apple Silicon (arm64)`),e}function xr(e){let t=br();return{config:e,info:{ok:t.length===0,backend:`mlx-llm`,warnings:[],errors:[...t],model:{repoId:e.model.repo_id,revision:e.model.revision},runtime:{variant:`mlx`},resources:{},devices:{selected:{variant:`mlx`,hasGpu:!0}},download:{cacheDir:e.runtime.cache_dir,localPath:null,exists:!1},timestamp:new Date().toISOString()}}}const Sr=e=>{if(!e)return[];let t=[];for(let n of e)if(Array.isArray(n.content))for(let e of n.content)e.type===`image_url`&&e.image_url?.url&&t.push(e.image_url.url);return t};var Cr=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const wr=`</think>`;function Tr(e,t){if(!t)return{reasoningContent:``,content:e};let n=e.indexOf(wr);if(n!==-1)return{reasoningContent:e.slice(0,n).replace(/^\n+/,``),content:e.slice(n+8).replace(/^\n+/,``)};let r=e.length;for(let t=1;t<=8&&t<=e.length;t++)if(wr.startsWith(e.slice(-t))){r=e.length-t;break}return{reasoningContent:e.slice(0,r).replace(/^\n+/,``),content:``}}function Er(e,t,n,r,{enableThinking:i=!1}={}){let a=null,o=Date.now(),s=0,c=``;return new 
Xn({start(l){let{id:u,promise:d}=e.stream(`generate`,t,(e,t)=>{if(e===`token`){s+=1,c+=t.token||``;let e=Tr(c,i);l.enqueue({event:`token`,data:{requestId:u,token:t.token,token_id:t.token_id,text:c,content:e.content,reasoning_content:e.reasoningContent}})}});a=u,d.then(e=>{let t={prompt_n:e.prompt_tokens??0,prompt_per_second:e.prompt_tps??0,predicted_n:e.generation_tokens??s,predicted_per_second:e.generation_tps??0},a=Tr(c,i);l.enqueue({event:`result`,data:{requestId:u,text:c,content:a.content,reasoning_content:a.reasoningContent,timings:t,prompt_tokens:t.prompt_n,tokens_predicted:t.predicted_n,interrupted:e.interrupted||!1,peak_memory:e.peak_memory}}),l.close(),H.addCompletion({id:`completion-${u}`,generatorId:n,requestId:u,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:`mlx`,promptTokens:t.prompt_n,tokensGenerated:t.predicted_n,tokensPerSecond:t.predicted_per_second,promptPerSecond:t.prompt_per_second,durationMs:Date.now()-o,success:!0,interrupted:e.interrupted||!1})}).catch(e=>{l.enqueue({event:`error`,data:{message:e?.message||String(e)}}),l.error(e),H.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,variant:`mlx`,durationMs:Date.now()-o,tokensGenerated:s,success:!1,error:e?.message||String(e)})})},cancel(){a&&e.cancel(a)}})}async function Dr(e,t,n={}){let r=or(t),i=xr(r);i.info.ok||console.error(`[mlx-llm] Platform check failed:`,i.info.errors);let a={id:e,type:`mlx-llm`,config:r,plan:i,info:i.info,contexts:new Map,bridge:null,queue:new Cr,finalized:!1},o=`mlx:${r.model.repo_id}`,s=async(e={})=>{let{onProgress:t}=e,n=a.contexts.get(o);if(n&&!n.released)return n.refCount+=1,n.releaseTimer&&=(clearTimeout(n.releaseTimer),null),await n.ready,{modelInfo:n.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}};let i={key:o,refCount:1,ready:null,released:!1,releaseRequested:!1,releaseTimer:null,modelInfo:null};a.contexts.set(o,i);let s=Date.now();i.ready=(async()=>{let e=r.runtime.cache_dir||nr,n=await 
gr({envDir:r.runtime.mlx_env_dir||_.join(e,`mlx-env`),mlxLmPackage:r.runtime.mlx_lm_package||tr,mlxVlmPackage:r.runtime.mlx_vlm_package||er,onProgress:t?e=>t(e*.3):void 0});(!a.bridge||!a.bridge.alive)&&(a.bridge=new yr,await a.bridge.spawn(n)),t?.(.4);let o={model:r.model.repo_id};r.model.revision&&(o.revision=r.model.revision),r.model.adapter_path&&(o.adapter_path=r.model.adapter_path),r.model.vlm!=null&&(o.vlm=r.model.vlm),r.runtime.huggingface_token&&(process.env.HF_TOKEN=r.runtime.huggingface_token),await a.bridge.call(`load`,o),t?.(.9);let c=await a.bridge.call(`get_info`);i.modelInfo={model:c.model,peak_memory:c.peak_memory,active_memory:c.active_memory};let l=r.runtime.session_cache;l?.enabled!==!1&&await a.bridge.call(`configure_cache`,{enabled:!0,cache_dir:_.join(e,`mlx-session-cache`),max_entries:l?.max_entries||100,max_size_bytes:ir(l?.max_size_bytes,5*1024*1024*1024)}),a.info.download.exists=!0,t?.(1),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!0})})();try{await i.ready}catch(e){throw i.released=!0,a.contexts.delete(o),H.addModelLoad({id:`load-${Date.now()}`,generatorId:a.id,repoId:r.model.repo_id,variant:`mlx`,durationMs:Date.now()-s,success:!1,error:e?.message||String(e)}),e}return{modelInfo:i.modelInfo,runtime:{...a.info.runtime},download:{...a.info.download}}},c=async e=>{if(e.released)return!1;e.released=!0;try{a.bridge?.alive&&await a.bridge.call(`release`)}catch(e){console.error(`[mlx-llm] Error releasing context:`,e.message)}return a.contexts.delete(e.key),!0};return{id:e,type:`mlx-llm`,info:i.info,contexts:a.contexts,queue:a.queue,initContext:s,completion:async(e={})=>{let{options:t={}}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);await n.ready;let r=Sr(t.messages),i=t.prompt||``;if(!i&&t.messages){let 
e={messages:t.messages,add_generation_prompt:t.add_generation_prompt??!0,tools:t.tools,...t.chat_template_kwargs};t.enable_thinking!=null&&(e.enable_thinking=t.enable_thinking),i=(await a.bridge.call(`apply_chat_template`,e)).text}let s={prompt:i,max_tokens:t.n_predict??t.max_tokens??256};r.length>0&&(s.image=r),t.temperature!=null&&(s.temperature=t.temperature),t.top_p!=null&&(s.top_p=t.top_p),t.top_k!=null&&(s.top_k=t.top_k),t.min_p!=null&&(s.min_p=t.min_p),t.seed!=null&&(s.seed=t.seed),t.repetition_penalty!=null&&(s.repetition_penalty=t.repetition_penalty),t.stop&&(s.stop=t.stop);let c={repoId:a.info.model?.repoId||null},l=`completion-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;return new Xn({start(e){a.queue.enqueue(async()=>{let n=Er(a.bridge,s,a.id,c,{enableThinking:!!t.enable_thinking}).getReader();try{for(;;){let{value:t,done:r}=await n.read();if(r)break;e.enqueue(t)}e.close()}catch(t){throw e.error(t),t}},l).catch(t=>{try{e.error(t)}catch{}})},cancel(){a.bridge?.alive}})},tokenize:async(e={})=>{let{text:t=``}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,a.bridge.call(`tokenize`,{text:t})},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=a.contexts.get(o);if(!n)throw Error(`Context "${o}" not initialized`);return await n.ready,(await a.bridge.call(`detokenize`,{tokens:t})).text},applyChatTemplate:async(e={})=>{let{messages:t=[],params:n={}}=e,r=a.contexts.get(o);if(!r)throw Error(`Context "${o}" not initialized`);await r.ready;let i={messages:t,add_generation_prompt:n.add_generation_prompt??!0,tools:n.tools,...n.chat_template_kwargs};return(await a.bridge.call(`apply_chat_template`,i)).text},releaseContext:async()=>{if(a.finalized)return!1;let e=a.contexts.get(o);if(!e||(e.releaseRequested=!0,e.refCount=Math.max(0,e.refCount-1),e.refCount>0))return!1;let t=r.runtime.context_release_delay_ms??1e4;return t>0?new 
Promise(n=>{e.releaseTimer=setTimeout(async()=>{e.releaseTimer=null,e.refCount<=0&&!e.released?n(await c(e)):n(!1)},t)}):c(e)},finalize:async()=>{if(a.finalized)return;a.finalized=!0;let e=Array.from(a.contexts.values());for(let t of e)t.released||(t.refCount=0,await c(t));a.bridge?.kill(),a.bridge=null},getStatus:()=>({id:a.id,type:a.type,repoId:a.info.model?.repoId||null,variant:`mlx`,contexts:Array.from(a.contexts.entries()).map(([e,t])=>({key:e,refCount:t.refCount,hasModel:!t.released})),queueStatus:a.queue.getStatus()}),hasPendingReleases:()=>Array.from(a.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{a.finalized=!1}}}const Or=e=>or(e).model.repo_id||null;async function kr(e=null,t={}){let{includeBreakdown:n=!1,config:r}=t,i=br(),a=i.length===0,o=!1,s=!1;if(a){try{await q(`python3`,[`--version`],{timeout:5e3}),o=!0}catch{}if(o)try{await q(`python3`,[`-c`,`import mlx`],{timeout:1e4}),s=!0}catch{}}let c=await Ce({platform:process.platform,arch:v.arch(),totalMemoryInBytes:v.totalmem(),includeBreakdown:n}),l=null,u=null,d=null,f=null,p=null,m=null,h=null;if(r){let e=or(r),t=e.model.repo_id;if(t)try{l=await fr(t,{revision:e.model.revision,cacheDir:e.runtime.cache_dir||nr,token:e.runtime.huggingface_token}),u=l.modelBytes||0,f=l.maxCtx||4096;let n={numLayers:l.numLayers,numKvHeads:l.numKvHeads,headDim:l.headDim,isMLA:l.isMLA,kvLoraRank:l.kvLoraRank,qkRopeHeadDim:l.qkRopeHeadDim};d=pr({...n,contextLength:f});let r=c.ok?c.selected.gpuUsableBytes:0;if(r>0&&u>0&&l.numLayers){let e=r-u;if(e>0){let t;t=l.isMLA&&l.kvLoraRank>0?l.numLayers*(l.kvLoraRank+(l.qkRopeHeadDim||0))*2:l.numKvHeads&&l.headDim?l.numLayers*l.numKvHeads*l.headDim*2*2:0,t>0&&(m=Math.floor(e/t),m=Math.min(m,f))}else m=0;m!=null&&m<f&&(p=pr({...n,contextLength:m}))}h={repoId:t,revision:e.model.revision,nCtx:f,architecture:l.arch,quantBits:l.quantBits,quantGroupSize:l.quantGroupSize}}catch{}}let 
g=c.ok?{...c.selected,modelBytes:u,kvCacheBytes:d,memoryLimitedCtx:m,limitedKvCacheBytes:p,kvInfo:l?{nCtxTrain:l.maxCtx||null,nLayer:l.numLayers,nEmbd:l.hiddenSize,nHeadKv:l.numKvHeads,headDim:l.headDim}:null,quantization:l?{bits:l.quantBits,groupSize:l.quantGroupSize,dtype:l.dtype}:null}:null;if(c.ok&&u!=null&&u>0){let e=u+(d||0),t=c.selected.gpuUsableBytes,n=e<=t;if(g.fit={totalRequiredBytes:e,fitsInGpu:n,fitsInCpu:e<=t,limiting:n?`none`:`insufficient-memory`},p!=null&&p!==d){let e=u+p;g.limitedFit={totalRequiredBytes:e,fitsInGpu:e<=t,fitsInCpu:e<=t,limiting:e<=t?`none`:`insufficient-memory`}}}return{type:`mlx-llm`,available:a,platform:{ok:a,os:process.platform,arch:v.arch(),errors:i},python:{available:o},mlx:{systemAvailable:s,venvSupported:o},buttress:c.ok?{ok:c.ok,selected:g,attempts:c.attempts}:{ok:!1,selected:null,attempts:c.attempts||[],errors:c.errors},modelConfig:h,timestamp:new Date().toISOString()}}async function Ar(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n,o=or(e),s=o.model.repo_id;if(!s)return{started:!1,localPath:null,repoId:null,error:`Missing repo_id`};let c=br();if(c.length>0)return{started:!1,localPath:null,repoId:s,error:c.join(`; `)};let l=`mlx:${s}`;if(t?.isDownloading(l))return{started:!1,localPath:null,repoId:s,alreadyDownloading:!0};let u=(async()=>{try{let e=o.runtime.cache_dir||nr,t=await gr({envDir:o.runtime.mlx_env_dir||_.join(e,`mlx-env`),mlxLmPackage:o.runtime.mlx_lm_package||tr,mlxVlmPackage:o.runtime.mlx_vlm_package||er,onProgress:r?e=>r(e*.3):void 0});r?.(.3);let n=`
5
5
  from huggingface_hub import snapshot_download
6
6
  path = snapshot_download("${s}", revision="${o.model.revision||`main`}")
7
7
  print(path)
8
8
  `.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let l=c.stdout.trim().split(`
9
- `).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return cn(t,n);if(e===`ggml-stt`)return Yn(t,n);if(e===`mlx-llm`)return kr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.0`,jr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Mr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Nr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
9
+ `).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return cn(t,n);if(e===`ggml-stt`)return Yn(t,n);if(e===`mlx-llm`)return kr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.1`,jr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Mr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Nr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
10
10
  `),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
11
11
  `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=u(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=Ue(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,f=u(l),p=u(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=d(t?.fit?.fitsInGpu),h=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${f} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=u(i),l=u(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=d(t?.limitedFit?.fitsInGpu),h=d(t?.limitedFit?.fitsInCpu);(e!==o||c!==f||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
12
12
  ---`),n.push(`
@@ -33,7 +33,7 @@ print(path)
33
33
  === Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
34
34
  --- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
35
35
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
36
- === Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Lr=e({finalizeGenerator:()=>Vr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Gr,ggmlLlm:()=>Hr,ggmlStt:()=>Wr,globalDownloadManager:()=>Rr,mlxLlm:()=>Ur,showModelsTable:()=>Nr,showSttModelsTable:()=>Fr,startGenerator:()=>Br,startModelDownload:()=>qr,status:()=>Kr,testGgmlLlmCapabilities:()=>Pr,testGgmlSttCapabilities:()=>Ir});const Z=new Map,Rr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},zr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=zr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Br(e,t){let n={"ggml-llm":{create:nn,getId:rn},"ggml-stt":{create:Gn,getId:Kn},"mlx-llm":{create:Dr,getId:Or}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Rr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Vr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Hr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},Ur={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Wr={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Gr(e,t){return e===`ggml-llm`?rn(t):e===`ggml-stt`?Kn(t):e===`mlx-llm`?Or(t):null}const Kr={getFullStatus:()=>et(Z),getGgmlLlmStatus:()=>Ze(Z),getGgmlSttStatus:()=>Qe(Z),getMlxLlmStatus:()=>$e(Z),subscribeToStatus:Ye,subscribeToStatusWithId:Xe,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};async function qr(e,t,n={}){let r={"ggml-llm":an,"ggml-stt":qn,"mlx-llm":Ar}[e];return r?r(t,Rr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Jr=`@fugood/buttress-server`,Yr=`2.24.1-beta.0`,Xr={name:Jr,version:Yr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`./bin/bricks-buttress`},files:[`lib`,`bin`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun 
run build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`0a07675c4ab70cabbf91525bfbd38f610a7e406f`};const Zr=()=>({version:Yr,name:Jr,description:Xr.description}),Qr=typeof process<`u`&&process.versions&&process.versions.node,$r=e=>new n({adapter:Qr?t():void 0,...e}),ei=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),ti=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var ni=e=>{let t=$r(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,ti,{response:ei}),t};const ri=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var ii=$r().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return ri?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const ai=_.dirname(D(import.meta.url)),oi=async()=>{let 
e=[_.join(ai,`..`,`public`,`status.html`),_.join(ai,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},si=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},ci=async()=>{let e=await oi();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var li=$r().get(`/status`,ci).get(`/status/`,ci).get(`/buttress/status`,({store:{backend:e}})=>si(e));const ui=[`ggml-llm`,`mlx-llm`],di=new Map;function fi(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function pi(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>ui.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. 
Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let a=i[0],o=n||a.model?.repo_id;if(n){let e=i.find(e=>e.model?.repo_id===n);e&&(a=e)}else o=a.model?.repo_id;let s=a.type||`ggml-llm`,c=o,l=di.get(c);if(l?.initialized)return l;let{generators:u,server:d,...f}=t.global||{},p={...f,...a,model:{...a.model,repo_id:o}};console.log(`${r} Creating ${s} generator for ${c}`);let{id:m}=await e.startGenerator(s,p),h={id:m,type:s,config:p,repoId:o,initialized:!1};return di.set(c,h),await fi(e,s).initContext(m,{}),h.initialized=!0,console.log(`${r} Generator ready: ${c}`),h}function mi(e){let t=e.timings||{},n=t.prompt_n??t.promptN??0,r=t.cache_n??t.cacheN??0,i=t.predicted_n??t.predictedN??0;return{promptTokens:n||e.prompt_tokens||e.promptTokens||0,cachedTokens:r,completionTokens:i||e.tokens_evaluated||e.tokensEvaluated||e.tokens_predicted||e.tokensPredicted||0}}function hi(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e),i=t+n;return{prompt_tokens:i,completion_tokens:r,total_tokens:i+r}}const gi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`;async function _i(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=hi(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return 
o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function vi({global:e}){let t=$r({prefix:`/oai-compat`});return t.use(M({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await pi(a,r,c,`[OpenAI]`),t=gi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await fi(a,e.type).completion(e.id,{options:b});if(!s)return await _i(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let 
t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=hi(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const yi=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function bi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof 
e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else 
Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}function xi(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}function Si(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}async function Ci(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Si(u),l=u.stopping_word||u.stoppingWord||null,c=xi(u,o.length>0);else if(n===`error`)throw Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}function wi({global:e}){let t=$r({prefix:`/anthropic-messages`});return 
t.use(M({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=bi(o),n=await pi(a,r,o.model,`[Anthropic]`),s=yi(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await fi(a,n.type).completion(n.id,{options:l});if(!o.stream)return await Ci(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield 
i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Si(r),w=r.stopping_word||r.stoppingWord||null,C=xi(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield 
i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=bi(a),t=await pi(i,r,a.model,`[Anthropic]`),n=fi(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server 
error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any())})}),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>{let t=e.model?.repo_id||e.type;return{id:t,type:`model`,display_name:t,created_at:new Date().toISOString()}});return n.length===0&&n.push({id:`ggml-llm`,type:`model`,display_name:`ggml-llm`,created_at:new Date().toISOString()}),{data:n,has_more:!1,first_id:n[0]?.id,last_id:n.at(-1)?.id}}),t}const Ti=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Ti(n[e]||{},t):n[e]=t}),n},Ei=e=>e&&typeof e==`object`?structuredClone(e):null,Di=(e,t)=>Ti(Ei(e)||{},Ei(t)||{}),Oi=(e,t)=>Ti(structuredClone(e.global),t||{}),ki=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Oi(e,a)}}return Object.keys(e.global).length>0?Oi(e,{}):null},Ai={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},ji=e=>e?e===!0?{...Ai}:Ti(Ai,e):null,Mi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Ni=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Pi=6e4,Fi=1024*1024*50,Ii=e=>{let t=N.machineIdSync(),n=Ti({server:{id:`buttress-${t}`,name:`Buttress Server 
(${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Pi,max_body_size:Fi},autodiscover:!1},Ei(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:ji(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Ni(i.max_body_size,w.parse,Fi),session_timeout:Ni(i.session_timeout,P,Pi)},global:s,generators:r}},Li={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ri={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=Ei(i),c=Di(ki(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=Ei(i),o=Di(ki(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const 
zi={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};function Bi(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function Vi(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function Hi(e,t){return{initContext:Bi(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Vi(e,`${t}Release Context`)}}var Ui=Hi(e=>e.ggmlLlm,``);const Wi={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},Gi=e=>e.ggmlStt,Ki={common:Ri,ggmlLlm:Ui,ggmlStt:{initContext:Bi(Gi),async transcribe({backend:e,config:{server:t}},n,r,i){return 
console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:Vi(Gi,`Release STT Context`)},mlxLlm:Hi(e=>e.mlxLlm,`MLX `)},qi={common:Li,ggmlLlm:zi,ggmlStt:Wi,mlxLlm:zi};var Ji=Ki;const Yi=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return F.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=F.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},Xi=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof F?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:F.from(t).toString(`base64`)}:t)}catch{return e}};var Zi=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=re.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let 
n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Qi=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Zi(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const $i=()=>{let e=v.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=Zr(),ea=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function ta({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
36
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Lr=e({finalizeGenerator:()=>Vr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Gr,ggmlLlm:()=>Hr,ggmlStt:()=>Wr,globalDownloadManager:()=>Rr,mlxLlm:()=>Ur,showModelsTable:()=>Nr,showSttModelsTable:()=>Fr,startGenerator:()=>Br,startModelDownload:()=>qr,status:()=>Kr,testGgmlLlmCapabilities:()=>Pr,testGgmlSttCapabilities:()=>Ir});const Z=new Map,Rr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},zr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=zr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Br(e,t){let n={"ggml-llm":{create:nn,getId:rn},"ggml-stt":{create:Gn,getId:Kn},"mlx-llm":{create:Dr,getId:Or}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Rr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Vr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Hr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},Ur={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Wr={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Gr(e,t){return e===`ggml-llm`?rn(t):e===`ggml-stt`?Kn(t):e===`mlx-llm`?Or(t):null}const Kr={getFullStatus:()=>et(Z),getGgmlLlmStatus:()=>Ze(Z),getGgmlSttStatus:()=>Qe(Z),getMlxLlmStatus:()=>$e(Z),subscribeToStatus:Ye,subscribeToStatusWithId:Xe,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};async function qr(e,t,n={}){let r={"ggml-llm":an,"ggml-stt":qn,"mlx-llm":Ar}[e];return r?r(t,Rr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Jr=`@fugood/buttress-server`,Yr=`2.24.1`,Xr={name:Jr,version:Yr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`./bin/bricks-buttress`},files:[`lib`,`bin`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run 
build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`984a440ed04862f12c65f3cf62bdc70a938fcdd6`};const Zr=()=>({version:Yr,name:Jr,description:Xr.description}),Qr=typeof process<`u`&&process.versions&&process.versions.node,$r=e=>new n({adapter:Qr?t():void 0,...e}),ei=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),ti=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var ni=e=>{let t=$r(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,ti,{response:ei}),t};const ri=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var ii=$r().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return ri?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const ai=_.dirname(D(import.meta.url)),oi=async()=>{let 
e=[_.join(ai,`..`,`public`,`status.html`),_.join(ai,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},si=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},ci=async()=>{let e=await oi();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var li=$r().get(`/status`,ci).get(`/status/`,ci).get(`/buttress/status`,({store:{backend:e}})=>si(e));const ui=[`ggml-llm`,`mlx-llm`],di=new Map;function fi(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function pi(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>ui.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. 
/**
 * Extract raw token counters from a completion result.
 *
 * Counters are read from `result.timings` first (snake_case, then camelCase)
 * and fall back to top-level fields when the timing value is missing or zero.
 *
 * @param {object} e - Completion result (the `data` of a `result` event).
 * @returns {{promptTokens: number, cachedTokens: number, completionTokens: number}}
 */
function mi(e) {
  const timings = e.timings || {};
  const promptCount = timings.prompt_n ?? timings.promptN ?? 0;
  const cachedCount = timings.cache_n ?? timings.cacheN ?? 0;
  const predictedCount = timings.predicted_n ?? timings.predictedN ?? 0;

  return {
    promptTokens: promptCount || e.prompt_tokens || e.promptTokens || 0,
    cachedTokens: cachedCount,
    // `tokens_evaluated` is intentionally NOT used as a fallback here: in
    // llama.cpp-style results it counts prompt tokens, so a completion that
    // generated nothing (predicted_n === 0) used to be reported with the
    // prompt size as its completion size.
    // NOTE(review): confirm @fugood/llama.node follows the same field meaning.
    completionTokens: predictedCount || e.tokens_predicted || e.tokensPredicted || 0,
  };
}
o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function vi({global:e}){let t=$r({prefix:`/oai-compat`});return t.use(M({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await pi(a,r,c,`[OpenAI]`),t=gi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await fi(a,e.type).completion(e.id,{options:b});if(!s)return await _i(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let 
t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=hi(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const yi=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function bi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof 
e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else 
Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}function xi(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}function Si(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}async function Ci(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Si(u),l=u.stopping_word||u.stoppingWord||null,c=xi(u,o.length>0);else if(n===`error`)throw Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}function wi({global:e}){let t=$r({prefix:`/anthropic-messages`});return 
t.use(M({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=bi(o),n=await pi(a,r,o.model,`[Anthropic]`),s=yi(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await fi(a,n.type).completion(n.id,{options:l});if(!o.stream)return await Ci(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield 
i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Si(r),w=r.stopping_word||r.stoppingWord||null,C=xi(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield 
i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=bi(a),t=await pi(i,r,a.model,`[Anthropic]`),n=fi(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server 
/**
 * Recursively merge `t` (overrides) on top of `e` (base) and return a new
 * container; neither argument is mutated at the level being merged.
 *
 * - Non-array objects in `t` are merged key by key into the matching value of
 *   `e`; every other value (primitives, arrays, null) replaces it outright.
 * - When the base value under a key is not itself a non-array object, the
 *   override object is merged into a fresh `{}`. Previously a string base was
 *   spread into per-character keys and an array base was kept as an array
 *   with extra properties.
 * - An own `__proto__` key in `t` is ignored. Overrides can come from
 *   JSON-parsed client config, where assigning that key would swap the
 *   prototype of the merged result.
 *
 * @param {object|Array} [e={}] - Base value.
 * @param {object} [t={}] - Overrides.
 * @returns {object|Array} The merged copy.
 */
const Ti = (e = {}, t = {}) => {
  const isMergeable = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  const merged = Array.isArray(e) ? [...e] : { ...e };

  for (const [key, value] of Object.entries(t || {})) {
    if (key === '__proto__') continue;

    if (isMergeable(value)) {
      const current = merged[key];
      merged[key] = Ti(isMergeable(current) ? current : {}, value);
    } else {
      merged[key] = value;
    }
  }

  return merged;
};
(${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Pi,max_body_size:Fi},autodiscover:!1},Ei(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:ji(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Ni(i.max_body_size,w.parse,Fi),session_timeout:Ni(i.session_timeout,P,Pi)},global:s,generators:r}},Li={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ri={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=Ei(i),c=Di(ki(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=Ei(i),o=Di(ki(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const 
zi={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};function Bi(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function Vi(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function Hi(e,t){return{initContext:Bi(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Vi(e,`${t}Release Context`)}}var Ui=Hi(e=>e.ggmlLlm,``);const Wi={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},Gi=e=>e.ggmlStt,Ki={common:Ri,ggmlLlm:Ui,ggmlStt:{initContext:Bi(Gi),async transcribe({backend:e,config:{server:t}},n,r,i){return 
/**
 * Parse a JSON wire message, reviving the tagged values produced by `Xi`.
 *
 * - `{type:"Buffer", data}`     -> Buffer
 * - `{type:"Uint8Array", data}` -> ArrayBuffer holding the decoded bytes
 *   NOTE(review): this yields an ArrayBuffer, not a Uint8Array — confirm that
 *   is what consumers expect.
 * - `{type:"Error", name, message}` -> Error with that name and message
 *
 * @param {string} e - Serialized payload.
 * @returns {*} The parsed value, or `e` itself when it is not valid JSON.
 */
const Yi = (e) => {
  try {
    return JSON.parse(e, (_key, value) => {
      if (!value) return value;

      if (value?.type === 'Buffer' && value?.data) {
        return F.from(value.data, 'base64');
      }

      if (value?.type === 'Uint8Array' && value?.data) {
        const bytes = F.from(value.data, 'base64');
        // Slice so the result covers only these bytes, not the whole backing store.
        return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
      }

      if (value?.type === 'Error' && value?.name && value?.message) {
        // Error(name, message) would use the name as the message and drop the
        // real message, so build the error explicitly.
        const revived = new Error(value.message);
        revived.name = value.name;
        return revived;
      }

      return value;
    });
  } catch {
    // Not JSON: hand the raw input back unchanged.
    return e;
  }
};

/**
 * Serialize a value to JSON, tagging Error, Buffer and Uint8Array instances
 * so `Yi` can revive them.
 *
 * @param {*} e - Value to serialize.
 * @returns {string|*} The JSON text, or `e` itself when serialization throws.
 */
const Xi = (e) => {
  try {
    return JSON.stringify(e, (_key, value) => {
      if (value instanceof Error) {
        return { type: 'Error', name: value.name, message: value.message };
      }
      if (value instanceof F) {
        return { type: 'Buffer', data: value.toString('base64') };
      }
      if (value instanceof Uint8Array) {
        return { type: 'Uint8Array', data: F.from(value).toString('base64') };
      }
      return value;
    });
  } catch {
    return e;
  }
};

/**
 * UDP autodiscovery transport: periodically broadcasts ANNOUNCE datagrams and
 * answers QUERY datagrams with a RESPONSE carrying the server info.
 */
var Zi = class {
  name = 'udp';
  socket = null;
  announcementTimer = null;
  config;
  getServerInfo;
  port;

  /**
   * @param {object} config - UDP settings (`port`, `announcements`, `requests`).
   * @param {Function} getServerInfo - Returns the info object to advertise.
   */
  constructor(config, getServerInfo) {
    this.config = config;
    this.getServerInfo = getServerInfo;
    this.port = config.port ?? 8089;
  }

  /** Bind the socket and, when enabled, start the announcement interval. */
  async start() {
    this.socket = re.createSocket({ type: 'udp4', reuseAddr: true });
    this.socket.on('message', (msg, rinfo) => {
      this.handleMessage(msg, rinfo);
    });
    this.socket.on('error', (err) => {
      console.error('[Autodiscover UDP] Socket error:', err.message);
    });

    await new Promise((resolve, reject) => {
      this.socket.bind(this.port, () => {
        this.socket.setBroadcast(true);
        console.log(`[Autodiscover UDP] Listening on port ${this.port}`);
        resolve();
      });
      this.socket.once('error', reject);
    });

    // `announcements` may be absent when only `requests` is configured.
    if (this.config.announcements?.enabled) {
      const interval = this.config.announcements.interval ?? 5e3;
      this.announcementTimer = setInterval(() => {
        this.sendAnnouncement();
      }, interval);
      this.sendAnnouncement();
    }
  }

  /** Stop announcing and close the socket. */
  async stop() {
    if (this.announcementTimer) {
      clearInterval(this.announcementTimer);
      this.announcementTimer = null;
    }
    if (this.socket) {
      await new Promise((resolve) => {
        this.socket.close(() => resolve());
      });
      this.socket = null;
    }
  }

  /**
   * Handle one incoming datagram. Anything that is not a well-formed QUERY is
   * ignored; a QUERY is answered after a random delay of up to
   * `requests.responseDelay` milliseconds.
   *
   * @param {Buffer} msg - Raw datagram payload.
   * @param {{address: string, port: number}} rinfo - Sender address info.
   */
  handleMessage(msg, rinfo) {
    let message;
    try {
      message = JSON.parse(msg.toString());
    } catch {
      // Datagrams come from arbitrary hosts; malformed ones are dropped.
      return;
    }

    if (message?.t !== 'QUERY' || !this.config.requests?.enabled) return;

    // Validate before scheduling: the timer callback runs outside any
    // try/catch, so reading `.id` of a missing `d` there would surface as an
    // uncaught exception.
    const query = message.d;
    if (query == null) return;
    const requestId = query.id;

    const maxDelay = this.config.requests.responseDelay ?? 0;
    const delay = maxDelay > 0 ? Math.random() * maxDelay : 0;
    setTimeout(() => {
      try {
        this.sendResponse(requestId, rinfo);
      } catch (err) {
        console.error('[Autodiscover UDP] Response error:', err?.message ?? err);
      }
    }, delay);
  }

  /** Broadcast an ANNOUNCE datagram with the current server info. */
  sendAnnouncement() {
    if (!this.socket) return;
    const payload = { t: 'ANNOUNCE', v: '1.0', d: { info: this.getServerInfo() } };
    const data = Buffer.from(JSON.stringify(payload));
    this.socket.send(data, 0, data.length, this.port, '255.255.255.255', (err) => {
      if (err) console.error('[Autodiscover UDP] Announcement error:', err.message);
    });
  }

  /**
   * Send a RESPONSE datagram for a query back to its sender.
   *
   * @param {*} requestId - `id` taken from the query, echoed as `request_id`.
   * @param {{address: string, port: number}} rinfo - Destination.
   */
  sendResponse(requestId, rinfo) {
    if (!this.socket) return;
    const payload = {
      t: 'RESPONSE',
      v: '1.0',
      d: { request_id: requestId, info: this.getServerInfo() },
    };
    const data = Buffer.from(JSON.stringify(payload));
    this.socket.send(data, 0, data.length, rinfo.port, rinfo.address, (err) => {
      if (err) console.error('[Autodiscover UDP] Response error:', err.message);
    });
  }
};
37
37
  `),n.push(`${$.name} v${$.version}`),n.push(`## Model Capabilities Comparison
38
38
  `),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=u(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=Ue(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=u(c),f=t?.recurrentMemoryBytes||0,p=f>0?u(f):`-`,m=u(t?.modelBytes&&(c!=null||f>0)?t.modelBytes+(c||0)+f:t?.fit?.totalRequiredBytes),h=d(t?.fit?.fitsInGpu),g=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let 
e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=u(s),h=u(t?.modelBytes&&(s!=null||f>0)?t.modelBytes+(s||0)+f:t?.limitedFit?.totalRequiredBytes),g=d(t?.limitedFit?.fitsInGpu),_=d(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
39
39
  ---`),n.push(`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fugood/buttress-server",
3
- "version": "2.24.1-beta.0",
3
+ "version": "2.24.2",
4
4
  "main": "lib/index.mjs",
5
5
  "types": "lib/index.d.mts",
6
6
  "type": "module",
@@ -44,5 +44,5 @@
44
44
  "tsdown": "^0.20.1",
45
45
  "typescript": "^5.9.3"
46
46
  },
47
- "gitHead": "0a07675c4ab70cabbf91525bfbd38f610a7e406f"
47
+ "gitHead": "984a440ed04862f12c65f3cf62bdc70a938fcdd6"
48
48
  }
@@ -1,27 +0,0 @@
1
- import type { AnyElysia } from "elysia";
2
- import * as backendCore from "@fugood/buttress-backend-core";
3
- import { AutodiscoverService } from "./autodiscover";
4
- import type { Config } from "./types";
5
- export { startModelDownload } from "@fugood/buttress-backend-core";
6
- export { processConfig } from "./utils/config";
7
- export declare const checkForUpdates: () => Promise<string | null>;
8
- export declare const compareVersions: (current: string, latest: string) => boolean;
9
- export declare const logUpdateMessage: (latestVersion: string) => void;
10
- export declare const checkAndNotifyUpdates: () => Promise<void>;
11
- export type Backend = typeof backendCore;
12
- export interface StartServerOptions {
13
- backend?: Backend;
14
- router?: AnyElysia;
15
- config: Config;
16
- enableOpenAICompat?: boolean;
17
- }
18
- export declare const createServer: ({ backend, router, config, enableOpenAICompat }: StartServerOptions) => Promise<{
19
- app: AnyElysia;
20
- config: Config;
21
- }>;
22
- export declare const startServer: ({ backend, router, config, enableOpenAICompat }: StartServerOptions) => Promise<{
23
- app: AnyElysia;
24
- port: number;
25
- openaiEnabled: boolean;
26
- autoDiscover: AutodiscoverService | null;
27
- }>;
@@ -1,110 +0,0 @@
1
- #!/usr/bin/env node
2
- import{createRequire as s3}from"node:module";var l3=Object.defineProperty;var i3=($,X)=>{for(var Z in X)l3($,Z,{get:X[Z],enumerable:!0,configurable:!0,set:(W)=>X[Z]=()=>W})};var P=($,X)=>()=>($&&(X=$($=0)),X);var R1=s3(import.meta.url);var w1=($,X,Z)=>Math.min(Math.max($,X),Z),F1=($)=>$?40:0,E1=($=0)=>{if(!$)return 0;return w1($/12884901888*20,0,20)},M1=($=0)=>{if(!$)return 0;return w1($/34359738368*10,0,10)},x1=($)=>$?10:0,S1=($="default",X=null)=>{let Z=String($).toLowerCase();if(!Z)return 0;if(Z.includes("cuda"))return 20;if(Z.includes("vulkan"))return 10;if(Z.includes("default"))return X==="darwin"||X==="ios"?15:5;return 0},q0=({platform:$,variant:X,hasGpu:Z,gpuUsableBytes:W=0,cpuUsableBytes:j=0,ok:V=!0}={})=>{if(!V)return 0;let Y=F1(Z)+S1(X,$)+E1(W),J=M1(j),N=x1(V);return Math.min(100,Math.round(Y+J+N))},F4=({platform:$,variant:X,hasGpu:Z,gpuUsableBytes:W=0,cpuUsableBytes:j=0,ok:V=!0}={})=>({gpuPresence:F1(Z),variant:S1(X,$),gpuMemory:E1(W),cpuMemory:M1(j),availability:x1(V)});var D1,u0=0.85,f0=0.5,P1=($)=>{if(!$&&$!==0)return[];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},n3=($)=>{if(!$)return null;return String($).trim().toLowerCase()||null},r3=({variant:$,preferVariants:X=[],variantPreference:Z=[],defaultVariants:W=D1}={})=>{let j=[];if($)j.push($);j.push(...P1(X)),j.push(...P1(Z)),j.push(...W);let V=j.map(n3).filter(Boolean);return Array.from(new Set(V))},T1=($={})=>{let X=String($.type||$.deviceType||$.kind||"").toLowerCase();if(X.includes("gpu"))return!0;if(X.includes("cuda"))return!0;if(X.includes("metal"))return!0;if(X.includes("vulkan"))return!0;if(X.includes("snapdragon"))return!0;return!1},o3=($)=>{if(!Array.isArray($))return[];return $.map((X)=>({...X}))},a3=($,X)=>{if($==="snapdragon")return X.filter((Z)=>Z.deviceName!=="GPUOpenCL");return X},k1=({platform:$,totalMemoryInBytes:X,variant:Z,devices:W,gpuMemoryFraction:j,cpuMemoryFraction:V,ok:Y,error:J})=>{let 
N=o3(a3(Z,W)),H=N.some(T1),O=N.filter((R)=>T1(R)&&Number.isFinite(Number(R.maxMemorySize))).reduce((R,U)=>R+U.maxMemorySize,0),q=X,_=H?Math.floor(O*j):0,G=q?Math.floor(q*V):0,w={platform:$,variant:Z,hasGpu:H,gpuUsableBytes:_,cpuUsableBytes:G,ok:Y},z=q0(w),A=Y?F4(w):null;return{platform:$,ok:Y,variant:Z,hasGpu:H,devices:N,gpuTotalBytes:O,gpuUsableBytes:_,cpuTotalBytes:q,cpuUsableBytes:G,score:z,breakdown:A,error:J,timestamp:new Date().toISOString()}},E4=({device:$,modelBytes:X=0,kvCacheBytes:Z=0}={})=>{if(!$)return{totalRequiredBytes:X+Z,fitsInGpu:!1,fitsInCpu:!1,limiting:"unknown-device"};let W=Math.max(0,Number(X)||0)+Math.max(0,Number(Z)||0),j=$.hasGpu&&W>0&&W<=$.gpuUsableBytes,V=W>0&&W<=$.cpuUsableBytes,Y="ok";if(!j&&$.hasGpu)Y="gpu-memory";if(!V)Y=j?"cpu-memory":"insufficient-memory";return{totalRequiredBytes:W,fitsInGpu:j,fitsInCpu:V,limiting:Y}},E0=async({platform:$,variant:X=null,preferVariants:Z=[],variantPreference:W=[],gpuMemoryFraction:j=u0,cpuMemoryFraction:V=f0,includeBreakdown:Y=!1,totalMemoryInBytes:J,modelBytes:N=null,kvCacheBytes:H=null,limitedKvCacheBytes:O=null,dependencies:q={},defaultVariants:_=D1}={})=>{let{getBackendDevicesInfo:G,isLibVariantAvailable:w}=q;if(typeof G!=="function"||typeof w!=="function")throw TypeError("GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions");let z=r3({variant:X,preferVariants:Z,variantPreference:W,defaultVariants:_}),A=[];for(let B of z)try{if(!await w(B))throw Error(`Variant ${B} not available on this platform`);let F=await G(B);A.push(k1({platform:$,totalMemoryInBytes:J,variant:B,devices:F,gpuMemoryFraction:j,cpuMemoryFraction:V,ok:!0}))}catch(L){let F=L instanceof Error?L.message:String(L);A.push(k1({platform:$,totalMemoryInBytes:J,variant:B,devices:[],gpuMemoryFraction:j,cpuMemoryFraction:V,ok:!1,error:F}))}let U=A.filter((B)=>B.ok)[0]||null,Q={ok:Boolean(U),selected:U?{...U,breakdown:Y?U.breakdown:void 0}:null,attempts:A};if(!Y&&Q.selected)delete 
Q.selected.breakdown;if(!Q||!N&&!H)return Q;let K=(B)=>{if(!B)return B;let L=E4({device:B,modelBytes:N||0,kvCacheBytes:H||0}),F=null;if(O!=null&&O!==H)F=E4({device:B,modelBytes:N||0,kvCacheBytes:O});return{...B,fit:L,...F&&{limitedFit:F}}};return Q.selected=K(Q.selected),Q.attempts=Array.isArray(Q.attempts)?Q.attempts.map(K):Q.attempts,Q},g0="ggml-llm";var p0=P(()=>{D1=["cuda","vulkan","snapdragon","default"]});var M4="ggml-stt",b1,x4=async({platform:$,variant:X=null,preferVariants:Z=[],variantPreference:W=[],gpuMemoryFraction:j=u0,cpuMemoryFraction:V=f0,includeBreakdown:Y=!1,totalMemoryInBytes:J,modelBytes:N=null,processingBytes:H=null,kvCacheBytes:O=null,dependencies:q={}}={})=>{let _=W&&W.length>0?W:b1;return E0({platform:$,variant:X,preferVariants:Z,variantPreference:_,gpuMemoryFraction:j,cpuMemoryFraction:V,includeBreakdown:Y,totalMemoryInBytes:J,modelBytes:N,kvCacheBytes:H??O,dependencies:q,defaultVariants:b1})};var S4=P(()=>{p0();b1=["cuda","vulkan","default"]});var t3,_0=async({platform:$,totalMemoryInBytes:X,backend:Z=g0,dependencies:W,...j}={})=>{let V=t3.get(Z);if(!V)throw Error(`No capability detector registered for backend "${Z}"`);return await V({...j,dependencies:W,totalMemoryInBytes:X,platform:$})};var v1=P(()=>{p0();S4();t3=new Map([[g0,E0],[M4,x4]])});var h1,P4=($)=>{let X=$?String($).toLowerCase():"f16";return h1[X]||h1.f16},T4=($,X,Z,W,j,V={},{totalLayers:Y=null,swaLayers:J=0,swaContext:N=null,swaContextMultiplier:H=1,swaAdditionalTokens:O=0,swaFull:q=!1}={})=>{if(!$||!X||!Z||!W||!j)return 0;let _=Y!=null&&Y!==void 0?Number(Y):Number($),G=Math.max(0,Math.floor(_));if(!G)return 0;let w=P4(V.k),z=P4(V.v),A=Number(Z)*(Number(W)*w+Number(j)*z);if(!A)return 0;let R=Math.max(0,Number(X)||0),U=Math.min(G,Math.max(0,Math.floor(Number(J)||0))),Q=Math.max(0,G-U),K=N!=null&&Number.isFinite(Number(N))?Math.max(0,Number(N)):R,B=Math.max(1,Number(H)||1),L=Math.max(0,Number(O)||0),F=K*B+L,E=q?R:Math.min(R,F),M=Q*R+U*Math.max(0,Math.floor(E));return 
Math.round(A*M)},m0=({modelBytes:$=0,audioLengthSeconds:X=30,sampleRate:Z=16000,bytesPerSample:W=4}={})=>{let j=Math.max(0,Number($)||0),V=Math.max(0,Math.floor(Math.max(0,X)*Z*W)),Y=1048576,J=1073741824,N;if(j<209715200)N=125829120;else if(j<524288000)N=146800640;else if(j<2147483648)N=157286400;else N=167772160;let H;if(j<209715200)H=73400320;else if(j<524288000)H=141557760;else if(j<2147483648)H=230686720;else H=230686720;let O;if(j<104857600)O=20971520;else if(j<209715200)O=31457280;else if(j<524288000)O=89128960;else if(j<2147483648)O=225443840;else O=377487360;let q=N+H+O,_=j+q+V;return{modelBytes:j,audioBufferBytes:V,processingBufferBytes:q,totalBytes:_}};var k4=P(()=>{h1={f16:2,f32:4,q8_0:1,q6_k:0.75,q5_k:0.625,q5_k_m:0.625,q5_k_s:0.625,q5_1:0.625,q5_0:0.625,q4_k:0.5,q4_k_m:0.5,q4_k_s:0.5,q4_1:0.5,q4_0:0.5,iq4_nl:0.5}});var D4=($)=>$?String($).trim().toLowerCase():null,e3=($={},X=null)=>{if(!$)return null;let Z=D4(X),W=Z?`${Z}.attention.sliding_window`:null,j=(W&&$[W]!=null?$[W]:null)??$["llama.attention.sliding_window"];if(j==null)return null;let V=Number(j);return Number.isFinite(V)?V:null},C1=($=0,X=0,Z=!1)=>{let W=Math.max(0,Math.floor(Number($)||0)),j=Math.max(0,Math.floor(Number(X)||0));if(!W||j===1)return 0;if(j<=0)return W;let V=Math.max(0,j-1),Y=Math.floor(W/j),J=W%j,N=Z?Math.max(0,J-1):Math.min(J,V);return Y*V+N},I1=({arch:$,nLayer:X=0})=>({arch:D4($),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(X)||0)),swaLayers:0}),$6,c0=({arch:$,metadata:X={},nLayer:Z=0}={})=>{let W=D4($||X["general.architecture"]),j=Math.max(0,Math.floor(Number(Z)||0)),V=e3(X,W),Y=W?$6.get(W):null;if(!Y)return I1({arch:W,nLayer:Z});let J=Y({nLayer:j,nSwa:V,metadata:X});if(!J||!J.enabled||!J.window||J.window<=0)return I1({arch:W,nLayer:Z});let 
N=Math.max(0,Math.floor(Number(J.pattern)||0)),H=J.kvLayers!=null&&Number.isFinite(Number(J.kvLayers))?Number(J.kvLayers):j,O=Math.max(0,Math.floor(H)),q=C1(O,N,Boolean(J.denseFirst));return{arch:W,enabled:q>0,window:J.window,pattern:N,denseFirst:Boolean(J.denseFirst),type:J.type||"standard",kvLayers:O,swaLayers:q}};var y1=P(()=>{$6=new Map([["llama4",({nSwa:$})=>{if($===0)return{enabled:!1};return{enabled:!0,window:$&&$>0?$:8192,pattern:4,type:"chunked"}}],["afmoe",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["phi3",()=>({enabled:!1})],["gemma2",({nSwa:$})=>{let X=$&&$>0?$:4096;if(!X)return{enabled:!1};return{enabled:!0,window:X,pattern:2,type:"standard"}}],["gemma3",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"standard"}}],["gemma3n",({nLayer:$,nSwa:X})=>{if(!X||X<=0)return{enabled:!1};return{enabled:!0,window:X,pattern:5,type:"standard",kvLayers:Math.min(20,$)}}],["gemma-embedding",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"symmetric"}}],["cohere2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["olmo2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["exaone4",({nLayer:$,nSwa:X})=>{let Z=$>=64,W=null;if(X&&X>0)W=X;else if(Z)W=4096;if(!W)return{enabled:!1};return{enabled:!0,window:W,pattern:4,type:"standard"}}],["gpt-oss",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:2,type:"standard"}}],["smallthinker",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:"standard"}}]])});var X6,Z6,u1=($)=>$?String($).trim().toLowerCase():null,f1=($)=>{let X=u1($);return X?X6.has(X):!1},g1=($)=>{let X=u1($);return X?Z6.has(X):!1},b4=($)=>{if(f1($))return"recurrent";if(g1($))return"hybrid";return"transformer"},d0=($={})=>{let 
X=$["general.architecture"],Z=(F,E=null)=>{let M=$[F],x=Number(M);return Number.isFinite(x)?x:E},W=(F,E=null)=>{let M=$[F];if(Array.isArray(M))return M;let x=Number(M);return Number.isFinite(x)?x:E},j=X?Z(`${X}.context_length`,Z("llama.context_length")):null,V=X?Z(`${X}.block_count`,Z("llama.block_count")):null,Y=X?Z(`${X}.embedding_length`,Z("llama.embedding_length")):null,J=X?Z(`${X}.attention.head_count`,Z("llama.attention.head_count")):null,N=X?W(`${X}.attention.head_count_kv`,W("llama.attention.head_count_kv",J)):null,H=null,O=null;if(Array.isArray(N)){let F=N.filter((E)=>Number(E)>0);if(F.length>0)H=Math.max(...F.map(Number)),O=F.length;else H=0,O=0}else H=N;let q=X?Z(`${X}.attention.key_length`,Z("llama.attention.key_length")):null,_=X?Z(`${X}.attention.value_length`,Z("llama.attention.value_length")):null,G=$["general.quantization_version"]||null,w=$["general.file_type"]||null,z=X?Z(`${X}.ssm.conv_kernel`):null,A=X?Z(`${X}.ssm.state_size`):null,R=X?Z(`${X}.ssm.inner_size`):null,U=X?Z(`${X}.ssm.group_count`):null,Q=X?Z(`${X}.ssm.time_step_rank`):null,K=X?Z(`${X}.rwkv.head_size`):null,B=X?Z(`${X}.rwkv.token_shift_count`,2):null,L=O!=null&&V!=null?V-O:null;return{arch:X,nCtxTrain:j,nLayer:V,nEmbd:Y,nHead:J,nHeadKv:H,nEmbdHeadK:q,nEmbdHeadV:_,quantVersion:G,fileType:w,attentionLayerCount:O,recurrentLayerCount:L,ssmDConv:z,ssmDState:A,ssmDInner:R,ssmNGroup:U,ssmDtRank:Q,rwkvHeadSize:K,rwkvTokenShiftCount:B}},G0=({layerCount:$,headKvCount:X,embdHeadKCount:Z,embdHeadVCount:W,cacheTypes:j,swaConfig:V,kvUnified:Y=!1,nParallel:J=1,swaFull:N=!1,arch:H=null,attentionLayerCount:O=null})=>{let q=b4(H);if(q==="recurrent")return()=>0;let 
_=q==="hybrid"&&O!=null?Math.max(0,Math.floor(Number(O)||0)):$,G=V?.window&&Y?Math.max(1,Number(J)||1):1,w=Y?1:Math.max(1,Number(J)||1);return(z)=>T4(_,z,X,Z,W,j,{totalLayers:_,swaLayers:V?.swaLayers||0,swaContext:V?.window,swaFull:N,swaContextMultiplier:G})*w},l0=({nLayer:$,nEmbd:X,recurrentLayerCount:Z=null,nSeqMax:W=1,ssmDConv:j=null,ssmDState:V=null,ssmDInner:Y=null,ssmNGroup:J=null,ssmDtRank:N=null,rwkvHeadSize:H=null,rwkvTokenShiftCount:O=2,arch:q=null})=>{if(b4(q)==="transformer")return 0;let G=Z!=null?Math.max(0,Math.floor(Number(Z)||0)):Math.max(0,Math.floor(Number($)||0));if(G===0)return 0;let w=Math.max(1,Math.floor(Number(W)||1)),z=0,A=0;if(H!=null&&H>0&&X!=null&&X>0)z=Math.max(1,Number(O)||2)*X,A=X*H;else if(V!=null&&Y!=null){let Q=Math.max(0,Number(j)||0),K=Math.max(0,Number(V)||0),B=Math.max(0,Number(Y)||0),L=Math.max(1,Number(J)||1);if(Math.max(0,Number(N)||0)>0)z=Q>0?(Q-1)*2*L*K:0,A=Math.floor(K*B/2);else z=Q>0?(Q-1)*(B+2*L*K):0,A=K*B}else return 0;let R=4,U=(z+A)*w*G*R;return Math.max(0,U)},i0=({maxCtx:$,availableMemory:X,modelBytes:Z,kvBytesForCtx:W})=>{let j=Math.max(1,Math.floor(Number($)||0));if(!W||X<=Z)return j;let V=1,Y=j,J=j;while(V<=Y){let N=Math.floor((V+Y)/2);if(Z+W(N)<=X)J=N,V=N+1;else Y=N-1}return J};var p1=P(()=>{k4();X6=new Set(["mamba","mamba2","rwkv6","rwkv6qwen2","rwkv7","arwkv7"]),Z6=new Set(["jamba","falcon-h1","plamo2","granitehybrid","lfm2","lfm2moe","nemotron_h","nemotron_h_moe","qwen3next"])});var M0=P(()=>{v1();k4();p0();S4();y1();p1()});import{EventEmitter as W6}from"node:events";class 
h4{constructor($=j6){this.maxEntries=$,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad($){v4(this.modelLoads,$,this.maxEntries),e.emit("status:modelLoad",$),e.emit("status:change",{type:"modelLoad",entry:$})}addCompletion($){v4(this.completions,$,this.maxEntries),e.emit("status:completion",$),e.emit("status:change",{type:"completion",entry:$})}addTranscription($){v4(this.transcriptions,$,this.maxEntries),e.emit("status:transcription",$),e.emit("status:change",{type:"transcription",entry:$})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}}function I4($){let X=(Z)=>$(Z);return e.on("status:change",X),()=>e.off("status:change",X)}function c1($){m1+=1;let X=m1,Z=I4($);return{subscriberId:X,unsubscribe:Z}}function C4($){let X=[];return{generators:Array.from($.entries()).filter(([,W])=>W.type==="ggml-llm").map(([W,j])=>{let{instance:V}=j,Y=[];if(V.contexts)Y=Array.from(V.contexts.entries()).map(([J,N])=>{let H={key:J,refCount:N.refCount,hasModel:Boolean(N.context)},O=N.context.parallel.getStatus();return H.parallelStatus=O,X.push({generatorId:W,contextKey:J,...O}),H});return{id:W,type:j.type,refCount:j.refCount,repoId:V.info?.model?.repoId||null,quantization:V.info?.model?.quantization||null,variant:V.info?.runtime?.variant||null,nCtx:V.info?.runtime?.n_ctx||null,nParallel:V.info?.runtime?.n_parallel||null,contexts:Y}}),parallelStatuses:X,history:{modelLoads:$0.getModelLoadHistory(),completions:$0.getCompletionHistory()}}}function 
y4($){return{generators:Array.from($.entries()).filter(([,Z])=>Z.type==="ggml-stt").map(([Z,W])=>{let{instance:j}=W,V=j.getStatus?.()||{},Y=V.queueStatus||{processing:!1,queuedCount:0};return{id:Z,type:W.type,refCount:W.refCount,repoId:j.info?.model?.repoId||null,quantization:j.info?.model?.quantization||null,modelType:j.info?.model?.modelType||null,variant:j.info?.runtime?.variant||null,hasContext:V.hasContext||!1,contextRefCount:V.contextRefCount||0,queueStatus:Y}}),history:{modelLoads:N0.getModelLoadHistory(),transcriptions:N0.getTranscriptionHistory()}}}function d1($){return{timestamp:new Date().toISOString(),ggmlLlm:C4($),ggmlStt:y4($)}}var j6=9999,e,v4=($,X,Z)=>{if($.push({...X,timestamp:X.timestamp||new Date().toISOString()}),$.length>Z)$.shift()},$0,N0,m1=0;var s0=P(()=>{e=new W6;e.setMaxListeners(100);$0=new h4,N0=new h4});import y from"node:path";import T0 from"node:os";import{stat as B0,mkdir as Y6,open as Q6,unlink as o,readFile as t1,writeFile as e1,rename as f4,readdir as J6}from"node:fs/promises";import{createHash as p4}from"node:crypto";import{gguf as V6}from"@huggingface/gguf";import{loadModel as N6,getBackendDevicesInfo as $2,isLibVariantAvailable as X2}from"@fugood/llama.node";import U6 from"bytes";import*as G6 from"node:stream/web";class 
L0{constructor($,X){this.config=$,this.plan=X,this.baseDir=$.runtime.cache_dir,this.enabled=$.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=x6($.runtime.session_cache?.max_size_bytes,10737418240),this.maxEntries=$.runtime.session_cache?.max_entries||1000,this.metadata={variant:X.info?.runtime?.variant||null,n_gpu_layers:X.info?.runtime?.n_gpu_layers||0,n_ctx:X.info?.runtime?.n_ctx||0,modelPath:X.localPath,cacheTypeK:X.info?.runtime?.cache_type_k||"f16",cacheTypeV:X.info?.runtime?.cache_type_v||"f16",kvUnified:X.info?.runtime?.kv_unified??null,swaFull:X.info?.runtime?.swa_full??null,flashAttnType:X.info?.runtime?.flash_attn_type||"off",isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo($){if(!$)return;if(this.metadata.isRecurrent=Boolean($.is_recurrent),this.metadata.isHybrid=Boolean($.is_hybrid),this.metadata.isRecurrent||this.metadata.isHybrid)console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`)}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}static checkTokenPrefixMatch($,X){if($.length>X.length)return!1;for(let Z=0;Z<$.length;Z+=1)if($[Z]!==X[Z])return!1;return!0}static async tokenizeToArray($,X){let Z=await $.tokenize(X);return Array.from(Z?.tokens||[])}async findFormattedMatchForRecurrent($,X,Z){let W=await L0.tokenizeToArray(Z,X),j=$.map(async(_)=>{try{let G=await L0.tokenizeToArray(Z,_.fullText);if(L0.checkTokenPrefixMatch(G,W))return{entry:_,usePromptState:!1,tokenCount:G.length};if(_.promptStatePath&&_.promptText){let w=await L0.tokenizeToArray(Z,_.promptText);if(L0.checkTokenPrefixMatch(w,W))return{entry:_,usePromptState:!0,tokenCount:w.length}}return null}catch(G){return console.warn(`[SessionCache] Failed to check entry ${_.id}: ${G.message}`),null}}),Y=(await Promise.all(j)).find((_)=>_!==null);if(!Y)return console.log("[SessionCache] No token prefix match found for recurrent/hybrid 
model"),null;let{entry:J,usePromptState:N,tokenCount:H}=Y;console.log(`[SessionCache] Token prefix match: ${J.id} (${H} tokens, usePromptState=${N})`);let O=N?J.promptStatePath:J.stateFilePath;if(!await n1(O))return await this.removeStaleEntry(J),null;return J.lastAccessedAt=new Date().toISOString(),await x0(this.cacheMap,this.baseDir).catch(()=>{}),{entry:J,usePromptState:N}}async initialize(){if(!this.enabled||this.initialized)return;try{await H0($4(this.baseDir)),await H0(o0(this.baseDir)),await H0(U2(this.baseDir)),this.cacheMap=await A6(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch($){console.warn(`[SessionCache] Failed to initialize: ${$.message}`),this.enabled=!1}}async removeStaleEntry($){if(console.log(`[SessionCache] Removing stale entry: ${$.id}`),$.stateFilePath)await o($.stateFilePath).catch(()=>{});if($.promptStatePath)await o($.promptStatePath).catch(()=>{});delete this.cacheMap.entries[$.id],this.cacheMap.totalSize-=($.stateFileSize||0)+($.promptStateSize||0),await x0(this.cacheMap,this.baseDir).catch(()=>{})}async findMatchingEntry($,X=null){if(!this.enabled||!this.cacheMap)return null;let Z=this.requiresExactMatch();if(Z&&X){let J=Object.values(this.cacheMap.entries).filter((N)=>m4(N.metadata,this.metadata)&&N.fullText);return this.findFormattedMatchForRecurrent(J,$,X)}let W=w6($,this.metadata,this.cacheMap,Z);if(!W)return null;let{entry:j}=W;if(!await n1(j.stateFilePath))return await this.removeStaleEntry(j),null;return j.lastAccessedAt=new Date().toISOString(),await x0(this.cacheMap,this.baseDir).catch(()=>{}),{entry:j,usePromptState:!1}}async prepareCompletionOptions($,X,Z=null){let W={options:$,cacheEntry:null,promptPrefix:null};if(!this.enabled)return W;let j=await this.findMatchingEntry(X,Z);if(!j)return W;let{entry:V,usePromptState:Y}=j,J=Y?V.promptStatePath:V.stateFilePath,N=Y?V.promptText:V.fullText;return console.log(`[SessionCache] Found 
matching entry: ${V.id} (${N.length} chars, usePromptState=${Y})`),{options:{...$,load_state_path:J},cacheEntry:V,promptPrefix:N}}async saveCompletionState($,X,Z,W=0,j=null){if(!this.enabled)return null;let V=$+X,Y=z6(V,this.metadata),J=()=>{if(Z)o(Z).catch(()=>{});if(j)o(j).catch(()=>{})};if(this.cacheMap.entries[Y]){console.log(`[SessionCache] Entry already exists for prompt: ${Y}, updating position`);let O=this.cacheMap.entries[Y];return O.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[Y],this.cacheMap.entries[Y]=O,await x0(this.cacheMap,this.baseDir).catch(()=>{}),J(),O}let N=s1(Y,this.baseDir),H=j?s1(`${Y}-prompt`,this.baseDir):null;try{await H0(y.dirname(N)),await f4(Z,N);let O=await B0(N),q=0;if(j&&H)try{await f4(j,H),q=(await B0(H)).size,console.log(`[SessionCache] Saved prompt state: ${H}`)}catch(G){console.warn(`[SessionCache] Failed to save prompt state: ${G.message}`)}let _={id:Y,promptText:$,completionText:X,fullText:V,promptTokenCount:W,stateFilePath:N,stateFileSize:O.size,promptStatePath:H||null,promptStateSize:q,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};if(this.cacheMap.entries[Y]=_,this.cacheMap.totalSize+=O.size+q,!this.requiresExactMatch())await E6(this.cacheMap,$,Y,this.metadata);return await F6(this.cacheMap,this.maxSizeBytes,this.maxEntries),await x0(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${Y} (${O.size} bytes, ${V.length} chars)`),_}catch(O){return console.warn(`[SessionCache] Failed to save state: ${O.message}`),J(),null}}async generateTempStatePath(){return await H0(o0(this.baseDir)),B6(this.baseDir)}async cleanup(){await M6(this.baseDir)}}async function K2($,X,Z={}){let{globalDownloadManager:W=null}=Z,j=e0(X),V=await S6(j),Y=new L0(j,V);await Y.initialize();let J={id:$,type:"ggml-llm",config:j,plan:V,info:V.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:W,sessionCache:Y,finalized:!1},N=async()=>{if(J.finalized)return;J.finalized=!0;let Q=Array.from(J.contexts.values()),K=Q.map((L)=>{if(L.released)return Promise.resolve(!1);if(L.releaseRequested||L.releaseTimer)return Promise.resolve(!1);if(L.refCount=Math.max(0,L.refCount-1),L.refCount>0)return Promise.resolve(!1);return r0(J,L)});if(await Promise.allSettled(K),Q.length===0||Q.every((L)=>L.released))await J.sessionCache.cleanup()},H=async(Q={})=>{let{onProgress:K}=Q,B=await D6(J,K);return J.sessionCache.updateModelInfo(B.modelInfo),{modelInfo:B.modelInfo?{...B.modelInfo}:null,runtime:{...J.plan.info.runtime},download:{...J.plan.info.download}}},O=async()=>{if(J.finalized)return!1;let Q=z0(J),K=J.contexts.get(Q);if(!K)return!1;return b6(J,K,!1)},q=async(Q={})=>{let{options:K={},useCache:B=!0}=Q,L=z0(J),F=J.contexts.get(L);if(!F)throw Error(`Context "${L}" not initialized`);await F.ready;let E=K.prompt||"",M=null,x=null;if(!E&&K.messages){({messages:M}=K),x={chatTemplate:K.chat_template||K.chatTemplate,jinja:K.jinja??!0,tools:K.tools,parallel_tool_calls:K.parallel_tool_calls,tool_choice:K.tool_choice,reasoning_format:K.reasoning_format,enable_thinking:K.enable_thinking,add_generation_prompt:K.add_generation_prompt,now:K.now,chat_template_kwargs:K.chat_template_kwargs};let I=await F.context.getFormattedChat(M,x.chatTemplate,x);E=I?.prompt||I||""}if(B&&J.sessionCache.enabled&&E){let{options:I}=await J.sessionCache.prepareCompletionOptions(K,E,F.context),b=await J.sessionCache.generateTempStatePath(),r=(await F.context.tokenize(E))?.tokens?.length||0,k={...I,save_state_path:b},d=J.sessionCache.requiresExactMatch(),v=Boolean(k.load_state_path),S=null;if(d&&!v)S=await J.sessionCache.generateTempStatePath(),k.save_prompt_state_path=S;let C={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return T6(F.context,k,J.sessionCache,E,b,r,J.id,C,S)}let 
D={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return P6(F.context,K,J.id,D)},_=async(Q={})=>{let{text:K="",params:B={}}=Q,L=z0(J),F=J.contexts.get(L);if(!F)throw Error(`Context "${L}" not initialized`);await F.ready;let E=await F.context.tokenize(K,B);if(!E)return{tokens:[]};let M=Array.from(E.tokens??[]).map((x)=>Number(x));return{...E,tokens:M}},G=async(Q={})=>{let{tokens:K=[]}=Q,B=z0(J),L=J.contexts.get(B);if(!L)throw Error(`Context "${B}" not initialized`);await L.ready;let F=K.map((E)=>Number(E));return L.context.detokenize(F)},w=async(Q={})=>{let{messages:K=[],template:B,params:L}=Q,F=z0(J),E=J.contexts.get(F);if(!E)throw Error(`Context "${F}" not initialized`);return await E.ready,await E.context.getFormattedChat(K,B,L)},z=()=>Array.from(J.contexts.values()).some((Q)=>!Q.released&&(Q.releaseRequested||Q.releaseTimer||Q.refCount>0)),A=()=>{J.finalized=!1},R=()=>{let Q=[],K=Array.from(J.contexts.entries()).map(([B,L])=>{let F={key:B,refCount:L.refCount,hasModel:Boolean(L.context)},E=L.context.parallel.getStatus();return F.parallelStatus=E,Q.push({contextKey:B,...E}),F});return{id:J.id,type:J.type,repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null,nCtx:J.plan.info.runtime?.n_ctx||null,nParallel:J.plan.info.runtime?.n_parallel||null,contexts:K,parallelStatuses:Q}},U=(Q)=>{let K=Array.from(J.contexts.entries()).map(([B,L])=>L.context.parallel.subscribeToStatus((F)=>{Q({contextKey:B,...F})}));return{remove:()=>{K.forEach((B)=>{if(B?.remove)B.remove()})}}};return{id:$,type:"ggml-llm",info:V.info,contexts:J.contexts,initContext:H,completion:q,tokenize:_,detokenize:G,applyChatTemplate:w,releaseContext:O,finalize:N,getStatus:R,subscribeParallelStatus:U,hasPendingReleases:z,resetFinalized:A}}async function q2($,X,Z={}){let{onProgress:W,onComplete:j,onError:V}=Z;try{let Y=e0($),J=await 
c4(Y),N=H2(Y,J),{repoId:H}=J;if(await P0(N,J.size)){if(console.log(`[Download] Model already exists: ${H} at ${N}`),typeof j==="function")j({localPath:N,repoId:H,alreadyExists:!0});return{started:!1,localPath:N,repoId:H,alreadyExists:!0}}let q=X.getDownload(N);if(q)return console.log(`[Download] Already downloading: ${H}`),q.then(()=>{if(typeof j==="function")j({localPath:N,repoId:H,joinedExisting:!0})}).catch((G)=>{if(typeof V==="function")V(G)}),{started:!1,localPath:N,repoId:H,alreadyDownloading:!0};console.log(`[Download] Starting download: ${H}`);let _=(async()=>{try{if(J.isSplit&&J.splitCount>0){let G=/-(\d{5})-of-(\d{5})\.gguf$/,w=y.dirname(N),z=J.splitCount,A=0;for(let R=1;R<=z;R+=1){let U=String(R).padStart(5,"0"),Q=J.filename.replace(G,`-${U}-of-${String(z).padStart(5,"0")}.gguf`),K=`${Y.model.base_url.replace(/\/+$/,"")}/${J.repoId}/resolve/${J.revision}/${Q}`,B=y.join(w,Q);if(!await P0(B))await a0(K,J.headers,B,null,(F)=>{if(F>=0&&Number.isFinite(F)){let E=(A+F)/z;if(console.log(`[Download] ${H}: ${Math.round(E*100)}%`),typeof W==="function")W(E)}});A+=1}}else await a0(J.url,J.headers,N,J.size,(G)=>{if(G>=0&&Number.isFinite(G)){if(console.log(`[Download] ${H}: ${Math.round(G*100)}%`),typeof W==="function")W(G)}});if(console.log(`[Download] Completed: ${H}`),typeof j==="function")j({localPath:N,repoId:H})}catch(G){if(console.error(`[Download] Failed: ${H}`,G.message),typeof V==="function")V(G);throw G}finally{X.deleteDownload(N)}})();return X.setDownload(N,_),{started:!0,localPath:N,repoId:H}}catch(Y){if(console.error("[Download] Failed to start download:",Y.message),typeof V==="function")V(Y);return{started:!1,localPath:null,repoId:null,error:Y.message}}}async function v6($){let X=e0($),Z=await c4(X),W=await 
G2(Z.url,Z.headers,X.runtime.cache_dir),{arch:j,nCtxTrain:V,nLayer:Y,nEmbd:J,nHead:N,nHeadKv:H,nEmbdHeadK:O,nEmbdHeadV:q,quantVersion:_,fileType:G,attentionLayerCount:w,recurrentLayerCount:z,ssmDConv:A,ssmDState:R,ssmDInner:U,ssmNGroup:Q,ssmDtRank:K,rwkvHeadSize:B,rwkvTokenShiftCount:L}=d0(W),F=Number.isFinite(Number(Y))?Number(Y):0,E=Number.isFinite(Number(J))?Number(J):0,M=Number.isFinite(Number(N))?Number(N):0,x=Number.isFinite(Number(H))?Number(H):M,D=M>0&&E>0?E/M:128,I=O!=null&&Number.isFinite(Number(O))?Number(O):D,b=q!=null&&Number.isFinite(Number(q))?Number(q):D,u=c0({arch:j,metadata:W,nLayer:F}),r=u&&Number.isFinite(Number(u.kvLayers))?Number(u.kvLayers):F,k=Math.max(0,Math.floor(Number(r)||0)),v=(X.model.n_ctx?Number(X.model.n_ctx):null)||V||4096,S={k:X.model.cache_type_k,v:X.model.cache_type_v},C=Z.size>0?Z.size:0,h=X.model.n_parallel||4,p=G0({layerCount:k,headKvCount:x,embdHeadKCount:I,embdHeadVCount:b,cacheTypes:S,swaConfig:u,kvUnified:X.model.kv_unified,nParallel:h,swaFull:X.model.swa_full,arch:j,attentionLayerCount:w}),f=l0({nLayer:F,nEmbd:E,recurrentLayerCount:z,nSeqMax:h,ssmDConv:A,ssmDState:R,ssmDInner:U,ssmNGroup:Q,ssmDtRank:K,rwkvHeadSize:B,rwkvTokenShiftCount:L,arch:j}),s=X.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(X.backend.gpu_memory_fraction))):S0.backend.gpu_memory_fraction||1,W0=X.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(X.backend.cpu_memory_fraction))):t0,A0=p(v),l=await 
O2(X,{modelBytes:C,kvCacheBytes:A0}),F0=(l.selected.totalMemory||0)*s,A4=Math.max(0,T0.totalmem()*W0),h0=l.selected.hasGpu?F0:A4,I0=i0({maxCtx:v,availableMemory:h0,modelBytes:C,kvBytesForCtx:p}),U0=p(v),C0=p(I0);return{kvInfo:{nCtxTrain:V,nLayer:F,nEmbd:E,nHeadKv:x,nEmbdHeadK:I,nEmbdHeadV:b,nHeadCount:M,nHeadKvCount:x,kvLayerCount:k,swa:u?.enabled?{window:u.window,pattern:u.pattern,denseFirst:u.denseFirst,type:u.type,layers:u.swaLayers}:null},modelBytes:C,kvCacheBytes:U0,limitedKvCacheBytes:C0,memoryLimitedCtx:I0,recurrentMemoryBytes:f,quantization:{name:Z.quantization||null,fileType:G,version:_}}}async function _2($=null,X={}){let{threshold:Z=1.1,includeBreakdown:W=!1,config:j,...V}=X,Y=null,J=null,N=null,H=null,O=null,q=null,_=null;if(j)try{let{modelBytes:B,kvCacheBytes:L,limitedKvCacheBytes:F,memoryLimitedCtx:E,recurrentMemoryBytes:M,kvInfo:x,quantization:D}=await v6(j);Y=B,J=L,N=F,H=E,O=M,q=x,_=D}catch(B){}let G=j?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(j.backend.gpu_memory_fraction))):void 0,w=j?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(j.backend.cpu_memory_fraction))):void 0,z=await _0({...V,platform:process.platform,totalMemoryInBytes:T0.totalmem(),backend:"ggml-llm",includeBreakdown:W,gpuMemoryFraction:G,cpuMemoryFraction:w,dependencies:{getBackendDevicesInfo:$2,isLibVariantAvailable:X2},modelBytes:Y,kvCacheBytes:J,limitedKvCacheBytes:N}),A=z.selected,R=a1(A);A.modelBytes=Y||null,A.kvCacheBytes=J||null,A.memoryLimitedCtx=H||null,A.limitedKvCacheBytes=N||null,A.recurrentMemoryBytes=O||null,A.kvInfo=q||null,A.quantization=_||null;let U=null,Q=null;if($){let B=a1($);Q={...$,score:B};let L="buttress",F="buttress-higher-score";if(!z.ok)L="local",F="buttress-unavailable";else 
if(!B&&B!==0)L="buttress",F="missing-client-score";else{let{fit:E,limitedFit:M}=Q,x=A?.fit,D=A?.limitedFit,I=E?.fitsInGpu||E?.fitsInCpu||M?.fitsInGpu||M?.fitsInCpu,b=x?.fitsInGpu||x?.fitsInCpu||D?.fitsInGpu||D?.fitsInCpu;if(I&&!b)L="local",F="client-fits-in-memory";else if(b&&!I)L="buttress",F="buttress-fits-in-memory";else if(B>R*Z)L="local",F="client-better";else if(R>B*Z)L="buttress",F="buttress-better";else L="either",F="comparable-scores"}U={buttressScore:R,clientScore:B,threshold:Z,recommendation:L,reason:F}}if(!z.ok&&!U)U={buttressScore:R,clientScore:$?.score??null,threshold:Z,recommendation:"local",reason:"buttress-unavailable"};let K=null;if(j)K={repoId:j.model?.repo_id||null,quantization:j.model?.quantization||null,nCtx:j.model?.n_ctx||null,cacheKType:j.model?.cache_type_k||"f16",cacheVType:j.model?.cache_type_v||"f16"};return{type:"ggml-llm",timestamp:new Date().toISOString(),buttress:z,client:Q,comparison:U,modelConfig:K}}var H6=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return G6},O6,Z2,K6,W2=($={},X={})=>{return Object.entries(X||{}).forEach(([Z,W])=>{if(W&&typeof W==="object"&&!Array.isArray(W)){if(!$[Z]||typeof $[Z]!=="object")$[Z]={};W2($[Z],W)}else $[Z]=W}),$},q6=".gguf",j2="https://huggingface.co",Y2="https://huggingface.co/api",n,Q2,t0=0.5,S0,u4=($,X=[])=>{if(!$&&$!==0)return[...X];if(Array.isArray($))return $.filter((Z)=>Z!=null);return[$]},n0=($)=>{if(!$)return null;let X=String($).toLowerCase();if(["cuda","vulkan","snapdragon","default"].includes(X))return X;return null},e0=($={})=>{let X=JSON.parse(JSON.stringify(S0));if(W2(X,$),X.backend.variant=n0(X.backend.variant),X.backend.variant_preference=Array.from(new 
/**
 * Detects the quantization tag of a model file from its name.
 * Returns the first entry of `Q2` that occurs in the lower-cased file name,
 * or `null` when none does.
 */
const l1 = (filename) => {
  const lowered = filename.toLowerCase();
  const tag = Q2.find((candidate) => lowered.includes(candidate));
  return tag ? tag : null;
};

/**
 * Builds the ordered, de-duplicated list of backend variants to try:
 * the explicit `backend.variant` first, then `runtime.prefer_variants`,
 * then `backend.variant_preference`, and finally "default". Every entry is
 * passed through `n0`; entries it rejects are dropped.
 */
const _6 = (config) => {
  const { backend, runtime } = config;
  const candidates = [
    ...(backend.variant ? [backend.variant] : []),
    ...(runtime.prefer_variants.length > 0 ? runtime.prefer_variants : []),
    ...backend.variant_preference,
    "default",
  ];
  const normalized = candidates.map(n0).filter(Boolean);
  return [...new Set(normalized)];
};
/**
 * Tells whether a cached session state (described by `cached`) can be reused
 * for a request described by `requested`: all listed settings must be
 * strictly equal, the cached context size must be at least the requested
 * one, and the recurrent/hybrid flags must agree after boolean coercion.
 */
const m4 = (cached, requested) => {
  const mustMatch = [
    "modelPath",
    "variant",
    "n_gpu_layers",
    "cacheTypeK",
    "cacheTypeV",
    "kvUnified",
    "swaFull",
    "flashAttnType",
  ];
  if (!mustMatch.every((field) => cached[field] === requested[field])) return false;
  if (!(cached.n_ctx >= requested.n_ctx)) return false;
  if (Boolean(cached.isRecurrent) !== Boolean(requested.isRecurrent)) return false;
  return Boolean(cached.isHybrid) === Boolean(requested.isHybrid);
};

/**
 * Length of the common prefix of two strings (or indexable sequences),
 * compared element by element with `===`.
 */
const R6 = (left, right) => {
  const limit = Math.min(left.length, right.length);
  let index = 0;
  for (; index < limit; index += 1) {
    if (left[index] !== right[index]) break;
  }
  return index;
};
/**
 * Evicts least-recently-accessed session cache entries until the cache is
 * within both limits.
 *
 * Entries are visited oldest `lastAccessedAt` first. Each evicted entry has
 * its state file (and prompt state file, if any) unlinked via `o` with
 * unlink errors ignored, and is removed from `cacheMap.entries`.
 * `cacheMap.totalSize` is updated and never drops below 0.
 *
 * Compared with the previous version the selection is an explicit loop
 * instead of a `filter` callback that mutated counters as a side effect;
 * the selected entries are the same.
 *
 * @param {{entries: object, totalSize: number}} cacheMap Mutated in place.
 * @param {number} maxSizeBytes Size limit.
 * @param {number} maxEntries Entry-count limit.
 * @returns {Promise<string[]>} Ids of the evicted entries.
 */
const F6 = async (cacheMap, maxSizeBytes, maxEntries) => {
  const oldestFirst = Object.values(cacheMap.entries).sort(
    (a, b) => new Date(a.lastAccessedAt) - new Date(b.lastAccessedAt),
  );
  let projectedBytes = cacheMap.totalSize;
  let projectedCount = Object.keys(cacheMap.entries).length;

  const victims = [];
  for (const entry of oldestFirst) {
    const overSize = projectedBytes > maxSizeBytes;
    const overCount = projectedCount > maxEntries;
    if (!overSize && !overCount) break;
    projectedBytes -= (entry.stateFileSize || 0) + (entry.promptStateSize || 0);
    projectedCount -= 1;
    victims.push(entry);
  }

  await Promise.all(
    victims.map(async (entry) => {
      await o(entry.stateFilePath).catch(() => {});
      if (entry.promptStatePath) await o(entry.promptStatePath).catch(() => {});
      delete cacheMap.entries[entry.id];
      console.log(`[SessionCache] Evicted entry: ${entry.id}`);
    }),
  );

  cacheMap.totalSize = Math.max(0, projectedBytes);
  return victims.map((entry) => entry.id);
};

/**
 * Removes cache entries that a newly stored text supersedes: entries other
 * than `keepId`, compatible per `m4`, whose `fullText` is a strict prefix of
 * `fullText`.
 *
 * Fixes over the previous version:
 *  - the log line read `entry.promptText.length` unconditionally; an entry
 *    without `promptText` raised a TypeError after its files had already
 *    been unlinked. It now falls back to `fullText` (NOTE(review): confirm
 *    whether every stored entry carries `promptText`);
 *  - `cacheMap.totalSize` is clamped at 0, as `F6` already does.
 *
 * @param {{entries: object, totalSize: number}} cacheMap Mutated in place.
 * @param {string} fullText Text of the entry being stored.
 * @param {string} keepId Key of the entry that must not be evicted.
 * @param {object} metadata Metadata that candidates must be compatible with.
 * @returns {Promise<string[]>} Ids of the evicted entries.
 */
const E6 = async (cacheMap, fullText, keepId, metadata) => {
  const superseded = Object.entries(cacheMap.entries)
    .filter(([key, entry]) => {
      if (key === keepId) return false;
      if (!m4(entry.metadata, metadata)) return false;
      return fullText.startsWith(entry.fullText) && entry.fullText.length < fullText.length;
    })
    .map(([, entry]) => entry);

  await Promise.all(
    superseded.map(async (entry) => {
      await o(entry.stateFilePath).catch(() => {});
      if (entry.promptStatePath) await o(entry.promptStatePath).catch(() => {});
      const freedBytes = (entry.stateFileSize || 0) + (entry.promptStateSize || 0);
      cacheMap.totalSize = Math.max(0, cacheMap.totalSize - freedBytes);
      delete cacheMap.entries[entry.id];
      const promptChars = (entry.promptText ?? entry.fullText)?.length ?? 0;
      console.log(
        `[SessionCache] Evicted superseded prefix entry: ${entry.id} (${promptChars} prompt chars)`,
      );
    }),
  );

  return superseded.map((entry) => entry.id);
};
/**
 * Resolves which model artifact to use for a config and how large it is.
 *
 * Resolution order:
 *  1. a cached answer from the "artifact-info" metadata cache (`V2`);
 *  2. an explicit `model.url` (size taken from a HEAD request);
 *  3. the repository listing at `${api_base}/models/<repo>`, picking
 *     `model.filename` or the first file matching the preferred
 *     quantizations (falling back to `Q2`, then to the first GGUF file).
 * For split files (`-00001-of-0000N.gguf`) the size is the sum of the part
 * sizes reported by the listing.
 *
 * Fixes over the previous version:
 *  - request headers (including `Authorization: Bearer <token>`) are no
 *    longer written into the on-disk cache, and a cache hit now gets the
 *    headers of the current config instead of whatever was cached. The
 *    cache key never included the headers, so a changed token used to be
 *    ignored until the cache entry disappeared;
 *  - split models reuse the listing that was already fetched instead of
 *    requesting the same URL a second time.
 *
 * @param {object} config Normalized backend config (`model`, `runtime`).
 * @returns {Promise<object>} `{ repoId, revision, filename, url, size,
 *   headers, ... }`; the listing path also sets `quantization`, `isSplit`
 *   and `splitCount`.
 * @throws {Error} When no repo id is configured, the repo has no GGUF
 *   files, or a request made through `r1`/`o1` fails.
 */
const c4 = async (config) => {
  const { model, runtime } = config;

  const repoId = model.repo_id || model.repository || model.model;
  if (!repoId) throw Error("`model.repo_id` is required in Buttress backend config");

  const revision = model.revision || "main";
  const cacheDir = runtime.cache_dir;
  // Key layout is unchanged so existing cache files keep matching.
  const cacheKey = JSON.stringify({
    repoId,
    revision,
    filename: model.filename,
    url: model.url,
    quantization: model.quantization,
    preferred_quantizations: model.preferred_quantizations,
  });

  const headers = { ...(runtime.http_headers || {}) };
  if (runtime.huggingface_token) headers.Authorization = `Bearer ${runtime.huggingface_token}`;

  const cached = await V2(cacheKey, "artifact-info", cacheDir);
  if (cached) return { ...cached, headers };

  // Stores everything except the (possibly secret) headers.
  const persist = async (artifact) => {
    const { headers: _notPersisted, ...persistable } = artifact;
    await g4(cacheKey, "artifact-info", persistable, cacheDir);
    return artifact;
  };

  if (model.url) {
    const head = await o1(model.url, { headers });
    return persist({
      repoId,
      revision,
      filename: model.filename || model.url.split("/").pop(),
      url: model.url,
      size: Number(head.headers.get("content-length")) || null,
      headers,
    });
  }

  const listing = await r1(`${model.api_base}/models/${repoId}?revision=${revision}&blobs=true`, {
    headers,
  });
  const siblings = listing?.siblings || listing?.files || [];
  const siblingName = (sibling) => sibling.rfilename || sibling.path || sibling.filename;
  const ggufFiles = siblings
    .map(siblingName)
    .filter((name) => typeof name === "string" && name.endsWith(q6));
  if (ggufFiles.length === 0) throw Error(`No GGUF artifacts found in repo ${repoId}`);

  const preferred =
    model.preferred_quantizations.length > 0 ? model.preferred_quantizations : Q2;
  const pickPreferred = () => {
    for (const tag of preferred) {
      const match = ggufFiles.find((name) => name.toLowerCase().includes(tag));
      if (match) return { filename: match, quantization: tag };
    }
    return null;
  };

  let filename = model.filename;
  let quantization = model.quantization && String(model.quantization).toLowerCase();
  if (!filename) {
    const choice = pickPreferred() || { filename: ggufFiles[0], quantization: null };
    filename = choice.filename;
    quantization = choice.quantization || l1(filename);
  } else if (!quantization) {
    quantization = l1(filename);
  }

  const url = `${model.base_url.replace(/\/+$/, "")}/${repoId}/resolve/${revision}/${filename}`;
  const splitPattern = /-(\d{5})-of-(\d{5})\.gguf$/;
  const splitMatch = filename.match(splitPattern);

  let size = null;
  if (splitMatch) {
    const [, , totalText] = splitMatch;
    const total = Number(totalText);
    size = 0;
    for (let part = 1; part <= total; part += 1) {
      const partName = filename.replace(
        splitPattern,
        `-${String(part).padStart(5, "0")}-of-${totalText}.gguf`,
      );
      const sibling = siblings.find((candidate) => siblingName(candidate) === partName);
      const partSize = Number(sibling?.size);
      // Parts the listing does not size are skipped rather than failing.
      if (Number.isFinite(partSize) && partSize > 0) size += partSize;
    }
  } else {
    const head = await o1(url, { headers });
    size = Number(head.headers.get("content-length")) || null;
  }

  return persist({
    repoId,
    revision,
    filename,
    url,
    size,
    quantization,
    headers,
    isSplit: Boolean(splitMatch),
    splitCount: splitMatch ? Number(splitMatch[2]) : 0,
  });
};
Errors: ${_}`)}let O=(N.attempts||[]).map(H);return{selected:H(N.selected),attempts:O}},S6=async($)=>{let X=await c4($),Z=await G2(X.url,X.headers,$.runtime.cache_dir),{arch:W,nCtxTrain:j,nLayer:V,nEmbd:Y,nHead:J,nHeadKv:N,nEmbdHeadK:H,nEmbdHeadV:O,quantVersion:q,fileType:_,attentionLayerCount:G,recurrentLayerCount:w,ssmDConv:z,ssmDState:A,ssmDInner:R,ssmNGroup:U,ssmDtRank:Q,rwkvHeadSize:K,rwkvTokenShiftCount:B}=d0(Z),L=Number.isFinite(Number(V))?Number(V):0,F=Number.isFinite(Number(Y))?Number(Y):0,E=Number.isFinite(Number(J))?Number(J):0,M=Number.isFinite(Number(N))?Number(N):E,x=E>0&&F>0?F/E:128,D=H!=null&&Number.isFinite(Number(H))?Number(H):x,I=O!=null&&Number.isFinite(Number(O))?Number(O):x,b=c0({arch:W,metadata:Z,nLayer:L}),u=b&&Number.isFinite(Number(b.kvLayers))?Number(b.kvLayers):L,r=Math.max(0,Math.floor(Number(u)||0)),k={use_mmap:$.model.use_mmap??$.runtime.use_mmap,use_mlock:$.model.use_mlock??$.runtime.use_mlock,n_threads:$.model.n_threads??$.runtime.n_threads,n_ctx:$.model.n_ctx??$.runtime.n_ctx,n_batch:$.model.n_batch??$.runtime.n_batch,n_ubatch:$.model.n_ubatch??$.runtime.n_ubatch,n_cpu_moe:$.model.n_cpu_moe??$.runtime.n_cpu_moe,n_parallel:$.model.n_parallel??$.runtime.n_parallel,cpu_mask:$.model.cpu_mask??$.runtime.cpu_mask,cpu_strict:$.model.cpu_strict??$.runtime.cpu_strict,devices:$.model.devices??$.runtime.devices,n_gpu_layers:$.model.n_gpu_layers??$.runtime.n_gpu_layers,flash_attn_type:$.model.flash_attn_type??$.runtime.flash_attn_type,cache_type_k:$.model.cache_type_k??$.runtime.cache_type_k,cache_type_v:$.model.cache_type_v??$.runtime.cache_type_v,kv_unified:$.model.kv_unified??$.runtime.kv_unified,swa_full:$.model.swa_full??$.runtime.swa_full,ctx_shift:$.model.ctx_shift??$.runtime.ctx_shift},d=k.n_ctx?Number(k.n_ctx):null,v=d||j||4096,S=[],C=[],h=!0;if(d&&j&&d>j){h=!1;let V0=`Requested context length (${d}) exceeds model training context (${j})`;S.push(V0),C.push(V0),v=j}if(d&&!j)S.push("Model metadata missing training context length, using 
requested value");let p={k:k.cache_type_k,v:k.cache_type_v},f=X.size>0?X.size:0,s=G0({layerCount:r,headKvCount:M,embdHeadKCount:D,embdHeadVCount:I,cacheTypes:p,swaConfig:b,kvUnified:k.kv_unified,nParallel:k.n_parallel,swaFull:k.swa_full,arch:W,attentionLayerCount:G}),W0=l0({nLayer:L,nEmbd:F,recurrentLayerCount:w,nSeqMax:k.n_parallel||4,ssmDConv:z,ssmDState:A,ssmDInner:R,ssmNGroup:U,ssmDtRank:Q,rwkvHeadSize:K,rwkvTokenShiftCount:B,arch:W}),A0=s(v),l=await O2($,{modelBytes:f,kvCacheBytes:A0+W0}),L4=l.selected.totalMemory||0,F0=L4*($.backend.gpu_memory_fraction||1),A4=$.backend.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):t0,h0=Math.max(0,T0.totalmem()*A4),I0=l.selected.hasGpu?F0:h0,U0=i0({maxCtx:v,availableMemory:I0,modelBytes:f,kvBytesForCtx:s});if(!d&&U0){let V0=j?Math.min(U0,j):U0,R4=Math.max(32,V0);if(R4<v)S.push(`Context length capped to ${R4} by memory limits`);v=R4}if(v>U0)v=U0;let C0=Math.floor(U0);console.log(`[buttress] Memory-limited context length: ${C0}`);let A1=s(v),p3=f+A1+W0,z1=L?f/(L+1):f,y0=0;if(l.selected.hasGpu&&z1>0)y0=Math.min(L+1,Math.max(0,Math.floor(F0/z1)));console.log(`[buttress] Auto GPU layer capacity (${l.selected.variant}): ${y0}/${L+1}`);let z4;if(k.n_gpu_layers==="auto"||k.n_gpu_layers==null)z4=y0;else z4=Math.max(0,Math.min(Number(k.n_gpu_layers)||0,L+1));let m3=(()=>{let V0=k.flash_attn_type&&String(k.flash_attn_type).toLowerCase();if(V0==="on"||V0==="off")return V0;if(V0==="auto")return l.selected.hasGpu?"auto":"off";return l.selected.hasGpu?"auto":"off"})(),c3=$.runtime.cache_dir,B4=H2($,X),B1=await 
P0(B4,X.size),d3={ok:h,backend:"ggml-llm",warnings:S,errors:C,model:{repoId:X.repoId,revision:X.revision,filename:X.filename,quantization:X.quantization,url:X.url,sizeBytes:X.size,metadata:{architecture:W,n_ctx_train:j,n_layer:L,n_embd:F,quantization_version:q,file_type:_,kv_layer_count:r,swa:b?.enabled?{window:b.window,pattern:b.pattern,dense_first:b.denseFirst,type:b.type,layers:b.swaLayers}:null}},runtime:{...k,variant:l.selected.variant,n_ctx:v,requested_ctx:d,n_gpu_layers:z4,auto_gpu_layers:y0,flash_attn_type:m3,cache_type_k:p.k,cache_type_v:p.v,estimated_max_n_ctx:C0},resources:{modelBytes:f,kvCacheBytes:A1,recurrentMemoryBytes:W0,totalEstimatedBytes:p3,gpuCapacityBytes:L4,gpuUsableBytes:F0,cpuUsableBytes:h0,fit:l.selected.fit},devices:{selected:l.selected,attempts:l.attempts},download:{cacheDir:c3,localPath:B4,exists:B1},timestamp:new Date().toISOString()};return{config:$,info:d3,artifact:X,metadata:{arch:W,nCtxTrain:j,nLayer:L,nEmbd:F},devices:l,cacheTypes:p,localPath:B4,localExists:B1}},P6=($,X,Z=null,W=null)=>{let j,V=Date.now(),Y=0;return new Z2({async start(J){try{let N=await $.parallel.completion(X,(G,w)=>{if(!w)return;if(w.token)Y+=1;J.enqueue({event:"token",data:{requestId:G,...w}})}),{requestId:H}=N;j=N.stop;let O=await N.promise;console.log("[Completion] Result:",O),J.enqueue({event:"result",data:{requestId:H,...O}}),J.close();let 
q=Date.now()-V,_=O.timings||{};$0.addCompletion({id:`completion-${H}`,generatorId:Z,requestId:H,repoId:W?.repoId||null,quantization:W?.quantization||null,variant:W?.variant||null,cacheTokens:_.cache_n??0,promptTokens:_.prompt_n??0,tokensGenerated:_.predicted_n??Y,tokensPerSecond:_.predicted_per_second??0,promptPerSecond:_.prompt_per_second??0,durationMs:q,success:!0,interrupted:O.interrupted||!1,contextFull:O.context_full||O.contextFull||!1})}catch(N){J.enqueue({event:"error",data:{message:N?.message||String(N)}}),J.error(N),$0.addCompletion({id:`completion-${Date.now()}`,generatorId:Z,repoId:W?.repoId||null,quantization:W?.quantization||null,variant:W?.variant||null,durationMs:Date.now()-V,tokensGenerated:Y,success:!1,error:N?.message||String(N)})}},cancel(){if(j)j()}})},T6=($,X,Z,W,j,V,Y=null,J=null,N=null)=>{let H,O="",q=!1,_=Date.now(),G=0,w=()=>{if(j)o(j).catch(()=>{});if(N)o(N).catch(()=>{})};return new Z2({async start(z){try{let A=await $.parallel.completion(X,(B,L)=>{if(!L)return;if(L.token)O+=L.token,G+=1;z.enqueue({event:"token",data:{requestId:B,...L}})}),{requestId:R}=A;H=A.stop;let U=await A.promise;if(U.text)O=U.text;else if(U.content)O=U.content;q=!U.interrupted&&!U.context_full,console.log("[Completion] Result:",U),z.enqueue({event:"result",data:{requestId:R,...U}}),z.close();let Q=Date.now()-_,K=U.timings||{};if($0.addCompletion({id:`completion-${R}`,generatorId:Y,requestId:R,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,cacheTokens:K.cache_n??0,promptTokens:K.prompt_n??V??0,tokensGenerated:K.predicted_n??G,tokensPerSecond:K.predicted_per_second??0,promptPerSecond:K.prompt_per_second??0,durationMs:Q,success:!0,interrupted:U.interrupted||!1,contextFull:U.context_full||U.contextFull||!1,usedCache:Boolean(X.load_state_path)}),q&&Z.enabled&&O)Z.saveCompletionState(W,O,j,V,N).catch((B)=>{console.warn("[SessionCache] Save failed:",B.message)});else 
w()}catch(A){z.enqueue({event:"error",data:{message:A?.message||String(A)}}),z.error(A),$0.addCompletion({id:`completion-${Date.now()}`,generatorId:Y,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,durationMs:Date.now()-_,tokensGenerated:G,success:!1,error:A?.message||String(A)}),w()}},cancel(){if(H)H();w()}})},z0=($)=>{let X={model:$.plan.localPath,runtime:$.plan.info.runtime};return p4("sha256").update(JSON.stringify(X)).digest("hex").slice(0,24)},k6=async($,X,Z,W=null)=>{let{config:j,localPath:V,artifact:Y}=$;if($.localExists&&!X.has(V)){if($.info.download.exists=!0,typeof Z==="function")Z(0.5);return V}if(j.model.local_path&&!j.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let J=V;if(W){let N=W.getDownload(J);if(N){console.log(`[ensureModelFile] Waiting for global download: ${Y.repoId}`);try{if(await N,await P0(V,Y.size)){if($.localExists=!0,$.info.download.exists=!0,typeof Z==="function")Z(0.5);return V}}catch(H){console.warn(`[ensureModelFile] Global download failed, will retry: ${H.message}`)}}}if(!X.has(J))X.set(J,(async()=>{if(Y.isSplit&&Y.splitCount>0){let N=/-(\d{5})-of-(\d{5})\.gguf$/,H=y.dirname(V),O=Y.splitCount,q=0;for(let _=1;_<=O;_+=1){let G=String(_).padStart(5,"0"),w=Y.filename.replace(N,`-${G}-of-${String(O).padStart(5,"0")}.gguf`),z=`${j.model.base_url.replace(/\/+$/,"")}/${Y.repoId}/resolve/${Y.revision}/${w}`,A=y.join(H,w);if(!await P0(A))await a0(z,Y.headers,A,null,(U)=>{if(U>=0&&Number.isFinite(U)){let Q=(q+U)/O,K=Math.round(Q*100);if(console.log(`Downloading model splits: ${Math.min(100,K)}%`),typeof Z==="function")Z(Q*0.5)}});q+=1}}else console.log("Downloading model: 0%"),await a0(Y.url,Y.headers,V,Y.size,(N)=>{if(N>=0&&Number.isFinite(N)){let H=Math.round(N*100);if(console.log(`Downloading model: ${Math.min(100,H)}%`),typeof Z==="function")Z(N*0.5)}});$.localExists=!0,$.info.download.exists=!0})());try{await 
X.get(J)}finally{X.delete(J)}return V},D6=async($,X)=>{let Z=z0($),W=$.contexts.get(Z);if(W&&!W.released){if(W.releaseTimer)clearTimeout(W.releaseTimer),W.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${Z}"`);if(W.releaseRequested=!1,W.refCount+=1,console.log(`[Context] Reusing existing context "${Z}", refCount=${W.refCount}`),typeof X==="function")X(0);if(!W.context)await W.ready;if(typeof X==="function")X(1);return W}if(W)console.log(`[Context] Record exists but released=${W.released}, creating new context`);else console.log(`[Context] No existing record for "${Z}", creating new context`);W={key:Z,refCount:1,ready:null,released:!1},$.contexts.set(Z,W),W.ready=(async()=>{let j=Date.now(),V=await k6($.plan,$.downloads,X,$.globalDownloadManager);if(typeof X==="function")X(0.5);let Y={model:V,n_threads:$.plan.info.runtime.n_threads,use_mmap:$.plan.info.runtime.use_mmap,use_mlock:$.plan.info.runtime.use_mlock,cpu_mask:$.plan.info.runtime.cpu_mask,cpu_strict:$.plan.info.runtime.cpu_strict,devices:$.plan.info.runtime.devices,n_ctx:$.plan.info.runtime.n_ctx,n_gpu_layers:$.plan.info.runtime.n_gpu_layers,n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch,n_ubatch:$.plan.info.runtime.n_ubatch,n_cpu_moe:$.plan.info.runtime.n_cpu_moe,flash_attn_type:$.plan.info.runtime.flash_attn_type,ctx_shift:$.plan.info.runtime.ctx_shift,kv_unified:$.plan.info.runtime.kv_unified,swa_full:$.plan.info.runtime.swa_full,lib_variant:$.plan.info.runtime.variant};if($.plan.info.runtime.flash_attn_type!=="off")Y.cache_type_k=$.plan.info.runtime.cache_type_k,Y.cache_type_v=$.plan.info.runtime.cache_type_v;console.log("[Context] Load Options:",Y);let J;try{if(J=await N6(Y,(N)=>{if(typeof X==="function"){if(X(0.5+N*0.25),N%5===0)console.log("[Context] Load Model Progress:",N)}}),$.plan.info.runtime.n_parallel){if(!await J.parallel.enable({n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch}))throw Error("Failed to 
/**
 * Releases a context record now.
 *
 * Does nothing (returns `false`) when the record is already released, or
 * when it is still referenced and `force` is not set. Otherwise marks it
 * released, removes it from `generator.contexts`, tries to disable parallel
 * mode (errors ignored) and awaits `context.release()`.
 *
 * @param {object} generator Owner; `generator.contexts` is a Map keyed by `record.key`.
 * @param {object} record Context record (`released`, `refCount`, `key`, `context`).
 * @param {boolean} [force=false] Release even if `refCount > 0`.
 * @returns {Promise<boolean>} `true` when the record was released by this call.
 */
const r0 = async (generator, record, force = false) => {
  if (record.released) return false;
  if (!force && record.refCount > 0) return false;

  record.released = true;
  generator.contexts.delete(record.key);
  try {
    record.context?.parallel?.disable?.();
  } catch {
    // Disabling parallel mode is best effort; the release below still runs.
  }
  await record.context?.release?.();
  return true;
};

/**
 * Drops one reference to a context record and releases it once unused,
 * either immediately or after `config.runtime.context_release_delay_ms`.
 *
 * Fix over the previous version: a failing release inside the delayed timer
 * callback is now caught and logged. Before, the async timer callback
 * rejected with nobody listening, which surfaces as an unhandled promise
 * rejection.
 *
 * @param {object} generator Owner; provides `config.runtime` and `contexts`.
 * @param {object} record Context record; `refCount`, `releaseRequested` and
 *   `releaseTimer` are updated in place.
 * @param {boolean} [force=false] Drop all references and release immediately.
 * @returns {Promise<boolean>} `false` when the record is still referenced;
 *   `true` when a delayed release was scheduled; otherwise the result of `r0`.
 */
const b6 = async (generator, record, force = false) => {
  record.releaseRequested = true;
  if (record.releaseTimer) {
    clearTimeout(record.releaseTimer);
    record.releaseTimer = null;
  }

  if (force) {
    record.refCount = 0;
  } else {
    record.refCount = Math.max(0, record.refCount - 1);
    if (record.refCount > 0) {
      record.releaseRequested = false;
      return false;
    }
  }

  const configuredDelay = generator.config.runtime.context_release_delay_ms;
  if (typeof configuredDelay !== "number" || !Number.isFinite(configuredDelay)) {
    return r0(generator, record);
  }
  const delayMs = Math.max(0, Math.floor(configuredDelay));
  if (force || delayMs <= 0) return r0(generator, record);

  console.log(`[Context] Scheduling release in ${delayMs}ms for context "${record.key}"`);
  record.releaseTimer = setTimeout(async () => {
    record.releaseTimer = null;
    // The context may have been picked up again while the timer was pending.
    if (record.refCount > 0) {
      console.log(
        `[Context] Release cancelled, refCount=${record.refCount} for context "${record.key}"`,
      );
      record.releaseRequested = false;
      return;
    }
    console.log(`[Context] Releasing context "${record.key}" after ${delayMs}ms delay`);
    try {
      await r0(generator, record);
    } catch (error) {
      console.error(
        `[Context] Failed to release context "${record.key}":`,
        error?.message || String(error),
      );
    }
  }, delayMs);
  return true;
};
/**
 * Serial task queue: tasks run one at a time in the order they were
 * enqueued, and each `enqueue` call resolves or rejects with the outcome of
 * its own task.
 */
class v2 {
  constructor() {
    this.queue = [];
    this.processing = false;
    this.currentTaskId = null;
  }

  /**
   * Adds a task and starts processing if the queue is idle.
   * @param {Function} $ Function returning a value or a promise.
   * @param {*} [X=null] Identifier reported by `getStatus()` while the task runs.
   * @returns {Promise<*>} Settles with the task's result or error.
   */
  async enqueue($, X = null) {
    return new Promise((resolve, reject) => {
      const job = { task: $, resolve, reject, taskId: X };
      this.queue.push(job);
      this.processNext();
    });
  }

  /** Runs the next queued task unless one is already running. */
  async processNext() {
    const idle = !this.processing;
    if (!idle || this.queue.length === 0) {
      return;
    }

    this.processing = true;
    const { task, resolve, reject, taskId } = this.queue.shift();
    this.currentTaskId = taskId;

    try {
      const outcome = await task();
      resolve(outcome);
    } catch (failure) {
      reject(failure);
    } finally {
      this.processing = false;
      this.currentTaskId = null;
      // Keep draining: the next task (if any) starts right away.
      this.processNext();
    }
  }

  /** Snapshot of the queue state. */
  getStatus() {
    const { processing, currentTaskId } = this;
    return { processing, queuedCount: this.queue.length, currentTaskId };
  }
}
v2,finalized:!1},J=async()=>{if(Y.finalized)return;Y.finalized=!0;let A=Y.contextRecord;if(!A)return;if(A.released)return;if(A.releaseRequested||A.releaseTimer)return;if(A.refCount=Math.max(0,A.refCount-1),A.refCount>0)return;await Z4(Y,A)},N=async(A={})=>{let{onProgress:R}=A;try{let U=await e6(Y,R);return{modelInfo:U.modelInfo&&typeof U.modelInfo==="object"?{...U.modelInfo}:null,runtime:{...Y.plan.info.runtime},download:{...Y.plan.info.download}}}catch(U){throw console.error("[Context] Error initializing context:",U),U}},H=async()=>{if(Y.finalized)return!1;let A=Y.contextRecord;if(!A)return!1;return $9(Y,A)},O=async(A={})=>{let{audioPath:R,audioData:U,options:Q={}}=A,K=Y.contextRecord;if(!K)throw Error("Context not initialized");let B={...Q};if(Y.plan.info.runtime.max_threads&&B.maxThreads==null)B.maxThreads=Y.plan.info.runtime.max_threads;let L=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,F=Date.now();return Y.queue.enqueue(async()=>{await K.ready;try{let E;if(U){let M=t6(U),{promise:x}=K.context.transcribeData(M,B);E=await x}else{if(!R)throw Error("audioPath or audioData is required for transcription");let M=j0.resolve(R),{promise:x}=K.context.transcribe(M,B);E=await x}return N0.addTranscription({id:L,generatorId:Y.id,repoId:Y.plan.info.model?.repoId||null,quantization:Y.plan.info.model?.quantization||null,modelType:Y.plan.info.model?.modelType||null,variant:Y.plan.info.runtime?.variant||null,durationMs:Date.now()-F,segmentCount:E?.segments?.length||0,textLength:E?.text?.length||0,success:!0}),E}catch(E){throw N0.addTranscription({id:L,generatorId:Y.id,repoId:Y.plan.info.model?.repoId||null,quantization:Y.plan.info.model?.quantization||null,modelType:Y.plan.info.model?.modelType||null,variant:Y.plan.info.runtime?.variant||null,durationMs:Date.now()-F,success:!1,error:E?.message||String(E)}),E}},L)},q=async(A={})=>O(A),_=async(A={})=>O(A),G=()=>{let 
/**
 * Starts (or joins) a background download of the STT model described by
 * `config` and reports what happened.
 *
 * Outcomes:
 *  - the file already exists with the expected size: `onComplete` is called
 *    and `{ started: false, alreadyExists: true }` is returned;
 *  - a download for the same path is registered in `downloadManager`: the
 *    callbacks are attached to it and `{ started: false,
 *    alreadyDownloading: true }` is returned;
 *  - otherwise a download is started, registered via `setDownload` and
 *    `{ started: true }` is returned without waiting for it to finish;
 *  - any failure while preparing: `onError` is called and `{ started:
 *    false, error }` is returned (this function does not reject for those).
 *
 * Fix over the previous version: the background promise still rejects for
 * anyone who joins it later, but a no-op handler is attached so a failed
 * download that nobody joined does not become an unhandled promise
 * rejection (the failure has already been logged and sent to `onError`).
 * NOTE(review): `downloadManager` is defined outside this chunk; if it
 * already attaches a handler, the extra one is harmless.
 *
 * @param {object} config Raw STT backend config (normalized through `j4`).
 * @param {object} downloadManager Exposes `getDownload`, `setDownload`, `deleteDownload`.
 * @param {{onProgress?: Function, onComplete?: Function, onError?: Function}} [callbacks]
 * @returns {Promise<object>} Status object as described above.
 */
async function I2(config, downloadManager, callbacks = {}) {
  const { onProgress, onComplete, onError } = callbacks;
  const notify = (callback, payload) => {
    if (typeof callback === "function") callback(payload);
  };

  try {
    const normalized = j4(config);
    const artifact = await e4(normalized);
    const localPath = D2(normalized, artifact);
    const { repoId } = artifact;

    if (await W4(localPath, artifact.size)) {
      console.log(`[Download] STT model already exists: ${repoId} at ${localPath}`);
      notify(onComplete, { localPath, repoId, alreadyExists: true });
      return { started: false, localPath, repoId, alreadyExists: true };
    }

    const inFlight = downloadManager.getDownload(localPath);
    if (inFlight) {
      console.log(`[Download] Already downloading STT model: ${repoId}`);
      inFlight
        .then(() => {
          notify(onComplete, { localPath, repoId, joinedExisting: true });
        })
        .catch((error) => {
          notify(onError, error);
        });
      return { started: false, localPath, repoId, alreadyDownloading: true };
    }

    console.log(`[Download] Starting STT model download: ${repoId}`);
    const download = (async () => {
      try {
        await b2(artifact.url, artifact.headers, localPath, artifact.size, (fraction) => {
          if (fraction >= 0 && Number.isFinite(fraction)) {
            console.log(`[Download] ${repoId}: ${Math.round(fraction * 100)}%`);
            notify(onProgress, fraction);
          }
        });
        console.log(`[Download] Completed STT model: ${repoId}`);
        notify(onComplete, { localPath, repoId });
      } catch (error) {
        console.error(`[Download] Failed STT model: ${repoId}`, error.message);
        notify(onError, error);
        throw error;
      } finally {
        downloadManager.deleteDownload(localPath);
      }
    })();
    // Mark the rejection as observed; joiners still see it on `download`.
    download.catch(() => {});
    downloadManager.setDownload(localPath, download);
    return { started: true, localPath, repoId };
  } catch (error) {
    console.error("[Download] Failed to start STT download:", error.message);
    notify(onError, error);
    return { started: false, localPath: null, repoId: null, error: error.message };
  }
}
C2($=null,X={}){let{threshold:Z=1.1,includeBreakdown:W=!1,config:j,...V}=X,Y=null,J=null,N=null;if(j)try{let R=j4(j),U=await e4(R);Y=U.size??null,{processingBufferBytes:J}=m0({modelBytes:Y}),N=U.quantization||null}catch(R){}let H=j?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(j.backend.gpu_memory_fraction))):void 0,O=j?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(j.backend.cpu_memory_fraction))):void 0,q=await _0({...V,platform:process.platform,totalMemoryInBytes:t4.totalmem(),backend:"ggml-stt",includeBreakdown:W,gpuMemoryFraction:H,cpuMemoryFraction:O,dependencies:{getBackendDevicesInfo:R2,isLibVariantAvailable:w2},modelBytes:Y,kvCacheBytes:J}),_=q.selected,G=z2(_);if(_)_.modelBytes=Y||null,_.processingBytes=J||null,_.quantization=N||null;let w=null,z=null;if($){let R=z2($);z={...$,score:R};let U="buttress",Q="buttress-higher-score";if(!q.ok)U="local",Q="buttress-unavailable";else if(!R&&R!==0)U="buttress",Q="missing-client-score";else if($.fit&&_?.fit){let K=$.fit.fitsInGpu||$.fit.fitsInCpu,B=_.fit.fitsInGpu||_.fit.fitsInCpu;if(K&&!B)U="local",Q="client-fits-in-memory";else if(B&&!K)U="buttress",Q="buttress-fits-in-memory";else if(R>G*Z)U="local",Q="client-better";else if(G>R*Z)U="buttress",Q="buttress-better";else U="either",Q="comparable-scores"}else if(R>G*Z)U="local",Q="client-better";else if(G>R*Z)U="buttress",Q="buttress-better";else U="either",Q="comparable-scores";w={buttressScore:G,clientScore:R,threshold:Z,recommendation:U,reason:Q}}if(!q.ok&&!w)w={buttressScore:G,clientScore:$?.score??null,threshold:Z,recommendation:"local",reason:"buttress-unavailable"};let A=null;if(j)A={repoId:j.model?.repo_id||null,quantization:j.model?.quantization||null,filename:j.model?.filename||null};return{type:"ggml-stt",timestamp:new Date().toISOString(),buttress:q,client:z,comparison:w,modelConfig:A}}var p6=()=>{if(typeof 
/**
 * Recursively merges `source` into `target` (in place) and returns `target`.
 * Plain objects are merged key by key; arrays and primitives overwrite.
 *
 * Fix over the previous version: the keys `__proto__`, `constructor` and
 * `prototype` are skipped, so a config such as
 * `JSON.parse('{"__proto__":{"x":1}}')` can no longer write into
 * `Object.prototype`.
 */
const F2 = (target = {}, source = {}) => {
  for (const [key, value] of Object.entries(source || {})) {
    if (key === "__proto__" || key === "constructor" || key === "prototype") continue;
    if (value && typeof value === "object" && !Array.isArray(value)) {
      if (!target[key] || typeof target[key] !== "object") target[key] = {};
      F2(target[key], value);
    } else {
      target[key] = value;
    }
  }
  return target;
};

const c6 = ".bin";
const E2 = "https://huggingface.co";
const M2 = "https://huggingface.co/api";
// Declared here, assigned by module initialisation code outside this block.
let R0;
let r4;
let o4;
const x2 = "fp16";
const S2 = 0.5;
let d6;

/** First entry of `d6` contained in the lower-cased input, or `null`. */
const P2 = (text) => {
  if (!text) return null;
  const lowered = text.toLowerCase();
  return d6.find((candidate) => lowered.includes(candidate)) || null;
};

// Default STT config; assigned by module initialisation code outside this block.
let a4;

/**
 * Normalizes a value to an array: nullish/empty input yields a copy of
 * `fallback` (the number 0 counts as a value), arrays lose their
 * `null`/`undefined` items, and anything else is wrapped.
 */
const s4 = (value, fallback = []) => {
  if (!value && value !== 0) return [...fallback];
  if (Array.isArray(value)) return value.filter((item) => item != null);
  return [value];
};

/** Lower-cased variant name if it is one of cuda/vulkan/default, else `null`. */
const X4 = (variant) => {
  if (!variant) return null;
  const lowered = String(variant).toLowerCase();
  return ["cuda", "vulkan", "default"].includes(lowered) ? lowered : null;
};

/**
 * Produces a normalized STT backend config: a deep copy of the defaults
 * (`a4`) with `overrides` merged in, variants and quantizations lower-cased
 * and de-duplicated, and defaults applied for URLs, cache dir and release
 * delay.
 *
 * Fix over the previous version: `runtime.context_release_delay_ms` was
 * computed as `Math.max(0, Number(x) || default)`, so an explicit `0` fell
 * back to the default even though negative values were clamped to 0. Zero
 * (also as the string "0") is now honoured; only missing, empty or
 * non-numeric values fall back to the default.
 *
 * @param {object} [overrides] Partial config.
 * @returns {object} New normalized config object.
 */
const j4 = (overrides = {}) => {
  const config = JSON.parse(JSON.stringify(a4));
  F2(config, overrides);
  const { backend, runtime, model } = config;
  const uniqueVariants = (value, fallback) =>
    Array.from(new Set(s4(value, fallback).map(X4).filter(Boolean)));

  backend.variant = X4(backend.variant);
  backend.variant_preference = uniqueVariants(backend.variant_preference || r4);
  if (backend.variant_preference.length === 0) backend.variant_preference = [...r4];
  runtime.prefer_variants = uniqueVariants(runtime.prefer_variants);

  model.preferred_quantizations = Array.from(
    new Set(
      s4(model.preferred_quantizations || model.quantizations)
        .map((tag) => (tag ? String(tag).toLowerCase() : null))
        .filter(Boolean),
    ),
  );
  if (model.quantization) {
    // An explicitly requested quantization always takes priority.
    const requested = String(model.quantization).toLowerCase();
    if (!model.preferred_quantizations.includes(requested)) {
      model.preferred_quantizations.unshift(requested);
    }
  }

  model.base_url = model.base_url || E2;
  model.api_base = model.api_base || M2;
  runtime.cache_dir = runtime.cache_dir ? j0.resolve(runtime.cache_dir) : R0;

  const rawDelay = runtime.context_release_delay_ms;
  const parsedDelay = rawDelay == null || rawDelay === "" ? Number.NaN : Number(rawDelay);
  runtime.context_release_delay_ms = Math.max(
    0,
    Number.isNaN(parsedDelay) ? a4.runtime.context_release_delay_ms : parsedDelay,
  );
  return config;
};

/** First entry of `o4` contained in the lower-cased file name, or `null`. */
const n4 = (filename) => {
  const lowered = filename.toLowerCase();
  return o4.find((candidate) => lowered.includes(candidate)) || null;
};
X.push(...$.backend.variant_preference),X.push("default"),Array.from(new Set(X.map(X4).filter(Boolean)))},T2=async($)=>{await h6($,{recursive:!0})},i6=($=R0)=>j0.join($,".metadata-cache"),k2=($,X,Z=R0)=>{let W=u6("sha256").update($).digest("hex");return j0.join(i6(Z),X,`${W}.json`)},s6=async($,X,Z=R0)=>{try{let W=k2($,X,Z),j=await C6(W,"utf-8");return JSON.parse(j)}catch(W){return null}},L2=async($,X,Z,W=R0)=>{try{let j=k2($,X,W);await T2(j0.dirname(j)),await y6(j,JSON.stringify(Z),"utf-8")}catch(j){}},n6=async($,X={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let Z=await fetch($,X);if(!Z.ok){let W=await Z.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${Z.status} ${Z.statusText} ${W}`.trim())}return Z.json()},A2=async($,X={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let Z=await fetch($,{...X,method:"HEAD"});if(!Z.ok)throw Error(`Failed to fetch headers for ${$}: ${Z.status} ${Z.statusText}`);return Z},D2=($,X)=>{if($.model.local_path)return j0.resolve($.model.local_path);let Z=X.repoId.split("/"),W=j0.join($.runtime.cache_dir,...Z,X.revision);return j0.join(W,X.filename)},W4=async($,X)=>{try{let Z=await B2($);if(!X)return!0;return Z.size===X}catch(Z){return!1}},b2=async($,X,Z,W,j)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await T2(j0.dirname(Z));let V=await fetch($,{headers:X});if(!V.ok||!V.body)throw Error(`Failed to download ${$}: ${V.status} ${V.statusText}`);let Y=await I6(Z,"w"),J=Number(V.headers.get("content-length"))||W||0,N=0,H=0.05;try{await V.body.pipeTo(new m6({async write(O){if(await Y.write(O),N+=O.byteLength,typeof j==="function"&&J>0){let q=Math.min(1,N/J);while(q>=H)j(H),H+=0.05}},async close(){if(await Y.close(),typeof j==="function")j(1)},async abort(O){throw await Y.close().catch(()=>{}),await i4(Z).catch(()=>{}),O}}))}catch(O){throw await Y.close().catch(()=>{}),await 
i4(Z).catch(()=>{}),O}if(W){let O=await B2(Z);if(O.size!==W)throw await i4(Z).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${W} got ${O.size}`)}},e4=async($)=>{let X=$.model.repo_id||$.model.repository||$.model.model;if(!X)throw Error("`model.repo_id` is required in Buttress backend config");let Z=$.model.revision||"main",W=$.runtime.cache_dir,j=JSON.stringify({repoId:X,revision:Z,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),V=await s6(j,"artifact-info",W);if(V)return V;let Y={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Y.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let U=await A2($.model.url,{headers:Y}),Q=Number(U.headers.get("content-length"))||null,K=$.model.filename||$.model.url.split("/").pop(),B={repoId:X,revision:Z,filename:K,url:$.model.url,size:Q,quantization:n4(K||""),headers:Y};return await L2(j,"artifact-info",B,W),B}let{filename:J}=$.model,N=$.model.quantization&&String($.model.quantization).toLowerCase(),H=await n6(`${$.model.api_base}/models/${X}?revision=${Z}&blobs=true`,{headers:Y}),q=(H?.siblings||H?.files||[]).map((U)=>U.rfilename||U.path||U.filename).filter((U)=>typeof U==="string"&&U.endsWith(c6));if(q.length===0)throw Error(`No model artifacts found in repo ${X}`);let _=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:o4,G=()=>{for(let U of _)if(U===x2){let Q=q.find((K)=>{let B=K.toLowerCase();return!o4.some((L)=>B.includes(L))});if(Q)return{filename:Q,quantization:null}}else{let Q=q.find((K)=>K.toLowerCase().includes(U));if(Q)return{filename:Q,quantization:U}}return null};if(!J){let U=G()||{filename:q[0],quantization:null},{filename:Q,quantization:K}=U;J=Q,N=K||n4(J)}else if(!N)N=n4(J);let w=`${$.model.base_url.replace(/\/+$/,"")}/${X}/resolve/${Z}/${J}`,z=await 
A2(w,{headers:Y}),A=Number(z.headers.get("content-length"))||null,R={repoId:X,revision:Z,filename:J,url:w,size:A,quantization:N,headers:Y,isSplit:!1,splitCount:0};return await L2(j,"artifact-info",R,W),R},r6=async($,{modelBytes:X=null,processingBytes:Z=null}={})=>{let W=l6($),[j,...V]=W,Y=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):a4.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):S2,N=await _0({platform:process.platform,totalMemoryInBytes:t4.totalmem(),backend:"ggml-stt",variant:j||null,preferVariants:V,variantPreference:$.backend.variant_preference,gpuMemoryFraction:Y,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:R2,isLibVariantAvailable:w2},modelBytes:X,kvCacheBytes:Z}),H=(_)=>({..._,devices:Array.isArray(_.devices)?_.devices:[],ok:_.ok,hasGpu:Boolean(_.hasGpu),totalMemory:_.gpuTotalBytes||_.totalMemory||0,error:_.ok?null:Error(_.error||`Variant ${_.variant} not available on this platform`)});if(!N.ok||!N.selected){let _=(N.attempts||[]).map((G)=>`${G.variant}: ${G.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${W.join(", ")}). 
Errors: ${_}`)}let O=(N.attempts||[]).map(H);return{selected:H(N.selected),attempts:O}},o6=async($)=>{let X=await e4($),Z=m0({modelBytes:X.size>0?X.size:0}),W=await r6($,{modelBytes:Z.modelBytes,processingBytes:Z.processingBufferBytes}),j=W.selected.hasGpu&&(W.selected.fit?.fitsInGpu!==void 0?W.selected.fit.fitsInGpu:!0);if($.model.use_gpu===!1)j=!1;let V=$.model.use_flash_attn&&String($.model.use_flash_attn).toLowerCase(),Y;if(V==="on"||V==="true")Y=!0;else if(V==="off"||V==="false")Y=!1;else Y=j;let J=$.runtime.cache_dir,N=D2($,X),H=await W4(N,X.size),O={ok:!0,backend:"ggml-stt",model:{repoId:X.repoId,revision:X.revision,filename:X.filename,quantization:X.quantization,modelType:P2(X.filename),url:X.url,sizeBytes:X.size},runtime:{variant:W.selected.variant,use_gpu:j,use_flash_attn:Y,max_threads:$.runtime.max_threads?Number($.runtime.max_threads):null},resources:{...Z,gpuCapacityBytes:W.selected.gpuTotalBytes,gpuUsableBytes:W.selected.gpuUsableBytes,cpuUsableBytes:W.selected.cpuUsableBytes,fit:W.selected.fit},devices:{selected:W.selected,attempts:W.attempts},download:{cacheDir:J,localPath:N,exists:H},timestamp:new Date().toISOString()};return{config:$,info:O,artifact:X,memory:Z,devices:W,localPath:N,localExists:H}},a6=async($,X,Z,W=null)=>{let{localPath:j,artifact:V,config:Y}=$;if($.localExists){if(typeof Z==="function")Z(1);return j}if(W){let H=W.getDownload(j);if(H){console.log(`[ensureModelFile] Waiting for global STT download: ${V.repoId}`);try{if(await H,await W4(j,V.size)){if($.localExists=!0,$.info.download.exists=!0,typeof Z==="function")Z(1);return j}}catch(O){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${O.message}`)}}}let J=X.get(j);if(J){if(await J,typeof Z==="function")Z(1);return j}let N=(async()=>{if(Y.model.allow_local_file){if(!await W4(j,V.size))throw Error(`Local model file not found: ${j}`);return j}return await b2(V.url,V.headers,j,V.size,Z),j})();X.set(j,N);try{return await 
N,j}finally{X.delete(j)}},t6=($)=>{if(!$)return null;if($ instanceof ArrayBuffer)return $;if(ArrayBuffer.isView($))return $.buffer;if(typeof $==="string"){let X=$.startsWith("data:")?$.split(",")[1]||"":$,Z=Buffer.from(X,"base64");return Z.buffer.slice(Z.byteOffset,Z.byteOffset+Z.byteLength)}throw Error("Unsupported audioData format, expected base64 string or ArrayBuffer")},e6=async($,X)=>{if($.contextRecord&&!$.contextRecord.released){if($.contextRecord.releaseTimer)clearTimeout($.contextRecord.releaseTimer),$.contextRecord.releaseTimer=null,console.log("[Context] Cancelled pending STT release");if($.contextRecord.releaseRequested=!1,$.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${$.contextRecord.refCount}`),typeof X==="function")X(0);if(!$.contextRecord.context)await $.contextRecord.ready;if(typeof X==="function")X(1);return $.contextRecord}if($.contextRecord)console.log(`[Context] STT record exists but released=${$.contextRecord.released}, creating new context`);else console.log("[Context] No existing STT record, creating new context");let Z={refCount:1,ready:null,released:!1};$.contextRecord=Z,Z.ready=(async()=>{let W=Date.now();try{if(typeof X==="function")X(0);let j=await a6($.plan,$.downloads,X,$.globalDownloadManager);if(typeof X==="function")X(0.5);let V=await f6({filePath:j,useFlashAttn:$.plan.info.runtime.flash_attn_type==="on",useGpu:$.plan.info.runtime.n_gpu_layers>0,nThreads:$.plan.info.runtime.n_threads},$.plan.info.runtime.variant);if(typeof X==="function")X(1);Z.context=V;try{Z.modelInfo=V.getModelInfo()}catch(Y){Z.modelInfo=null}return N0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,useGpu:$.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-W,success:!0}),Z}catch(j){throw 
N0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-W,success:!1,error:j?.message||String(j)}),j}})();try{if(await Z.ready,typeof X==="function")X(1);return Z}catch(W){throw $.contextRecord=null,W}},Z4=async($,X,Z=!1)=>{if(X.released)return!1;if(!Z&&X.refCount>0)return!1;return X.released=!0,$.contextRecord=null,await X.context?.release?.(),!0},$9=async($,X,Z=!1)=>{if(X.releaseRequested=!0,X.releaseTimer)clearTimeout(X.releaseTimer),X.releaseTimer=null;if(Z)X.refCount=0;else if(X.refCount=Math.max(0,X.refCount-1),X.refCount>0)return X.releaseRequested=!1,!1;let W=$.config.runtime.context_release_delay_ms;if(typeof W!=="number"||!Number.isFinite(W))return Z4($,X);let j=Math.max(0,Math.floor(W));if(Z||j<=0)return Z4($,X);return console.log(`[Context] Scheduling STT release in ${j}ms`),X.releaseTimer=setTimeout(async()=>{if(X.releaseTimer=null,X.refCount>0){console.log(`[Context] STT release cancelled, refCount=${X.refCount}`),X.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${j}ms delay`),await Z4($,X)},j),!0},$1=($)=>{let X=j4($),Z=X.model.repo_id||X.model.repository||X.model.model||null;if(!Z)return null;let W=P2(X.model.filename);if(W)return`${Z}:${W}`;return Z},z2=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return q0($)};var 
X1=P(()=>{M0();s0();({WritableStream:m6}=p6()),R0=j0.join(t4.homedir(),".buttress","models"),r4=["cuda","vulkan","default"],o4=["q8_0","q5_1","q5_0","q4_1","q4_0"],d6=["large-v3-turbo","distil-large-v3","large-v3","large-v2","large-v1","large","distil-medium","medium.en","medium","small.en-tdrz","distil-small.en","small.en","small","base.en","base","tiny.en","tiny"],a4={backend:{type:"ggml-stt",variant:null,variant_preference:r4,gpu_memory_fraction:0.85,cpu_memory_fraction:S2},model:{repo_id:"BricksDisplay/whisper-ggml",revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:["q8_0",x2,"q5_1"],allow_local_file:!1,local_path:null,api_base:M2,base_url:E2,use_gpu:!0,use_flash_attn:"auto"},runtime:{cache_dir:R0,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}}});async function t($,X=null,Z={}){if($==="ggml-llm")return _2(X,Z);if($==="ggml-stt")return C2(X,Z);throw Error(`Unknown backend type: ${$}`)}var Z1=P(()=>{l4();X1()});var g;var y2=P(()=>{g={name:"@fugood/buttress-backend-core",private:!0,type:"module",version:"2.23.0-beta.53",main:"src/index.js",types:"lib/types/index.d.ts",scripts:{build:"tsc --noResolve --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js"},dependencies:{"@fugood/buttress-hardware-guardrails":"^2.23.0-beta.53","@fugood/llama.node":"^1.6.0-rc.0","@fugood/whisper.node":"^1.0.13","@huggingface/gguf":"^0.3.2","@iarna/toml":"^3.0.0",bytes:"^3.1.0"}}});import m from"node:os";import u2 from"node:fs";import f2 from"node:path";import{execSync as Y4}from"node:child_process";import g2 from"@iarna/toml";async function m2({modelIds:$=[],defaultConfig:X=null}={}){let Z=[];if(console.log(`${g.name} v${g.version}`),console.log(`Generating model capabilities comparison...
3
- `),Z.push(`${g.name} v${g.version}`),Z.push(`## Model Capabilities Comparison
4
- `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let W=(U={},Q={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Q||{}).forEach(([B,L])=>{if(L&&typeof L==="object"&&!Array.isArray(L))K[B]=W(K[B]||{},L);else K[B]=L}),K},j=X||{},{server:V,generators:Y=[],...J}=j,N=(U)=>W(JSON.parse(JSON.stringify(J)),U||{}),H=(U)=>{if(Array.isArray(Y)&&Y.length>0){let Q=Y.filter((K)=>K?.type==="ggml-llm");if(Q.length>0&&U){let K=Q.find((B)=>B.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},O=[];for(let U=0;U<$.length;U+=1){let Q=$[U];console.log(`[${U+1}/${$.length}] Analyzing ${Q}...`);let K=H(Q);K={...K||{},model:{...J.runtime,...K?.model||{},repo_id:Q}};let B=await t("ggml-llm",null,{config:K,includeBreakdown:!0});O.push({modelId:Q,capabilities:B,modelInfo:B.buttress?.selected||null,modelConfig:B.modelConfig||null})}let q=(U)=>U?(U/1024/1024/1024).toFixed(2):"N/A",_=(U)=>U?"✅":"\uD83D\uDEAB";Z.push("| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |"),Z.push("|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|"),O.forEach(({modelId:U,modelInfo:Q,modelConfig:K})=>{let B=Q?.quantization?.name?.toUpperCase()||"N/A",L=q(Q?.modelBytes),F=K?.nCtx||Q?.kvInfo?.nCtxTrain||"N/A",E=G0(Q),M=Number(F),x=Q?.kvCacheBytes||(E&&Number.isFinite(M)&&M>0?E(M):E&&E(Q?.kvInfo?.nCtxTrain||0))||null,D=q(x),I=q(Q?.modelBytes&&x?Q.modelBytes+x:Q?.fit?.totalRequiredBytes),b=_(Q?.fit?.fitsInGpu),u=_(Q?.fit?.fitsInCpu);Z.push(`| ${U} | ${B} | ${L} | ${F} | ${D} | ${I} | ${b} | ${u} |`);let r=Q?.memoryLimitedCtx!=null||Q?.limitedFit!=null,k=!Q?.fit?.fitsInGpu||!Q?.fit?.fitsInCpu;if(r&&k){let 
d=Q?.memoryLimitedCtx||F,v=Number(d),S=Q?.limitedKvCacheBytes||E&&Number.isFinite(v)&&v>0&&E(v)||null,C=q(S),h=q(Q?.modelBytes&&S?Q.modelBytes+S:Q?.limitedFit?.totalRequiredBytes),p=_(Q?.limitedFit?.fitsInGpu),f=_(Q?.limitedFit?.fitsInCpu);if(d!==F||C!==D||h!==I)Z.push(`| ↳ Limited | - | ${L} | ${d} | ${C} | ${h} | ${p} | ${f} |`)}}),Z.push(`
5
- ---`),Z.push(`
6
- ### System Information`);let G=null;if(process.platform!=="win32")try{G=Y4("uname -a",{encoding:"utf8"}).trim()}catch{}if(G)Z.push(`- **System:** ${G}`);else Z.push(`- **Hostname:** ${m.hostname()}`),Z.push(`- **OS:** ${m.type()} ${m.release()}`);if(Z.push(`- **Platform:** ${process.platform}`),Z.push(`- **CPU Cores:** ${m.cpus().length}`),Z.push(`- **Total System Memory:** ${(m.totalmem()/1024/1024/1024).toFixed(2)} GB`),O.length>0){let Q=O[0].capabilities.buttress?.selected;if(Q){let K=Q.cpuTotalBytes>0?(Q.cpuUsableBytes/Q.cpuTotalBytes*100).toFixed(0):0;if(Z.push(`- **Usable CPU Memory:** ${(Q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(Q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Q.hasGpu){let B=Q.devices.filter((L)=>L.type==="gpu");if(B.length>0){let L=B[0];Z.push(`- **GPU Backend:** ${L.backend}`),Z.push(`- **GPU Name:** ${L.deviceName}`),Z.push(`- **GPU Total Memory:** ${(L.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Q.gpuTotalBytes>0?(Q.gpuUsableBytes/Q.gpuTotalBytes*100).toFixed(0):0;Z.push(`- **GPU Usable Memory:** ${(Q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else Z.push("- **GPU:** Not available")}}Z.push(`
7
- ### Command Used`);let w=process.argv.slice(2).join(" ");if(Z.push(`\`\`\`bash
8
- ${process.argv[0]} ${process.argv[1]} ${w}
9
- \`\`\``),Z.push(`
10
- ### Package Information`),Z.push(`- **Name:** ${g.name}`),Z.push(`- **Version:** ${g.version}`),g.description)Z.push(`- **Description:** ${g.description}`);if(X&&Object.keys(X).length>0){Z.push(`
11
- ### Configuration`),Z.push("<details>"),Z.push("<summary>Click to expand TOML configuration</summary>"),Z.push("\n```toml");try{let U=g2.stringify(X);Z.push(U)}catch(U){Z.push("# Error serializing config"),Z.push(JSON.stringify(X,null,2))}Z.push("```"),Z.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,R=f2.join(process.cwd(),A);u2.writeFileSync(R,Z.join(`
12
- `),"utf8"),console.log(`
13
- Model capabilities table saved to: ${R}`),process.exit(0)}catch(W){console.error("Failed to generate model table:",W.message),process.exit(1)}}async function c2({modelId:$=null,defaultConfig:X=null}={}){if(console.log(`${g.name} v${g.version}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let Z=X||{},{server:W,generators:j=[],...V}=Z,Y=(G={},w={})=>{let z=Array.isArray(G)?[...G]:{...G};return Object.entries(w||{}).forEach(([A,R])=>{if(R&&typeof R==="object"&&!Array.isArray(R))z[A]=Y(z[A]||{},R);else z[A]=R}),z},J=(G)=>Y(JSON.parse(JSON.stringify(V)),G||{}),H=((G)=>{if(Array.isArray(j)&&j.length>0){let w=j.filter((z)=>z?.type==="ggml-llm");if(w.length>0){if(G){let z=w.find((A)=>A.model?.repo_id===G);if(z)return J(z)}}}if(Object.keys(V).length>0)return J({});return null})($);if($)H={...H||{},model:{...H?.model||{},repo_id:$}};let O=await t("ggml-llm",null,{config:H,includeBreakdown:!0}),q=O.buttress?.selected||null,_=O.modelConfig||null;if($||_?.repoId){console.log(`
14
- === Model Information ===`);let G=$||_?.repoId;if(console.log(`Repository ID: ${G}`),_?.quantization)console.log(`Quantization: ${_.quantization}`);if(_?.nCtx)console.log(`Context Length: ${_.nCtx}`);if(q?.quantization?.name)console.log(`Model Quantization: ${q.quantization.name.toUpperCase()}`);let w=_?.cache_type_k||"f16",z=_?.cache_type_v||"f16";if(console.log(`KV Cache Type: K=${w}, V=${z}`),q?.modelBytes&&q?.kvCacheBytes){if(console.log(`Model Size: ${(q.modelBytes/1024/1024/1024).toFixed(2)} GB`),q.kvInfo)console.log(`KV Cache Size: ${(q.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(q.kvInfo)})`);else console.log(`KV Cache Size: ${(q.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);if(console.log(`Total Required Memory: ${((q.modelBytes+q.kvCacheBytes)/1024/1024/1024).toFixed(2)} GB`),q.memoryLimitedCtx!=null){let A=q.memoryLimitedCtx,R=q.kvInfo?.nCtxTrain;if(R)console.log(`
15
- Memory-Limited Context: ${A} (Train: ${R})`);else console.log(`
16
- Memory-Limited Context: ${A}`);if(q.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(q.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(O.buttress?.selected?.fit){let{totalRequiredBytes:A}=O.buttress.selected.fit;console.log(`Total Required Memory: ${(A/1024/1024/1024).toFixed(2)} GB`)}}if(O.buttress?.selected){let{selected:G}=O.buttress;console.log(`
17
- === Hardware Information ===`);let w=null;if(process.platform!=="win32")try{w=Y4("uname -a",{encoding:"utf8"}).trim()}catch{}if(w)console.log(`System: ${w}`);else console.log(`Hostname: ${m.hostname()}`),console.log(`OS: ${m.type()} ${m.release()}`);console.log(`Platform: ${G.platform}`),console.log(`CPU Cores: ${m.cpus().length}`),console.log(`Total System Memory: ${(m.totalmem()/1024/1024/1024).toFixed(2)} GB`);let z=G.cpuTotalBytes>0?(G.cpuUsableBytes/G.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(G.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${z}% of ${(G.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),G.hasGpu)console.log(`
18
- --- GPU Details ---`),G.devices.filter((R)=>R.type==="gpu").forEach((R)=>{console.log(`GPU Backend: ${R.backend}`),console.log(`GPU Name: ${R.deviceName}`),console.log(`GPU Total Memory: ${(R.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let U=G.gpuTotalBytes>0?(G.gpuUsableBytes/G.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(G.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${U}% of ${(G.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),R.metadata){if(R.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(R.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
19
- Backend Variant: ${G.variant}`),console.log(`Performance Score: ${G.score}`),G.fit){if(console.log(`
20
- --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${G.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${G.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${G.fit.limiting}`),G.limitedFit)console.log(`
21
- --- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(G.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${G.limitedFit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU (Limited): ${G.limitedFit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor (Limited): ${G.limitedFit.limiting}`)}}console.log(`
22
- === Full Capabilities JSON ===`),console.log(JSON.stringify(O,null,2)),process.exit(0)}catch(Z){console.error("Failed to get capabilities:",Z.message),process.exit(1)}}async function d2({modelIds:$=[],defaultConfig:X=null}={}){let Z=[];if(console.log(`${g.name} v${g.version}`),console.log(`Generating STT model capabilities comparison...
23
- `),Z.push(`${g.name} v${g.version}`),Z.push(`## STT Model Capabilities Comparison
24
- `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let W=(U={},Q={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Q||{}).forEach(([B,L])=>{if(L&&typeof L==="object"&&!Array.isArray(L))K[B]=W(K[B]||{},L);else K[B]=L}),K},j=X||{},{server:V,generators:Y=[],...J}=j,N=(U)=>W(JSON.parse(JSON.stringify(J)),U||{}),H=(U)=>{if(Array.isArray(Y)&&Y.length>0){let Q=Y.filter((K)=>K?.type==="ggml-stt");if(Q.length>0&&U){let K=Q.find((B)=>B.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},O=[];for(let U=0;U<$.length;U+=1){let Q=$[U],{repoId:K,filename:B}=p2(Q);console.log(`[${U+1}/${$.length}] Analyzing ${Q}...`);let L=H(K);L={...L||{},model:{...L?.model||{},repo_id:K,...B&&{filename:B}}};let F=await t("ggml-stt",null,{config:L,includeBreakdown:!0});O.push({modelId:Q,repoId:K,filename:B,capabilities:F,modelInfo:F.buttress?.selected||null,modelConfig:F.modelConfig||null})}let q=(U)=>U?(U/1024/1024).toFixed(1):"N/A",_=(U)=>U?"✅":"\uD83D\uDEAB";Z.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),Z.push("|-------|-----------|------------------------|---------------------|----------|----------|"),O.forEach(({modelId:U,modelInfo:Q})=>{let K=q(Q?.modelBytes),B=q(Q?.processingBytes||Q?.kvCacheBytes),L=q(Q?.fit?.totalRequiredBytes),F=_(Q?.fit?.fitsInGpu),E=_(Q?.fit?.fitsInCpu);Z.push(`| ${U} | ${K} | ${B} | ${L} | ${F} | ${E} |`)}),Z.push(`
25
- ---`),Z.push(`
26
- ### System Information`);let G=null;if(process.platform!=="win32")try{G=Y4("uname -a",{encoding:"utf8"}).trim()}catch{}if(G)Z.push(`- **System:** ${G}`);else Z.push(`- **Hostname:** ${m.hostname()}`),Z.push(`- **OS:** ${m.type()} ${m.release()}`);if(Z.push(`- **Platform:** ${process.platform}`),Z.push(`- **CPU Cores:** ${m.cpus().length}`),Z.push(`- **Total System Memory:** ${(m.totalmem()/1024/1024/1024).toFixed(2)} GB`),O.length>0){let Q=O[0].capabilities.buttress?.selected;if(Q){let K=Q.cpuTotalBytes>0?(Q.cpuUsableBytes/Q.cpuTotalBytes*100).toFixed(0):0;if(Z.push(`- **Usable CPU Memory:** ${(Q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(Q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Q.hasGpu){let B=Q.devices.filter((L)=>L.type==="gpu");if(B.length>0){let L=B[0];Z.push(`- **GPU Backend:** ${L.backend}`),Z.push(`- **GPU Name:** ${L.deviceName}`),Z.push(`- **GPU Total Memory:** ${(L.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Q.gpuTotalBytes>0?(Q.gpuUsableBytes/Q.gpuTotalBytes*100).toFixed(0):0;Z.push(`- **GPU Usable Memory:** ${(Q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else Z.push("- **GPU:** Not available")}}Z.push(`
27
- ### Command Used`);let w=process.argv.slice(2).join(" ");if(Z.push(`\`\`\`bash
28
- ${process.argv[0]} ${process.argv[1]} ${w}
29
- \`\`\``),Z.push(`
30
- ### Package Information`),Z.push(`- **Name:** ${g.name}`),Z.push(`- **Version:** ${g.version}`),g.description)Z.push(`- **Description:** ${g.description}`);if(X&&Object.keys(X).length>0){Z.push(`
31
- ### Configuration`),Z.push("<details>"),Z.push("<summary>Click to expand TOML configuration</summary>"),Z.push("\n```toml");try{let U=g2.stringify(X);Z.push(U)}catch(U){Z.push("# Error serializing config"),Z.push(JSON.stringify(X,null,2))}Z.push("```"),Z.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,R=f2.join(process.cwd(),A);u2.writeFileSync(R,Z.join(`
32
- `),"utf8"),console.log(`
33
- STT model capabilities table saved to: ${R}`),process.exit(0)}catch(W){console.error("Failed to generate STT model table:",W.message),process.exit(1)}}async function l2({modelId:$=null,defaultConfig:X=null}={}){if(console.log(`${g.name} v${g.version}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:Z,filename:W}=p2($),j=X||{},{server:V,generators:Y=[],...J}=j,N=(z={},A={})=>{let R=Array.isArray(z)?[...z]:{...z};return Object.entries(A||{}).forEach(([U,Q])=>{if(Q&&typeof Q==="object"&&!Array.isArray(Q))R[U]=N(R[U]||{},Q);else R[U]=Q}),R},H=(z)=>N(JSON.parse(JSON.stringify(J)),z||{}),q=((z)=>{if(Array.isArray(Y)&&Y.length>0){let A=Y.filter((R)=>R?.type==="ggml-stt");if(A.length>0){if(z){let R=A.find((U)=>U.model?.repo_id===z);if(R)return H(R)}}}if(Object.keys(J).length>0)return H({});return null})(Z);if(Z)q={...q||{},model:{...J.runtime,...q?.model||{},repo_id:Z,...W&&{filename:W}}};let _=await t("ggml-stt",null,{config:q,includeBreakdown:!0}),G=_.buttress?.selected||null,w=_.modelConfig||null;if(Z||w?.repoId){console.log(`
34
- === Model Information ===`);let z=Z||w?.repoId;if(console.log(`Repository ID: ${z}`),W)console.log(`Filename: ${W}`);if(G?.modelBytes)console.log(`Model Size: ${(G.modelBytes/1024/1024).toFixed(1)} MB`);let A=G?.processingBytes||G?.kvCacheBytes;if(A)console.log(`Processing Buffer: ${(A/1024/1024).toFixed(1)} MB`);if(G?.modelBytes&&A)console.log(`Total Required Memory: ${((G.modelBytes+A)/1024/1024).toFixed(1)} MB`);else if(_.buttress?.selected?.fit){let{totalRequiredBytes:R}=_.buttress.selected.fit;console.log(`Total Required Memory: ${(R/1024/1024).toFixed(1)} MB`)}}if(_.buttress?.selected){let{selected:z}=_.buttress;console.log(`
35
- === Hardware Information ===`);let A=null;if(process.platform!=="win32")try{A=Y4("uname -a",{encoding:"utf8"}).trim()}catch{}if(A)console.log(`System: ${A}`);else console.log(`Hostname: ${m.hostname()}`),console.log(`OS: ${m.type()} ${m.release()}`);console.log(`Platform: ${z.platform}`),console.log(`CPU Cores: ${m.cpus().length}`),console.log(`Total System Memory: ${(m.totalmem()/1024/1024/1024).toFixed(2)} GB`);let R=z.cpuTotalBytes>0?(z.cpuUsableBytes/z.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(z.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${R}% of ${(z.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),z.hasGpu)console.log(`
36
- --- GPU Details ---`),z.devices.filter((Q)=>Q.type==="gpu").forEach((Q)=>{console.log(`GPU Backend: ${Q.backend}`),console.log(`GPU Name: ${Q.deviceName}`),console.log(`GPU Total Memory: ${(Q.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let K=z.gpuTotalBytes>0?(z.gpuUsableBytes/z.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(z.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(z.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Q.metadata){if(Q.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(Q.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
37
- Backend Variant: ${z.variant}`),console.log(`Performance Score: ${z.score}`),z.fit)console.log(`
38
- --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${z.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${z.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${z.fit.limiting}`)}console.log(`
39
- === Full Capabilities JSON ===`),console.log(JSON.stringify(_,null,2)),process.exit(0)}catch(Z){console.error("Failed to get capabilities:",Z.message),process.exit(1)}}var p2=($)=>{if(!$)return{repoId:null,filename:null};let[X,Z]=$.split(":");return{repoId:X,filename:Z||null}};var i2=P(()=>{M0();Z1();y2()});var J4={};i3(J4,{testGgmlSttCapabilities:()=>l2,testGgmlLlmCapabilities:()=>c2,status:()=>V9,startModelDownload:()=>Q4,startGenerator:()=>W9,showSttModelsTable:()=>d2,showModelsTable:()=>m2,globalDownloadManager:()=>W1,ggmlStt:()=>Q9,ggmlLlm:()=>Y9,getModelIdentifier:()=>J9,getCapabilities:()=>t,generatorRegistry:()=>X0,finalizeGenerator:()=>j9});async function W9($,X){let W={"ggml-llm":{create:K2,getId:d4},"ggml-stt":{create:h2,getId:$1}}[$];if(!W)throw Error(`Unsupported backend type: ${$}`);let j=W.getId(X);if(!j)throw Error("Buttress generator config missing repo identifier");let V=`${$}:${j}`,Y=X0.get(V);if(Y)return Y.refCount+=1,Y.instance.resetFinalized?.(),{id:Y.id,info:Y.instance.info};let J=await W.create(V,X,{globalDownloadManager:W1}),N={id:V,type:J.type,instance:J,refCount:1};return X0.set(V,N),{id:V,info:J.info}}async function j9($){let X=X0.get($);if(!X)return!1;if(X.refCount-=1,X.refCount<=0){if(await X.instance.finalize(),!(X.instance.hasPendingReleases?.()??!1))X0.delete($)}return!0}function J9($,X){if($==="ggml-llm")return d4(X);if($==="ggml-stt")return $1(X);return null}async function Q4($,X,Z={}){let j={"ggml-llm":q2,"ggml-stt":I2}[$];if(!j)return{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${$}`};return j(X,W1,Z)}var X0,W1,Z9=($)=>{let X=X0.get($);if(!X)throw Error(`Unknown generator id "${$}"`);return X},O0=($,X)=>{let Z=Z9($);if(Z.type!==X)throw Error(`Generator "${$}" does not support ${X} backend`);return Z.instance},Y9,Q9,V9;var V4=P(()=>{l4();X1();Z1();s0();i2();X0=new Map,W1={downloads:new Map,getDownload($){return 
this.downloads.get($)||null},setDownload($,X){this.downloads.set($,X)},deleteDownload($){this.downloads.delete($)},isDownloading($){return this.downloads.has($)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([$,X])=>({localPath:$,promise:X}))}};Y9={async initContext($,X){return O0($,"ggml-llm").initContext(X)},async completion($,X){return O0($,"ggml-llm").completion(X)},async tokenize($,X){return O0($,"ggml-llm").tokenize(X)},async detokenize($,X){return O0($,"ggml-llm").detokenize(X)},async applyChatTemplate($,X){return O0($,"ggml-llm").applyChatTemplate(X)},async releaseContext($,X){let Z=X0.get($);if(!Z)return{released:!0,alreadyReleased:!0};if(Z.type!=="ggml-llm")throw Error(`Generator "${$}" does not support ggml-llm backend`);return Z.instance.releaseContext(X)}},Q9={async initContext($,X){return O0($,"ggml-stt").initContext(X)},async transcribe($,X){return O0($,"ggml-stt").transcribe(X)},async transcribeData($,X){return O0($,"ggml-stt").transcribeData(X)},async releaseContext($,X){let Z=X0.get($);if(!Z)return{released:!0,alreadyReleased:!0};if(Z.type!=="ggml-stt")throw Error(`Generator "${$}" does not support ggml-stt backend`);return Z.instance.releaseContext(X)}};V9={getFullStatus:()=>d1(X0),getGgmlLlmStatus:()=>C4(X0),getGgmlSttStatus:()=>y4(X0),subscribeToStatus:I4,subscribeToStatusWithId:c1,llmStatusTracker:$0,sttStatusTracker:N0,statusEmitter:e}});import{node as N9}from"@elysiajs/node";import{Elysia as U9}from"elysia";var G9,K0=($)=>new U9({adapter:G9?N9():void 0,...$});var k0=P(()=>{G9=typeof process<"u"&&process.versions&&process.versions.node});import{t as Y0}from"elysia";var H9,O9=({store:{serverInfo:$}})=>({id:$.id,name:$.name,version:$.version,generators:$.generators,authentication:$.authentication}),j1=($)=>{let X=K0(),Z=$.autodiscover.http?.path??"/buttress/info";return X.get(Z,O9,{response:H9}),X};var 
s2=P(()=>{k0();H9=Y0.Object({id:Y0.String(),name:Y0.String(),version:Y0.String(),generators:Y0.Array(Y0.Object({type:Y0.String()})),authentication:Y0.Object({required:Y0.Boolean(),type:Y0.Literal("device-group")})})});import{t as Q0,file as K9}from"elysia";import{writeFile as n2}from"node:fs/promises";import Y1 from"node:path";var q9,Q1;var r2=P(()=>{k0();q9=typeof process<"u"&&process.versions!=null&&process.versions.node!=null,Q1=K0().post("/buttress/upload",async({body:{file:$},store:{config:X}})=>{let Z=`${Date.now()}-${$.name.replace(/[^\dA-Za-z]/g,"_")}`,W=Y1.join(X.server.temp_file_dir,Z);try{if(q9)await n2(W,await $.stream());else await n2(W,await $.arrayBuffer());return{ok:!0,filename:Z}}catch(j){return{ok:!1,error:String(j)}}},{body:Q0.Object({file:Q0.File()}),response:Q0.Object({ok:Q0.Boolean(),filename:Q0.Optional(Q0.String()),error:Q0.Optional(Q0.String())})}).get("/buttress/download/:filename",async({params:{filename:$},store:{config:X},status:Z})=>{let W=Y1.join(X.server.temp_file_dir,$);if(Y1.relative(X.server.temp_file_dir,W).includes(".."))return Z(400),"Invalid file path";return K9(W)},{params:Q0.Object({filename:Q0.String()})})});import J1 from"node:path";import t2 from"node:fs/promises";import{fileURLToPath as _9}from"node:url";var o2,L9=async()=>{let $=[J1.join(o2,"..","public","status.html"),J1.join(o2,"..","..","public","status.html")];return(await Promise.all($.map((Z)=>t2.access(Z).then(()=>Z,()=>null)))).find((Z)=>Z!==null)??null},A9=($)=>{let{status:X}=$;if(X?.getFullStatus)return X.getFullStatus();return{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},a2=async()=>{let $=await L9();if(!$)return console.error("[Status] Failed to find status.html in candidate paths"),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}});try{let X=await t2.readFile($,"utf-8");return new Response(X,{headers:{"Content-Type":"text/html; 
charset=utf-8"}})}catch(X){return console.error("[Status] Failed to serve status page:",X),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}})}},V1;var e2=P(()=>{k0();o2=J1.dirname(_9(import.meta.url)),V1=K0().get("/status",a2).get("/status/",a2).get("/buttress/status",({store:{backend:$}})=>A9($))});import{t as T,sse as N4}from"elysia";import{cors as z9}from"@elysiajs/cors";async function R9($,X,Z){let j=(X.generators||[]).filter((z)=>z.type==="ggml-llm");if(j.length===0)throw Error('No ggml-llm generator configured. Add a [[generators]] with type = "ggml-llm" to your config.');let V=j[0],Y=Z||V.model?.repo_id;if(Z){let z=j.find((A)=>A.model?.repo_id===Z);if(z)V=z}else Y=V.model?.repo_id;let J=Y,N=$3.get(J);if(N?.initialized)return N;let{generators:H,server:O,...q}=X.global||{},_={...q,...V,model:{...V.model,repo_id:Y}};console.log(`[OpenAI] Creating generator for ${J}`);let{id:G}=await $.startGenerator("ggml-llm",_),w={id:G,config:_,repoId:Y,initialized:!1};return $3.set(J,w),await $.ggmlLlm.initContext(G,{}),w.initialized=!0,console.log(`[OpenAI] Generator ready: ${J}`),w}async function w9($,X,Z,W){let j=$.getReader(),V="",Y=null,J=null,N="stop",H={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let q=!1;while(!q){let _=await j.read();if({done:q}=_,q)break;let{event:G,data:w}=_.value;if(G==="token"){if(w.content!=null)V+=w.content;else if(w.token!=null)V+=w.token}else if(G==="result"){if(w.text)V=w.text;else if(w.content)V=w.content;if(w.reasoning_content)Y=w.reasoning_content;if(w.tool_calls?.length>0)J=w.tool_calls.map((z,A)=>({id:z.id||`call_${X}_${A}`,type:"function",function:{name:z.function?.name||"",arguments:z.function?.arguments||""}})),N="tool_calls";else N=w.interrupted?"length":"stop";H={prompt_tokens:w.prompt_tokens||w.promptTokens||0,completion_tokens:w.tokens_predicted||w.tokensPredicted||0,total_tokens:(w.prompt_tokens||w.promptTokens||0)+(w.tokens_predicted||w.tokensPredicted||0)}}else 
if(G==="error")throw Error(w.message)}}finally{j.cancel().catch(()=>{})}let O={role:"assistant",content:V||null};if(Y)O.reasoning_content=Y;if(J)O.tool_calls=J;return{id:X,object:"chat.completion",created:Z,model:W,choices:[{index:0,message:O,finish_reason:N}],usage:H}}function U4({global:$}){let X=K0({prefix:"/oai-compat"});return X.use(z9({origin:$?.openai_compat?.cors_allowed_origins??!1,methods:["GET","POST","OPTIONS"],allowedHeaders:["Content-Type","Authorization"],maxAge:86400,preflight:!0})),X.get("/v1/models",({store:Z})=>{let{config:W}=Z,Y=(W.generators||[]).filter((J)=>J.type==="ggml-llm").map((J)=>{return{id:J.model?.repo_id||"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"}});if(Y.length===0)Y.push({id:"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"});return{object:"list",data:Y}}),X.post("/v1/chat/completions",async function*({body:W,set:j,store:V}){let{config:Y,backend:J}=V,{messages:N=[],stream:H=!1,model:O,tools:q,temperature:_,stop:G,top_p:w,max_tokens:z,presence_penalty:A,frequency_penalty:R,tool_choice:U,stream_options:Q}=W;if(!N||N.length===0)return j.status=400,{error:{message:"messages is required and must not be empty",type:"invalid_request_error"}};try{let K=await R9(J,Y,O),B=B9(),L=Math.floor(Date.now()/1000),F=K.repoId||"ggml-llm",E={reasoning_format:"auto",messages:N,jinja:!0,add_generation_prompt:!0};if(_!=null)E.temperature=_;if(w!=null)E.top_p=w;if(z!=null)E.n_predict=z;if(G!=null)E.stop=Array.isArray(G)?G:[G];if(A!=null)E.presence_penalty=A;if(R!=null)E.frequency_penalty=R;if(q!=null)E.tools=q;if(U!=null)E.tool_choice=U;E.enable_thinking=!1;let M=await J.ggmlLlm.completion(K.id,{options:E});if(!H)return await w9(M,B,L,F);let x=Q?.include_usage===!0,D=M.getReader(),I="",b="",u=new Map,r=new Map;try{let k=!1;while(!k){let d=await D.read();if({done:k}=d,k)break;let{event:v,data:S}=d.value;if(v==="token"){let C={};if(S.content!=null){let 
h=S.content;if(h.length>I.length)C.content=h.slice(I.length),I=h}if(S.reasoning_content!=null){let h=S.reasoning_content;if(h.length>b.length)C.reasoning_content=h.slice(b.length),b=h}if(S.tool_calls?.length>0){let h=[];if(S.tool_calls.forEach((p,f)=>{let s={index:f};if(!r.has(f))r.set(f,p.id||`call_${B}_${f}`),s.id=r.get(f),s.type="function";let W0=p.function?.arguments||"",A0=u.get(f)||"",l={};if(!u.has(f)&&p.function?.name)l.name=p.function.name;if(W0.length>A0.length)l.arguments=W0.slice(A0.length),u.set(f,W0);if(Object.keys(l).length>0)s.function=l,h.push(s);else if(s.id)s.function={name:p.function?.name||"",arguments:""},h.push(s)}),h.length>0)C.tool_calls=h}if(Object.keys(C).length>0)yield N4({data:JSON.stringify({id:B,object:"chat.completion.chunk",created:L,model:F,choices:[{index:0,delta:C,finish_reason:null}]})})}else if(v==="result"){let C="stop";if(S.tool_calls?.length>0||r.size>0)C="tool_calls";else if(S.interrupted)C="length";let h={id:B,object:"chat.completion.chunk",created:L,model:F,choices:[{index:0,delta:{},finish_reason:C}]};if(x)h.usage={prompt_tokens:S.prompt_tokens||S.promptTokens||0,completion_tokens:S.tokens_predicted||S.tokensPredicted||0,total_tokens:(S.prompt_tokens||S.promptTokens||0)+(S.tokens_predicted||S.tokensPredicted||0)};yield N4({data:JSON.stringify(h)})}else if(v==="error")yield N4({data:JSON.stringify({error:{message:S.message,type:"server_error"}})})}yield N4({data:"[DONE]"})}finally{D.cancel().catch(()=>{})}}catch(K){return console.error("[OpenAI] Chat completion error:",K),j.status=500,{error:{message:K.message||"Internal server 
error",type:"server_error"}}}},{body:T.Object({model:T.Optional(T.String()),messages:T.Array(T.Any()),stream:T.Optional(T.Boolean()),temperature:T.Optional(T.Number()),top_p:T.Optional(T.Number()),max_tokens:T.Optional(T.Number()),stop:T.Optional(T.Union([T.String(),T.Array(T.String())])),presence_penalty:T.Optional(T.Number()),frequency_penalty:T.Optional(T.Number()),tools:T.Optional(T.Array(T.Any())),tool_choice:T.Optional(T.Any()),stream_options:T.Optional(T.Object({include_usage:T.Optional(T.Boolean())}))})}),X}var B9=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,$3;var X3=P(()=>{k0();$3=new Map});var Z3=P(()=>{s2();r2();e2();X3()});import E9 from"node:os";import M9 from"node:path";import x9 from"node-machine-id";import S9 from"bytes";import P9 from"ms";var D0=($={},X={})=>{let Z=Array.isArray($)?[...$]:{...$};return Object.entries(X||{}).forEach(([W,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j))Z[W]=D0(Z[W]||{},j);else Z[W]=j}),Z},w0=($)=>{if(!$)return null;if(typeof $==="object")return JSON.parse(JSON.stringify($));return null},N1=($,X)=>{let Z=w0($)||{},W=w0(X)||{};return D0(Z,W)},W3=($,X)=>D0(JSON.parse(JSON.stringify($.global)),X||{}),U1=($,X,Z,W)=>{if($.generators.length>0){let j=$.generators.filter((V)=>V?.type===Z);if(j.length>0){if(W){let V=j.find((Y)=>X.getModelIdentifier(Z,Y)===W);if(V)return W3($,V)}}}if(Object.keys($.global).length>0)return W3($,{});return null},j3,T9=($)=>{if(!$)return null;if($===!0)return{...j3};return D0(j3,$)},V3=($,X)=>{if(!$.generators||$.generators.length===0)return X.map((W)=>({type:W}));let Z=new Set;if($.generators.forEach((W)=>{if(W.type)Z.add(W.type)}),Z.size===0)return X.map((W)=>({type:W}));return Array.from(Z).map((W)=>({type:W}))},Y3=($,X,Z)=>{if($===void 0)return Z;if(typeof $==="number")return $;return X($)??Z},k9=2080,Q3=60000,J3=52428800,G1=($)=>{let X=x9.machineIdSync(),Z={server:{id:`buttress-${X}`,name:`Buttress Server 
(${X.slice(-8)})`,port:k9,temp_file_dir:M9.join(E9.tmpdir(),".buttress"),session_timeout:Q3,max_body_size:J3},autodiscover:!1},W=D0(Z,w0($)||{}),j=Array.isArray(W.generators)?W.generators:[],{server:V,generators:Y,autodiscover:J,...N}=W;return{autodiscover:T9(J),server:{id:V.id,name:V.name,port:V.port,log_level:V.log_level,temp_file_dir:V.temp_file_dir,max_body_size:Y3(V.max_body_size,S9.parse,J3),session_timeout:Y3(V.session_timeout,P9,Q3)},global:N,generators:j}};var b0=P(()=>{j3={udp:{port:8089,announcements:{enabled:!0,interval:5000},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:"/buttress/info",cors:!0}}});import{z as Z0}from"zod";var N3,U3;var G3=P(()=>{b0();N3={getCapabilities:Z0.tuple([Z0.object({type:Z0.string().optional().default("ggml-llm"),config:Z0.any().optional(),currentClientCapabilities:Z0.any().optional(),options:Z0.any().optional()}).nullable().optional()]),startGenerator:Z0.tuple([Z0.string(),Z0.any().optional()]),finalizeGenerator:Z0.tuple([Z0.string()])},U3={async getCapabilities({backend:$,config:X},Z=null){console.log("[Server] Get Capabilities:",Z);let W=Z||{type:"ggml-llm"},{type:j="ggml-llm",config:V,currentClientCapabilities:Y=null,options:J={}}=W,N=w0(V),H=$.getModelIdentifier(j,N),O=U1(X,$,j,H),q=N1(O,V);if(Object.keys(q).length===0)throw Error("Buttress server missing generator configuration");if(q.backend=q.backend||{},!q.backend.type)q.backend.type=j;return $.getCapabilities(j,Y,{...J,config:q})},async startGenerator({backend:$,config:X,session:Z},W,j){console.log("[Server] Start Generator:",W,j);let V=w0(j),Y=$.getModelIdentifier(W,V),J=U1(X,$,W,Y),N=N1(J,j);if(Object.keys(N).length===0)throw Error("Buttress server missing generator configuration");if(N.backend=N.backend||{},!N.backend.type)N.backend.type=W;let H=await $.startGenerator(W,N);return Z.generators.add(H.id),H},async finalizeGenerator({backend:$,session:X},Z){return console.log("[Server] Finalize 
Generator:",Z),X.generators.delete(Z),$.finalizeGenerator(Z)}}});import{z as i}from"zod";import{ReadableStream as D9}from"node:stream/web";var H3,O3;var K3=P(()=>{H3={initContext:i.tuple([i.string(),i.any().optional()]),completion:i.tuple([i.string(),i.any().optional()]),tokenize:i.tuple([i.string(),i.any()]),detokenize:i.tuple([i.string(),i.any()]),applyChatTemplate:i.tuple([i.string(),i.any()]),releaseContext:i.tuple([i.string()])},O3={initContext({backend:$,session:X},Z,W){return new D9({async start(j){try{let V=await $.ggmlLlm.initContext(Z,{...W,onProgress:(N)=>{j.enqueue({event:"progress",data:{progress:N}})}});X.initializedContexts.add(Z),await new Promise((N)=>setTimeout(N,1000));let{download:Y,...J}=V||{};j.enqueue({event:"result",data:{result:J}}),j.close()}catch(V){j.error(V)}}})},completion({backend:$},X,Z){return console.log("[Server] Completion:",{id:X,property:Z}),$.ggmlLlm.completion(X,Z)},async tokenize({backend:$},X,Z){return console.log("[Server] Tokenize:",{id:X,property:Z}),$.ggmlLlm.tokenize(X,Z)},async detokenize({backend:$},X,Z){return console.log("[Server] Detokenize:",{id:X,property:Z}),$.ggmlLlm.detokenize(X,Z)},async applyChatTemplate({backend:$},X,Z){return console.log("[Server] Apply Chat Template:",{id:X,property:Z}),$.ggmlLlm.applyChatTemplate(X,Z)},async releaseContext({backend:$,session:X},Z,W){if(console.log("[Server] Release Context:",{id:Z,force:W}),!X.initializedContexts.has(Z))return console.log("[Server] Release Context skipped - not initialized by this session:",{id:Z}),{released:!1,skipped:!0};return X.initializedContexts.delete(Z),$.ggmlLlm.releaseContext(Z,{force:W})}}});import{z as a}from"zod";import{ReadableStream as b9}from"node:stream/web";import v9 from"node:path";var q3,_3;var 
L3=P(()=>{q3={initContext:a.tuple([a.string(),a.any().optional()]),transcribe:a.tuple([a.string(),a.string(),a.any().optional()]),transcribeData:a.tuple([a.string(),a.union([a.instanceof(Buffer),a.instanceof(Uint8Array)]),a.any().optional()]),releaseContext:a.tuple([a.string()])},_3={initContext({backend:$,session:X},Z,W){return new b9({async start(j){try{let V=await $.ggmlStt.initContext(Z,{...W,onProgress:(N)=>{j.enqueue({event:"progress",data:{progress:N}})}});X.initializedContexts.add(Z),await new Promise((N)=>setTimeout(N,1000));let{download:Y,...J}=V||{};j.enqueue({event:"result",data:{result:J}}),j.close()}catch(V){j.error(V)}}})},async transcribe({backend:$,config:{server:X}},Z,W,j){return console.log("[Server] Transcribe:",{id:Z,audioPath:W,options:j}),$.ggmlStt.transcribe(Z,{audioPath:v9.join(X.temp_file_dir,W),options:j})},async transcribeData({backend:$},X,Z,W){return console.log("[Server] Transcribe Data:",{id:X,audioDataLength:Z?.length||0,options:W}),$.ggmlStt.transcribeData(X,{audioData:Z,options:W})},async releaseContext({backend:$,session:X},Z,W){if(console.log("[Server] Release STT Context:",{id:Z,force:W}),!X.initializedContexts.has(Z))return console.log("[Server] Release STT Context skipped - not initialized by this session:",{id:Z}),{released:!1,skipped:!0};return X.initializedContexts.delete(Z),$.ggmlStt.releaseContext(Z,{force:W})}}});var h9,A3,z3;var B3=P(()=>{G3();K3();L3();h9={common:U3,ggmlLlm:O3,ggmlStt:_3},A3={common:N3,ggmlLlm:H3,ggmlStt:q3},z3=h9});import{Buffer as G4}from"node:buffer";var R3=($)=>{try{return JSON.parse($,(X,Z)=>{if(!Z)return Z;if(Z?.type==="Buffer"&&Z?.data)return G4.from(Z.data,"base64");if(Z?.type==="Uint8Array"&&Z?.data){let W=G4.from(Z.data,"base64");return W.buffer.slice(W.byteOffset,W.byteOffset+W.byteLength)}if(Z?.type==="Error"&&Z?.name&&Z?.message)return Error(Z.name,Z.message);return Z})}catch{return $}},H4=($)=>{try{return JSON.stringify($,(X,Z)=>{if(Z instanceof 
Error)return{type:"Error",name:Z.name,message:Z.message};if(Z instanceof G4)return{type:"Buffer",data:Z.toString("base64")};if(Z instanceof Uint8Array)return{type:"Uint8Array",data:G4.from(Z).toString("base64")};return Z})}catch{return $}};var w3=()=>{};var H1="1.0",F3=8089;import I9 from"node:dgram";class O1{name="udp";socket=null;announcementTimer=null;config;getServerInfo;port;constructor($,X){this.config=$,this.getServerInfo=X,this.port=$.port??F3}async start(){if(this.socket=I9.createSocket({type:"udp4",reuseAddr:!0}),this.socket.on("message",($,X)=>{this.handleMessage($,X)}),this.socket.on("error",($)=>{console.error("[Autodiscover UDP] Socket error:",$.message)}),await new Promise(($,X)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),$()}),this.socket.once("error",X)}),this.config.announcements.enabled){let $=this.config.announcements.interval??5000;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},$),this.sendAnnouncement()}}async stop(){if(this.announcementTimer)clearInterval(this.announcementTimer),this.announcementTimer=null;if(this.socket)await new Promise(($)=>{this.socket.close(()=>$())}),this.socket=null}handleMessage($,X){try{let Z=JSON.parse($.toString());if(Z.t==="QUERY"&&this.config.requests.enabled){let W=Z.d,j=this.config.requests.responseDelay??0,V=j>0?Math.random()*j:0;setTimeout(()=>{this.sendResponse(W.id,X)},V)}}catch{}}sendAnnouncement(){if(!this.socket)return;let $=this.getServerInfo(),X={t:"ANNOUNCE",v:H1,d:{info:$}},Z=Buffer.from(JSON.stringify(X));this.socket.send(Z,0,Z.length,this.port,"255.255.255.255",(W)=>{if(W)console.error("[Autodiscover UDP] Announcement error:",W.message)})}sendResponse($,X){if(!this.socket)return;let Z=this.getServerInfo(),W={t:"RESPONSE",v:H1,d:{request_id:$,info:Z}},j=Buffer.from(JSON.stringify(W));this.socket.send(j,0,j.length,X.port,X.address,(V)=>{if(V)console.error("[Autodiscover UDP] Response 
error:",V.message)})}}var E3=()=>{};class K1{config;getServerInfo;transports=[];started=!1;constructor($,X){this.config=$;this.getServerInfo=X;if($.udp?.announcements?.enabled||$.udp?.requests?.enabled)this.transports.push(new O1($.udp,X))}async start(){if(this.started)return;(await Promise.allSettled(this.transports.map((X)=>X.start()))).forEach((X,Z)=>{if(X.status==="rejected")console.error(`[Autodiscover] Failed to start ${this.transports[Z].name}:`,X.reason)}),this.started=!0}async stop(){if(!this.started)return;await Promise.allSettled(this.transports.map(($)=>$.stop())),this.started=!1}}var M3=P(()=>{E3()});import C9 from"node:os";var O4=()=>{let $=C9.networkInterfaces();return Object.values($).flat().find((Z)=>Z?.family==="IPv4"&&!Z?.internal)?.address||null};var q1=()=>{};import c from"node:os";import x3 from"node:fs";import S3 from"node:path";import{execSync as K4}from"node:child_process";import P3 from"@iarna/toml";async function _1({modelIds:$=[],defaultConfig:X=null}={}){let Z=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.53"}`),console.log(`Generating model capabilities comparison...
40
- `),Z.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.53"}`),Z.push(`## Model Capabilities Comparison
41
- `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let W=(U={},Q={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Q||{}).forEach(([B,L])=>{if(L&&typeof L==="object"&&!Array.isArray(L))K[B]=W(K[B]||{},L);else K[B]=L}),K},j=X||{},{server:V,generators:Y=[],...J}=j,N=(U)=>W(JSON.parse(JSON.stringify(J)),U||{}),H=(U)=>{if(Array.isArray(Y)&&Y.length>0){let Q=Y.filter((K)=>K?.type==="ggml-llm");if(Q.length>0&&U){let K=Q.find((B)=>B.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},O=[];for(let U=0;U<$.length;U+=1){let Q=$[U];console.log(`[${U+1}/${$.length}] Analyzing ${Q}...`);let K=H(Q);K={...K||{},model:{...J.runtime,...K?.model||{},repo_id:Q}};let B=await t("ggml-llm",null,{config:K,includeBreakdown:!0});O.push({modelId:Q,capabilities:B,modelInfo:B.buttress?.selected||null,modelConfig:B.modelConfig||null})}let q=(U)=>U?(U/1024/1024/1024).toFixed(2):"N/A",_=(U)=>U?"✅":"\uD83D\uDEAB";Z.push("| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |"),Z.push("|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|"),O.forEach(({modelId:U,modelInfo:Q,modelConfig:K})=>{let B=q(Q?.modelBytes),L=K?.nCtx||Q?.kvInfo?.nCtxTrain||"N/A",F=G0(Q),E=Number(L),M=Q?.kvCacheBytes||(F&&Number.isFinite(E)&&E>0?F(E):F&&F(Q?.kvInfo?.nCtxTrain||0))||null,x=q(M),D=Q?.recurrentMemoryBytes||0,I=D>0?q(D):"-",b=q(Q?.modelBytes&&(M!=null||D>0)?Q.modelBytes+(M||0)+D:Q?.fit?.totalRequiredBytes),u=_(Q?.fit?.fitsInGpu),r=_(Q?.fit?.fitsInCpu);Z.push(`| ${U} | ${B} | ${L} | ${x} | ${I} | ${b} | ${u} | ${r} |`);let k=Q?.memoryLimitedCtx!=null||Q?.limitedFit!=null,d=!Q?.fit?.fitsInGpu||!Q?.fit?.fitsInCpu;if(k&&d){let 
v=Q?.memoryLimitedCtx||L,S=Number(v),C=Q?.limitedKvCacheBytes||F&&Number.isFinite(S)&&S>0&&F(S)||null,h=q(C),p=q(Q?.modelBytes&&(C!=null||D>0)?Q.modelBytes+(C||0)+D:Q?.limitedFit?.totalRequiredBytes),f=_(Q?.limitedFit?.fitsInGpu),s=_(Q?.limitedFit?.fitsInCpu);if(v!==L||h!==x||p!==b)Z.push(`| ↳ Limited | ${B} | ${v} | ${h} | ${I} | ${p} | ${f} | ${s} |`)}}),Z.push(`
42
- ---`),Z.push(`
43
- ### System Information`);let G=null;if(process.platform!=="win32")try{G=K4("uname -a",{encoding:"utf8"}).trim()}catch{}if(G)Z.push(`- **System:** ${G}`);else Z.push(`- **Hostname:** ${c.hostname()}`),Z.push(`- **OS:** ${c.type()} ${c.release()}`);if(Z.push(`- **Platform:** ${process.platform}`),Z.push(`- **CPU Cores:** ${c.cpus().length}`),Z.push(`- **Total System Memory:** ${(c.totalmem()/1024/1024/1024).toFixed(2)} GB`),O.length>0){let Q=O[0].capabilities.buttress?.selected;if(Q){let K=Q.cpuTotalBytes>0?(Q.cpuUsableBytes/Q.cpuTotalBytes*100).toFixed(0):0;if(Z.push(`- **Usable CPU Memory:** ${(Q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(Q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Q.hasGpu){let B=Q.devices.filter((L)=>L.type==="gpu");if(B.length>0){let L=B[0];Z.push(`- **GPU Backend:** ${L.backend}`),Z.push(`- **GPU Name:** ${L.deviceName}`),Z.push(`- **GPU Total Memory:** ${(L.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Q.gpuTotalBytes>0?(Q.gpuUsableBytes/Q.gpuTotalBytes*100).toFixed(0):0;Z.push(`- **GPU Usable Memory:** ${(Q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else Z.push("- **GPU:** Not available")}}Z.push(`
44
- ### Command Used`);let w=process.argv.slice(2).join(" ");if(Z.push(`\`\`\`bash
45
- ${process.argv[0]} ${process.argv[1]} ${w}
46
- \`\`\``),Z.push(`
47
- ### Package Information`),Z.push(`- **Name:** ${"@fugood/buttress-server"}`),Z.push(`- **Version:** ${"2.23.0-beta.53"}`),X&&Object.keys(X).length>0){Z.push(`
48
- ### Configuration`),Z.push("<details>"),Z.push("<summary>Click to expand TOML configuration</summary>"),Z.push("\n```toml");try{let U=P3.stringify(X);Z.push(U)}catch(U){Z.push("# Error serializing config"),Z.push(JSON.stringify(X,null,2))}Z.push("```"),Z.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,R=S3.join(process.cwd(),A);x3.writeFileSync(R,Z.join(`
49
- `),"utf8"),console.log(`
50
- Model capabilities table saved to: ${R}`),process.exit(0)}catch(W){console.error("Failed to generate model table:",W.message),process.exit(1)}}async function k3({modelId:$=null,defaultConfig:X=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.53"}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let Z=X||{},{server:W,generators:j=[],...V}=Z,Y=(G={},w={})=>{let z=Array.isArray(G)?[...G]:{...G};return Object.entries(w||{}).forEach(([A,R])=>{if(R&&typeof R==="object"&&!Array.isArray(R))z[A]=Y(z[A]||{},R);else z[A]=R}),z},J=(G)=>Y(JSON.parse(JSON.stringify(V)),G||{}),H=((G)=>{if(Array.isArray(j)&&j.length>0){let w=j.filter((z)=>z?.type==="ggml-llm");if(w.length>0){if(G){let z=w.find((A)=>A.model?.repo_id===G);if(z)return J(z)}}}if(Object.keys(V).length>0)return J({});return null})($);if($)H={...H||{},model:{...H?.model||{},repo_id:$}};let O=await t("ggml-llm",null,{config:H,includeBreakdown:!0}),q=O.buttress?.selected||null,_=O.modelConfig||null;if($||_?.repoId){console.log(`
51
- === Model Information ===`);let G=$||_?.repoId;if(console.log(`Repository ID: ${G}`),_?.quantization)console.log(`Quantization: ${_.quantization}`);if(_?.nCtx)console.log(`Context Length: ${_.nCtx}`);if(q?.quantization){let{fileType:A}=q.quantization;if(A!=null)console.log(`Model File Type (GGUF): ${A}`)}let w=_?.cache_type_k||"f16",z=_?.cache_type_v||"f16";if(console.log(`KV Cache Type: K=${w}, V=${z}`),q?.modelBytes&&q?.kvCacheBytes!=null){if(console.log(`Model Size: ${(q.modelBytes/1024/1024/1024).toFixed(2)} GB`),q.kvInfo)console.log(`KV Cache Size: ${(q.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(q.kvInfo)})`);else console.log(`KV Cache Size: ${(q.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);let A=q.recurrentMemoryBytes||0;if(A>0)console.log(`Recurrent Memory: ${(A/1024/1024/1024).toFixed(2)} GB`);let R=q.modelBytes+q.kvCacheBytes+A;if(console.log(`Total Required Memory: ${(R/1024/1024/1024).toFixed(2)} GB`),q.memoryLimitedCtx!=null){let U=q.memoryLimitedCtx,Q=q.kvInfo?.nCtxTrain;if(Q)console.log(`
52
- Memory-Limited Context: ${U} (Train: ${Q})`);else console.log(`
53
- Memory-Limited Context: ${U}`);if(q.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(q.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(O.buttress?.selected?.fit){let{totalRequiredBytes:A}=O.buttress.selected.fit;console.log(`Total Required Memory: ${(A/1024/1024/1024).toFixed(2)} GB`)}}if(O.buttress?.selected){let{selected:G}=O.buttress;console.log(`
54
- === Hardware Information ===`);let w=null;if(process.platform!=="win32")try{w=K4("uname -a",{encoding:"utf8"}).trim()}catch{}if(w)console.log(`System: ${w}`);else console.log(`Hostname: ${c.hostname()}`),console.log(`OS: ${c.type()} ${c.release()}`);console.log(`Platform: ${G.platform}`),console.log(`CPU Cores: ${c.cpus().length}`),console.log(`Total System Memory: ${(c.totalmem()/1024/1024/1024).toFixed(2)} GB`);let z=G.cpuTotalBytes>0?(G.cpuUsableBytes/G.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(G.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${z}% of ${(G.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),G.hasGpu)console.log(`
55
- --- GPU Details ---`),G.devices.filter((R)=>R.type==="gpu").forEach((R)=>{console.log(`GPU Backend: ${R.backend}`),console.log(`GPU Name: ${R.deviceName}`),console.log(`GPU Total Memory: ${(R.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let U=G.gpuTotalBytes>0?(G.gpuUsableBytes/G.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(G.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${U}% of ${(G.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),R.metadata){if(R.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(R.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
56
- Backend Variant: ${G.variant}`),console.log(`Performance Score: ${G.score}`),G.fit){if(console.log(`
57
- --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${G.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${G.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${G.fit.limiting}`),G.limitedFit)console.log(`
58
- --- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(G.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${G.limitedFit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU (Limited): ${G.limitedFit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor (Limited): ${G.limitedFit.limiting}`)}}console.log(`
59
- === Full Capabilities JSON ===`),console.log(JSON.stringify(O,null,2)),process.exit(0)}catch(Z){console.error("Failed to get capabilities:",Z.message),process.exit(1)}}async function L1({modelIds:$=[],defaultConfig:X=null}={}){let Z=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.53"}`),console.log(`Generating STT model capabilities comparison...
60
- `),Z.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.53"}`),Z.push(`## STT Model Capabilities Comparison
61
- `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let W=(U={},Q={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Q||{}).forEach(([B,L])=>{if(L&&typeof L==="object"&&!Array.isArray(L))K[B]=W(K[B]||{},L);else K[B]=L}),K},j=X||{},{server:V,generators:Y=[],...J}=j,N=(U)=>W(JSON.parse(JSON.stringify(J)),U||{}),H=(U)=>{if(Array.isArray(Y)&&Y.length>0){let Q=Y.filter((K)=>K?.type==="ggml-stt");if(Q.length>0&&U){let K=Q.find((B)=>B.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},O=[];for(let U=0;U<$.length;U+=1){let Q=$[U],{repoId:K,filename:B}=T3(Q);console.log(`[${U+1}/${$.length}] Analyzing ${Q}...`);let L=H(K);L={...L||{},model:{...L?.model||{},repo_id:K,...B&&{filename:B}}};let F=await t("ggml-stt",null,{config:L,includeBreakdown:!0});O.push({modelId:Q,repoId:K,filename:B,capabilities:F,modelInfo:F.buttress?.selected||null,modelConfig:F.modelConfig||null})}let q=(U)=>U?(U/1024/1024).toFixed(1):"N/A",_=(U)=>U?"✅":"\uD83D\uDEAB";Z.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),Z.push("|-------|-----------|------------------------|---------------------|----------|----------|"),O.forEach(({modelId:U,modelInfo:Q})=>{let K=q(Q?.modelBytes),B=q(Q?.processingBytes||Q?.kvCacheBytes),L=q(Q?.fit?.totalRequiredBytes),F=_(Q?.fit?.fitsInGpu),E=_(Q?.fit?.fitsInCpu);Z.push(`| ${U} | ${K} | ${B} | ${L} | ${F} | ${E} |`)}),Z.push(`
62
- ---`),Z.push(`
63
- ### System Information`);let G=null;if(process.platform!=="win32")try{G=K4("uname -a",{encoding:"utf8"}).trim()}catch{}if(G)Z.push(`- **System:** ${G}`);else Z.push(`- **Hostname:** ${c.hostname()}`),Z.push(`- **OS:** ${c.type()} ${c.release()}`);if(Z.push(`- **Platform:** ${process.platform}`),Z.push(`- **CPU Cores:** ${c.cpus().length}`),Z.push(`- **Total System Memory:** ${(c.totalmem()/1024/1024/1024).toFixed(2)} GB`),O.length>0){let Q=O[0].capabilities.buttress?.selected;if(Q){let K=Q.cpuTotalBytes>0?(Q.cpuUsableBytes/Q.cpuTotalBytes*100).toFixed(0):0;if(Z.push(`- **Usable CPU Memory:** ${(Q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(Q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Q.hasGpu){let B=Q.devices.filter((L)=>L.type==="gpu");if(B.length>0){let L=B[0];Z.push(`- **GPU Backend:** ${L.backend}`),Z.push(`- **GPU Name:** ${L.deviceName}`),Z.push(`- **GPU Total Memory:** ${(L.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Q.gpuTotalBytes>0?(Q.gpuUsableBytes/Q.gpuTotalBytes*100).toFixed(0):0;Z.push(`- **GPU Usable Memory:** ${(Q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else Z.push("- **GPU:** Not available")}}Z.push(`
64
- ### Command Used`);let w=process.argv.slice(2).join(" ");if(Z.push(`\`\`\`bash
65
- ${process.argv[0]} ${process.argv[1]} ${w}
66
- \`\`\``),Z.push(`
67
- ### Package Information`),Z.push(`- **Name:** ${"@fugood/buttress-server"}`),Z.push(`- **Version:** ${"2.23.0-beta.53"}`),X&&Object.keys(X).length>0){Z.push(`
68
- ### Configuration`),Z.push("<details>"),Z.push("<summary>Click to expand TOML configuration</summary>"),Z.push("\n```toml");try{let U=P3.stringify(X);Z.push(U)}catch(U){Z.push("# Error serializing config"),Z.push(JSON.stringify(X,null,2))}Z.push("```"),Z.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,R=S3.join(process.cwd(),A);x3.writeFileSync(R,Z.join(`
69
- `),"utf8"),console.log(`
70
- STT model capabilities table saved to: ${R}`),process.exit(0)}catch(W){console.error("Failed to generate STT model table:",W.message),process.exit(1)}}async function D3({modelId:$=null,defaultConfig:X=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.53"}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:Z,filename:W}=T3($),j=X||{},{server:V,generators:Y=[],...J}=j,N=(z={},A={})=>{let R=Array.isArray(z)?[...z]:{...z};return Object.entries(A||{}).forEach(([U,Q])=>{if(Q&&typeof Q==="object"&&!Array.isArray(Q))R[U]=N(R[U]||{},Q);else R[U]=Q}),R},H=(z)=>N(JSON.parse(JSON.stringify(J)),z||{}),q=((z)=>{if(Array.isArray(Y)&&Y.length>0){let A=Y.filter((R)=>R?.type==="ggml-stt");if(A.length>0){if(z){let R=A.find((U)=>U.model?.repo_id===z);if(R)return H(R)}}}if(Object.keys(J).length>0)return H({});return null})(Z);if(Z)q={...q||{},model:{...J.runtime,...q?.model||{},repo_id:Z,...W&&{filename:W}}};let _=await t("ggml-stt",null,{config:q,includeBreakdown:!0}),G=_.buttress?.selected||null,w=_.modelConfig||null;if(Z||w?.repoId){console.log(`
71
- === Model Information ===`);let z=Z||w?.repoId;if(console.log(`Repository ID: ${z}`),W)console.log(`Filename: ${W}`);if(G?.modelBytes)console.log(`Model Size: ${(G.modelBytes/1024/1024).toFixed(1)} MB`);let A=G?.processingBytes||G?.kvCacheBytes;if(A)console.log(`Processing Buffer: ${(A/1024/1024).toFixed(1)} MB`);if(G?.modelBytes&&A)console.log(`Total Required Memory: ${((G.modelBytes+A)/1024/1024).toFixed(1)} MB`);else if(_.buttress?.selected?.fit){let{totalRequiredBytes:R}=_.buttress.selected.fit;console.log(`Total Required Memory: ${(R/1024/1024).toFixed(1)} MB`)}}if(_.buttress?.selected){let{selected:z}=_.buttress;console.log(`
72
- === Hardware Information ===`);let A=null;if(process.platform!=="win32")try{A=K4("uname -a",{encoding:"utf8"}).trim()}catch{}if(A)console.log(`System: ${A}`);else console.log(`Hostname: ${c.hostname()}`),console.log(`OS: ${c.type()} ${c.release()}`);console.log(`Platform: ${z.platform}`),console.log(`CPU Cores: ${c.cpus().length}`),console.log(`Total System Memory: ${(c.totalmem()/1024/1024/1024).toFixed(2)} GB`);let R=z.cpuTotalBytes>0?(z.cpuUsableBytes/z.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(z.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${R}% of ${(z.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),z.hasGpu)console.log(`
73
- --- GPU Details ---`),z.devices.filter((Q)=>Q.type==="gpu").forEach((Q)=>{console.log(`GPU Backend: ${Q.backend}`),console.log(`GPU Name: ${Q.deviceName}`),console.log(`GPU Total Memory: ${(Q.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let K=z.gpuTotalBytes>0?(z.gpuUsableBytes/z.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(z.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(z.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Q.metadata){if(Q.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(Q.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
74
- Backend Variant: ${z.variant}`),console.log(`Performance Score: ${z.score}`),z.fit)console.log(`
75
- --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${z.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${z.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${z.fit.limiting}`)}console.log(`
76
- === Full Capabilities JSON ===`),console.log(JSON.stringify(_,null,2)),process.exit(0)}catch(Z){console.error("Failed to get capabilities:",Z.message),process.exit(1)}}var T3=($)=>{if(!$)return{repoId:null,filename:null};let[X,Z]=$.split(":");return{repoId:X,filename:Z||null}};var b3=P(()=>{V4();M0()});var d9={};import y9 from"node:fs";import u9 from"node:path";import f9 from"@iarna/toml";async function g9($){if(!$?.generators||!Array.isArray($.generators))return;let X=$.generators.filter((O)=>{if(!O.model?.download)return!1;let{type:q}=O;if(!q||q!=="ggml-llm"&&q!=="ggml-stt")return console.warn(`[Download] Skipping unknown generator type: ${q}`),!1;return!0});if(X.length===0)return;let{server:Z,generators:W,...j}=$,V=X.map((O)=>{let{type:q}=O,_=O.model?.repo_id;console.log(`[Download] Starting pre-download for ${q}: ${_}`);let G={...j,backend:O.backend||{},model:O.model||{},runtime:{...j.runtime,...O.runtime||{}}};return Q4(q,G,{onProgress:()=>{},onComplete:({repoId:w,alreadyExists:z})=>{if(z)console.log(`[Download] Pre-download complete (already exists): ${w}`);else console.log(`[Download] Pre-download complete: ${w}`)},onError:(w)=>{console.error(`[Download] Pre-download failed for ${_}:`,w.message)}})}),Y=await Promise.all(V),J=Y.filter((O)=>O.started).length,N=Y.filter((O)=>O.alreadyExists).length,H=Y.filter((O)=>O.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${J} started, ${N} already exist, ${H} already downloading`)}var v3,h3,I3,q4,J0=null,p9,m9,C3,_4,c9;var y3=P(async()=>{b3();b0();q1();await g3();if(process.argv.includes("--version")||process.argv.includes("-v"))console.log("2.23.0-beta.53"),process.exit(0);if(process.argv.includes("--help")||process.argv.includes("-h"))console.log(`
77
- bricks-buttress v${"2.23.0-beta.53"}
78
-
79
- Buttress server for remote inference with GGML backends.
80
-
81
- Usage:
82
- bricks-buttress [options]
83
-
84
- Options:
85
- -h, --help Show this help message
86
- -v, --version Show version number
87
- -p, --port <port> Port to listen on (default: 2080)
88
- -c, --config <path|toml> Path to TOML config file or inline TOML string
89
-
90
- Testing Options:
91
- --test-caps <backend> Test model capabilities (ggml-llm or ggml-stt)
92
- --test-caps-model-id <id> Model ID to test (used with --test-caps)
93
- --test-models <ids> Comma-separated list of model IDs to test
94
- --test-models-default Test default set of models
95
-
96
- Note: --test-models and --test-models-default output a markdown report
97
- file (e.g., ggml-llm-model-capabilities-YYYY-MM-DD.md)
98
-
99
- Environment Variables:
100
- NODE_ENV Set to 'development' for dev mode
101
- ENABLE_OPENAI_COMPAT_ENDPOINT Set to '1' to enable OpenAI-compatible API
102
-
103
- Examples:
104
- bricks-buttress
105
- bricks-buttress --port 3000
106
- bricks-buttress --config ./config.toml
107
- bricks-buttress --test-caps ggml-llm --test-models-default
108
- bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
109
- `),process.exit(0);v3=process.argv.findIndex(($)=>$==="--port"||$==="-p"),h3=v3>=0?Number(process.argv[v3+1]):void 0,I3=process.argv.findIndex(($)=>$==="--config"||$==="-c"),q4=I3>=0?process.argv[I3+1]:null;if(q4){let $;if(q4.includes(`
110
- `))$=q4;else{let X=u9.resolve(q4);try{$=y9.readFileSync(X,"utf8")}catch(Z){console.error(`Failed to read Buttress config at ${X}:`,Z),process.exit(1)}}try{let X=f9.parse($);if(X.env&&typeof X.env==="object")Object.entries(X.env).forEach(([Z,W])=>{if(process.env[Z]===void 0)process.env[Z]=String(W)}),delete X.env;J0=X}catch(X){console.error("Failed to parse TOML config:",X),process.exit(1)}}p9=["ggml-org/gpt-oss-20b-GGUF","ggml-org/gpt-oss-120b-GGUF","unsloth/Nemotron-3-Nano-30B-A3B-GGUF","unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF","unsloth/GLM-4.7-Flash-GGUF","bartowski/Mistral-Nemo-Instruct-2407-GGUF","mistralai/Magistral-Small-2509-GGUF","mistralai/Ministral-3-14B-Reasoning-2512-GGUF","bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF","bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF","ggml-org/gemma-3-12b-it-qat-GGUF","ggml-org/gemma-3-27b-it-qat-GGUF","unsloth/phi-4-GGUF"],m9=["BricksDisplay/whisper-ggml:ggml-small.bin","BricksDisplay/whisper-ggml:ggml-small-q8_0.bin","BricksDisplay/whisper-ggml:ggml-medium.bin","BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3.bin"],C3=process.argv.findIndex(($)=>$==="--test-caps");if(C3>=0){let $=process.argv[C3+1]||"ggml-llm";if($!=="ggml-llm"&&$!=="ggml-stt")console.error("Only ggml-llm and ggml-stt backends are supported for testing capabilities"),process.exit(1);let X=process.argv.findIndex((W)=>W==="--test-models"),Z=process.argv.includes("--test-models-default");if($==="ggml-stt")if(X>=0){let W=process.argv[X+1];if(!W)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let j=W.split(",").map((V)=>V.trim());await L1({modelIds:j,defaultConfig:J0})}else if(Z)await L1({modelIds:m9,defaultConfig:J0});else{let W=process.argv.findIndex((V)=>V==="--test-caps-model-id"),j=W>=0?process.argv[W+1]:null;await 
D3({modelId:j,defaultConfig:J0})}else if(X>=0){let W=process.argv[X+1];if(!W)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let j=W.split(",").map((V)=>V.trim());await _1({modelIds:j,defaultConfig:J0})}else if(Z)await _1({modelIds:p9,defaultConfig:J0});else{let W=process.argv.findIndex((V)=>V==="--test-caps-model-id"),j=W>=0?process.argv[W+1]:null;await k3({modelId:j,defaultConfig:J0})}}_4=G1(J0);if(h3)_4.server.port=h3;if(!_4.server.port)_4.server.port=2080;c9=process.env.ENABLE_OPENAI_COMPAT_ENDPOINT==="1";f3({config:_4,enableOpenAICompat:c9}).then(async({port:$,openaiEnabled:X,autoDiscover:Z})=>{let W=O4();if(console.log(`Buttress server listening on port ${$}`),console.log("--------------------------------"),await u3(),console.log(),console.log("Current supported Generators:"),console.log("- LLM (GGML)"),console.log("- STT (GGML)"),console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${W}:${$} to connect to this server via LAN.`),console.log(`- Visit http://${W}:${$}/status to see status via LAN.`),console.log(),X)console.log("OpenAI-compatible API [EXPERIMENTAL]:"),console.log(`- Base URL: http://${W}:${$}/oai-compat/v1`),console.log(`- Chat completions: POST http://${W}:${$}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${W}:${$}/oai-compat/v1/models`),console.log();else console.log("OpenAI-compatible API [EXPERIMENTAL]: disabled"),console.log(" Set ENABLE_OPENAI_COMPAT_ENDPOINT=1 to enable"),console.log();if(Z)console.log("Auto-discover enabled"),console.log();if(J0)await g9(J0)}).catch(($)=>{console.error("Failed to start Buttress server:",$),process.exitCode=1})});import{node as l9}from"@elysiajs/node";import{Elysia as i9,t as v0}from"elysia";import{ReadableStream as s9}from"node:stream/web";import n9 from"node:fs/promises";import{ZodError as r9}from"zod";var o9=async()=>{let 
$=`https://registry.npmjs.org/${"@fugood/buttress-server"}/latest`;try{let X=new AbortController,Z=setTimeout(()=>X.abort(),3000),W=await fetch($,{headers:{Accept:"application/json"},signal:X.signal});if(clearTimeout(Z),!W.ok)return null;return(await W.json()).version||null}catch{return null}},a9=($,X)=>{if(!X)return!1;let Z=$.split(/[.-]/),W=X.split(/[.-]/);for(let j=0;j<Math.max(Z.length,W.length);j+=1){let V=parseInt(Z[j])||0,Y=parseInt(W[j])||0;if(Y>V)return!0;if(Y<V)return!1}return!1},t9=($)=>{console.log(""),console.log("\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m"),console.log("\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m","2.23.0-beta.53".padEnd(12),$.padEnd(12),"\x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m","@fugood/buttress-server".padEnd(27)),console.log("\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m"),console.log("")},u3=async()=>{try{let $=await o9();if($&&a9("2.23.0-beta.53",$))t9($)}catch($){}},e9,$$=async({backend:$,router:X,config:Z,enableOpenAICompat:W})=>{try{await n9.mkdir(Z.server.temp_file_dir,{recursive:!0})}catch{}let j=O4()||"0.0.0.0",V={id:Z.server.id,name:Z.server.name,version:"2.23.0-beta.53",address:j,port:Z.server.port,url:`http://${j}:${Z.server.port}`,generators:V3(Z,Z.generators.map((N)=>N.type)),authentication:{required:!0,type:"device-group"}},Y=new i9({serve:{maxRequestBodySize:Z.server.max_body_size},websocket:{idleTimeout:Math.ceil(Z.server.session_timeout/1000)},adapter:e9?l9():void 0}).state({sessions:new Map,backend:$||J4,config:Z,serverInfo:V});if(X)Y.use(X);if(Z.autodiscover?.http?.enabled)Y.use(j1(Z));if(Y.use(Q1),Y.use(V1),W)Y.use(U4(Z));let J={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603};return Y.ws("/buttress/rpc",{parse:(N,H)=>{if(typeof 
H==="string")try{return JSON.parse(H)}catch{return N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_REQUEST,message:"Invalid request"},id:null})),null}return H},body:v0.Object({jsonrpc:v0.String(),method:v0.String(),params:v0.String(),id:v0.String()}),open(N){let H=N.id??N.raw?.id??N.remoteAddress;if(console.log(`[Request] New connection: ${H}`),!N.data.store.sessions.has(H))N.data.store.sessions.set(H,{streamReaders:new Map,generators:new Set,initializedContexts:new Set,timeout:null});else{let O=N.data.store.sessions.get(H);clearTimeout(O.timeout),O.timeout=null}},async message(N,{id:H,method:O,params:q}){let _=N.id??N.raw?.id??N.remoteAddress;console.log(`[Request] Received request from ${_}: ${O}`);let G=N.data.store.sessions.get(_),[w,z]=O.split("."),A=z3[w]?.[z];if(!A){N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.METHOD_NOT_FOUND,message:"Method not found"},id:H}));return}try{if(O==="cancel"){if(G.streamReaders.has(H))G.streamReaders.get(H)?.cancel(),G.streamReaders.delete(H);return}if(O==="ping"){N.send(JSON.stringify({jsonrpc:"2.0",result:"pong",id:H}));return}let R=R3(q),U=A3[w]?.[z],Q=U?U.parse(R):R,K={...N.data.store,peerId:_,session:G},B=await A(K,...Q);if(B instanceof s9){let L=B.getReader();G.streamReaders.set(H,L),N.send(JSON.stringify({jsonrpc:"2.0",result:{type:"stream"},id:H}));try{while(!0){let{value:F,done:E}=await L.read();if(E)break;let{event:M,data:x}=F;N.send(JSON.stringify({jsonrpc:"2.0",method:`notification/${M}`,params:H4(x),id:H}))}N.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_end",id:H}))}catch(F){console.error(F),N.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_error",params:H4(F),id:H}))}G.streamReaders.delete(H)}else N.send(JSON.stringify({jsonrpc:"2.0",result:H4(B),id:H}))}catch(R){if(R instanceof r9){N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_PARAMS,message:"Invalid 
params",data:R.issues},id:H}));return}console.error(R),N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INTERNAL_ERROR,message:String(R)},id:H}))}},async close(N){let H=N.id??N.raw?.id??N.remoteAddress;console.log(`[Request] Connection closed: ${H}`);let{backend:O,sessions:q}=N.data.store,_=q.get(H);if(!_)return;_.streamReaders.forEach((G)=>G.cancel()),_.streamReaders.clear(),_.timeout=setTimeout(()=>{q.delete(H),console.log(`[Request] Session timed out: ${H}`);let{generators:G}=_;G.forEach((w)=>{O.finalizeGenerator(w)})},Z.server.session_timeout)}}),{app:Y,config:Z}},f3=async({backend:$,router:X,config:Z,enableOpenAICompat:W=!1})=>{let{app:j,config:V}=await $$({backend:$,router:X,config:Z,enableOpenAICompat:W}),{server:{port:Y}}=V,J=[new Promise((H)=>j.listen(Y,H))],N=null;if(V.autodiscover)N=new K1(V.autodiscover,()=>j.store.serverInfo),J.push(N.start());return await Promise.all(J),{app:j,port:Y,openaiEnabled:W,autoDiscover:N}};var g3=P(async()=>{V4();Z3();B3();w3();b0();M3();q1();V4();b0();e9=typeof process<"u"&&process.versions&&process.versions.node;if(R1.main==R1.module)await y3().then(() => d9)});await g3();export{f3 as startServer,Q4 as startModelDownload,G1 as processConfig,t9 as logUpdateMessage,$$ as createServer,a9 as compareVersions,o9 as checkForUpdates,u3 as checkAndNotifyUpdates};