@fugood/buttress-server 2.24.0-beta.1 → 2.24.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/sample.toml +13 -3
- package/lib/index.mjs +3 -3
- package/package.json +2 -2
package/config/sample.toml
CHANGED
|
@@ -11,9 +11,6 @@
|
|
|
11
11
|
# HF_TOKEN = "your_huggingface_token_here"
|
|
12
12
|
# CUDA_VISIBLE_DEVICES = "0"
|
|
13
13
|
|
|
14
|
-
[autodiscover.http]
|
|
15
|
-
enabled = true
|
|
16
|
-
|
|
17
14
|
[server]
|
|
18
15
|
port = 2080
|
|
19
16
|
log_level = "info"
|
|
@@ -101,6 +98,19 @@ repo_id = "unsloth/Nemotron-3-Nano-30B-A3B-GGUF"
|
|
|
101
98
|
quantization = "q4_0"
|
|
102
99
|
n_ctx = 51200
|
|
103
100
|
|
|
101
|
+
[[generators]]
|
|
102
|
+
type = "ggml-llm"
|
|
103
|
+
[generators.backend]
|
|
104
|
+
variant = "default"
|
|
105
|
+
variant_preference = ["cuda", "vulkan", "snapdragon", "default"]
|
|
106
|
+
gpu_memory_fraction = 0.8
|
|
107
|
+
cpu_memory_fraction = 0.8
|
|
108
|
+
[generators.model]
|
|
109
|
+
repo_id = "DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF"
|
|
110
|
+
quantization = "q2_k"
|
|
111
|
+
n_ctx = 64000
|
|
112
|
+
kv_unified = true
|
|
113
|
+
|
|
104
114
|
[[generators]]
|
|
105
115
|
type = "ggml-llm"
|
|
106
116
|
[generators.backend]
|
package/lib/index.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import*as o from"node:stream/web";import{ReadableStream as s}from"node:stream/web";import c,{mkdir as l,open as u,readFile as d,readdir as f,rename as p,stat as m,unlink as h,writeFile as g}from"node:fs/promises";import _ from"node:path";import v from"node:os";import{createHash as y}from"node:crypto";import{gguf as b}from"@huggingface/gguf";import{getBackendDevicesInfo as x,isLibVariantAvailable as S,loadModel as C}from"@fugood/llama.node";import w from"bytes";import{EventEmitter as T}from"node:events";import{initWhisper as E}from"@fugood/whisper.node";import D from"node:fs";import{execSync as O}from"node:child_process";import k from"@iarna/toml";import{ZodError as A,z as j}from"zod";import{fileURLToPath as ee}from"node:url";import{cors as M}from"@elysiajs/cors";import N from"node-machine-id";import P from"ms";import{Buffer as F}from"node:buffer";import te from"node:dgram";const I=1024**3,L=(e,t,n)=>Math.min(Math.max(e,t),n),R=e=>e?40:0,ne=(e=0)=>e?L(e/(12*I)*20,0,20):0,z=(e=0)=>e?L(e/(32*I)*10,0,10):0,B=e=>e?10:0,re=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},V=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=R(n)+re(t,e)+ne(r),s=z(i),c=B(a);return Math.min(100,Math.round(o+s+c))},ie=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:R(n),variant:re(t,e),gpuMemory:ne(r),cpuMemory:z(i),availability:B(a)}),ae=[`cuda`,`vulkan`,`snapdragon`,`default`],oe=.85,H=.5,se=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],ce=e=>e&&String(e).trim().toLowerCase()||null,le=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=ae}={})=>{let 
i=[];e&&i.push(e),i.push(...se(t)),i.push(...se(n)),i.push(...r);let a=i.map(ce).filter(Boolean);return Array.from(new Set(a))},ue=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},de=e=>Array.isArray(e)?e.map(e=>({...e})):[],fe=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,pe=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=de(fe(n,r)),l=c.some(ue),u=c.filter(e=>ue(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:V(m),breakdown:o?ie(m):null,error:s,timestamp:new Date().toISOString()}},me=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},he=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=oe,cpuMemoryFraction:a=H,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=ae}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=le({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t of 
h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(pe({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(pe({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=me({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=me({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},ge=`ggml-llm`,_e=[`cuda`,`vulkan`,`default`],ve=new Map([[ge,he],[`ggml-stt`,async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=oe,cpuMemoryFraction:a=H,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>he({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:_e,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:_e})]]),ye=async({platform:e,totalMemoryInBytes:t,backend:n=ge,dependencies:r,...i}={})=>{let a=ve.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await 
a({...i,dependencies:r,totalMemoryInBytes:t,platform:e})},be={f16:2,f32:4,q8_0:1,q6_k:.75,q5_k:.625,q5_k_m:.625,q5_k_s:.625,q5_1:.625,q5_0:.625,q4_k:.5,q4_k_m:.5,q4_k_s:.5,q4_1:.5,q4_0:.5,iq4_nl:.5},xe=e=>be[e?String(e).toLowerCase():`f16`]||be.f16,Se=(e,t,n,r,i,a={},{totalLayers:o=null,swaLayers:s=0,swaContext:c=null,swaContextMultiplier:l=1,swaAdditionalTokens:u=0,swaFull:d=!1}={})=>{if(!e||!t||!n||!r||!i)return 0;let f=o!=null&&o!==void 0?Number(o):Number(e),p=Math.max(0,Math.floor(f));if(!p)return 0;let m=xe(a.k),h=xe(a.v),g=Number(n)*(Number(r)*m+Number(i)*h);if(!g)return 0;let _=Math.max(0,Number(t)||0),v=Math.min(p,Math.max(0,Math.floor(Number(s)||0))),y=Math.max(0,p-v),b=c!=null&&Number.isFinite(Number(c))?Math.max(0,Number(c)):_,x=Math.max(1,Number(l)||1),S=Math.max(0,Number(u)||0),C=b*x+S,w=d?_:Math.min(_,C),T=y*_+v*Math.max(0,Math.floor(w));return Math.round(g*T)},Ce=({modelBytes:e=0,audioLengthSeconds:t=30,sampleRate:n=16e3,bytesPerSample:r=4}={})=>{let i=Math.max(0,Number(e)||0),a=Math.max(0,Math.floor(Math.max(0,t)*n*r)),o=1024*1024,s=1024*o,c;c=i<200*o?120*o:i<500*o?140*o:i<2*s?150*o:160*o;let l;l=i<200*o?70*o:i<500*o?135*o:(2*s,220*o);let u;u=i<100*o?20*o:i<200*o?30*o:i<500*o?85*o:i<2*s?215*o:360*o;let d=c+l+u;return{modelBytes:i,audioBufferBytes:a,processingBufferBytes:d,totalBytes:i+d+a}},we=e=>e?String(e).trim().toLowerCase():null,Te=(e={},t=null)=>{if(!e)return null;let n=we(t),r=n?`${n}.attention.sliding_window`:null,i=(r&&e[r]!=null?e[r]:null)??e[`llama.attention.sliding_window`];if(i==null)return null;let a=Number(i);return Number.isFinite(a)?a:null},Ee=(e=0,t=0,n=!1)=>{let r=Math.max(0,Math.floor(Number(e)||0)),i=Math.max(0,Math.floor(Number(t)||0));if(!r||i===1)return 0;if(i<=0)return r;let a=Math.max(0,i-1),o=Math.floor(r/i),s=r%i,c=n?Math.max(0,s-1):Math.min(s,a);return 
o*a+c},De=({arch:e,nLayer:t=0})=>({arch:we(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Oe=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),ke=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=we(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=Te(t,r),o=r?Oe.get(r):null;if(!o)return De({arch:r,nLayer:n});let s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return De({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=Ee(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Ae=new 
Set([`mamba`,`mamba2`,`rwkv6`,`rwkv6qwen2`,`rwkv7`,`arwkv7`]),je=new Set([`jamba`,`falcon-h1`,`plamo2`,`granitehybrid`,`lfm2`,`lfm2moe`,`nemotron_h`,`nemotron_h_moe`,`qwen3next`]),Me=e=>e?String(e).trim().toLowerCase():null,Ne=e=>{let t=Me(e);return t?Ae.has(t):!1},Pe=e=>{let t=Me(e);return t?je.has(t):!1},Fe=e=>Ne(e)?`recurrent`:Pe(e)?`hybrid`:`transformer`,Ie=(e={})=>{let t=e[`general.architecture`],n=(t,n=null)=>{let r=e[t],i=Number(r);return Number.isFinite(i)?i:n},r=(t,n=null)=>{let r=e[t];if(Array.isArray(r))return r;let i=Number(r);return Number.isFinite(i)?i:n},i=t?n(`${t}.context_length`,n(`llama.context_length`)):null,a=t?n(`${t}.block_count`,n(`llama.block_count`)):null,o=t?n(`${t}.embedding_length`,n(`llama.embedding_length`)):null,s=t?n(`${t}.attention.head_count`,n(`llama.attention.head_count`)):null,c=t?r(`${t}.attention.head_count_kv`,r(`llama.attention.head_count_kv`,s)):null,l=null,u=null;if(Array.isArray(c)){let e=c.filter(e=>Number(e)>0);e.length>0?(l=Math.max(...e.map(Number)),u=e.length):(l=0,u=0)}else l=c;let d=t?n(`${t}.attention.key_length`,n(`llama.attention.key_length`)):null,f=t?n(`${t}.attention.value_length`,n(`llama.attention.value_length`)):null,p=e[`general.quantization_version`]||null,m=e[`general.file_type`]||null,h=t?n(`${t}.ssm.conv_kernel`):null,g=t?n(`${t}.ssm.state_size`):null,_=t?n(`${t}.ssm.inner_size`):null,v=t?n(`${t}.ssm.group_count`):null,y=t?n(`${t}.ssm.time_step_rank`):null,b=t?n(`${t}.rwkv.head_size`):null,x=t?n(`${t}.rwkv.token_shift_count`,2):null,S=u!=null&&a!=null?a-u:null;return{arch:t,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:l,nEmbdHeadK:d,nEmbdHeadV:f,quantVersion:p,fileType:m,attentionLayerCount:u,recurrentLayerCount:S,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:v,ssmDtRank:y,rwkvHeadSize:b,rwkvTokenShiftCount:x}},Le=({layerCount:e,headKvCount:t,embdHeadKCount:n,embdHeadVCount:r,cacheTypes:i,swaConfig:a,kvUnified:o=!1,nParallel:s=1,swaFull:c=!1,arch:l=null,attentionLayerCount:u=null})=>{let 
d=Fe(l);if(d===`recurrent`)return()=>0;let f=d===`hybrid`&&u!=null?Math.max(0,Math.floor(Number(u)||0)):e,p=a?.window&&o?Math.max(1,Number(s)||1):1,m=o?1:Math.max(1,Number(s)||1);return e=>Se(f,e,t,n,r,i,{totalLayers:f,swaLayers:a?.swaLayers||0,swaContext:a?.window,swaFull:c,swaContextMultiplier:p})*m},Re=({nLayer:e,nEmbd:t,recurrentLayerCount:n=null,nSeqMax:r=1,ssmDConv:i=null,ssmDState:a=null,ssmDInner:o=null,ssmNGroup:s=null,ssmDtRank:c=null,rwkvHeadSize:l=null,rwkvTokenShiftCount:u=2,arch:d=null})=>{if(Fe(d)===`transformer`)return 0;let f=n==null?Math.max(0,Math.floor(Number(e)||0)):Math.max(0,Math.floor(Number(n)||0));if(f===0)return 0;let p=Math.max(1,Math.floor(Number(r)||1)),m=0,h=0;if(l!=null&&l>0&&t!=null&&t>0)m=Math.max(1,Number(u)||2)*t,h=t*l;else if(a!=null&&o!=null){let e=Math.max(0,Number(i)||0),t=Math.max(0,Number(a)||0),n=Math.max(0,Number(o)||0),r=Math.max(1,Number(s)||1);Math.max(0,Number(c)||0)>0?(m=e>0?(e-1)*2*r*t:0,h=Math.floor(t*n/2)):(m=e>0?(e-1)*(n+2*r*t):0,h=t*n)}else return 0;let g=(m+h)*p*f*4;return Math.max(0,g)},ze=({maxCtx:e,availableMemory:t,modelBytes:n,kvBytesForCtx:r})=>{let i=Math.max(1,Math.floor(Number(e)||0));if(!r||t<=n)return i;let a=1,o=i,s=i;for(;a<=o;){let e=Math.floor((a+o)/2);n+r(e)<=t?(s=e,a=e+1):o=e-1}return s},U=new T;U.setMaxListeners(100);const Be=(e,t,n)=>{e.push({...t,timestamp:t.timestamp||new Date().toISOString()}),e.length>n&&e.shift()};var 
Ve=class{constructor(e=9999){this.maxEntries=e,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad(e){Be(this.modelLoads,e,this.maxEntries),U.emit(`status:modelLoad`,e),U.emit(`status:change`,{type:`modelLoad`,entry:e})}addCompletion(e){Be(this.completions,e,this.maxEntries),U.emit(`status:completion`,e),U.emit(`status:change`,{type:`completion`,entry:e})}addTranscription(e){Be(this.transcriptions,e,this.maxEntries),U.emit(`status:transcription`,e),U.emit(`status:change`,{type:`transcription`,entry:e})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}};const W=new Ve,G=new Ve;let He=0;function Ue(e){let t=t=>e(t);return U.on(`status:change`,t),()=>U.off(`status:change`,t)}function We(e){return He+=1,{subscriberId:He,unsubscribe:Ue(e)}}function Ge(e){let t=[];return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-llm`).map(([e,n])=>{let{instance:r}=n,i=[];return r.contexts&&(i=Array.from(r.contexts.entries()).map(([n,r])=>{let i={key:n,refCount:r.refCount,hasModel:!!r.context},a=r.context.parallel.getStatus();return i.parallelStatus=a,t.push({generatorId:e,contextKey:n,...a}),i})),{id:e,type:n.type,refCount:n.refCount,repoId:r.info?.model?.repoId||null,quantization:r.info?.model?.quantization||null,variant:r.info?.runtime?.variant||null,nCtx:r.info?.runtime?.n_ctx||null,nParallel:r.info?.runtime?.n_parallel||null,contexts:i}}),parallelStatuses:t,history:{modelLoads:W.getModelLoadHistory(),completions:W.getCompletionHistory()}}}function 
Ke(e){return{generators:Array.from(e.entries()).filter(([,e])=>e.type===`ggml-stt`).map(([e,t])=>{let{instance:n}=t,r=n.getStatus?.()||{},i=r.queueStatus||{processing:!1,queuedCount:0};return{id:e,type:t.type,refCount:t.refCount,repoId:n.info?.model?.repoId||null,quantization:n.info?.model?.quantization||null,modelType:n.info?.model?.modelType||null,variant:n.info?.runtime?.variant||null,hasContext:r.hasContext||!1,contextRefCount:r.contextRefCount||0,queueStatus:i}}),history:{modelLoads:G.getModelLoadHistory(),transcriptions:G.getTranscriptionHistory()}}}function qe(e){return{timestamp:new Date().toISOString(),ggmlLlm:Ge(e),ggmlStt:Ke(e)}}const{ReadableStream:Je,WritableStream:Ye}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,Xe=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),Xe(e[t],n)):e[t]=n}),e),Ze=`https://huggingface.co`,Qe=`https://huggingface.co/api`,K=_.join(v.homedir(),`.buttress`,`models`),$e=[`mxfp4`,`q8_0`,`q6_k`,`q6`,`q5_k_m`,`q5_k_s`,`q5_k`,`q5_1`,`q5_0`,`q4_k_m`,`q4_k_s`,`q4_k`,`q4_1`,`q4_0`,`q3`,`q2`],et=.5,tt={backend:{type:`ggml-llm`,variant:null,variant_preference:[`cuda`,`vulkan`,`snapdragon`,`default`],gpu_memory_fraction:.85,cpu_memory_fraction:et},model:{repo_id:null,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:`auto`,allow_local_file:!1,local_path:null,api_base:Qe,base_url:Ze},runtime:{cache_dir:K,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10*1024*1024*1024,max_entries:1e3},context_release_delay_ms:1e4}},nt=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],rt=e=>{if(!e)return null;let 
t=String(e).toLowerCase();return[`cuda`,`vulkan`,`snapdragon`,`default`].includes(t)?t:null},it=(e={})=>{let t=structuredClone(tt);if(Xe(t,e),t.backend.variant=rt(t.backend.variant),t.backend.variant_preference=Array.from(new Set(nt(t.backend.variant_preference).map(rt).filter(Boolean))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[`cuda`,`vulkan`,`snapdragon`,`default`]),t.runtime.prefer_variants=Array.from(new Set(nt(t.runtime.prefer_variants).map(rt).filter(Boolean))),t.model.preferred_quantizations=Array.from(new Set(nt(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.n_parallel=Math.max(1,Number(t.model.n_parallel)||4),t.model.n_batch=Math.max(1,Number(t.model.n_batch)||512),t.model.base_url=t.model.base_url||Ze,t.model.api_base=t.model.api_base||Qe,t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):K,t.runtime.session_cache={...tt.runtime.session_cache,...t.runtime.session_cache||{}},t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||tt.runtime.context_release_delay_ms),t},at=e=>{let t=e.toLowerCase();return $e.find(e=>t.includes(e))||null},ot=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new Set(t.map(rt).filter(Boolean)))},q=async e=>{await l(e,{recursive:!0})},st=(e=K)=>_.join(e,`.metadata-cache`),ct=(e,t,n=K)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(st(n),t,`${r}.json`)},lt=async(e,t,n=K)=>{try{let r=ct(e,t,n),i=await d(r,`utf-8`);return console.log(`[Cache] Hit ${t} cache:`,_.basename(r)),JSON.parse(i,(e,t)=>typeof 
t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},ut=async(e,t,n,r=K)=>{try{let i=ct(e,t,r);await q(_.dirname(i)),await g(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,_.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} cache:`,e.message)}},dt=(e=K)=>_.join(e,`.session-state-cache`),ft=(e=K)=>_.join(dt(e),`cache-map.json`),pt=(e=K)=>_.join(dt(e),`temp`),mt=(e=K)=>_.join(dt(e),`states`),ht=()=>({version:1,entries:{},totalSize:0}),gt=async(e=K)=>{try{let t=await d(ft(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?ht():n}catch{return ht()}},_t=async(e,t=K)=>{let n=ft(t),r=`${n}.tmp.${Date.now()}`;try{await q(_.dirname(n)),await g(r,JSON.stringify(e,null,2),`utf-8`),await p(r,n)}catch(e){throw await h(r).catch(()=>{}),e}},vt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return y(`sha256`).update(n).digest(`hex`).slice(0,24)},yt=(e,t=K)=>_.join(mt(t),`${e}.bin`),bt=(e=K)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return _.join(pt(e),`${t}.bin`)},xt=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,St=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},Ct=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>xt(e.metadata,t));if(r){let 
t=a.find(t=>t.fullText===e);return t?(console.log(`[SessionCache] Exact match found: ${t.id} (${t.fullText.length} chars)`),{entry:t,prefixLength:t.fullText.length,exactMatch:!0}):null}let o=a.reduce((t,n)=>{let r=St(e,n.fullText);return r>t.prefixLen||r===t.prefixLen&&n.fullText.length>(t.entry?.fullText?.length||0)?{entry:n,prefixLen:r}:t},{entry:null,prefixLen:0});return o.entry?(console.log(`[SessionCache] Prefix match found: ${o.entry.id} (${o.prefixLen}/${o.entry.fullText.length} chars)`),{entry:o.entry,prefixLength:o.prefixLen}):(console.log(`[SessionCache] No match found`),null)},wt=async(e,t,n)=>{let r=Object.values(e.entries).sort((e,t)=>new Date(e.lastAccessedAt)-new Date(t.lastAccessedAt)),i=e.totalSize,a=Object.keys(e.entries).length,o=r.filter(e=>!(i>t)&&!(a>n)?!1:(i-=(e.stateFileSize||0)+(e.promptStateSize||0),--a,!0));return await Promise.all(o.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),delete e.entries[t.id],console.log(`[SessionCache] Evicted entry: ${t.id}`)})),e.totalSize=Math.max(0,i),o.map(e=>e.id)},Tt=async(e,t,n,r)=>{let i=Object.entries(e.entries).filter(([e,i])=>e===n||!xt(i.metadata,r)?!1:t.startsWith(i.fullText)&&i.fullText.length<t.length).map(([,e])=>e);return await Promise.all(i.map(async t=>{await h(t.stateFilePath).catch(()=>{}),t.promptStatePath&&await h(t.promptStatePath).catch(()=>{}),e.totalSize-=(t.stateFileSize||0)+(t.promptStateSize||0),delete e.entries[t.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${t.id} (${t.promptText.length} prompt chars)`)})),i.map(e=>e.id)},Et=async(e=K)=>{let t=pt(e);try{let e=await f(t),n=Date.now();await Promise.all(e.map(async e=>{let r=_.join(t,e),i=await m(r).catch(()=>null);i&&n-i.mtimeMs>36e5&&(await h(r).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${e}`))}))}catch{}},Dt=async e=>{try{return await m(e),!0}catch{return!1}},Ot=(e,t)=>e==null?t:typeof e==`number`?e:typeof 
e==`string`?w.parse(e)??t:t;var kt=class e{constructor(e,t){this.config=e,this.plan=t,this.baseDir=e.runtime.cache_dir,this.enabled=e.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Ot(e.runtime.session_cache?.max_size_bytes,10*1024*1024*1024),this.maxEntries=e.runtime.session_cache?.max_entries||1e3,this.metadata={variant:t.info?.runtime?.variant||null,n_gpu_layers:t.info?.runtime?.n_gpu_layers||0,n_ctx:t.info?.runtime?.n_ctx||0,modelPath:t.localPath,cacheTypeK:t.info?.runtime?.cache_type_k||`f16`,cacheTypeV:t.info?.runtime?.cache_type_v||`f16`,kvUnified:t.info?.runtime?.kv_unified??null,swaFull:t.info?.runtime?.swa_full??null,flashAttnType:t.info?.runtime?.flash_attn_type||`off`,isRecurrent:!1,isHybrid:!1},this.cacheMap=null,this.initialized=!1}updateModelInfo(e){e&&(this.metadata.isRecurrent=!!e.is_recurrent,this.metadata.isHybrid=!!e.is_hybrid,(this.metadata.isRecurrent||this.metadata.isHybrid)&&console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`))}requiresExactMatch(){return this.metadata.isRecurrent||this.metadata.isHybrid}static checkTokenPrefixMatch(e,t){if(e.length>t.length)return!1;for(let n=0;n<e.length;n+=1)if(e[n]!==t[n])return!1;return!0}static async tokenizeToArray(e,t){let n=await e.tokenize(t);return Array.from(n?.tokens||[])}async findFormattedMatchForRecurrent(t,n,r){let i=await e.tokenizeToArray(r,n),a=t.map(async t=>{try{let n=await e.tokenizeToArray(r,t.fullText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!1,tokenCount:n.length};if(t.promptStatePath&&t.promptText){let n=await e.tokenizeToArray(r,t.promptText);if(e.checkTokenPrefixMatch(n,i))return{entry:t,usePromptState:!0,tokenCount:n.length}}return null}catch(e){return console.warn(`[SessionCache] Failed to check entry ${t.id}: ${e.message}`),null}}),o=(await Promise.all(a)).find(e=>e!==null);if(!o)return console.log(`[SessionCache] No token prefix match found for recurrent/hybrid 
model`),null;let{entry:s,usePromptState:c,tokenCount:l}=o;return console.log(`[SessionCache] Token prefix match: ${s.id} (${l} tokens, usePromptState=${c})`),await Dt(c?s.promptStatePath:s.stateFilePath)?(s.lastAccessedAt=new Date().toISOString(),await _t(this.cacheMap,this.baseDir).catch(()=>{}),{entry:s,usePromptState:c}):(await this.removeStaleEntry(s),null)}async initialize(){if(!(!this.enabled||this.initialized))try{await q(dt(this.baseDir)),await q(pt(this.baseDir)),await q(mt(this.baseDir)),this.cacheMap=await gt(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch(e){console.warn(`[SessionCache] Failed to initialize: ${e.message}`),this.enabled=!1}}async removeStaleEntry(e){console.log(`[SessionCache] Removing stale entry: ${e.id}`),e.stateFilePath&&await h(e.stateFilePath).catch(()=>{}),e.promptStatePath&&await h(e.promptStatePath).catch(()=>{}),delete this.cacheMap.entries[e.id],this.cacheMap.totalSize-=(e.stateFileSize||0)+(e.promptStateSize||0),await _t(this.cacheMap,this.baseDir).catch(()=>{})}async findMatchingEntry(e,t=null){if(!this.enabled||!this.cacheMap)return null;let n=this.requiresExactMatch();if(n&&t){let n=Object.values(this.cacheMap.entries).filter(e=>xt(e.metadata,this.metadata)&&e.fullText);return this.findFormattedMatchForRecurrent(n,e,t)}let r=Ct(e,this.metadata,this.cacheMap,n);if(!r)return null;let{entry:i}=r;return await Dt(i.stateFilePath)?(i.lastAccessedAt=new Date().toISOString(),await _t(this.cacheMap,this.baseDir).catch(()=>{}),{entry:i,usePromptState:!1}):(await this.removeStaleEntry(i),null)}async prepareCompletionOptions(e,t,n=null){let r={options:e,cacheEntry:null,promptPrefix:null};if(!this.enabled)return r;let i=await this.findMatchingEntry(t,n);if(!i)return r;let{entry:a,usePromptState:o}=i,s=o?a.promptStatePath:a.stateFilePath,c=o?a.promptText:a.fullText;return console.log(`[SessionCache] Found matching entry: ${a.id} (${c.length} 
chars, usePromptState=${o})`),{options:{...e,load_state_path:s},cacheEntry:a,promptPrefix:c}}async saveCompletionState(e,t,n,r=0,i=null){if(!this.enabled)return null;let a=e+t,o=vt(a,this.metadata),s=()=>{n&&h(n).catch(()=>{}),i&&h(i).catch(()=>{})};if(this.cacheMap.entries[o]){console.log(`[SessionCache] Entry already exists for prompt: ${o}, updating position`);let e=this.cacheMap.entries[o];return e.lastAccessedAt=new Date().toISOString(),delete this.cacheMap.entries[o],this.cacheMap.entries[o]=e,await _t(this.cacheMap,this.baseDir).catch(()=>{}),s(),e}let c=yt(o,this.baseDir),l=i?yt(`${o}-prompt`,this.baseDir):null;try{await q(_.dirname(c)),await p(n,c);let s=await m(c),u=0;if(i&&l)try{await p(i,l),u=(await m(l)).size,console.log(`[SessionCache] Saved prompt state: ${l}`)}catch(e){console.warn(`[SessionCache] Failed to save prompt state: ${e.message}`)}let d={id:o,promptText:e,completionText:t,fullText:a,promptTokenCount:r,stateFilePath:c,stateFileSize:s.size,promptStatePath:l||null,promptStateSize:u,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[o]=d,this.cacheMap.totalSize+=s.size+u,this.requiresExactMatch()||await Tt(this.cacheMap,e,o,this.metadata),await wt(this.cacheMap,this.maxSizeBytes,this.maxEntries),await _t(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${o} (${s.size} bytes, ${a.length} chars)`),d}catch(e){return console.warn(`[SessionCache] Failed to save state: ${e.message}`),s(),null}}async generateTempStatePath(){return await q(pt(this.baseDir)),bt(this.baseDir)}async cleanup(){await Et(this.baseDir)}};const At=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},jt=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global 
fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Mt=async(e,t,n=K)=>{let r=JSON.stringify({url:e,headers:t}),i=await lt(r,`range-metadata`,n);if(i)return i;let a=!/^https?:/i.test(e),{metadata:o}=await b(e,{fetch,additionalFetchHeaders:t,allowLocalFile:a});return await ut(r,`range-metadata`,o,n),o},Nt=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},Pt=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Ft=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await q(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new Ye({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${r} got ${e.size}`)}},It=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await lt(i,`artifact-info`,r);if(a)return a;let 
o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await jt(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c={repoId:t,revision:n,filename:e.model.filename||e.model.url.split(`/`).pop(),url:e.model.url,size:s,headers:o};return await ut(i,`artifact-info`,c,r),c}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await At(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.gguf`));if(u.length===0)throw Error(`No GGUF artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:$e,f=()=>{let e=d.find(e=>u.find(t=>t.toLowerCase().includes(e)));return e?{filename:u.find(t=>t.toLowerCase().includes(e)),quantization:e}:null};if(s)c||=at(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||at(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=/-(\d{5})-of-(\d{5})\.gguf$/,h=s.match(m),g=null;if(h){let[,,r]=h,i=await At(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),a=i?.siblings||i?.files||[],c=Number(r);g=0;for(let e=1;e<=c;e+=1){let t=String(e).padStart(5,`0`),n=s.replace(m,`-${t}-of-${r}.gguf`),i=a.find(e=>(e.rfilename||e.path||e.filename)===n),o=Number(i?.size);Number.isFinite(o)&&o>0&&(g+=o)}}else{let e=await jt(p,{headers:o});g=Number(e.headers.get(`content-length`))||null}let _={repoId:t,revision:n,filename:s,url:p,size:g,quantization:c,headers:o,isSplit:!!h,splitCount:h?Number(h[2]):0};return await ut(i,`artifact-info`,_,r),_},Lt=async(e,{modelBytes:t=null,kvCacheBytes:n=null}={})=>{let 
r=ot(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?tt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?et:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ye({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,variant:i||null,preferVariants:a,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},Rt=async e=>{let t=await It(e),n=await 
Mt(t.url,t.headers,e.runtime.cache_dir),{arch:r,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:c,nEmbdHeadK:l,nEmbdHeadV:u,quantVersion:d,fileType:f,attentionLayerCount:p,recurrentLayerCount:m,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:S}=Ie(n),C=Number.isFinite(Number(a))?Number(a):0,w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):T,D=T>0&&w>0?w/T:128,O=l!=null&&Number.isFinite(Number(l))?Number(l):D,k=u!=null&&Number.isFinite(Number(u))?Number(u):D,A=ke({arch:r,metadata:n,nLayer:C}),j=A&&Number.isFinite(Number(A.kvLayers))?Number(A.kvLayers):C,ee=Math.max(0,Math.floor(Number(j)||0)),M={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:e.model.n_parallel??e.runtime.n_parallel,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},N=M.n_ctx?Number(M.n_ctx):null,P=N||i||4096,F=[],te=[],I=!0;if(N&&i&&N>i){I=!1;let e=`Requested context length (${N}) exceeds model training context (${i})`;F.push(e),te.push(e),P=i}N&&!i&&F.push(`Model metadata missing training context length, using requested value`);let 
L={k:M.cache_type_k,v:M.cache_type_v},R=t.size>0?t.size:0,ne=Le({layerCount:ee,headKvCount:E,embdHeadKCount:O,embdHeadVCount:k,cacheTypes:L,swaConfig:A,kvUnified:M.kv_unified,nParallel:M.n_parallel,swaFull:M.swa_full,arch:r,attentionLayerCount:p}),z=Re({nLayer:C,nEmbd:w,recurrentLayerCount:m,nSeqMax:M.n_parallel||4,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:S,arch:r}),B=await Lt(e,{modelBytes:R,kvCacheBytes:ne(P)+z}),re=B.selected.totalMemory||0,V=re*(e.backend.gpu_memory_fraction||1),ie=e.backend.cpu_memory_fraction==null?et:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),ae=Math.max(0,v.totalmem()*ie),oe=B.selected.hasGpu?V:ae,H=ze({maxCtx:P,availableMemory:oe,modelBytes:R,kvBytesForCtx:ne});if(!N&&H){let e=i?Math.min(H,i):H,t=Math.max(32,e);t<P&&F.push(`Context length capped to ${t} by memory limits`),P=t}P>H&&(P=H);let se=Math.floor(H);console.log(`[buttress] Memory-limited context length: ${se}`);let ce=ne(P),le=R+ce+z,ue=C?R/(C+1):R,de=0;B.selected.hasGpu&&ue>0&&(de=Math.min(C+1,Math.max(0,Math.floor(V/ue)))),console.log(`[buttress] Auto GPU layer capacity (${B.selected.variant}): ${de}/${C+1}`);let fe;fe=M.n_gpu_layers===`auto`||M.n_gpu_layers==null?de:Math.max(0,Math.min(Number(M.n_gpu_layers)||0,C+1));let pe=(()=>{let e=M.flash_attn_type&&String(M.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:B.selected.hasGpu?`auto`:`off`})(),me=e.runtime.cache_dir,he=Nt(e,t),ge=await 
Pt(he,t.size);return{config:e,info:{ok:I,backend:`ggml-llm`,warnings:F,errors:te,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:r,n_ctx_train:i,n_layer:C,n_embd:w,quantization_version:d,file_type:f,kv_layer_count:ee,swa:A?.enabled?{window:A.window,pattern:A.pattern,dense_first:A.denseFirst,type:A.type,layers:A.swaLayers}:null}},runtime:{...M,variant:B.selected.variant,n_ctx:P,requested_ctx:N,n_gpu_layers:fe,auto_gpu_layers:de,flash_attn_type:pe,cache_type_k:L.k,cache_type_v:L.v,estimated_max_n_ctx:se},resources:{modelBytes:R,kvCacheBytes:ce,recurrentMemoryBytes:z,totalEstimatedBytes:le,gpuCapacityBytes:re,gpuUsableBytes:V,cpuUsableBytes:ae,fit:B.selected.fit},devices:{selected:B.selected,attempts:B.attempts},download:{cacheDir:me,localPath:he,exists:ge},timestamp:new Date().toISOString()},artifact:t,metadata:{arch:r,nCtxTrain:i,nLayer:C,nEmbd:w},devices:B,cacheTypes:L,localPath:he,localExists:ge}},zt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new Je({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
d=Date.now()-a,f=u.timings||{};W.addCompletion({id:`completion-${l}`,generatorId:n,requestId:l,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,cacheTokens:f.cache_n??0,promptTokens:f.prompt_n??0,tokensGenerated:f.predicted_n??o,tokensPerSecond:f.predicted_per_second??0,promptPerSecond:f.prompt_per_second??0,durationMs:d,success:!0,interrupted:u.interrupted||!1,contextFull:u.context_full||u.contextFull||!1})}catch(e){s.enqueue({event:`error`,data:{message:e?.message||String(e)}}),s.error(e),W.addCompletion({id:`completion-${Date.now()}`,generatorId:n,repoId:r?.repoId||null,quantization:r?.quantization||null,variant:r?.variant||null,durationMs:Date.now()-a,tokensGenerated:o,success:!1,error:e?.message||String(e)})}},cancel(){i&&i()}})},Bt=(e,t,n,r,i,a,o=null,s=null,c=null)=>{let l,u=``,d=!1,f=Date.now(),p=0,m=()=>{i&&h(i).catch(()=>{}),c&&h(c).catch(()=>{})};return new Je({async start(h){try{let g=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(u+=t.token,p+=1),h.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:_}=g;l=g.stop;let v=await g.promise;v.text?u=v.text:v.content&&(u=v.content),d=!v.interrupted&&!v.context_full,console.log(`[Completion] Result:`,v),h.enqueue({event:`result`,data:{requestId:_,...v}}),h.close();let y=Date.now()-f,b=v.timings||{};W.addCompletion({id:`completion-${_}`,generatorId:o,requestId:_,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,cacheTokens:b.cache_n??0,promptTokens:b.prompt_n??a??0,tokensGenerated:b.predicted_n??p,tokensPerSecond:b.predicted_per_second??0,promptPerSecond:b.prompt_per_second??0,durationMs:y,success:!0,interrupted:v.interrupted||!1,contextFull:v.context_full||v.contextFull||!1,usedCache:!!t.load_state_path}),d&&n.enabled&&u?n.saveCompletionState(r,u,i,a,c).catch(e=>{console.warn(`[SessionCache] Save 
failed:`,e.message)}):m()}catch(e){h.enqueue({event:`error`,data:{message:e?.message||String(e)}}),h.error(e),W.addCompletion({id:`completion-${Date.now()}`,generatorId:o,repoId:s?.repoId||null,quantization:s?.quantization||null,variant:s?.variant||null,durationMs:Date.now()-f,tokensGenerated:p,success:!1,error:e?.message||String(e)}),m()}},cancel(){l&&l(),m()}})},Vt=e=>{let t={model:e.plan.localPath,runtime:e.plan.info.runtime};return y(`sha256`).update(JSON.stringify(t)).digest(`hex`).slice(0,24)},Ht=async(e,t,n,r=null)=>{let{config:i,localPath:a,artifact:o}=e;if(e.localExists&&!t.has(a))return e.info.download.exists=!0,typeof n==`function`&&n(.5),a;if(i.model.local_path&&!i.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let s=a;if(r){let t=r.getDownload(s);if(t){console.log(`[ensureModelFile] Waiting for global download: ${o.repoId}`);try{if(await t,await Pt(a,o.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(.5),a}catch(e){console.warn(`[ensureModelFile] Global download failed, will retry: ${e.message}`)}}}t.has(s)||t.set(s,(async()=>{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(a),r=o.splitCount,s=0;for(let a=1;a<=r;a+=1){let c=String(a).padStart(5,`0`),l=o.filename.replace(e,`-${c}-of-${String(r).padStart(5,`0`)}.gguf`),u=`${i.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${l}`,d=_.join(t,l);await Pt(d)||await Ft(u,o.headers,d,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(s+e)/r,i=Math.round(t*100);console.log(`Downloading model splits: ${Math.min(100,i)}%`),typeof n==`function`&&n(t*.5)}}),s+=1}}else console.log(`Downloading model: 0%`),await Ft(o.url,o.headers,a,o.size,e=>{if(e>=0&&Number.isFinite(e)){let t=Math.round(e*100);console.log(`Downloading model: ${Math.min(100,t)}%`),typeof n==`function`&&n(e*.5)}});e.localExists=!0,e.info.download.exists=!0})());try{await t.get(s)}finally{t.delete(s)}return 
a},Ut=async(e,t)=>{let n=Vt(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await Ht(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await C(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);return typeof 
t==`function`&&t(1),r.context=s,r.modelInfo=s.getModelInfo(),W.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,nCtx:e.plan.info.runtime?.n_ctx||null,nGpuLayers:e.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-i,success:!0}),r}catch(t){if(W.addModelLoad({id:`${e.id}-${n}`,generatorId:e.id,contextKey:n,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-i,success:!1,error:t?.message||String(t)}),s)try{s.release()}catch{}throw t}})();try{return await r.ready,r}catch(t){throw e.contexts.delete(n),t}},Wt=async(e,t,n=!1)=>{if(t.released||!n&&t.refCount>0)return!1;t.released=!0,e.contexts.delete(t.key);try{t.context?.parallel?.disable?.()}catch{}return await t.context?.release?.(),!0},Gt=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Wt(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Wt(e,t):(console.log(`[Context] Scheduling release in ${i}ms for context "${t.key}"`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] Release cancelled, refCount=${t.refCount} for context "${t.key}"`),t.releaseRequested=!1;return}console.log(`[Context] Releasing context "${t.key}" after ${i}ms delay`),await Wt(e,t)},i),!0)};async function Kt(e,t,n={}){let{globalDownloadManager:r=null}=n,i=it(t),a=await Rt(i),o=new kt(i,a);await o.initialize();let s={id:e,type:`ggml-llm`,config:i,plan:a,info:a.info,contexts:new Map,downloads:new 
Map,globalDownloadManager:r,sessionCache:o,finalized:!1};return{id:e,type:`ggml-llm`,info:a.info,contexts:s.contexts,initContext:async(e={})=>{let{onProgress:t}=e,n=await Ut(s,t);return s.sessionCache.updateModelInfo(n.modelInfo),{modelInfo:n.modelInfo?{...n.modelInfo}:null,runtime:{...s.plan.info.runtime},download:{...s.plan.info.download}}},completion:async(e={})=>{let{options:t={},useCache:n=!0}=e,r=Vt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=t.prompt||``,o=null,c=null;if(!a&&t.messages){({messages:o}=t),c={chatTemplate:t.chat_template||t.chatTemplate,jinja:t.jinja??!0,tools:t.tools,parallel_tool_calls:t.parallel_tool_calls,tool_choice:t.tool_choice,reasoning_format:t.reasoning_format,enable_thinking:t.enable_thinking,add_generation_prompt:t.add_generation_prompt,now:t.now,chat_template_kwargs:t.chat_template_kwargs};let e=await i.context.getFormattedChat(o,c.chatTemplate,c);a=e?.prompt||e||``}if(n&&s.sessionCache.enabled&&a){let{options:e}=await s.sessionCache.prepareCompletionOptions(t,a,i.context),n=await s.sessionCache.generateTempStatePath(),r=(await i.context.tokenize(a))?.tokens?.length||0,o={...e,save_state_path:n},c=s.sessionCache.requiresExactMatch(),l=!!o.load_state_path,u=null;c&&!l&&(u=await s.sessionCache.generateTempStatePath(),o.save_prompt_state_path=u);let d={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return Bt(i.context,o,s.sessionCache,a,n,r,s.id,d,u)}let l={repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null};return zt(i.context,t,s.id,l)},tokenize:async(e={})=>{let{text:t=``,params:n={}}=e,r=Vt(s),i=s.contexts.get(r);if(!i)throw Error(`Context "${r}" not initialized`);await i.ready;let a=await i.context.tokenize(t,n);if(!a)return{tokens:[]};let 
o=Array.from(a.tokens??[]).map(e=>Number(e));return{...a,tokens:o}},detokenize:async(e={})=>{let{tokens:t=[]}=e,n=Vt(s),r=s.contexts.get(n);if(!r)throw Error(`Context "${n}" not initialized`);await r.ready;let i=t.map(e=>Number(e));return r.context.detokenize(i)},applyChatTemplate:async(e={})=>{let{messages:t=[],template:n,params:r}=e,i=Vt(s),a=s.contexts.get(i);if(!a)throw Error(`Context "${i}" not initialized`);return await a.ready,await a.context.getFormattedChat(t,n,r)},releaseContext:async()=>{if(s.finalized)return!1;let e=Vt(s),t=s.contexts.get(e);return t?Gt(s,t,!1):!1},finalize:async()=>{if(s.finalized)return;s.finalized=!0;let e=Array.from(s.contexts.values()),t=e.map(e=>e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),e.refCount>0)?Promise.resolve(!1):Wt(s,e));await Promise.allSettled(t),(e.length===0||e.every(e=>e.released))&&await s.sessionCache.cleanup()},getStatus:()=>{let e=[],t=Array.from(s.contexts.entries()).map(([t,n])=>{let r={key:t,refCount:n.refCount,hasModel:!!n.context},i=n.context.parallel.getStatus();return r.parallelStatus=i,e.push({contextKey:t,...i}),r});return{id:s.id,type:s.type,repoId:s.plan.info.model?.repoId||null,quantization:s.plan.info.model?.quantization||null,variant:s.plan.info.runtime?.variant||null,nCtx:s.plan.info.runtime?.n_ctx||null,nParallel:s.plan.info.runtime?.n_parallel||null,contexts:t,parallelStatuses:e}},subscribeParallelStatus:e=>{let t=Array.from(s.contexts.entries()).map(([t,n])=>n.context.parallel.subscribeToStatus(n=>{e({contextKey:t,...n})}));return{remove:()=>{t.forEach(e=>{e?.remove&&e.remove()})}}},hasPendingReleases:()=>Array.from(s.contexts.values()).some(e=>!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0)),resetFinalized:()=>{s.finalized=!1}}}const qt=e=>{let t=it(e);return t.model.repo_id||t.model.repository||t.model.model||null};async function Jt(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=it(e),o=await 
It(n),s=Nt(n,o),{repoId:c}=o;if(await Pt(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let u=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let l=String(s).padStart(5,`0`),u=o.filename.replace(e,`-${l}-of-${String(i).padStart(5,`0`)}.gguf`),d=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=_.join(t,u);await Pt(f)||await Ft(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Ft(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function Yt(e){let t=it(e),n=await It(t),r=await 
Mt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ie(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,O=E>0&&T>0?T/E:128,k=u!=null&&Number.isFinite(Number(u))?Number(u):O,A=d!=null&&Number.isFinite(Number(d))?Number(d):O,j=ke({arch:i,metadata:r,nLayer:w}),ee=j&&Number.isFinite(Number(j.kvLayers))?Number(j.kvLayers):w,M=Math.max(0,Math.floor(Number(ee)||0)),N=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,P={k:t.model.cache_type_k,v:t.model.cache_type_v},F=n.size>0?n.size:0,te=t.model.n_parallel||4,I=Le({layerCount:M,headKvCount:D,embdHeadKCount:k,embdHeadVCount:A,cacheTypes:P,swaConfig:j,kvUnified:t.model.kv_unified,nParallel:te,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),L=Re({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:te,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=t.backend?.gpu_memory_fraction==null?tt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),ne=t.backend?.cpu_memory_fraction==null?et:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),z=await 
Lt(t,{modelBytes:F,kvCacheBytes:I(N)}),B=(z.selected.totalMemory||0)*R,re=Math.max(0,v.totalmem()*ne),V=ze({maxCtx:N,availableMemory:z.selected.hasGpu?B:re,modelBytes:F,kvBytesForCtx:I}),ie=I(N),ae=I(V);return{kvInfo:{nCtxTrain:a,nLayer:w,nEmbd:T,nHeadKv:D,nEmbdHeadK:k,nEmbdHeadV:A,nHeadCount:E,nHeadKvCount:D,kvLayerCount:M,swa:j?.enabled?{window:j.window,pattern:j.pattern,denseFirst:j.denseFirst,type:j.type,layers:j.swaLayers}:null},modelBytes:F,kvCacheBytes:ie,limitedKvCacheBytes:ae,memoryLimitedCtx:V,recurrentMemoryBytes:L,quantization:{name:n.quantization||null,fileType:p,version:f}}}const Xt=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):V(e):0;async function Zt(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null,l=null,u=null,d=null,f=null;if(i)try{let{modelBytes:e,kvCacheBytes:t,limitedKvCacheBytes:n,memoryLimitedCtx:r,recurrentMemoryBytes:a,kvInfo:p,quantization:m}=await Yt(i);o=e,s=t,c=n,l=r,u=a,d=p,f=m}catch{}let p=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),m=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),h=await ye({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-llm`,includeBreakdown:r,gpuMemoryFraction:p,cpuMemoryFraction:m,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s,limitedKvCacheBytes:c}),g=h.selected,_=Xt(g);g.modelBytes=o||null,g.kvCacheBytes=s||null,g.memoryLimitedCtx=l||null,g.limitedKvCacheBytes=c||null,g.recurrentMemoryBytes=u||null,g.kvInfo=d||null,g.quantization=f||null;let y=null,b=null;if(e){let t=Xt(e);b={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!h.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else{let 
e=b.fit,a=b.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}y={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!y&&(y={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let C=null;return i&&(C={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:b,comparison:y,modelConfig:C}}const{WritableStream:Qt}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,$t=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),$t(e[t],n)):e[t]=n}),e),en=`https://huggingface.co`,tn=`https://huggingface.co/api`,nn=_.join(v.homedir(),`.buttress`,`models`),rn=[`cuda`,`vulkan`,`default`],an=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],on=`fp16`,sn=.5,cn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],ln=e=>{if(!e)return null;let t=e.toLowerCase();return 
cn.find(e=>t.includes(e))||null},un={backend:{type:`ggml-stt`,variant:null,variant_preference:rn,gpu_memory_fraction:.85,cpu_memory_fraction:sn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,on,`q5_1`],allow_local_file:!1,local_path:null,api_base:tn,base_url:en,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:nn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},dn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],fn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},pn=(e={})=>{let t=structuredClone(un);if($t(t,e),t.backend.variant=fn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(dn(t.backend.variant_preference||rn).map(fn).filter(Boolean))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...rn]),t.runtime.prefer_variants=Array.from(new Set(dn(t.runtime.prefer_variants).map(fn).filter(Boolean))),t.model.preferred_quantizations=Array.from(new Set(dn(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||en,t.model.api_base=t.model.api_base||tn,t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):nn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||un.runtime.context_release_delay_ms),t},mn=e=>{let t=e.toLowerCase();return an.find(e=>t.includes(e))||null},hn=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new 
Set(t.map(fn).filter(Boolean)))},gn=async e=>{await l(e,{recursive:!0})},_n=(e=nn)=>_.join(e,`.metadata-cache`),vn=(e,t,n=nn)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(_n(n),t,`${r}.json`)},yn=async(e,t,n=nn)=>{try{let r=await d(vn(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},bn=async(e,t,n,r=nn)=>{try{let i=vn(e,t,r);await gn(_.dirname(i)),await g(i,JSON.stringify(n),`utf-8`)}catch{}},xn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Sn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Cn=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},wn=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Tn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await gn(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new Qt({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected 
${r} got ${e.size}`)}},En=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await yn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Sn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:mn(c||``),headers:o};return await bn(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await xn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:an,f=()=>{for(let e of d)if(e===on){let e=u.find(e=>{let t=e.toLowerCase();return!an.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=mn(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||mn(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await Sn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await bn(i,`artifact-info`,g,r),g},Dn=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let 
r=hn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?un.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?sn:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ye({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},On=async e=>{let t=await En(e),n=Ce({modelBytes:t.size>0?t.size:0}),r=await Dn(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Cn(e,t),l=await 
wn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:ln(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},kn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await wn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await wn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Tn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var An=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await 
/**
 * Normalise caller-supplied audio into an ArrayBuffer.
 *
 * Accepts an ArrayBuffer (returned as-is), any ArrayBuffer view (typed array,
 * DataView, Node Buffer) or a base64 string, optionally wrapped in a
 * `data:` URL.
 *
 * A view may cover only part of its backing buffer (pooled Node Buffers,
 * `subarray()` results). Only the viewed bytes are returned; handing back
 * `view.buffer` directly would expose unrelated bytes to the transcriber.
 *
 * @param {ArrayBuffer|ArrayBufferView|string|null|undefined} e audio payload
 * @returns {ArrayBuffer|null} null when `e` is falsy
 * @throws {Error} when `e` is of an unsupported type
 */
const jn = (e) => {
  if (!e) return null;
  if (e instanceof ArrayBuffer) return e;
  if (ArrayBuffer.isView(e)) {
    const { buffer, byteOffset, byteLength } = e;
    // A view spanning the entire buffer can share it without copying.
    if (byteOffset === 0 && byteLength === buffer.byteLength) return buffer;
    return buffer.slice(byteOffset, byteOffset + byteLength);
  }
  if (typeof e === 'string') {
    const payload = e.startsWith('data:') ? e.split(',')[1] || '' : e;
    const decoded = Buffer.from(payload, 'base64');
    return decoded.buffer.slice(decoded.byteOffset, decoded.byteOffset + decoded.byteLength);
  }
  throw new Error('Unsupported audioData format, expected base64 string or ArrayBuffer');
};

/**
 * Acquire the generator's STT context, creating it on first use.
 *
 * If a live (not released) context record exists, any pending delayed
 * release is cancelled, the reference count is bumped and the record is
 * reused. Otherwise the model file is ensured via `kn`, the context is
 * created with `E` (initWhisper) and the outcome is recorded through
 * `G.addModelLoad`.
 *
 * The init options are derived from the plan's runtime section. The STT plan
 * exposes `use_gpu`, `use_flash_attn` and `max_threads`; the LLM-style keys
 * (`n_gpu_layers`, `flash_attn_type`, `n_threads`) are only consulted as a
 * fallback when the STT keys are absent.
 *
 * @param {object} e generator state (`plan`, `downloads`, `contextRecord`, ...)
 * @param {(progress: number) => void} [t] optional progress callback (0..1)
 * @returns {Promise<object>} the context record
 */
const Mn = async (e, t) => {
  const report = (value) => {
    if (typeof t === 'function') t(value);
  };

  if (e.contextRecord && !e.contextRecord.released) {
    const existing = e.contextRecord;
    if (existing.releaseTimer) {
      clearTimeout(existing.releaseTimer);
      existing.releaseTimer = null;
      console.log('[Context] Cancelled pending STT release');
    }
    existing.releaseRequested = false;
    existing.refCount += 1;
    console.log(`[Context] Reusing existing STT context, refCount=${existing.refCount}`);
    report(0);
    if (!existing.context) await existing.ready;
    report(1);
    return e.contextRecord;
  }

  if (e.contextRecord) {
    console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`);
  } else {
    console.log('[Context] No existing STT record, creating new context');
  }

  const record = { refCount: 1, ready: null, released: false };
  e.contextRecord = record;

  // Fields shared by the success and failure history entries.
  const describeLoad = () => ({
    id: e.id,
    generatorId: e.id,
    repoId: e.plan.info.model?.repoId || null,
    quantization: e.plan.info.model?.quantization || null,
    modelType: e.plan.info.model?.modelType || null,
    variant: e.plan.info.runtime?.variant || null,
  });

  record.ready = (async () => {
    const startedAt = Date.now();
    try {
      report(0);
      const filePath = await kn(e.plan, e.downloads, t, e.globalDownloadManager);
      report(0.5);
      const runtime = e.plan.info.runtime;
      const context = await E(
        {
          filePath,
          useFlashAttn: Boolean(runtime.use_flash_attn ?? (runtime.flash_attn_type === 'on')),
          useGpu: Boolean(runtime.use_gpu ?? (runtime.n_gpu_layers > 0)),
          nThreads: runtime.max_threads ?? runtime.n_threads ?? undefined,
        },
        runtime.variant,
      );
      report(1);
      record.context = context;
      try {
        record.modelInfo = context.getModelInfo();
      } catch {
        record.modelInfo = null;
      }
      G.addModelLoad({
        ...describeLoad(),
        useGpu: e.plan.info.runtime?.use_gpu || false,
        durationMs: Date.now() - startedAt,
        success: true,
      });
      return record;
    } catch (err) {
      G.addModelLoad({
        ...describeLoad(),
        durationMs: Date.now() - startedAt,
        success: false,
        error: err?.message || String(err),
      });
      throw err;
    }
  })();

  try {
    await record.ready;
    report(1);
    return record;
  } catch (err) {
    // Drop the failed record so the next call starts from scratch.
    e.contextRecord = null;
    throw err;
  }
};

/**
 * Release a context record immediately.
 *
 * @param {object} e generator state
 * @param {object} t context record
 * @param {boolean} [n=false] force release even while references remain
 * @returns {Promise<boolean>} true if released by this call; false if it was
 *   already released or is still referenced (and not forced)
 */
const Nn = async (e, t, n = false) => {
  if (t.released || (!n && t.refCount > 0)) return false;
  t.released = true;
  e.contextRecord = null;
  await t.context?.release?.();
  return true;
};

/**
 * Drop one reference to a context record and release it once unreferenced,
 * either immediately or after `config.runtime.context_release_delay_ms`.
 *
 * @param {object} e generator state (reads `config.runtime.context_release_delay_ms`)
 * @param {object} t context record
 * @param {boolean} [n=false] force: zero the reference count and release now
 * @returns {Promise<boolean>} false when references remain; true when a
 *   delayed release was scheduled; otherwise the result of `Nn`
 */
const Pn = async (e, t, n = false) => {
  t.releaseRequested = true;
  if (t.releaseTimer) {
    clearTimeout(t.releaseTimer);
    t.releaseTimer = null;
  }

  if (n) {
    t.refCount = 0;
  } else {
    t.refCount = Math.max(0, t.refCount - 1);
    if (t.refCount > 0) {
      t.releaseRequested = false;
      return false;
    }
  }

  const configuredDelay = e.config.runtime.context_release_delay_ms;
  if (typeof configuredDelay !== 'number' || !Number.isFinite(configuredDelay)) return Nn(e, t);

  const delayMs = Math.max(0, Math.floor(configuredDelay));
  if (n || delayMs <= 0) return Nn(e, t);

  console.log(`[Context] Scheduling STT release in ${delayMs}ms`);
  t.releaseTimer = setTimeout(async () => {
    t.releaseTimer = null;
    if (t.refCount > 0) {
      // Someone re-acquired the context while the timer was pending.
      console.log(`[Context] STT release cancelled, refCount=${t.refCount}`);
      t.releaseRequested = false;
      return;
    }
    console.log(`[Context] Releasing STT context after ${delayMs}ms delay`);
    await Nn(e, t);
  }, delayMs);
  return true;
};
Mn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},l=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Pn(o,e):!1},u=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=jn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=_.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return G.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
/**
 * Start (or join) a background download of an STT model.
 *
 * Resolves the model artifact for the given generator config, then:
 *  - reports `alreadyExists` if the file is already present locally,
 *  - joins a download already registered with the manager, or
 *  - starts a new download and registers it with the manager.
 *
 * The function never rejects: failures while preparing the download are
 * reported through `onError` and the returned `error` field.
 *
 * @param {object} e raw generator config
 * @param {{getDownload: Function, setDownload: Function, deleteDownload: Function}} t
 *   shared download manager keyed by local path
 * @param {{onProgress?: Function, onComplete?: Function, onError?: Function}} [n]
 * @returns {Promise<object>} `{ started, localPath, repoId, ... }`
 */
async function Ln(e, t, n = {}) {
  const { onProgress, onComplete, onError } = n;
  try {
    const config = pn(e);
    const artifact = await En(config);
    const localPath = Cn(config, artifact);
    const { repoId } = artifact;

    if (await wn(localPath, artifact.size)) {
      console.log(`[Download] STT model already exists: ${repoId} at ${localPath}`);
      if (typeof onComplete === 'function') onComplete({ localPath, repoId, alreadyExists: true });
      return { started: false, localPath, repoId, alreadyExists: true };
    }

    const inFlight = t.getDownload(localPath);
    if (inFlight) {
      console.log(`[Download] Already downloading STT model: ${repoId}`);
      inFlight
        .then(() => {
          if (typeof onComplete === 'function') onComplete({ localPath, repoId, joinedExisting: true });
        })
        .catch((err) => {
          if (typeof onError === 'function') onError(err);
        });
      return { started: false, localPath, repoId, alreadyDownloading: true };
    }

    console.log(`[Download] Starting STT model download: ${repoId}`);
    const download = (async () => {
      try {
        await Tn(artifact.url, artifact.headers, localPath, artifact.size, (progress) => {
          if (progress >= 0 && Number.isFinite(progress)) {
            console.log(`[Download] ${repoId}: ${Math.round(progress * 100)}%`);
            if (typeof onProgress === 'function') onProgress(progress);
          }
        });
        console.log(`[Download] Completed STT model: ${repoId}`);
        if (typeof onComplete === 'function') onComplete({ localPath, repoId });
      } catch (err) {
        console.error(`[Download] Failed STT model: ${repoId}`, err.message);
        if (typeof onError === 'function') onError(err);
        throw err;
      } finally {
        t.deleteDownload(localPath);
      }
    })();

    // The failure has already been logged and reported via onError. Attach a
    // no-op handler so that, when nobody else awaits this promise, the
    // rejection is not surfaced as a process-level unhandled rejection.
    // Callers that do await the registered promise still observe the error.
    download.catch(() => {});

    t.setDownload(localPath, download);
    return { started: true, localPath, repoId };
  } catch (err) {
    console.error('[Download] Failed to start STT download:', err.message);
    if (typeof onError === 'function') onError(err);
    return { started: false, localPath: null, repoId: null, error: err.message };
  }
}
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return Zt(t,n);if(e===`ggml-stt`)return zn(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.0-beta.1`,Bn={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0-beta.1`,"@fugood/llama.node":`^1.6.0`,"@fugood/whisper.node":`^1.0.16`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Vn=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Hn({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
|
|
2
|
+
import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";import{Elysia as n,file as r,sse as i,t as a}from"elysia";import*as o from"node:stream/web";import{ReadableStream as s}from"node:stream/web";import c,{mkdir as l,open as u,readFile as d,readdir as f,rename as p,stat as m,unlink as h,writeFile as g}from"node:fs/promises";import _ from"node:path";import v from"node:os";import{createHash as y}from"node:crypto";import{gguf as b}from"@huggingface/gguf";import{getBackendDevicesInfo as x,isLibVariantAvailable as S,loadModel as C}from"@fugood/llama.node";import w from"bytes";import{EventEmitter as T}from"node:events";import{initWhisper as E}from"@fugood/whisper.node";import D from"node:fs";import{execSync as O}from"node:child_process";import k from"@iarna/toml";import{ZodError as A,z as j}from"zod";import{fileURLToPath as ee}from"node:url";import{cors as M}from"@elysiajs/cors";import N from"node-machine-id";import P from"ms";import{Buffer as F}from"node:buffer";import te from"node:dgram";const I=1024**3,L=(e,t,n)=>Math.min(Math.max(e,t),n),R=e=>e?40:0,ne=(e=0)=>e?L(e/(12*I)*20,0,20):0,z=(e=0)=>e?L(e/(32*I)*10,0,10):0,B=e=>e?10:0,re=(e=`default`,t=null)=>{let n=String(e).toLowerCase();return n?n.includes(`cuda`)?20:n.includes(`vulkan`)?10:n.includes(`default`)?t===`darwin`||t===`ios`?15:5:0:0},V=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>{if(!a)return 0;let o=R(n)+re(t,e)+ne(r),s=z(i),c=B(a);return Math.min(100,Math.round(o+s+c))},ie=({platform:e,variant:t,hasGpu:n,gpuUsableBytes:r=0,cpuUsableBytes:i=0,ok:a=!0}={})=>({gpuPresence:R(n),variant:re(t,e),gpuMemory:ne(r),cpuMemory:z(i),availability:B(a)}),ae=[`cuda`,`vulkan`,`snapdragon`,`default`],oe=.85,H=.5,se=e=>!e&&e!==0?[]:Array.isArray(e)?e.filter(e=>e!=null):[e],ce=e=>e&&String(e).trim().toLowerCase()||null,le=({variant:e,preferVariants:t=[],variantPreference:n=[],defaultVariants:r=ae}={})=>{let 
i=[];e&&i.push(e),i.push(...se(t)),i.push(...se(n)),i.push(...r);let a=i.map(ce).filter(Boolean);return Array.from(new Set(a))},ue=(e={})=>{let t=String(e.type||e.deviceType||e.kind||``).toLowerCase();return!!(t.includes(`gpu`)||t.includes(`cuda`)||t.includes(`metal`)||t.includes(`vulkan`)||t.includes(`snapdragon`))},de=e=>Array.isArray(e)?e.map(e=>({...e})):[],fe=(e,t)=>e===`snapdragon`?t.filter(e=>e.deviceName!==`GPUOpenCL`):t,pe=({platform:e,totalMemoryInBytes:t,variant:n,devices:r,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:o,error:s})=>{let c=de(fe(n,r)),l=c.some(ue),u=c.filter(e=>ue(e)&&Number.isFinite(Number(e.maxMemorySize))).reduce((e,t)=>e+t.maxMemorySize,0),d=t,f=l?Math.floor(u*i):0,p=d?Math.floor(d*a):0,m={platform:e,variant:n,hasGpu:l,gpuUsableBytes:f,cpuUsableBytes:p,ok:o};return{platform:e,ok:o,variant:n,hasGpu:l,devices:c,gpuTotalBytes:u,gpuUsableBytes:f,cpuTotalBytes:d,cpuUsableBytes:p,score:V(m),breakdown:o?ie(m):null,error:s,timestamp:new Date().toISOString()}},me=({device:e,modelBytes:t=0,kvCacheBytes:n=0}={})=>{if(!e)return{totalRequiredBytes:t+n,fitsInGpu:!1,fitsInCpu:!1,limiting:`unknown-device`};let r=Math.max(0,Number(t)||0)+Math.max(0,Number(n)||0),i=e.hasGpu&&r>0&&r<=e.gpuUsableBytes,a=r>0&&r<=e.cpuUsableBytes,o=`ok`;return!i&&e.hasGpu&&(o=`gpu-memory`),a||(o=i?`cpu-memory`:`insufficient-memory`),{totalRequiredBytes:r,fitsInGpu:i,fitsInCpu:a,limiting:o}},he=async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=oe,cpuMemoryFraction:a=H,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,kvCacheBytes:l=null,limitedKvCacheBytes:u=null,dependencies:d={},defaultVariants:f=ae}={})=>{let{getBackendDevicesInfo:p,isLibVariantAvailable:m}=d;if(typeof p!=`function`||typeof m!=`function`)throw TypeError(`GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions`);let h=le({variant:t,preferVariants:n,variantPreference:r,defaultVariants:f}),g=[];for(let t of 
h)try{if(!await m(t))throw Error(`Variant ${t} not available on this platform`);let n=await p(t);g.push(pe({platform:e,totalMemoryInBytes:s,variant:t,devices:n,gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!0}))}catch(n){let r=n instanceof Error?n.message:String(n);g.push(pe({platform:e,totalMemoryInBytes:s,variant:t,devices:[],gpuMemoryFraction:i,cpuMemoryFraction:a,ok:!1,error:r}))}let _=g.filter(e=>e.ok)[0]||null,v={ok:!!_,selected:_?{..._,breakdown:o?_.breakdown:void 0}:null,attempts:g};if(!o&&v.selected&&delete v.selected.breakdown,!v||!c&&!l)return v;let y=e=>{if(!e)return e;let t=me({device:e,modelBytes:c||0,kvCacheBytes:l||0}),n=null;return u!=null&&u!==l&&(n=me({device:e,modelBytes:c||0,kvCacheBytes:u})),{...e,fit:t,...n&&{limitedFit:n}}};return v.selected=y(v.selected),v.attempts=Array.isArray(v.attempts)?v.attempts.map(y):v.attempts,v},ge=`ggml-llm`,_e=[`cuda`,`vulkan`,`default`],ve=new Map([[ge,he],[`ggml-stt`,async({platform:e,variant:t=null,preferVariants:n=[],variantPreference:r=[],gpuMemoryFraction:i=oe,cpuMemoryFraction:a=H,includeBreakdown:o=!1,totalMemoryInBytes:s,modelBytes:c=null,processingBytes:l=null,kvCacheBytes:u=null,dependencies:d={}}={})=>he({platform:e,variant:t,preferVariants:n,variantPreference:r&&r.length>0?r:_e,gpuMemoryFraction:i,cpuMemoryFraction:a,includeBreakdown:o,totalMemoryInBytes:s,modelBytes:c,kvCacheBytes:l??u,dependencies:d,defaultVariants:_e})]]),ye=async({platform:e,totalMemoryInBytes:t,backend:n=ge,dependencies:r,...i}={})=>{let a=ve.get(n);if(!a)throw Error(`No capability detector registered for backend "${n}"`);return await 
/**
 * Estimate the memory needed to run an STT model.
 *
 * The processing buffer is the sum of three size-tiered allowances selected
 * by model size (the tiers are empirical constants; their individual meaning
 * is not documented in this file). The audio buffer is
 * `audioLengthSeconds * sampleRate * bytesPerSample`.
 *
 * All numeric inputs are coerced with `Number(...)`; values that are not
 * numbers count as 0 so the totals never become NaN.
 *
 * @param {object} [options]
 * @param {number} [options.modelBytes=0] model file size in bytes
 * @param {number} [options.audioLengthSeconds=30] audio window length
 * @param {number} [options.sampleRate=16000] samples per second
 * @param {number} [options.bytesPerSample=4] bytes per audio sample
 * @returns {{modelBytes: number, audioBufferBytes: number, processingBufferBytes: number, totalBytes: number}}
 */
const Ce = ({ modelBytes = 0, audioLengthSeconds = 30, sampleRate = 16e3, bytesPerSample = 4 } = {}) => {
  const MiB = 1024 * 1024;
  const GiB = 1024 * MiB;
  const toNumber = (value) => Number(value) || 0;

  const model = Math.max(0, toNumber(modelBytes));
  const audioBufferBytes = Math.max(
    0,
    Math.floor(Math.max(0, toNumber(audioLengthSeconds)) * toNumber(sampleRate) * toNumber(bytesPerSample)),
  );

  // Pick the allowance of the first tier whose upper bound exceeds the model size.
  const pickTier = (tiers) => tiers.find(([limit]) => model < limit)[1] * MiB;

  const firstAllowance = pickTier([
    [200 * MiB, 120],
    [500 * MiB, 140],
    [2 * GiB, 150],
    [Infinity, 160],
  ]);
  const secondAllowance = pickTier([
    [200 * MiB, 70],
    [500 * MiB, 135],
    [Infinity, 220],
  ]);
  const thirdAllowance = pickTier([
    [100 * MiB, 20],
    [200 * MiB, 30],
    [500 * MiB, 85],
    [2 * GiB, 215],
    [Infinity, 360],
  ]);

  const processingBufferBytes = firstAllowance + secondAllowance + thirdAllowance;
  return {
    modelBytes: model,
    audioBufferBytes,
    processingBufferBytes,
    totalBytes: model + processingBufferBytes + audioBufferBytes,
  };
};
o*a+c},De=({arch:e,nLayer:t=0})=>({arch:we(e),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(t)||0)),swaLayers:0}),Oe=new Map([[`llama4`,({nSwa:e})=>e===0?{enabled:!1}:{enabled:!0,window:e&&e>0?e:8192,pattern:4,type:`chunked`}],[`afmoe`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`phi3`,()=>({enabled:!1})],[`gemma2`,({nSwa:e})=>{let t=e&&e>0?e:4096;return t?{enabled:!0,window:t,pattern:2,type:`standard`}:{enabled:!1}}],[`gemma3`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`standard`}],[`gemma3n`,({nLayer:e,nSwa:t})=>!t||t<=0?{enabled:!1}:{enabled:!0,window:t,pattern:5,type:`standard`,kvLayers:Math.min(20,e)}],[`gemma-embedding`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:6,type:`symmetric`}],[`cohere2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`olmo2`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:4,type:`standard`}],[`exaone4`,({nLayer:e,nSwa:t})=>{let n=e>=64,r=null;return t&&t>0?r=t:n&&(r=4096),r?{enabled:!0,window:r,pattern:4,type:`standard`}:{enabled:!1}}],[`gpt-oss`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:e,pattern:2,type:`standard`}],[`smallthinker`,({nSwa:e})=>!e||e<=0?{enabled:!1}:{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:`standard`}]]),ke=({arch:e,metadata:t={},nLayer:n=0}={})=>{let r=we(e||t[`general.architecture`]),i=Math.max(0,Math.floor(Number(n)||0)),a=Te(t,r),o=r?Oe.get(r):null;if(!o)return De({arch:r,nLayer:n});let s=o({nLayer:i,nSwa:a,metadata:t});if(!s||!s.enabled||!s.window||s.window<=0)return De({arch:r,nLayer:n});let c=Math.max(0,Math.floor(Number(s.pattern)||0)),l=s.kvLayers!=null&&Number.isFinite(Number(s.kvLayers))?Number(s.kvLayers):i,u=Math.max(0,Math.floor(l)),d=Ee(u,c,!!s.denseFirst);return{arch:r,enabled:d>0,window:s.window,pattern:c,denseFirst:!!s.denseFirst,type:s.type||`standard`,kvLayers:u,swaLayers:d}},Ae=new 
/**
 * Extract the model hyper-parameters used for memory estimation from GGUF
 * metadata.
 *
 * Architecture-specific keys (`<arch>.*`) are preferred; several fall back
 * to the corresponding `llama.*` key. When `general.architecture` is missing,
 * every architecture-derived field is null.
 *
 * `head_count_kv` may be a per-layer array: in that case `nHeadKv` is the
 * largest positive entry, `attentionLayerCount` is the number of positive
 * entries and `recurrentLayerCount` is `nLayer - attentionLayerCount`.
 *
 * @param {Record<string, unknown>} [e] GGUF metadata key/value map
 * @returns {object} normalised hyper-parameters (see the returned literal)
 */
const Ie = (e = {}) => {
  const arch = e['general.architecture'];

  // Numeric read: anything that does not coerce to a finite number yields the fallback.
  const readNumber = (key, fallback = null) => {
    const parsed = Number(e[key]);
    return Number.isFinite(parsed) ? parsed : fallback;
  };
  // Same as readNumber, but arrays are passed through untouched.
  const readNumberOrList = (key, fallback = null) => {
    const raw = e[key];
    if (Array.isArray(raw)) return raw;
    return readNumber(key, fallback);
  };
  const archOrLlama = (suffix) => {
    if (!arch) return null;
    return readNumber(`${arch}.${suffix}`, readNumber(`llama.${suffix}`));
  };
  const archOnly = (suffix, fallback = null) => {
    if (!arch) return null;
    return readNumber(`${arch}.${suffix}`, fallback);
  };

  const nCtxTrain = archOrLlama('context_length');
  const nLayer = archOrLlama('block_count');
  const nEmbd = archOrLlama('embedding_length');
  const nHead = archOrLlama('attention.head_count');

  const headCountKv = arch
    ? readNumberOrList(`${arch}.attention.head_count_kv`, readNumberOrList('llama.attention.head_count_kv', nHead))
    : null;

  let nHeadKv = headCountKv;
  let attentionLayerCount = null;
  if (Array.isArray(headCountKv)) {
    const positive = headCountKv.filter((count) => Number(count) > 0);
    attentionLayerCount = positive.length;
    nHeadKv = positive.length > 0 ? Math.max(...positive.map(Number)) : 0;
  }

  let recurrentLayerCount = null;
  if (attentionLayerCount != null && nLayer != null) {
    recurrentLayerCount = nLayer - attentionLayerCount;
  }

  return {
    arch,
    nCtxTrain,
    nLayer,
    nEmbd,
    nHead,
    nHeadKv,
    nEmbdHeadK: archOrLlama('attention.key_length'),
    nEmbdHeadV: archOrLlama('attention.value_length'),
    quantVersion: e['general.quantization_version'] || null,
    fileType: e['general.file_type'] || null,
    attentionLayerCount,
    recurrentLayerCount,
    ssmDConv: archOnly('ssm.conv_kernel'),
    ssmDState: archOnly('ssm.state_size'),
    ssmDInner: archOnly('ssm.inner_size'),
    ssmNGroup: archOnly('ssm.group_count'),
    ssmDtRank: archOnly('ssm.time_step_rank'),
    rwkvHeadSize: archOnly('rwkv.head_size'),
    rwkvTokenShiftCount: archOnly('rwkv.token_shift_count', 2),
  };
};
/**
 * Find the largest context size (1..maxCtx) whose KV cache fits in memory
 * next to the model, using a binary search over `kvBytesForCtx`.
 *
 * `maxCtx` itself is returned unchanged when no estimator is supplied or when
 * the available memory does not exceed the model size.
 *
 * NOTE(review): when not even a context of 1 fits, the search also returns
 * `maxCtx` (the initial "best" value) — confirm callers expect that.
 *
 * @param {object} options
 * @param {number} options.maxCtx upper bound for the context size
 * @param {number} options.availableMemory usable memory in bytes
 * @param {number} options.modelBytes model size in bytes
 * @param {(ctx: number) => number} options.kvBytesForCtx KV cache size estimator
 * @returns {number} the selected context size
 */
const ze = ({ maxCtx: e, availableMemory: t, modelBytes: n, kvBytesForCtx: r }) => {
  const ceiling = Math.max(1, Math.floor(Number(e) || 0));
  if (!r) return ceiling;
  if (t <= n) return ceiling;

  let best = ceiling;
  let low = 1;
  let high = ceiling;
  while (low <= high) {
    const candidate = Math.floor((low + high) / 2);
    const fits = n + r(candidate) <= t;
    if (fits) {
      best = candidate;
      low = candidate + 1;
    } else {
      high = candidate - 1;
    }
  }
  return best;
};
/**
 * Build the status snapshot for all `ggml-llm` generators.
 *
 * For each generator the per-context reference counts and parallel-decoding
 * status are collected; the parallel statuses are additionally flattened
 * into `parallelStatuses`. History comes from the LLM status store `W`.
 *
 * A context record can exist before its native context is attached
 * (`hasModel` is false then). Such records report `parallelStatus: null`
 * and are left out of `parallelStatuses` instead of throwing.
 *
 * @param {Map<string, {type: string, refCount: number, instance: object}>} e
 *   generator registry
 * @returns {{generators: object[], parallelStatuses: object[], history: object}}
 */
function Ge(e) {
  const parallelStatuses = [];

  const describeContext = (generatorId, contextKey, record) => {
    const parallelStatus = record.context?.parallel?.getStatus?.() ?? null;
    if (parallelStatus) {
      parallelStatuses.push({ generatorId, contextKey, ...parallelStatus });
    }
    return {
      key: contextKey,
      refCount: record.refCount,
      hasModel: !!record.context,
      parallelStatus,
    };
  };

  const generators = Array.from(e.entries())
    .filter(([, entry]) => entry.type === 'ggml-llm')
    .map(([generatorId, entry]) => {
      const { instance } = entry;
      const contexts = instance.contexts
        ? Array.from(instance.contexts.entries()).map(([key, record]) => describeContext(generatorId, key, record))
        : [];
      return {
        id: generatorId,
        type: entry.type,
        refCount: entry.refCount,
        repoId: instance.info?.model?.repoId || null,
        quantization: instance.info?.model?.quantization || null,
        variant: instance.info?.runtime?.variant || null,
        nCtx: instance.info?.runtime?.n_ctx || null,
        nParallel: instance.info?.runtime?.n_parallel || null,
        contexts,
      };
    });

  return {
    generators,
    parallelStatuses,
    history: {
      modelLoads: W.getModelLoadHistory(),
      completions: W.getCompletionHistory(),
    },
  };
}
/**
 * Recursively merge `t` into `e` (mutating and returning `e`).
 * Plain objects are merged key by key; arrays and primitives overwrite.
 *
 * Keys that address the prototype chain (`__proto__`, `constructor`,
 * `prototype`) are skipped: the merged object can come from parsed
 * configuration, and recursing into `__proto__` would write onto
 * `Object.prototype`.
 *
 * @param {object} [e] merge target
 * @param {object} [t] values to merge in
 * @returns {object} `e`
 */
const Xe = (e = {}, t = {}) => {
  for (const [key, value] of Object.entries(t || {})) {
    if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue;
    if (value && typeof value === 'object' && !Array.isArray(value)) {
      if (!e[key] || typeof e[key] !== 'object') e[key] = {};
      Xe(e[key], value);
    } else {
      e[key] = value;
    }
  }
  return e;
};

// Hugging Face endpoints used when the config does not override them.
const Ze = 'https://huggingface.co';
const Qe = 'https://huggingface.co/api';

// Default model cache directory: ~/.buttress/models
const K = _.join(v.homedir(), '.buttress', 'models');

// Quantization tags recognised in filenames, listed in lookup order.
const $e = ['mxfp4', 'q8_0', 'q6_k', 'q6', 'q5_k_m', 'q5_k_s', 'q5_k', 'q5_1', 'q5_0', 'q4_k_m', 'q4_k_s', 'q4_k', 'q4_1', 'q4_0', 'q3', 'q2'];

// Default share of system memory the LLM backend may use.
const et = 0.5;

// Default ggml-llm generator configuration.
const tt = {
  backend: {
    type: 'ggml-llm',
    variant: null,
    variant_preference: ['cuda', 'vulkan', 'snapdragon', 'default'],
    gpu_memory_fraction: 0.85,
    cpu_memory_fraction: et,
  },
  model: {
    repo_id: null,
    revision: 'main',
    filename: null,
    url: null,
    quantization: null,
    preferred_quantizations: [],
    n_ctx: null,
    n_gpu_layers: 'auto',
    allow_local_file: false,
    local_path: null,
    api_base: Qe,
    base_url: Ze,
  },
  runtime: {
    cache_dir: K,
    prefer_variants: [],
    huggingface_token: process.env.HUGGINGFACE_TOKEN || null,
    http_headers: {},
    session_cache: { enabled: true, max_size_bytes: 10 * 1024 * 1024 * 1024, max_entries: 1e3 },
    context_release_delay_ms: 1e4,
  },
};

/**
 * Coerce a scalar-or-array config value to an array.
 * Nullish/empty values yield a copy of the fallback `t`; arrays drop
 * null/undefined members.
 */
const nt = (e, t = []) => {
  if (!e && e !== 0) return [...t];
  return Array.isArray(e) ? e.filter((item) => item != null) : [e];
};

/** Normalise a backend variant name; unknown or empty names yield null. */
const rt = (e) => {
  if (!e) return null;
  const name = String(e).toLowerCase();
  return ['cuda', 'vulkan', 'snapdragon', 'default'].includes(name) ? name : null;
};

/**
 * Build a fully-populated ggml-llm generator config from user overrides.
 *
 * Starts from a deep copy of the defaults, merges `e` on top and normalises
 * variants, quantization preferences, batch/parallel settings, URLs, the
 * cache directory and the session-cache options.
 *
 * `runtime.context_release_delay_ms` is clamped to >= 0. An explicit `0`
 * (release immediately) is honoured; only missing or non-numeric values fall
 * back to the default.
 *
 * @param {object} [e] user-supplied configuration overrides
 * @returns {object} normalised configuration
 */
const it = (e = {}) => {
  const t = structuredClone(tt);
  Xe(t, e);

  const uniqueVariants = (value) => Array.from(new Set(nt(value).map(rt).filter(Boolean)));

  t.backend.variant = rt(t.backend.variant);
  t.backend.variant_preference = uniqueVariants(t.backend.variant_preference);
  if (t.backend.variant_preference.length === 0) {
    t.backend.variant_preference = ['cuda', 'vulkan', 'snapdragon', 'default'];
  }
  t.runtime.prefer_variants = uniqueVariants(t.runtime.prefer_variants);

  t.model.preferred_quantizations = Array.from(
    new Set(
      nt(t.model.preferred_quantizations || t.model.quantizations)
        .map((q) => (q ? String(q).toLowerCase() : null))
        .filter(Boolean),
    ),
  );
  if (t.model.quantization) {
    // The explicitly requested quantization always takes first priority.
    const requested = String(t.model.quantization).toLowerCase();
    if (!t.model.preferred_quantizations.includes(requested)) {
      t.model.preferred_quantizations.unshift(requested);
    }
  }

  t.model.n_parallel = t.model.n_parallel ? Math.max(1, Number(t.model.n_parallel)) : undefined;
  t.model.n_batch = Math.max(1, Number(t.model.n_batch) || 512);
  t.model.base_url = t.model.base_url || Ze;
  t.model.api_base = t.model.api_base || Qe;
  t.runtime.cache_dir = t.runtime.cache_dir ? _.resolve(t.runtime.cache_dir) : K;
  t.runtime.session_cache = { ...tt.runtime.session_cache, ...(t.runtime.session_cache || {}) };

  const rawDelay = t.runtime.context_release_delay_ms;
  const isNumericInput = typeof rawDelay === 'number' || (typeof rawDelay === 'string' && rawDelay.trim() !== '');
  const delay = isNumericInput ? Number(rawDelay) : NaN;
  t.runtime.context_release_delay_ms = Number.isNaN(delay)
    ? tt.runtime.context_release_delay_ms
    : Math.max(0, delay);

  return t;
};
t==`string`&&t.startsWith(`__bigint__`)?BigInt(t.slice(10)):t)}catch{return null}},ut=async(e,t,n,r=K)=>{try{let i=ct(e,t,r);await q(_.dirname(i)),await g(i,JSON.stringify(n,(e,t)=>typeof t==`bigint`?`__bigint__${t.toString()}`:t),`utf-8`),console.log(`[Cache] Wrote ${t} cache:`,_.basename(i))}catch(e){console.warn(`[Cache] Failed to write ${t} cache:`,e.message)}},dt=(e=K)=>_.join(e,`.session-state-cache`),ft=(e=K)=>_.join(dt(e),`cache-map.json`),pt=(e=K)=>_.join(dt(e),`temp`),mt=(e=K)=>_.join(dt(e),`states`),ht=()=>({version:1,entries:{},totalSize:0}),gt=async(e=K)=>{try{let t=await d(ft(e),`utf-8`),n=JSON.parse(t);return!n.entries||typeof n.entries!=`object`?ht():n}catch{return ht()}},_t=async(e,t=K)=>{let n=ft(t),r=`${n}.tmp.${Date.now()}`;try{await q(_.dirname(n)),await g(r,JSON.stringify(e,null,2),`utf-8`),await p(r,n)}catch(e){throw await h(r).catch(()=>{}),e}},vt=(e,t)=>{let n=JSON.stringify({text:e,model:t.modelPath,variant:t.variant,n_gpu_layers:t.n_gpu_layers,n_ctx:t.n_ctx,cacheTypeK:t.cacheTypeK,cacheTypeV:t.cacheTypeV,kvUnified:t.kvUnified,swaFull:t.swaFull,flashAttnType:t.flashAttnType});return y(`sha256`).update(n).digest(`hex`).slice(0,24)},yt=(e,t=K)=>_.join(mt(t),`${e}.bin`),bt=(e=K)=>{let t=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return _.join(pt(e),`${t}.bin`)},xt=(e,t)=>e.modelPath===t.modelPath&&e.variant===t.variant&&e.n_gpu_layers===t.n_gpu_layers&&e.n_ctx>=t.n_ctx&&e.cacheTypeK===t.cacheTypeK&&e.cacheTypeV===t.cacheTypeV&&e.kvUnified===t.kvUnified&&e.swaFull===t.swaFull&&e.flashAttnType===t.flashAttnType&&!!e.isRecurrent==!!t.isRecurrent&&!!e.isHybrid==!!t.isHybrid,St=(e,t)=>{let n=Math.min(e.length,t.length),r=0;for(;r<n&&e[r]===t[r];)r+=1;return r},Ct=(e,t,n,r=!1)=>{let i=Object.values(n.entries);console.log(`[SessionCache] Finding match for promptText (${e.length} chars), exactMatch=${r}`),console.log(`[SessionCache] Checking ${i.length} cache entries`);let a=i.filter(e=>xt(e.metadata,t));if(r){let 
/**
 * Evict cache entries that are strict prefixes of a newly stored prompt.
 *
 * An entry is superseded when it is not the entry `n` itself, its metadata
 * is compatible with `r` (per `xt`) and its `fullText` is a strict prefix of
 * `t`. For each superseded entry the state files are unlinked (best effort),
 * its size is subtracted from `totalSize` and it is removed from the map.
 *
 * Entries are handled defensively: a missing `fullText` never matches, a
 * missing `promptText` is logged as 0 chars, and `totalSize` is clamped at 0
 * so inconsistent size bookkeeping cannot drive it negative.
 *
 * @param {{entries: Record<string, object>, totalSize: number}} e cache map (mutated)
 * @param {string} t full text of the entry that supersedes others
 * @param {string} n id of the superseding entry (never evicted)
 * @param {object} r metadata the evicted entries must be compatible with
 * @returns {Promise<string[]>} ids of the evicted entries
 */
const Tt = async (e, t, n, r) => {
  const superseded = Object.entries(e.entries)
    .filter(([id, entry]) => {
      if (id === n || !xt(entry.metadata, r)) return false;
      if (typeof entry.fullText !== 'string') return false;
      return t.startsWith(entry.fullText) && entry.fullText.length < t.length;
    })
    .map(([, entry]) => entry);

  await Promise.all(
    superseded.map(async (entry) => {
      await h(entry.stateFilePath).catch(() => {});
      if (entry.promptStatePath) await h(entry.promptStatePath).catch(() => {});
      const freed = (entry.stateFileSize || 0) + (entry.promptStateSize || 0);
      e.totalSize = Math.max(0, e.totalSize - freed);
      delete e.entries[entry.id];
      console.log(`[SessionCache] Evicted superseded prefix entry: ${entry.id} (${entry.promptText?.length ?? 0} prompt chars)`);
    }),
  );

  return superseded.map((entry) => entry.id);
};
/**
 * Disk-backed cache of saved completion states for one generator.
 *
 * Entries live in a cache map (id -> entry) stored under the runtime cache
 * directory. An entry can be reused when its metadata is compatible with the
 * current runtime and its text is a prefix of (or equal to) the new prompt.
 * Models flagged recurrent/hybrid are matched on token prefixes instead of
 * character prefixes.
 */
var kt = class SessionCache {
  /**
   * @param {object} e Normalized generator config (reads `runtime.cache_dir`
   *   and `runtime.session_cache`).
   * @param {object} t Generator plan (reads `info.runtime` and `localPath`).
   */
  constructor(e, t) {
    const cacheSettings = e.runtime.session_cache;
    const runtime = t.info?.runtime;
    this.config = e;
    this.plan = t;
    this.baseDir = e.runtime.cache_dir;
    this.enabled = cacheSettings?.enabled !== false;
    this.maxSizeBytes = Ot(cacheSettings?.max_size_bytes, 10 * 1024 * 1024 * 1024);
    this.maxEntries = cacheSettings?.max_entries || 1e3;
    this.metadata = {
      variant: runtime?.variant || null,
      n_gpu_layers: runtime?.n_gpu_layers || 0,
      n_ctx: runtime?.n_ctx || 0,
      modelPath: t.localPath,
      cacheTypeK: runtime?.cache_type_k || `f16`,
      cacheTypeV: runtime?.cache_type_v || `f16`,
      kvUnified: runtime?.kv_unified ?? null,
      swaFull: runtime?.swa_full ?? null,
      flashAttnType: runtime?.flash_attn_type || `off`,
      isRecurrent: false,
      isHybrid: false,
    };
    this.cacheMap = null;
    this.initialized = false;
  }

  /** Record the architecture flags reported by a loaded model. */
  updateModelInfo(e) {
    if (!e) return;
    this.metadata.isRecurrent = !!e.is_recurrent;
    this.metadata.isHybrid = !!e.is_hybrid;
    if (this.metadata.isRecurrent || this.metadata.isHybrid) {
      console.log(`[SessionCache] Model architecture: recurrent=${this.metadata.isRecurrent}, hybrid=${this.metadata.isHybrid}`);
    }
  }

  /** @returns {boolean} True when the model is flagged recurrent or hybrid. */
  requiresExactMatch() {
    return this.metadata.isRecurrent || this.metadata.isHybrid;
  }

  /** @returns {boolean} True when token list `e` is a prefix of token list `t`. */
  static checkTokenPrefixMatch(e, t) {
    if (e.length > t.length) return false;
    for (let index = 0; index < e.length; index += 1) {
      if (e[index] !== t[index]) return false;
    }
    return true;
  }

  /** Tokenize `t` with context `e` and return a plain array of tokens. */
  static async tokenizeToArray(e, t) {
    const tokenized = await e.tokenize(t);
    return Array.from(tokenized?.tokens || []);
  }

  /**
   * Token-level lookup used for recurrent/hybrid models.
   *
   * @param {object[]} t Candidate entries (already metadata-compatible).
   * @param {string} n Formatted prompt text.
   * @param {object} r Context providing `tokenize`.
   * @returns {Promise<{entry: object, usePromptState: boolean}|null>}
   */
  async findFormattedMatchForRecurrent(t, n, r) {
    const promptTokens = await SessionCache.tokenizeToArray(r, n);
    const candidates = await Promise.all(
      t.map(async (entry) => {
        try {
          const fullTokens = await SessionCache.tokenizeToArray(r, entry.fullText);
          if (SessionCache.checkTokenPrefixMatch(fullTokens, promptTokens)) {
            return { entry, usePromptState: false, tokenCount: fullTokens.length };
          }
          if (entry.promptStatePath && entry.promptText) {
            const promptOnlyTokens = await SessionCache.tokenizeToArray(r, entry.promptText);
            if (SessionCache.checkTokenPrefixMatch(promptOnlyTokens, promptTokens)) {
              return { entry, usePromptState: true, tokenCount: promptOnlyTokens.length };
            }
          }
          return null;
        } catch (error) {
          console.warn(`[SessionCache] Failed to check entry ${entry.id}: ${error.message}`);
          return null;
        }
      }),
    );

    // Several entries can be valid prefixes (superseded prefixes are not evicted
    // for these models), so reuse the one covering the most prompt tokens rather
    // than whichever happens to come first in the map.
    let best = null;
    for (const candidate of candidates) {
      if (candidate && (!best || candidate.tokenCount > best.tokenCount)) best = candidate;
    }
    if (!best) {
      console.log(`[SessionCache] No token prefix match found for recurrent/hybrid model`);
      return null;
    }

    const { entry, usePromptState, tokenCount } = best;
    console.log(`[SessionCache] Token prefix match: ${entry.id} (${tokenCount} tokens, usePromptState=${usePromptState})`);
    const statePath = usePromptState ? entry.promptStatePath : entry.stateFilePath;
    if (!(await Dt(statePath))) {
      await this.removeStaleEntry(entry);
      return null;
    }
    entry.lastAccessedAt = new Date().toISOString();
    await _t(this.cacheMap, this.baseDir).catch(() => {});
    return { entry, usePromptState };
  }

  /** Create the cache directories and load the cache map; disables the cache on failure. */
  async initialize() {
    if (!this.enabled || this.initialized) return;
    try {
      await q(dt(this.baseDir));
      await q(pt(this.baseDir));
      await q(mt(this.baseDir));
      this.cacheMap = await gt(this.baseDir);
      this.initialized = true;
      console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`);
    } catch (error) {
      console.warn(`[SessionCache] Failed to initialize: ${error.message}`);
      this.enabled = false;
    }
  }

  /** Drop an entry whose state file is gone: remove files, map record and size accounting. */
  async removeStaleEntry(e) {
    console.log(`[SessionCache] Removing stale entry: ${e.id}`);
    if (e.stateFilePath) await h(e.stateFilePath).catch(() => {});
    if (e.promptStatePath) await h(e.promptStatePath).catch(() => {});
    delete this.cacheMap.entries[e.id];
    this.cacheMap.totalSize -= (e.stateFileSize || 0) + (e.promptStateSize || 0);
    await _t(this.cacheMap, this.baseDir).catch(() => {});
  }

  /**
   * Find a reusable entry for a prompt.
   *
   * @param {string} e Prompt text.
   * @param {object|null} [t=null] Context used for token-level matching.
   * @returns {Promise<{entry: object, usePromptState: boolean}|null>}
   */
  async findMatchingEntry(e, t = null) {
    if (!this.enabled || !this.cacheMap) return null;
    const exactOnly = this.requiresExactMatch();
    if (exactOnly && t) {
      const compatible = Object.values(this.cacheMap.entries).filter(
        (entry) => xt(entry.metadata, this.metadata) && entry.fullText,
      );
      return this.findFormattedMatchForRecurrent(compatible, e, t);
    }
    const match = Ct(e, this.metadata, this.cacheMap, exactOnly);
    if (!match) return null;
    const { entry } = match;
    if (!(await Dt(entry.stateFilePath))) {
      await this.removeStaleEntry(entry);
      return null;
    }
    entry.lastAccessedAt = new Date().toISOString();
    await _t(this.cacheMap, this.baseDir).catch(() => {});
    return { entry, usePromptState: false };
  }

  /**
   * Add `load_state_path` to completion options when a cached state matches.
   *
   * @param {object} e Completion options.
   * @param {string} t Prompt text.
   * @param {object|null} [n=null] Context used for token-level matching.
   * @returns {Promise<{options: object, cacheEntry: object|null, promptPrefix: string|null}>}
   */
  async prepareCompletionOptions(e, t, n = null) {
    const passthrough = { options: e, cacheEntry: null, promptPrefix: null };
    if (!this.enabled) return passthrough;
    const match = await this.findMatchingEntry(t, n);
    if (!match) return passthrough;
    const { entry, usePromptState } = match;
    const statePath = usePromptState ? entry.promptStatePath : entry.stateFilePath;
    const prefixText = usePromptState ? entry.promptText : entry.fullText;
    console.log(`[SessionCache] Found matching entry: ${entry.id} (${prefixText.length} chars, usePromptState=${usePromptState})`);
    return { options: { ...e, load_state_path: statePath }, cacheEntry: entry, promptPrefix: prefixText };
  }

  /**
   * Move freshly written temp state files into the cache and register an entry.
   *
   * @param {string} e Prompt text.
   * @param {string} t Completion text.
   * @param {string} n Temp path of the full state file.
   * @param {number} [r=0] Prompt token count.
   * @param {string|null} [i=null] Temp path of the prompt-only state file.
   * @returns {Promise<object|null>} The stored (or existing) entry, or null on failure.
   */
  async saveCompletionState(e, t, n, r = 0, i = null) {
    if (!this.enabled) return null;
    const discardTempStates = () => {
      if (n) h(n).catch(() => {});
      if (i) h(i).catch(() => {});
    };
    // Without a loaded map there is nowhere to register the entry; clean up the
    // temp files instead of failing on a null dereference and leaking them.
    if (!this.cacheMap) {
      discardTempStates();
      return null;
    }

    const fullText = e + t;
    const entryId = vt(fullText, this.metadata);
    const existing = this.cacheMap.entries[entryId];
    if (existing) {
      console.log(`[SessionCache] Entry already exists for prompt: ${entryId}, updating position`);
      existing.lastAccessedAt = new Date().toISOString();
      // Re-insert so the entry moves to the end of the map's key order.
      delete this.cacheMap.entries[entryId];
      this.cacheMap.entries[entryId] = existing;
      await _t(this.cacheMap, this.baseDir).catch(() => {});
      discardTempStates();
      return existing;
    }

    const statePath = yt(entryId, this.baseDir);
    const promptStateTarget = i ? yt(`${entryId}-prompt`, this.baseDir) : null;
    try {
      await q(_.dirname(statePath));
      await p(n, statePath);
      const stateStat = await m(statePath);

      let savedPromptStatePath = null;
      let promptStateSize = 0;
      if (i && promptStateTarget) {
        try {
          await p(i, promptStateTarget);
          promptStateSize = (await m(promptStateTarget)).size;
          savedPromptStatePath = promptStateTarget;
          console.log(`[SessionCache] Saved prompt state: ${promptStateTarget}`);
        } catch (error) {
          // Only record the prompt state path when the file really exists, so a
          // later lookup does not pick a missing file and drop the whole entry.
          console.warn(`[SessionCache] Failed to save prompt state: ${error.message}`);
        }
      }

      const entry = {
        id: entryId,
        promptText: e,
        completionText: t,
        fullText,
        promptTokenCount: r,
        stateFilePath: statePath,
        stateFileSize: stateStat.size,
        promptStatePath: savedPromptStatePath,
        promptStateSize,
        metadata: { ...this.metadata },
        createdAt: new Date().toISOString(),
        lastAccessedAt: new Date().toISOString(),
      };
      this.cacheMap.entries[entryId] = entry;
      this.cacheMap.totalSize += stateStat.size + promptStateSize;
      if (!this.requiresExactMatch()) await Tt(this.cacheMap, e, entryId, this.metadata);
      await wt(this.cacheMap, this.maxSizeBytes, this.maxEntries);
      await _t(this.cacheMap, this.baseDir);
      console.log(`[SessionCache] Saved entry: ${entryId} (${stateStat.size} bytes, ${fullText.length} chars)`);
      return entry;
    } catch (error) {
      console.warn(`[SessionCache] Failed to save state: ${error.message}`);
      discardTempStates();
      return null;
    }
  }

  /** Ensure the temp directory exists and return a fresh temp state path. */
  async generateTempStatePath() {
    await q(pt(this.baseDir));
    return bt(this.baseDir);
  }

  /** Remove old temp files left in the cache's temp directory. */
  async cleanup() {
    await Et(this.baseDir);
  }
};
/**
 * Download a URL to a local file, reporting progress in 5% steps.
 *
 * The payload is streamed into `<target>.part` and only renamed to the final
 * path once the stream finished (and, when an expected size is given, the
 * size was verified). A crash or kill mid-download therefore never leaves a
 * truncated file at the final path, which matters because existence checks
 * without a size would accept such a file as complete.
 *
 * @param {string} e Source URL.
 * @param {object} t Request headers.
 * @param {string} n Destination file path.
 * @param {number|null} r Expected size in bytes (falsy to skip verification).
 * @param {(fraction: number) => void} [i] Progress callback, called with values in (0, 1].
 * @returns {Promise<void>}
 * @throws {Error} On HTTP failure, stream failure or size mismatch.
 */
const Ft = async (e, t, n, r, i) => {
  if (typeof fetch != `function`) throw Error(`Global fetch is not available in this runtime`);
  await q(_.dirname(n));

  const response = await fetch(e, { headers: t });
  if (!response.ok || !response.body) {
    throw Error(`Failed to download ${e}: ${response.status} ${response.statusText}`);
  }

  const partialPath = `${n}.part`;
  const notify = typeof i == `function` ? i : null;
  const fileHandle = await u(partialPath, `w`);
  const expectedBytes = Number(response.headers.get(`content-length`)) || r || 0;
  // Count steps as integers; repeatedly adding 0.05 drifts (0.15000000000000002, ...).
  const PROGRESS_STEPS = 20;
  let nextStep = 1;
  let receivedBytes = 0;

  const discardPartial = async () => {
    await fileHandle.close().catch(() => {});
    await h(partialPath).catch(() => {});
  };

  try {
    await response.body.pipeTo(
      new Ye({
        async write(chunk) {
          await fileHandle.write(chunk);
          receivedBytes += chunk.byteLength;
          if (!notify || expectedBytes <= 0) return;
          const fraction = Math.min(1, receivedBytes / expectedBytes);
          // The final 100% notification is sent once the file is in place.
          while (nextStep < PROGRESS_STEPS && nextStep / PROGRESS_STEPS <= fraction) {
            notify(nextStep / PROGRESS_STEPS);
            nextStep += 1;
          }
        },
        async close() {
          await fileHandle.close();
        },
        async abort(reason) {
          await discardPartial();
          throw reason;
        },
      }),
    );

    if (r) {
      const { size } = await m(partialPath);
      if (size !== r) throw Error(`Downloaded file size mismatch, expected ${r} got ${size}`);
    }
    await p(partialPath, n);
  } catch (error) {
    await discardPartial();
    throw error;
  }

  if (notify) notify(1);
};
/**
 * Pick the backend library variant to run a ggml-llm model with.
 *
 * Builds the ordered variant candidates for the config, asks the capability
 * probe (`ye`) to evaluate them, and normalizes the reports.
 *
 * @param {object} e Normalized generator config.
 * @param {object} [sizes]
 * @param {number|null} [sizes.modelBytes=null] Model size passed to the probe.
 * @param {number|null} [sizes.kvCacheBytes=null] KV cache size passed to the probe.
 * @returns {Promise<{selected: object, attempts: object[]}>}
 * @throws {Error} When the probe reports no usable variant.
 */
const Lt = async (e, { modelBytes = null, kvCacheBytes = null } = {}) => {
  const candidateVariants = ot(e);
  const [primaryVariant, ...fallbackVariants] = candidateVariants;
  const toUnitInterval = (value) => Math.min(1, Math.max(0, Number(value)));

  let gpuFraction;
  if (e.backend?.gpu_memory_fraction == null) {
    gpuFraction = tt.backend.gpu_memory_fraction || 1;
  } else {
    gpuFraction = toUnitInterval(e.backend.gpu_memory_fraction);
  }

  let cpuFraction;
  if (e.backend?.cpu_memory_fraction == null) {
    cpuFraction = et;
  } else {
    cpuFraction = toUnitInterval(e.backend.cpu_memory_fraction);
  }

  const probe = await ye({
    platform: process.platform,
    totalMemoryInBytes: v.totalmem(),
    backend: `ggml-llm`,
    variant: primaryVariant || null,
    preferVariants: fallbackVariants,
    gpuMemoryFraction: gpuFraction,
    cpuMemoryFraction: cpuFraction,
    dependencies: { getBackendDevicesInfo: x, isLibVariantAvailable: S },
    modelBytes,
    kvCacheBytes,
  });

  // Give every variant report the same shape regardless of what the probe filled in.
  const normalizeReport = (report) => ({
    ...report,
    devices: Array.isArray(report.devices) ? report.devices : [],
    ok: report.ok,
    hasGpu: !!report.hasGpu,
    totalMemory: report.gpuTotalBytes || report.totalMemory || 0,
    error: report.ok ? null : Error(report.error || `Variant ${report.variant} not available on this platform`),
  });

  if (!probe.ok || !probe.selected) {
    const failures = (probe.attempts || [])
      .map((attempt) => `${attempt.variant}: ${attempt.error || `unknown error`}`)
      .join(`; `);
    throw Error(`Unable to initialize any backend variant (${candidateVariants.join(`, `)}). Errors: ${failures}`);
  }

  const attempts = (probe.attempts || []).map((attempt) => normalizeReport(attempt));
  const selected = normalizeReport(probe.selected);
  return { selected, attempts };
};
Mt(t.url,t.headers,e.runtime.cache_dir),{arch:r,nCtxTrain:i,nLayer:a,nEmbd:o,nHead:s,nHeadKv:c,nEmbdHeadK:l,nEmbdHeadV:u,quantVersion:d,fileType:f,attentionLayerCount:p,recurrentLayerCount:m,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:S}=Ie(n),C=Number.isFinite(Number(a))?Number(a):0,w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):T,D=T>0&&w>0?w/T:128,O=l!=null&&Number.isFinite(Number(l))?Number(l):D,k=u!=null&&Number.isFinite(Number(u))?Number(u):D,A=ke({arch:r,metadata:n,nLayer:C}),j=A&&Number.isFinite(Number(A.kvLayers))?Number(A.kvLayers):C,ee=Math.max(0,Math.floor(Number(j)||0)),M={use_mmap:e.model.use_mmap??e.runtime.use_mmap,use_mlock:e.model.use_mlock??e.runtime.use_mlock,n_threads:e.model.n_threads??e.runtime.n_threads,n_ctx:e.model.n_ctx??e.runtime.n_ctx,n_batch:e.model.n_batch??e.runtime.n_batch,n_ubatch:e.model.n_ubatch??e.runtime.n_ubatch,n_cpu_moe:e.model.n_cpu_moe??e.runtime.n_cpu_moe,n_parallel:(e.model.n_parallel??e.runtime.n_parallel)||4,cpu_mask:e.model.cpu_mask??e.runtime.cpu_mask,cpu_strict:e.model.cpu_strict??e.runtime.cpu_strict,devices:e.model.devices??e.runtime.devices,n_gpu_layers:e.model.n_gpu_layers??e.runtime.n_gpu_layers,flash_attn_type:e.model.flash_attn_type??e.runtime.flash_attn_type,cache_type_k:e.model.cache_type_k??e.runtime.cache_type_k,cache_type_v:e.model.cache_type_v??e.runtime.cache_type_v,kv_unified:e.model.kv_unified??e.runtime.kv_unified,swa_full:e.model.swa_full??e.runtime.swa_full,ctx_shift:e.model.ctx_shift??e.runtime.ctx_shift},N=M.n_ctx?Number(M.n_ctx):null,P=N||i||4096,F=[],te=[],I=!0;if(N&&i&&N>i){I=!1;let e=`Requested context length (${N}) exceeds model training context (${i})`;F.push(e),te.push(e),P=i}N&&!i&&F.push(`Model metadata missing training context length, using requested value`);let 
L={k:M.cache_type_k,v:M.cache_type_v},R=t.size>0?t.size:0,ne=Le({layerCount:ee,headKvCount:E,embdHeadKCount:O,embdHeadVCount:k,cacheTypes:L,swaConfig:A,kvUnified:M.kv_unified,nParallel:M.n_parallel,swaFull:M.swa_full,arch:r,attentionLayerCount:p}),z=Re({nLayer:C,nEmbd:w,recurrentLayerCount:m,nSeqMax:M.n_parallel||4,ssmDConv:h,ssmDState:g,ssmDInner:_,ssmNGroup:y,ssmDtRank:b,rwkvHeadSize:x,rwkvTokenShiftCount:S,arch:r}),B=await Lt(e,{modelBytes:R,kvCacheBytes:ne(P)+z}),re=B.selected.totalMemory||0,V=re*(e.backend.gpu_memory_fraction||1),ie=e.backend.cpu_memory_fraction==null?et:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),ae=Math.max(0,v.totalmem()*ie),oe=B.selected.hasGpu?V:ae,H=ze({maxCtx:P,availableMemory:oe,modelBytes:R,kvBytesForCtx:ne});if(!N&&H){let e=i?Math.min(H,i):H,t=Math.max(32,e);t<P&&F.push(`Context length capped to ${t} by memory limits`),P=t}P>H&&(P=H);let se=Math.floor(H);console.log(`[buttress] Memory-limited context length: ${se}`);let ce=ne(P),le=R+ce+z,ue=C?R/(C+1):R,de=0;B.selected.hasGpu&&ue>0&&(de=Math.min(C+1,Math.max(0,Math.floor(V/ue)))),console.log(`[buttress] Auto GPU layer capacity (${B.selected.variant}): ${de}/${C+1}`);let fe;fe=M.n_gpu_layers===`auto`||M.n_gpu_layers==null?de:Math.max(0,Math.min(Number(M.n_gpu_layers)||0,C+1));let pe=(()=>{let e=M.flash_attn_type&&String(M.flash_attn_type).toLowerCase();return e===`on`||e===`off`?e:B.selected.hasGpu?`auto`:`off`})(),me=e.runtime.cache_dir,he=Nt(e,t),ge=await 
Pt(he,t.size);return{config:e,info:{ok:I,backend:`ggml-llm`,warnings:F,errors:te,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,url:t.url,sizeBytes:t.size,metadata:{architecture:r,n_ctx_train:i,n_layer:C,n_embd:w,quantization_version:d,file_type:f,kv_layer_count:ee,swa:A?.enabled?{window:A.window,pattern:A.pattern,dense_first:A.denseFirst,type:A.type,layers:A.swaLayers}:null}},runtime:{...M,variant:B.selected.variant,n_ctx:P,requested_ctx:N,n_gpu_layers:fe,auto_gpu_layers:de,flash_attn_type:pe,cache_type_k:L.k,cache_type_v:L.v,estimated_max_n_ctx:se},resources:{modelBytes:R,kvCacheBytes:ce,recurrentMemoryBytes:z,totalEstimatedBytes:le,gpuCapacityBytes:re,gpuUsableBytes:V,cpuUsableBytes:ae,fit:B.selected.fit},devices:{selected:B.selected,attempts:B.attempts},download:{cacheDir:me,localPath:he,exists:ge},timestamp:new Date().toISOString()},artifact:t,metadata:{arch:r,nCtxTrain:i,nLayer:C,nEmbd:w},devices:B,cacheTypes:L,localPath:he,localExists:ge}},zt=(e,t,n=null,r=null)=>{let i,a=Date.now(),o=0;return new Je({async start(s){try{let c=await e.parallel.completion(t,(e,t)=>{t&&(t.token&&(o+=1),s.enqueue({event:`token`,data:{requestId:e,...t}}))}),{requestId:l}=c;i=c.stop;let u=await c.promise;console.log(`[Completion] Result:`,u),s.enqueue({event:`result`,data:{requestId:l,...u}}),s.close();let 
/**
 * Run a parallel completion as an event stream and store its final state in
 * the session cache when the completion ended normally.
 *
 * Emits `token` events while generating, then a single `result` event (or an
 * `error` event), and records a stats entry via `W.addCompletion`.
 *
 * @param {object} e Model context exposing `parallel.completion`.
 * @param {object} t Completion options (may carry `load_state_path`).
 * @param {object} n Session cache (`enabled`, `saveCompletionState`).
 * @param {string} r Prompt text used as the cache key prefix.
 * @param {string} i Temp path the full state is written to.
 * @param {number} a Prompt token count.
 * @param {string|null} [o=null] Generator id for stats.
 * @param {object|null} [s=null] `{repoId, quantization, variant}` for stats.
 * @param {string|null} [c=null] Temp path of the prompt-only state, if any.
 * @returns {object} A readable stream of `{event, data}` chunks.
 */
const Bt = (e, t, n, r, i, a, o = null, s = null, c = null) => {
  const startedAt = Date.now();
  let stopCompletion;
  let cancelled = false;
  let streamClosed = false;
  let completionText = ``;
  let streamedTokenCount = 0;

  const discardTempStates = () => {
    if (i) h(i).catch(() => {});
    if (c) h(c).catch(() => {});
  };

  return new Je({
    async start(controller) {
      // Enqueueing on a cancelled or closed stream throws, so drop late chunks instead.
      const emit = (chunk) => {
        if (!cancelled && !streamClosed) controller.enqueue(chunk);
      };

      try {
        const handle = await e.parallel.completion(t, (requestId, chunk) => {
          if (!chunk) return;
          if (chunk.token) {
            completionText += chunk.token;
            streamedTokenCount += 1;
          }
          emit({ event: `token`, data: { requestId, ...chunk } });
        });
        const { requestId } = handle;
        stopCompletion = handle.stop;
        // The consumer may have cancelled before the stop handle was available.
        if (cancelled && stopCompletion) stopCompletion();

        const result = await handle.promise;
        if (result.text) completionText = result.text;
        else if (result.content) completionText = result.content;

        // Accept both spellings, exactly as the stats entry below does.
        const contextFull = result.context_full || result.contextFull || false;
        const cacheable = !cancelled && !result.interrupted && !contextFull;

        console.log(`[Completion] Result:`, result);
        emit({ event: `result`, data: { requestId, ...result } });
        if (!cancelled) controller.close();
        streamClosed = true;

        const timings = result.timings || {};
        W.addCompletion({
          id: `completion-${requestId}`,
          generatorId: o,
          requestId,
          repoId: s?.repoId || null,
          quantization: s?.quantization || null,
          variant: s?.variant || null,
          cacheTokens: timings.cache_n ?? 0,
          promptTokens: timings.prompt_n ?? a ?? 0,
          tokensGenerated: timings.predicted_n ?? streamedTokenCount,
          tokensPerSecond: timings.predicted_per_second ?? 0,
          promptPerSecond: timings.prompt_per_second ?? 0,
          durationMs: Date.now() - startedAt,
          success: true,
          interrupted: result.interrupted || false,
          contextFull,
          usedCache: !!t.load_state_path,
        });

        if (cacheable && n.enabled && completionText) {
          n.saveCompletionState(r, completionText, i, a, c).catch((error) => {
            console.warn(`[SessionCache] Save failed:`, error.message);
          });
        } else {
          discardTempStates();
        }
      } catch (error) {
        if (!cancelled && !streamClosed) {
          controller.enqueue({ event: `error`, data: { message: error?.message || String(error) } });
          controller.error(error);
        }
        discardTempStates();
        W.addCompletion({
          id: `completion-${Date.now()}`,
          generatorId: o,
          repoId: s?.repoId || null,
          quantization: s?.quantization || null,
          variant: s?.variant || null,
          durationMs: Date.now() - startedAt,
          tokensGenerated: streamedTokenCount,
          success: false,
          error: error?.message || String(error),
        });
      }
    },
    cancel() {
      cancelled = true;
      if (stopCompletion) stopCompletion();
      // Temp files are removed again once the completion settles, in case the
      // backend writes the state after this point.
      discardTempStates();
    },
  });
};
a},Ut=async(e,t)=>{let n=Vt(e),r=e.contexts.get(n);if(r&&!r.released)return r.releaseTimer&&(clearTimeout(r.releaseTimer),r.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${n}"`)),r.releaseRequested=!1,r.refCount+=1,console.log(`[Context] Reusing existing context "${n}", refCount=${r.refCount}`),typeof t==`function`&&t(0),r.context||await r.ready,typeof t==`function`&&t(1),r;r?console.log(`[Context] Record exists but released=${r.released}, creating new context`):console.log(`[Context] No existing record for "${n}", creating new context`),r={key:n,refCount:1,ready:null,released:!1},e.contexts.set(n,r),r.ready=(async()=>{let i=Date.now(),a=await Ht(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let o={model:a,n_threads:e.plan.info.runtime.n_threads,use_mmap:e.plan.info.runtime.use_mmap,use_mlock:e.plan.info.runtime.use_mlock,cpu_mask:e.plan.info.runtime.cpu_mask,cpu_strict:e.plan.info.runtime.cpu_strict,devices:e.plan.info.runtime.devices,n_ctx:e.plan.info.runtime.n_ctx,n_gpu_layers:e.plan.info.runtime.n_gpu_layers,n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch,n_ubatch:e.plan.info.runtime.n_ubatch,n_cpu_moe:e.plan.info.runtime.n_cpu_moe,flash_attn_type:e.plan.info.runtime.flash_attn_type,ctx_shift:e.plan.info.runtime.ctx_shift,kv_unified:e.plan.info.runtime.kv_unified,swa_full:e.plan.info.runtime.swa_full,lib_variant:e.plan.info.runtime.variant};e.plan.info.runtime.flash_attn_type!==`off`&&(o.cache_type_k=e.plan.info.runtime.cache_type_k,o.cache_type_v=e.plan.info.runtime.cache_type_v),console.log(`[Context] Load Options:`,o);let s;try{if(s=await C(o,e=>{typeof t==`function`&&(t(.5+e*.25),e%5==0&&console.log(`[Context] Load Model Progress:`,e))}),e.plan.info.runtime.n_parallel&&!await s.parallel.enable({n_parallel:e.plan.info.runtime.n_parallel,n_batch:e.plan.info.runtime.n_batch}))throw Error(`Failed to enable parallel decoding mode for context`);return typeof 
/**
 * Create a ggml-llm generator: plans the model/runtime, prepares the session
 * cache and returns the operations the server exposes for it.
 *
 * @param {string} e Generator id.
 * @param {object} t Raw generator config (normalized with `it`).
 * @param {object} [n]
 * @param {object|null} [n.globalDownloadManager=null] Shared download registry.
 * @returns {Promise<object>} Generator facade (initContext, completion, tokenize, ...).
 */
async function Kt(e, t, n = {}) {
  const { globalDownloadManager = null } = n;
  const config = it(t);
  const plan = await Rt(config);
  const sessionCache = new kt(config, plan);
  await sessionCache.initialize();

  const state = {
    id: e,
    type: `ggml-llm`,
    config,
    plan,
    info: plan.info,
    contexts: new Map(),
    downloads: new Map(),
    globalDownloadManager,
    sessionCache,
    finalized: false,
  };

  // Look up the context record for the current plan and wait until it is loaded.
  const requireReadyContext = async () => {
    const contextKey = Vt(state);
    const record = state.contexts.get(contextKey);
    if (!record) throw Error(`Context "${contextKey}" not initialized`);
    await record.ready;
    return record;
  };

  const describeModel = () => ({
    repoId: state.plan.info.model?.repoId || null,
    quantization: state.plan.info.model?.quantization || null,
    variant: state.plan.info.runtime?.variant || null,
  });

  return {
    id: e,
    type: `ggml-llm`,
    info: plan.info,
    contexts: state.contexts,

    initContext: async (request = {}) => {
      const { onProgress } = request;
      const record = await Ut(state, onProgress);
      state.sessionCache.updateModelInfo(record.modelInfo);
      return {
        modelInfo: record.modelInfo ? { ...record.modelInfo } : null,
        runtime: { ...state.plan.info.runtime },
        download: { ...state.plan.info.download },
      };
    },

    completion: async (request = {}) => {
      const { options = {}, useCache = true } = request;
      const record = await requireReadyContext();

      // The cache is keyed by prompt text, so chat messages are formatted first.
      let promptText = options.prompt || ``;
      if (!promptText && options.messages) {
        const { messages } = options;
        const templateParams = {
          chatTemplate: options.chat_template || options.chatTemplate,
          jinja: options.jinja ?? true,
          tools: options.tools,
          parallel_tool_calls: options.parallel_tool_calls,
          tool_choice: options.tool_choice,
          reasoning_format: options.reasoning_format,
          enable_thinking: options.enable_thinking,
          add_generation_prompt: options.add_generation_prompt,
          now: options.now,
          chat_template_kwargs: options.chat_template_kwargs,
        };
        const formatted = await record.context.getFormattedChat(messages, templateParams.chatTemplate, templateParams);
        promptText = formatted?.prompt || formatted || ``;
      }

      if (useCache && state.sessionCache.enabled && promptText) {
        const { options: preparedOptions } = await state.sessionCache.prepareCompletionOptions(options, promptText, record.context);
        const tempStatePath = await state.sessionCache.generateTempStatePath();
        const promptTokenCount = (await record.context.tokenize(promptText))?.tokens?.length || 0;
        const completionOptions = { ...preparedOptions, save_state_path: tempStatePath };
        const exactMatchOnly = state.sessionCache.requiresExactMatch();
        const loadsCachedState = !!completionOptions.load_state_path;
        let promptStatePath = null;
        if (exactMatchOnly && !loadsCachedState) {
          promptStatePath = await state.sessionCache.generateTempStatePath();
          completionOptions.save_prompt_state_path = promptStatePath;
        }
        return Bt(
          record.context,
          completionOptions,
          state.sessionCache,
          promptText,
          tempStatePath,
          promptTokenCount,
          state.id,
          describeModel(),
          promptStatePath,
        );
      }

      return zt(record.context, options, state.id, describeModel());
    },

    tokenize: async (request = {}) => {
      const { text = ``, params = {} } = request;
      const record = await requireReadyContext();
      const tokenized = await record.context.tokenize(text, params);
      if (!tokenized) return { tokens: [] };
      const tokens = Array.from(tokenized.tokens ?? []).map((token) => Number(token));
      return { ...tokenized, tokens };
    },

    detokenize: async (request = {}) => {
      const { tokens = [] } = request;
      const record = await requireReadyContext();
      return record.context.detokenize(tokens.map((token) => Number(token)));
    },

    applyChatTemplate: async (request = {}) => {
      const { messages = [], template, params } = request;
      const record = await requireReadyContext();
      return await record.context.getFormattedChat(messages, template, params);
    },

    releaseContext: async () => {
      if (state.finalized) return false;
      const record = state.contexts.get(Vt(state));
      return record ? Gt(state, record, false) : false;
    },

    finalize: async () => {
      if (state.finalized) return;
      state.finalized = true;
      const records = Array.from(state.contexts.values());
      const releases = records.map((record) => {
        // Contexts that are already released, or have a release pending, are left alone.
        if (record.released || record.releaseRequested || record.releaseTimer) return Promise.resolve(false);
        record.refCount = Math.max(0, record.refCount - 1);
        if (record.refCount > 0) return Promise.resolve(false);
        return Wt(state, record);
      });
      await Promise.allSettled(releases);
      if (records.length === 0 || records.every((record) => record.released)) {
        await state.sessionCache.cleanup();
      }
    },

    getStatus: () => {
      const parallelStatuses = [];
      const contexts = Array.from(state.contexts.entries()).map(([contextKey, record]) => {
        const summary = { key: contextKey, refCount: record.refCount, hasModel: !!record.context };
        // A record is registered before its model finished loading, so `context`
        // can still be undefined here; report that as "no status" instead of throwing.
        const parallelStatus = record.context?.parallel?.getStatus?.() ?? null;
        summary.parallelStatus = parallelStatus;
        if (parallelStatus) parallelStatuses.push({ contextKey, ...parallelStatus });
        return summary;
      });
      return {
        id: state.id,
        type: state.type,
        repoId: state.plan.info.model?.repoId || null,
        quantization: state.plan.info.model?.quantization || null,
        variant: state.plan.info.runtime?.variant || null,
        nCtx: state.plan.info.runtime?.n_ctx || null,
        nParallel: state.plan.info.runtime?.n_parallel || null,
        contexts,
        parallelStatuses,
      };
    },

    subscribeParallelStatus: (listener) => {
      // Only contexts that are already loaded can be subscribed to; records that
      // are still loading have no `context` yet and are skipped.
      const subscriptions = Array.from(state.contexts.entries()).map(([contextKey, record]) => {
        const parallel = record.context?.parallel;
        if (!parallel) return null;
        return parallel.subscribeToStatus((status) => {
          listener({ contextKey, ...status });
        });
      });
      return {
        remove: () => {
          subscriptions.forEach((subscription) => {
            if (subscription?.remove) subscription.remove();
          });
        },
      };
    },

    hasPendingReleases: () =>
      Array.from(state.contexts.values()).some(
        (record) => !record.released && (record.releaseRequested || record.releaseTimer || record.refCount > 0),
      ),

    resetFinalized: () => {
      state.finalized = false;
    },
  };
}
It(n),s=Nt(n,o),{repoId:c}=o;if(await Pt(s,o.size))return console.log(`[Download] Model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting download: ${c}`);let u=(async()=>{try{if(o.isSplit&&o.splitCount>0){let e=/-(\d{5})-of-(\d{5})\.gguf$/,t=_.dirname(s),i=o.splitCount,a=0;for(let s=1;s<=i;s+=1){let l=String(s).padStart(5,`0`),u=o.filename.replace(e,`-${l}-of-${String(i).padStart(5,`0`)}.gguf`),d=`${n.model.base_url.replace(/\/+$/,``)}/${o.repoId}/resolve/${o.revision}/${u}`,f=_.join(t,u);await Pt(f)||await Ft(d,o.headers,f,null,e=>{if(e>=0&&Number.isFinite(e)){let t=(a+e)/i;console.log(`[Download] ${c}: ${Math.round(t*100)}%`),typeof r==`function`&&r(t)}}),a+=1}}else await Ft(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))});console.log(`[Download] Completed: ${c}`),typeof i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}async function Yt(e){let t=it(e),n=await It(t),r=await 
Mt(n.url,n.headers,t.runtime.cache_dir),{arch:i,nCtxTrain:a,nLayer:o,nEmbd:s,nHead:c,nHeadKv:l,nEmbdHeadK:u,nEmbdHeadV:d,quantVersion:f,fileType:p,attentionLayerCount:m,recurrentLayerCount:h,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C}=Ie(r),w=Number.isFinite(Number(o))?Number(o):0,T=Number.isFinite(Number(s))?Number(s):0,E=Number.isFinite(Number(c))?Number(c):0,D=Number.isFinite(Number(l))?Number(l):E,O=E>0&&T>0?T/E:128,k=u!=null&&Number.isFinite(Number(u))?Number(u):O,A=d!=null&&Number.isFinite(Number(d))?Number(d):O,j=ke({arch:i,metadata:r,nLayer:w}),ee=j&&Number.isFinite(Number(j.kvLayers))?Number(j.kvLayers):w,M=Math.max(0,Math.floor(Number(ee)||0)),N=(t.model.n_ctx?Number(t.model.n_ctx):null)||a||4096,P={k:t.model.cache_type_k,v:t.model.cache_type_v},F=n.size>0?n.size:0,te=t.model.n_parallel||4,I=Le({layerCount:M,headKvCount:D,embdHeadKCount:k,embdHeadVCount:A,cacheTypes:P,swaConfig:j,kvUnified:t.model.kv_unified,nParallel:te,swaFull:t.model.swa_full,arch:i,attentionLayerCount:m}),L=Re({nLayer:w,nEmbd:T,recurrentLayerCount:h,nSeqMax:te,ssmDConv:g,ssmDState:_,ssmDInner:y,ssmNGroup:b,ssmDtRank:x,rwkvHeadSize:S,rwkvTokenShiftCount:C,arch:i}),R=t.backend?.gpu_memory_fraction==null?tt.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(t.backend.gpu_memory_fraction))),ne=t.backend?.cpu_memory_fraction==null?et:Math.min(1,Math.max(0,Number(t.backend.cpu_memory_fraction))),z=await 
/**
 * Resolve the capability score of a report.
 *
 * Uses the report's own `score` when it is a finite number; otherwise the
 * score is computed from the report with `V`. A missing report scores 0.
 *
 * @param {object|null|undefined} e Capability report.
 * @returns {number}
 */
const Xt = (e) => {
  if (!e) return 0;
  const { score } = e;
  const hasOwnScore = typeof score == `number` && Number.isFinite(score);
  if (hasOwnScore) return Number(score);
  return V(e);
};
e=b.fit,a=b.limitedFit,o=g?.fit,s=g?.limitedFit,c=e?.fitsInGpu||e?.fitsInCpu||a?.fitsInGpu||a?.fitsInCpu,l=o?.fitsInGpu||o?.fitsInCpu||s?.fitsInGpu||s?.fitsInCpu;c&&!l?(r=`local`,i=`client-fits-in-memory`):l&&!c?(r=`buttress`,i=`buttress-fits-in-memory`):t>_*n?(r=`local`,i=`client-better`):_>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}y={buttressScore:_,clientScore:t,threshold:n,recommendation:r,reason:i}}!h.ok&&!y&&(y={buttressScore:_,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let C=null;return i&&(C={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,nCtx:i.model?.n_ctx||null,cacheKType:i.model?.cache_type_k||`f16`,cacheVType:i.model?.cache_type_v||`f16`}),{type:`ggml-llm`,timestamp:new Date().toISOString(),buttress:h,client:b,comparison:y,modelConfig:C}}const{WritableStream:Qt}=typeof globalThis<`u`&&globalThis.ReadableStream&&globalThis.WritableStream?{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream}:o,$t=(e={},t={})=>(Object.entries(t||{}).forEach(([t,n])=>{n&&typeof n==`object`&&!Array.isArray(n)?((!e[t]||typeof e[t]!=`object`)&&(e[t]={}),$t(e[t],n)):e[t]=n}),e),en=`https://huggingface.co`,tn=`https://huggingface.co/api`,nn=_.join(v.homedir(),`.buttress`,`models`),rn=[`cuda`,`vulkan`,`default`],an=[`q8_0`,`q5_1`,`q5_0`,`q4_1`,`q4_0`],on=`fp16`,sn=.5,cn=[`large-v3-turbo`,`distil-large-v3`,`large-v3`,`large-v2`,`large-v1`,`large`,`distil-medium`,`medium.en`,`medium`,`small.en-tdrz`,`distil-small.en`,`small.en`,`small`,`base.en`,`base`,`tiny.en`,`tiny`],ln=e=>{if(!e)return null;let t=e.toLowerCase();return 
cn.find(e=>t.includes(e))||null},un={backend:{type:`ggml-stt`,variant:null,variant_preference:rn,gpu_memory_fraction:.85,cpu_memory_fraction:sn},model:{repo_id:`BricksDisplay/whisper-ggml`,revision:`main`,filename:null,url:null,quantization:null,preferred_quantizations:[`q8_0`,on,`q5_1`],allow_local_file:!1,local_path:null,api_base:tn,base_url:en,use_gpu:!0,use_flash_attn:`auto`},runtime:{cache_dir:nn,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}},dn=(e,t=[])=>!e&&e!==0?[...t]:Array.isArray(e)?e.filter(e=>e!=null):[e],fn=e=>{if(!e)return null;let t=String(e).toLowerCase();return[`cuda`,`vulkan`,`default`].includes(t)?t:null},pn=(e={})=>{let t=structuredClone(un);if($t(t,e),t.backend.variant=fn(t.backend.variant),t.backend.variant_preference=Array.from(new Set(dn(t.backend.variant_preference||rn).map(fn).filter(Boolean))),t.backend.variant_preference.length===0&&(t.backend.variant_preference=[...rn]),t.runtime.prefer_variants=Array.from(new Set(dn(t.runtime.prefer_variants).map(fn).filter(Boolean))),t.model.preferred_quantizations=Array.from(new Set(dn(t.model.preferred_quantizations||t.model.quantizations).map(e=>e?String(e).toLowerCase():null).filter(Boolean))),t.model.quantization){let e=String(t.model.quantization).toLowerCase();t.model.preferred_quantizations.includes(e)||t.model.preferred_quantizations.unshift(e)}return t.model.base_url=t.model.base_url||en,t.model.api_base=t.model.api_base||tn,t.runtime.cache_dir=t.runtime.cache_dir?_.resolve(t.runtime.cache_dir):nn,t.runtime.context_release_delay_ms=Math.max(0,Number(t.runtime.context_release_delay_ms)||un.runtime.context_release_delay_ms),t},mn=e=>{let t=e.toLowerCase();return an.find(e=>t.includes(e))||null},hn=e=>{let t=[];return e.backend.variant&&t.push(e.backend.variant),e.runtime.prefer_variants.length>0&&t.push(...e.runtime.prefer_variants),t.push(...e.backend.variant_preference),t.push(`default`),Array.from(new 
Set(t.map(fn).filter(Boolean)))},gn=async e=>{await l(e,{recursive:!0})},_n=(e=nn)=>_.join(e,`.metadata-cache`),vn=(e,t,n=nn)=>{let r=y(`sha256`).update(e).digest(`hex`);return _.join(_n(n),t,`${r}.json`)},yn=async(e,t,n=nn)=>{try{let r=await d(vn(e,t,n),`utf-8`);return JSON.parse(r)}catch{return null}},bn=async(e,t,n,r=nn)=>{try{let i=vn(e,t,r);await gn(_.dirname(i)),await g(i,JSON.stringify(n),`utf-8`)}catch{}},xn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,t);if(!n.ok){let t=await n.text().catch(()=>``);throw Error(`Failed to fetch ${e}: ${n.status} ${n.statusText} ${t}`.trim())}return n.json()},Sn=async(e,t={})=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);let n=await fetch(e,{...t,method:`HEAD`});if(!n.ok)throw Error(`Failed to fetch headers for ${e}: ${n.status} ${n.statusText}`);return n},Cn=(e,t)=>{if(e.model.local_path)return _.resolve(e.model.local_path);let n=t.repoId.split(`/`),r=_.join(e.runtime.cache_dir,...n,t.revision);return _.join(r,t.filename)},wn=async(e,t)=>{try{let n=await m(e);return t?n.size===t:!0}catch{return!1}},Tn=async(e,t,n,r,i)=>{if(typeof fetch!=`function`)throw Error(`Global fetch is not available in this runtime`);await gn(_.dirname(n));let a=await fetch(e,{headers:t});if(!a.ok||!a.body)throw Error(`Failed to download ${e}: ${a.status} ${a.statusText}`);let o=await u(n,`w`),s=Number(a.headers.get(`content-length`))||r||0,c=0,l=.05;try{await a.body.pipeTo(new Qt({async write(e){if(await o.write(e),c+=e.byteLength,typeof i==`function`&&s>0){let e=Math.min(1,c/s);for(;e>=l;)i(l),l+=.05}},async close(){await o.close(),typeof i==`function`&&i(1)},async abort(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}}))}catch(e){throw await o.close().catch(()=>{}),await h(n).catch(()=>{}),e}if(r){let e=await m(n);if(e.size!==r)throw await h(n).catch(()=>{}),Error(`Downloaded file size mismatch, expected 
${r} got ${e.size}`)}},En=async e=>{let t=e.model.repo_id||e.model.repository||e.model.model;if(!t)throw Error("`model.repo_id` is required in Buttress backend config");let n=e.model.revision||`main`,r=e.runtime.cache_dir,i=JSON.stringify({repoId:t,revision:n,filename:e.model.filename,url:e.model.url,quantization:e.model.quantization,preferred_quantizations:e.model.preferred_quantizations}),a=await yn(i,`artifact-info`,r);if(a)return a;let o={...e.runtime.http_headers||{}};if(e.runtime.huggingface_token&&(o.Authorization=`Bearer ${e.runtime.huggingface_token}`),e.model.url){let a=await Sn(e.model.url,{headers:o}),s=Number(a.headers.get(`content-length`))||null,c=e.model.filename||e.model.url.split(`/`).pop(),l={repoId:t,revision:n,filename:c,url:e.model.url,size:s,quantization:mn(c||``),headers:o};return await bn(i,`artifact-info`,l,r),l}let{filename:s}=e.model,c=e.model.quantization&&String(e.model.quantization).toLowerCase(),l=await xn(`${e.model.api_base}/models/${t}?revision=${n}&blobs=true`,{headers:o}),u=(l?.siblings||l?.files||[]).map(e=>e.rfilename||e.path||e.filename).filter(e=>typeof e==`string`&&e.endsWith(`.bin`));if(u.length===0)throw Error(`No model artifacts found in repo ${t}`);let d=e.model.preferred_quantizations.length>0?e.model.preferred_quantizations:an,f=()=>{for(let e of d)if(e===on){let e=u.find(e=>{let t=e.toLowerCase();return!an.some(e=>t.includes(e))});if(e)return{filename:e,quantization:null}}else{let t=u.find(t=>t.toLowerCase().includes(e));if(t)return{filename:t,quantization:e}}return null};if(s)c||=mn(s);else{let{filename:e,quantization:t}=f()||{filename:u[0],quantization:null};s=e,c=t||mn(s)}let p=`${e.model.base_url.replace(/\/+$/,``)}/${t}/resolve/${n}/${s}`,m=await Sn(p,{headers:o}),h=Number(m.headers.get(`content-length`))||null,g={repoId:t,revision:n,filename:s,url:p,size:h,quantization:c,headers:o,isSplit:!1,splitCount:0};return await bn(i,`artifact-info`,g,r),g},Dn=async(e,{modelBytes:t=null,processingBytes:n=null}={})=>{let 
r=hn(e),[i,...a]=r,o=e.backend?.gpu_memory_fraction==null?un.backend.gpu_memory_fraction||1:Math.min(1,Math.max(0,Number(e.backend.gpu_memory_fraction))),s=e.backend?.cpu_memory_fraction==null?sn:Math.min(1,Math.max(0,Number(e.backend.cpu_memory_fraction))),c=await ye({platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,variant:i||null,preferVariants:a,variantPreference:e.backend.variant_preference,gpuMemoryFraction:o,cpuMemoryFraction:s,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:t,kvCacheBytes:n}),l=e=>({...e,devices:Array.isArray(e.devices)?e.devices:[],ok:e.ok,hasGpu:!!e.hasGpu,totalMemory:e.gpuTotalBytes||e.totalMemory||0,error:e.ok?null:Error(e.error||`Variant ${e.variant} not available on this platform`)});if(!c.ok||!c.selected){let e=(c.attempts||[]).map(e=>`${e.variant}: ${e.error||`unknown error`}`).join(`; `);throw Error(`Unable to initialize any backend variant (${r.join(`, `)}). Errors: ${e}`)}let u=(c.attempts||[]).map(l);return{selected:l(c.selected),attempts:u}},On=async e=>{let t=await En(e),n=Ce({modelBytes:t.size>0?t.size:0}),r=await Dn(e,{modelBytes:n.modelBytes,processingBytes:n.processingBufferBytes}),i=r.selected.hasGpu&&(r.selected.fit?.fitsInGpu===void 0?!0:r.selected.fit.fitsInGpu);e.model.use_gpu===!1&&(i=!1);let a=e.model.use_flash_attn&&String(e.model.use_flash_attn).toLowerCase(),o;o=a===`on`||a===`true`?!0:a===`off`||a===`false`?!1:i;let s=e.runtime.cache_dir,c=Cn(e,t),l=await 
wn(c,t.size);return{config:e,info:{ok:!0,backend:`ggml-stt`,model:{repoId:t.repoId,revision:t.revision,filename:t.filename,quantization:t.quantization,modelType:ln(t.filename),url:t.url,sizeBytes:t.size},runtime:{variant:r.selected.variant,use_gpu:i,use_flash_attn:o,max_threads:e.runtime.max_threads?Number(e.runtime.max_threads):null},resources:{...n,gpuCapacityBytes:r.selected.gpuTotalBytes,gpuUsableBytes:r.selected.gpuUsableBytes,cpuUsableBytes:r.selected.cpuUsableBytes,fit:r.selected.fit},devices:{selected:r.selected,attempts:r.attempts},download:{cacheDir:s,localPath:c,exists:l},timestamp:new Date().toISOString()},artifact:t,memory:n,devices:r,localPath:c,localExists:l}},kn=async(e,t,n,r=null)=>{let{localPath:i,artifact:a,config:o}=e;if(e.localExists)return typeof n==`function`&&n(1),i;if(r){let t=r.getDownload(i);if(t){console.log(`[ensureModelFile] Waiting for global STT download: ${a.repoId}`);try{if(await t,await wn(i,a.size))return e.localExists=!0,e.info.download.exists=!0,typeof n==`function`&&n(1),i}catch(e){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${e.message}`)}}}let s=t.get(i);if(s)return await s,typeof n==`function`&&n(1),i;let c=(async()=>{if(o.model.allow_local_file){if(!await wn(i,a.size))throw Error(`Local model file not found: ${i}`);return i}return await Tn(a.url,a.headers,i,a.size,n),i})();t.set(i,c);try{return await c,i}finally{t.delete(i)}};var An=class{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue(e,t=null){return new Promise((n,r)=>{this.queue.push({task:e,resolve:n,reject:r,taskId:t}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:e,resolve:t,reject:n,taskId:r}=this.queue.shift();this.currentTaskId=r;try{t(await 
e())}catch(e){n(e)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}};const jn=e=>{if(!e)return null;if(e instanceof ArrayBuffer)return e;if(ArrayBuffer.isView(e))return e.buffer;if(typeof e==`string`){let t=e.startsWith(`data:`)?e.split(`,`)[1]||``:e,n=Buffer.from(t,`base64`);return n.buffer.slice(n.byteOffset,n.byteOffset+n.byteLength)}throw Error(`Unsupported audioData format, expected base64 string or ArrayBuffer`)},Mn=async(e,t)=>{if(e.contextRecord&&!e.contextRecord.released)return e.contextRecord.releaseTimer&&(clearTimeout(e.contextRecord.releaseTimer),e.contextRecord.releaseTimer=null,console.log(`[Context] Cancelled pending STT release`)),e.contextRecord.releaseRequested=!1,e.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${e.contextRecord.refCount}`),typeof t==`function`&&t(0),e.contextRecord.context||await e.contextRecord.ready,typeof t==`function`&&t(1),e.contextRecord;e.contextRecord?console.log(`[Context] STT record exists but released=${e.contextRecord.released}, creating new context`):console.log(`[Context] No existing STT record, creating new context`);let n={refCount:1,ready:null,released:!1};e.contextRecord=n,n.ready=(async()=>{let r=Date.now();try{typeof t==`function`&&t(0);let i=await kn(e.plan,e.downloads,t,e.globalDownloadManager);typeof t==`function`&&t(.5);let a=await E({filePath:i,useFlashAttn:e.plan.info.runtime.flash_attn_type===`on`,useGpu:e.plan.info.runtime.n_gpu_layers>0,nThreads:e.plan.info.runtime.n_threads},e.plan.info.runtime.variant);typeof t==`function`&&t(1),n.context=a;try{n.modelInfo=a.getModelInfo()}catch{n.modelInfo=null}return 
G.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,useGpu:e.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-r,success:!0}),n}catch(t){throw G.addModelLoad({id:e.id,generatorId:e.id,repoId:e.plan.info.model?.repoId||null,quantization:e.plan.info.model?.quantization||null,modelType:e.plan.info.model?.modelType||null,variant:e.plan.info.runtime?.variant||null,durationMs:Date.now()-r,success:!1,error:t?.message||String(t)}),t}})();try{return await n.ready,typeof t==`function`&&t(1),n}catch(t){throw e.contextRecord=null,t}},Nn=async(e,t,n=!1)=>t.released||!n&&t.refCount>0?!1:(t.released=!0,e.contextRecord=null,await t.context?.release?.(),!0),Pn=async(e,t,n=!1)=>{if(t.releaseRequested=!0,t.releaseTimer&&=(clearTimeout(t.releaseTimer),null),n)t.refCount=0;else if(t.refCount=Math.max(0,t.refCount-1),t.refCount>0)return t.releaseRequested=!1,!1;let r=e.config.runtime.context_release_delay_ms;if(typeof r!=`number`||!Number.isFinite(r))return Nn(e,t);let i=Math.max(0,Math.floor(r));return n||i<=0?Nn(e,t):(console.log(`[Context] Scheduling STT release in ${i}ms`),t.releaseTimer=setTimeout(async()=>{if(t.releaseTimer=null,t.refCount>0){console.log(`[Context] STT release cancelled, refCount=${t.refCount}`),t.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${i}ms delay`),await Nn(e,t)},i),!0)};async function Fn(e,t,n={}){let{globalDownloadManager:r=null}=n,i=pn(t),a=await On(i),o={id:e,type:`ggml-stt`,config:i,plan:a,info:a.info,contextRecord:null,downloads:new Map,globalDownloadManager:r,queue:new An,finalized:!1},s=async()=>{if(o.finalized)return;o.finalized=!0;let e=o.contextRecord;e&&(e.released||e.releaseRequested||e.releaseTimer||(e.refCount=Math.max(0,e.refCount-1),!(e.refCount>0)&&await Nn(o,e)))},c=async(e={})=>{let{onProgress:t}=e;try{let e=await 
Mn(o,t);return{modelInfo:e.modelInfo&&typeof e.modelInfo==`object`?{...e.modelInfo}:null,runtime:{...o.plan.info.runtime},download:{...o.plan.info.download}}}catch(e){throw console.error(`[Context] Error initializing context:`,e),e}},l=async()=>{if(o.finalized)return!1;let e=o.contextRecord;return e?Pn(o,e):!1},u=async(e={})=>{let{audioPath:t,audioData:n,options:r={}}=e,i=o.contextRecord;if(!i)throw Error(`Context not initialized`);let a={...r};o.plan.info.runtime.max_threads&&a.maxThreads==null&&(a.maxThreads=o.plan.info.runtime.max_threads);let s=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,c=Date.now();return o.queue.enqueue(async()=>{await i.ready;try{let e;if(n){let t=jn(n),{promise:r}=i.context.transcribeData(t,a);e=await r}else{if(!t)throw Error(`audioPath or audioData is required for transcription`);let n=_.resolve(t),{promise:r}=i.context.transcribe(n,a);e=await r}return G.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,segmentCount:e?.segments?.length||0,textLength:e?.text?.length||0,success:!0}),e}catch(e){throw 
G.addTranscription({id:s,generatorId:o.id,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,durationMs:Date.now()-c,success:!1,error:e?.message||String(e)}),e}},s)};return{id:e,type:`ggml-stt`,info:a.info,queue:o.queue,initContext:c,transcribe:async(e={})=>u(e),transcribeData:async(e={})=>u(e),releaseContext:l,finalize:s,getStatus:()=>({id:o.id,type:o.type,repoId:o.plan.info.model?.repoId||null,quantization:o.plan.info.model?.quantization||null,modelType:o.plan.info.model?.modelType||null,variant:o.plan.info.runtime?.variant||null,hasContext:!!o.contextRecord?.context,contextRefCount:o.contextRecord?.refCount||0,queueStatus:o.queue.getStatus()}),hasPendingReleases:()=>{let e=o.contextRecord;return e?!e.released&&(e.releaseRequested||e.releaseTimer||e.refCount>0):!1},resetFinalized:()=>{o.finalized=!1}}}const In=e=>{let t=pn(e),n=t.model.repo_id||t.model.repository||t.model.model||null;if(!n)return null;let r=ln(t.model.filename);return r?`${n}:${r}`:n};async function Ln(e,t,n={}){let{onProgress:r,onComplete:i,onError:a}=n;try{let n=pn(e),o=await En(n),s=Cn(n,o),{repoId:c}=o;if(await wn(s,o.size))return console.log(`[Download] STT model already exists: ${c} at ${s}`),typeof i==`function`&&i({localPath:s,repoId:c,alreadyExists:!0}),{started:!1,localPath:s,repoId:c,alreadyExists:!0};let l=t.getDownload(s);if(l)return console.log(`[Download] Already downloading STT model: ${c}`),l.then(()=>{typeof i==`function`&&i({localPath:s,repoId:c,joinedExisting:!0})}).catch(e=>{typeof a==`function`&&a(e)}),{started:!1,localPath:s,repoId:c,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${c}`);let u=(async()=>{try{await Tn(o.url,o.headers,s,o.size,e=>{e>=0&&Number.isFinite(e)&&(console.log(`[Download] ${c}: ${Math.round(e*100)}%`),typeof r==`function`&&r(e))}),console.log(`[Download] Completed STT model: ${c}`),typeof 
i==`function`&&i({localPath:s,repoId:c})}catch(e){throw console.error(`[Download] Failed STT model: ${c}`,e.message),typeof a==`function`&&a(e),e}finally{t.deleteDownload(s)}})();return t.setDownload(s,u),{started:!0,localPath:s,repoId:c}}catch(e){return console.error(`[Download] Failed to start STT download:`,e.message),typeof a==`function`&&a(e),{started:!1,localPath:null,repoId:null,error:e.message}}}const Rn=e=>e?typeof e.score==`number`&&Number.isFinite(e.score)?Number(e.score):V(e):0;async function zn(e=null,t={}){let{threshold:n=1.1,includeBreakdown:r=!1,config:i,...a}=t,o=null,s=null,c=null;if(i)try{let e=await En(pn(i));o=e.size??null,{processingBufferBytes:s}=Ce({modelBytes:o}),c=e.quantization||null}catch{}let l=i?.backend?.gpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.gpu_memory_fraction))),u=i?.backend?.cpu_memory_fraction==null?void 0:Math.min(1,Math.max(0,Number(i.backend.cpu_memory_fraction))),d=await ye({...a,platform:process.platform,totalMemoryInBytes:v.totalmem(),backend:`ggml-stt`,includeBreakdown:r,gpuMemoryFraction:l,cpuMemoryFraction:u,dependencies:{getBackendDevicesInfo:x,isLibVariantAvailable:S},modelBytes:o,kvCacheBytes:s}),f=d.selected,p=Rn(f);f&&(f.modelBytes=o||null,f.processingBytes=s||null,f.quantization=c||null);let m=null,h=null;if(e){let t=Rn(e);h={...e,score:t};let r=`buttress`,i=`buttress-higher-score`;if(!d.ok)r=`local`,i=`buttress-unavailable`;else if(!t&&t!==0)r=`buttress`,i=`missing-client-score`;else if(e.fit&&f?.fit){let a=e.fit.fitsInGpu||e.fit.fitsInCpu,o=f.fit.fitsInGpu||f.fit.fitsInCpu;a&&!o?(r=`local`,i=`client-fits-in-memory`):o&&!a?(r=`buttress`,i=`buttress-fits-in-memory`):t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`)}else 
t>p*n?(r=`local`,i=`client-better`):p>t*n?(r=`buttress`,i=`buttress-better`):(r=`either`,i=`comparable-scores`);m={buttressScore:p,clientScore:t,threshold:n,recommendation:r,reason:i}}!d.ok&&!m&&(m={buttressScore:p,clientScore:e?.score??null,threshold:n,recommendation:`local`,reason:`buttress-unavailable`});let g=null;return i&&(g={repoId:i.model?.repo_id||null,quantization:i.model?.quantization||null,filename:i.model?.filename||null}),{type:`ggml-stt`,timestamp:new Date().toISOString(),buttress:d,client:h,comparison:m,modelConfig:g}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return Zt(t,n);if(e===`ggml-stt`)return zn(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.0-beta.3`,Bn={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0-beta.1`,"@fugood/llama.node":`^1.6.0`,"@fugood/whisper.node":`^1.0.16`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Vn=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Hn({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
|
|
3
3
|
`),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
|
|
4
4
|
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=u(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=Le(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,f=u(l),p=u(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=d(t?.fit?.fitsInGpu),h=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${f} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=u(i),l=u(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=d(t?.limitedFit?.fitsInGpu),h=d(t?.limitedFit?.fitsInCpu);(e!==o||c!==f||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
|
|
5
5
|
---`),n.push(`
|
|
@@ -26,7 +26,7 @@ import{t as e}from"./chunk-C8PTHxhX.mjs";import{node as t}from"@elysiajs/node";i
|
|
|
26
26
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
27
27
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
28
28
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
|
|
29
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Kn=e({finalizeGenerator:()=>Xn,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>$n,ggmlLlm:()=>Zn,ggmlStt:()=>Qn,globalDownloadManager:()=>qn,showModelsTable:()=>Hn,showSttModelsTable:()=>Wn,startGenerator:()=>Yn,startModelDownload:()=>tr,status:()=>er,testGgmlLlmCapabilities:()=>Un,testGgmlSttCapabilities:()=>Gn});const Z=new Map,qn={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},Jn=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=Jn(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Yn(e,t){let n={"ggml-llm":{create:Kt,getId:qt},"ggml-stt":{create:Fn,getId:In}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:qn}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Xn(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Zn={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async applyChatTemplate(e,t){return 
Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},Qn={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function $n(e,t){return e===`ggml-llm`?qt(t):e===`ggml-stt`?In(t):null}const er={getFullStatus:()=>qe(Z),getGgmlLlmStatus:()=>Ge(Z),getGgmlSttStatus:()=>Ke(Z),subscribeToStatus:Ue,subscribeToStatusWithId:We,llmStatusTracker:W,sttStatusTracker:G,statusEmitter:U};async function tr(e,t,n={}){let r={"ggml-llm":Jt,"ggml-stt":Ln}[e];return r?r(t,qn,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var nr=`@fugood/buttress-server`,rr=`2.24.0-beta.1`,ir={name:nr,version:rr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`lib/index.mjs`},files:[`lib`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.6.0`,"@fugood/whisper.node":`^1.0.16`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`70a0207cf51102a2273056cc59951e5efdd75859`};const ar=()=>({version:rr,name:nr,description:ir.description}),or=typeof 
process<`u`&&process.versions&&process.versions.node,sr=e=>new n({adapter:or?t():void 0,...e}),cr=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),lr=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var ur=e=>{let t=sr(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,lr,{response:cr}),t};const dr=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var fr=sr().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return dr?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const pr=_.dirname(ee(import.meta.url)),mr=async()=>{let e=[_.join(pr,`..`,`public`,`status.html`),_.join(pr,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},hr=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},gr=async()=>{let e=await mr();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new 
Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var _r=sr().get(`/status`,gr).get(`/status/`,gr).get(`/buttress/status`,({store:{backend:e}})=>hr(e));const vr=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,yr=new Map;async function br(e,t,n){let r=(t.generators||[]).filter(e=>e.type===`ggml-llm`);if(r.length===0)throw Error(`No ggml-llm generator configured. Add a [[generators]] with type = "ggml-llm" to your config.`);let i=r[0],a=n||i.model?.repo_id;if(n){let e=r.find(e=>e.model?.repo_id===n);e&&(i=e)}else a=i.model?.repo_id;let o=a,s=yr.get(o);if(s?.initialized)return s;let{generators:c,server:l,...u}=t.global||{},d={...u,...i,model:{...i.model,repo_id:a}};console.log(`[OpenAI] Creating generator for ${o}`);let{id:f}=await e.startGenerator(`ggml-llm`,d),p={id:f,config:d,repoId:a,initialized:!1};return yr.set(o,p),await e.ggmlLlm.initContext(f,{}),p.initialized=!0,console.log(`[OpenAI] Generator ready: ${o}`),p}async function xr(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content==null?u.token!=null&&(a+=u.token):a+=u.content;else if(r===`result`)u.text?a=u.text:u.content&&(a=u.content),u.reasoning_content&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l={prompt_tokens:u.prompt_tokens||u.promptTokens||0,completion_tokens:u.tokens_predicted||u.tokensPredicted||0,total_tokens:(u.prompt_tokens||u.promptTokens||0)+(u.tokens_predicted||u.tokensPredicted||0)};else if(r===`error`)throw 
Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function Sr({global:e}){let t=sr({prefix:`/oai-compat`});return t.use(M({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>e.type===`ggml-llm`).map(e=>({id:e.model?.repo_id||`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await br(a,r,c),t=vr(),n=Math.floor(Date.now()/1e3),v=e.repoId||`ggml-llm`,y={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(y.temperature=u),f!=null&&(y.top_p=f),p!=null&&(y.n_predict=p),d!=null&&(y.stop=Array.isArray(d)?d:[d]),m!=null&&(y.presence_penalty=m),h!=null&&(y.frequency_penalty=h),l!=null&&(y.tools=l),g!=null&&(y.tool_choice=g),y.enable_thinking=!1;let b=await a.ggmlLlm.completion(e.id,{options:y});if(!s)return await xr(b,t,n,v);let x=_?.include_usage===!0,S=b.getReader(),C=``,w=``,T=new Map,E=new Map;try{let e=!1;for(;!e;){let r=await S.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let 
t=o.content;t.length>C.length&&(e.content=t.slice(C.length),C=t)}if(o.reasoning_content!=null){let t=o.reasoning_content;t.length>w.length&&(e.reasoning_content=t.slice(w.length),w=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};E.has(r)||(E.set(r,e.id||`call_${t}_${r}`),i.id=E.get(r),i.type=`function`);let a=e.function?.arguments||``,o=T.get(r)||``,s={};!T.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),T.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:v,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||E.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:v,choices:[{index:0,delta:{},finish_reason:e}]};x&&(r.usage={prompt_tokens:o.prompt_tokens||o.promptTokens||0,completion_tokens:o.tokens_predicted||o.tokensPredicted||0,total_tokens:(o.prompt_tokens||o.promptTokens||0)+(o.tokens_predicted||o.tokensPredicted||0)}),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{S.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server 
error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())}))})}),t}const Cr=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Cr(n[e]||{},t):n[e]=t}),n},wr=e=>e&&typeof e==`object`?structuredClone(e):null,Tr=(e,t)=>Cr(wr(e)||{},wr(t)||{}),Er=(e,t)=>Cr(structuredClone(e.global),t||{}),Dr=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Er(e,a)}}return Object.keys(e.global).length>0?Er(e,{}):null},Or={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},kr=e=>e?e===!0?{...Or}:Cr(Or,e):null,Ar=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},jr=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Mr=6e4,Nr=1024*1024*50,Pr=e=>{let t=N.machineIdSync(),n=Cr({server:{id:`buttress-${t}`,name:`Buttress Server 
(${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Mr,max_body_size:Nr},autodiscover:!1},wr(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:kr(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:jr(i.max_body_size,w.parse,Nr),session_timeout:jr(i.session_timeout,P,Mr)},global:s,generators:r}},Fr={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ir={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=wr(i),c=Tr(Dr(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=wr(i),o=Tr(Dr(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const 
Lr={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};var Rr={initContext({backend:e,session:t},n,r){return new s({async start(i){try{let a=await e.ggmlLlm.initContext(n,{...r,onProgress:e=>{i.enqueue({event:`progress`,data:{progress:e}})}});t.initializedContexts.add(n),await new Promise(e=>setTimeout(e,1e3));let{download:o,...s}=a||{};i.enqueue({event:`result`,data:{result:s}}),i.close()}catch(e){i.error(e)}}})},completion({backend:e},t,n){return console.log(`[Server] Completion:`,{id:t,property:n}),e.ggmlLlm.completion(t,n)},async tokenize({backend:e},t,n){return console.log(`[Server] Tokenize:`,{id:t,property:n}),e.ggmlLlm.tokenize(t,n)},async detokenize({backend:e},t,n){return console.log(`[Server] Detokenize:`,{id:t,property:n}),e.ggmlLlm.detokenize(t,n)},async applyChatTemplate({backend:e},t,n){return console.log(`[Server] Apply Chat Template:`,{id:t,property:n}),e.ggmlLlm.applyChatTemplate(t,n)},async releaseContext({backend:e,session:t},n,r){return console.log(`[Server] Release Context:`,{id:n,force:r}),t.initializedContexts.has(n)?(t.initializedContexts.delete(n),e.ggmlLlm.releaseContext(n,{force:r})):(console.log(`[Server] Release Context skipped - not initialized by this session:`,{id:n}),{released:!1,skipped:!0})}};const zr={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},Br={common:Ir,ggmlLlm:Rr,ggmlStt:{initContext({backend:e,session:t},n,r){return new s({async start(i){try{let a=await e.ggmlStt.initContext(n,{...r,onProgress:e=>{i.enqueue({event:`progress`,data:{progress:e}})}});t.initializedContexts.add(n),await new 
Promise(e=>setTimeout(e,1e3));let{download:o,...s}=a||{};i.enqueue({event:`result`,data:{result:s}}),i.close()}catch(e){i.error(e)}}})},async transcribe({backend:e,config:{server:t}},n,r,i){return console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},async releaseContext({backend:e,session:t},n,r){return console.log(`[Server] Release STT Context:`,{id:n,force:r}),t.initializedContexts.has(n)?(t.initializedContexts.delete(n),e.ggmlStt.releaseContext(n,{force:r})):(console.log(`[Server] Release STT Context skipped - not initialized by this session:`,{id:n}),{released:!1,skipped:!0})}}},Vr={common:Fr,ggmlLlm:Lr,ggmlStt:zr};var Hr=Br;const Ur=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return F.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=F.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},Wr=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof F?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:F.from(t).toString(`base64`)}:t)}catch{return e}};var Gr=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=te.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new 
Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Kr=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Gr(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const qr=()=>{let e=v.networkInterfaces();return 
Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=ar(),Jr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Yr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
29
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Kn=e({finalizeGenerator:()=>Xn,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>$n,ggmlLlm:()=>Zn,ggmlStt:()=>Qn,globalDownloadManager:()=>qn,showModelsTable:()=>Hn,showSttModelsTable:()=>Wn,startGenerator:()=>Yn,startModelDownload:()=>tr,status:()=>er,testGgmlLlmCapabilities:()=>Un,testGgmlSttCapabilities:()=>Gn});const Z=new Map,qn={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},Jn=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=Jn(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Yn(e,t){let n={"ggml-llm":{create:Kt,getId:qt},"ggml-stt":{create:Fn,getId:In}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:qn}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Xn(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Zn={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async applyChatTemplate(e,t){return 
Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},Qn={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function $n(e,t){return e===`ggml-llm`?qt(t):e===`ggml-stt`?In(t):null}const er={getFullStatus:()=>qe(Z),getGgmlLlmStatus:()=>Ge(Z),getGgmlSttStatus:()=>Ke(Z),subscribeToStatus:Ue,subscribeToStatusWithId:We,llmStatusTracker:W,sttStatusTracker:G,statusEmitter:U};async function tr(e,t,n={}){let r={"ggml-llm":Jt,"ggml-stt":Ln}[e];return r?r(t,qn,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var nr=`@fugood/buttress-server`,rr=`2.24.0-beta.3`,ir={name:nr,version:rr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`lib/index.mjs`},files:[`lib`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.6.0`,"@fugood/whisper.node":`^1.0.16`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`7cf2a6850a7765fd801e1bbdbb571871bae92f62`};const ar=()=>({version:rr,name:nr,description:ir.description}),or=typeof 
process<`u`&&process.versions&&process.versions.node,sr=e=>new n({adapter:or?t():void 0,...e}),cr=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),lr=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var ur=e=>{let t=sr(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,lr,{response:cr}),t};const dr=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var fr=sr().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return dr?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const pr=_.dirname(ee(import.meta.url)),mr=async()=>{let e=[_.join(pr,`..`,`public`,`status.html`),_.join(pr,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},hr=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},gr=async()=>{let e=await mr();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new 
Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var _r=sr().get(`/status`,gr).get(`/status/`,gr).get(`/buttress/status`,({store:{backend:e}})=>hr(e));const vr=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,yr=new Map;async function br(e,t,n){let r=(t.generators||[]).filter(e=>e.type===`ggml-llm`);if(r.length===0)throw Error(`No ggml-llm generator configured. Add a [[generators]] with type = "ggml-llm" to your config.`);let i=r[0],a=n||i.model?.repo_id;if(n){let e=r.find(e=>e.model?.repo_id===n);e&&(i=e)}else a=i.model?.repo_id;let o=a,s=yr.get(o);if(s?.initialized)return s;let{generators:c,server:l,...u}=t.global||{},d={...u,...i,model:{...i.model,repo_id:a}};console.log(`[OpenAI] Creating generator for ${o}`);let{id:f}=await e.startGenerator(`ggml-llm`,d),p={id:f,config:d,repoId:a,initialized:!1};return yr.set(o,p),await e.ggmlLlm.initContext(f,{}),p.initialized=!0,console.log(`[OpenAI] Generator ready: ${o}`),p}async function xr(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content==null?u.token!=null&&(a+=u.token):a+=u.content;else if(r===`result`)u.text?a=u.text:u.content&&(a=u.content),u.reasoning_content&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l={prompt_tokens:u.prompt_tokens||u.promptTokens||0,completion_tokens:u.tokens_predicted||u.tokensPredicted||0,total_tokens:(u.prompt_tokens||u.promptTokens||0)+(u.tokens_predicted||u.tokensPredicted||0)};else if(r===`error`)throw 
Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function Sr({global:e}){let t=sr({prefix:`/oai-compat`});return t.use(M({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>e.type===`ggml-llm`).map(e=>({id:e.model?.repo_id||`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await br(a,r,c),t=vr(),n=Math.floor(Date.now()/1e3),v=e.repoId||`ggml-llm`,y={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(y.temperature=u),f!=null&&(y.top_p=f),p!=null&&(y.n_predict=p),d!=null&&(y.stop=Array.isArray(d)?d:[d]),m!=null&&(y.presence_penalty=m),h!=null&&(y.frequency_penalty=h),l!=null&&(y.tools=l),g!=null&&(y.tool_choice=g),y.enable_thinking=!1;let b=await a.ggmlLlm.completion(e.id,{options:y});if(!s)return await xr(b,t,n,v);let x=_?.include_usage===!0,S=b.getReader(),C=``,w=``,T=new Map,E=new Map;try{let e=!1;for(;!e;){let r=await S.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let 
t=o.content;t.length>C.length&&(e.content=t.slice(C.length),C=t)}if(o.reasoning_content!=null){let t=o.reasoning_content;t.length>w.length&&(e.reasoning_content=t.slice(w.length),w=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};E.has(r)||(E.set(r,e.id||`call_${t}_${r}`),i.id=E.get(r),i.type=`function`);let a=e.function?.arguments||``,o=T.get(r)||``,s={};!T.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),T.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:v,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||E.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:v,choices:[{index:0,delta:{},finish_reason:e}]};x&&(r.usage={prompt_tokens:o.prompt_tokens||o.promptTokens||0,completion_tokens:o.tokens_predicted||o.tokensPredicted||0,total_tokens:(o.prompt_tokens||o.promptTokens||0)+(o.tokens_predicted||o.tokensPredicted||0)}),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{S.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server 
error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())}))})}),t}const Cr=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Cr(n[e]||{},t):n[e]=t}),n},wr=e=>e&&typeof e==`object`?structuredClone(e):null,Tr=(e,t)=>Cr(wr(e)||{},wr(t)||{}),Er=(e,t)=>Cr(structuredClone(e.global),t||{}),Dr=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Er(e,a)}}return Object.keys(e.global).length>0?Er(e,{}):null},Or={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},kr=e=>e?e===!0?{...Or}:Cr(Or,e):null,Ar=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},jr=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Mr=6e4,Nr=1024*1024*50,Pr=e=>{let t=N.machineIdSync(),n=Cr({server:{id:`buttress-${t}`,name:`Buttress Server 
(${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Mr,max_body_size:Nr},autodiscover:!1},wr(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:kr(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:jr(i.max_body_size,w.parse,Nr),session_timeout:jr(i.session_timeout,P,Mr)},global:s,generators:r}},Fr={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ir={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=wr(i),c=Tr(Dr(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=wr(i),o=Tr(Dr(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const 
Lr={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};var Rr={initContext({backend:e,session:t},n,r){return new s({async start(i){try{let a=await e.ggmlLlm.initContext(n,{...r,onProgress:e=>{i.enqueue({event:`progress`,data:{progress:e}})}});t.initializedContexts.add(n),await new Promise(e=>setTimeout(e,1e3));let{download:o,...s}=a||{};i.enqueue({event:`result`,data:{result:s}}),i.close()}catch(e){i.error(e)}}})},completion({backend:e},t,n){return console.log(`[Server] Completion:`,{id:t,property:n}),e.ggmlLlm.completion(t,n)},async tokenize({backend:e},t,n){return console.log(`[Server] Tokenize:`,{id:t,property:n}),e.ggmlLlm.tokenize(t,n)},async detokenize({backend:e},t,n){return console.log(`[Server] Detokenize:`,{id:t,property:n}),e.ggmlLlm.detokenize(t,n)},async applyChatTemplate({backend:e},t,n){return console.log(`[Server] Apply Chat Template:`,{id:t,property:n}),e.ggmlLlm.applyChatTemplate(t,n)},async releaseContext({backend:e,session:t},n,r){return console.log(`[Server] Release Context:`,{id:n,force:r}),t.initializedContexts.has(n)?(t.initializedContexts.delete(n),e.ggmlLlm.releaseContext(n,{force:r})):(console.log(`[Server] Release Context skipped - not initialized by this session:`,{id:n}),{released:!1,skipped:!0})}};const zr={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},Br={common:Ir,ggmlLlm:Rr,ggmlStt:{initContext({backend:e,session:t},n,r){return new s({async start(i){try{let a=await e.ggmlStt.initContext(n,{...r,onProgress:e=>{i.enqueue({event:`progress`,data:{progress:e}})}});t.initializedContexts.add(n),await new 
Promise(e=>setTimeout(e,1e3));let{download:o,...s}=a||{};i.enqueue({event:`result`,data:{result:s}}),i.close()}catch(e){i.error(e)}}})},async transcribe({backend:e,config:{server:t}},n,r,i){return console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},async releaseContext({backend:e,session:t},n,r){return console.log(`[Server] Release STT Context:`,{id:n,force:r}),t.initializedContexts.has(n)?(t.initializedContexts.delete(n),e.ggmlStt.releaseContext(n,{force:r})):(console.log(`[Server] Release STT Context skipped - not initialized by this session:`,{id:n}),{released:!1,skipped:!0})}}},Vr={common:Fr,ggmlLlm:Lr,ggmlStt:zr};var Hr=Br;const Ur=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return F.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=F.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},Wr=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof F?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:F.from(t).toString(`base64`)}:t)}catch{return e}};var Gr=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=te.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new 
Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Kr=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Gr(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const qr=()=>{let e=v.networkInterfaces();return 
Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=ar(),Jr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Yr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
30
30
|
`),n.push(`${$.name} v${$.version}`),n.push(`## Model Capabilities Comparison
|
|
31
31
|
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=u(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=Le(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=u(c),f=t?.recurrentMemoryBytes||0,p=f>0?u(f):`-`,m=u(t?.modelBytes&&(c!=null||f>0)?t.modelBytes+(c||0)+f:t?.fit?.totalRequiredBytes),h=d(t?.fit?.fitsInGpu),g=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let 
e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=u(s),h=u(t?.modelBytes&&(s!=null||f>0)?t.modelBytes+(s||0)+f:t?.limitedFit?.totalRequiredBytes),g=d(t?.limitedFit?.fitsInGpu),_=d(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
|
|
32
32
|
---`),n.push(`
|
|
@@ -87,4 +87,4 @@ Examples:
|
|
|
87
87
|
bricks-buttress --test-caps ggml-llm --test-models-default
|
|
88
88
|
bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
|
|
89
89
|
`),process.exit(0));let e=process.argv.findIndex(e=>e===`--port`||e===`-p`),t=e>=0?Number(process.argv[e+1]):void 0,n=process.argv.findIndex(e=>e===`--config`||e===`-c`),r=n>=0?process.argv[n+1]:null,i=null;if(r){let e;if(r.includes(`
|
|
90
|
-
`))e=r;else{let t=_.resolve(r);try{e=D.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=k.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||t!==`ggml-llm`&&t!==`ggml-stt`?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),tr(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=process.argv.findIndex(e=>e===`--test-caps`);if(c>=0){let e=process.argv[c+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&(console.error(`Only ggml-llm and ggml-stt backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Zr({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Zr({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Qr({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Yr({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Yr({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Xr({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let 
l=Pr(i);t&&(l.server.port=t),l.server.port||(l.server.port=2080),li({config:l,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`}).then(async({port:e,openaiEnabled:t,autoDiscover:n})=>{let r=qr();console.log(`Buttress server listening on port ${e}`),console.log(`--------------------------------`),await oi(),console.log(),console.log(`Current supported Generators:`),console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`),console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${r}:${e} to connect to this server via LAN.`),console.log(`- Visit http://${r}:${e}/status to see status via LAN.`),console.log(),t?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${r}:${e}/oai-compat/v1`),console.log(`- Chat completions: POST http://${r}:${e}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${r}:${e}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set ENABLE_OPENAI_COMPAT_ENDPOINT=1 to enable`),console.log()),n&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};const{version:ti,name:ni}=ar(),ri=async()=>{let e=`https://registry.npmjs.org/${ni}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await r.json()).version||null}catch{return null}},ii=(e,t)=>{if(!t)return!1;let n=e.split(/[.-]/),r=t.split(/[.-]/);for(let e=0;e<Math.max(n.length,r.length);e+=1){let t=parseInt(n[e])||0,i=parseInt(r[e])||0;if(i>t)return!0;if(i<t)return!1}return!1},ai=e=>{console.log(``),console.log(`\x1B[33mâ•─────────────────────────────────────────────────╮\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Update available! 
\x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,ti.padEnd(12),e.padEnd(12),`\x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`,ni.padEnd(27)),console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`),console.log(``)},oi=async()=>{try{let e=await ri();e&&ii(ti,e)&&ai(e)}catch{}},si=typeof process<`u`&&process.versions&&process.versions.node,ci=async({backend:e,router:r,config:i,enableOpenAICompat:o})=>{try{await c.mkdir(i.server.temp_file_dir,{recursive:!0})}catch{}let l=qr()||`0.0.0.0`,u={id:i.server.id,name:i.server.name,version:ti,address:l,port:i.server.port,url:`http://${l}:${i.server.port}`,generators:Ar(i,i.generators.map(e=>e.type)),authentication:{required:!0,type:`device-group`}},d=new n({serve:{maxRequestBodySize:i.server.max_body_size},websocket:{idleTimeout:Math.ceil(i.server.session_timeout/1e3)},adapter:si?t():void 0}).state({sessions:new Map,backend:e||Kn,config:i,serverInfo:u});r&&d.use(r),i.autodiscover?.http?.enabled&&d.use(ur(i)),d.use(fr),d.use(_r),o&&d.use(Sr(i));let f={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603};return d.ws(`/buttress/rpc`,{parse:(e,t)=>{if(typeof t==`string`)try{return JSON.parse(t)}catch{return e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.INVALID_REQUEST,message:`Invalid request`},id:null})),null}return t},body:a.Object({jsonrpc:a.String(),method:a.String(),params:a.String(),id:a.String()}),open(e){let t=e.id??e.raw?.id??e.remoteAddress;if(console.log(`[Request] New connection: ${t}`),!e.data.store.sessions.has(t))e.data.store.sessions.set(t,{streamReaders:new Map,generators:new Set,initializedContexts:new Set,timeout:null});else{let n=e.data.store.sessions.get(t);clearTimeout(n.timeout),n.timeout=null}},async message(e,{id:t,method:n,params:r}){let 
i=e.id??e.raw?.id??e.remoteAddress;console.log(`[Request] Received request from ${i}: ${n}`);let a=e.data.store.sessions.get(i),[o,c]=n.split(`.`),l=Hr[o]?.[c];if(!l){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.METHOD_NOT_FOUND,message:`Method not found`},id:t}));return}try{if(n===`cancel`){a.streamReaders.has(t)&&(a.streamReaders.get(t)?.cancel(),a.streamReaders.delete(t));return}if(n===`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,result:`pong`,id:t}));return}let u=Ur(r),d=Vr[o]?.[c],f=d?d.parse(u):u,p=await l({...e.data.store,peerId:i,session:a},...f);if(p instanceof s){let n=p.getReader();a.streamReaders.set(t,n),e.send(JSON.stringify({jsonrpc:`2.0`,result:{type:`stream`},id:t}));try{for(;;){let{value:r,done:i}=await n.read();if(i)break;let{event:a,data:o}=r;e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/${a}`,params:Wr(o),id:t}))}e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_end`,id:t}))}catch(n){console.error(n),e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_error`,params:Wr(n),id:t}))}a.streamReaders.delete(t)}else e.send(JSON.stringify({jsonrpc:`2.0`,result:Wr(p),id:t}))}catch(n){if(n instanceof A){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.INVALID_PARAMS,message:`Invalid params`,data:n.issues},id:t}));return}console.error(n),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.INTERNAL_ERROR,message:String(n)},id:t}))}},async close(e){let t=e.id??e.raw?.id??e.remoteAddress;console.log(`[Request] Connection closed: ${t}`);let{backend:n,sessions:r}=e.data.store,a=r.get(t);a&&(a.streamReaders.forEach(e=>e.cancel()),a.streamReaders.clear(),a.timeout=setTimeout(()=>{r.delete(t),console.log(`[Request] Session timed out: ${t}`);let{generators:e}=a;e.forEach(e=>{n.finalizeGenerator(e)})},i.server.session_timeout))}}),{app:d,config:i}},li=async({backend:e,router:t,config:n,enableOpenAICompat:r=!1})=>{let{app:i,config:a}=await ci({backend:e,router:t,config:n,enableOpenAICompat:r}),{server:{port:o}}=a,s=[new 
Promise(e=>i.listen(o,e))],c=null;return a.autodiscover&&(c=new Kr(a.autodiscover,()=>i.store.serverInfo),s.push(c.start())),await Promise.all(s),{app:i,port:o,openaiEnabled:r,autoDiscover:c}},ui=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];(process.argv[1]?.endsWith(`/bricks-buttress`)||ui.includes(process.argv[1]))&&await ei();export{oi as checkAndNotifyUpdates,ri as checkForUpdates,ii as compareVersions,ci as createServer,ai as logUpdateMessage,Pr as processConfig,tr as startModelDownload,li as startServer};
|
|
90
|
+
`))e=r;else{let t=_.resolve(r);try{e=D.readFileSync(t,`utf8`)}catch(e){console.error(`Failed to read Buttress config at ${t}:`,e),process.exit(1)}}try{let t=k.parse(e);t.env&&typeof t.env==`object`&&(Object.entries(t.env).forEach(([e,t])=>{process.env[e]===void 0&&(process.env[e]=String(t))}),delete t.env),i=t}catch(e){console.error(`Failed to parse TOML config:`,e),process.exit(1)}}async function a(e){if(!e?.generators||!Array.isArray(e.generators))return;let t=e.generators.filter(e=>{if(!e.model?.download)return!1;let{type:t}=e;return!t||t!==`ggml-llm`&&t!==`ggml-stt`?(console.warn(`[Download] Skipping unknown generator type: ${t}`),!1):!0});if(t.length===0)return;let{server:n,generators:r,...i}=e,a=t.map(e=>{let{type:t}=e,n=e.model?.repo_id;return console.log(`[Download] Starting pre-download for ${t}: ${n}`),tr(t,{...i,backend:e.backend||{},model:e.model||{},runtime:{...i.runtime,...e.runtime||{}}},{onProgress:()=>{},onComplete:({repoId:e,alreadyExists:t})=>{t?console.log(`[Download] Pre-download complete (already exists): ${e}`):console.log(`[Download] Pre-download complete: ${e}`)},onError:e=>{console.error(`[Download] Pre-download failed for ${n}:`,e.message)}})}),o=await Promise.all(a),s=o.filter(e=>e.started).length,c=o.filter(e=>e.alreadyExists).length,l=o.filter(e=>e.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${s} started, ${c} already exist, ${l} already downloading`)}let 
o=[`ggml-org/gpt-oss-20b-GGUF`,`ggml-org/gpt-oss-120b-GGUF`,`unsloth/Nemotron-3-Nano-30B-A3B-GGUF`,`unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF`,`unsloth/GLM-4.7-Flash-GGUF`,`DevQuasar/MiniMaxAI.MiniMax-M2.5-GGUF`,`bartowski/Mistral-Nemo-Instruct-2407-GGUF`,`mistralai/Magistral-Small-2509-GGUF`,`mistralai/Ministral-3-14B-Reasoning-2512-GGUF`,`bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF`,`bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF`,`ggml-org/gemma-3-12b-it-qat-GGUF`,`ggml-org/gemma-3-27b-it-qat-GGUF`,`unsloth/phi-4-GGUF`],s=[`BricksDisplay/whisper-ggml:ggml-small.bin`,`BricksDisplay/whisper-ggml:ggml-small-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-medium.bin`,`BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin`,`BricksDisplay/whisper-ggml:ggml-large-v3.bin`],c=process.argv.findIndex(e=>e===`--test-caps`);if(c>=0){let e=process.argv[c+1]||`ggml-llm`;e!==`ggml-llm`&&e!==`ggml-stt`&&(console.error(`Only ggml-llm and ggml-stt backends are supported for testing capabilities`),process.exit(1));let t=process.argv.findIndex(e=>e===`--test-models`),n=process.argv.includes(`--test-models-default`);if(e===`ggml-stt`)if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Zr({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Zr({modelIds:s,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await Qr({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}else if(t>=0){let e=process.argv[t+1];e||(console.error(`Error: --test-models requires a comma-separated list of model IDs`),process.exit(1)),await Yr({modelIds:e.split(`,`).map(e=>e.trim()),defaultConfig:i})}else if(n)await Yr({modelIds:o,defaultConfig:i});else{let e=process.argv.findIndex(e=>e===`--test-caps-model-id`);await 
Xr({modelId:e>=0?process.argv[e+1]:null,defaultConfig:i})}}let l=Pr(i);t&&(l.server.port=t),l.server.port||(l.server.port=2080),li({config:l,enableOpenAICompat:process.env.ENABLE_OPENAI_COMPAT_ENDPOINT===`1`}).then(async({port:e,openaiEnabled:t,autoDiscover:n})=>{let r=qr();console.log(`Buttress server listening on port ${e}`),console.log(`--------------------------------`),await oi(),console.log(),console.log(`Current supported Generators:`),console.log(`- LLM (GGML)`),console.log(`- STT (GGML)`),console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${r}:${e} to connect to this server via LAN.`),console.log(`- Visit http://${r}:${e}/status to see status via LAN.`),console.log(),t?(console.log(`OpenAI-compatible API [EXPERIMENTAL]:`),console.log(`- Base URL: http://${r}:${e}/oai-compat/v1`),console.log(`- Chat completions: POST http://${r}:${e}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${r}:${e}/oai-compat/v1/models`),console.log()):(console.log(`OpenAI-compatible API [EXPERIMENTAL]: disabled`),console.log(` Set ENABLE_OPENAI_COMPAT_ENDPOINT=1 to enable`),console.log()),n&&(console.log(`Auto-discover enabled`),console.log()),i&&await a(i)}).catch(e=>{console.error(`Failed to start Buttress server:`,e),process.exitCode=1})};const{version:ti,name:ni}=ar(),ri=async()=>{let e=`https://registry.npmjs.org/${ni}/latest`;try{let t=new AbortController,n=setTimeout(()=>t.abort(),3e3),r=await fetch(e,{headers:{Accept:`application/json`},signal:t.signal});return clearTimeout(n),r.ok&&(await r.json()).version||null}catch{return null}},ii=(e,t)=>{if(!t)return!1;let n=e.split(/[.-]/),r=t.split(/[.-]/);for(let e=0;e<Math.max(n.length,r.length);e+=1){let 
t=parseInt(n[e])||0,i=parseInt(r[e])||0;if(i>t)return!0;if(i<t)return!1}return!1},ai=e=>{console.log(``),console.log(`\x1B[33mâ•─────────────────────────────────────────────────╮\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m`,ti.padEnd(12),e.padEnd(12),`\x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m`),console.log(`\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m`,ni.padEnd(27)),console.log(`\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m`),console.log(``)},oi=async()=>{try{let e=await ri();e&&ii(ti,e)&&ai(e)}catch{}},si=typeof process<`u`&&process.versions&&process.versions.node,ci=async({backend:e,router:r,config:i,enableOpenAICompat:o})=>{try{await c.mkdir(i.server.temp_file_dir,{recursive:!0})}catch{}let l=qr()||`0.0.0.0`,u={id:i.server.id,name:i.server.name,version:ti,address:l,port:i.server.port,url:`http://${l}:${i.server.port}`,generators:Ar(i,i.generators.map(e=>e.type)),authentication:{required:!0,type:`device-group`}},d=new n({serve:{maxRequestBodySize:i.server.max_body_size},websocket:{idleTimeout:Math.ceil(i.server.session_timeout/1e3)},adapter:si?t():void 0}).state({sessions:new Map,backend:e||Kn,config:i,serverInfo:u});r&&d.use(r),i.autodiscover?.http?.enabled&&d.use(ur(i)),d.use(fr),d.use(_r),o&&d.use(Sr(i));let f={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603};return d.ws(`/buttress/rpc`,{parse:(e,t)=>{if(typeof t==`string`)try{return JSON.parse(t)}catch{return e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.INVALID_REQUEST,message:`Invalid request`},id:null})),null}return t},body:a.Object({jsonrpc:a.String(),method:a.String(),params:a.String(),id:a.String()}),open(e){let t=e.id??e.raw?.id??e.remoteAddress;if(console.log(`[Request] New connection: ${t}`),!e.data.store.sessions.has(t))e.data.store.sessions.set(t,{streamReaders:new 
Map,generators:new Set,initializedContexts:new Set,timeout:null});else{let n=e.data.store.sessions.get(t);clearTimeout(n.timeout),n.timeout=null}},async message(e,{id:t,method:n,params:r}){let i=e.id??e.raw?.id??e.remoteAddress;console.log(`[Request] Received request from ${i}: ${n}`);let a=e.data.store.sessions.get(i),[o,c]=n.split(`.`),l=Hr[o]?.[c];if(!l){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.METHOD_NOT_FOUND,message:`Method not found`},id:t}));return}try{if(n===`cancel`){a.streamReaders.has(t)&&(a.streamReaders.get(t)?.cancel(),a.streamReaders.delete(t));return}if(n===`ping`){e.send(JSON.stringify({jsonrpc:`2.0`,result:`pong`,id:t}));return}let u=Ur(r),d=Vr[o]?.[c],f=d?d.parse(u):u,p=await l({...e.data.store,peerId:i,session:a},...f);if(p instanceof s){let n=p.getReader();a.streamReaders.set(t,n),e.send(JSON.stringify({jsonrpc:`2.0`,result:{type:`stream`},id:t}));try{for(;;){let{value:r,done:i}=await n.read();if(i)break;let{event:a,data:o}=r;e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/${a}`,params:Wr(o),id:t}))}e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_end`,id:t}))}catch(n){console.error(n),e.send(JSON.stringify({jsonrpc:`2.0`,method:`notification/_error`,params:Wr(n),id:t}))}a.streamReaders.delete(t)}else e.send(JSON.stringify({jsonrpc:`2.0`,result:Wr(p),id:t}))}catch(n){if(n instanceof A){e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.INVALID_PARAMS,message:`Invalid params`,data:n.issues},id:t}));return}console.error(n),e.send(JSON.stringify({jsonrpc:`2.0`,error:{code:f.INTERNAL_ERROR,message:String(n)},id:t}))}},async close(e){let t=e.id??e.raw?.id??e.remoteAddress;console.log(`[Request] Connection closed: ${t}`);let{backend:n,sessions:r}=e.data.store,a=r.get(t);a&&(a.streamReaders.forEach(e=>e.cancel()),a.streamReaders.clear(),a.timeout=setTimeout(()=>{r.delete(t),console.log(`[Request] Session timed out: 
${t}`);let{generators:e}=a;e.forEach(e=>{n.finalizeGenerator(e)})},i.server.session_timeout))}}),{app:d,config:i}},li=async({backend:e,router:t,config:n,enableOpenAICompat:r=!1})=>{let{app:i,config:a}=await ci({backend:e,router:t,config:n,enableOpenAICompat:r}),{server:{port:o}}=a,s=[new Promise(e=>i.listen(o,e))],c=null;return a.autodiscover&&(c=new Kr(a.autodiscover,()=>i.store.serverInfo),s.push(c.start())),await Promise.all(s),{app:i,port:o,openaiEnabled:r,autoDiscover:c}},ui=[new URL(`index.mjs`,import.meta.url).pathname,new URL(`index.ts`,import.meta.url).pathname];(process.argv[1]?.endsWith(`/bricks-buttress`)||ui.includes(process.argv[1]))&&await ei();export{oi as checkAndNotifyUpdates,ri as checkForUpdates,ii as compareVersions,ci as createServer,ai as logUpdateMessage,Pr as processConfig,tr as startModelDownload,li as startServer};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/buttress-server",
|
|
3
|
-
"version": "2.24.0-beta.1",
|
|
3
|
+
"version": "2.24.0-beta.3",
|
|
4
4
|
"main": "lib/index.mjs",
|
|
5
5
|
"types": "lib/index.d.mts",
|
|
6
6
|
"type": "module",
|
|
@@ -43,5 +43,5 @@
|
|
|
43
43
|
"tsdown": "^0.20.1",
|
|
44
44
|
"typescript": "^5.9.3"
|
|
45
45
|
},
|
|
46
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "7cf2a6850a7765fd801e1bbdbb571871bae92f62"
|
|
47
47
|
}
|