@fugood/buttress-server 2.23.0-beta.37 → 2.23.0-beta.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.mjs CHANGED
@@ -1,4 +1,5 @@
1
- import{createRequire as x3}from"node:module";var M3=Object.defineProperty;var F3=($,Z)=>{for(var X in Z)M3($,X,{get:Z[X],enumerable:!0,configurable:!0,set:(j)=>Z[X]=()=>j})};var S=($,Z)=>()=>($&&(Z=$($=0)),Z);var Q4=x3(import.meta.url);var J4=($,Z,X)=>Math.min(Math.max($,Z),X),N4=($)=>$?40:0,H4=($=0)=>{if(!$)return 0;return J4($/12884901888*20,0,20)},V4=($=0)=>{if(!$)return 0;return J4($/34359738368*10,0,10)},U4=($)=>$?10:0,G4=($="default",Z=null)=>{let X=String($).toLowerCase();if(!X)return 0;if(X.includes("cuda"))return 20;if(X.includes("vulkan"))return 10;if(X.includes("default"))return Z==="darwin"||Z==="ios"?15:5;return 0},K0=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:H=!0}={})=>{if(!H)return 0;let Q=N4(X)+G4(Z,$)+H4(j),J=V4(W),N=U4(H);return Math.min(100,Math.round(Q+J+N))},K1=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:H=!0}={})=>({gpuPresence:N4(X),variant:G4(Z,$),gpuMemory:H4(j),cpuMemory:V4(W),availability:U4(H)});var O4,D0=0.85,b0=0.5,K4=($)=>{if(!$&&$!==0)return[];if(Array.isArray($))return $.filter((Z)=>Z!=null);return[$]},S3=($)=>{if(!$)return null;return String($).trim().toLowerCase()||null},P3=({variant:$,preferVariants:Z=[],variantPreference:X=[],defaultVariants:j=O4}={})=>{let W=[];if($)W.push($);W.push(...K4(Z)),W.push(...K4(X)),W.push(...j);let H=W.map(S3).filter(Boolean);return Array.from(new Set(H))},_4=($={})=>{let Z=String($.type||$.deviceType||$.kind||"").toLowerCase();if(Z.includes("gpu"))return!0;if(Z.includes("cuda"))return!0;if(Z.includes("metal"))return!0;if(Z.includes("vulkan"))return!0;if(Z.includes("snapdragon"))return!0;return!1},T3=($)=>{if(!Array.isArray($))return[];return $.map((Z)=>({...Z}))},k3=($,Z)=>{if($==="snapdragon")return Z.filter((X)=>X.deviceName!=="GPUOpenCL");return Z},q4=({platform:$,totalMemoryInBytes:Z,variant:X,devices:j,gpuMemoryFraction:W,cpuMemoryFraction:H,ok:Q,error:J})=>{let 
N=T3(k3(X,j)),G=N.some(_4),_=N.filter((w)=>_4(w)&&Number.isFinite(Number(w.maxMemorySize))).reduce((w,U)=>w+U.maxMemorySize,0),O=Z,z=G?Math.floor(_*W):0,V=O?Math.floor(O*H):0,R={platform:$,variant:X,hasGpu:G,gpuUsableBytes:z,cpuUsableBytes:V,ok:Q},q=K0(R),A=Q?K1(R):null;return{platform:$,ok:Q,variant:X,hasGpu:G,devices:N,gpuTotalBytes:_,gpuUsableBytes:z,cpuTotalBytes:O,cpuUsableBytes:V,score:q,breakdown:A,error:J,timestamp:new Date().toISOString()}},_1=({device:$,modelBytes:Z=0,kvCacheBytes:X=0}={})=>{if(!$)return{totalRequiredBytes:Z+X,fitsInGpu:!1,fitsInCpu:!1,limiting:"unknown-device"};let j=Math.max(0,Number(Z)||0)+Math.max(0,Number(X)||0),W=$.hasGpu&&j>0&&j<=$.gpuUsableBytes,H=j>0&&j<=$.cpuUsableBytes,Q="ok";if(!W&&$.hasGpu)Q="gpu-memory";if(!H)Q=W?"cpu-memory":"insufficient-memory";return{totalRequiredBytes:j,fitsInGpu:W,fitsInCpu:H,limiting:Q}},z0=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:H=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:N=null,kvCacheBytes:G=null,limitedKvCacheBytes:_=null,dependencies:O={},defaultVariants:z=O4}={})=>{let{getBackendDevicesInfo:V,isLibVariantAvailable:R}=O;if(typeof V!=="function"||typeof R!=="function")throw TypeError("GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions");let q=P3({variant:Z,preferVariants:X,variantPreference:j,defaultVariants:z}),A=[];for(let L of q)try{if(!await R(L))throw Error(`Variant ${L} not available on this platform`);let E=await V(L);A.push(q4({platform:$,totalMemoryInBytes:J,variant:L,devices:E,gpuMemoryFraction:W,cpuMemoryFraction:H,ok:!0}))}catch(B){let E=B instanceof Error?B.message:String(B);A.push(q4({platform:$,totalMemoryInBytes:J,variant:L,devices:[],gpuMemoryFraction:W,cpuMemoryFraction:H,ok:!1,error:E}))}let U=A.filter((L)=>L.ok)[0]||null,Y={ok:Boolean(U),selected:U?{...U,breakdown:Q?U.breakdown:void 0}:null,attempts:A};if(!Q&&Y.selected)delete 
Y.selected.breakdown;if(!Y||!N&&!G)return Y;let K=(L)=>{if(!L)return L;let B=_1({device:L,modelBytes:N||0,kvCacheBytes:G||0}),E=null;if(_!=null&&_!==G)E=_1({device:L,modelBytes:N||0,kvCacheBytes:_});return{...L,fit:B,...E&&{limitedFit:E}}};return Y.selected=K(Y.selected),Y.attempts=Array.isArray(Y.attempts)?Y.attempts.map(K):Y.attempts,Y},v0="ggml-llm";var h0=S(()=>{O4=["cuda","vulkan","snapdragon","default"]});var q1="ggml-stt",L4,O1=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:H=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:N=null,processingBytes:G=null,kvCacheBytes:_=null,dependencies:O={}}={})=>{let z=j&&j.length>0?j:L4;return z0({platform:$,variant:Z,preferVariants:X,variantPreference:z,gpuMemoryFraction:W,cpuMemoryFraction:H,includeBreakdown:Q,totalMemoryInBytes:J,modelBytes:N,kvCacheBytes:G??_,dependencies:O,defaultVariants:L4})};var L1=S(()=>{h0();L4=["cuda","vulkan","default"]});var D3,_0=async({platform:$,totalMemoryInBytes:Z,backend:X=v0,dependencies:j,...W}={})=>{let H=D3.get(X);if(!H)throw Error(`No capability detector registered for backend "${X}"`);return await H({...W,dependencies:j,totalMemoryInBytes:Z,platform:$})};var A4=S(()=>{h0();L1();D3=new Map([[v0,z0],[q1,O1]])});var z4,A1=($)=>{let Z=$?String($).toLowerCase():"f16";return z4[Z]||z4.f16},z1=($,Z,X,j,W,H={},{totalLayers:Q=null,swaLayers:J=0,swaContext:N=null,swaContextMultiplier:G=1,swaAdditionalTokens:_=0,swaFull:O=!1}={})=>{if(!$||!Z||!X||!j||!W)return 0;let z=Q!=null&&Q!==void 0?Number(Q):Number($),V=Math.max(0,Math.floor(z));if(!V)return 0;let R=A1(H.k),q=A1(H.v),A=Number(X)*(Number(j)*R+Number(W)*q);if(!A)return 0;let w=Math.max(0,Number(Z)||0),U=Math.min(V,Math.max(0,Math.floor(Number(J)||0))),Y=Math.max(0,V-U),K=N!=null&&Number.isFinite(Number(N))?Math.max(0,Number(N)):w,L=Math.max(1,Number(G)||1),B=Math.max(0,Number(_)||0),E=K*L+B,M=O?w:Math.min(w,E),F=Y*w+U*Math.max(0,Math.floor(M));return 
Math.round(A*F)},C0=({modelBytes:$=0,audioLengthSeconds:Z=30,sampleRate:X=16000,bytesPerSample:j=4}={})=>{let W=Math.max(0,Number($)||0),H=Math.max(0,Math.floor(Math.max(0,Z)*X*j)),Q=1048576,J=1073741824,N;if(W<209715200)N=125829120;else if(W<524288000)N=146800640;else if(W<2147483648)N=157286400;else N=167772160;let G;if(W<209715200)G=73400320;else if(W<524288000)G=141557760;else if(W<2147483648)G=230686720;else G=230686720;let _;if(W<104857600)_=20971520;else if(W<209715200)_=31457280;else if(W<524288000)_=89128960;else if(W<2147483648)_=225443840;else _=377487360;let O=N+G+_,z=W+O+H;return{modelBytes:W,audioBufferBytes:H,processingBufferBytes:O,totalBytes:z}};var B1=S(()=>{z4={f16:2,f32:4,q8_0:1,q6_k:0.75,q5_k:0.625,q5_k_m:0.625,q5_k_s:0.625,q5_1:0.625,q5_0:0.625,q4_k:0.5,q4_k_m:0.5,q4_k_s:0.5,q4_1:0.5,q4_0:0.5,iq4_nl:0.5}});var w1=($)=>$?String($).trim().toLowerCase():null,b3=($={},Z=null)=>{if(!$)return null;let X=w1(Z),j=X?`${X}.attention.sliding_window`:null,W=(j&&$[j]!=null?$[j]:null)??$["llama.attention.sliding_window"];if(W==null)return null;let H=Number(W);return Number.isFinite(H)?H:null},w4=($=0,Z=0,X=!1)=>{let j=Math.max(0,Math.floor(Number($)||0)),W=Math.max(0,Math.floor(Number(Z)||0));if(!j||W===1)return 0;if(W<=0)return j;let H=Math.max(0,W-1),Q=Math.floor(j/W),J=j%W,N=X?Math.max(0,J-1):Math.min(J,H);return Q*H+N},B4=({arch:$,nLayer:Z=0})=>({arch:w1($),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(Z)||0)),swaLayers:0}),v3,I0=({arch:$,metadata:Z={},nLayer:X=0}={})=>{let j=w1($||Z["general.architecture"]),W=Math.max(0,Math.floor(Number(X)||0)),H=b3(Z,j),Q=j?v3.get(j):null;if(!Q)return B4({arch:j,nLayer:X});let J=Q({nLayer:W,nSwa:H,metadata:Z});if(!J||!J.enabled||!J.window||J.window<=0)return B4({arch:j,nLayer:X});let 
N=Math.max(0,Math.floor(Number(J.pattern)||0)),G=J.kvLayers!=null&&Number.isFinite(Number(J.kvLayers))?Number(J.kvLayers):W,_=Math.max(0,Math.floor(G)),O=w4(_,N,Boolean(J.denseFirst));return{arch:j,enabled:O>0,window:J.window,pattern:N,denseFirst:Boolean(J.denseFirst),type:J.type||"standard",kvLayers:_,swaLayers:O}};var R4=S(()=>{v3=new Map([["llama4",({nSwa:$})=>{if($===0)return{enabled:!1};return{enabled:!0,window:$&&$>0?$:8192,pattern:4,type:"chunked"}}],["afmoe",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["phi3",()=>({enabled:!1})],["gemma2",({nSwa:$})=>{let Z=$&&$>0?$:4096;if(!Z)return{enabled:!1};return{enabled:!0,window:Z,pattern:2,type:"standard"}}],["gemma3",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"standard"}}],["gemma3n",({nLayer:$,nSwa:Z})=>{if(!Z||Z<=0)return{enabled:!1};return{enabled:!0,window:Z,pattern:5,type:"standard",kvLayers:Math.min(20,$)}}],["gemma-embedding",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"symmetric"}}],["cohere2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["olmo2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["exaone4",({nLayer:$,nSwa:Z})=>{let X=$>=64,j=null;if(Z&&Z>0)j=Z;else if(X)j=4096;if(!j)return{enabled:!1};return{enabled:!0,window:j,pattern:4,type:"standard"}}],["gpt-oss",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:2,type:"standard"}}],["smallthinker",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:"standard"}}]])});var y0=($={})=>{let Z=$["general.architecture"],X=(z,V=null)=>{let R=$[z],q=Number(R);return 
Number.isFinite(q)?q:V},j=Z?X(`${Z}.context_length`,X("llama.context_length")):null,W=Z?X(`${Z}.block_count`,X("llama.block_count")):null,H=Z?X(`${Z}.embedding_length`,X("llama.embedding_length")):null,Q=Z?X(`${Z}.attention.head_count`,X("llama.attention.head_count")):null,J=Z?X(`${Z}.attention.head_count_kv`,X("llama.attention.head_count_kv",Q)):null,N=Z?X(`${Z}.attention.key_length`,X("llama.attention.key_length")):null,G=Z?X(`${Z}.attention.value_length`,X("llama.attention.value_length")):null,_=$["general.quantization_version"]||null,O=$["general.file_type"]||null;return{arch:Z,nCtxTrain:j,nLayer:W,nEmbd:H,nHead:Q,nHeadKv:J,nEmbdHeadK:N,nEmbdHeadV:G,quantVersion:_,fileType:O}},N0=({layerCount:$,headKvCount:Z,embdHeadKCount:X,embdHeadVCount:j,cacheTypes:W,swaConfig:H,kvUnified:Q=!1,nParallel:J=1,swaFull:N=!1})=>{let G=H?.window&&Q?Math.max(1,Number(J)||1):1;return(_)=>z1($,_,Z,X,j,W,{totalLayers:$,swaLayers:H?.swaLayers||0,swaContext:H?.window,swaFull:N,swaContextMultiplier:G})},u0=({maxCtx:$,availableMemory:Z,modelBytes:X,kvBytesForCtx:j})=>{let W=Math.max(1,Math.floor(Number($)||0));if(!j||Z<=X)return W;let H=1,Q=W,J=W;while(H<=Q){let N=Math.floor((H+Q)/2);if(X+j(N)<=Z)J=N,H=N+1;else Q=N-1}return J};var E4=S(()=>{B1()});var B0=S(()=>{A4();B1();h0();L1();R4();E4()});import{EventEmitter as h3}from"node:events";class 
E1{constructor($=C3){this.maxEntries=$,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad($){R1(this.modelLoads,$,this.maxEntries),o.emit("status:modelLoad",$),o.emit("status:change",{type:"modelLoad",entry:$})}addCompletion($){R1(this.completions,$,this.maxEntries),o.emit("status:completion",$),o.emit("status:change",{type:"completion",entry:$})}addTranscription($){R1(this.transcriptions,$,this.maxEntries),o.emit("status:transcription",$),o.emit("status:change",{type:"transcription",entry:$})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}}function M1($){let Z=(X)=>$(X);return o.on("status:change",Z),()=>o.off("status:change",Z)}function F4($){M4+=1;let Z=M4,X=M1($);return{subscriberId:Z,unsubscribe:X}}function F1($){let Z=[];return{generators:Array.from($.entries()).filter(([,j])=>j.type==="ggml-llm").map(([j,W])=>{let{instance:H}=W,Q=[];if(H.contexts)Q=Array.from(H.contexts.entries()).map(([J,N])=>{let G={key:J,refCount:N.refCount,hasModel:Boolean(N.context)},_=N.context.parallel.getStatus();return G.parallelStatus=_,Z.push({generatorId:j,contextKey:J,..._}),G});return{id:j,type:W.type,refCount:W.refCount,repoId:H.info?.model?.repoId||null,quantization:H.info?.model?.quantization||null,variant:H.info?.runtime?.variant||null,nCtx:H.info?.runtime?.n_ctx||null,nParallel:H.info?.runtime?.n_parallel||null,contexts:Q}}),parallelStatuses:Z,history:{modelLoads:t.getModelLoadHistory(),completions:t.getCompletionHistory()}}}function 
x1($){return{generators:Array.from($.entries()).filter(([,X])=>X.type==="ggml-stt").map(([X,j])=>{let{instance:W}=j,H=W.getStatus?.()||{},Q=H.queueStatus||{processing:!1,queuedCount:0};return{id:X,type:j.type,refCount:j.refCount,repoId:W.info?.model?.repoId||null,quantization:W.info?.model?.quantization||null,variant:W.info?.runtime?.variant||null,hasContext:H.hasContext||!1,contextRefCount:H.contextRefCount||0,queueStatus:Q}}),history:{modelLoads:J0.getModelLoadHistory(),transcriptions:J0.getTranscriptionHistory()}}}function x4($){return{timestamp:new Date().toISOString(),ggmlLlm:F1($),ggmlStt:x1($)}}var C3=9999,o,R1=($,Z,X)=>{if($.push({...Z,timestamp:Z.timestamp||new Date().toISOString()}),$.length>X)$.shift()},t,J0,M4=0;var m0=S(()=>{o=new h3;o.setMaxListeners(100);t=new E1,J0=new E1});import b from"node:path";import E0 from"node:os";import{stat as M0,mkdir as I3,open as y3,unlink as e,readFile as b4,writeFile as v4,rename as h4,readdir as u3}from"node:fs/promises";import{createHash as k1}from"node:crypto";import{gguf as m3}from"@huggingface/gguf";import{loadModel as p3,getBackendDevicesInfo as C4,isLibVariantAvailable as I4}from"@fugood/llama.node";import f3 from"bytes";import*as g3 from"node:stream/web";class n4{constructor($,Z){this.config=$,this.plan=Z,this.baseDir=$.runtime.cache_dir,this.enabled=$.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=Q6($.runtime.session_cache?.max_size_bytes,10737418240),this.maxEntries=$.runtime.session_cache?.max_entries||1000,this.metadata={variant:Z.info?.runtime?.variant||null,n_gpu_layers:Z.info?.runtime?.n_gpu_layers||0,n_ctx:Z.info?.runtime?.n_ctx||0,modelPath:Z.localPath,cacheTypeK:Z.info?.runtime?.cache_type_k||"f16",cacheTypeV:Z.info?.runtime?.cache_type_v||"f16"},this.cacheMap=null,this.initialized=!1}async initialize(){if(!this.enabled||this.initialized)return;try{await H0(n0(this.baseDir)),await H0(g0(this.baseDir)),await H0(l4(this.baseDir)),this.cacheMap=await 
r3(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch($){console.warn(`[SessionCache] Failed to initialize: ${$.message}`),this.enabled=!1}}async findMatchingEntry($){if(!this.enabled||!this.cacheMap)return null;let Z=X6($,this.metadata,this.cacheMap);if(Z){let{entry:X}=Z;if(!await Y6(X.stateFilePath))return console.log(`[SessionCache] Removing stale entry: ${X.id}`),delete this.cacheMap.entries[X.id],this.cacheMap.totalSize-=X.stateFileSize||0,await P1(this.cacheMap,this.baseDir).catch(()=>{}),null;return X.lastAccessedAt=new Date().toISOString(),await P1(this.cacheMap,this.baseDir).catch(()=>{}),{entry:X}}return null}async prepareCompletionOptions($,Z){if(!this.enabled)return{options:$,cacheEntry:null,promptPrefix:null};let X=await this.findMatchingEntry(Z);if(X){let{entry:j}=X;return console.log(`[SessionCache] Found matching entry: ${j.id} (${j.fullText.length} chars, loadStateSize=${j.loadStateSize})`),{options:{...$,load_state_path:j.stateFilePath},cacheEntry:j,promptPrefix:j.fullText}}return{options:$,cacheEntry:null,promptPrefix:null}}async saveCompletionState($,Z,X,j=0){if(!this.enabled)return null;let W=a3($,this.metadata);if(this.cacheMap.entries[W])return console.log(`[SessionCache] Entry already exists for prompt: ${W}`),await e(X).catch(()=>{}),this.cacheMap.entries[W];let H=$+Z,Q=o3(W,this.baseDir);try{await H0(b.dirname(Q)),await h4(X,Q);let J=await M0(Q),N={id:W,promptText:$,completionText:Z,fullText:H,promptTokenCount:j,stateFilePath:Q,stateFileSize:J.size,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[W]=N,this.cacheMap.totalSize+=J.size,await j6(this.cacheMap,this.maxSizeBytes,this.maxEntries),await P1(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${W} (${J.size} bytes)`),N}catch(J){return console.warn(`[SessionCache] Failed to save state: 
${J.message}`),await e(X).catch(()=>{}),null}}async generateTempStatePath(){return await H0(g0(this.baseDir)),t3(this.baseDir)}async cleanup(){await W6(this.baseDir)}}async function a4($,Z,X={}){let{globalDownloadManager:j=null}=X,W=l0(Z),H=await J6(W),Q=new n4(W,H);await Q.initialize();let J={id:$,type:"ggml-llm",config:W,plan:H,info:H.info,contexts:new Map,downloads:new Map,globalDownloadManager:j,sessionCache:Q,finalized:!1},N=async()=>{if(J.finalized)return;J.finalized=!0;let Y=Array.from(J.contexts.values()),K=Y.map((B)=>{if(B.released)return Promise.resolve(!1);if(B.releaseRequested||B.releaseTimer)return Promise.resolve(!1);if(B.refCount=Math.max(0,B.refCount-1),B.refCount>0)return Promise.resolve(!1);return f0(J,B)});if(await Promise.allSettled(K),Y.length===0||Y.every((B)=>B.released))await J.sessionCache.cleanup()},G=async(Y={})=>{let{onProgress:K}=Y,L=await U6(J,K);return{modelInfo:L.modelInfo?{...L.modelInfo}:null,runtime:{...J.plan.info.runtime},download:{...J.plan.info.download}}},_=async()=>{if(J.finalized)return!1;let Y=O0(J),K=J.contexts.get(Y);if(!K)return!1;return G6(J,K,!1)},O=async(Y={})=>{let{options:K={},useCache:L=!0}=Y,B=O0(J),E=J.contexts.get(B);if(!E)throw Error(`Context "${B}" not initialized`);await E.ready;let M=K.prompt||"";if(!M&&K.messages){let x=await E.context.getFormattedChat(K.messages,K.chat_template||K.chatTemplate,{jinja:K.jinja??!0,tools:K.tools,parallel_tool_calls:K.parallel_tool_calls,tool_choice:K.tool_choice,enable_thinking:K.enable_thinking,add_generation_prompt:K.add_generation_prompt,now:K.now,chat_template_kwargs:K.chat_template_kwargs});M=x?.prompt||x||""}if(L&&J.sessionCache.enabled&&M){let{options:x}=await J.sessionCache.prepareCompletionOptions(K,M),k=await J.sessionCache.generateTempStatePath(),m=(await E.context.tokenize(M))?.tokens?.length||0,p={...x,save_state_path:k,save_state_size:m};console.log(`[SessionCache] save_state_size=${m} (prompt tokens)`);let 
n={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return H6(E.context,p,J.sessionCache,M,k,m,J.id,n)}let F={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return N6(E.context,K,J.id,F)},z=async(Y={})=>{let{text:K="",params:L={}}=Y,B=O0(J),E=J.contexts.get(B);if(!E)throw Error(`Context "${B}" not initialized`);await E.ready;let M=await E.context.tokenize(K,L);if(!M)return{tokens:[]};let F=Array.from(M.tokens??[]).map((x)=>Number(x));return{...M,tokens:F}},V=async(Y={})=>{let{tokens:K=[]}=Y,L=O0(J),B=J.contexts.get(L);if(!B)throw Error(`Context "${L}" not initialized`);await B.ready;let E=K.map((M)=>Number(M));return B.context.detokenize(E)},R=async(Y={})=>{let{messages:K=[],template:L,params:B}=Y,E=O0(J),M=J.contexts.get(E);if(!M)throw Error(`Context "${E}" not initialized`);return await M.ready,await M.context.getFormattedChat(K,L,B)},q=()=>Array.from(J.contexts.values()).some((Y)=>!Y.released&&(Y.releaseRequested||Y.releaseTimer||Y.refCount>0)),A=()=>{J.finalized=!1},w=()=>{let Y=[],K=Array.from(J.contexts.entries()).map(([L,B])=>{let E={key:L,refCount:B.refCount,hasModel:Boolean(B.context)},M=B.context.parallel.getStatus();return E.parallelStatus=M,Y.push({contextKey:L,...M}),E});return{id:J.id,type:J.type,repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null,nCtx:J.plan.info.runtime?.n_ctx||null,nParallel:J.plan.info.runtime?.n_parallel||null,contexts:K,parallelStatuses:Y}},U=(Y)=>{let 
K=Array.from(J.contexts.entries()).map(([L,B])=>B.context.parallel.subscribeToStatus((E)=>{Y({contextKey:L,...E})}));return{remove:()=>{K.forEach((L)=>{if(L?.remove)L.remove()})}}};return{id:$,type:"ggml-llm",info:H.info,contexts:J.contexts,initContext:G,completion:O,tokenize:z,detokenize:V,applyChatTemplate:R,releaseContext:_,finalize:N,getStatus:w,subscribeParallelStatus:U,hasPendingReleases:q,resetFinalized:A}}async function o4($,Z,X={}){let{onProgress:j,onComplete:W,onError:H}=X;try{let Q=l0($),J=await D1(Q),N=s4(Q,J),{repoId:G}=J;if(await R0(N,J.size)){if(console.log(`[Download] Model already exists: ${G} at ${N}`),typeof W==="function")W({localPath:N,repoId:G,alreadyExists:!0});return{started:!1,localPath:N,repoId:G,alreadyExists:!0}}let O=Z.getDownload(N);if(O)return console.log(`[Download] Already downloading: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:N,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof H==="function")H(V)}),{started:!1,localPath:N,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting download: ${G}`);let z=(async()=>{try{if(J.isSplit&&J.splitCount>0){let V=/-(\d{5})-of-(\d{5})\.gguf$/,R=b.dirname(N),q=J.splitCount,A=0;for(let w=1;w<=q;w+=1){let U=String(w).padStart(5,"0"),Y=J.filename.replace(V,`-${U}-of-${String(q).padStart(5,"0")}.gguf`),K=`${Q.model.base_url.replace(/\/+$/,"")}/${J.repoId}/resolve/${J.revision}/${Y}`,L=b.join(R,Y);if(!await R0(L))await c0(K,J.headers,L,null,(E)=>{if(E>=0&&Number.isFinite(E)){let M=(A+E)/q;if(console.log(`[Download] ${G}: ${Math.round(M*100)}%`),typeof j==="function")j(M)}});A+=1}}else await c0(J.url,J.headers,N,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}});if(console.log(`[Download] Completed: ${G}`),typeof W==="function")W({localPath:N,repoId:G})}catch(V){if(console.error(`[Download] Failed: ${G}`,V.message),typeof H==="function")H(V);throw V}finally{Z.deleteDownload(N)}})();return 
Z.setDownload(N,z),{started:!0,localPath:N,repoId:G}}catch(Q){if(console.error("[Download] Failed to start download:",Q.message),typeof H==="function")H(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function K6($){let Z=l0($),X=await D1(Z),j=await i4(X.url,X.headers,Z.runtime.cache_dir),{arch:W,nCtxTrain:H,nLayer:Q,nEmbd:J,nHead:N,nHeadKv:G,nEmbdHeadK:_,nEmbdHeadV:O,quantVersion:z,fileType:V}=y0(j),R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(N))?Number(N):0,w=Number.isFinite(Number(G))?Number(G):A,U=A>0&&q>0?q/A:128,Y=_!=null&&Number.isFinite(Number(_))?Number(_):U,K=O!=null&&Number.isFinite(Number(O))?Number(O):U,L=I0({arch:W,metadata:j,nLayer:R}),B=L&&Number.isFinite(Number(L.kvLayers))?Number(L.kvLayers):R,E=Math.max(0,Math.floor(Number(B)||0)),F=(Z.model.n_ctx?Number(Z.model.n_ctx):null)||H||4096,x={k:Z.model.cache_type_k,v:Z.model.cache_type_v},k=X.size>0?X.size:0,D=N0({layerCount:E,headKvCount:w,embdHeadKCount:Y,embdHeadVCount:K,cacheTypes:x,swaConfig:L,kvUnified:Z.model.kv_unified,nParallel:Z.model.n_parallel,swaFull:Z.model.swa_full}),m=Z.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,p=Z.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.cpu_memory_fraction))):d0,n=D(F),i=await r4(Z,{modelBytes:k,kvCacheBytes:n}),c=(i.selected.totalMemory||0)*m,T=Math.max(0,E0.totalmem()*p),f=i.selected.hasGpu?c:T,h=u0({maxCtx:F,availableMemory:f,modelBytes:k,kvBytesForCtx:D}),s=D(F),I=D(h);return{kvInfo:{nCtxTrain:H,nLayer:R,nEmbd:q,nHeadKv:w,nEmbdHeadK:Y,nEmbdHeadV:K,nHeadCount:A,nHeadKvCount:w,kvLayerCount:E,swa:L?.enabled?{window:L.window,pattern:L.pattern,denseFirst:L.denseFirst,type:L.type,layers:L.swaLayers}:null},modelBytes:k,kvCacheBytes:s,limitedKvCacheBytes:I,memoryLimitedCtx:h,quantization:{name:X.quantization||null,fileType:V,version:z}}}async function 
t4($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...H}=Z,Q=null,J=null,N=null,G=null,_=null,O=null;if(W)try{let{modelBytes:K,kvCacheBytes:L,limitedKvCacheBytes:B,memoryLimitedCtx:E,kvInfo:M,quantization:F}=await K6(W);Q=K,J=L,N=B,G=E,_=M,O=F}catch(K){}let z=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,V=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,R=await _0({...H,platform:process.platform,totalMemoryInBytes:E0.totalmem(),backend:"ggml-llm",includeBreakdown:j,gpuMemoryFraction:z,cpuMemoryFraction:V,dependencies:{getBackendDevicesInfo:C4,isLibVariantAvailable:I4},modelBytes:Q,kvCacheBytes:J,limitedKvCacheBytes:N}),q=R.selected,A=D4(q);q.modelBytes=Q||null,q.kvCacheBytes=J||null,q.memoryLimitedCtx=G||null,q.limitedKvCacheBytes=N||null,q.kvInfo=_||null,q.quantization=O||null;let w=null,U=null;if($){let K=D4($);U={...$,score:K};let L="buttress",B="buttress-higher-score";if(!R.ok)L="local",B="buttress-unavailable";else if(!K&&K!==0)L="buttress",B="missing-client-score";else{let{fit:E,limitedFit:M}=U,F=q?.fit,x=q?.limitedFit,k=E?.fitsInGpu||E?.fitsInCpu||M?.fitsInGpu||M?.fitsInCpu,D=F?.fitsInGpu||F?.fitsInCpu||x?.fitsInGpu||x?.fitsInCpu;if(k&&!D)L="local",B="client-fits-in-memory";else if(D&&!k)L="buttress",B="buttress-fits-in-memory";else if(K>A*X)L="local",B="client-better";else if(A>K*X)L="buttress",B="buttress-better";else L="either",B="comparable-scores"}w={buttressScore:A,clientScore:K,threshold:X,recommendation:L,reason:B}}if(!R.ok&&!w)w={buttressScore:A,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let Y=null;if(W)Y={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,nCtx:W.model?.n_ctx||null,cacheKType:W.model?.cache_type_k||"f16",cacheVType:W.model?.cache_type_v||"f16"};return{type:"ggml-llm",timestamp:new 
Date().toISOString(),buttress:R,client:U,comparison:w,modelConfig:Y}}var c3=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return g3},d3,y4,l3,u4=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};u4($[X],j)}else $[X]=j}),$},n3=".gguf",m4="https://huggingface.co",p4="https://huggingface.co/api",d,f4,d0=0.5,w0,S1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},p0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","snapdragon","default"].includes(Z))return Z;return null},l0=($={})=>{let Z=JSON.parse(JSON.stringify(w0));if(u4(Z,$),Z.backend.variant=p0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(S1(Z.backend.variant_preference).map(p0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=["cuda","vulkan","snapdragon","default"];if(Z.runtime.prefer_variants=Array.from(new Set(S1(Z.runtime.prefer_variants).map(p0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(S1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return 
Z.model.n_parallel=Math.max(1,Number(Z.model.n_parallel)||4),Z.model.n_batch=Math.max(1,Number(Z.model.n_batch)||512),Z.model.base_url=Z.model.base_url||m4,Z.model.api_base=Z.model.api_base||p4,Z.runtime.cache_dir=Z.runtime.cache_dir?b.resolve(Z.runtime.cache_dir):d,Z.runtime.session_cache={...w0.runtime.session_cache,...Z.runtime.session_cache||{}},Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||w0.runtime.context_release_delay_ms),Z},S4=($)=>{let Z=$.toLowerCase();return f4.find((j)=>Z.includes(j))||null},i3=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(p0).filter(Boolean)))},H0=async($)=>{await I3($,{recursive:!0})},s3=($=d)=>b.join($,".metadata-cache"),g4=($,Z,X=d)=>{let j=k1("sha256").update($).digest("hex");return b.join(s3(X),Z,`${j}.json`)},c4=async($,Z,X=d)=>{try{let j=g4($,Z,X),W=await b4(j,"utf-8");return console.log(`[Cache] Hit ${Z} cache:`,b.basename(j)),JSON.parse(W,(H,Q)=>{if(typeof Q==="string"&&Q.startsWith("__bigint__"))return BigInt(Q.slice(10));return Q})}catch(j){return null}},T1=async($,Z,X,j=d)=>{try{let W=g4($,Z,j);await H0(b.dirname(W)),await v4(W,JSON.stringify(X,(H,Q)=>{if(typeof Q==="bigint")return`__bigint__${Q.toString()}`;return Q}),"utf-8"),console.log(`[Cache] Wrote ${Z} cache:`,b.basename(W))}catch(W){console.warn(`[Cache] Failed to write ${Z} cache:`,W.message)}},n0=($=d)=>b.join($,".session-state-cache"),d4=($=d)=>b.join(n0($),"cache-map.json"),g0=($=d)=>b.join(n0($),"temp"),l4=($=d)=>b.join(n0($),"states"),P4=()=>({version:1,entries:{},totalSize:0}),r3=async($=d)=>{try{let Z=d4($),X=await b4(Z,"utf-8"),j=JSON.parse(X);if(!j.entries||typeof j.entries!=="object")return P4();return j}catch{return P4()}},P1=async($,Z=d)=>{let X=d4(Z),j=`${X}.tmp.${Date.now()}`;try{await H0(b.dirname(X)),await 
v4(j,JSON.stringify($,null,2),"utf-8"),await h4(j,X)}catch(W){throw await e(j).catch(()=>{}),W}},a3=($,Z)=>{let X=JSON.stringify({text:$,model:Z.modelPath,variant:Z.variant,n_gpu_layers:Z.n_gpu_layers,n_ctx:Z.n_ctx});return k1("sha256").update(X).digest("hex").slice(0,24)},o3=($,Z=d)=>b.join(l4(Z),`${$}.bin`),t3=($=d)=>{let Z=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return b.join(g0($),`${Z}.bin`)},e3=($,Z)=>$.modelPath===Z.modelPath&&$.variant===Z.variant&&$.n_gpu_layers===Z.n_gpu_layers&&$.n_ctx>=Z.n_ctx&&$.cacheTypeK===Z.cacheTypeK&&$.cacheTypeV===Z.cacheTypeV,$6=($,Z)=>{let X=Math.min($.length,Z.length),j=0;while(j<X&&$[j]===Z[j])j+=1;return j},Z6=100,X6=($,Z,X)=>{let j=Object.values(X.entries);console.log(`[SessionCache] Finding match for promptText (${$.length} chars)`),console.log(`[SessionCache] Checking ${j.length} cache entries`);let H=j.filter((Q)=>e3(Q.metadata,Z)).reduce((Q,J)=>{let N=$6($,J.fullText);if(N>=Z6&&N>Q.prefixLen)return{entry:J,prefixLen:N};return Q},{entry:null,prefixLen:0});if(H.entry)return console.log(`[SessionCache] Prefix match found: ${H.entry.id} (${H.prefixLen}/${H.entry.fullText.length} chars)`),{entry:H.entry,prefixLength:H.prefixLen};return console.log("[SessionCache] No match found"),null},j6=async($,Z,X)=>{let j=Object.values($.entries).sort((J,N)=>new Date(J.lastAccessedAt)-new Date(N.lastAccessedAt)),W=$.totalSize,H=Object.keys($.entries).length,Q=j.filter((J)=>{let N=W>Z,G=H>X;if(!N&&!G)return!1;return W-=J.stateFileSize||0,H-=1,!0});return await Promise.all(Q.map(async(J)=>{await e(J.stateFilePath).catch(()=>{}),delete $.entries[J.id],console.log(`[SessionCache] Evicted entry: ${J.id}`)})),$.totalSize=Math.max(0,W),Q.map((J)=>J.id)},W6=async($=d)=>{let Z=g0($);try{let X=await u3(Z),j=Date.now(),W=3600000;await Promise.all(X.map(async(H)=>{let Q=b.join(Z,H),J=await M0(Q).catch(()=>null);if(J&&j-J.mtimeMs>3600000)await e(Q).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: 
${H}`)}))}catch{}},Y6=async($)=>{try{return await M0($),!0}catch{return!1}},Q6=($,Z)=>{if($==null)return Z;if(typeof $==="number")return $;if(typeof $==="string"){let X=f3.parse($);return X!=null?X:Z}return Z},T4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},k4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},i4=async($,Z,X=d)=>{let j=JSON.stringify({url:$,headers:Z}),W=await c4(j,"range-metadata",X);if(W)return W;let H=!/^https?:/i.test($),{metadata:Q}=await m3($,{fetch,additionalFetchHeaders:Z,allowLocalFile:H});return await T1(j,"range-metadata",Q,X),Q},s4=($,Z)=>{if($.model.local_path)return b.resolve($.model.local_path);let X=Z.repoId.split("/"),j=b.join($.runtime.cache_dir,...X,Z.revision);return b.join(j,Z.filename)},R0=async($,Z)=>{try{let X=await M0($);if(!Z)return!0;return X.size===Z}catch(X){return!1}},c0=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await H0(b.dirname(X));let H=await fetch($,{headers:Z});if(!H.ok||!H.body)throw Error(`Failed to download ${$}: ${H.status} ${H.statusText}`);let Q=await y3(X,"w"),J=Number(H.headers.get("content-length"))||j||0,N=0,G=0.05;try{await H.body.pipeTo(new l3({async write(_){if(await Q.write(_),N+=_.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,N/J);while(O>=G)W(G),G+=0.05}},async close(){if(await Q.close(),typeof W==="function")W(1)},async abort(_){throw await Q.close().catch(()=>{}),await e(X).catch(()=>{}),_}}))}catch(_){throw await Q.close().catch(()=>{}),await e(X).catch(()=>{}),_}if(j){let _=await M0(X);if(_.size!==j)throw 
await e(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${_.size}`)}},D1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),H=await c4(W,"artifact-info",j);if(H)return H;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let Y=await k4($.model.url,{headers:Q}),K=Number(Y.headers.get("content-length"))||null,L=$.model.filename||$.model.url.split("/").pop(),B={repoId:Z,revision:X,filename:L,url:$.model.url,size:K,headers:Q};return await T1(W,"artifact-info",B,j),B}let{filename:J}=$.model,N=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await T4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((Y)=>Y.rfilename||Y.path||Y.filename).filter((Y)=>typeof Y==="string"&&Y.endsWith(n3));if(O.length===0)throw Error(`No GGUF artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:f4,V=()=>{let Y=z.find((K)=>{return O.find((B)=>B.toLowerCase().includes(K))});if(Y)return{filename:O.find((L)=>L.toLowerCase().includes(Y)),quantization:Y};return null};if(!J){let Y=V()||{filename:O[0],quantization:null},{filename:K,quantization:L}=Y;J=K,N=L||S4(J)}else if(!N)N=S4(J);let R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=/-(\d{5})-of-(\d{5})\.gguf$/,A=J.match(q),w=null;if(A){let[,,Y]=A,K=await T4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),L=K?.siblings||K?.files||[],B=Number(Y);w=0;for(let E=1;E<=B;E+=1){let 
M=String(E).padStart(5,"0"),F=J.replace(q,`-${M}-of-${Y}.gguf`),x=L.find((D)=>(D.rfilename||D.path||D.filename)===F),k=Number(x?.size);if(Number.isFinite(k)&&k>0)w+=k}}else{let Y=await k4(R,{headers:Q});w=Number(Y.headers.get("content-length"))||null}let U={repoId:Z,revision:X,filename:J,url:R,size:w,quantization:N,headers:Q,isSplit:Boolean(A),splitCount:A?Number(A[2]):0};return await T1(W,"artifact-info",U,j),U},r4=async($,{modelBytes:Z=null,kvCacheBytes:X=null}={})=>{let j=i3($),[W,...H]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,N=await _0({platform:process.platform,totalMemoryInBytes:E0.totalmem(),backend:"ggml-llm",variant:W||null,preferVariants:H,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:C4,isLibVariantAvailable:I4},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this platform`)});if(!N.ok||!N.selected){let z=(N.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). 
Errors: ${z}`)}let _=(N.attempts||[]).map(G);return{selected:G(N.selected),attempts:_}},J6=async($)=>{let Z=await D1($),X=await i4(Z.url,Z.headers,$.runtime.cache_dir),{arch:j,nCtxTrain:W,nLayer:H,nEmbd:Q,nHead:J,nHeadKv:N,nEmbdHeadK:G,nEmbdHeadV:_,quantVersion:O,fileType:z}=y0(X),V=Number.isFinite(Number(H))?Number(H):0,R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(N))?Number(N):q,w=q>0&&R>0?R/q:128,U=G!=null&&Number.isFinite(Number(G))?Number(G):w,Y=_!=null&&Number.isFinite(Number(_))?Number(_):w,K=I0({arch:j,metadata:X,nLayer:V}),L=K&&Number.isFinite(Number(K.kvLayers))?Number(K.kvLayers):V,B=Math.max(0,Math.floor(Number(L)||0)),E={use_mmap:$.model.use_mmap??$.runtime.use_mmap,use_mlock:$.model.use_mlock??$.runtime.use_mlock,n_threads:$.model.n_threads??$.runtime.n_threads,n_ctx:$.model.n_ctx??$.runtime.n_ctx,n_batch:$.model.n_batch??$.runtime.n_batch,n_ubatch:$.model.n_ubatch??$.runtime.n_ubatch,n_cpu_moe:$.model.n_cpu_moe??$.runtime.n_cpu_moe,n_parallel:$.model.n_parallel??$.runtime.n_parallel,cpu_mask:$.model.cpu_mask??$.runtime.cpu_mask,cpu_strict:$.model.cpu_strict??$.runtime.cpu_strict,devices:$.model.devices??$.runtime.devices,n_gpu_layers:$.model.n_gpu_layers??$.runtime.n_gpu_layers,flash_attn_type:$.model.flash_attn_type??$.runtime.flash_attn_type,cache_type_k:$.model.cache_type_k??$.runtime.cache_type_k,cache_type_v:$.model.cache_type_v??$.runtime.cache_type_v,kv_unified:$.model.kv_unified??$.runtime.kv_unified,swa_full:$.model.swa_full??$.runtime.swa_full,ctx_shift:$.model.ctx_shift??$.runtime.ctx_shift},M=E.n_ctx?Number(E.n_ctx):null,F=M||W||4096,x=[],k=[],D=!0;if(M&&W&&M>W){D=!1;let Q0=`Requested context length (${M}) exceeds model training context (${W})`;x.push(Q0),k.push(Q0),F=W}if(M&&!W)x.push("Model metadata missing training context length, using requested value");let 
m={k:E.cache_type_k,v:E.cache_type_v},p=Z.size>0?Z.size:0,n=N0({layerCount:B,headKvCount:A,embdHeadKCount:U,embdHeadVCount:Y,cacheTypes:m,swaConfig:K,kvUnified:E.kv_unified,nParallel:E.n_parallel,swaFull:E.swa_full}),i=n(F),v=await r4($,{modelBytes:p,kvCacheBytes:i}),c=v.selected.totalMemory||0,T=c*($.backend.gpu_memory_fraction||1),f=$.backend.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,h=Math.max(0,E0.totalmem()*f),s=v.selected.hasGpu?T:h,I=u0({maxCtx:F,availableMemory:s,modelBytes:p,kvBytesForCtx:n});if(!M&&I){let Q0=W?Math.min(I,W):I,U1=Math.max(32,Q0);if(U1<F)x.push(`Context length capped to ${U1} by memory limits`);F=U1}if(F>I)F=I;let a=Math.floor(I);console.log(`[buttress] Memory-limited context length: ${a}`);let q0=n(F),T0=p+q0,G0=V?p/(V+1):p,k0=0;if(v.selected.hasGpu&&G0>0)k0=Math.min(V+1,Math.max(0,Math.floor(T/G0)));console.log(`[buttress] Auto GPU layer capacity (${v.selected.variant}): ${k0}/${V+1}`);let H1;if(E.n_gpu_layers==="auto"||E.n_gpu_layers==null)H1=k0;else H1=Math.max(0,Math.min(Number(E.n_gpu_layers)||0,V+1));let w3=(()=>{let Q0=E.flash_attn_type&&String(E.flash_attn_type).toLowerCase();if(Q0==="on"||Q0==="off")return Q0;if(Q0==="auto")return v.selected.hasGpu?"auto":"off";return v.selected.hasGpu?"auto":"off"})(),R3=$.runtime.cache_dir,V1=s4($,Z),Y4=await 
R0(V1,Z.size),E3={ok:D,backend:"ggml-llm",warnings:x,errors:k,model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,url:Z.url,sizeBytes:Z.size,metadata:{architecture:j,n_ctx_train:W,n_layer:V,n_embd:R,quantization_version:O,file_type:z,kv_layer_count:B,swa:K?.enabled?{window:K.window,pattern:K.pattern,dense_first:K.denseFirst,type:K.type,layers:K.swaLayers}:null}},runtime:{...E,variant:v.selected.variant,n_ctx:F,requested_ctx:M,n_gpu_layers:H1,auto_gpu_layers:k0,flash_attn_type:w3,cache_type_k:m.k,cache_type_v:m.v,estimated_max_n_ctx:a},resources:{modelBytes:p,kvCacheBytes:q0,totalEstimatedBytes:T0,gpuCapacityBytes:c,gpuUsableBytes:T,cpuUsableBytes:h,fit:v.selected.fit},devices:{selected:v.selected,attempts:v.attempts},download:{cacheDir:R3,localPath:V1,exists:Y4},timestamp:new Date().toISOString()};return{config:$,info:E3,artifact:Z,metadata:{arch:j,nCtxTrain:W,nLayer:V,nEmbd:R},devices:v,cacheTypes:m,localPath:V1,localExists:Y4}},N6=($,Z,X=null,j=null)=>{let W,H=Date.now(),Q=0;return new y4({async start(J){try{let N=await $.parallel.completion(Z,(V,R)=>{if(!R)return;if(R.token)Q+=1;J.enqueue({event:"token",data:{requestId:V,...R}})}),{requestId:G}=N;W=N.stop;let _=await N.promise;console.log("[Completion] Result:",_),J.enqueue({event:"result",data:{requestId:G,..._}}),J.close();let 
O=Date.now()-H,z=_.timings||{};t.addCompletion({id:`completion-${G}`,generatorId:X,requestId:G,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,promptTokens:z.prompt_n??0,tokensGenerated:z.predicted_n??Q,tokensPerSecond:z.predicted_per_second??0,promptPerSecond:z.prompt_per_second??0,durationMs:O,success:!0,interrupted:_.interrupted||!1,contextFull:_.context_full||_.contextFull||!1})}catch(N){J.enqueue({event:"error",data:{message:N?.message||String(N)}}),J.error(N),t.addCompletion({id:`completion-${Date.now()}`,generatorId:X,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,durationMs:Date.now()-H,tokensGenerated:Q,success:!1,error:N?.message||String(N)})}},cancel(){if(W)W()}})},H6=($,Z,X,j,W,H,Q=null,J=null)=>{let N,G="",_=!1,O=Date.now(),z=0;return new y4({async start(V){try{let R=await $.parallel.completion(Z,(Y,K)=>{if(!K)return;if(K.token)G+=K.token,z+=1;V.enqueue({event:"token",data:{requestId:Y,...K}})}),{requestId:q}=R;N=R.stop;let A=await R.promise;if(A.text)G=A.text;else if(A.content)G=A.content;_=!A.interrupted&&!A.context_full,console.log("[Completion] Result:",A),V.enqueue({event:"result",data:{requestId:q,...A}}),V.close();let w=Date.now()-O,U=A.timings||{};if(t.addCompletion({id:`completion-${q}`,generatorId:Q,requestId:q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,promptTokens:U.prompt_n??H??0,tokensGenerated:U.predicted_n??z,tokensPerSecond:U.predicted_per_second??0,promptPerSecond:U.prompt_per_second??0,durationMs:w,success:!0,interrupted:A.interrupted||!1,contextFull:A.context_full||A.contextFull||!1,usedCache:Boolean(Z.load_state_path)}),_&&X.enabled&&G)X.saveCompletionState(j,G,W,H).catch((Y)=>{console.warn("[SessionCache] Save failed:",Y.message)});else 
if(W)e(W).catch(()=>{})}catch(R){V.enqueue({event:"error",data:{message:R?.message||String(R)}}),V.error(R),t.addCompletion({id:`completion-${Date.now()}`,generatorId:Q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,durationMs:Date.now()-O,tokensGenerated:z,success:!1,error:R?.message||String(R)}),e(W).catch(()=>{})}},cancel(){if(N)N();e(W).catch(()=>{})}})},O0=($)=>{let Z={model:$.plan.localPath,runtime:$.plan.info.runtime};return k1("sha256").update(JSON.stringify(Z)).digest("hex").slice(0,24)},V6=async($,Z,X,j=null)=>{let{config:W,localPath:H,artifact:Q}=$;if($.localExists&&!Z.has(H)){if($.info.download.exists=!0,typeof X==="function")X(0.5);return H}if(W.model.local_path&&!W.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let J=H;if(j){let N=j.getDownload(J);if(N){console.log(`[ensureModelFile] Waiting for global download: ${Q.repoId}`);try{if(await N,await R0(H,Q.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(0.5);return H}}catch(G){console.warn(`[ensureModelFile] Global download failed, will retry: ${G.message}`)}}}if(!Z.has(J))Z.set(J,(async()=>{if(Q.isSplit&&Q.splitCount>0){let N=/-(\d{5})-of-(\d{5})\.gguf$/,G=b.dirname(H),_=Q.splitCount,O=0;for(let z=1;z<=_;z+=1){let V=String(z).padStart(5,"0"),R=Q.filename.replace(N,`-${V}-of-${String(_).padStart(5,"0")}.gguf`),q=`${W.model.base_url.replace(/\/+$/,"")}/${Q.repoId}/resolve/${Q.revision}/${R}`,A=b.join(G,R);if(!await R0(A))await c0(q,Q.headers,A,null,(U)=>{if(U>=0&&Number.isFinite(U)){let Y=(O+U)/_,K=Math.round(Y*100);if(console.log(`Downloading model splits: ${Math.min(100,K)}%`),typeof X==="function")X(Y*0.5)}});O+=1}}else console.log("Downloading model: 0%"),await c0(Q.url,Q.headers,H,Q.size,(N)=>{if(N>=0&&Number.isFinite(N)){let G=Math.round(N*100);if(console.log(`Downloading model: ${Math.min(100,G)}%`),typeof 
X==="function")X(N*0.5)}});$.localExists=!0,$.info.download.exists=!0})());try{await Z.get(J)}finally{Z.delete(J)}return H},U6=async($,Z)=>{let X=O0($),j=$.contexts.get(X);if(j&&!j.released){if(j.releaseTimer)clearTimeout(j.releaseTimer),j.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${X}"`);if(j.releaseRequested=!1,j.refCount+=1,console.log(`[Context] Reusing existing context "${X}", refCount=${j.refCount}`),typeof Z==="function")Z(0);if(!j.context)await j.ready;if(typeof Z==="function")Z(1);return j}if(j)console.log(`[Context] Record exists but released=${j.released}, creating new context`);else console.log(`[Context] No existing record for "${X}", creating new context`);j={key:X,refCount:1,ready:null,released:!1},$.contexts.set(X,j),j.ready=(async()=>{let W=Date.now(),H=await V6($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let Q={model:H,n_threads:$.plan.info.runtime.n_threads,use_mmap:$.plan.info.runtime.use_mmap,use_mlock:$.plan.info.runtime.use_mlock,cpu_mask:$.plan.info.runtime.cpu_mask,cpu_strict:$.plan.info.runtime.cpu_strict,devices:$.plan.info.runtime.devices,n_ctx:$.plan.info.runtime.n_ctx,n_gpu_layers:$.plan.info.runtime.n_gpu_layers,n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch,n_ubatch:$.plan.info.runtime.n_ubatch,n_cpu_moe:$.plan.info.runtime.n_cpu_moe,flash_attn_type:$.plan.info.runtime.flash_attn_type,ctx_shift:$.plan.info.runtime.ctx_shift,kv_unified:$.plan.info.runtime.kv_unified,swa_full:$.plan.info.runtime.swa_full,lib_variant:$.plan.info.runtime.variant};if($.plan.info.runtime.flash_attn_type!=="off")Q.cache_type_k=$.plan.info.runtime.cache_type_k,Q.cache_type_v=$.plan.info.runtime.cache_type_v;console.log("[Context] Load Options:",Q);let J;try{if(J=await p3(Q,(N)=>{if(typeof Z==="function"){if(Z(0.5+N*0.25),N%5===0)console.log("[Context] Load Model Progress:",N)}}),$.plan.info.runtime.n_parallel){if(!await 
J.parallel.enable({n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch}))throw Error("Failed to enable parallel decoding mode for context")}if(typeof Z==="function")Z(1);return j.context=J,j.modelInfo=J.getModelInfo(),t.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,nCtx:$.plan.info.runtime?.n_ctx||null,nGpuLayers:$.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-W,success:!0}),j}catch(N){if(t.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-W,success:!1,error:N?.message||String(N)}),J)try{J.release()}catch(G){}throw N}})();try{return await j.ready,j}catch(W){throw $.contexts.delete(X),W}},f0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;Z.released=!0,$.contexts.delete(Z.key);try{Z.context?.parallel?.disable?.()}catch(j){}return await Z.context?.release?.(),!0},G6=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return f0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return f0($,Z);return console.log(`[Context] Scheduling release in ${W}ms for context "${Z.key}"`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] Release cancelled, refCount=${Z.refCount} for context "${Z.key}"`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing context "${Z.key}" after ${W}ms delay`),await f0($,Z)},W),!0},b1=($)=>{let Z=l0($);return 
Z.model.repo_id||Z.model.repository||Z.model.model||null},D4=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return K0($)};var v1=S(()=>{B0();m0();d3=c3(),{ReadableStream:y4,WritableStream:l3}=d3,d=b.join(E0.homedir(),".buttress","models"),f4=["mxfp4","q8_0","q6_k","q6","q5_k_m","q5_k_s","q5_k","q5_1","q5_0","q4_k_m","q4_k_s","q4_k","q4_1","q4_0","q3","q2"],w0={backend:{type:"ggml-llm",variant:null,variant_preference:["cuda","vulkan","snapdragon","default"],gpu_memory_fraction:0.85,cpu_memory_fraction:d0},model:{repo_id:null,revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:"auto",allow_local_file:!1,local_path:null,api_base:p4,base_url:m4},runtime:{cache_dir:d,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10737418240,max_entries:1000},context_release_delay_ms:1e4}}});import X0 from"node:path";import p1 from"node:os";import{stat as X2,mkdir as _6,open as q6,unlink as h1,readFile as O6,writeFile as L6}from"node:fs/promises";import{createHash as A6}from"node:crypto";import{initWhisper as z6}from"@fugood/whisper.node";import{getBackendDevicesInfo as j2,isLibVariantAvailable as W2}from"@fugood/llama.node";import*as B6 from"node:stream/web";class _2{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue($,Z=null){return new Promise((X,j)=>{this.queue.push({task:$,resolve:X,reject:j,taskId:Z}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:$,resolve:Z,reject:X,taskId:j}=this.queue.shift();this.currentTaskId=j;try{let W=await $();Z(W)}catch(W){X(W)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}}async function 
q2($,Z,X={}){let{globalDownloadManager:j=null}=X,W=a0(Z),H=await T6(W),Q={id:$,type:"ggml-stt",config:W,plan:H,info:H.info,contextRecord:null,downloads:new Map,globalDownloadManager:j,queue:new _2,finalized:!1},J=async()=>{if(Q.finalized)return;Q.finalized=!0;let A=Q.contextRecord;if(!A)return;if(A.released)return;if(A.releaseRequested||A.releaseTimer)return;if(A.refCount=Math.max(0,A.refCount-1),A.refCount>0)return;await s0(Q,A)},N=async(A={})=>{let{onProgress:w}=A;try{let U=await b6(Q,w);return{modelInfo:U.modelInfo&&typeof U.modelInfo==="object"?{...U.modelInfo}:null,runtime:{...Q.plan.info.runtime},download:{...Q.plan.info.download}}}catch(U){throw console.error("[Context] Error initializing context:",U),U}},G=async()=>{if(Q.finalized)return!1;let A=Q.contextRecord;if(!A)return!1;return v6(Q,A)},_=async(A={})=>{let{audioPath:w,audioData:U,options:Y={}}=A,K=Q.contextRecord;if(!K)throw Error("Context not initialized");let L={...Y};if(Q.plan.info.runtime.max_threads&&L.maxThreads==null)L.maxThreads=Q.plan.info.runtime.max_threads;let B=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,E=Date.now();return Q.queue.enqueue(async()=>{await K.ready;try{let M;if(U){let F=D6(U),{promise:x}=K.context.transcribeData(F,L);M=await x}else{if(!w)throw Error("audioPath or audioData is required for transcription");let F=X0.resolve(w),{promise:x}=K.context.transcribe(F,L);M=await x}return J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-E,segmentCount:M?.segments?.length||0,textLength:M?.text?.length||0,success:!0}),M}catch(M){throw 
J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-E,success:!1,error:M?.message||String(M)}),M}},B)},O=async(A={})=>_(A),z=async(A={})=>_(A),V=()=>{let A=Q.contextRecord;if(!A)return!1;return!A.released&&(A.releaseRequested||A.releaseTimer||A.refCount>0)},R=()=>{Q.finalized=!1},q=()=>({id:Q.id,type:Q.type,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,variant:Q.plan.info.runtime?.variant||null,hasContext:Boolean(Q.contextRecord?.context),contextRefCount:Q.contextRecord?.refCount||0,queueStatus:Q.queue.getStatus()});return{id:$,type:"ggml-stt",info:H.info,queue:Q.queue,initContext:N,transcribe:O,transcribeData:z,releaseContext:G,finalize:J,getStatus:q,hasPendingReleases:V,resetFinalized:R}}async function O2($,Z,X={}){let{onProgress:j,onComplete:W,onError:H}=X;try{let Q=a0($),J=await f1(Q),N=G2(Q,J),{repoId:G}=J;if(await r0(N,J.size)){if(console.log(`[Download] STT model already exists: ${G} at ${N}`),typeof W==="function")W({localPath:N,repoId:G,alreadyExists:!0});return{started:!1,localPath:N,repoId:G,alreadyExists:!0}}let O=Z.getDownload(N);if(O)return console.log(`[Download] Already downloading STT model: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:N,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof H==="function")H(V)}),{started:!1,localPath:N,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${G}`);let z=(async()=>{try{if(await K2(J.url,J.headers,N,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}}),console.log(`[Download] Completed STT model: ${G}`),typeof W==="function")W({localPath:N,repoId:G})}catch(V){if(console.error(`[Download] Failed STT model: ${G}`,V.message),typeof H==="function")H(V);throw V}finally{Z.deleteDownload(N)}})();return 
Z.setDownload(N,z),{started:!0,localPath:N,repoId:G}}catch(Q){if(console.error("[Download] Failed to start STT download:",Q.message),typeof H==="function")H(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function L2($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...H}=Z,Q=null,J=null,N=null;if(W)try{let w=a0(W),U=await f1(w);Q=U.size??null,{processingBufferBytes:J}=C0({modelBytes:Q}),N=U.quantization||null}catch(w){}let G=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,_=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,O=await _0({...H,platform:process.platform,totalMemoryInBytes:p1.totalmem(),backend:"ggml-stt",includeBreakdown:j,gpuMemoryFraction:G,cpuMemoryFraction:_,dependencies:{getBackendDevicesInfo:j2,isLibVariantAvailable:W2},modelBytes:Q,kvCacheBytes:J}),z=O.selected,V=Z2(z);if(z)z.modelBytes=Q||null,z.processingBytes=J||null,z.quantization=N||null;let R=null,q=null;if($){let w=Z2($);q={...$,score:w};let U="buttress",Y="buttress-higher-score";if(!O.ok)U="local",Y="buttress-unavailable";else if(!w&&w!==0)U="buttress",Y="missing-client-score";else if($.fit&&z?.fit){let K=$.fit.fitsInGpu||$.fit.fitsInCpu,L=z.fit.fitsInGpu||z.fit.fitsInCpu;if(K&&!L)U="local",Y="client-fits-in-memory";else if(L&&!K)U="buttress",Y="buttress-fits-in-memory";else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores"}else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores";R={buttressScore:V,clientScore:w,threshold:X,recommendation:U,reason:Y}}if(!O.ok&&!R)R={buttressScore:V,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let 
A=null;if(W)A={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,filename:W.model?.filename||null};return{type:"ggml-stt",timestamp:new Date().toISOString(),buttress:O,client:q,comparison:R,modelConfig:A}}var w6=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return B6},R6,Y2=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};Y2($[X],j)}else $[X]=j}),$},E6=".bin",Q2="https://huggingface.co",J2="https://huggingface.co/api",L0,y1,u1,N2="fp16",H2=0.5,m1,C1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},i0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","default"].includes(Z))return Z;return null},a0=($={})=>{let Z=JSON.parse(JSON.stringify(m1));if(Y2(Z,$),Z.backend.variant=i0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(C1(Z.backend.variant_preference||y1).map(i0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=[...y1];if(Z.runtime.prefer_variants=Array.from(new Set(C1(Z.runtime.prefer_variants).map(i0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(C1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return Z.model.base_url=Z.model.base_url||Q2,Z.model.api_base=Z.model.api_base||J2,Z.runtime.cache_dir=Z.runtime.cache_dir?X0.resolve(Z.runtime.cache_dir):L0,Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||m1.runtime.context_release_delay_ms),Z},I1=($)=>{let Z=$.toLowerCase();return 
u1.find((j)=>Z.includes(j))||null},M6=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(i0).filter(Boolean)))},V2=async($)=>{await _6($,{recursive:!0})},F6=($=L0)=>X0.join($,".metadata-cache"),U2=($,Z,X=L0)=>{let j=A6("sha256").update($).digest("hex");return X0.join(F6(X),Z,`${j}.json`)},x6=async($,Z,X=L0)=>{try{let j=U2($,Z,X),W=await O6(j,"utf-8");return JSON.parse(W)}catch(j){return null}},e4=async($,Z,X,j=L0)=>{try{let W=U2($,Z,j);await V2(X0.dirname(W)),await L6(W,JSON.stringify(X),"utf-8")}catch(W){}},S6=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},$2=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},G2=($,Z)=>{if($.model.local_path)return X0.resolve($.model.local_path);let X=Z.repoId.split("/"),j=X0.join($.runtime.cache_dir,...X,Z.revision);return X0.join(j,Z.filename)},r0=async($,Z)=>{try{let X=await X2($);if(!Z)return!0;return X.size===Z}catch(X){return!1}},K2=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await V2(X0.dirname(X));let H=await fetch($,{headers:Z});if(!H.ok||!H.body)throw Error(`Failed to download ${$}: ${H.status} ${H.statusText}`);let Q=await q6(X,"w"),J=Number(H.headers.get("content-length"))||j||0,N=0,G=0.05;try{await H.body.pipeTo(new R6({async write(_){if(await Q.write(_),N+=_.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,N/J);while(O>=G)W(G),G+=0.05}},async close(){if(await 
Q.close(),typeof W==="function")W(1)},async abort(_){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),_}}))}catch(_){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),_}if(j){let _=await X2(X);if(_.size!==j)throw await h1(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${_.size}`)}},f1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),H=await x6(W,"artifact-info",j);if(H)return H;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let U=await $2($.model.url,{headers:Q}),Y=Number(U.headers.get("content-length"))||null,K=$.model.filename||$.model.url.split("/").pop(),L={repoId:Z,revision:X,filename:K,url:$.model.url,size:Y,quantization:I1(K||""),headers:Q};return await e4(W,"artifact-info",L,j),L}let{filename:J}=$.model,N=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await S6(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((U)=>U.rfilename||U.path||U.filename).filter((U)=>typeof U==="string"&&U.endsWith(E6));if(O.length===0)throw Error(`No model artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:u1,V=()=>{for(let U of z)if(U===N2){let Y=O.find((K)=>{let L=K.toLowerCase();return!u1.some((B)=>L.includes(B))});if(Y)return{filename:Y,quantization:null}}else{let Y=O.find((K)=>K.toLowerCase().includes(U));if(Y)return{filename:Y,quantization:U}}return null};if(!J){let U=V()||{filename:O[0],quantization:null},{filename:Y,quantization:K}=U;J=Y,N=K||I1(J)}else if(!N)N=I1(J);let 
R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=await $2(R,{headers:Q}),A=Number(q.headers.get("content-length"))||null,w={repoId:Z,revision:X,filename:J,url:R,size:A,quantization:N,headers:Q,isSplit:!1,splitCount:0};return await e4(W,"artifact-info",w,j),w},P6=async($,{modelBytes:Z=null,processingBytes:X=null}={})=>{let j=M6($),[W,...H]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):m1.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):H2,N=await _0({platform:process.platform,totalMemoryInBytes:p1.totalmem(),backend:"ggml-stt",variant:W||null,preferVariants:H,variantPreference:$.backend.variant_preference,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:j2,isLibVariantAvailable:W2},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this platform`)});if(!N.ok||!N.selected){let z=(N.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). 
Errors: ${z}`)}let _=(N.attempts||[]).map(G);return{selected:G(N.selected),attempts:_}},T6=async($)=>{let Z=await f1($),X=C0({modelBytes:Z.size>0?Z.size:0}),j=await P6($,{modelBytes:X.modelBytes,processingBytes:X.processingBufferBytes}),W=j.selected.hasGpu&&(j.selected.fit?.fitsInGpu!==void 0?j.selected.fit.fitsInGpu:!0);if($.model.use_gpu===!1)W=!1;let H=$.model.use_flash_attn&&String($.model.use_flash_attn).toLowerCase(),Q;if(H==="on"||H==="true")Q=!0;else if(H==="off"||H==="false")Q=!1;else Q=W;let J=$.runtime.cache_dir,N=G2($,Z),G=await r0(N,Z.size),_={ok:!0,backend:"ggml-stt",model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,url:Z.url,sizeBytes:Z.size},runtime:{variant:j.selected.variant,use_gpu:W,use_flash_attn:Q,max_threads:$.runtime.max_threads?Number($.runtime.max_threads):null},resources:{...X,gpuCapacityBytes:j.selected.gpuTotalBytes,gpuUsableBytes:j.selected.gpuUsableBytes,cpuUsableBytes:j.selected.cpuUsableBytes,fit:j.selected.fit},devices:{selected:j.selected,attempts:j.attempts},download:{cacheDir:J,localPath:N,exists:G},timestamp:new Date().toISOString()};return{config:$,info:_,artifact:Z,memory:X,devices:j,localPath:N,localExists:G}},k6=async($,Z,X,j=null)=>{let{localPath:W,artifact:H,config:Q}=$;if($.localExists){if(typeof X==="function")X(1);return W}if(j){let G=j.getDownload(W);if(G){console.log(`[ensureModelFile] Waiting for global STT download: ${H.repoId}`);try{if(await G,await r0(W,H.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(1);return W}}catch(_){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${_.message}`)}}}let J=Z.get(W);if(J){if(await J,typeof X==="function")X(1);return W}let N=(async()=>{if(Q.model.allow_local_file){if(!await r0(W,H.size))throw Error(`Local model file not found: ${W}`);return W}return await K2(H.url,H.headers,W,H.size,X),W})();Z.set(W,N);try{return await N,W}finally{Z.delete(W)}},D6=($)=>{if(!$)return null;if($ 
instanceof ArrayBuffer)return $;if(ArrayBuffer.isView($))return $.buffer;if(typeof $==="string"){let Z=$.startsWith("data:")?$.split(",")[1]||"":$,X=Buffer.from(Z,"base64");return X.buffer.slice(X.byteOffset,X.byteOffset+X.byteLength)}throw Error("Unsupported audioData format, expected base64 string or ArrayBuffer")},b6=async($,Z)=>{if($.contextRecord&&!$.contextRecord.released){if($.contextRecord.releaseTimer)clearTimeout($.contextRecord.releaseTimer),$.contextRecord.releaseTimer=null,console.log("[Context] Cancelled pending STT release");if($.contextRecord.releaseRequested=!1,$.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${$.contextRecord.refCount}`),typeof Z==="function")Z(0);if(!$.contextRecord.context)await $.contextRecord.ready;if(typeof Z==="function")Z(1);return $.contextRecord}if($.contextRecord)console.log(`[Context] STT record exists but released=${$.contextRecord.released}, creating new context`);else console.log("[Context] No existing STT record, creating new context");let X={refCount:1,ready:null,released:!1};$.contextRecord=X,X.ready=(async()=>{let j=Date.now();try{if(typeof Z==="function")Z(0);let W=await k6($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let H=await z6({filePath:W,useFlashAttn:$.plan.info.runtime.flash_attn_type==="on",useGpu:$.plan.info.runtime.n_gpu_layers>0,nThreads:$.plan.info.runtime.n_threads},$.plan.info.runtime.variant);if(typeof Z==="function")Z(1);X.context=H;try{X.modelInfo=H.getModelInfo()}catch(Q){X.modelInfo=null}return J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,useGpu:$.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-j,success:!0}),X}catch(W){throw 
J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-j,success:!1,error:W?.message||String(W)}),W}})();try{if(await X.ready,typeof Z==="function")Z(1);return X}catch(j){throw $.contextRecord=null,j}},s0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;return Z.released=!0,$.contextRecord=null,await Z.context?.release?.(),!0},v6=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return s0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return s0($,Z);return console.log(`[Context] Scheduling STT release in ${W}ms`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] STT release cancelled, refCount=${Z.refCount}`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${W}ms delay`),await s0($,Z)},W),!0},h6,C6=($)=>{if(!$)return null;let Z=$.toLowerCase();return h6.find((X)=>Z.includes(X))||null},g1=($)=>{let Z=a0($),X=Z.model.repo_id||Z.model.repository||Z.model.model||null;if(!X)return null;let j=C6(Z.model.filename);if(j)return`${X}:${j}`;return X},Z2=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return K0($)};var 
c1=S(()=>{B0();m0();({WritableStream:R6}=w6()),L0=X0.join(p1.homedir(),".buttress","models"),y1=["cuda","vulkan","default"],u1=["q8_0","q5_1","q5_0","q4_1","q4_0"],m1={backend:{type:"ggml-stt",variant:null,variant_preference:y1,gpu_memory_fraction:0.85,cpu_memory_fraction:H2},model:{repo_id:"BricksDisplay/whisper-ggml",revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:["q8_0",N2,"q5_1"],allow_local_file:!1,local_path:null,api_base:J2,base_url:Q2,use_gpu:!0,use_flash_attn:"auto"},runtime:{cache_dir:L0,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}};h6=["large-v3-turbo","distil-large-v3","large-v3","large-v2","large-v1","large","distil-medium","medium.en","medium","small.en-tdrz","distil-small.en","small.en","small","base.en","base","tiny.en","tiny"]});async function r($,Z=null,X={}){if($==="ggml-llm")return t4(Z,X);if($==="ggml-stt")return L2(Z,X);throw Error(`Unknown backend type: ${$}`)}var d1=S(()=>{v1();c1()});var C;var A2=S(()=>{C={name:"@fugood/buttress-backend-core",private:!0,type:"module",version:"2.23.0-beta.36",main:"src/index.js",types:"lib/types/index.d.ts",scripts:{build:"tsc --noResolve --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js"},dependencies:{"@fugood/buttress-hardware-guardrails":"^2.23.0-beta.35","@fugood/llama.node":"^1.4.11","@fugood/whisper.node":"^1.0.11","@huggingface/gguf":"^0.3.2","@iarna/toml":"^3.0.0",bytes:"^3.1.0"}}});import y from"node:os";import z2 from"node:fs";import B2 from"node:path";import{execSync as o0}from"node:child_process";import w2 from"@iarna/toml";async function E2({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${C.name} v${C.version}`),console.log(`Generating model capabilities comparison...
1
+ #!/usr/bin/env node
2
+ import{createRequire as T3}from"node:module";var S3=Object.defineProperty;var P3=($,Z)=>{for(var X in Z)S3($,X,{get:Z[X],enumerable:!0,configurable:!0,set:(j)=>Z[X]=()=>j})};var S=($,Z)=>()=>($&&(Z=$($=0)),Z);var J4=T3(import.meta.url);var N4=($,Z,X)=>Math.min(Math.max($,Z),X),H4=($)=>$?40:0,V4=($=0)=>{if(!$)return 0;return N4($/12884901888*20,0,20)},U4=($=0)=>{if(!$)return 0;return N4($/34359738368*10,0,10)},G4=($)=>$?10:0,K4=($="default",Z=null)=>{let X=String($).toLowerCase();if(!X)return 0;if(X.includes("cuda"))return 20;if(X.includes("vulkan"))return 10;if(X.includes("default"))return Z==="darwin"||Z==="ios"?15:5;return 0},K0=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:H=!0}={})=>{if(!H)return 0;let Q=H4(X)+K4(Z,$)+V4(j),J=U4(W),N=G4(H);return Math.min(100,Math.round(Q+J+N))},K1=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:H=!0}={})=>({gpuPresence:H4(X),variant:K4(Z,$),gpuMemory:V4(j),cpuMemory:U4(W),availability:G4(H)});var L4,D0=0.85,b0=0.5,_4=($)=>{if(!$&&$!==0)return[];if(Array.isArray($))return $.filter((Z)=>Z!=null);return[$]},k3=($)=>{if(!$)return null;return String($).trim().toLowerCase()||null},D3=({variant:$,preferVariants:Z=[],variantPreference:X=[],defaultVariants:j=L4}={})=>{let W=[];if($)W.push($);W.push(..._4(Z)),W.push(..._4(X)),W.push(...j);let H=W.map(k3).filter(Boolean);return Array.from(new Set(H))},q4=($={})=>{let Z=String($.type||$.deviceType||$.kind||"").toLowerCase();if(Z.includes("gpu"))return!0;if(Z.includes("cuda"))return!0;if(Z.includes("metal"))return!0;if(Z.includes("vulkan"))return!0;if(Z.includes("snapdragon"))return!0;return!1},b3=($)=>{if(!Array.isArray($))return[];return $.map((Z)=>({...Z}))},v3=($,Z)=>{if($==="snapdragon")return Z.filter((X)=>X.deviceName!=="GPUOpenCL");return Z},O4=({platform:$,totalMemoryInBytes:Z,variant:X,devices:j,gpuMemoryFraction:W,cpuMemoryFraction:H,ok:Q,error:J})=>{let 
N=b3(v3(X,j)),G=N.some(q4),_=N.filter((w)=>q4(w)&&Number.isFinite(Number(w.maxMemorySize))).reduce((w,U)=>w+U.maxMemorySize,0),O=Z,z=G?Math.floor(_*W):0,V=O?Math.floor(O*H):0,R={platform:$,variant:X,hasGpu:G,gpuUsableBytes:z,cpuUsableBytes:V,ok:Q},q=K0(R),A=Q?K1(R):null;return{platform:$,ok:Q,variant:X,hasGpu:G,devices:N,gpuTotalBytes:_,gpuUsableBytes:z,cpuTotalBytes:O,cpuUsableBytes:V,score:q,breakdown:A,error:J,timestamp:new Date().toISOString()}},_1=({device:$,modelBytes:Z=0,kvCacheBytes:X=0}={})=>{if(!$)return{totalRequiredBytes:Z+X,fitsInGpu:!1,fitsInCpu:!1,limiting:"unknown-device"};let j=Math.max(0,Number(Z)||0)+Math.max(0,Number(X)||0),W=$.hasGpu&&j>0&&j<=$.gpuUsableBytes,H=j>0&&j<=$.cpuUsableBytes,Q="ok";if(!W&&$.hasGpu)Q="gpu-memory";if(!H)Q=W?"cpu-memory":"insufficient-memory";return{totalRequiredBytes:j,fitsInGpu:W,fitsInCpu:H,limiting:Q}},z0=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:H=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:N=null,kvCacheBytes:G=null,limitedKvCacheBytes:_=null,dependencies:O={},defaultVariants:z=L4}={})=>{let{getBackendDevicesInfo:V,isLibVariantAvailable:R}=O;if(typeof V!=="function"||typeof R!=="function")throw TypeError("GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions");let q=D3({variant:Z,preferVariants:X,variantPreference:j,defaultVariants:z}),A=[];for(let L of q)try{if(!await R(L))throw Error(`Variant ${L} not available on this platform`);let E=await V(L);A.push(O4({platform:$,totalMemoryInBytes:J,variant:L,devices:E,gpuMemoryFraction:W,cpuMemoryFraction:H,ok:!0}))}catch(B){let E=B instanceof Error?B.message:String(B);A.push(O4({platform:$,totalMemoryInBytes:J,variant:L,devices:[],gpuMemoryFraction:W,cpuMemoryFraction:H,ok:!1,error:E}))}let U=A.filter((L)=>L.ok)[0]||null,Y={ok:Boolean(U),selected:U?{...U,breakdown:Q?U.breakdown:void 0}:null,attempts:A};if(!Q&&Y.selected)delete 
Y.selected.breakdown;if(!Y||!N&&!G)return Y;let K=(L)=>{if(!L)return L;let B=_1({device:L,modelBytes:N||0,kvCacheBytes:G||0}),E=null;if(_!=null&&_!==G)E=_1({device:L,modelBytes:N||0,kvCacheBytes:_});return{...L,fit:B,...E&&{limitedFit:E}}};return Y.selected=K(Y.selected),Y.attempts=Array.isArray(Y.attempts)?Y.attempts.map(K):Y.attempts,Y},v0="ggml-llm";var h0=S(()=>{L4=["cuda","vulkan","snapdragon","default"]});var q1="ggml-stt",A4,O1=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:H=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:N=null,processingBytes:G=null,kvCacheBytes:_=null,dependencies:O={}}={})=>{let z=j&&j.length>0?j:A4;return z0({platform:$,variant:Z,preferVariants:X,variantPreference:z,gpuMemoryFraction:W,cpuMemoryFraction:H,includeBreakdown:Q,totalMemoryInBytes:J,modelBytes:N,kvCacheBytes:G??_,dependencies:O,defaultVariants:A4})};var L1=S(()=>{h0();A4=["cuda","vulkan","default"]});var h3,_0=async({platform:$,totalMemoryInBytes:Z,backend:X=v0,dependencies:j,...W}={})=>{let H=h3.get(X);if(!H)throw Error(`No capability detector registered for backend "${X}"`);return await H({...W,dependencies:j,totalMemoryInBytes:Z,platform:$})};var z4=S(()=>{h0();L1();h3=new Map([[v0,z0],[q1,O1]])});var B4,A1=($)=>{let Z=$?String($).toLowerCase():"f16";return B4[Z]||B4.f16},z1=($,Z,X,j,W,H={},{totalLayers:Q=null,swaLayers:J=0,swaContext:N=null,swaContextMultiplier:G=1,swaAdditionalTokens:_=0,swaFull:O=!1}={})=>{if(!$||!Z||!X||!j||!W)return 0;let z=Q!=null&&Q!==void 0?Number(Q):Number($),V=Math.max(0,Math.floor(z));if(!V)return 0;let R=A1(H.k),q=A1(H.v),A=Number(X)*(Number(j)*R+Number(W)*q);if(!A)return 0;let w=Math.max(0,Number(Z)||0),U=Math.min(V,Math.max(0,Math.floor(Number(J)||0))),Y=Math.max(0,V-U),K=N!=null&&Number.isFinite(Number(N))?Math.max(0,Number(N)):w,L=Math.max(1,Number(G)||1),B=Math.max(0,Number(_)||0),E=K*L+B,M=O?w:Math.min(w,E),F=Y*w+U*Math.max(0,Math.floor(M));return 
Math.round(A*F)},C0=({modelBytes:$=0,audioLengthSeconds:Z=30,sampleRate:X=16000,bytesPerSample:j=4}={})=>{let W=Math.max(0,Number($)||0),H=Math.max(0,Math.floor(Math.max(0,Z)*X*j)),Q=1048576,J=1073741824,N;if(W<209715200)N=125829120;else if(W<524288000)N=146800640;else if(W<2147483648)N=157286400;else N=167772160;let G;if(W<209715200)G=73400320;else if(W<524288000)G=141557760;else if(W<2147483648)G=230686720;else G=230686720;let _;if(W<104857600)_=20971520;else if(W<209715200)_=31457280;else if(W<524288000)_=89128960;else if(W<2147483648)_=225443840;else _=377487360;let O=N+G+_,z=W+O+H;return{modelBytes:W,audioBufferBytes:H,processingBufferBytes:O,totalBytes:z}};var B1=S(()=>{B4={f16:2,f32:4,q8_0:1,q6_k:0.75,q5_k:0.625,q5_k_m:0.625,q5_k_s:0.625,q5_1:0.625,q5_0:0.625,q4_k:0.5,q4_k_m:0.5,q4_k_s:0.5,q4_1:0.5,q4_0:0.5,iq4_nl:0.5}});var w1=($)=>$?String($).trim().toLowerCase():null,C3=($={},Z=null)=>{if(!$)return null;let X=w1(Z),j=X?`${X}.attention.sliding_window`:null,W=(j&&$[j]!=null?$[j]:null)??$["llama.attention.sliding_window"];if(W==null)return null;let H=Number(W);return Number.isFinite(H)?H:null},R4=($=0,Z=0,X=!1)=>{let j=Math.max(0,Math.floor(Number($)||0)),W=Math.max(0,Math.floor(Number(Z)||0));if(!j||W===1)return 0;if(W<=0)return j;let H=Math.max(0,W-1),Q=Math.floor(j/W),J=j%W,N=X?Math.max(0,J-1):Math.min(J,H);return Q*H+N},w4=({arch:$,nLayer:Z=0})=>({arch:w1($),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(Z)||0)),swaLayers:0}),I3,I0=({arch:$,metadata:Z={},nLayer:X=0}={})=>{let j=w1($||Z["general.architecture"]),W=Math.max(0,Math.floor(Number(X)||0)),H=C3(Z,j),Q=j?I3.get(j):null;if(!Q)return w4({arch:j,nLayer:X});let J=Q({nLayer:W,nSwa:H,metadata:Z});if(!J||!J.enabled||!J.window||J.window<=0)return w4({arch:j,nLayer:X});let 
N=Math.max(0,Math.floor(Number(J.pattern)||0)),G=J.kvLayers!=null&&Number.isFinite(Number(J.kvLayers))?Number(J.kvLayers):W,_=Math.max(0,Math.floor(G)),O=R4(_,N,Boolean(J.denseFirst));return{arch:j,enabled:O>0,window:J.window,pattern:N,denseFirst:Boolean(J.denseFirst),type:J.type||"standard",kvLayers:_,swaLayers:O}};var E4=S(()=>{I3=new Map([["llama4",({nSwa:$})=>{if($===0)return{enabled:!1};return{enabled:!0,window:$&&$>0?$:8192,pattern:4,type:"chunked"}}],["afmoe",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["phi3",()=>({enabled:!1})],["gemma2",({nSwa:$})=>{let Z=$&&$>0?$:4096;if(!Z)return{enabled:!1};return{enabled:!0,window:Z,pattern:2,type:"standard"}}],["gemma3",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"standard"}}],["gemma3n",({nLayer:$,nSwa:Z})=>{if(!Z||Z<=0)return{enabled:!1};return{enabled:!0,window:Z,pattern:5,type:"standard",kvLayers:Math.min(20,$)}}],["gemma-embedding",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"symmetric"}}],["cohere2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["olmo2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["exaone4",({nLayer:$,nSwa:Z})=>{let X=$>=64,j=null;if(Z&&Z>0)j=Z;else if(X)j=4096;if(!j)return{enabled:!1};return{enabled:!0,window:j,pattern:4,type:"standard"}}],["gpt-oss",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:2,type:"standard"}}],["smallthinker",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:"standard"}}]])});var y0=($={})=>{let Z=$["general.architecture"],X=(z,V=null)=>{let R=$[z],q=Number(R);return 
Number.isFinite(q)?q:V},j=Z?X(`${Z}.context_length`,X("llama.context_length")):null,W=Z?X(`${Z}.block_count`,X("llama.block_count")):null,H=Z?X(`${Z}.embedding_length`,X("llama.embedding_length")):null,Q=Z?X(`${Z}.attention.head_count`,X("llama.attention.head_count")):null,J=Z?X(`${Z}.attention.head_count_kv`,X("llama.attention.head_count_kv",Q)):null,N=Z?X(`${Z}.attention.key_length`,X("llama.attention.key_length")):null,G=Z?X(`${Z}.attention.value_length`,X("llama.attention.value_length")):null,_=$["general.quantization_version"]||null,O=$["general.file_type"]||null;return{arch:Z,nCtxTrain:j,nLayer:W,nEmbd:H,nHead:Q,nHeadKv:J,nEmbdHeadK:N,nEmbdHeadV:G,quantVersion:_,fileType:O}},N0=({layerCount:$,headKvCount:Z,embdHeadKCount:X,embdHeadVCount:j,cacheTypes:W,swaConfig:H,kvUnified:Q=!1,nParallel:J=1,swaFull:N=!1})=>{let G=H?.window&&Q?Math.max(1,Number(J)||1):1;return(_)=>z1($,_,Z,X,j,W,{totalLayers:$,swaLayers:H?.swaLayers||0,swaContext:H?.window,swaFull:N,swaContextMultiplier:G})},u0=({maxCtx:$,availableMemory:Z,modelBytes:X,kvBytesForCtx:j})=>{let W=Math.max(1,Math.floor(Number($)||0));if(!j||Z<=X)return W;let H=1,Q=W,J=W;while(H<=Q){let N=Math.floor((H+Q)/2);if(X+j(N)<=Z)J=N,H=N+1;else Q=N-1}return J};var M4=S(()=>{B1()});var B0=S(()=>{z4();B1();h0();L1();E4();M4()});import{EventEmitter as y3}from"node:events";class 
E1{constructor($=u3){this.maxEntries=$,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad($){R1(this.modelLoads,$,this.maxEntries),o.emit("status:modelLoad",$),o.emit("status:change",{type:"modelLoad",entry:$})}addCompletion($){R1(this.completions,$,this.maxEntries),o.emit("status:completion",$),o.emit("status:change",{type:"completion",entry:$})}addTranscription($){R1(this.transcriptions,$,this.maxEntries),o.emit("status:transcription",$),o.emit("status:change",{type:"transcription",entry:$})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}}function M1($){let Z=(X)=>$(X);return o.on("status:change",Z),()=>o.off("status:change",Z)}function x4($){F4+=1;let Z=F4,X=M1($);return{subscriberId:Z,unsubscribe:X}}function F1($){let Z=[];return{generators:Array.from($.entries()).filter(([,j])=>j.type==="ggml-llm").map(([j,W])=>{let{instance:H}=W,Q=[];if(H.contexts)Q=Array.from(H.contexts.entries()).map(([J,N])=>{let G={key:J,refCount:N.refCount,hasModel:Boolean(N.context)},_=N.context.parallel.getStatus();return G.parallelStatus=_,Z.push({generatorId:j,contextKey:J,..._}),G});return{id:j,type:W.type,refCount:W.refCount,repoId:H.info?.model?.repoId||null,quantization:H.info?.model?.quantization||null,variant:H.info?.runtime?.variant||null,nCtx:H.info?.runtime?.n_ctx||null,nParallel:H.info?.runtime?.n_parallel||null,contexts:Q}}),parallelStatuses:Z,history:{modelLoads:t.getModelLoadHistory(),completions:t.getCompletionHistory()}}}function 
x1($){return{generators:Array.from($.entries()).filter(([,X])=>X.type==="ggml-stt").map(([X,j])=>{let{instance:W}=j,H=W.getStatus?.()||{},Q=H.queueStatus||{processing:!1,queuedCount:0};return{id:X,type:j.type,refCount:j.refCount,repoId:W.info?.model?.repoId||null,quantization:W.info?.model?.quantization||null,modelType:W.info?.model?.modelType||null,variant:W.info?.runtime?.variant||null,hasContext:H.hasContext||!1,contextRefCount:H.contextRefCount||0,queueStatus:Q}}),history:{modelLoads:J0.getModelLoadHistory(),transcriptions:J0.getTranscriptionHistory()}}}function S4($){return{timestamp:new Date().toISOString(),ggmlLlm:F1($),ggmlStt:x1($)}}var u3=9999,o,R1=($,Z,X)=>{if($.push({...Z,timestamp:Z.timestamp||new Date().toISOString()}),$.length>X)$.shift()},t,J0,F4=0;var m0=S(()=>{o=new y3;o.setMaxListeners(100);t=new E1,J0=new E1});import b from"node:path";import E0 from"node:os";import{stat as M0,mkdir as m3,open as p3,unlink as e,readFile as v4,writeFile as h4,rename as C4,readdir as f3}from"node:fs/promises";import{createHash as k1}from"node:crypto";import{gguf as g3}from"@huggingface/gguf";import{loadModel as c3,getBackendDevicesInfo as I4,isLibVariantAvailable as y4}from"@fugood/llama.node";import d3 from"bytes";import*as l3 from"node:stream/web";class i4{constructor($,Z){this.config=$,this.plan=Z,this.baseDir=$.runtime.cache_dir,this.enabled=$.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=H$($.runtime.session_cache?.max_size_bytes,10737418240),this.maxEntries=$.runtime.session_cache?.max_entries||1000,this.metadata={variant:Z.info?.runtime?.variant||null,n_gpu_layers:Z.info?.runtime?.n_gpu_layers||0,n_ctx:Z.info?.runtime?.n_ctx||0,modelPath:Z.localPath,cacheTypeK:Z.info?.runtime?.cache_type_k||"f16",cacheTypeV:Z.info?.runtime?.cache_type_v||"f16",kvUnified:Z.info?.runtime?.kv_unified??null,swaFull:Z.info?.runtime?.swa_full??null,flashAttnType:Z.info?.runtime?.flash_attn_type||"off"},this.cacheMap=null,this.initialized=!1}async 
initialize(){if(!this.enabled||this.initialized)return;try{await H0(n0(this.baseDir)),await H0(g0(this.baseDir)),await H0(n4(this.baseDir)),this.cacheMap=await t3(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch($){console.warn(`[SessionCache] Failed to initialize: ${$.message}`),this.enabled=!1}}async findMatchingEntry($){if(!this.enabled||!this.cacheMap)return null;let Z=Y$($,this.metadata,this.cacheMap);if(Z){let{entry:X}=Z;if(!await N$(X.stateFilePath))return console.log(`[SessionCache] Removing stale entry: ${X.id}`),delete this.cacheMap.entries[X.id],this.cacheMap.totalSize-=X.stateFileSize||0,await P1(this.cacheMap,this.baseDir).catch(()=>{}),null;return X.lastAccessedAt=new Date().toISOString(),await P1(this.cacheMap,this.baseDir).catch(()=>{}),{entry:X}}return null}async prepareCompletionOptions($,Z){if(!this.enabled)return{options:$,cacheEntry:null,promptPrefix:null};let X=await this.findMatchingEntry(Z);if(X){let{entry:j}=X;return console.log(`[SessionCache] Found matching entry: ${j.id} (${j.fullText.length} chars, loadStateSize=${j.loadStateSize})`),{options:{...$,load_state_path:j.stateFilePath},cacheEntry:j,promptPrefix:j.fullText}}return{options:$,cacheEntry:null,promptPrefix:null}}async saveCompletionState($,Z,X,j=0){if(!this.enabled)return null;let W=e3($,this.metadata);if(this.cacheMap.entries[W])return console.log(`[SessionCache] Entry already exists for prompt: ${W}`),await e(X).catch(()=>{}),this.cacheMap.entries[W];let H=$+Z,Q=$$(W,this.baseDir);try{await H0(b.dirname(Q)),await C4(X,Q);let J=await M0(Q),N={id:W,promptText:$,completionText:Z,fullText:H,promptTokenCount:j,stateFilePath:Q,stateFileSize:J.size,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[W]=N,this.cacheMap.totalSize+=J.size,await Q$(this.cacheMap,this.maxSizeBytes,this.maxEntries),await 
P1(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${W} (${J.size} bytes)`),N}catch(J){return console.warn(`[SessionCache] Failed to save state: ${J.message}`),await e(X).catch(()=>{}),null}}async generateTempStatePath(){return await H0(g0(this.baseDir)),Z$(this.baseDir)}async cleanup(){await J$(this.baseDir)}}async function o4($,Z,X={}){let{globalDownloadManager:j=null}=X,W=l0(Z),H=await V$(W),Q=new i4(W,H);await Q.initialize();let J={id:$,type:"ggml-llm",config:W,plan:H,info:H.info,contexts:new Map,downloads:new Map,globalDownloadManager:j,sessionCache:Q,finalized:!1},N=async()=>{if(J.finalized)return;J.finalized=!0;let Y=Array.from(J.contexts.values()),K=Y.map((B)=>{if(B.released)return Promise.resolve(!1);if(B.releaseRequested||B.releaseTimer)return Promise.resolve(!1);if(B.refCount=Math.max(0,B.refCount-1),B.refCount>0)return Promise.resolve(!1);return f0(J,B)});if(await Promise.allSettled(K),Y.length===0||Y.every((B)=>B.released))await J.sessionCache.cleanup()},G=async(Y={})=>{let{onProgress:K}=Y,L=await _$(J,K);return{modelInfo:L.modelInfo?{...L.modelInfo}:null,runtime:{...J.plan.info.runtime},download:{...J.plan.info.download}}},_=async()=>{if(J.finalized)return!1;let Y=O0(J),K=J.contexts.get(Y);if(!K)return!1;return q$(J,K,!1)},O=async(Y={})=>{let{options:K={},useCache:L=!0}=Y,B=O0(J),E=J.contexts.get(B);if(!E)throw Error(`Context "${B}" not initialized`);await E.ready;let M=K.prompt||"";if(!M&&K.messages){let x=await E.context.getFormattedChat(K.messages,K.chat_template||K.chatTemplate,{jinja:K.jinja??!0,tools:K.tools,parallel_tool_calls:K.parallel_tool_calls,tool_choice:K.tool_choice,enable_thinking:K.enable_thinking,add_generation_prompt:K.add_generation_prompt,now:K.now,chat_template_kwargs:K.chat_template_kwargs});M=x?.prompt||x||""}if(L&&J.sessionCache.enabled&&M){let{options:x}=await J.sessionCache.prepareCompletionOptions(K,M),k=await J.sessionCache.generateTempStatePath(),m=(await 
E.context.tokenize(M))?.tokens?.length||0,p={...x,save_state_path:k,save_state_size:m};console.log(`[SessionCache] save_state_size=${m} (prompt tokens)`);let n={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return G$(E.context,p,J.sessionCache,M,k,m,J.id,n)}let F={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return U$(E.context,K,J.id,F)},z=async(Y={})=>{let{text:K="",params:L={}}=Y,B=O0(J),E=J.contexts.get(B);if(!E)throw Error(`Context "${B}" not initialized`);await E.ready;let M=await E.context.tokenize(K,L);if(!M)return{tokens:[]};let F=Array.from(M.tokens??[]).map((x)=>Number(x));return{...M,tokens:F}},V=async(Y={})=>{let{tokens:K=[]}=Y,L=O0(J),B=J.contexts.get(L);if(!B)throw Error(`Context "${L}" not initialized`);await B.ready;let E=K.map((M)=>Number(M));return B.context.detokenize(E)},R=async(Y={})=>{let{messages:K=[],template:L,params:B}=Y,E=O0(J),M=J.contexts.get(E);if(!M)throw Error(`Context "${E}" not initialized`);return await M.ready,await M.context.getFormattedChat(K,L,B)},q=()=>Array.from(J.contexts.values()).some((Y)=>!Y.released&&(Y.releaseRequested||Y.releaseTimer||Y.refCount>0)),A=()=>{J.finalized=!1},w=()=>{let Y=[],K=Array.from(J.contexts.entries()).map(([L,B])=>{let E={key:L,refCount:B.refCount,hasModel:Boolean(B.context)},M=B.context.parallel.getStatus();return E.parallelStatus=M,Y.push({contextKey:L,...M}),E});return{id:J.id,type:J.type,repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null,nCtx:J.plan.info.runtime?.n_ctx||null,nParallel:J.plan.info.runtime?.n_parallel||null,contexts:K,parallelStatuses:Y}},U=(Y)=>{let 
K=Array.from(J.contexts.entries()).map(([L,B])=>B.context.parallel.subscribeToStatus((E)=>{Y({contextKey:L,...E})}));return{remove:()=>{K.forEach((L)=>{if(L?.remove)L.remove()})}}};return{id:$,type:"ggml-llm",info:H.info,contexts:J.contexts,initContext:G,completion:O,tokenize:z,detokenize:V,applyChatTemplate:R,releaseContext:_,finalize:N,getStatus:w,subscribeParallelStatus:U,hasPendingReleases:q,resetFinalized:A}}async function t4($,Z,X={}){let{onProgress:j,onComplete:W,onError:H}=X;try{let Q=l0($),J=await D1(Q),N=r4(Q,J),{repoId:G}=J;if(await R0(N,J.size)){if(console.log(`[Download] Model already exists: ${G} at ${N}`),typeof W==="function")W({localPath:N,repoId:G,alreadyExists:!0});return{started:!1,localPath:N,repoId:G,alreadyExists:!0}}let O=Z.getDownload(N);if(O)return console.log(`[Download] Already downloading: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:N,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof H==="function")H(V)}),{started:!1,localPath:N,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting download: ${G}`);let z=(async()=>{try{if(J.isSplit&&J.splitCount>0){let V=/-(\d{5})-of-(\d{5})\.gguf$/,R=b.dirname(N),q=J.splitCount,A=0;for(let w=1;w<=q;w+=1){let U=String(w).padStart(5,"0"),Y=J.filename.replace(V,`-${U}-of-${String(q).padStart(5,"0")}.gguf`),K=`${Q.model.base_url.replace(/\/+$/,"")}/${J.repoId}/resolve/${J.revision}/${Y}`,L=b.join(R,Y);if(!await R0(L))await c0(K,J.headers,L,null,(E)=>{if(E>=0&&Number.isFinite(E)){let M=(A+E)/q;if(console.log(`[Download] ${G}: ${Math.round(M*100)}%`),typeof j==="function")j(M)}});A+=1}}else await c0(J.url,J.headers,N,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}});if(console.log(`[Download] Completed: ${G}`),typeof W==="function")W({localPath:N,repoId:G})}catch(V){if(console.error(`[Download] Failed: ${G}`,V.message),typeof H==="function")H(V);throw V}finally{Z.deleteDownload(N)}})();return 
Z.setDownload(N,z),{started:!0,localPath:N,repoId:G}}catch(Q){if(console.error("[Download] Failed to start download:",Q.message),typeof H==="function")H(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function O$($){let Z=l0($),X=await D1(Z),j=await s4(X.url,X.headers,Z.runtime.cache_dir),{arch:W,nCtxTrain:H,nLayer:Q,nEmbd:J,nHead:N,nHeadKv:G,nEmbdHeadK:_,nEmbdHeadV:O,quantVersion:z,fileType:V}=y0(j),R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(N))?Number(N):0,w=Number.isFinite(Number(G))?Number(G):A,U=A>0&&q>0?q/A:128,Y=_!=null&&Number.isFinite(Number(_))?Number(_):U,K=O!=null&&Number.isFinite(Number(O))?Number(O):U,L=I0({arch:W,metadata:j,nLayer:R}),B=L&&Number.isFinite(Number(L.kvLayers))?Number(L.kvLayers):R,E=Math.max(0,Math.floor(Number(B)||0)),F=(Z.model.n_ctx?Number(Z.model.n_ctx):null)||H||4096,x={k:Z.model.cache_type_k,v:Z.model.cache_type_v},k=X.size>0?X.size:0,D=N0({layerCount:E,headKvCount:w,embdHeadKCount:Y,embdHeadVCount:K,cacheTypes:x,swaConfig:L,kvUnified:Z.model.kv_unified,nParallel:Z.model.n_parallel,swaFull:Z.model.swa_full}),m=Z.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,p=Z.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.cpu_memory_fraction))):d0,n=D(F),i=await a4(Z,{modelBytes:k,kvCacheBytes:n}),c=(i.selected.totalMemory||0)*m,T=Math.max(0,E0.totalmem()*p),f=i.selected.hasGpu?c:T,h=u0({maxCtx:F,availableMemory:f,modelBytes:k,kvBytesForCtx:D}),s=D(F),I=D(h);return{kvInfo:{nCtxTrain:H,nLayer:R,nEmbd:q,nHeadKv:w,nEmbdHeadK:Y,nEmbdHeadV:K,nHeadCount:A,nHeadKvCount:w,kvLayerCount:E,swa:L?.enabled?{window:L.window,pattern:L.pattern,denseFirst:L.denseFirst,type:L.type,layers:L.swaLayers}:null},modelBytes:k,kvCacheBytes:s,limitedKvCacheBytes:I,memoryLimitedCtx:h,quantization:{name:X.quantization||null,fileType:V,version:z}}}async function 
e4($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...H}=Z,Q=null,J=null,N=null,G=null,_=null,O=null;if(W)try{let{modelBytes:K,kvCacheBytes:L,limitedKvCacheBytes:B,memoryLimitedCtx:E,kvInfo:M,quantization:F}=await O$(W);Q=K,J=L,N=B,G=E,_=M,O=F}catch(K){}let z=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,V=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,R=await _0({...H,platform:process.platform,totalMemoryInBytes:E0.totalmem(),backend:"ggml-llm",includeBreakdown:j,gpuMemoryFraction:z,cpuMemoryFraction:V,dependencies:{getBackendDevicesInfo:I4,isLibVariantAvailable:y4},modelBytes:Q,kvCacheBytes:J,limitedKvCacheBytes:N}),q=R.selected,A=b4(q);q.modelBytes=Q||null,q.kvCacheBytes=J||null,q.memoryLimitedCtx=G||null,q.limitedKvCacheBytes=N||null,q.kvInfo=_||null,q.quantization=O||null;let w=null,U=null;if($){let K=b4($);U={...$,score:K};let L="buttress",B="buttress-higher-score";if(!R.ok)L="local",B="buttress-unavailable";else if(!K&&K!==0)L="buttress",B="missing-client-score";else{let{fit:E,limitedFit:M}=U,F=q?.fit,x=q?.limitedFit,k=E?.fitsInGpu||E?.fitsInCpu||M?.fitsInGpu||M?.fitsInCpu,D=F?.fitsInGpu||F?.fitsInCpu||x?.fitsInGpu||x?.fitsInCpu;if(k&&!D)L="local",B="client-fits-in-memory";else if(D&&!k)L="buttress",B="buttress-fits-in-memory";else if(K>A*X)L="local",B="client-better";else if(A>K*X)L="buttress",B="buttress-better";else L="either",B="comparable-scores"}w={buttressScore:A,clientScore:K,threshold:X,recommendation:L,reason:B}}if(!R.ok&&!w)w={buttressScore:A,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let Y=null;if(W)Y={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,nCtx:W.model?.n_ctx||null,cacheKType:W.model?.cache_type_k||"f16",cacheVType:W.model?.cache_type_v||"f16"};return{type:"ggml-llm",timestamp:new 
Date().toISOString(),buttress:R,client:U,comparison:w,modelConfig:Y}}var n3=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return l3},i3,u4,s3,m4=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};m4($[X],j)}else $[X]=j}),$},r3=".gguf",p4="https://huggingface.co",f4="https://huggingface.co/api",d,g4,d0=0.5,w0,S1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},p0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","snapdragon","default"].includes(Z))return Z;return null},l0=($={})=>{let Z=JSON.parse(JSON.stringify(w0));if(m4(Z,$),Z.backend.variant=p0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(S1(Z.backend.variant_preference).map(p0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=["cuda","vulkan","snapdragon","default"];if(Z.runtime.prefer_variants=Array.from(new Set(S1(Z.runtime.prefer_variants).map(p0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(S1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return 
Z.model.n_parallel=Math.max(1,Number(Z.model.n_parallel)||4),Z.model.n_batch=Math.max(1,Number(Z.model.n_batch)||512),Z.model.base_url=Z.model.base_url||p4,Z.model.api_base=Z.model.api_base||f4,Z.runtime.cache_dir=Z.runtime.cache_dir?b.resolve(Z.runtime.cache_dir):d,Z.runtime.session_cache={...w0.runtime.session_cache,...Z.runtime.session_cache||{}},Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||w0.runtime.context_release_delay_ms),Z},P4=($)=>{let Z=$.toLowerCase();return g4.find((j)=>Z.includes(j))||null},a3=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(p0).filter(Boolean)))},H0=async($)=>{await m3($,{recursive:!0})},o3=($=d)=>b.join($,".metadata-cache"),c4=($,Z,X=d)=>{let j=k1("sha256").update($).digest("hex");return b.join(o3(X),Z,`${j}.json`)},d4=async($,Z,X=d)=>{try{let j=c4($,Z,X),W=await v4(j,"utf-8");return console.log(`[Cache] Hit ${Z} cache:`,b.basename(j)),JSON.parse(W,(H,Q)=>{if(typeof Q==="string"&&Q.startsWith("__bigint__"))return BigInt(Q.slice(10));return Q})}catch(j){return null}},T1=async($,Z,X,j=d)=>{try{let W=c4($,Z,j);await H0(b.dirname(W)),await h4(W,JSON.stringify(X,(H,Q)=>{if(typeof Q==="bigint")return`__bigint__${Q.toString()}`;return Q}),"utf-8"),console.log(`[Cache] Wrote ${Z} cache:`,b.basename(W))}catch(W){console.warn(`[Cache] Failed to write ${Z} cache:`,W.message)}},n0=($=d)=>b.join($,".session-state-cache"),l4=($=d)=>b.join(n0($),"cache-map.json"),g0=($=d)=>b.join(n0($),"temp"),n4=($=d)=>b.join(n0($),"states"),T4=()=>({version:1,entries:{},totalSize:0}),t3=async($=d)=>{try{let Z=l4($),X=await v4(Z,"utf-8"),j=JSON.parse(X);if(!j.entries||typeof j.entries!=="object")return T4();return j}catch{return T4()}},P1=async($,Z=d)=>{let X=l4(Z),j=`${X}.tmp.${Date.now()}`;try{await H0(b.dirname(X)),await 
h4(j,JSON.stringify($,null,2),"utf-8"),await C4(j,X)}catch(W){throw await e(j).catch(()=>{}),W}},e3=($,Z)=>{let X=JSON.stringify({text:$,model:Z.modelPath,variant:Z.variant,n_gpu_layers:Z.n_gpu_layers,n_ctx:Z.n_ctx,cacheTypeK:Z.cacheTypeK,cacheTypeV:Z.cacheTypeV,kvUnified:Z.kvUnified,swaFull:Z.swaFull,flashAttnType:Z.flashAttnType});return k1("sha256").update(X).digest("hex").slice(0,24)},$$=($,Z=d)=>b.join(n4(Z),`${$}.bin`),Z$=($=d)=>{let Z=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return b.join(g0($),`${Z}.bin`)},X$=($,Z)=>$.modelPath===Z.modelPath&&$.variant===Z.variant&&$.n_gpu_layers===Z.n_gpu_layers&&$.n_ctx>=Z.n_ctx&&$.cacheTypeK===Z.cacheTypeK&&$.cacheTypeV===Z.cacheTypeV&&$.kvUnified===Z.kvUnified&&$.swaFull===Z.swaFull&&$.flashAttnType===Z.flashAttnType,j$=($,Z)=>{let X=Math.min($.length,Z.length),j=0;while(j<X&&$[j]===Z[j])j+=1;return j},W$=100,Y$=($,Z,X)=>{let j=Object.values(X.entries);console.log(`[SessionCache] Finding match for promptText (${$.length} chars)`),console.log(`[SessionCache] Checking ${j.length} cache entries`);let H=j.filter((Q)=>X$(Q.metadata,Z)).reduce((Q,J)=>{let N=j$($,J.fullText);if(N>=W$&&N>Q.prefixLen)return{entry:J,prefixLen:N};return Q},{entry:null,prefixLen:0});if(H.entry)return console.log(`[SessionCache] Prefix match found: ${H.entry.id} (${H.prefixLen}/${H.entry.fullText.length} chars)`),{entry:H.entry,prefixLength:H.prefixLen};return console.log("[SessionCache] No match found"),null},Q$=async($,Z,X)=>{let j=Object.values($.entries).sort((J,N)=>new Date(J.lastAccessedAt)-new Date(N.lastAccessedAt)),W=$.totalSize,H=Object.keys($.entries).length,Q=j.filter((J)=>{let N=W>Z,G=H>X;if(!N&&!G)return!1;return W-=J.stateFileSize||0,H-=1,!0});return await Promise.all(Q.map(async(J)=>{await e(J.stateFilePath).catch(()=>{}),delete $.entries[J.id],console.log(`[SessionCache] Evicted entry: ${J.id}`)})),$.totalSize=Math.max(0,W),Q.map((J)=>J.id)},J$=async($=d)=>{let Z=g0($);try{let X=await 
f3(Z),j=Date.now(),W=3600000;await Promise.all(X.map(async(H)=>{let Q=b.join(Z,H),J=await M0(Q).catch(()=>null);if(J&&j-J.mtimeMs>3600000)await e(Q).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${H}`)}))}catch{}},N$=async($)=>{try{return await M0($),!0}catch{return!1}},H$=($,Z)=>{if($==null)return Z;if(typeof $==="number")return $;if(typeof $==="string"){let X=d3.parse($);return X!=null?X:Z}return Z},k4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},D4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},s4=async($,Z,X=d)=>{let j=JSON.stringify({url:$,headers:Z}),W=await d4(j,"range-metadata",X);if(W)return W;let H=!/^https?:/i.test($),{metadata:Q}=await g3($,{fetch,additionalFetchHeaders:Z,allowLocalFile:H});return await T1(j,"range-metadata",Q,X),Q},r4=($,Z)=>{if($.model.local_path)return b.resolve($.model.local_path);let X=Z.repoId.split("/"),j=b.join($.runtime.cache_dir,...X,Z.revision);return b.join(j,Z.filename)},R0=async($,Z)=>{try{let X=await M0($);if(!Z)return!0;return X.size===Z}catch(X){return!1}},c0=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await H0(b.dirname(X));let H=await fetch($,{headers:Z});if(!H.ok||!H.body)throw Error(`Failed to download ${$}: ${H.status} ${H.statusText}`);let Q=await p3(X,"w"),J=Number(H.headers.get("content-length"))||j||0,N=0,G=0.05;try{await H.body.pipeTo(new s3({async write(_){if(await Q.write(_),N+=_.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,N/J);while(O>=G)W(G),G+=0.05}},async close(){if(await Q.close(),typeof 
W==="function")W(1)},async abort(_){throw await Q.close().catch(()=>{}),await e(X).catch(()=>{}),_}}))}catch(_){throw await Q.close().catch(()=>{}),await e(X).catch(()=>{}),_}if(j){let _=await M0(X);if(_.size!==j)throw await e(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${_.size}`)}},D1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),H=await d4(W,"artifact-info",j);if(H)return H;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let Y=await D4($.model.url,{headers:Q}),K=Number(Y.headers.get("content-length"))||null,L=$.model.filename||$.model.url.split("/").pop(),B={repoId:Z,revision:X,filename:L,url:$.model.url,size:K,headers:Q};return await T1(W,"artifact-info",B,j),B}let{filename:J}=$.model,N=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await k4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((Y)=>Y.rfilename||Y.path||Y.filename).filter((Y)=>typeof Y==="string"&&Y.endsWith(r3));if(O.length===0)throw Error(`No GGUF artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:g4,V=()=>{let Y=z.find((K)=>{return O.find((B)=>B.toLowerCase().includes(K))});if(Y)return{filename:O.find((L)=>L.toLowerCase().includes(Y)),quantization:Y};return null};if(!J){let Y=V()||{filename:O[0],quantization:null},{filename:K,quantization:L}=Y;J=K,N=L||P4(J)}else if(!N)N=P4(J);let R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=/-(\d{5})-of-(\d{5})\.gguf$/,A=J.match(q),w=null;if(A){let[,,Y]=A,K=await 
k4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),L=K?.siblings||K?.files||[],B=Number(Y);w=0;for(let E=1;E<=B;E+=1){let M=String(E).padStart(5,"0"),F=J.replace(q,`-${M}-of-${Y}.gguf`),x=L.find((D)=>(D.rfilename||D.path||D.filename)===F),k=Number(x?.size);if(Number.isFinite(k)&&k>0)w+=k}}else{let Y=await D4(R,{headers:Q});w=Number(Y.headers.get("content-length"))||null}let U={repoId:Z,revision:X,filename:J,url:R,size:w,quantization:N,headers:Q,isSplit:Boolean(A),splitCount:A?Number(A[2]):0};return await T1(W,"artifact-info",U,j),U},a4=async($,{modelBytes:Z=null,kvCacheBytes:X=null}={})=>{let j=a3($),[W,...H]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,N=await _0({platform:process.platform,totalMemoryInBytes:E0.totalmem(),backend:"ggml-llm",variant:W||null,preferVariants:H,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:I4,isLibVariantAvailable:y4},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this platform`)});if(!N.ok||!N.selected){let z=(N.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). 
Errors: ${z}`)}let _=(N.attempts||[]).map(G);return{selected:G(N.selected),attempts:_}},V$=async($)=>{let Z=await D1($),X=await s4(Z.url,Z.headers,$.runtime.cache_dir),{arch:j,nCtxTrain:W,nLayer:H,nEmbd:Q,nHead:J,nHeadKv:N,nEmbdHeadK:G,nEmbdHeadV:_,quantVersion:O,fileType:z}=y0(X),V=Number.isFinite(Number(H))?Number(H):0,R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(N))?Number(N):q,w=q>0&&R>0?R/q:128,U=G!=null&&Number.isFinite(Number(G))?Number(G):w,Y=_!=null&&Number.isFinite(Number(_))?Number(_):w,K=I0({arch:j,metadata:X,nLayer:V}),L=K&&Number.isFinite(Number(K.kvLayers))?Number(K.kvLayers):V,B=Math.max(0,Math.floor(Number(L)||0)),E={use_mmap:$.model.use_mmap??$.runtime.use_mmap,use_mlock:$.model.use_mlock??$.runtime.use_mlock,n_threads:$.model.n_threads??$.runtime.n_threads,n_ctx:$.model.n_ctx??$.runtime.n_ctx,n_batch:$.model.n_batch??$.runtime.n_batch,n_ubatch:$.model.n_ubatch??$.runtime.n_ubatch,n_cpu_moe:$.model.n_cpu_moe??$.runtime.n_cpu_moe,n_parallel:$.model.n_parallel??$.runtime.n_parallel,cpu_mask:$.model.cpu_mask??$.runtime.cpu_mask,cpu_strict:$.model.cpu_strict??$.runtime.cpu_strict,devices:$.model.devices??$.runtime.devices,n_gpu_layers:$.model.n_gpu_layers??$.runtime.n_gpu_layers,flash_attn_type:$.model.flash_attn_type??$.runtime.flash_attn_type,cache_type_k:$.model.cache_type_k??$.runtime.cache_type_k,cache_type_v:$.model.cache_type_v??$.runtime.cache_type_v,kv_unified:$.model.kv_unified??$.runtime.kv_unified,swa_full:$.model.swa_full??$.runtime.swa_full,ctx_shift:$.model.ctx_shift??$.runtime.ctx_shift},M=E.n_ctx?Number(E.n_ctx):null,F=M||W||4096,x=[],k=[],D=!0;if(M&&W&&M>W){D=!1;let Q0=`Requested context length (${M}) exceeds model training context (${W})`;x.push(Q0),k.push(Q0),F=W}if(M&&!W)x.push("Model metadata missing training context length, using requested value");let 
m={k:E.cache_type_k,v:E.cache_type_v},p=Z.size>0?Z.size:0,n=N0({layerCount:B,headKvCount:A,embdHeadKCount:U,embdHeadVCount:Y,cacheTypes:m,swaConfig:K,kvUnified:E.kv_unified,nParallel:E.n_parallel,swaFull:E.swa_full}),i=n(F),v=await a4($,{modelBytes:p,kvCacheBytes:i}),c=v.selected.totalMemory||0,T=c*($.backend.gpu_memory_fraction||1),f=$.backend.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,h=Math.max(0,E0.totalmem()*f),s=v.selected.hasGpu?T:h,I=u0({maxCtx:F,availableMemory:s,modelBytes:p,kvBytesForCtx:n});if(!M&&I){let Q0=W?Math.min(I,W):I,U1=Math.max(32,Q0);if(U1<F)x.push(`Context length capped to ${U1} by memory limits`);F=U1}if(F>I)F=I;let a=Math.floor(I);console.log(`[buttress] Memory-limited context length: ${a}`);let q0=n(F),T0=p+q0,G0=V?p/(V+1):p,k0=0;if(v.selected.hasGpu&&G0>0)k0=Math.min(V+1,Math.max(0,Math.floor(T/G0)));console.log(`[buttress] Auto GPU layer capacity (${v.selected.variant}): ${k0}/${V+1}`);let H1;if(E.n_gpu_layers==="auto"||E.n_gpu_layers==null)H1=k0;else H1=Math.max(0,Math.min(Number(E.n_gpu_layers)||0,V+1));let M3=(()=>{let Q0=E.flash_attn_type&&String(E.flash_attn_type).toLowerCase();if(Q0==="on"||Q0==="off")return Q0;if(Q0==="auto")return v.selected.hasGpu?"auto":"off";return v.selected.hasGpu?"auto":"off"})(),F3=$.runtime.cache_dir,V1=r4($,Z),Q4=await 
R0(V1,Z.size),x3={ok:D,backend:"ggml-llm",warnings:x,errors:k,model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,url:Z.url,sizeBytes:Z.size,metadata:{architecture:j,n_ctx_train:W,n_layer:V,n_embd:R,quantization_version:O,file_type:z,kv_layer_count:B,swa:K?.enabled?{window:K.window,pattern:K.pattern,dense_first:K.denseFirst,type:K.type,layers:K.swaLayers}:null}},runtime:{...E,variant:v.selected.variant,n_ctx:F,requested_ctx:M,n_gpu_layers:H1,auto_gpu_layers:k0,flash_attn_type:M3,cache_type_k:m.k,cache_type_v:m.v,estimated_max_n_ctx:a},resources:{modelBytes:p,kvCacheBytes:q0,totalEstimatedBytes:T0,gpuCapacityBytes:c,gpuUsableBytes:T,cpuUsableBytes:h,fit:v.selected.fit},devices:{selected:v.selected,attempts:v.attempts},download:{cacheDir:F3,localPath:V1,exists:Q4},timestamp:new Date().toISOString()};return{config:$,info:x3,artifact:Z,metadata:{arch:j,nCtxTrain:W,nLayer:V,nEmbd:R},devices:v,cacheTypes:m,localPath:V1,localExists:Q4}},U$=($,Z,X=null,j=null)=>{let W,H=Date.now(),Q=0;return new u4({async start(J){try{let N=await $.parallel.completion(Z,(V,R)=>{if(!R)return;if(R.token)Q+=1;J.enqueue({event:"token",data:{requestId:V,...R}})}),{requestId:G}=N;W=N.stop;let _=await N.promise;console.log("[Completion] Result:",_),J.enqueue({event:"result",data:{requestId:G,..._}}),J.close();let 
O=Date.now()-H,z=_.timings||{};t.addCompletion({id:`completion-${G}`,generatorId:X,requestId:G,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,cacheTokens:z.cache_n??0,promptTokens:z.prompt_n??0,tokensGenerated:z.predicted_n??Q,tokensPerSecond:z.predicted_per_second??0,promptPerSecond:z.prompt_per_second??0,durationMs:O,success:!0,interrupted:_.interrupted||!1,contextFull:_.context_full||_.contextFull||!1})}catch(N){J.enqueue({event:"error",data:{message:N?.message||String(N)}}),J.error(N),t.addCompletion({id:`completion-${Date.now()}`,generatorId:X,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,durationMs:Date.now()-H,tokensGenerated:Q,success:!1,error:N?.message||String(N)})}},cancel(){if(W)W()}})},G$=($,Z,X,j,W,H,Q=null,J=null)=>{let N,G="",_=!1,O=Date.now(),z=0;return new u4({async start(V){try{let R=await $.parallel.completion(Z,(Y,K)=>{if(!K)return;if(K.token)G+=K.token,z+=1;V.enqueue({event:"token",data:{requestId:Y,...K}})}),{requestId:q}=R;N=R.stop;let A=await R.promise;if(A.text)G=A.text;else if(A.content)G=A.content;_=!A.interrupted&&!A.context_full,console.log("[Completion] Result:",A),V.enqueue({event:"result",data:{requestId:q,...A}}),V.close();let w=Date.now()-O,U=A.timings||{};if(t.addCompletion({id:`completion-${q}`,generatorId:Q,requestId:q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,cacheTokens:U.cache_n??0,promptTokens:U.prompt_n??H??0,tokensGenerated:U.predicted_n??z,tokensPerSecond:U.predicted_per_second??0,promptPerSecond:U.prompt_per_second??0,durationMs:w,success:!0,interrupted:A.interrupted||!1,contextFull:A.context_full||A.contextFull||!1,usedCache:Boolean(Z.load_state_path)}),_&&X.enabled&&G)X.saveCompletionState(j,G,W,H).catch((Y)=>{console.warn("[SessionCache] Save failed:",Y.message)});else 
if(W)e(W).catch(()=>{})}catch(R){V.enqueue({event:"error",data:{message:R?.message||String(R)}}),V.error(R),t.addCompletion({id:`completion-${Date.now()}`,generatorId:Q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,durationMs:Date.now()-O,tokensGenerated:z,success:!1,error:R?.message||String(R)}),e(W).catch(()=>{})}},cancel(){if(N)N();e(W).catch(()=>{})}})},O0=($)=>{let Z={model:$.plan.localPath,runtime:$.plan.info.runtime};return k1("sha256").update(JSON.stringify(Z)).digest("hex").slice(0,24)},K$=async($,Z,X,j=null)=>{let{config:W,localPath:H,artifact:Q}=$;if($.localExists&&!Z.has(H)){if($.info.download.exists=!0,typeof X==="function")X(0.5);return H}if(W.model.local_path&&!W.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let J=H;if(j){let N=j.getDownload(J);if(N){console.log(`[ensureModelFile] Waiting for global download: ${Q.repoId}`);try{if(await N,await R0(H,Q.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(0.5);return H}}catch(G){console.warn(`[ensureModelFile] Global download failed, will retry: ${G.message}`)}}}if(!Z.has(J))Z.set(J,(async()=>{if(Q.isSplit&&Q.splitCount>0){let N=/-(\d{5})-of-(\d{5})\.gguf$/,G=b.dirname(H),_=Q.splitCount,O=0;for(let z=1;z<=_;z+=1){let V=String(z).padStart(5,"0"),R=Q.filename.replace(N,`-${V}-of-${String(_).padStart(5,"0")}.gguf`),q=`${W.model.base_url.replace(/\/+$/,"")}/${Q.repoId}/resolve/${Q.revision}/${R}`,A=b.join(G,R);if(!await R0(A))await c0(q,Q.headers,A,null,(U)=>{if(U>=0&&Number.isFinite(U)){let Y=(O+U)/_,K=Math.round(Y*100);if(console.log(`Downloading model splits: ${Math.min(100,K)}%`),typeof X==="function")X(Y*0.5)}});O+=1}}else console.log("Downloading model: 0%"),await c0(Q.url,Q.headers,H,Q.size,(N)=>{if(N>=0&&Number.isFinite(N)){let G=Math.round(N*100);if(console.log(`Downloading model: ${Math.min(100,G)}%`),typeof 
X==="function")X(N*0.5)}});$.localExists=!0,$.info.download.exists=!0})());try{await Z.get(J)}finally{Z.delete(J)}return H},_$=async($,Z)=>{let X=O0($),j=$.contexts.get(X);if(j&&!j.released){if(j.releaseTimer)clearTimeout(j.releaseTimer),j.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${X}"`);if(j.releaseRequested=!1,j.refCount+=1,console.log(`[Context] Reusing existing context "${X}", refCount=${j.refCount}`),typeof Z==="function")Z(0);if(!j.context)await j.ready;if(typeof Z==="function")Z(1);return j}if(j)console.log(`[Context] Record exists but released=${j.released}, creating new context`);else console.log(`[Context] No existing record for "${X}", creating new context`);j={key:X,refCount:1,ready:null,released:!1},$.contexts.set(X,j),j.ready=(async()=>{let W=Date.now(),H=await K$($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let Q={model:H,n_threads:$.plan.info.runtime.n_threads,use_mmap:$.plan.info.runtime.use_mmap,use_mlock:$.plan.info.runtime.use_mlock,cpu_mask:$.plan.info.runtime.cpu_mask,cpu_strict:$.plan.info.runtime.cpu_strict,devices:$.plan.info.runtime.devices,n_ctx:$.plan.info.runtime.n_ctx,n_gpu_layers:$.plan.info.runtime.n_gpu_layers,n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch,n_ubatch:$.plan.info.runtime.n_ubatch,n_cpu_moe:$.plan.info.runtime.n_cpu_moe,flash_attn_type:$.plan.info.runtime.flash_attn_type,ctx_shift:$.plan.info.runtime.ctx_shift,kv_unified:$.plan.info.runtime.kv_unified,swa_full:$.plan.info.runtime.swa_full,lib_variant:$.plan.info.runtime.variant};if($.plan.info.runtime.flash_attn_type!=="off")Q.cache_type_k=$.plan.info.runtime.cache_type_k,Q.cache_type_v=$.plan.info.runtime.cache_type_v;console.log("[Context] Load Options:",Q);let J;try{if(J=await c3(Q,(N)=>{if(typeof Z==="function"){if(Z(0.5+N*0.25),N%5===0)console.log("[Context] Load Model Progress:",N)}}),$.plan.info.runtime.n_parallel){if(!await 
J.parallel.enable({n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch}))throw Error("Failed to enable parallel decoding mode for context")}if(typeof Z==="function")Z(1);return j.context=J,j.modelInfo=J.getModelInfo(),t.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,nCtx:$.plan.info.runtime?.n_ctx||null,nGpuLayers:$.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-W,success:!0}),j}catch(N){if(t.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-W,success:!1,error:N?.message||String(N)}),J)try{J.release()}catch(G){}throw N}})();try{return await j.ready,j}catch(W){throw $.contexts.delete(X),W}},f0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;Z.released=!0,$.contexts.delete(Z.key);try{Z.context?.parallel?.disable?.()}catch(j){}return await Z.context?.release?.(),!0},q$=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return f0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return f0($,Z);return console.log(`[Context] Scheduling release in ${W}ms for context "${Z.key}"`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] Release cancelled, refCount=${Z.refCount} for context "${Z.key}"`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing context "${Z.key}" after ${W}ms delay`),await f0($,Z)},W),!0},b1=($)=>{let Z=l0($);return 
Z.model.repo_id||Z.model.repository||Z.model.model||null},/* b4: numeric score for a capability report. Returns 0 when the report is missing, the report's own `score` when that is a finite number, and otherwise the value computed by K0 from the report. */b4=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return K0($)};/* v1: initializer wrapped by S (the run-once lazy wrapper defined at the top of the bundle). It calls B0() and m0(), then assigns the module-level bindings used by the ggml-llm helpers: the stream constructors (i3, u4, s3), the default model cache directory d, the quantization tag list g4 and the default configuration w0. */var v1=S(()=>{B0();m0();i3=n3(),{ReadableStream:u4,WritableStream:s3}=i3,/* d: default cache root, <home>/.buttress/models */d=b.join(E0.homedir(),".buttress","models"),/* g4: quantization tags matched as substrings of file names; order matters because P4 returns the first tag that matches and D1 uses this list as its default preference order. */g4=["mxfp4","q8_0","q6_k","q6","q5_k_m","q5_k_s","q5_k","q5_1","q5_0","q4_k_m","q4_k_s","q4_k","q4_1","q4_0","q3","q2"],/* w0: default configuration; l0 deep-copies it and merges the caller's config on top. session_cache.max_size_bytes is 10 GiB and context_release_delay_ms is 10 seconds. */w0={backend:{type:"ggml-llm",variant:null,variant_preference:["cuda","vulkan","snapdragon","default"],gpu_memory_fraction:0.85,cpu_memory_fraction:d0},model:{repo_id:null,revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:"auto",allow_local_file:!1,local_path:null,api_base:f4,base_url:p4},runtime:{cache_dir:d,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10737418240,max_entries:1000},context_release_delay_ms:1e4}}});import X0 from"node:path";import p1 from"node:os";import{stat as j2,mkdir as L$,open as A$,unlink as h1,readFile as z$,writeFile as B$}from"node:fs/promises";import{createHash as w$}from"node:crypto";import{initWhisper as R$}from"@fugood/whisper.node";import{getBackendDevicesInfo as W2,isLibVariantAvailable as Y2}from"@fugood/llama.node";import*as E$ from"node:stream/web";/* O2: first-in-first-out task queue that runs one async task at a time. */class O2{/* Starts idle with an empty queue and no current task id. */constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}/* Queue the task function `$` (optionally labelled with task id `Z`) and return a promise that settles with the task's result or error. Also kicks the processing loop. */async enqueue($,Z=null){return new Promise((X,j)=>{this.queue.push({task:$,resolve:X,reject:j,taskId:Z}),this.processNext()})}/* Run the task at the head of the queue unless one is already running or the queue is empty. The task's outcome is forwarded to the promise returned by enqueue; the finally block clears the busy state and moves on to the next queued task. */async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:$,resolve:Z,reject:X,taskId:j}=this.queue.shift();this.currentTaskId=j;try{let W=await $();Z(W)}catch(W){X(W)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}/* Snapshot of the queue: whether a task is running, how many are waiting, and the id of the running task. */getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}}async function 
L2($,Z,X={}){let{globalDownloadManager:j=null}=X,W=a0(Z),H=await v$(W),Q={id:$,type:"ggml-stt",config:W,plan:H,info:H.info,contextRecord:null,downloads:new Map,globalDownloadManager:j,queue:new O2,finalized:!1},J=async()=>{if(Q.finalized)return;Q.finalized=!0;let A=Q.contextRecord;if(!A)return;if(A.released)return;if(A.releaseRequested||A.releaseTimer)return;if(A.refCount=Math.max(0,A.refCount-1),A.refCount>0)return;await s0(Q,A)},N=async(A={})=>{let{onProgress:w}=A;try{let U=await I$(Q,w);return{modelInfo:U.modelInfo&&typeof U.modelInfo==="object"?{...U.modelInfo}:null,runtime:{...Q.plan.info.runtime},download:{...Q.plan.info.download}}}catch(U){throw console.error("[Context] Error initializing context:",U),U}},G=async()=>{if(Q.finalized)return!1;let A=Q.contextRecord;if(!A)return!1;return y$(Q,A)},_=async(A={})=>{let{audioPath:w,audioData:U,options:Y={}}=A,K=Q.contextRecord;if(!K)throw Error("Context not initialized");let L={...Y};if(Q.plan.info.runtime.max_threads&&L.maxThreads==null)L.maxThreads=Q.plan.info.runtime.max_threads;let B=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,E=Date.now();return Q.queue.enqueue(async()=>{await K.ready;try{let M;if(U){let F=C$(U),{promise:x}=K.context.transcribeData(F,L);M=await x}else{if(!w)throw Error("audioPath or audioData is required for transcription");let F=X0.resolve(w),{promise:x}=K.context.transcribe(F,L);M=await x}return J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-E,segmentCount:M?.segments?.length||0,textLength:M?.text?.length||0,success:!0}),M}catch(M){throw 
J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-E,success:!1,error:M?.message||String(M)}),M}},B)},O=async(A={})=>_(A),z=async(A={})=>_(A),V=()=>{let A=Q.contextRecord;if(!A)return!1;return!A.released&&(A.releaseRequested||A.releaseTimer||A.refCount>0)},R=()=>{Q.finalized=!1},q=()=>({id:Q.id,type:Q.type,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,hasContext:Boolean(Q.contextRecord?.context),contextRefCount:Q.contextRecord?.refCount||0,queueStatus:Q.queue.getStatus()});return{id:$,type:"ggml-stt",info:H.info,queue:Q.queue,initContext:N,transcribe:O,transcribeData:z,releaseContext:G,finalize:J,getStatus:q,hasPendingReleases:V,resetFinalized:R}}async function A2($,Z,X={}){let{onProgress:j,onComplete:W,onError:H}=X;try{let Q=a0($),J=await f1(Q),N=_2(Q,J),{repoId:G}=J;if(await r0(N,J.size)){if(console.log(`[Download] STT model already exists: ${G} at ${N}`),typeof W==="function")W({localPath:N,repoId:G,alreadyExists:!0});return{started:!1,localPath:N,repoId:G,alreadyExists:!0}}let O=Z.getDownload(N);if(O)return console.log(`[Download] Already downloading STT model: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:N,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof H==="function")H(V)}),{started:!1,localPath:N,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${G}`);let z=(async()=>{try{if(await q2(J.url,J.headers,N,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}}),console.log(`[Download] Completed STT model: ${G}`),typeof W==="function")W({localPath:N,repoId:G})}catch(V){if(console.error(`[Download] Failed STT model: 
${G}`,V.message),typeof H==="function")H(V);throw V}finally{Z.deleteDownload(N)}})();return Z.setDownload(N,z),{started:!0,localPath:N,repoId:G}}catch(Q){if(console.error("[Download] Failed to start STT download:",Q.message),typeof H==="function")H(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function z2($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...H}=Z,Q=null,J=null,N=null;if(W)try{let w=a0(W),U=await f1(w);Q=U.size??null,{processingBufferBytes:J}=C0({modelBytes:Q}),N=U.quantization||null}catch(w){}let G=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,_=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,O=await _0({...H,platform:process.platform,totalMemoryInBytes:p1.totalmem(),backend:"ggml-stt",includeBreakdown:j,gpuMemoryFraction:G,cpuMemoryFraction:_,dependencies:{getBackendDevicesInfo:W2,isLibVariantAvailable:Y2},modelBytes:Q,kvCacheBytes:J}),z=O.selected,V=X2(z);if(z)z.modelBytes=Q||null,z.processingBytes=J||null,z.quantization=N||null;let R=null,q=null;if($){let w=X2($);q={...$,score:w};let U="buttress",Y="buttress-higher-score";if(!O.ok)U="local",Y="buttress-unavailable";else if(!w&&w!==0)U="buttress",Y="missing-client-score";else if($.fit&&z?.fit){let K=$.fit.fitsInGpu||$.fit.fitsInCpu,L=z.fit.fitsInGpu||z.fit.fitsInCpu;if(K&&!L)U="local",Y="client-fits-in-memory";else if(L&&!K)U="buttress",Y="buttress-fits-in-memory";else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores"}else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores";R={buttressScore:V,clientScore:w,threshold:X,recommendation:U,reason:Y}}if(!O.ok&&!R)R={buttressScore:V,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let 
A=null;if(W)A={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,filename:W.model?.filename||null};return{type:"ggml-stt",timestamp:new Date().toISOString(),buttress:O,client:q,comparison:R,modelConfig:A}}var M$=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return E$},F$,Q2=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};Q2($[X],j)}else $[X]=j}),$},x$=".bin",J2="https://huggingface.co",N2="https://huggingface.co/api",L0,y1,u1,H2="fp16",V2=0.5,S$,U2=($)=>{if(!$)return null;let Z=$.toLowerCase();return S$.find((X)=>Z.includes(X))||null},m1,C1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},i0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","default"].includes(Z))return Z;return null},a0=($={})=>{let Z=JSON.parse(JSON.stringify(m1));if(Q2(Z,$),Z.backend.variant=i0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(C1(Z.backend.variant_preference||y1).map(i0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=[...y1];if(Z.runtime.prefer_variants=Array.from(new Set(C1(Z.runtime.prefer_variants).map(i0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(C1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return 
Z.model.base_url=Z.model.base_url||J2,Z.model.api_base=Z.model.api_base||N2,Z.runtime.cache_dir=Z.runtime.cache_dir?X0.resolve(Z.runtime.cache_dir):L0,Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||m1.runtime.context_release_delay_ms),Z},I1=($)=>{let Z=$.toLowerCase();return u1.find((j)=>Z.includes(j))||null},P$=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(i0).filter(Boolean)))},G2=async($)=>{await L$($,{recursive:!0})},T$=($=L0)=>X0.join($,".metadata-cache"),K2=($,Z,X=L0)=>{let j=w$("sha256").update($).digest("hex");return X0.join(T$(X),Z,`${j}.json`)},k$=async($,Z,X=L0)=>{try{let j=K2($,Z,X),W=await z$(j,"utf-8");return JSON.parse(W)}catch(j){return null}},$2=async($,Z,X,j=L0)=>{try{let W=K2($,Z,j);await G2(X0.dirname(W)),await B$(W,JSON.stringify(X),"utf-8")}catch(W){}},D$=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},Z2=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},_2=($,Z)=>{if($.model.local_path)return X0.resolve($.model.local_path);let X=Z.repoId.split("/"),j=X0.join($.runtime.cache_dir,...X,Z.revision);return X0.join(j,Z.filename)},r0=async($,Z)=>{try{let X=await j2($);if(!Z)return!0;return X.size===Z}catch(X){return!1}},q2=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await G2(X0.dirname(X));let H=await fetch($,{headers:Z});if(!H.ok||!H.body)throw Error(`Failed 
to download ${$}: ${H.status} ${H.statusText}`);let Q=await A$(X,"w"),J=Number(H.headers.get("content-length"))||j||0,N=0,G=0.05;try{await H.body.pipeTo(new F$({async write(_){if(await Q.write(_),N+=_.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,N/J);while(O>=G)W(G),G+=0.05}},async close(){if(await Q.close(),typeof W==="function")W(1)},async abort(_){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),_}}))}catch(_){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),_}if(j){let _=await j2(X);if(_.size!==j)throw await h1(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${_.size}`)}},f1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),H=await k$(W,"artifact-info",j);if(H)return H;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let U=await Z2($.model.url,{headers:Q}),Y=Number(U.headers.get("content-length"))||null,K=$.model.filename||$.model.url.split("/").pop(),L={repoId:Z,revision:X,filename:K,url:$.model.url,size:Y,quantization:I1(K||""),headers:Q};return await $2(W,"artifact-info",L,j),L}let{filename:J}=$.model,N=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await D$(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((U)=>U.rfilename||U.path||U.filename).filter((U)=>typeof U==="string"&&U.endsWith(x$));if(O.length===0)throw Error(`No model artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:u1,V=()=>{for(let U of z)if(U===H2){let Y=O.find((K)=>{let 
L=K.toLowerCase();return!u1.some((B)=>L.includes(B))});if(Y)return{filename:Y,quantization:null}}else{let Y=O.find((K)=>K.toLowerCase().includes(U));if(Y)return{filename:Y,quantization:U}}return null};if(!J){let U=V()||{filename:O[0],quantization:null},{filename:Y,quantization:K}=U;J=Y,N=K||I1(J)}else if(!N)N=I1(J);let R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=await Z2(R,{headers:Q}),A=Number(q.headers.get("content-length"))||null,w={repoId:Z,revision:X,filename:J,url:R,size:A,quantization:N,headers:Q,isSplit:!1,splitCount:0};return await $2(W,"artifact-info",w,j),w},b$=async($,{modelBytes:Z=null,processingBytes:X=null}={})=>{let j=P$($),[W,...H]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):m1.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):V2,N=await _0({platform:process.platform,totalMemoryInBytes:p1.totalmem(),backend:"ggml-stt",variant:W||null,preferVariants:H,variantPreference:$.backend.variant_preference,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:W2,isLibVariantAvailable:Y2},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this platform`)});if(!N.ok||!N.selected){let z=(N.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). 
Errors: ${z}`)}let _=(N.attempts||[]).map(G);return{selected:G(N.selected),attempts:_}},v$=async($)=>{let Z=await f1($),X=C0({modelBytes:Z.size>0?Z.size:0}),j=await b$($,{modelBytes:X.modelBytes,processingBytes:X.processingBufferBytes}),W=j.selected.hasGpu&&(j.selected.fit?.fitsInGpu!==void 0?j.selected.fit.fitsInGpu:!0);if($.model.use_gpu===!1)W=!1;let H=$.model.use_flash_attn&&String($.model.use_flash_attn).toLowerCase(),Q;if(H==="on"||H==="true")Q=!0;else if(H==="off"||H==="false")Q=!1;else Q=W;let J=$.runtime.cache_dir,N=_2($,Z),G=await r0(N,Z.size),_={ok:!0,backend:"ggml-stt",model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,modelType:U2(Z.filename),url:Z.url,sizeBytes:Z.size},runtime:{variant:j.selected.variant,use_gpu:W,use_flash_attn:Q,max_threads:$.runtime.max_threads?Number($.runtime.max_threads):null},resources:{...X,gpuCapacityBytes:j.selected.gpuTotalBytes,gpuUsableBytes:j.selected.gpuUsableBytes,cpuUsableBytes:j.selected.cpuUsableBytes,fit:j.selected.fit},devices:{selected:j.selected,attempts:j.attempts},download:{cacheDir:J,localPath:N,exists:G},timestamp:new Date().toISOString()};return{config:$,info:_,artifact:Z,memory:X,devices:j,localPath:N,localExists:G}},h$=async($,Z,X,j=null)=>{let{localPath:W,artifact:H,config:Q}=$;if($.localExists){if(typeof X==="function")X(1);return W}if(j){let G=j.getDownload(W);if(G){console.log(`[ensureModelFile] Waiting for global STT download: ${H.repoId}`);try{if(await G,await r0(W,H.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(1);return W}}catch(_){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${_.message}`)}}}let J=Z.get(W);if(J){if(await J,typeof X==="function")X(1);return W}let N=(async()=>{if(Q.model.allow_local_file){if(!await r0(W,H.size))throw Error(`Local model file not found: ${W}`);return W}return await q2(H.url,H.headers,W,H.size,X),W})();Z.set(W,N);try{return await 
N,W}finally{Z.delete(W)}},C$=($)=>{if(!$)return null;if($ instanceof ArrayBuffer)return $;if(ArrayBuffer.isView($))return $.buffer;if(typeof $==="string"){let Z=$.startsWith("data:")?$.split(",")[1]||"":$,X=Buffer.from(Z,"base64");return X.buffer.slice(X.byteOffset,X.byteOffset+X.byteLength)}throw Error("Unsupported audioData format, expected base64 string or ArrayBuffer")},I$=async($,Z)=>{if($.contextRecord&&!$.contextRecord.released){if($.contextRecord.releaseTimer)clearTimeout($.contextRecord.releaseTimer),$.contextRecord.releaseTimer=null,console.log("[Context] Cancelled pending STT release");if($.contextRecord.releaseRequested=!1,$.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${$.contextRecord.refCount}`),typeof Z==="function")Z(0);if(!$.contextRecord.context)await $.contextRecord.ready;if(typeof Z==="function")Z(1);return $.contextRecord}if($.contextRecord)console.log(`[Context] STT record exists but released=${$.contextRecord.released}, creating new context`);else console.log("[Context] No existing STT record, creating new context");let X={refCount:1,ready:null,released:!1};$.contextRecord=X,X.ready=(async()=>{let j=Date.now();try{if(typeof Z==="function")Z(0);let W=await h$($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let H=await R$({filePath:W,useFlashAttn:$.plan.info.runtime.flash_attn_type==="on",useGpu:$.plan.info.runtime.n_gpu_layers>0,nThreads:$.plan.info.runtime.n_threads},$.plan.info.runtime.variant);if(typeof Z==="function")Z(1);X.context=H;try{X.modelInfo=H.getModelInfo()}catch(Q){X.modelInfo=null}return J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,useGpu:$.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-j,success:!0}),X}catch(W){throw 
J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-j,success:!1,error:W?.message||String(W)}),W}})();try{if(await X.ready,typeof Z==="function")Z(1);return X}catch(j){throw $.contextRecord=null,j}},s0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;return Z.released=!0,$.contextRecord=null,await Z.context?.release?.(),!0},y$=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return s0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return s0($,Z);return console.log(`[Context] Scheduling STT release in ${W}ms`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] STT release cancelled, refCount=${Z.refCount}`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${W}ms delay`),await s0($,Z)},W),!0},g1=($)=>{let Z=a0($),X=Z.model.repo_id||Z.model.repository||Z.model.model||null;if(!X)return null;let j=U2(Z.model.filename);if(j)return`${X}:${j}`;return X},X2=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return K0($)};var 
c1=S(()=>{B0();m0();({WritableStream:F$}=M$()),L0=X0.join(p1.homedir(),".buttress","models"),y1=["cuda","vulkan","default"],u1=["q8_0","q5_1","q5_0","q4_1","q4_0"],S$=["large-v3-turbo","distil-large-v3","large-v3","large-v2","large-v1","large","distil-medium","medium.en","medium","small.en-tdrz","distil-small.en","small.en","small","base.en","base","tiny.en","tiny"],m1={backend:{type:"ggml-stt",variant:null,variant_preference:y1,gpu_memory_fraction:0.85,cpu_memory_fraction:V2},model:{repo_id:"BricksDisplay/whisper-ggml",revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:["q8_0",H2,"q5_1"],allow_local_file:!1,local_path:null,api_base:N2,base_url:J2,use_gpu:!0,use_flash_attn:"auto"},runtime:{cache_dir:L0,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}}});async function r($,Z=null,X={}){if($==="ggml-llm")return e4(Z,X);if($==="ggml-stt")return z2(Z,X);throw Error(`Unknown backend type: ${$}`)}var d1=S(()=>{v1();c1()});var C;var B2=S(()=>{C={name:"@fugood/buttress-backend-core",private:!0,type:"module",version:"2.23.0-beta.38",main:"src/index.js",types:"lib/types/index.d.ts",scripts:{build:"tsc --noResolve --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js"},dependencies:{"@fugood/buttress-hardware-guardrails":"^2.23.0-beta.37","@fugood/llama.node":"^1.4.11","@fugood/whisper.node":"^1.0.11","@huggingface/gguf":"^0.3.2","@iarna/toml":"^3.0.0",bytes:"^3.1.0"}}});import y from"node:os";import w2 from"node:fs";import R2 from"node:path";import{execSync as o0}from"node:child_process";import E2 from"@iarna/toml";async function F2({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${C.name} v${C.version}`),console.log(`Generating model capabilities comparison...
2
3
  `),X.push(`${C.name} v${C.version}`),X.push(`## Model Capabilities Comparison
3
4
  `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))K[L]=j(K[L]||{},B);else K[L]=B}),K},W=Z||{},{server:H,generators:Q=[],...J}=W,N=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((K)=>K?.type==="ggml-llm");if(Y.length>0&&U){let K=Y.find((L)=>L.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},_=[];for(let U=0;U<$.length;U+=1){let Y=$[U];console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let K=G(Y);K={...K||{},model:{...J.runtime,...K?.model||{},repo_id:Y}};let L=await r("ggml-llm",null,{config:K,includeBreakdown:!0});_.push({modelId:Y,capabilities:L,modelInfo:L.buttress?.selected||null,modelConfig:L.modelConfig||null})}let O=(U)=>U?(U/1024/1024/1024).toFixed(2):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |"),X.push("|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|"),_.forEach(({modelId:U,modelInfo:Y,modelConfig:K})=>{let L=Y?.quantization?.name?.toUpperCase()||"N/A",B=O(Y?.modelBytes),E=K?.nCtx||Y?.kvInfo?.nCtxTrain||"N/A",M=N0(Y),F=Number(E),x=Y?.kvCacheBytes||(M&&Number.isFinite(F)&&F>0?M(F):M&&M(Y?.kvInfo?.nCtxTrain||0))||null,k=O(x),D=O(Y?.modelBytes&&x?Y.modelBytes+x:Y?.fit?.totalRequiredBytes),m=z(Y?.fit?.fitsInGpu),p=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${L} | ${B} | ${E} | ${k} | ${D} | ${m} | ${p} |`);let n=Y?.memoryLimitedCtx!=null||Y?.limitedFit!=null,i=!Y?.fit?.fitsInGpu||!Y?.fit?.fitsInCpu;if(n&&i){let 
v=Y?.memoryLimitedCtx||E,c=Number(v),T=Y?.limitedKvCacheBytes||M&&Number.isFinite(c)&&c>0&&M(c)||null,f=O(T),h=O(Y?.modelBytes&&T?Y.modelBytes+T:Y?.limitedFit?.totalRequiredBytes),s=z(Y?.limitedFit?.fitsInGpu),I=z(Y?.limitedFit?.fitsInCpu);if(v!==E||f!==k||h!==D)X.push(`| ↳ Limited | - | ${B} | ${v} | ${f} | ${h} | ${s} | ${I} |`)}}),X.push(`
4
5
  ---`),X.push(`
@@ -7,9 +8,9 @@ import{createRequire as x3}from"node:module";var M3=Object.defineProperty;var F3
7
8
  ${process.argv[0]} ${process.argv[1]} ${R}
8
9
  \`\`\``),X.push(`
9
10
  ### Package Information`),X.push(`- **Name:** ${C.name}`),X.push(`- **Version:** ${C.version}`),C.description)X.push(`- **Description:** ${C.description}`);if(Z&&Object.keys(Z).length>0){X.push(`
10
- ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=w2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=B2.join(process.cwd(),A);z2.writeFileSync(w,X.join(`
11
+ ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=E2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=R2.join(process.cwd(),A);w2.writeFileSync(w,X.join(`
11
12
  `),"utf8"),console.log(`
12
- Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function M2({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${C.name} v${C.version}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let X=Z||{},{server:j,generators:W=[],...H}=X,Q=(V={},R={})=>{let q=Array.isArray(V)?[...V]:{...V};return Object.entries(R||{}).forEach(([A,w])=>{if(w&&typeof w==="object"&&!Array.isArray(w))q[A]=Q(q[A]||{},w);else q[A]=w}),q},J=(V)=>Q(JSON.parse(JSON.stringify(H)),V||{}),G=((V)=>{if(Array.isArray(W)&&W.length>0){let R=W.filter((q)=>q?.type==="ggml-llm");if(R.length>0){if(V){let q=R.find((A)=>A.model?.repo_id===V);if(q)return J(q)}}}if(Object.keys(H).length>0)return J({});return null})($);if($)G={...G||{},model:{...G?.model||{},repo_id:$}};let _=await r("ggml-llm",null,{config:G,includeBreakdown:!0}),O=_.buttress?.selected||null,z=_.modelConfig||null;if($||z?.repoId){console.log(`
13
+ Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function x2({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${C.name} v${C.version}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let X=Z||{},{server:j,generators:W=[],...H}=X,Q=(V={},R={})=>{let q=Array.isArray(V)?[...V]:{...V};return Object.entries(R||{}).forEach(([A,w])=>{if(w&&typeof w==="object"&&!Array.isArray(w))q[A]=Q(q[A]||{},w);else q[A]=w}),q},J=(V)=>Q(JSON.parse(JSON.stringify(H)),V||{}),G=((V)=>{if(Array.isArray(W)&&W.length>0){let R=W.filter((q)=>q?.type==="ggml-llm");if(R.length>0){if(V){let q=R.find((A)=>A.model?.repo_id===V);if(q)return J(q)}}}if(Object.keys(H).length>0)return J({});return null})($);if($)G={...G||{},model:{...G?.model||{},repo_id:$}};let _=await r("ggml-llm",null,{config:G,includeBreakdown:!0}),O=_.buttress?.selected||null,z=_.modelConfig||null;if($||z?.repoId){console.log(`
13
14
  === Model Information ===`);let V=$||z?.repoId;if(console.log(`Repository ID: ${V}`),z?.quantization)console.log(`Quantization: ${z.quantization}`);if(z?.nCtx)console.log(`Context Length: ${z.nCtx}`);if(O?.quantization?.name)console.log(`Model Quantization: ${O.quantization.name.toUpperCase()}`);let R=z?.cache_type_k||"f16",q=z?.cache_type_v||"f16";if(console.log(`KV Cache Type: K=${R}, V=${q}`),O?.modelBytes&&O?.kvCacheBytes){if(console.log(`Model Size: ${(O.modelBytes/1024/1024/1024).toFixed(2)} GB`),O.kvInfo)console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(O.kvInfo)})`);else console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);if(console.log(`Total Required Memory: ${((O.modelBytes+O.kvCacheBytes)/1024/1024/1024).toFixed(2)} GB`),O.memoryLimitedCtx!=null){let A=O.memoryLimitedCtx,w=O.kvInfo?.nCtxTrain;if(w)console.log(`
14
15
  Memory-Limited Context: ${A} (Train: ${w})`);else console.log(`
15
16
  Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(O.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(_.buttress?.selected?.fit){let{totalRequiredBytes:A}=_.buttress.selected.fit;console.log(`Total Required Memory: ${(A/1024/1024/1024).toFixed(2)} GB`)}}if(_.buttress?.selected){let{selected:V}=_.buttress;console.log(`
@@ -18,35 +19,35 @@ Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limit
18
19
  Backend Variant: ${V.variant}`),console.log(`Performance Score: ${V.score}`),V.fit){if(console.log(`
19
20
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${V.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${V.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${V.fit.limiting}`),V.limitedFit)console.log(`
20
21
  --- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(V.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${V.limitedFit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU (Limited): ${V.limitedFit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor (Limited): ${V.limitedFit.limiting}`)}}console.log(`
21
- === Full Capabilities JSON ===`),console.log(JSON.stringify(_,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}async function F2({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${C.name} v${C.version}`),console.log(`Generating STT model capabilities comparison...
22
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(_,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}async function S2({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${C.name} v${C.version}`),console.log(`Generating STT model capabilities comparison...
22
23
  `),X.push(`${C.name} v${C.version}`),X.push(`## STT Model Capabilities Comparison
23
- `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))K[L]=j(K[L]||{},B);else K[L]=B}),K},W=Z||{},{server:H,generators:Q=[],...J}=W,N=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((K)=>K?.type==="ggml-stt");if(Y.length>0&&U){let K=Y.find((L)=>L.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},_=[];for(let U=0;U<$.length;U+=1){let Y=$[U],{repoId:K,filename:L}=R2(Y);console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let B=G(K);B={...B||{},model:{...B?.model||{},repo_id:K,...L&&{filename:L}}};let E=await r("ggml-stt",null,{config:B,includeBreakdown:!0});_.push({modelId:Y,repoId:K,filename:L,capabilities:E,modelInfo:E.buttress?.selected||null,modelConfig:E.modelConfig||null})}let O=(U)=>U?(U/1024/1024).toFixed(1):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),X.push("|-------|-----------|------------------------|---------------------|----------|----------|"),_.forEach(({modelId:U,modelInfo:Y})=>{let K=O(Y?.modelBytes),L=O(Y?.processingBytes||Y?.kvCacheBytes),B=O(Y?.fit?.totalRequiredBytes),E=z(Y?.fit?.fitsInGpu),M=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${K} | ${L} | ${B} | ${E} | ${M} |`)}),X.push(`
24
+ `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))K[L]=j(K[L]||{},B);else K[L]=B}),K},W=Z||{},{server:H,generators:Q=[],...J}=W,N=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((K)=>K?.type==="ggml-stt");if(Y.length>0&&U){let K=Y.find((L)=>L.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},_=[];for(let U=0;U<$.length;U+=1){let Y=$[U],{repoId:K,filename:L}=M2(Y);console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let B=G(K);B={...B||{},model:{...B?.model||{},repo_id:K,...L&&{filename:L}}};let E=await r("ggml-stt",null,{config:B,includeBreakdown:!0});_.push({modelId:Y,repoId:K,filename:L,capabilities:E,modelInfo:E.buttress?.selected||null,modelConfig:E.modelConfig||null})}let O=(U)=>U?(U/1024/1024).toFixed(1):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),X.push("|-------|-----------|------------------------|---------------------|----------|----------|"),_.forEach(({modelId:U,modelInfo:Y})=>{let K=O(Y?.modelBytes),L=O(Y?.processingBytes||Y?.kvCacheBytes),B=O(Y?.fit?.totalRequiredBytes),E=z(Y?.fit?.fitsInGpu),M=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${K} | ${L} | ${B} | ${E} | ${M} |`)}),X.push(`
24
25
  ---`),X.push(`
25
26
  ### System Information`);let V=null;if(process.platform!=="win32")try{V=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${y.hostname()}`),X.push(`- **OS:** ${y.type()} ${y.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${y.cpus().length}`),X.push(`- **Total System Memory:** ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`),_.length>0){let Y=_[0].capabilities.buttress?.selected;if(Y){let K=Y.cpuTotalBytes>0?(Y.cpuUsableBytes/Y.cpuTotalBytes*100).toFixed(0):0;if(X.push(`- **Usable CPU Memory:** ${(Y.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(Y.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.hasGpu){let L=Y.devices.filter((B)=>B.type==="gpu");if(L.length>0){let B=L[0];X.push(`- **GPU Backend:** ${B.backend}`),X.push(`- **GPU Name:** ${B.deviceName}`),X.push(`- **GPU Total Memory:** ${(B.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let E=Y.gpuTotalBytes>0?(Y.gpuUsableBytes/Y.gpuTotalBytes*100).toFixed(0):0;X.push(`- **GPU Usable Memory:** ${(Y.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${E}% of ${(Y.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else X.push("- **GPU:** Not available")}}X.push(`
26
27
  ### Command Used`);let R=process.argv.slice(2).join(" ");if(X.push(`\`\`\`bash
27
28
  ${process.argv[0]} ${process.argv[1]} ${R}
28
29
  \`\`\``),X.push(`
29
30
  ### Package Information`),X.push(`- **Name:** ${C.name}`),X.push(`- **Version:** ${C.version}`),C.description)X.push(`- **Description:** ${C.description}`);if(Z&&Object.keys(Z).length>0){X.push(`
30
- ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=w2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=B2.join(process.cwd(),A);z2.writeFileSync(w,X.join(`
31
+ ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=E2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=R2.join(process.cwd(),A);w2.writeFileSync(w,X.join(`
31
32
  `),"utf8"),console.log(`
32
- STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function x2({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${C.name} v${C.version}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:X,filename:j}=R2($),W=Z||{},{server:H,generators:Q=[],...J}=W,N=(q={},A={})=>{let w=Array.isArray(q)?[...q]:{...q};return Object.entries(A||{}).forEach(([U,Y])=>{if(Y&&typeof Y==="object"&&!Array.isArray(Y))w[U]=N(w[U]||{},Y);else w[U]=Y}),w},G=(q)=>N(JSON.parse(JSON.stringify(J)),q||{}),O=((q)=>{if(Array.isArray(Q)&&Q.length>0){let A=Q.filter((w)=>w?.type==="ggml-stt");if(A.length>0){if(q){let w=A.find((U)=>U.model?.repo_id===q);if(w)return G(w)}}}if(Object.keys(J).length>0)return G({});return null})(X);if(X)O={...O||{},model:{...J.runtime,...O?.model||{},repo_id:X,...j&&{filename:j}}};let z=await r("ggml-stt",null,{config:O,includeBreakdown:!0}),V=z.buttress?.selected||null,R=z.modelConfig||null;if(X||R?.repoId){console.log(`
33
+ STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function P2({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${C.name} v${C.version}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:X,filename:j}=M2($),W=Z||{},{server:H,generators:Q=[],...J}=W,N=(q={},A={})=>{let w=Array.isArray(q)?[...q]:{...q};return Object.entries(A||{}).forEach(([U,Y])=>{if(Y&&typeof Y==="object"&&!Array.isArray(Y))w[U]=N(w[U]||{},Y);else w[U]=Y}),w},G=(q)=>N(JSON.parse(JSON.stringify(J)),q||{}),O=((q)=>{if(Array.isArray(Q)&&Q.length>0){let A=Q.filter((w)=>w?.type==="ggml-stt");if(A.length>0){if(q){let w=A.find((U)=>U.model?.repo_id===q);if(w)return G(w)}}}if(Object.keys(J).length>0)return G({});return null})(X);if(X)O={...O||{},model:{...J.runtime,...O?.model||{},repo_id:X,...j&&{filename:j}}};let z=await r("ggml-stt",null,{config:O,includeBreakdown:!0}),V=z.buttress?.selected||null,R=z.modelConfig||null;if(X||R?.repoId){console.log(`
33
34
  === Model Information ===`);let q=X||R?.repoId;if(console.log(`Repository ID: ${q}`),j)console.log(`Filename: ${j}`);if(V?.modelBytes)console.log(`Model Size: ${(V.modelBytes/1024/1024).toFixed(1)} MB`);let A=V?.processingBytes||V?.kvCacheBytes;if(A)console.log(`Processing Buffer: ${(A/1024/1024).toFixed(1)} MB`);if(V?.modelBytes&&A)console.log(`Total Required Memory: ${((V.modelBytes+A)/1024/1024).toFixed(1)} MB`);else if(z.buttress?.selected?.fit){let{totalRequiredBytes:w}=z.buttress.selected.fit;console.log(`Total Required Memory: ${(w/1024/1024).toFixed(1)} MB`)}}if(z.buttress?.selected){let{selected:q}=z.buttress;console.log(`
34
35
  === Hardware Information ===`);let A=null;if(process.platform!=="win32")try{A=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(A)console.log(`System: ${A}`);else console.log(`Hostname: ${y.hostname()}`),console.log(`OS: ${y.type()} ${y.release()}`);console.log(`Platform: ${q.platform}`),console.log(`CPU Cores: ${y.cpus().length}`),console.log(`Total System Memory: ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`);let w=q.cpuTotalBytes>0?(q.cpuUsableBytes/q.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${w}% of ${(q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),q.hasGpu)console.log(`
35
36
  --- GPU Details ---`),q.devices.filter((Y)=>Y.type==="gpu").forEach((Y)=>{console.log(`GPU Backend: ${Y.backend}`),console.log(`GPU Name: ${Y.deviceName}`),console.log(`GPU Total Memory: ${(Y.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let K=q.gpuTotalBytes>0?(q.gpuUsableBytes/q.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.metadata){if(Y.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(Y.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
36
37
  Backend Variant: ${q.variant}`),console.log(`Performance Score: ${q.score}`),q.fit)console.log(`
37
38
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${q.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${q.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${q.fit.limiting}`)}console.log(`
38
- === Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var R2=($)=>{if(!$)return{repoId:null,filename:null};let[Z,X]=$.split(":");return{repoId:Z,filename:X||null}};var S2=S(()=>{B0();d1();A2()});var e0={};F3(e0,{testGgmlSttCapabilities:()=>x2,testGgmlLlmCapabilities:()=>M2,status:()=>c6,startModelDownload:()=>t0,startGenerator:()=>u6,showSttModelsTable:()=>F2,showModelsTable:()=>E2,globalDownloadManager:()=>l1,ggmlStt:()=>f6,ggmlLlm:()=>p6,getModelIdentifier:()=>g6,getCapabilities:()=>r,generatorRegistry:()=>$0,finalizeGenerator:()=>m6});async function u6($,Z){let j={"ggml-llm":{create:a4,getId:b1},"ggml-stt":{create:q2,getId:g1}}[$];if(!j)throw Error(`Unsupported backend type: ${$}`);let W=j.getId(Z);if(!W)throw Error("Buttress generator config missing repo identifier");let H=`${$}:${W}`,Q=$0.get(H);if(Q)return Q.refCount+=1,Q.instance.resetFinalized?.(),{id:Q.id,info:Q.instance.info};let J=await j.create(H,Z,{globalDownloadManager:l1}),N={id:H,type:J.type,instance:J,refCount:1};return $0.set(H,N),{id:H,info:J.info}}async function m6($){let Z=$0.get($);if(!Z)return!1;if(Z.refCount-=1,Z.refCount<=0){if(await Z.instance.finalize(),!(Z.instance.hasPendingReleases?.()??!1))$0.delete($)}return!0}function g6($,Z){if($==="ggml-llm")return b1(Z);if($==="ggml-stt")return g1(Z);return null}async function t0($,Z,X={}){let W={"ggml-llm":o4,"ggml-stt":O2}[$];if(!W)return{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${$}`};return W(Z,l1,X)}var $0,l1,y6=($)=>{let Z=$0.get($);if(!Z)throw Error(`Unknown generator id "${$}"`);return Z},V0=($,Z)=>{let X=y6($);if(X.type!==Z)throw Error(`Generator "${$}" does not support ${Z} backend`);return X.instance},p6,f6,c6;var $1=S(()=>{v1();c1();d1();m0();S2();$0=new Map,l1={downloads:new Map,getDownload($){return 
this.downloads.get($)||null},setDownload($,Z){this.downloads.set($,Z)},deleteDownload($){this.downloads.delete($)},isDownloading($){return this.downloads.has($)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([$,Z])=>({localPath:$,promise:Z}))}};p6={async initContext($,Z){return V0($,"ggml-llm").initContext(Z)},async completion($,Z){return V0($,"ggml-llm").completion(Z)},async tokenize($,Z){return V0($,"ggml-llm").tokenize(Z)},async detokenize($,Z){return V0($,"ggml-llm").detokenize(Z)},async applyChatTemplate($,Z){return V0($,"ggml-llm").applyChatTemplate(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-llm")throw Error(`Generator "${$}" does not support ggml-llm backend`);return X.instance.releaseContext(Z)}},f6={async initContext($,Z){return V0($,"ggml-stt").initContext(Z)},async transcribe($,Z){return V0($,"ggml-stt").transcribe(Z)},async transcribeData($,Z){return V0($,"ggml-stt").transcribeData(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-stt")throw Error(`Generator "${$}" does not support ggml-stt backend`);return X.instance.releaseContext(Z)}};c6={getFullStatus:()=>x4($0),getGgmlLlmStatus:()=>F1($0),getGgmlSttStatus:()=>x1($0),subscribeToStatus:M1,subscribeToStatusWithId:F4,llmStatusTracker:t,sttStatusTracker:J0,statusEmitter:o}});import{node as d6}from"@elysiajs/node";import{Elysia as l6}from"elysia";var n6,U0=($)=>new l6({adapter:n6?d6():void 0,...$});var F0=S(()=>{n6=typeof process<"u"&&process.versions&&process.versions.node});import{t as j0}from"elysia";var i6,s6=({store:{serverInfo:$}})=>({id:$.id,name:$.name,version:$.version,generators:$.generators,authentication:$.authentication}),n1=($)=>{let Z=U0(),X=$.autodiscover.http?.path??"/buttress/info";return Z.get(X,s6,{response:i6}),Z};var 
P2=S(()=>{F0();i6=j0.Object({id:j0.String(),name:j0.String(),version:j0.String(),generators:j0.Array(j0.Object({type:j0.String()})),authentication:j0.Object({required:j0.Boolean(),type:j0.Literal("device-group")})})});import{t as W0,file as r6}from"elysia";import{writeFile as T2}from"node:fs/promises";import i1 from"node:path";var a6,s1;var k2=S(()=>{F0();a6=typeof process<"u"&&process.versions!=null&&process.versions.node!=null,s1=U0().post("/buttress/upload",async({body:{file:$},store:{config:Z}})=>{let X=`${Date.now()}-${$.name.replace(/[^\dA-Za-z]/g,"_")}`,j=i1.join(Z.server.temp_file_dir,X);try{if(a6)await T2(j,await $.stream());else await T2(j,await $.arrayBuffer());return{ok:!0,filename:X}}catch(W){return{ok:!1,error:String(W)}}},{body:W0.Object({file:W0.File()}),response:W0.Object({ok:W0.Boolean(),filename:W0.Optional(W0.String()),error:W0.Optional(W0.String())})}).get("/buttress/download/:filename",async({params:{filename:$},store:{config:Z},status:X})=>{let j=i1.join(Z.server.temp_file_dir,$);if(i1.relative(Z.server.temp_file_dir,j).includes(".."))return X(400),"Invalid file path";return r6(j)},{params:W0.Object({filename:W0.String()})})});import b2 from"node:path";import o6 from"node:fs/promises";import{fileURLToPath as t6}from"node:url";var e6,$$=($)=>{let{status:Z}=$;if(Z?.getFullStatus)return Z.getFullStatus();return{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},D2=async()=>{try{let $=b2.join(e6,"..","..","public","status.html"),Z=await o6.readFile($,"utf-8");return new Response(Z,{headers:{"Content-Type":"text/html; charset=utf-8"}})}catch($){return console.error("[Status] Failed to serve status page:",$),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}})}},r1;var v2=S(()=>{F0();e6=b2.dirname(t6(import.meta.url)),r1=U0().get("/status",D2).get("/status/",D2).get("/buttress/status",({store:{backend:$}})=>$$($))});import{t as P,sse as 
Z1}from"elysia";import{cors as Z$}from"@elysiajs/cors";async function j$($,Z,X){let W=(Z.generators||[]).filter((q)=>q.type==="ggml-llm");if(W.length===0)throw Error('No ggml-llm generator configured. Add a [[generators]] with type = "ggml-llm" to your config.');let H=W[0],Q=X||H.model?.repo_id;if(X){let q=W.find((A)=>A.model?.repo_id===X);if(q)H=q}else Q=H.model?.repo_id;let J=Q,N=h2.get(J);if(N?.initialized)return N;let{generators:G,server:_,...O}=Z.global||{},z={...O,...H,model:{...H.model,repo_id:Q}};console.log(`[OpenAI] Creating generator for ${J}`);let{id:V}=await $.startGenerator("ggml-llm",z),R={id:V,config:z,repoId:Q,initialized:!1};return h2.set(J,R),await $.ggmlLlm.initContext(V,{}),R.initialized=!0,console.log(`[OpenAI] Generator ready: ${J}`),R}async function W$($,Z,X,j){let W=$.getReader(),H="",Q=null,J=null,N="stop",G={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let O=!1;while(!O){let z=await W.read();if({done:O}=z,O)break;let{event:V,data:R}=z.value;if(V==="token"){if(R.content!=null)H+=R.content;else if(R.token!=null)H+=R.token}else if(V==="result"){if(R.text)H=R.text;else if(R.content)H=R.content;if(R.reasoning_content)Q=R.reasoning_content;if(R.tool_calls?.length>0)J=R.tool_calls.map((q,A)=>({id:q.id||`call_${Z}_${A}`,type:"function",function:{name:q.function?.name||"",arguments:q.function?.arguments||""}})),N="tool_calls";else N=R.interrupted?"length":"stop";G={prompt_tokens:R.prompt_tokens||R.promptTokens||0,completion_tokens:R.tokens_predicted||R.tokensPredicted||0,total_tokens:(R.prompt_tokens||R.promptTokens||0)+(R.tokens_predicted||R.tokensPredicted||0)}}else if(V==="error")throw Error(R.message)}}finally{W.cancel().catch(()=>{})}let _={role:"assistant",content:H||null};if(Q)_.reasoning_content=Q;if(J)_.tool_calls=J;return{id:Z,object:"chat.completion",created:X,model:j,choices:[{index:0,message:_,finish_reason:N}],usage:G}}function X1({global:$}){let Z=U0({prefix:"/oai-compat"});return 
Z.use(Z$({origin:$?.openai_compat?.cors_allowed_origins??!1,methods:["GET","POST","OPTIONS"],allowedHeaders:["Content-Type","Authorization"],maxAge:86400,preflight:!0})),Z.get("/v1/models",({store:X})=>{let{config:j}=X,Q=(j.generators||[]).filter((J)=>J.type==="ggml-llm").map((J)=>{return{id:J.model?.repo_id||"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"}});if(Q.length===0)Q.push({id:"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"});return{object:"list",data:Q}}),Z.post("/v1/chat/completions",async function*({body:j,set:W,store:H}){let{config:Q,backend:J}=H,{messages:N=[],stream:G=!1,model:_,tools:O,temperature:z,stop:V,top_p:R,max_tokens:q,presence_penalty:A,frequency_penalty:w,tool_choice:U,stream_options:Y}=j;if(!N||N.length===0)return W.status=400,{error:{message:"messages is required and must not be empty",type:"invalid_request_error"}};try{let K=await j$(J,Q,_),L=X$(),B=Math.floor(Date.now()/1000),E=K.repoId||"ggml-llm",M={reasoning_format:"auto",messages:N,jinja:!0,add_generation_prompt:!0};if(z!=null)M.temperature=z;if(R!=null)M.top_p=R;if(q!=null)M.n_predict=q;if(V!=null)M.stop=Array.isArray(V)?V:[V];if(A!=null)M.presence_penalty=A;if(w!=null)M.frequency_penalty=w;if(O!=null)M.tools=O;if(U!=null)M.tool_choice=U;let F=await J.ggmlLlm.completion(K.id,{options:M});if(!G)return await W$(F,L,B,E);let x=Y?.include_usage===!0,k=F.getReader(),D="",m="",p=new Map,n=new Map;try{let i=!1;while(!i){let v=await k.read();if({done:i}=v,i)break;let{event:c,data:T}=v.value;if(c==="token"){let f={};if(T.content!=null){let h=T.content;if(h.length>D.length)f.content=h.slice(D.length),D=h}if(T.reasoning_content!=null){let h=T.reasoning_content;if(h.length>m.length)f.reasoning_content=h.slice(m.length),m=h}if(T.tool_calls?.length>0){let h=[];if(T.tool_calls.forEach((s,I)=>{let a={index:I};if(!n.has(I))n.set(I,s.id||`call_${L}_${I}`),a.id=n.get(I),a.type="function";let 
q0=s.function?.arguments||"",T0=p.get(I)||"",G0={};if(!p.has(I)&&s.function?.name)G0.name=s.function.name;if(q0.length>T0.length)G0.arguments=q0.slice(T0.length),p.set(I,q0);if(Object.keys(G0).length>0)a.function=G0,h.push(a);else if(a.id)a.function={name:s.function?.name||"",arguments:""},h.push(a)}),h.length>0)f.tool_calls=h}if(Object.keys(f).length>0)yield Z1({data:JSON.stringify({id:L,object:"chat.completion.chunk",created:B,model:E,choices:[{index:0,delta:f,finish_reason:null}]})})}else if(c==="result"){let f={id:L,object:"chat.completion.chunk",created:B,model:E,choices:[{index:0,delta:{},finish_reason:T.interrupted?"length":"stop"}]};if(x)f.usage={prompt_tokens:T.prompt_tokens||T.promptTokens||0,completion_tokens:T.tokens_predicted||T.tokensPredicted||0,total_tokens:(T.prompt_tokens||T.promptTokens||0)+(T.tokens_predicted||T.tokensPredicted||0)};yield Z1({data:JSON.stringify(f)})}else if(c==="error")yield Z1({data:JSON.stringify({error:{message:T.message,type:"server_error"}})})}yield Z1({data:"[DONE]"})}finally{k.cancel().catch(()=>{})}}catch(K){return console.error("[OpenAI] Chat completion error:",K),W.status=500,{error:{message:K.message||"Internal server error",type:"server_error"}}}},{body:P.Object({model:P.Optional(P.String()),messages:P.Array(P.Any()),stream:P.Optional(P.Boolean()),temperature:P.Optional(P.Number()),top_p:P.Optional(P.Number()),max_tokens:P.Optional(P.Number()),stop:P.Optional(P.Union([P.String(),P.Array(P.String())])),presence_penalty:P.Optional(P.Number()),frequency_penalty:P.Optional(P.Number()),tools:P.Optional(P.Array(P.Any())),tool_choice:P.Optional(P.Any()),stream_options:P.Optional(P.Object({include_usage:P.Optional(P.Boolean())}))})}),Z}var X$=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,h2;var C2=S(()=>{F0();h2=new Map});var I2=S(()=>{P2();k2();v2();C2()});import Q$ from"node:os";import J$ from"node:path";import N$ from"node-machine-id";import H$ from"bytes";import V$ from"ms";var 
x0=($={},Z={})=>{let X=Array.isArray($)?[...$]:{...$};return Object.entries(Z||{}).forEach(([j,W])=>{if(W&&typeof W==="object"&&!Array.isArray(W))X[j]=x0(X[j]||{},W);else X[j]=W}),X},A0=($)=>{if(!$)return null;if(typeof $==="object")return JSON.parse(JSON.stringify($));return null},a1=($,Z)=>{let X=A0($)||{},j=A0(Z)||{};return x0(X,j)},y2=($,Z)=>x0(JSON.parse(JSON.stringify($.global)),Z||{}),o1=($,Z,X,j)=>{if($.generators.length>0){let W=$.generators.filter((H)=>H?.type===X);if(W.length>0){if(j){let H=W.find((Q)=>Z.getModelIdentifier(X,Q)===j);if(H)return y2($,H)}}}if(Object.keys($.global).length>0)return y2($,{});return null},u2,U$=($)=>{if(!$)return null;if($===!0)return{...u2};return x0(u2,$)},g2=($,Z)=>{if(!$.generators||$.generators.length===0)return Z.map((j)=>({type:j}));let X=new Set;if($.generators.forEach((j)=>{if(j.type)X.add(j.type)}),X.size===0)return Z.map((j)=>({type:j}));return Array.from(X).map((j)=>({type:j}))},m2=($,Z,X)=>{if($===void 0)return X;if(typeof $==="number")return $;return Z($)??X},G$=2080,p2=60000,f2=52428800,t1=($)=>{let Z=N$.machineIdSync(),X={server:{id:`buttress-${Z}`,name:`Buttress Server (${Z.slice(-8)})`,port:G$,temp_file_dir:J$.join(Q$.tmpdir(),".buttress"),session_timeout:p2,max_body_size:f2},autodiscover:!1},j=x0(X,A0($)||{}),W=Array.isArray(j.generators)?j.generators:[],{server:H,generators:Q,autodiscover:J,...N}=j;return{autodiscover:U$(J),server:{id:H.id,name:H.name,port:H.port,log_level:H.log_level,temp_file_dir:H.temp_file_dir,max_body_size:m2(H.max_body_size,H$.parse,f2),session_timeout:m2(H.session_timeout,V$,p2)},global:N,generators:W}};var S0=S(()=>{u2={udp:{port:8089,announcements:{enabled:!0,interval:5000},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:"/buttress/info",cors:!0}}});import{z as Z0}from"zod";var c2,d2;var 
l2=S(()=>{S0();c2={getCapabilities:Z0.tuple([Z0.object({type:Z0.string().optional().default("ggml-llm"),config:Z0.any().optional(),currentClientCapabilities:Z0.any().optional(),options:Z0.any().optional()}).nullable().optional()]),startGenerator:Z0.tuple([Z0.string(),Z0.any().optional()]),finalizeGenerator:Z0.tuple([Z0.string()])},d2={async getCapabilities({backend:$,config:Z},X=null){console.log("[Server] Get Capabilities:",X);let j=X||{type:"ggml-llm"},{type:W="ggml-llm",config:H,currentClientCapabilities:Q=null,options:J={}}=j,N=A0(H),G=$.getModelIdentifier(W,N),_=o1(Z,$,W,G),O=a1(_,H);if(Object.keys(O).length===0)throw Error("Buttress server missing generator configuration");if(O.backend=O.backend||{},!O.backend.type)O.backend.type=W;return $.getCapabilities(W,Q,{...J,config:O})},async startGenerator({backend:$,config:Z,session:X},j,W){console.log("[Server] Start Generator:",j,W);let H=A0(W),Q=$.getModelIdentifier(j,H),J=o1(Z,$,j,Q),N=a1(J,W);if(Object.keys(N).length===0)throw Error("Buttress server missing generator configuration");if(N.backend=N.backend||{},!N.backend.type)N.backend.type=j;let G=await $.startGenerator(j,N);return X.generators.add(G.id),G},async finalizeGenerator({backend:$,session:Z},X){return console.log("[Server] Finalize Generator:",X),Z.generators.delete(X),$.finalizeGenerator(X)}}});import{z as g}from"zod";import{ReadableStream as K$}from"node:stream/web";var n2,i2;var s2=S(()=>{n2={initContext:g.tuple([g.string(),g.any().optional()]),completion:g.tuple([g.string(),g.any().optional()]),tokenize:g.tuple([g.string(),g.any()]),detokenize:g.tuple([g.string(),g.any()]),applyChatTemplate:g.tuple([g.string(),g.any()]),releaseContext:g.tuple([g.string(),g.boolean().optional()])},i2={initContext({backend:$},Z,X){return new K$({async start(j){try{let W=await $.ggmlLlm.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new 
Promise((J)=>setTimeout(J,1000));let{download:H,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},completion({backend:$},Z,X){return console.log("[Server] Completion:",{id:Z,property:X}),$.ggmlLlm.completion(Z,X)},async tokenize({backend:$},Z,X){return console.log("[Server] Tokenize:",{id:Z,property:X}),$.ggmlLlm.tokenize(Z,X)},async detokenize({backend:$},Z,X){return console.log("[Server] Detokenize:",{id:Z,property:X}),$.ggmlLlm.detokenize(Z,X)},async applyChatTemplate({backend:$},Z,X){return console.log("[Server] Apply Chat Template:",{id:Z,property:X}),$.ggmlLlm.applyChatTemplate(Z,X)},async releaseContext({backend:$},Z,X){return console.log("[Server] Release Context:",{id:Z,force:X}),$.ggmlLlm.releaseContext(Z,{force:X})}}});import{z as l}from"zod";import{ReadableStream as _$}from"node:stream/web";import q$ from"node:path";var r2,a2;var o2=S(()=>{r2={initContext:l.tuple([l.string(),l.any().optional()]),transcribe:l.tuple([l.string(),l.string(),l.any().optional()]),transcribeData:l.tuple([l.string(),l.union([l.instanceof(Buffer),l.instanceof(Uint8Array)]),l.any().optional()]),releaseContext:l.tuple([l.string(),l.boolean().optional()])},a2={initContext({backend:$},Z,X){return new _$({async start(j){try{let W=await $.ggmlStt.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new Promise((J)=>setTimeout(J,1000));let{download:H,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},async transcribe({backend:$,config:{server:Z}},X,j,W){return console.log("[Server] Transcribe:",{id:X,audioPath:j,options:W}),$.ggmlStt.transcribe(X,{audioPath:q$.join(Z.temp_file_dir,j),options:W})},async transcribeData({backend:$},Z,X,j){return console.log("[Server] Transcribe Data:",{id:Z,audioDataLength:X?.length||0,options:j}),$.ggmlStt.transcribeData(Z,{audioData:X,options:j})},async releaseContext({backend:$},Z,X){return console.log("[Server] Release STT 
Context:",{id:Z,force:X}),$.ggmlStt.releaseContext(Z,{force:X})}}});var O$,t2,e2;var $3=S(()=>{l2();s2();o2();O$={common:d2,ggmlLlm:i2,ggmlStt:a2},t2={common:c2,ggmlLlm:n2,ggmlStt:r2},e2=O$});import{Buffer as j1}from"node:buffer";var Z3=($)=>{try{return JSON.parse($,(Z,X)=>{if(!X)return X;if(X?.type==="Buffer"&&X?.data)return j1.from(X.data,"base64");if(X?.type==="Uint8Array"&&X?.data){let j=j1.from(X.data,"base64");return j.buffer.slice(j.byteOffset,j.byteOffset+j.byteLength)}if(X?.type==="Error"&&X?.name&&X?.message)return Error(X.name,X.message);return X})}catch{return $}},W1=($)=>{try{return JSON.stringify($,(Z,X)=>{if(X instanceof Error)return{type:"Error",name:X.name,message:X.message};if(X instanceof j1)return{type:"Buffer",data:X.toString("base64")};if(X instanceof Uint8Array)return{type:"Uint8Array",data:j1.from(X).toString("base64")};return X})}catch{return $}};var X3=()=>{};var e1="1.0",j3=8089;import L$ from"node:dgram";class $4{name="udp";socket=null;announcementTimer=null;config;getServerInfo;port;constructor($,Z){this.config=$,this.getServerInfo=Z,this.port=$.port??j3}async start(){if(this.socket=L$.createSocket({type:"udp4",reuseAddr:!0}),this.socket.on("message",($,Z)=>{this.handleMessage($,Z)}),this.socket.on("error",($)=>{console.error("[Autodiscover UDP] Socket error:",$.message)}),await new Promise(($,Z)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),$()}),this.socket.once("error",Z)}),this.config.announcements.enabled){let $=this.config.announcements.interval??5000;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},$),this.sendAnnouncement()}}async stop(){if(this.announcementTimer)clearInterval(this.announcementTimer),this.announcementTimer=null;if(this.socket)await new Promise(($)=>{this.socket.close(()=>$())}),this.socket=null}handleMessage($,Z){try{let X=JSON.parse($.toString());if(X.t==="QUERY"&&this.config.requests.enabled){let 
j=X.d,W=this.config.requests.responseDelay??0,H=W>0?Math.random()*W:0;setTimeout(()=>{this.sendResponse(j.id,Z)},H)}}catch{}}sendAnnouncement(){if(!this.socket)return;let $=this.getServerInfo(),Z={t:"ANNOUNCE",v:e1,d:{info:$}},X=Buffer.from(JSON.stringify(Z));this.socket.send(X,0,X.length,this.port,"255.255.255.255",(j)=>{if(j)console.error("[Autodiscover UDP] Announcement error:",j.message)})}sendResponse($,Z){if(!this.socket)return;let X=this.getServerInfo(),j={t:"RESPONSE",v:e1,d:{request_id:$,info:X}},W=Buffer.from(JSON.stringify(j));this.socket.send(W,0,W.length,Z.port,Z.address,(H)=>{if(H)console.error("[Autodiscover UDP] Response error:",H.message)})}}var W3=()=>{};class Z4{config;getServerInfo;transports=[];started=!1;constructor($,Z){this.config=$;this.getServerInfo=Z;if($.udp?.announcements?.enabled||$.udp?.requests?.enabled)this.transports.push(new $4($.udp,Z))}async start(){if(this.started)return;(await Promise.allSettled(this.transports.map((Z)=>Z.start()))).forEach((Z,X)=>{if(Z.status==="rejected")console.error(`[Autodiscover] Failed to start ${this.transports[X].name}:`,Z.reason)}),this.started=!0}async stop(){if(!this.started)return;await Promise.allSettled(this.transports.map(($)=>$.stop())),this.started=!1}}var Y3=S(()=>{W3()});import A$ from"node:os";var Y1=()=>{let $=A$.networkInterfaces();return Object.values($).flat().find((X)=>X?.family==="IPv4"&&!X?.internal)?.address||null};var X4=()=>{};import u from"node:os";import Q3 from"node:fs";import J3 from"node:path";import{execSync as Q1}from"node:child_process";import N3 from"@iarna/toml";async function j4({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.34"}`),console.log(`Generating model capabilities comparison...
39
- `),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.34"}`),X.push(`## Model Capabilities Comparison
39
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var M2=($)=>{if(!$)return{repoId:null,filename:null};let[Z,X]=$.split(":");return{repoId:Z,filename:X||null}};var T2=S(()=>{B0();d1();B2()});var e0={};P3(e0,{testGgmlSttCapabilities:()=>P2,testGgmlLlmCapabilities:()=>x2,status:()=>l$,startModelDownload:()=>t0,startGenerator:()=>p$,showSttModelsTable:()=>S2,showModelsTable:()=>F2,globalDownloadManager:()=>l1,ggmlStt:()=>c$,ggmlLlm:()=>g$,getModelIdentifier:()=>d$,getCapabilities:()=>r,generatorRegistry:()=>$0,finalizeGenerator:()=>f$});async function p$($,Z){let j={"ggml-llm":{create:o4,getId:b1},"ggml-stt":{create:L2,getId:g1}}[$];if(!j)throw Error(`Unsupported backend type: ${$}`);let W=j.getId(Z);if(!W)throw Error("Buttress generator config missing repo identifier");let H=`${$}:${W}`,Q=$0.get(H);if(Q)return Q.refCount+=1,Q.instance.resetFinalized?.(),{id:Q.id,info:Q.instance.info};let J=await j.create(H,Z,{globalDownloadManager:l1}),N={id:H,type:J.type,instance:J,refCount:1};return $0.set(H,N),{id:H,info:J.info}}async function f$($){let Z=$0.get($);if(!Z)return!1;if(Z.refCount-=1,Z.refCount<=0){if(await Z.instance.finalize(),!(Z.instance.hasPendingReleases?.()??!1))$0.delete($)}return!0}function d$($,Z){if($==="ggml-llm")return b1(Z);if($==="ggml-stt")return g1(Z);return null}async function t0($,Z,X={}){let W={"ggml-llm":t4,"ggml-stt":A2}[$];if(!W)return{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${$}`};return W(Z,l1,X)}var $0,l1,m$=($)=>{let Z=$0.get($);if(!Z)throw Error(`Unknown generator id "${$}"`);return Z},V0=($,Z)=>{let X=m$($);if(X.type!==Z)throw Error(`Generator "${$}" does not support ${Z} backend`);return X.instance},g$,c$,l$;var $1=S(()=>{v1();c1();d1();m0();T2();$0=new Map,l1={downloads:new Map,getDownload($){return 
this.downloads.get($)||null},setDownload($,Z){this.downloads.set($,Z)},deleteDownload($){this.downloads.delete($)},isDownloading($){return this.downloads.has($)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([$,Z])=>({localPath:$,promise:Z}))}};g$={async initContext($,Z){return V0($,"ggml-llm").initContext(Z)},async completion($,Z){return V0($,"ggml-llm").completion(Z)},async tokenize($,Z){return V0($,"ggml-llm").tokenize(Z)},async detokenize($,Z){return V0($,"ggml-llm").detokenize(Z)},async applyChatTemplate($,Z){return V0($,"ggml-llm").applyChatTemplate(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-llm")throw Error(`Generator "${$}" does not support ggml-llm backend`);return X.instance.releaseContext(Z)}},c$={async initContext($,Z){return V0($,"ggml-stt").initContext(Z)},async transcribe($,Z){return V0($,"ggml-stt").transcribe(Z)},async transcribeData($,Z){return V0($,"ggml-stt").transcribeData(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-stt")throw Error(`Generator "${$}" does not support ggml-stt backend`);return X.instance.releaseContext(Z)}};l$={getFullStatus:()=>S4($0),getGgmlLlmStatus:()=>F1($0),getGgmlSttStatus:()=>x1($0),subscribeToStatus:M1,subscribeToStatusWithId:x4,llmStatusTracker:t,sttStatusTracker:J0,statusEmitter:o}});import{node as n$}from"@elysiajs/node";import{Elysia as i$}from"elysia";var s$,U0=($)=>new i$({adapter:s$?n$():void 0,...$});var F0=S(()=>{s$=typeof process<"u"&&process.versions&&process.versions.node});import{t as j0}from"elysia";var r$,a$=({store:{serverInfo:$}})=>({id:$.id,name:$.name,version:$.version,generators:$.generators,authentication:$.authentication}),n1=($)=>{let Z=U0(),X=$.autodiscover.http?.path??"/buttress/info";return Z.get(X,a$,{response:r$}),Z};var 
k2=S(()=>{F0();r$=j0.Object({id:j0.String(),name:j0.String(),version:j0.String(),generators:j0.Array(j0.Object({type:j0.String()})),authentication:j0.Object({required:j0.Boolean(),type:j0.Literal("device-group")})})});import{t as W0,file as o$}from"elysia";import{writeFile as D2}from"node:fs/promises";import i1 from"node:path";var t$,s1;var b2=S(()=>{F0();t$=typeof process<"u"&&process.versions!=null&&process.versions.node!=null,s1=U0().post("/buttress/upload",async({body:{file:$},store:{config:Z}})=>{let X=`${Date.now()}-${$.name.replace(/[^\dA-Za-z]/g,"_")}`,j=i1.join(Z.server.temp_file_dir,X);try{if(t$)await D2(j,await $.stream());else await D2(j,await $.arrayBuffer());return{ok:!0,filename:X}}catch(W){return{ok:!1,error:String(W)}}},{body:W0.Object({file:W0.File()}),response:W0.Object({ok:W0.Boolean(),filename:W0.Optional(W0.String()),error:W0.Optional(W0.String())})}).get("/buttress/download/:filename",async({params:{filename:$},store:{config:Z},status:X})=>{let j=i1.join(Z.server.temp_file_dir,$);if(i1.relative(Z.server.temp_file_dir,j).includes(".."))return X(400),"Invalid file path";return o$(j)},{params:W0.Object({filename:W0.String()})})});import r1 from"node:path";import C2 from"node:fs/promises";import{fileURLToPath as e$}from"node:url";var v2,$6=async()=>{let $=[r1.join(v2,"..","public","status.html"),r1.join(v2,"..","..","public","status.html")];return(await Promise.all($.map((X)=>C2.access(X).then(()=>X,()=>null)))).find((X)=>X!==null)??null},Z6=($)=>{let{status:Z}=$;if(Z?.getFullStatus)return Z.getFullStatus();return{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},h2=async()=>{let $=await $6();if(!$)return console.error("[Status] Failed to find status.html in candidate paths"),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}});try{let Z=await C2.readFile($,"utf-8");return new Response(Z,{headers:{"Content-Type":"text/html; 
charset=utf-8"}})}catch(Z){return console.error("[Status] Failed to serve status page:",Z),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}})}},a1;var I2=S(()=>{F0();v2=r1.dirname(e$(import.meta.url)),a1=U0().get("/status",h2).get("/status/",h2).get("/buttress/status",({store:{backend:$}})=>Z6($))});import{t as P,sse as Z1}from"elysia";import{cors as X6}from"@elysiajs/cors";async function W6($,Z,X){let W=(Z.generators||[]).filter((q)=>q.type==="ggml-llm");if(W.length===0)throw Error('No ggml-llm generator configured. Add a [[generators]] with type = "ggml-llm" to your config.');let H=W[0],Q=X||H.model?.repo_id;if(X){let q=W.find((A)=>A.model?.repo_id===X);if(q)H=q}else Q=H.model?.repo_id;let J=Q,N=y2.get(J);if(N?.initialized)return N;let{generators:G,server:_,...O}=Z.global||{},z={...O,...H,model:{...H.model,repo_id:Q}};console.log(`[OpenAI] Creating generator for ${J}`);let{id:V}=await $.startGenerator("ggml-llm",z),R={id:V,config:z,repoId:Q,initialized:!1};return y2.set(J,R),await $.ggmlLlm.initContext(V,{}),R.initialized=!0,console.log(`[OpenAI] Generator ready: ${J}`),R}async function Y6($,Z,X,j){let W=$.getReader(),H="",Q=null,J=null,N="stop",G={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let O=!1;while(!O){let z=await W.read();if({done:O}=z,O)break;let{event:V,data:R}=z.value;if(V==="token"){if(R.content!=null)H+=R.content;else if(R.token!=null)H+=R.token}else if(V==="result"){if(R.text)H=R.text;else if(R.content)H=R.content;if(R.reasoning_content)Q=R.reasoning_content;if(R.tool_calls?.length>0)J=R.tool_calls.map((q,A)=>({id:q.id||`call_${Z}_${A}`,type:"function",function:{name:q.function?.name||"",arguments:q.function?.arguments||""}})),N="tool_calls";else N=R.interrupted?"length":"stop";G={prompt_tokens:R.prompt_tokens||R.promptTokens||0,completion_tokens:R.tokens_predicted||R.tokensPredicted||0,total_tokens:(R.prompt_tokens||R.promptTokens||0)+(R.tokens_predicted||R.tokensPredicted||0)}}else 
if(V==="error")throw Error(R.message)}}finally{W.cancel().catch(()=>{})}let _={role:"assistant",content:H||null};if(Q)_.reasoning_content=Q;if(J)_.tool_calls=J;return{id:Z,object:"chat.completion",created:X,model:j,choices:[{index:0,message:_,finish_reason:N}],usage:G}}function X1({global:$}){let Z=U0({prefix:"/oai-compat"});return Z.use(X6({origin:$?.openai_compat?.cors_allowed_origins??!1,methods:["GET","POST","OPTIONS"],allowedHeaders:["Content-Type","Authorization"],maxAge:86400,preflight:!0})),Z.get("/v1/models",({store:X})=>{let{config:j}=X,Q=(j.generators||[]).filter((J)=>J.type==="ggml-llm").map((J)=>{return{id:J.model?.repo_id||"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"}});if(Q.length===0)Q.push({id:"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"});return{object:"list",data:Q}}),Z.post("/v1/chat/completions",async function*({body:j,set:W,store:H}){let{config:Q,backend:J}=H,{messages:N=[],stream:G=!1,model:_,tools:O,temperature:z,stop:V,top_p:R,max_tokens:q,presence_penalty:A,frequency_penalty:w,tool_choice:U,stream_options:Y}=j;if(!N||N.length===0)return W.status=400,{error:{message:"messages is required and must not be empty",type:"invalid_request_error"}};try{let K=await W6(J,Q,_),L=j6(),B=Math.floor(Date.now()/1000),E=K.repoId||"ggml-llm",M={reasoning_format:"auto",messages:N,jinja:!0,add_generation_prompt:!0};if(z!=null)M.temperature=z;if(R!=null)M.top_p=R;if(q!=null)M.n_predict=q;if(V!=null)M.stop=Array.isArray(V)?V:[V];if(A!=null)M.presence_penalty=A;if(w!=null)M.frequency_penalty=w;if(O!=null)M.tools=O;if(U!=null)M.tool_choice=U;let F=await J.ggmlLlm.completion(K.id,{options:M});if(!G)return await Y6(F,L,B,E);let x=Y?.include_usage===!0,k=F.getReader(),D="",m="",p=new Map,n=new Map;try{let i=!1;while(!i){let v=await k.read();if({done:i}=v,i)break;let{event:c,data:T}=v.value;if(c==="token"){let f={};if(T.content!=null){let 
h=T.content;if(h.length>D.length)f.content=h.slice(D.length),D=h}if(T.reasoning_content!=null){let h=T.reasoning_content;if(h.length>m.length)f.reasoning_content=h.slice(m.length),m=h}if(T.tool_calls?.length>0){let h=[];if(T.tool_calls.forEach((s,I)=>{let a={index:I};if(!n.has(I))n.set(I,s.id||`call_${L}_${I}`),a.id=n.get(I),a.type="function";let q0=s.function?.arguments||"",T0=p.get(I)||"",G0={};if(!p.has(I)&&s.function?.name)G0.name=s.function.name;if(q0.length>T0.length)G0.arguments=q0.slice(T0.length),p.set(I,q0);if(Object.keys(G0).length>0)a.function=G0,h.push(a);else if(a.id)a.function={name:s.function?.name||"",arguments:""},h.push(a)}),h.length>0)f.tool_calls=h}if(Object.keys(f).length>0)yield Z1({data:JSON.stringify({id:L,object:"chat.completion.chunk",created:B,model:E,choices:[{index:0,delta:f,finish_reason:null}]})})}else if(c==="result"){let f={id:L,object:"chat.completion.chunk",created:B,model:E,choices:[{index:0,delta:{},finish_reason:T.interrupted?"length":"stop"}]};if(x)f.usage={prompt_tokens:T.prompt_tokens||T.promptTokens||0,completion_tokens:T.tokens_predicted||T.tokensPredicted||0,total_tokens:(T.prompt_tokens||T.promptTokens||0)+(T.tokens_predicted||T.tokensPredicted||0)};yield Z1({data:JSON.stringify(f)})}else if(c==="error")yield Z1({data:JSON.stringify({error:{message:T.message,type:"server_error"}})})}yield Z1({data:"[DONE]"})}finally{k.cancel().catch(()=>{})}}catch(K){return console.error("[OpenAI] Chat completion error:",K),W.status=500,{error:{message:K.message||"Internal server 
error",type:"server_error"}}}},{body:P.Object({model:P.Optional(P.String()),messages:P.Array(P.Any()),stream:P.Optional(P.Boolean()),temperature:P.Optional(P.Number()),top_p:P.Optional(P.Number()),max_tokens:P.Optional(P.Number()),stop:P.Optional(P.Union([P.String(),P.Array(P.String())])),presence_penalty:P.Optional(P.Number()),frequency_penalty:P.Optional(P.Number()),tools:P.Optional(P.Array(P.Any())),tool_choice:P.Optional(P.Any()),stream_options:P.Optional(P.Object({include_usage:P.Optional(P.Boolean())}))})}),Z}var j6=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,y2;var u2=S(()=>{F0();y2=new Map});var m2=S(()=>{k2();b2();I2();u2()});import J6 from"node:os";import N6 from"node:path";import H6 from"node-machine-id";import V6 from"bytes";import U6 from"ms";var x0=($={},Z={})=>{let X=Array.isArray($)?[...$]:{...$};return Object.entries(Z||{}).forEach(([j,W])=>{if(W&&typeof W==="object"&&!Array.isArray(W))X[j]=x0(X[j]||{},W);else X[j]=W}),X},A0=($)=>{if(!$)return null;if(typeof $==="object")return JSON.parse(JSON.stringify($));return null},o1=($,Z)=>{let X=A0($)||{},j=A0(Z)||{};return x0(X,j)},p2=($,Z)=>x0(JSON.parse(JSON.stringify($.global)),Z||{}),t1=($,Z,X,j)=>{if($.generators.length>0){let W=$.generators.filter((H)=>H?.type===X);if(W.length>0){if(j){let H=W.find((Q)=>Z.getModelIdentifier(X,Q)===j);if(H)return p2($,H)}}}if(Object.keys($.global).length>0)return p2($,{});return null},f2,G6=($)=>{if(!$)return null;if($===!0)return{...f2};return x0(f2,$)},l2=($,Z)=>{if(!$.generators||$.generators.length===0)return Z.map((j)=>({type:j}));let X=new Set;if($.generators.forEach((j)=>{if(j.type)X.add(j.type)}),X.size===0)return Z.map((j)=>({type:j}));return Array.from(X).map((j)=>({type:j}))},g2=($,Z,X)=>{if($===void 0)return X;if(typeof $==="number")return $;return Z($)??X},K6=2080,c2=60000,d2=52428800,e1=($)=>{let Z=H6.machineIdSync(),X={server:{id:`buttress-${Z}`,name:`Buttress Server 
(${Z.slice(-8)})`,port:K6,temp_file_dir:N6.join(J6.tmpdir(),".buttress"),session_timeout:c2,max_body_size:d2},autodiscover:!1},j=x0(X,A0($)||{}),W=Array.isArray(j.generators)?j.generators:[],{server:H,generators:Q,autodiscover:J,...N}=j;return{autodiscover:G6(J),server:{id:H.id,name:H.name,port:H.port,log_level:H.log_level,temp_file_dir:H.temp_file_dir,max_body_size:g2(H.max_body_size,V6.parse,d2),session_timeout:g2(H.session_timeout,U6,c2)},global:N,generators:W}};var S0=S(()=>{f2={udp:{port:8089,announcements:{enabled:!0,interval:5000},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:"/buttress/info",cors:!0}}});import{z as Z0}from"zod";var n2,i2;var s2=S(()=>{S0();n2={getCapabilities:Z0.tuple([Z0.object({type:Z0.string().optional().default("ggml-llm"),config:Z0.any().optional(),currentClientCapabilities:Z0.any().optional(),options:Z0.any().optional()}).nullable().optional()]),startGenerator:Z0.tuple([Z0.string(),Z0.any().optional()]),finalizeGenerator:Z0.tuple([Z0.string()])},i2={async getCapabilities({backend:$,config:Z},X=null){console.log("[Server] Get Capabilities:",X);let j=X||{type:"ggml-llm"},{type:W="ggml-llm",config:H,currentClientCapabilities:Q=null,options:J={}}=j,N=A0(H),G=$.getModelIdentifier(W,N),_=t1(Z,$,W,G),O=o1(_,H);if(Object.keys(O).length===0)throw Error("Buttress server missing generator configuration");if(O.backend=O.backend||{},!O.backend.type)O.backend.type=W;return $.getCapabilities(W,Q,{...J,config:O})},async startGenerator({backend:$,config:Z,session:X},j,W){console.log("[Server] Start Generator:",j,W);let H=A0(W),Q=$.getModelIdentifier(j,H),J=t1(Z,$,j,Q),N=o1(J,W);if(Object.keys(N).length===0)throw Error("Buttress server missing generator configuration");if(N.backend=N.backend||{},!N.backend.type)N.backend.type=j;let G=await $.startGenerator(j,N);return X.generators.add(G.id),G},async finalizeGenerator({backend:$,session:Z},X){return console.log("[Server] Finalize 
Generator:",X),Z.generators.delete(X),$.finalizeGenerator(X)}}});import{z as g}from"zod";import{ReadableStream as _6}from"node:stream/web";var r2,a2;var o2=S(()=>{r2={initContext:g.tuple([g.string(),g.any().optional()]),completion:g.tuple([g.string(),g.any().optional()]),tokenize:g.tuple([g.string(),g.any()]),detokenize:g.tuple([g.string(),g.any()]),applyChatTemplate:g.tuple([g.string(),g.any()]),releaseContext:g.tuple([g.string(),g.boolean().optional()])},a2={initContext({backend:$},Z,X){return new _6({async start(j){try{let W=await $.ggmlLlm.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new Promise((J)=>setTimeout(J,1000));let{download:H,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},completion({backend:$},Z,X){return console.log("[Server] Completion:",{id:Z,property:X}),$.ggmlLlm.completion(Z,X)},async tokenize({backend:$},Z,X){return console.log("[Server] Tokenize:",{id:Z,property:X}),$.ggmlLlm.tokenize(Z,X)},async detokenize({backend:$},Z,X){return console.log("[Server] Detokenize:",{id:Z,property:X}),$.ggmlLlm.detokenize(Z,X)},async applyChatTemplate({backend:$},Z,X){return console.log("[Server] Apply Chat Template:",{id:Z,property:X}),$.ggmlLlm.applyChatTemplate(Z,X)},async releaseContext({backend:$},Z,X){return console.log("[Server] Release Context:",{id:Z,force:X}),$.ggmlLlm.releaseContext(Z,{force:X})}}});import{z as l}from"zod";import{ReadableStream as q6}from"node:stream/web";import O6 from"node:path";var t2,e2;var $3=S(()=>{t2={initContext:l.tuple([l.string(),l.any().optional()]),transcribe:l.tuple([l.string(),l.string(),l.any().optional()]),transcribeData:l.tuple([l.string(),l.union([l.instanceof(Buffer),l.instanceof(Uint8Array)]),l.any().optional()]),releaseContext:l.tuple([l.string(),l.boolean().optional()])},e2={initContext({backend:$},Z,X){return new q6({async start(j){try{let W=await 
$.ggmlStt.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new Promise((J)=>setTimeout(J,1000));let{download:H,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},async transcribe({backend:$,config:{server:Z}},X,j,W){return console.log("[Server] Transcribe:",{id:X,audioPath:j,options:W}),$.ggmlStt.transcribe(X,{audioPath:O6.join(Z.temp_file_dir,j),options:W})},async transcribeData({backend:$},Z,X,j){return console.log("[Server] Transcribe Data:",{id:Z,audioDataLength:X?.length||0,options:j}),$.ggmlStt.transcribeData(Z,{audioData:X,options:j})},async releaseContext({backend:$},Z,X){return console.log("[Server] Release STT Context:",{id:Z,force:X}),$.ggmlStt.releaseContext(Z,{force:X})}}});var L6,Z3,X3;var j3=S(()=>{s2();o2();$3();L6={common:i2,ggmlLlm:a2,ggmlStt:e2},Z3={common:n2,ggmlLlm:r2,ggmlStt:t2},X3=L6});import{Buffer as j1}from"node:buffer";var W3=($)=>{try{return JSON.parse($,(Z,X)=>{if(!X)return X;if(X?.type==="Buffer"&&X?.data)return j1.from(X.data,"base64");if(X?.type==="Uint8Array"&&X?.data){let j=j1.from(X.data,"base64");return j.buffer.slice(j.byteOffset,j.byteOffset+j.byteLength)}if(X?.type==="Error"&&X?.name&&X?.message)return Error(X.name,X.message);return X})}catch{return $}},W1=($)=>{try{return JSON.stringify($,(Z,X)=>{if(X instanceof Error)return{type:"Error",name:X.name,message:X.message};if(X instanceof j1)return{type:"Buffer",data:X.toString("base64")};if(X instanceof Uint8Array)return{type:"Uint8Array",data:j1.from(X).toString("base64")};return X})}catch{return $}};var Y3=()=>{};var $4="1.0",Q3=8089;import A6 from"node:dgram";class Z4{name="udp";socket=null;announcementTimer=null;config;getServerInfo;port;constructor($,Z){this.config=$,this.getServerInfo=Z,this.port=$.port??Q3}async 
start(){if(this.socket=A6.createSocket({type:"udp4",reuseAddr:!0}),this.socket.on("message",($,Z)=>{this.handleMessage($,Z)}),this.socket.on("error",($)=>{console.error("[Autodiscover UDP] Socket error:",$.message)}),await new Promise(($,Z)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),$()}),this.socket.once("error",Z)}),this.config.announcements.enabled){let $=this.config.announcements.interval??5000;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},$),this.sendAnnouncement()}}async stop(){if(this.announcementTimer)clearInterval(this.announcementTimer),this.announcementTimer=null;if(this.socket)await new Promise(($)=>{this.socket.close(()=>$())}),this.socket=null}handleMessage($,Z){try{let X=JSON.parse($.toString());if(X.t==="QUERY"&&this.config.requests.enabled){let j=X.d,W=this.config.requests.responseDelay??0,H=W>0?Math.random()*W:0;setTimeout(()=>{this.sendResponse(j.id,Z)},H)}}catch{}}sendAnnouncement(){if(!this.socket)return;let $=this.getServerInfo(),Z={t:"ANNOUNCE",v:$4,d:{info:$}},X=Buffer.from(JSON.stringify(Z));this.socket.send(X,0,X.length,this.port,"255.255.255.255",(j)=>{if(j)console.error("[Autodiscover UDP] Announcement error:",j.message)})}sendResponse($,Z){if(!this.socket)return;let X=this.getServerInfo(),j={t:"RESPONSE",v:$4,d:{request_id:$,info:X}},W=Buffer.from(JSON.stringify(j));this.socket.send(W,0,W.length,Z.port,Z.address,(H)=>{if(H)console.error("[Autodiscover UDP] Response error:",H.message)})}}var J3=()=>{};class X4{config;getServerInfo;transports=[];started=!1;constructor($,Z){this.config=$;this.getServerInfo=Z;if($.udp?.announcements?.enabled||$.udp?.requests?.enabled)this.transports.push(new Z4($.udp,Z))}async start(){if(this.started)return;(await Promise.allSettled(this.transports.map((Z)=>Z.start()))).forEach((Z,X)=>{if(Z.status==="rejected")console.error(`[Autodiscover] Failed to start 
${this.transports[X].name}:`,Z.reason)}),this.started=!0}async stop(){if(!this.started)return;await Promise.allSettled(this.transports.map(($)=>$.stop())),this.started=!1}}var N3=S(()=>{J3()});import z6 from"node:os";var Y1=()=>{let $=z6.networkInterfaces();return Object.values($).flat().find((X)=>X?.family==="IPv4"&&!X?.internal)?.address||null};var j4=()=>{};import u from"node:os";import H3 from"node:fs";import V3 from"node:path";import{execSync as Q1}from"node:child_process";import U3 from"@iarna/toml";async function W4({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.38"}`),console.log(`Generating model capabilities comparison...
40
+ `),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.38"}`),X.push(`## Model Capabilities Comparison
40
41
  `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))K[L]=j(K[L]||{},B);else K[L]=B}),K},W=Z||{},{server:H,generators:Q=[],...J}=W,N=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((K)=>K?.type==="ggml-llm");if(Y.length>0&&U){let K=Y.find((L)=>L.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},_=[];for(let U=0;U<$.length;U+=1){let Y=$[U];console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let K=G(Y);K={...K||{},model:{...J.runtime,...K?.model||{},repo_id:Y}};let L=await r("ggml-llm",null,{config:K,includeBreakdown:!0});_.push({modelId:Y,capabilities:L,modelInfo:L.buttress?.selected||null,modelConfig:L.modelConfig||null})}let O=(U)=>U?(U/1024/1024/1024).toFixed(2):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |"),X.push("|----------|-----------|--------------|--------------------|----------------------------|-----------------|-----------------|"),_.forEach(({modelId:U,modelInfo:Y,modelConfig:K})=>{let L=O(Y?.modelBytes),B=K?.nCtx||Y?.kvInfo?.nCtxTrain||"N/A",E=N0(Y),M=Number(B),F=Y?.kvCacheBytes||(E&&Number.isFinite(M)&&M>0?E(M):E&&E(Y?.kvInfo?.nCtxTrain||0))||null,x=O(F),k=O(Y?.modelBytes&&F?Y.modelBytes+F:Y?.fit?.totalRequiredBytes),D=z(Y?.fit?.fitsInGpu),m=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${L} | ${B} | ${x} | ${k} | ${D} | ${m} |`);let p=Y?.memoryLimitedCtx!=null||Y?.limitedFit!=null,n=!Y?.fit?.fitsInGpu||!Y?.fit?.fitsInCpu;if(p&&n){let 
i=Y?.memoryLimitedCtx||B,v=Number(i),c=Y?.limitedKvCacheBytes||E&&Number.isFinite(v)&&v>0&&E(v)||null,T=O(c),f=O(Y?.modelBytes&&c?Y.modelBytes+c:Y?.limitedFit?.totalRequiredBytes),h=z(Y?.limitedFit?.fitsInGpu),s=z(Y?.limitedFit?.fitsInCpu);if(i!==B||T!==x||f!==k)X.push(`| ↳ Limited | ${L} | ${i} | ${T} | ${f} | ${h} | ${s} |`)}}),X.push(`
41
42
  ---`),X.push(`
42
43
  ### System Information`);let V=null;if(process.platform!=="win32")try{V=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${u.hostname()}`),X.push(`- **OS:** ${u.type()} ${u.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${u.cpus().length}`),X.push(`- **Total System Memory:** ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`),_.length>0){let Y=_[0].capabilities.buttress?.selected;if(Y){let K=Y.cpuTotalBytes>0?(Y.cpuUsableBytes/Y.cpuTotalBytes*100).toFixed(0):0;if(X.push(`- **Usable CPU Memory:** ${(Y.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(Y.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.hasGpu){let L=Y.devices.filter((B)=>B.type==="gpu");if(L.length>0){let B=L[0];X.push(`- **GPU Backend:** ${B.backend}`),X.push(`- **GPU Name:** ${B.deviceName}`),X.push(`- **GPU Total Memory:** ${(B.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let E=Y.gpuTotalBytes>0?(Y.gpuUsableBytes/Y.gpuTotalBytes*100).toFixed(0):0;X.push(`- **GPU Usable Memory:** ${(Y.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${E}% of ${(Y.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else X.push("- **GPU:** Not available")}}X.push(`
43
44
  ### Command Used`);let R=process.argv.slice(2).join(" ");if(X.push(`\`\`\`bash
44
45
  ${process.argv[0]} ${process.argv[1]} ${R}
45
46
  \`\`\``),X.push(`
46
- ### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.34"}`),Z&&Object.keys(Z).length>0){X.push(`
47
- ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=N3.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=J3.join(process.cwd(),A);Q3.writeFileSync(w,X.join(`
47
+ ### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.38"}`),Z&&Object.keys(Z).length>0){X.push(`
48
+ ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=U3.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=V3.join(process.cwd(),A);H3.writeFileSync(w,X.join(`
48
49
  `),"utf8"),console.log(`
49
- Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function V3({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.34"}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let X=Z||{},{server:j,generators:W=[],...H}=X,Q=(V={},R={})=>{let q=Array.isArray(V)?[...V]:{...V};return Object.entries(R||{}).forEach(([A,w])=>{if(w&&typeof w==="object"&&!Array.isArray(w))q[A]=Q(q[A]||{},w);else q[A]=w}),q},J=(V)=>Q(JSON.parse(JSON.stringify(H)),V||{}),G=((V)=>{if(Array.isArray(W)&&W.length>0){let R=W.filter((q)=>q?.type==="ggml-llm");if(R.length>0){if(V){let q=R.find((A)=>A.model?.repo_id===V);if(q)return J(q)}}}if(Object.keys(H).length>0)return J({});return null})($);if($)G={...G||{},model:{...G?.model||{},repo_id:$}};let _=await r("ggml-llm",null,{config:G,includeBreakdown:!0}),O=_.buttress?.selected||null,z=_.modelConfig||null;if($||z?.repoId){console.log(`
50
+ Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function K3({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.38"}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let X=Z||{},{server:j,generators:W=[],...H}=X,Q=(V={},R={})=>{let q=Array.isArray(V)?[...V]:{...V};return Object.entries(R||{}).forEach(([A,w])=>{if(w&&typeof w==="object"&&!Array.isArray(w))q[A]=Q(q[A]||{},w);else q[A]=w}),q},J=(V)=>Q(JSON.parse(JSON.stringify(H)),V||{}),G=((V)=>{if(Array.isArray(W)&&W.length>0){let R=W.filter((q)=>q?.type==="ggml-llm");if(R.length>0){if(V){let q=R.find((A)=>A.model?.repo_id===V);if(q)return J(q)}}}if(Object.keys(H).length>0)return J({});return null})($);if($)G={...G||{},model:{...G?.model||{},repo_id:$}};let _=await r("ggml-llm",null,{config:G,includeBreakdown:!0}),O=_.buttress?.selected||null,z=_.modelConfig||null;if($||z?.repoId){console.log(`
50
51
  === Model Information ===`);let V=$||z?.repoId;if(console.log(`Repository ID: ${V}`),z?.quantization)console.log(`Quantization: ${z.quantization}`);if(z?.nCtx)console.log(`Context Length: ${z.nCtx}`);if(O?.quantization){let{fileType:A}=O.quantization;if(A!=null)console.log(`Model File Type (GGUF): ${A}`)}let R=z?.cache_type_k||"f16",q=z?.cache_type_v||"f16";if(console.log(`KV Cache Type: K=${R}, V=${q}`),O?.modelBytes&&O?.kvCacheBytes){if(console.log(`Model Size: ${(O.modelBytes/1024/1024/1024).toFixed(2)} GB`),O.kvInfo)console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(O.kvInfo)})`);else console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);if(console.log(`Total Required Memory: ${((O.modelBytes+O.kvCacheBytes)/1024/1024/1024).toFixed(2)} GB`),O.memoryLimitedCtx!=null){let A=O.memoryLimitedCtx,w=O.kvInfo?.nCtxTrain;if(w)console.log(`
51
52
  Memory-Limited Context: ${A} (Train: ${w})`);else console.log(`
52
53
  Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(O.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(_.buttress?.selected?.fit){let{totalRequiredBytes:A}=_.buttress.selected.fit;console.log(`Total Required Memory: ${(A/1024/1024/1024).toFixed(2)} GB`)}}if(_.buttress?.selected){let{selected:V}=_.buttress;console.log(`
@@ -55,25 +56,25 @@ Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limit
55
56
  Backend Variant: ${V.variant}`),console.log(`Performance Score: ${V.score}`),V.fit){if(console.log(`
56
57
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${V.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${V.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${V.fit.limiting}`),V.limitedFit)console.log(`
57
58
  --- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(V.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${V.limitedFit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU (Limited): ${V.limitedFit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor (Limited): ${V.limitedFit.limiting}`)}}console.log(`
58
- === Full Capabilities JSON ===`),console.log(JSON.stringify(_,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}async function W4({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.34"}`),console.log(`Generating STT model capabilities comparison...
59
- `),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.34"}`),X.push(`## STT Model Capabilities Comparison
60
- `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))K[L]=j(K[L]||{},B);else K[L]=B}),K},W=Z||{},{server:H,generators:Q=[],...J}=W,N=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((K)=>K?.type==="ggml-stt");if(Y.length>0&&U){let K=Y.find((L)=>L.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},_=[];for(let U=0;U<$.length;U+=1){let Y=$[U],{repoId:K,filename:L}=H3(Y);console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let B=G(K);B={...B||{},model:{...B?.model||{},repo_id:K,...L&&{filename:L}}};let E=await r("ggml-stt",null,{config:B,includeBreakdown:!0});_.push({modelId:Y,repoId:K,filename:L,capabilities:E,modelInfo:E.buttress?.selected||null,modelConfig:E.modelConfig||null})}let O=(U)=>U?(U/1024/1024).toFixed(1):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),X.push("|-------|-----------|------------------------|---------------------|----------|----------|"),_.forEach(({modelId:U,modelInfo:Y})=>{let K=O(Y?.modelBytes),L=O(Y?.processingBytes||Y?.kvCacheBytes),B=O(Y?.fit?.totalRequiredBytes),E=z(Y?.fit?.fitsInGpu),M=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${K} | ${L} | ${B} | ${E} | ${M} |`)}),X.push(`
59
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(_,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}async function Y4({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.38"}`),console.log(`Generating STT model capabilities comparison...
60
+ `),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.38"}`),X.push(`## STT Model Capabilities Comparison
61
+ `),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let K=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))K[L]=j(K[L]||{},B);else K[L]=B}),K},W=Z||{},{server:H,generators:Q=[],...J}=W,N=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((K)=>K?.type==="ggml-stt");if(Y.length>0&&U){let K=Y.find((L)=>L.model?.repo_id===U);if(K)return N(K)}}return Object.keys(J).length>0?N({}):null},_=[];for(let U=0;U<$.length;U+=1){let Y=$[U],{repoId:K,filename:L}=G3(Y);console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let B=G(K);B={...B||{},model:{...B?.model||{},repo_id:K,...L&&{filename:L}}};let E=await r("ggml-stt",null,{config:B,includeBreakdown:!0});_.push({modelId:Y,repoId:K,filename:L,capabilities:E,modelInfo:E.buttress?.selected||null,modelConfig:E.modelConfig||null})}let O=(U)=>U?(U/1024/1024).toFixed(1):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),X.push("|-------|-----------|------------------------|---------------------|----------|----------|"),_.forEach(({modelId:U,modelInfo:Y})=>{let K=O(Y?.modelBytes),L=O(Y?.processingBytes||Y?.kvCacheBytes),B=O(Y?.fit?.totalRequiredBytes),E=z(Y?.fit?.fitsInGpu),M=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${K} | ${L} | ${B} | ${E} | ${M} |`)}),X.push(`
61
62
  ---`),X.push(`
62
63
  ### System Information`);let V=null;if(process.platform!=="win32")try{V=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${u.hostname()}`),X.push(`- **OS:** ${u.type()} ${u.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${u.cpus().length}`),X.push(`- **Total System Memory:** ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`),_.length>0){let Y=_[0].capabilities.buttress?.selected;if(Y){let K=Y.cpuTotalBytes>0?(Y.cpuUsableBytes/Y.cpuTotalBytes*100).toFixed(0):0;if(X.push(`- **Usable CPU Memory:** ${(Y.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(Y.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.hasGpu){let L=Y.devices.filter((B)=>B.type==="gpu");if(L.length>0){let B=L[0];X.push(`- **GPU Backend:** ${B.backend}`),X.push(`- **GPU Name:** ${B.deviceName}`),X.push(`- **GPU Total Memory:** ${(B.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let E=Y.gpuTotalBytes>0?(Y.gpuUsableBytes/Y.gpuTotalBytes*100).toFixed(0):0;X.push(`- **GPU Usable Memory:** ${(Y.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${E}% of ${(Y.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else X.push("- **GPU:** Not available")}}X.push(`
63
64
  ### Command Used`);let R=process.argv.slice(2).join(" ");if(X.push(`\`\`\`bash
64
65
  ${process.argv[0]} ${process.argv[1]} ${R}
65
66
  \`\`\``),X.push(`
66
- ### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.34"}`),Z&&Object.keys(Z).length>0){X.push(`
67
- ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=N3.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=J3.join(process.cwd(),A);Q3.writeFileSync(w,X.join(`
67
+ ### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.38"}`),Z&&Object.keys(Z).length>0){X.push(`
68
+ ### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=U3.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=V3.join(process.cwd(),A);H3.writeFileSync(w,X.join(`
68
69
  `),"utf8"),console.log(`
69
- STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function U3({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.34"}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:X,filename:j}=H3($),W=Z||{},{server:H,generators:Q=[],...J}=W,N=(q={},A={})=>{let w=Array.isArray(q)?[...q]:{...q};return Object.entries(A||{}).forEach(([U,Y])=>{if(Y&&typeof Y==="object"&&!Array.isArray(Y))w[U]=N(w[U]||{},Y);else w[U]=Y}),w},G=(q)=>N(JSON.parse(JSON.stringify(J)),q||{}),O=((q)=>{if(Array.isArray(Q)&&Q.length>0){let A=Q.filter((w)=>w?.type==="ggml-stt");if(A.length>0){if(q){let w=A.find((U)=>U.model?.repo_id===q);if(w)return G(w)}}}if(Object.keys(J).length>0)return G({});return null})(X);if(X)O={...O||{},model:{...J.runtime,...O?.model||{},repo_id:X,...j&&{filename:j}}};let z=await r("ggml-stt",null,{config:O,includeBreakdown:!0}),V=z.buttress?.selected||null,R=z.modelConfig||null;if(X||R?.repoId){console.log(`
70
+ STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function _3({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.38"}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:X,filename:j}=G3($),W=Z||{},{server:H,generators:Q=[],...J}=W,N=(q={},A={})=>{let w=Array.isArray(q)?[...q]:{...q};return Object.entries(A||{}).forEach(([U,Y])=>{if(Y&&typeof Y==="object"&&!Array.isArray(Y))w[U]=N(w[U]||{},Y);else w[U]=Y}),w},G=(q)=>N(JSON.parse(JSON.stringify(J)),q||{}),O=((q)=>{if(Array.isArray(Q)&&Q.length>0){let A=Q.filter((w)=>w?.type==="ggml-stt");if(A.length>0){if(q){let w=A.find((U)=>U.model?.repo_id===q);if(w)return G(w)}}}if(Object.keys(J).length>0)return G({});return null})(X);if(X)O={...O||{},model:{...J.runtime,...O?.model||{},repo_id:X,...j&&{filename:j}}};let z=await r("ggml-stt",null,{config:O,includeBreakdown:!0}),V=z.buttress?.selected||null,R=z.modelConfig||null;if(X||R?.repoId){console.log(`
70
71
  === Model Information ===`);let q=X||R?.repoId;if(console.log(`Repository ID: ${q}`),j)console.log(`Filename: ${j}`);if(V?.modelBytes)console.log(`Model Size: ${(V.modelBytes/1024/1024).toFixed(1)} MB`);let A=V?.processingBytes||V?.kvCacheBytes;if(A)console.log(`Processing Buffer: ${(A/1024/1024).toFixed(1)} MB`);if(V?.modelBytes&&A)console.log(`Total Required Memory: ${((V.modelBytes+A)/1024/1024).toFixed(1)} MB`);else if(z.buttress?.selected?.fit){let{totalRequiredBytes:w}=z.buttress.selected.fit;console.log(`Total Required Memory: ${(w/1024/1024).toFixed(1)} MB`)}}if(z.buttress?.selected){let{selected:q}=z.buttress;console.log(`
71
72
  === Hardware Information ===`);let A=null;if(process.platform!=="win32")try{A=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(A)console.log(`System: ${A}`);else console.log(`Hostname: ${u.hostname()}`),console.log(`OS: ${u.type()} ${u.release()}`);console.log(`Platform: ${q.platform}`),console.log(`CPU Cores: ${u.cpus().length}`),console.log(`Total System Memory: ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`);let w=q.cpuTotalBytes>0?(q.cpuUsableBytes/q.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${w}% of ${(q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),q.hasGpu)console.log(`
72
73
  --- GPU Details ---`),q.devices.filter((Y)=>Y.type==="gpu").forEach((Y)=>{console.log(`GPU Backend: ${Y.backend}`),console.log(`GPU Name: ${Y.deviceName}`),console.log(`GPU Total Memory: ${(Y.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let K=q.gpuTotalBytes>0?(q.gpuUsableBytes/q.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${K}% of ${(q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.metadata){if(Y.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(Y.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
73
74
  Backend Variant: ${q.variant}`),console.log(`Performance Score: ${q.score}`),q.fit)console.log(`
74
75
  --- Model Fit Analysis ---`),console.log(`Fits in GPU: ${q.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${q.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${q.fit.limiting}`)}console.log(`
75
- === Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var H3=($)=>{if(!$)return{repoId:null,filename:null};let[Z,X]=$.split(":");return{repoId:Z,filename:X||null}};var G3=S(()=>{$1();B0()});var x$={};import z$ from"node:fs";import B$ from"node:path";import w$ from"@iarna/toml";async function R$($){if(!$?.generators||!Array.isArray($.generators))return;let Z=$.generators.filter((_)=>{if(!_.model?.download)return!1;let{type:O}=_;if(!O||O!=="ggml-llm"&&O!=="ggml-stt")return console.warn(`[Download] Skipping unknown generator type: ${O}`),!1;return!0});if(Z.length===0)return;let{server:X,generators:j,...W}=$,H=Z.map((_)=>{let{type:O}=_,z=_.model?.repo_id;console.log(`[Download] Starting pre-download for ${O}: ${z}`);let V={...W,backend:_.backend||{},model:_.model||{},runtime:{...W.runtime,..._.runtime||{}}};return t0(O,V,{onProgress:()=>{},onComplete:({repoId:R,alreadyExists:q})=>{if(q)console.log(`[Download] Pre-download complete (already exists): ${R}`);else console.log(`[Download] Pre-download complete: ${R}`)},onError:(R)=>{console.error(`[Download] Pre-download failed for ${z}:`,R.message)}})}),Q=await Promise.all(H),J=Q.filter((_)=>_.started).length,N=Q.filter((_)=>_.alreadyExists).length,G=Q.filter((_)=>_.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${J} started, ${N} already exist, ${G} already downloading`)}var K3,_3,q3,J1,Y0=null,E$,M$,O3,N1,F$;var L3=S(async()=>{G3();S0();X4();await B3();if(process.argv.includes("--version")||process.argv.includes("-v"))console.log("2.23.0-beta.34"),process.exit(0);if(process.argv.includes("--help")||process.argv.includes("-h"))console.log(`
76
- bricks-buttress v${"2.23.0-beta.34"}
76
+ === Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var G3=($)=>{if(!$)return{repoId:null,filename:null};let[Z,X]=$.split(":");return{repoId:Z,filename:X||null}};var q3=S(()=>{$1();B0()});var S6={};import B6 from"node:fs";import w6 from"node:path";import R6 from"@iarna/toml";async function E6($){if(!$?.generators||!Array.isArray($.generators))return;let Z=$.generators.filter((_)=>{if(!_.model?.download)return!1;let{type:O}=_;if(!O||O!=="ggml-llm"&&O!=="ggml-stt")return console.warn(`[Download] Skipping unknown generator type: ${O}`),!1;return!0});if(Z.length===0)return;let{server:X,generators:j,...W}=$,H=Z.map((_)=>{let{type:O}=_,z=_.model?.repo_id;console.log(`[Download] Starting pre-download for ${O}: ${z}`);let V={...W,backend:_.backend||{},model:_.model||{},runtime:{...W.runtime,..._.runtime||{}}};return t0(O,V,{onProgress:()=>{},onComplete:({repoId:R,alreadyExists:q})=>{if(q)console.log(`[Download] Pre-download complete (already exists): ${R}`);else console.log(`[Download] Pre-download complete: ${R}`)},onError:(R)=>{console.error(`[Download] Pre-download failed for ${z}:`,R.message)}})}),Q=await Promise.all(H),J=Q.filter((_)=>_.started).length,N=Q.filter((_)=>_.alreadyExists).length,G=Q.filter((_)=>_.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${J} started, ${N} already exist, ${G} already downloading`)}var O3,L3,A3,J1,Y0=null,M6,F6,z3,N1,x6;var B3=S(async()=>{q3();S0();j4();await E3();if(process.argv.includes("--version")||process.argv.includes("-v"))console.log("2.23.0-beta.38"),process.exit(0);if(process.argv.includes("--help")||process.argv.includes("-h"))console.log(`
77
+ bricks-buttress v${"2.23.0-beta.38"}
77
78
 
78
79
  Buttress server for remote inference with GGML backends.
79
80
 
@@ -106,5 +107,5 @@ Examples:
106
107
  bricks-buttress --config ./config.toml
107
108
  bricks-buttress --test-caps ggml-llm --test-models-default
108
109
  bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
109
- `),process.exit(0);K3=process.argv.findIndex(($)=>$==="--port"||$==="-p"),_3=K3>=0?Number(process.argv[K3+1]):void 0,q3=process.argv.findIndex(($)=>$==="--config"||$==="-c"),J1=q3>=0?process.argv[q3+1]:null;if(J1){let $;if(J1.includes(`
110
- `))$=J1;else{let Z=B$.resolve(J1);try{$=z$.readFileSync(Z,"utf8")}catch(X){console.error(`Failed to read Buttress config at ${Z}:`,X),process.exit(1)}}try{let Z=w$.parse($);if(Z.env&&typeof Z.env==="object")Object.entries(Z.env).forEach(([X,j])=>{if(process.env[X]===void 0)process.env[X]=String(j)}),delete Z.env;Y0=Z}catch(Z){console.error("Failed to parse TOML config:",Z),process.exit(1)}}E$=["ggml-org/gpt-oss-20b-GGUF","ggml-org/gpt-oss-120b-GGUF","unsloth/Nemotron-3-Nano-30B-A3B-GGUF","unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF","bartowski/Mistral-Nemo-Instruct-2407-GGUF","mistralai/Magistral-Small-2509-GGUF","mistralai/Ministral-3-14B-Reasoning-2512-GGUF","bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF","bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF","ggml-org/gemma-3-12b-it-qat-GGUF","ggml-org/gemma-3-27b-it-qat-GGUF","unsloth/phi-4-GGUF"],M$=["BricksDisplay/whisper-ggml:ggml-small.bin","BricksDisplay/whisper-ggml:ggml-small-q8_0.bin","BricksDisplay/whisper-ggml:ggml-medium.bin","BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3.bin"],O3=process.argv.findIndex(($)=>$==="--test-caps");if(O3>=0){let $=process.argv[O3+1]||"ggml-llm";if($!=="ggml-llm"&&$!=="ggml-stt")console.error("Only ggml-llm and ggml-stt backends are supported for testing capabilities"),process.exit(1);let Z=process.argv.findIndex((j)=>j==="--test-models"),X=process.argv.includes("--test-models-default");if($==="ggml-stt")if(Z>=0){let j=process.argv[Z+1];if(!j)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let W=j.split(",").map((H)=>H.trim());await W4({modelIds:W,defaultConfig:Y0})}else if(X)await W4({modelIds:M$,defaultConfig:Y0});else{let j=process.argv.findIndex((H)=>H==="--test-caps-model-id"),W=j>=0?process.argv[j+1]:null;await U3({modelId:W,defaultConfig:Y0})}else 
if(Z>=0){let j=process.argv[Z+1];if(!j)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let W=j.split(",").map((H)=>H.trim());await j4({modelIds:W,defaultConfig:Y0})}else if(X)await j4({modelIds:E$,defaultConfig:Y0});else{let j=process.argv.findIndex((H)=>H==="--test-caps-model-id"),W=j>=0?process.argv[j+1]:null;await V3({modelId:W,defaultConfig:Y0})}}N1=t1(Y0);if(_3)N1.server.port=_3;if(!N1.server.port)N1.server.port=Number(process.env.BUTTRESS_PORT)||2080;F$=process.env.ENABLE_OPENAI_COMPAT_ENDPOINT==="1";z3({config:N1,enableOpenAICompat:F$}).then(async({port:$,openaiEnabled:Z,autoDiscover:X})=>{let j=Y1();if(console.log(`Buttress server listening on port ${$}`),console.log("--------------------------------"),await A3(),console.log(),console.log("Current supported Generators:"),console.log("- LLM (GGML)"),console.log("- STT (GGML)"),console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${j}:${$} to connect to this server via LAN.`),console.log(`- Visit http://${j}:${$}/status to see status via LAN.`),console.log(),Z)console.log("OpenAI-compatible API [EXPERIMENTAL]:"),console.log(`- Base URL: http://${j}:${$}/oai-compat/v1`),console.log(`- Chat completions: POST http://${j}:${$}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${j}:${$}/oai-compat/v1/models`),console.log();else console.log("OpenAI-compatible API [EXPERIMENTAL]: disabled"),console.log(" Set ENABLE_OPENAI_COMPAT_ENDPOINT=1 to enable"),console.log();if(X)console.log("Auto-discover enabled"),console.log();if(Y0)await R$(Y0)}).catch(($)=>{console.error("Failed to start Buttress server:",$),process.exitCode=1})});import{node as S$}from"@elysiajs/node";import{Elysia as P$,t as P0}from"elysia";import{ReadableStream as T$}from"node:stream/web";import k$ from"node:fs/promises";import{ZodError as D$}from"zod";var b$=async()=>{let 
$=`https://registry.npmjs.org/${"@fugood/buttress-server"}/latest`;try{let Z=new AbortController,X=setTimeout(()=>Z.abort(),3000),j=await fetch($,{headers:{Accept:"application/json"},signal:Z.signal});if(clearTimeout(X),!j.ok)return null;return(await j.json()).version||null}catch{return null}},v$=($,Z)=>{if(!Z)return!1;let X=$.split(/[.-]/),j=Z.split(/[.-]/);for(let W=0;W<Math.max(X.length,j.length);W+=1){let H=parseInt(X[W])||0,Q=parseInt(j[W])||0;if(Q>H)return!0;if(Q<H)return!1}return!1},h$=($)=>{console.log(""),console.log("\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m"),console.log("\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m","2.23.0-beta.34".padEnd(12),$.padEnd(12),"\x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m","@fugood/buttress-server".padEnd(27)),console.log("\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m"),console.log("")},A3=async()=>{try{let $=await b$();if($&&v$("2.23.0-beta.34",$))h$($)}catch($){}},C$,I$=async({backend:$,router:Z,config:X,enableOpenAICompat:j})=>{try{await k$.mkdir(X.server.temp_file_dir,{recursive:!0})}catch{}let W=Y1()||"0.0.0.0",H={id:X.server.id,name:X.server.name,version:"2.23.0-beta.34",address:W,port:X.server.port,url:`http://${W}:${X.server.port}`,generators:g2(X,X.generators.map((N)=>N.type)),authentication:{required:!0,type:"device-group"}},Q=new P$({serve:{maxRequestBodySize:X.server.max_body_size},websocket:{idleTimeout:Math.ceil(X.server.session_timeout/1000)},adapter:C$?S$():void 0}).state({sessions:new Map,backend:$||e0,config:X,serverInfo:H});if(Z)Q.use(Z);if(X.autodiscover?.http?.enabled)Q.use(n1(X));if(Q.use(s1),Q.use(r1),j)Q.use(X1(X));let J={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603};return Q.ws("/buttress/rpc",{parse:(N,G)=>{if(typeof 
G==="string")try{return JSON.parse(G)}catch{return N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_REQUEST,message:"Invalid request"},id:null})),null}return G},body:P0.Object({jsonrpc:P0.String(),method:P0.String(),params:P0.String(),id:P0.String()}),open(N){let G=N.id??N.raw?.id??N.remoteAddress;if(console.log(`[Request] New connection: ${G}`),!N.data.store.sessions.has(G))N.data.store.sessions.set(G,{streams:new Map,generators:new Set,timeout:null});else{let _=N.data.store.sessions.get(G);clearTimeout(_.timeout),_.timeout=null}},async message(N,{id:G,method:_,params:O}){let z=N.id??N.raw?.id??N.remoteAddress;console.log(`[Request] Received request from ${z}: ${_}`);let V=N.data.store.sessions.get(z),[R,q]=_.split("."),A=e2[R]?.[q];if(!A){N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.METHOD_NOT_FOUND,message:"Method not found"},id:G}));return}try{if(_==="cancel"){if(V.streams.has(G))V.streams.get(G)?.cancel(),V.streams.delete(G);return}if(_==="ping"){N.send(JSON.stringify({jsonrpc:"2.0",result:"pong",id:G}));return}let w=Z3(O),U=t2[R]?.[q],Y=U?U.parse(w):w,K={...N.data.store,peerId:z,session:V},L=await A(K,...Y);if(L instanceof T$){V.streams.set(G,L),N.send(JSON.stringify({jsonrpc:"2.0",result:{type:"stream"},id:G}));try{let B=L.getReader();while(!0){let{value:E,done:M}=await B.read();if(M)break;let{event:F,data:x}=E;N.send(JSON.stringify({jsonrpc:"2.0",method:`notification/${F}`,params:W1(x),id:G}))}N.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_end",id:G}))}catch(B){console.error(B),N.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_error",params:W1(B),id:G}))}V.streams.delete(G)}else N.send(JSON.stringify({jsonrpc:"2.0",result:W1(L),id:G}))}catch(w){if(w instanceof D$){N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_PARAMS,message:"Invalid params",data:w.issues},id:G}));return}console.error(w),N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INTERNAL_ERROR,message:String(w)},id:G}))}},async close(N){let 
G=N.id??N.raw?.id??N.remoteAddress;console.log(`[Request] Connection closed: ${G}`);let{backend:_,sessions:O}=N.data.store,z=O.get(G);if(!z)return;z.streams.forEach((V)=>V.cancel()),z.streams.clear(),z.timeout=setTimeout(()=>{O.delete(G),console.log(`[Request] Session timed out: ${G}`);let{generators:V}=z;V.forEach((R)=>{_.finalizeGenerator(R)})},X.server.session_timeout)}}),{app:Q,config:X}},z3=async({backend:$,router:Z,config:X,enableOpenAICompat:j=!1})=>{let{app:W,config:H}=await I$({backend:$,router:Z,config:X,enableOpenAICompat:j}),{server:{port:Q}}=H,J=[new Promise((G)=>W.listen(Q,G))],N=null;if(H.autodiscover)N=new Z4(H.autodiscover,()=>W.store.serverInfo),J.push(N.start());return await Promise.all(J),{app:W,port:Q,openaiEnabled:j,autoDiscover:N}};var B3=S(async()=>{$1();I2();$3();X3();S0();Y3();X4();$1();S0();C$=typeof process<"u"&&process.versions&&process.versions.node;if(Q4.main==Q4.module)await L3().then(() => x$)});await B3();export{z3 as startServer,t0 as startModelDownload,t1 as processConfig,h$ as logUpdateMessage,I$ as createServer,v$ as compareVersions,b$ as checkForUpdates,A3 as checkAndNotifyUpdates};
110
+ `),process.exit(0);O3=process.argv.findIndex(($)=>$==="--port"||$==="-p"),L3=O3>=0?Number(process.argv[O3+1]):void 0,A3=process.argv.findIndex(($)=>$==="--config"||$==="-c"),J1=A3>=0?process.argv[A3+1]:null;if(J1){let $;if(J1.includes(`
111
+ `))$=J1;else{let Z=w6.resolve(J1);try{$=B6.readFileSync(Z,"utf8")}catch(X){console.error(`Failed to read Buttress config at ${Z}:`,X),process.exit(1)}}try{let Z=R6.parse($);if(Z.env&&typeof Z.env==="object")Object.entries(Z.env).forEach(([X,j])=>{if(process.env[X]===void 0)process.env[X]=String(j)}),delete Z.env;Y0=Z}catch(Z){console.error("Failed to parse TOML config:",Z),process.exit(1)}}M6=["ggml-org/gpt-oss-20b-GGUF","ggml-org/gpt-oss-120b-GGUF","unsloth/Nemotron-3-Nano-30B-A3B-GGUF","unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF","bartowski/Mistral-Nemo-Instruct-2407-GGUF","mistralai/Magistral-Small-2509-GGUF","mistralai/Ministral-3-14B-Reasoning-2512-GGUF","bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF","bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF","ggml-org/gemma-3-12b-it-qat-GGUF","ggml-org/gemma-3-27b-it-qat-GGUF","unsloth/phi-4-GGUF"],F6=["BricksDisplay/whisper-ggml:ggml-small.bin","BricksDisplay/whisper-ggml:ggml-small-q8_0.bin","BricksDisplay/whisper-ggml:ggml-medium.bin","BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3.bin"],z3=process.argv.findIndex(($)=>$==="--test-caps");if(z3>=0){let $=process.argv[z3+1]||"ggml-llm";if($!=="ggml-llm"&&$!=="ggml-stt")console.error("Only ggml-llm and ggml-stt backends are supported for testing capabilities"),process.exit(1);let Z=process.argv.findIndex((j)=>j==="--test-models"),X=process.argv.includes("--test-models-default");if($==="ggml-stt")if(Z>=0){let j=process.argv[Z+1];if(!j)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let W=j.split(",").map((H)=>H.trim());await Y4({modelIds:W,defaultConfig:Y0})}else if(X)await Y4({modelIds:F6,defaultConfig:Y0});else{let j=process.argv.findIndex((H)=>H==="--test-caps-model-id"),W=j>=0?process.argv[j+1]:null;await _3({modelId:W,defaultConfig:Y0})}else 
if(Z>=0){let j=process.argv[Z+1];if(!j)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let W=j.split(",").map((H)=>H.trim());await W4({modelIds:W,defaultConfig:Y0})}else if(X)await W4({modelIds:M6,defaultConfig:Y0});else{let j=process.argv.findIndex((H)=>H==="--test-caps-model-id"),W=j>=0?process.argv[j+1]:null;await K3({modelId:W,defaultConfig:Y0})}}N1=e1(Y0);if(L3)N1.server.port=L3;if(!N1.server.port)N1.server.port=Number(process.env.BUTTRESS_PORT)||2080;x6=process.env.ENABLE_OPENAI_COMPAT_ENDPOINT==="1";R3({config:N1,enableOpenAICompat:x6}).then(async({port:$,openaiEnabled:Z,autoDiscover:X})=>{let j=Y1();if(console.log(`Buttress server listening on port ${$}`),console.log("--------------------------------"),await w3(),console.log(),console.log("Current supported Generators:"),console.log("- LLM (GGML)"),console.log("- STT (GGML)"),console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${j}:${$} to connect to this server via LAN.`),console.log(`- Visit http://${j}:${$}/status to see status via LAN.`),console.log(),Z)console.log("OpenAI-compatible API [EXPERIMENTAL]:"),console.log(`- Base URL: http://${j}:${$}/oai-compat/v1`),console.log(`- Chat completions: POST http://${j}:${$}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${j}:${$}/oai-compat/v1/models`),console.log();else console.log("OpenAI-compatible API [EXPERIMENTAL]: disabled"),console.log(" Set ENABLE_OPENAI_COMPAT_ENDPOINT=1 to enable"),console.log();if(X)console.log("Auto-discover enabled"),console.log();if(Y0)await E6(Y0)}).catch(($)=>{console.error("Failed to start Buttress server:",$),process.exitCode=1})});import{node as P6}from"@elysiajs/node";import{Elysia as T6,t as P0}from"elysia";import{ReadableStream as k6}from"node:stream/web";import D6 from"node:fs/promises";import{ZodError as b6}from"zod";var v6=async()=>{let 
$=`https://registry.npmjs.org/${"@fugood/buttress-server"}/latest`;try{let Z=new AbortController,X=setTimeout(()=>Z.abort(),3000),j=await fetch($,{headers:{Accept:"application/json"},signal:Z.signal});if(clearTimeout(X),!j.ok)return null;return(await j.json()).version||null}catch{return null}},h6=($,Z)=>{if(!Z)return!1;let X=$.split(/[.-]/),j=Z.split(/[.-]/);for(let W=0;W<Math.max(X.length,j.length);W+=1){let H=parseInt(X[W])||0,Q=parseInt(j[W])||0;if(Q>H)return!0;if(Q<H)return!1}return!1},C6=($)=>{console.log(""),console.log("\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m"),console.log("\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m","2.23.0-beta.38".padEnd(12),$.padEnd(12),"\x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m","@fugood/buttress-server".padEnd(27)),console.log("\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m"),console.log("")},w3=async()=>{try{let $=await v6();if($&&h6("2.23.0-beta.38",$))C6($)}catch($){}},I6,y6=async({backend:$,router:Z,config:X,enableOpenAICompat:j})=>{try{await D6.mkdir(X.server.temp_file_dir,{recursive:!0})}catch{}let W=Y1()||"0.0.0.0",H={id:X.server.id,name:X.server.name,version:"2.23.0-beta.38",address:W,port:X.server.port,url:`http://${W}:${X.server.port}`,generators:l2(X,X.generators.map((N)=>N.type)),authentication:{required:!0,type:"device-group"}},Q=new T6({serve:{maxRequestBodySize:X.server.max_body_size},websocket:{idleTimeout:Math.ceil(X.server.session_timeout/1000)},adapter:I6?P6():void 0}).state({sessions:new Map,backend:$||e0,config:X,serverInfo:H});if(Z)Q.use(Z);if(X.autodiscover?.http?.enabled)Q.use(n1(X));if(Q.use(s1),Q.use(a1),j)Q.use(X1(X));let J={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603};return Q.ws("/buttress/rpc",{parse:(N,G)=>{if(typeof 
G==="string")try{return JSON.parse(G)}catch{return N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_REQUEST,message:"Invalid request"},id:null})),null}return G},body:P0.Object({jsonrpc:P0.String(),method:P0.String(),params:P0.String(),id:P0.String()}),open(N){let G=N.id??N.raw?.id??N.remoteAddress;if(console.log(`[Request] New connection: ${G}`),!N.data.store.sessions.has(G))N.data.store.sessions.set(G,{streams:new Map,generators:new Set,timeout:null});else{let _=N.data.store.sessions.get(G);clearTimeout(_.timeout),_.timeout=null}},async message(N,{id:G,method:_,params:O}){let z=N.id??N.raw?.id??N.remoteAddress;console.log(`[Request] Received request from ${z}: ${_}`);let V=N.data.store.sessions.get(z),[R,q]=_.split("."),A=X3[R]?.[q];if(!A){N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.METHOD_NOT_FOUND,message:"Method not found"},id:G}));return}try{if(_==="cancel"){if(V.streams.has(G))V.streams.get(G)?.cancel(),V.streams.delete(G);return}if(_==="ping"){N.send(JSON.stringify({jsonrpc:"2.0",result:"pong",id:G}));return}let w=W3(O),U=Z3[R]?.[q],Y=U?U.parse(w):w,K={...N.data.store,peerId:z,session:V},L=await A(K,...Y);if(L instanceof k6){V.streams.set(G,L),N.send(JSON.stringify({jsonrpc:"2.0",result:{type:"stream"},id:G}));try{let B=L.getReader();while(!0){let{value:E,done:M}=await B.read();if(M)break;let{event:F,data:x}=E;N.send(JSON.stringify({jsonrpc:"2.0",method:`notification/${F}`,params:W1(x),id:G}))}N.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_end",id:G}))}catch(B){console.error(B),N.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_error",params:W1(B),id:G}))}V.streams.delete(G)}else N.send(JSON.stringify({jsonrpc:"2.0",result:W1(L),id:G}))}catch(w){if(w instanceof b6){N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_PARAMS,message:"Invalid params",data:w.issues},id:G}));return}console.error(w),N.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INTERNAL_ERROR,message:String(w)},id:G}))}},async close(N){let 
G=N.id??N.raw?.id??N.remoteAddress;console.log(`[Request] Connection closed: ${G}`);let{backend:_,sessions:O}=N.data.store,z=O.get(G);if(!z)return;z.streams.forEach((V)=>V.cancel()),z.streams.clear(),z.timeout=setTimeout(()=>{O.delete(G),console.log(`[Request] Session timed out: ${G}`);let{generators:V}=z;V.forEach((R)=>{_.finalizeGenerator(R)})},X.server.session_timeout)}}),{app:Q,config:X}},R3=async({backend:$,router:Z,config:X,enableOpenAICompat:j=!1})=>{let{app:W,config:H}=await y6({backend:$,router:Z,config:X,enableOpenAICompat:j}),{server:{port:Q}}=H,J=[new Promise((G)=>W.listen(Q,G))],N=null;if(H.autodiscover)N=new X4(H.autodiscover,()=>W.store.serverInfo),J.push(N.start());return await Promise.all(J),{app:W,port:Q,openaiEnabled:j,autoDiscover:N}};var E3=S(async()=>{$1();m2();j3();Y3();S0();N3();j4();$1();S0();I6=typeof process<"u"&&process.versions&&process.versions.node;if(J4.main==J4.module)await B3().then(() => S6)});await E3();export{R3 as startServer,t0 as startModelDownload,e1 as processConfig,C6 as logUpdateMessage,y6 as createServer,h6 as compareVersions,v6 as checkForUpdates,w3 as checkAndNotifyUpdates};