@fugood/buttress-server 2.23.0-beta.39 → 2.23.0-beta.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.mjs +32 -32
- package/package.json +3 -3
package/lib/index.mjs
CHANGED
|
@@ -1,80 +1,80 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{createRequire as T3}from"node:module";var S3=Object.defineProperty;var P3=($,Z)=>{for(var X in Z)S3($,X,{get:Z[X],enumerable:!0,configurable:!0,set:(j)=>Z[X]=()=>j})};var S=($,Z)=>()=>($&&(Z=$($=0)),Z);var J4=T3(import.meta.url);var N4=($,Z,X)=>Math.min(Math.max($,Z),X),H4=($)=>$?40:0,V4=($=0)=>{if(!$)return 0;return N4($/12884901888*20,0,20)},U4=($=0)=>{if(!$)return 0;return N4($/34359738368*10,0,10)},G4=($)=>$?10:0,K4=($="default",Z=null)=>{let X=String($).toLowerCase();if(!X)return 0;if(X.includes("cuda"))return 20;if(X.includes("vulkan"))return 10;if(X.includes("default"))return Z==="darwin"||Z==="ios"?15:5;return 0},K0=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:H=!0}={})=>{if(!H)return 0;let Q=H4(X)+K4(Z,$)+V4(j),J=U4(W),N=G4(H);return Math.min(100,Math.round(Q+J+N))},K1=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:H=!0}={})=>({gpuPresence:H4(X),variant:K4(Z,$),gpuMemory:V4(j),cpuMemory:U4(W),availability:G4(H)});var L4,D0=0.85,b0=0.5,_4=($)=>{if(!$&&$!==0)return[];if(Array.isArray($))return $.filter((Z)=>Z!=null);return[$]},k3=($)=>{if(!$)return null;return String($).trim().toLowerCase()||null},D3=({variant:$,preferVariants:Z=[],variantPreference:X=[],defaultVariants:j=L4}={})=>{let W=[];if($)W.push($);W.push(..._4(Z)),W.push(..._4(X)),W.push(...j);let H=W.map(k3).filter(Boolean);return Array.from(new Set(H))},q4=($={})=>{let Z=String($.type||$.deviceType||$.kind||"").toLowerCase();if(Z.includes("gpu"))return!0;if(Z.includes("cuda"))return!0;if(Z.includes("metal"))return!0;if(Z.includes("vulkan"))return!0;if(Z.includes("snapdragon"))return!0;return!1},b3=($)=>{if(!Array.isArray($))return[];return $.map((Z)=>({...Z}))},v3=($,Z)=>{if($==="snapdragon")return Z.filter((X)=>X.deviceName!=="GPUOpenCL");return Z},O4=({platform:$,totalMemoryInBytes:Z,variant:X,devices:j,gpuMemoryFraction:W,cpuMemoryFraction:H,ok:Q,error:J})=>{let 
N=b3(v3(X,j)),G=N.some(q4),_=N.filter((w)=>q4(w)&&Number.isFinite(Number(w.maxMemorySize))).reduce((w,U)=>w+U.maxMemorySize,0),O=Z,z=G?Math.floor(_*W):0,V=O?Math.floor(O*H):0,R={platform:$,variant:X,hasGpu:G,gpuUsableBytes:z,cpuUsableBytes:V,ok:Q},q=K0(R),A=Q?K1(R):null;return{platform:$,ok:Q,variant:X,hasGpu:G,devices:N,gpuTotalBytes:_,gpuUsableBytes:z,cpuTotalBytes:O,cpuUsableBytes:V,score:q,breakdown:A,error:J,timestamp:new Date().toISOString()}},_1=({device:$,modelBytes:Z=0,kvCacheBytes:X=0}={})=>{if(!$)return{totalRequiredBytes:Z+X,fitsInGpu:!1,fitsInCpu:!1,limiting:"unknown-device"};let j=Math.max(0,Number(Z)||0)+Math.max(0,Number(X)||0),W=$.hasGpu&&j>0&&j<=$.gpuUsableBytes,H=j>0&&j<=$.cpuUsableBytes,Q="ok";if(!W&&$.hasGpu)Q="gpu-memory";if(!H)Q=W?"cpu-memory":"insufficient-memory";return{totalRequiredBytes:j,fitsInGpu:W,fitsInCpu:H,limiting:Q}},z0=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:H=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:N=null,kvCacheBytes:G=null,limitedKvCacheBytes:_=null,dependencies:O={},defaultVariants:z=L4}={})=>{let{getBackendDevicesInfo:V,isLibVariantAvailable:R}=O;if(typeof V!=="function"||typeof R!=="function")throw TypeError("GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions");let q=D3({variant:Z,preferVariants:X,variantPreference:j,defaultVariants:z}),A=[];for(let L of q)try{if(!await R(L))throw Error(`Variant ${L} not available on this platform`);let E=await V(L);A.push(O4({platform:$,totalMemoryInBytes:J,variant:L,devices:E,gpuMemoryFraction:W,cpuMemoryFraction:H,ok:!0}))}catch(B){let E=B instanceof Error?B.message:String(B);A.push(O4({platform:$,totalMemoryInBytes:J,variant:L,devices:[],gpuMemoryFraction:W,cpuMemoryFraction:H,ok:!1,error:E}))}let U=A.filter((L)=>L.ok)[0]||null,Y={ok:Boolean(U),selected:U?{...U,breakdown:Q?U.breakdown:void 0}:null,attempts:A};if(!Q&&Y.selected)delete 
Y.selected.breakdown;if(!Y||!N&&!G)return Y;let K=(L)=>{if(!L)return L;let B=_1({device:L,modelBytes:N||0,kvCacheBytes:G||0}),E=null;if(_!=null&&_!==G)E=_1({device:L,modelBytes:N||0,kvCacheBytes:_});return{...L,fit:B,...E&&{limitedFit:E}}};return Y.selected=K(Y.selected),Y.attempts=Array.isArray(Y.attempts)?Y.attempts.map(K):Y.attempts,Y},v0="ggml-llm";var h0=S(()=>{L4=["cuda","vulkan","snapdragon","default"]});var q1="ggml-stt",A4,O1=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:H=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:N=null,processingBytes:G=null,kvCacheBytes:_=null,dependencies:O={}}={})=>{let z=j&&j.length>0?j:A4;return z0({platform:$,variant:Z,preferVariants:X,variantPreference:z,gpuMemoryFraction:W,cpuMemoryFraction:H,includeBreakdown:Q,totalMemoryInBytes:J,modelBytes:N,kvCacheBytes:G??_,dependencies:O,defaultVariants:A4})};var L1=S(()=>{h0();A4=["cuda","vulkan","default"]});var h3,_0=async({platform:$,totalMemoryInBytes:Z,backend:X=v0,dependencies:j,...W}={})=>{let H=h3.get(X);if(!H)throw Error(`No capability detector registered for backend "${X}"`);return await H({...W,dependencies:j,totalMemoryInBytes:Z,platform:$})};var z4=S(()=>{h0();L1();h3=new Map([[v0,z0],[q1,O1]])});var B4,A1=($)=>{let Z=$?String($).toLowerCase():"f16";return B4[Z]||B4.f16},z1=($,Z,X,j,W,H={},{totalLayers:Q=null,swaLayers:J=0,swaContext:N=null,swaContextMultiplier:G=1,swaAdditionalTokens:_=0,swaFull:O=!1}={})=>{if(!$||!Z||!X||!j||!W)return 0;let z=Q!=null&&Q!==void 0?Number(Q):Number($),V=Math.max(0,Math.floor(z));if(!V)return 0;let R=A1(H.k),q=A1(H.v),A=Number(X)*(Number(j)*R+Number(W)*q);if(!A)return 0;let w=Math.max(0,Number(Z)||0),U=Math.min(V,Math.max(0,Math.floor(Number(J)||0))),Y=Math.max(0,V-U),K=N!=null&&Number.isFinite(Number(N))?Math.max(0,Number(N)):w,L=Math.max(1,Number(G)||1),B=Math.max(0,Number(_)||0),E=K*L+B,M=O?w:Math.min(w,E),F=Y*w+U*Math.max(0,Math.floor(M));return 
Math.round(A*F)},C0=({modelBytes:$=0,audioLengthSeconds:Z=30,sampleRate:X=16000,bytesPerSample:j=4}={})=>{let W=Math.max(0,Number($)||0),H=Math.max(0,Math.floor(Math.max(0,Z)*X*j)),Q=1048576,J=1073741824,N;if(W<209715200)N=125829120;else if(W<524288000)N=146800640;else if(W<2147483648)N=157286400;else N=167772160;let G;if(W<209715200)G=73400320;else if(W<524288000)G=141557760;else if(W<2147483648)G=230686720;else G=230686720;let _;if(W<104857600)_=20971520;else if(W<209715200)_=31457280;else if(W<524288000)_=89128960;else if(W<2147483648)_=225443840;else _=377487360;let O=N+G+_,z=W+O+H;return{modelBytes:W,audioBufferBytes:H,processingBufferBytes:O,totalBytes:z}};var B1=S(()=>{B4={f16:2,f32:4,q8_0:1,q6_k:0.75,q5_k:0.625,q5_k_m:0.625,q5_k_s:0.625,q5_1:0.625,q5_0:0.625,q4_k:0.5,q4_k_m:0.5,q4_k_s:0.5,q4_1:0.5,q4_0:0.5,iq4_nl:0.5}});var w1=($)=>$?String($).trim().toLowerCase():null,C3=($={},Z=null)=>{if(!$)return null;let X=w1(Z),j=X?`${X}.attention.sliding_window`:null,W=(j&&$[j]!=null?$[j]:null)??$["llama.attention.sliding_window"];if(W==null)return null;let H=Number(W);return Number.isFinite(H)?H:null},R4=($=0,Z=0,X=!1)=>{let j=Math.max(0,Math.floor(Number($)||0)),W=Math.max(0,Math.floor(Number(Z)||0));if(!j||W===1)return 0;if(W<=0)return j;let H=Math.max(0,W-1),Q=Math.floor(j/W),J=j%W,N=X?Math.max(0,J-1):Math.min(J,H);return Q*H+N},w4=({arch:$,nLayer:Z=0})=>({arch:w1($),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(Z)||0)),swaLayers:0}),I3,I0=({arch:$,metadata:Z={},nLayer:X=0}={})=>{let j=w1($||Z["general.architecture"]),W=Math.max(0,Math.floor(Number(X)||0)),H=C3(Z,j),Q=j?I3.get(j):null;if(!Q)return w4({arch:j,nLayer:X});let J=Q({nLayer:W,nSwa:H,metadata:Z});if(!J||!J.enabled||!J.window||J.window<=0)return w4({arch:j,nLayer:X});let 
N=Math.max(0,Math.floor(Number(J.pattern)||0)),G=J.kvLayers!=null&&Number.isFinite(Number(J.kvLayers))?Number(J.kvLayers):W,_=Math.max(0,Math.floor(G)),O=R4(_,N,Boolean(J.denseFirst));return{arch:j,enabled:O>0,window:J.window,pattern:N,denseFirst:Boolean(J.denseFirst),type:J.type||"standard",kvLayers:_,swaLayers:O}};var E4=S(()=>{I3=new Map([["llama4",({nSwa:$})=>{if($===0)return{enabled:!1};return{enabled:!0,window:$&&$>0?$:8192,pattern:4,type:"chunked"}}],["afmoe",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["phi3",()=>({enabled:!1})],["gemma2",({nSwa:$})=>{let Z=$&&$>0?$:4096;if(!Z)return{enabled:!1};return{enabled:!0,window:Z,pattern:2,type:"standard"}}],["gemma3",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"standard"}}],["gemma3n",({nLayer:$,nSwa:Z})=>{if(!Z||Z<=0)return{enabled:!1};return{enabled:!0,window:Z,pattern:5,type:"standard",kvLayers:Math.min(20,$)}}],["gemma-embedding",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"symmetric"}}],["cohere2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["olmo2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["exaone4",({nLayer:$,nSwa:Z})=>{let X=$>=64,j=null;if(Z&&Z>0)j=Z;else if(X)j=4096;if(!j)return{enabled:!1};return{enabled:!0,window:j,pattern:4,type:"standard"}}],["gpt-oss",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:2,type:"standard"}}],["smallthinker",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:"standard"}}]])});var y0=($={})=>{let Z=$["general.architecture"],X=(z,V=null)=>{let R=$[z],q=Number(R);return 
Number.isFinite(q)?q:V},j=Z?X(`${Z}.context_length`,X("llama.context_length")):null,W=Z?X(`${Z}.block_count`,X("llama.block_count")):null,H=Z?X(`${Z}.embedding_length`,X("llama.embedding_length")):null,Q=Z?X(`${Z}.attention.head_count`,X("llama.attention.head_count")):null,J=Z?X(`${Z}.attention.head_count_kv`,X("llama.attention.head_count_kv",Q)):null,N=Z?X(`${Z}.attention.key_length`,X("llama.attention.key_length")):null,G=Z?X(`${Z}.attention.value_length`,X("llama.attention.value_length")):null,_=$["general.quantization_version"]||null,O=$["general.file_type"]||null;return{arch:Z,nCtxTrain:j,nLayer:W,nEmbd:H,nHead:Q,nHeadKv:J,nEmbdHeadK:N,nEmbdHeadV:G,quantVersion:_,fileType:O}},N0=({layerCount:$,headKvCount:Z,embdHeadKCount:X,embdHeadVCount:j,cacheTypes:W,swaConfig:H,kvUnified:Q=!1,nParallel:J=1,swaFull:N=!1})=>{let G=H?.window&&Q?Math.max(1,Number(J)||1):1;return(_)=>z1($,_,Z,X,j,W,{totalLayers:$,swaLayers:H?.swaLayers||0,swaContext:H?.window,swaFull:N,swaContextMultiplier:G})},u0=({maxCtx:$,availableMemory:Z,modelBytes:X,kvBytesForCtx:j})=>{let W=Math.max(1,Math.floor(Number($)||0));if(!j||Z<=X)return W;let H=1,Q=W,J=W;while(H<=Q){let N=Math.floor((H+Q)/2);if(X+j(N)<=Z)J=N,H=N+1;else Q=N-1}return J};var M4=S(()=>{B1()});var B0=S(()=>{z4();B1();h0();L1();E4();M4()});import{EventEmitter as y3}from"node:events";class 
E1{constructor($=u3){this.maxEntries=$,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad($){R1(this.modelLoads,$,this.maxEntries),o.emit("status:modelLoad",$),o.emit("status:change",{type:"modelLoad",entry:$})}addCompletion($){R1(this.completions,$,this.maxEntries),o.emit("status:completion",$),o.emit("status:change",{type:"completion",entry:$})}addTranscription($){R1(this.transcriptions,$,this.maxEntries),o.emit("status:transcription",$),o.emit("status:change",{type:"transcription",entry:$})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}}function M1($){let Z=(X)=>$(X);return o.on("status:change",Z),()=>o.off("status:change",Z)}function x4($){F4+=1;let Z=F4,X=M1($);return{subscriberId:Z,unsubscribe:X}}function F1($){let Z=[];return{generators:Array.from($.entries()).filter(([,j])=>j.type==="ggml-llm").map(([j,W])=>{let{instance:H}=W,Q=[];if(H.contexts)Q=Array.from(H.contexts.entries()).map(([J,N])=>{let G={key:J,refCount:N.refCount,hasModel:Boolean(N.context)},_=N.context.parallel.getStatus();return G.parallelStatus=_,Z.push({generatorId:j,contextKey:J,..._}),G});return{id:j,type:W.type,refCount:W.refCount,repoId:H.info?.model?.repoId||null,quantization:H.info?.model?.quantization||null,variant:H.info?.runtime?.variant||null,nCtx:H.info?.runtime?.n_ctx||null,nParallel:H.info?.runtime?.n_parallel||null,contexts:Q}}),parallelStatuses:Z,history:{modelLoads:t.getModelLoadHistory(),completions:t.getCompletionHistory()}}}function 
x1($){return{generators:Array.from($.entries()).filter(([,X])=>X.type==="ggml-stt").map(([X,j])=>{let{instance:W}=j,H=W.getStatus?.()||{},Q=H.queueStatus||{processing:!1,queuedCount:0};return{id:X,type:j.type,refCount:j.refCount,repoId:W.info?.model?.repoId||null,quantization:W.info?.model?.quantization||null,modelType:W.info?.model?.modelType||null,variant:W.info?.runtime?.variant||null,hasContext:H.hasContext||!1,contextRefCount:H.contextRefCount||0,queueStatus:Q}}),history:{modelLoads:J0.getModelLoadHistory(),transcriptions:J0.getTranscriptionHistory()}}}function S4($){return{timestamp:new Date().toISOString(),ggmlLlm:F1($),ggmlStt:x1($)}}var u3=9999,o,R1=($,Z,X)=>{if($.push({...Z,timestamp:Z.timestamp||new Date().toISOString()}),$.length>X)$.shift()},t,J0,F4=0;var m0=S(()=>{o=new y3;o.setMaxListeners(100);t=new E1,J0=new E1});import b from"node:path";import E0 from"node:os";import{stat as M0,mkdir as m3,open as p3,unlink as e,readFile as v4,writeFile as h4,rename as C4,readdir as f3}from"node:fs/promises";import{createHash as k1}from"node:crypto";import{gguf as g3}from"@huggingface/gguf";import{loadModel as c3,getBackendDevicesInfo as I4,isLibVariantAvailable as y4}from"@fugood/llama.node";import d3 from"bytes";import*as l3 from"node:stream/web";class i4{constructor($,Z){this.config=$,this.plan=Z,this.baseDir=$.runtime.cache_dir,this.enabled=$.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=H$($.runtime.session_cache?.max_size_bytes,10737418240),this.maxEntries=$.runtime.session_cache?.max_entries||1000,this.metadata={variant:Z.info?.runtime?.variant||null,n_gpu_layers:Z.info?.runtime?.n_gpu_layers||0,n_ctx:Z.info?.runtime?.n_ctx||0,modelPath:Z.localPath,cacheTypeK:Z.info?.runtime?.cache_type_k||"f16",cacheTypeV:Z.info?.runtime?.cache_type_v||"f16",kvUnified:Z.info?.runtime?.kv_unified??null,swaFull:Z.info?.runtime?.swa_full??null,flashAttnType:Z.info?.runtime?.flash_attn_type||"off"},this.cacheMap=null,this.initialized=!1}async 
initialize(){if(!this.enabled||this.initialized)return;try{await H0(n0(this.baseDir)),await H0(g0(this.baseDir)),await H0(n4(this.baseDir)),this.cacheMap=await t3(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch($){console.warn(`[SessionCache] Failed to initialize: ${$.message}`),this.enabled=!1}}async findMatchingEntry($){if(!this.enabled||!this.cacheMap)return null;let Z=Y$($,this.metadata,this.cacheMap);if(Z){let{entry:X}=Z;if(!await N$(X.stateFilePath))return console.log(`[SessionCache] Removing stale entry: ${X.id}`),delete this.cacheMap.entries[X.id],this.cacheMap.totalSize-=X.stateFileSize||0,await P1(this.cacheMap,this.baseDir).catch(()=>{}),null;return X.lastAccessedAt=new Date().toISOString(),await P1(this.cacheMap,this.baseDir).catch(()=>{}),{entry:X}}return null}async prepareCompletionOptions($,Z){if(!this.enabled)return{options:$,cacheEntry:null,promptPrefix:null};let X=await this.findMatchingEntry(Z);if(X){let{entry:j}=X;return console.log(`[SessionCache] Found matching entry: ${j.id} (${j.fullText.length} chars, loadStateSize=${j.loadStateSize})`),{options:{...$,load_state_path:j.stateFilePath},cacheEntry:j,promptPrefix:j.fullText}}return{options:$,cacheEntry:null,promptPrefix:null}}async saveCompletionState($,Z,X,j=0){if(!this.enabled)return null;let W=e3($,this.metadata);if(this.cacheMap.entries[W])return console.log(`[SessionCache] Entry already exists for prompt: ${W}`),await e(X).catch(()=>{}),this.cacheMap.entries[W];let H=$+Z,Q=$$(W,this.baseDir);try{await H0(b.dirname(Q)),await C4(X,Q);let J=await M0(Q),N={id:W,promptText:$,completionText:Z,fullText:H,promptTokenCount:j,stateFilePath:Q,stateFileSize:J.size,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[W]=N,this.cacheMap.totalSize+=J.size,await Q$(this.cacheMap,this.maxSizeBytes,this.maxEntries),await 
P1(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${W} (${J.size} bytes)`),N}catch(J){return console.warn(`[SessionCache] Failed to save state: ${J.message}`),await e(X).catch(()=>{}),null}}async generateTempStatePath(){return await H0(g0(this.baseDir)),Z$(this.baseDir)}async cleanup(){await J$(this.baseDir)}}async function o4($,Z,X={}){let{globalDownloadManager:j=null}=X,W=l0(Z),H=await V$(W),Q=new i4(W,H);await Q.initialize();let J={id:$,type:"ggml-llm",config:W,plan:H,info:H.info,contexts:new Map,downloads:new Map,globalDownloadManager:j,sessionCache:Q,finalized:!1},N=async()=>{if(J.finalized)return;J.finalized=!0;let Y=Array.from(J.contexts.values()),K=Y.map((B)=>{if(B.released)return Promise.resolve(!1);if(B.releaseRequested||B.releaseTimer)return Promise.resolve(!1);if(B.refCount=Math.max(0,B.refCount-1),B.refCount>0)return Promise.resolve(!1);return f0(J,B)});if(await Promise.allSettled(K),Y.length===0||Y.every((B)=>B.released))await J.sessionCache.cleanup()},G=async(Y={})=>{let{onProgress:K}=Y,L=await _$(J,K);return{modelInfo:L.modelInfo?{...L.modelInfo}:null,runtime:{...J.plan.info.runtime},download:{...J.plan.info.download}}},_=async()=>{if(J.finalized)return!1;let Y=O0(J),K=J.contexts.get(Y);if(!K)return!1;return q$(J,K,!1)},O=async(Y={})=>{let{options:K={},useCache:L=!0}=Y,B=O0(J),E=J.contexts.get(B);if(!E)throw Error(`Context "${B}" not initialized`);await E.ready;let M=K.prompt||"";if(!M&&K.messages){let x=await E.context.getFormattedChat(K.messages,K.chat_template||K.chatTemplate,{jinja:K.jinja??!0,tools:K.tools,parallel_tool_calls:K.parallel_tool_calls,tool_choice:K.tool_choice,enable_thinking:K.enable_thinking,add_generation_prompt:K.add_generation_prompt,now:K.now,chat_template_kwargs:K.chat_template_kwargs});M=x?.prompt||x||""}if(L&&J.sessionCache.enabled&&M){let{options:x}=await J.sessionCache.prepareCompletionOptions(K,M),k=await J.sessionCache.generateTempStatePath(),m=(await 
E.context.tokenize(M))?.tokens?.length||0,p={...x,save_state_path:k,save_state_size:m};console.log(`[SessionCache] save_state_size=${m} (prompt tokens)`);let l={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return G$(E.context,p,J.sessionCache,M,k,m,J.id,l)}let F={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return U$(E.context,K,J.id,F)},z=async(Y={})=>{let{text:K="",params:L={}}=Y,B=O0(J),E=J.contexts.get(B);if(!E)throw Error(`Context "${B}" not initialized`);await E.ready;let M=await E.context.tokenize(K,L);if(!M)return{tokens:[]};let F=Array.from(M.tokens??[]).map((x)=>Number(x));return{...M,tokens:F}},V=async(Y={})=>{let{tokens:K=[]}=Y,L=O0(J),B=J.contexts.get(L);if(!B)throw Error(`Context "${L}" not initialized`);await B.ready;let E=K.map((M)=>Number(M));return B.context.detokenize(E)},R=async(Y={})=>{let{messages:K=[],template:L,params:B}=Y,E=O0(J),M=J.contexts.get(E);if(!M)throw Error(`Context "${E}" not initialized`);return await M.ready,await M.context.getFormattedChat(K,L,B)},q=()=>Array.from(J.contexts.values()).some((Y)=>!Y.released&&(Y.releaseRequested||Y.releaseTimer||Y.refCount>0)),A=()=>{J.finalized=!1},w=()=>{let Y=[],K=Array.from(J.contexts.entries()).map(([L,B])=>{let E={key:L,refCount:B.refCount,hasModel:Boolean(B.context)},M=B.context.parallel.getStatus();return E.parallelStatus=M,Y.push({contextKey:L,...M}),E});return{id:J.id,type:J.type,repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null,nCtx:J.plan.info.runtime?.n_ctx||null,nParallel:J.plan.info.runtime?.n_parallel||null,contexts:K,parallelStatuses:Y}},U=(Y)=>{let 
K=Array.from(J.contexts.entries()).map(([L,B])=>B.context.parallel.subscribeToStatus((E)=>{Y({contextKey:L,...E})}));return{remove:()=>{K.forEach((L)=>{if(L?.remove)L.remove()})}}};return{id:$,type:"ggml-llm",info:H.info,contexts:J.contexts,initContext:G,completion:O,tokenize:z,detokenize:V,applyChatTemplate:R,releaseContext:_,finalize:N,getStatus:w,subscribeParallelStatus:U,hasPendingReleases:q,resetFinalized:A}}async function t4($,Z,X={}){let{onProgress:j,onComplete:W,onError:H}=X;try{let Q=l0($),J=await D1(Q),N=r4(Q,J),{repoId:G}=J;if(await R0(N,J.size)){if(console.log(`[Download] Model already exists: ${G} at ${N}`),typeof W==="function")W({localPath:N,repoId:G,alreadyExists:!0});return{started:!1,localPath:N,repoId:G,alreadyExists:!0}}let O=Z.getDownload(N);if(O)return console.log(`[Download] Already downloading: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:N,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof H==="function")H(V)}),{started:!1,localPath:N,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting download: ${G}`);let z=(async()=>{try{if(J.isSplit&&J.splitCount>0){let V=/-(\d{5})-of-(\d{5})\.gguf$/,R=b.dirname(N),q=J.splitCount,A=0;for(let w=1;w<=q;w+=1){let U=String(w).padStart(5,"0"),Y=J.filename.replace(V,`-${U}-of-${String(q).padStart(5,"0")}.gguf`),K=`${Q.model.base_url.replace(/\/+$/,"")}/${J.repoId}/resolve/${J.revision}/${Y}`,L=b.join(R,Y);if(!await R0(L))await c0(K,J.headers,L,null,(E)=>{if(E>=0&&Number.isFinite(E)){let M=(A+E)/q;if(console.log(`[Download] ${G}: ${Math.round(M*100)}%`),typeof j==="function")j(M)}});A+=1}}else await c0(J.url,J.headers,N,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}});if(console.log(`[Download] Completed: ${G}`),typeof W==="function")W({localPath:N,repoId:G})}catch(V){if(console.error(`[Download] Failed: ${G}`,V.message),typeof H==="function")H(V);throw V}finally{Z.deleteDownload(N)}})();return 
Z.setDownload(N,z),{started:!0,localPath:N,repoId:G}}catch(Q){if(console.error("[Download] Failed to start download:",Q.message),typeof H==="function")H(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function O$($){let Z=l0($),X=await D1(Z),j=await s4(X.url,X.headers,Z.runtime.cache_dir),{arch:W,nCtxTrain:H,nLayer:Q,nEmbd:J,nHead:N,nHeadKv:G,nEmbdHeadK:_,nEmbdHeadV:O,quantVersion:z,fileType:V}=y0(j),R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(N))?Number(N):0,w=Number.isFinite(Number(G))?Number(G):A,U=A>0&&q>0?q/A:128,Y=_!=null&&Number.isFinite(Number(_))?Number(_):U,K=O!=null&&Number.isFinite(Number(O))?Number(O):U,L=I0({arch:W,metadata:j,nLayer:R}),B=L&&Number.isFinite(Number(L.kvLayers))?Number(L.kvLayers):R,E=Math.max(0,Math.floor(Number(B)||0)),F=(Z.model.n_ctx?Number(Z.model.n_ctx):null)||H||4096,x={k:Z.model.cache_type_k,v:Z.model.cache_type_v},k=X.size>0?X.size:0,D=N0({layerCount:E,headKvCount:w,embdHeadKCount:Y,embdHeadVCount:K,cacheTypes:x,swaConfig:L,kvUnified:Z.model.kv_unified,nParallel:Z.model.n_parallel,swaFull:Z.model.swa_full}),m=Z.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,p=Z.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.cpu_memory_fraction))):d0,l=D(F),i=await a4(Z,{modelBytes:k,kvCacheBytes:l}),c=(i.selected.totalMemory||0)*m,T=Math.max(0,E0.totalmem()*p),f=i.selected.hasGpu?c:T,h=u0({maxCtx:F,availableMemory:f,modelBytes:k,kvBytesForCtx:D}),s=D(F),I=D(h);return{kvInfo:{nCtxTrain:H,nLayer:R,nEmbd:q,nHeadKv:w,nEmbdHeadK:Y,nEmbdHeadV:K,nHeadCount:A,nHeadKvCount:w,kvLayerCount:E,swa:L?.enabled?{window:L.window,pattern:L.pattern,denseFirst:L.denseFirst,type:L.type,layers:L.swaLayers}:null},modelBytes:k,kvCacheBytes:s,limitedKvCacheBytes:I,memoryLimitedCtx:h,quantization:{name:X.quantization||null,fileType:V,version:z}}}async function 
e4($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...H}=Z,Q=null,J=null,N=null,G=null,_=null,O=null;if(W)try{let{modelBytes:K,kvCacheBytes:L,limitedKvCacheBytes:B,memoryLimitedCtx:E,kvInfo:M,quantization:F}=await O$(W);Q=K,J=L,N=B,G=E,_=M,O=F}catch(K){}let z=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,V=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,R=await _0({...H,platform:process.platform,totalMemoryInBytes:E0.totalmem(),backend:"ggml-llm",includeBreakdown:j,gpuMemoryFraction:z,cpuMemoryFraction:V,dependencies:{getBackendDevicesInfo:I4,isLibVariantAvailable:y4},modelBytes:Q,kvCacheBytes:J,limitedKvCacheBytes:N}),q=R.selected,A=b4(q);q.modelBytes=Q||null,q.kvCacheBytes=J||null,q.memoryLimitedCtx=G||null,q.limitedKvCacheBytes=N||null,q.kvInfo=_||null,q.quantization=O||null;let w=null,U=null;if($){let K=b4($);U={...$,score:K};let L="buttress",B="buttress-higher-score";if(!R.ok)L="local",B="buttress-unavailable";else if(!K&&K!==0)L="buttress",B="missing-client-score";else{let{fit:E,limitedFit:M}=U,F=q?.fit,x=q?.limitedFit,k=E?.fitsInGpu||E?.fitsInCpu||M?.fitsInGpu||M?.fitsInCpu,D=F?.fitsInGpu||F?.fitsInCpu||x?.fitsInGpu||x?.fitsInCpu;if(k&&!D)L="local",B="client-fits-in-memory";else if(D&&!k)L="buttress",B="buttress-fits-in-memory";else if(K>A*X)L="local",B="client-better";else if(A>K*X)L="buttress",B="buttress-better";else L="either",B="comparable-scores"}w={buttressScore:A,clientScore:K,threshold:X,recommendation:L,reason:B}}if(!R.ok&&!w)w={buttressScore:A,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let Y=null;if(W)Y={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,nCtx:W.model?.n_ctx||null,cacheKType:W.model?.cache_type_k||"f16",cacheVType:W.model?.cache_type_v||"f16"};return{type:"ggml-llm",timestamp:new 
Date().toISOString(),buttress:R,client:U,comparison:w,modelConfig:Y}}var n3=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return l3},i3,u4,s3,m4=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};m4($[X],j)}else $[X]=j}),$},r3=".gguf",p4="https://huggingface.co",f4="https://huggingface.co/api",d,g4,d0=0.5,w0,S1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},p0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","snapdragon","default"].includes(Z))return Z;return null},l0=($={})=>{let Z=JSON.parse(JSON.stringify(w0));if(m4(Z,$),Z.backend.variant=p0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(S1(Z.backend.variant_preference).map(p0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=["cuda","vulkan","snapdragon","default"];if(Z.runtime.prefer_variants=Array.from(new Set(S1(Z.runtime.prefer_variants).map(p0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(S1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return 
Z.model.n_parallel=Math.max(1,Number(Z.model.n_parallel)||4),Z.model.n_batch=Math.max(1,Number(Z.model.n_batch)||512),Z.model.base_url=Z.model.base_url||p4,Z.model.api_base=Z.model.api_base||f4,Z.runtime.cache_dir=Z.runtime.cache_dir?b.resolve(Z.runtime.cache_dir):d,Z.runtime.session_cache={...w0.runtime.session_cache,...Z.runtime.session_cache||{}},Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||w0.runtime.context_release_delay_ms),Z},P4=($)=>{let Z=$.toLowerCase();return g4.find((j)=>Z.includes(j))||null},a3=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(p0).filter(Boolean)))},H0=async($)=>{await m3($,{recursive:!0})},o3=($=d)=>b.join($,".metadata-cache"),c4=($,Z,X=d)=>{let j=k1("sha256").update($).digest("hex");return b.join(o3(X),Z,`${j}.json`)},d4=async($,Z,X=d)=>{try{let j=c4($,Z,X),W=await v4(j,"utf-8");return console.log(`[Cache] Hit ${Z} cache:`,b.basename(j)),JSON.parse(W,(H,Q)=>{if(typeof Q==="string"&&Q.startsWith("__bigint__"))return BigInt(Q.slice(10));return Q})}catch(j){return null}},T1=async($,Z,X,j=d)=>{try{let W=c4($,Z,j);await H0(b.dirname(W)),await h4(W,JSON.stringify(X,(H,Q)=>{if(typeof Q==="bigint")return`__bigint__${Q.toString()}`;return Q}),"utf-8"),console.log(`[Cache] Wrote ${Z} cache:`,b.basename(W))}catch(W){console.warn(`[Cache] Failed to write ${Z} cache:`,W.message)}},n0=($=d)=>b.join($,".session-state-cache"),l4=($=d)=>b.join(n0($),"cache-map.json"),g0=($=d)=>b.join(n0($),"temp"),n4=($=d)=>b.join(n0($),"states"),T4=()=>({version:1,entries:{},totalSize:0}),t3=async($=d)=>{try{let Z=l4($),X=await v4(Z,"utf-8"),j=JSON.parse(X);if(!j.entries||typeof j.entries!=="object")return T4();return j}catch{return T4()}},P1=async($,Z=d)=>{let X=l4(Z),j=`${X}.tmp.${Date.now()}`;try{await H0(b.dirname(X)),await 
h4(j,JSON.stringify($,null,2),"utf-8"),await C4(j,X)}catch(W){throw await e(j).catch(()=>{}),W}},e3=($,Z)=>{let X=JSON.stringify({text:$,model:Z.modelPath,variant:Z.variant,n_gpu_layers:Z.n_gpu_layers,n_ctx:Z.n_ctx,cacheTypeK:Z.cacheTypeK,cacheTypeV:Z.cacheTypeV,kvUnified:Z.kvUnified,swaFull:Z.swaFull,flashAttnType:Z.flashAttnType});return k1("sha256").update(X).digest("hex").slice(0,24)},$$=($,Z=d)=>b.join(n4(Z),`${$}.bin`),Z$=($=d)=>{let Z=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return b.join(g0($),`${Z}.bin`)},X$=($,Z)=>$.modelPath===Z.modelPath&&$.variant===Z.variant&&$.n_gpu_layers===Z.n_gpu_layers&&$.n_ctx>=Z.n_ctx&&$.cacheTypeK===Z.cacheTypeK&&$.cacheTypeV===Z.cacheTypeV&&$.kvUnified===Z.kvUnified&&$.swaFull===Z.swaFull&&$.flashAttnType===Z.flashAttnType,j$=($,Z)=>{let X=Math.min($.length,Z.length),j=0;while(j<X&&$[j]===Z[j])j+=1;return j},W$=100,Y$=($,Z,X)=>{let j=Object.values(X.entries);console.log(`[SessionCache] Finding match for promptText (${$.length} chars)`),console.log(`[SessionCache] Checking ${j.length} cache entries`);let H=j.filter((Q)=>X$(Q.metadata,Z)).reduce((Q,J)=>{let N=j$($,J.fullText);if(N>=W$&&N>Q.prefixLen)return{entry:J,prefixLen:N};return Q},{entry:null,prefixLen:0});if(H.entry)return console.log(`[SessionCache] Prefix match found: ${H.entry.id} (${H.prefixLen}/${H.entry.fullText.length} chars)`),{entry:H.entry,prefixLength:H.prefixLen};return console.log("[SessionCache] No match found"),null},Q$=async($,Z,X)=>{let j=Object.values($.entries).sort((J,N)=>new Date(J.lastAccessedAt)-new Date(N.lastAccessedAt)),W=$.totalSize,H=Object.keys($.entries).length,Q=j.filter((J)=>{let N=W>Z,G=H>X;if(!N&&!G)return!1;return W-=J.stateFileSize||0,H-=1,!0});return await Promise.all(Q.map(async(J)=>{await e(J.stateFilePath).catch(()=>{}),delete $.entries[J.id],console.log(`[SessionCache] Evicted entry: ${J.id}`)})),$.totalSize=Math.max(0,W),Q.map((J)=>J.id)},J$=async($=d)=>{let Z=g0($);try{let X=await 
f3(Z),j=Date.now(),W=3600000;await Promise.all(X.map(async(H)=>{let Q=b.join(Z,H),J=await M0(Q).catch(()=>null);if(J&&j-J.mtimeMs>3600000)await e(Q).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${H}`)}))}catch{}},N$=async($)=>{try{return await M0($),!0}catch{return!1}},H$=($,Z)=>{if($==null)return Z;if(typeof $==="number")return $;if(typeof $==="string"){let X=d3.parse($);return X!=null?X:Z}return Z},k4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},D4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},s4=async($,Z,X=d)=>{let j=JSON.stringify({url:$,headers:Z}),W=await d4(j,"range-metadata",X);if(W)return W;let H=!/^https?:/i.test($),{metadata:Q}=await g3($,{fetch,additionalFetchHeaders:Z,allowLocalFile:H});return await T1(j,"range-metadata",Q,X),Q},r4=($,Z)=>{if($.model.local_path)return b.resolve($.model.local_path);let X=Z.repoId.split("/"),j=b.join($.runtime.cache_dir,...X,Z.revision);return b.join(j,Z.filename)},R0=async($,Z)=>{try{let X=await M0($);if(!Z)return!0;return X.size===Z}catch(X){return!1}},c0=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await H0(b.dirname(X));let H=await fetch($,{headers:Z});if(!H.ok||!H.body)throw Error(`Failed to download ${$}: ${H.status} ${H.statusText}`);let Q=await p3(X,"w"),J=Number(H.headers.get("content-length"))||j||0,N=0,G=0.05;try{await H.body.pipeTo(new s3({async write(_){if(await Q.write(_),N+=_.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,N/J);while(O>=G)W(G),G+=0.05}},async close(){if(await Q.close(),typeof 
W==="function")W(1)},async abort(_){throw await Q.close().catch(()=>{}),await e(X).catch(()=>{}),_}}))}catch(_){throw await Q.close().catch(()=>{}),await e(X).catch(()=>{}),_}if(j){let _=await M0(X);if(_.size!==j)throw await e(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${_.size}`)}},D1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),H=await d4(W,"artifact-info",j);if(H)return H;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let Y=await D4($.model.url,{headers:Q}),K=Number(Y.headers.get("content-length"))||null,L=$.model.filename||$.model.url.split("/").pop(),B={repoId:Z,revision:X,filename:L,url:$.model.url,size:K,headers:Q};return await T1(W,"artifact-info",B,j),B}let{filename:J}=$.model,N=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await k4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((Y)=>Y.rfilename||Y.path||Y.filename).filter((Y)=>typeof Y==="string"&&Y.endsWith(r3));if(O.length===0)throw Error(`No GGUF artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:g4,V=()=>{let Y=z.find((K)=>{return O.find((B)=>B.toLowerCase().includes(K))});if(Y)return{filename:O.find((L)=>L.toLowerCase().includes(Y)),quantization:Y};return null};if(!J){let Y=V()||{filename:O[0],quantization:null},{filename:K,quantization:L}=Y;J=K,N=L||P4(J)}else if(!N)N=P4(J);let R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=/-(\d{5})-of-(\d{5})\.gguf$/,A=J.match(q),w=null;if(A){let[,,Y]=A,K=await 
k4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),L=K?.siblings||K?.files||[],B=Number(Y);w=0;for(let E=1;E<=B;E+=1){let M=String(E).padStart(5,"0"),F=J.replace(q,`-${M}-of-${Y}.gguf`),x=L.find((D)=>(D.rfilename||D.path||D.filename)===F),k=Number(x?.size);if(Number.isFinite(k)&&k>0)w+=k}}else{let Y=await D4(R,{headers:Q});w=Number(Y.headers.get("content-length"))||null}let U={repoId:Z,revision:X,filename:J,url:R,size:w,quantization:N,headers:Q,isSplit:Boolean(A),splitCount:A?Number(A[2]):0};return await T1(W,"artifact-info",U,j),U},a4=async($,{modelBytes:Z=null,kvCacheBytes:X=null}={})=>{let j=a3($),[W,...H]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,N=await _0({platform:process.platform,totalMemoryInBytes:E0.totalmem(),backend:"ggml-llm",variant:W||null,preferVariants:H,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:I4,isLibVariantAvailable:y4},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this platform`)});if(!N.ok||!N.selected){let z=(N.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). 
Errors: ${z}`)}let _=(N.attempts||[]).map(G);return{selected:G(N.selected),attempts:_}},V$=async($)=>{let Z=await D1($),X=await s4(Z.url,Z.headers,$.runtime.cache_dir),{arch:j,nCtxTrain:W,nLayer:H,nEmbd:Q,nHead:J,nHeadKv:N,nEmbdHeadK:G,nEmbdHeadV:_,quantVersion:O,fileType:z}=y0(X),V=Number.isFinite(Number(H))?Number(H):0,R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(N))?Number(N):q,w=q>0&&R>0?R/q:128,U=G!=null&&Number.isFinite(Number(G))?Number(G):w,Y=_!=null&&Number.isFinite(Number(_))?Number(_):w,K=I0({arch:j,metadata:X,nLayer:V}),L=K&&Number.isFinite(Number(K.kvLayers))?Number(K.kvLayers):V,B=Math.max(0,Math.floor(Number(L)||0)),E={use_mmap:$.model.use_mmap??$.runtime.use_mmap,use_mlock:$.model.use_mlock??$.runtime.use_mlock,n_threads:$.model.n_threads??$.runtime.n_threads,n_ctx:$.model.n_ctx??$.runtime.n_ctx,n_batch:$.model.n_batch??$.runtime.n_batch,n_ubatch:$.model.n_ubatch??$.runtime.n_ubatch,n_cpu_moe:$.model.n_cpu_moe??$.runtime.n_cpu_moe,n_parallel:$.model.n_parallel??$.runtime.n_parallel,cpu_mask:$.model.cpu_mask??$.runtime.cpu_mask,cpu_strict:$.model.cpu_strict??$.runtime.cpu_strict,devices:$.model.devices??$.runtime.devices,n_gpu_layers:$.model.n_gpu_layers??$.runtime.n_gpu_layers,flash_attn_type:$.model.flash_attn_type??$.runtime.flash_attn_type,cache_type_k:$.model.cache_type_k??$.runtime.cache_type_k,cache_type_v:$.model.cache_type_v??$.runtime.cache_type_v,kv_unified:$.model.kv_unified??$.runtime.kv_unified,swa_full:$.model.swa_full??$.runtime.swa_full,ctx_shift:$.model.ctx_shift??$.runtime.ctx_shift},M=E.n_ctx?Number(E.n_ctx):null,F=M||W||4096,x=[],k=[],D=!0;if(M&&W&&M>W){D=!1;let Q0=`Requested context length (${M}) exceeds model training context (${W})`;x.push(Q0),k.push(Q0),F=W}if(M&&!W)x.push("Model metadata missing training context length, using requested value");let 
m={k:E.cache_type_k,v:E.cache_type_v},p=Z.size>0?Z.size:0,l=N0({layerCount:B,headKvCount:A,embdHeadKCount:U,embdHeadVCount:Y,cacheTypes:m,swaConfig:K,kvUnified:E.kv_unified,nParallel:E.n_parallel,swaFull:E.swa_full}),i=l(F),v=await a4($,{modelBytes:p,kvCacheBytes:i}),c=v.selected.totalMemory||0,T=c*($.backend.gpu_memory_fraction||1),f=$.backend.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,h=Math.max(0,E0.totalmem()*f),s=v.selected.hasGpu?T:h,I=u0({maxCtx:F,availableMemory:s,modelBytes:p,kvBytesForCtx:l});if(!M&&I){let Q0=W?Math.min(I,W):I,U1=Math.max(32,Q0);if(U1<F)x.push(`Context length capped to ${U1} by memory limits`);F=U1}if(F>I)F=I;let a=Math.floor(I);console.log(`[buttress] Memory-limited context length: ${a}`);let q0=l(F),T0=p+q0,G0=V?p/(V+1):p,k0=0;if(v.selected.hasGpu&&G0>0)k0=Math.min(V+1,Math.max(0,Math.floor(T/G0)));console.log(`[buttress] Auto GPU layer capacity (${v.selected.variant}): ${k0}/${V+1}`);let H1;if(E.n_gpu_layers==="auto"||E.n_gpu_layers==null)H1=k0;else H1=Math.max(0,Math.min(Number(E.n_gpu_layers)||0,V+1));let M3=(()=>{let Q0=E.flash_attn_type&&String(E.flash_attn_type).toLowerCase();if(Q0==="on"||Q0==="off")return Q0;if(Q0==="auto")return v.selected.hasGpu?"auto":"off";return v.selected.hasGpu?"auto":"off"})(),F3=$.runtime.cache_dir,V1=r4($,Z),Q4=await 
R0(V1,Z.size),x3={ok:D,backend:"ggml-llm",warnings:x,errors:k,model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,url:Z.url,sizeBytes:Z.size,metadata:{architecture:j,n_ctx_train:W,n_layer:V,n_embd:R,quantization_version:O,file_type:z,kv_layer_count:B,swa:K?.enabled?{window:K.window,pattern:K.pattern,dense_first:K.denseFirst,type:K.type,layers:K.swaLayers}:null}},runtime:{...E,variant:v.selected.variant,n_ctx:F,requested_ctx:M,n_gpu_layers:H1,auto_gpu_layers:k0,flash_attn_type:M3,cache_type_k:m.k,cache_type_v:m.v,estimated_max_n_ctx:a},resources:{modelBytes:p,kvCacheBytes:q0,totalEstimatedBytes:T0,gpuCapacityBytes:c,gpuUsableBytes:T,cpuUsableBytes:h,fit:v.selected.fit},devices:{selected:v.selected,attempts:v.attempts},download:{cacheDir:F3,localPath:V1,exists:Q4},timestamp:new Date().toISOString()};return{config:$,info:x3,artifact:Z,metadata:{arch:j,nCtxTrain:W,nLayer:V,nEmbd:R},devices:v,cacheTypes:m,localPath:V1,localExists:Q4}},U$=($,Z,X=null,j=null)=>{let W,H=Date.now(),Q=0;return new u4({async start(J){try{let N=await $.parallel.completion(Z,(V,R)=>{if(!R)return;if(R.token)Q+=1;J.enqueue({event:"token",data:{requestId:V,...R}})}),{requestId:G}=N;W=N.stop;let _=await N.promise;console.log("[Completion] Result:",_),J.enqueue({event:"result",data:{requestId:G,..._}}),J.close();let 
O=Date.now()-H,z=_.timings||{};t.addCompletion({id:`completion-${G}`,generatorId:X,requestId:G,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,cacheTokens:z.cache_n??0,promptTokens:z.prompt_n??0,tokensGenerated:z.predicted_n??Q,tokensPerSecond:z.predicted_per_second??0,promptPerSecond:z.prompt_per_second??0,durationMs:O,success:!0,interrupted:_.interrupted||!1,contextFull:_.context_full||_.contextFull||!1})}catch(N){J.enqueue({event:"error",data:{message:N?.message||String(N)}}),J.error(N),t.addCompletion({id:`completion-${Date.now()}`,generatorId:X,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,durationMs:Date.now()-H,tokensGenerated:Q,success:!1,error:N?.message||String(N)})}},cancel(){if(W)W()}})},G$=($,Z,X,j,W,H,Q=null,J=null)=>{let N,G="",_=!1,O=Date.now(),z=0;return new u4({async start(V){try{let R=await $.parallel.completion(Z,(Y,K)=>{if(!K)return;if(K.token)G+=K.token,z+=1;V.enqueue({event:"token",data:{requestId:Y,...K}})}),{requestId:q}=R;N=R.stop;let A=await R.promise;if(A.text)G=A.text;else if(A.content)G=A.content;_=!A.interrupted&&!A.context_full,console.log("[Completion] Result:",A),V.enqueue({event:"result",data:{requestId:q,...A}}),V.close();let w=Date.now()-O,U=A.timings||{};if(t.addCompletion({id:`completion-${q}`,generatorId:Q,requestId:q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,cacheTokens:U.cache_n??0,promptTokens:U.prompt_n??H??0,tokensGenerated:U.predicted_n??z,tokensPerSecond:U.predicted_per_second??0,promptPerSecond:U.prompt_per_second??0,durationMs:w,success:!0,interrupted:A.interrupted||!1,contextFull:A.context_full||A.contextFull||!1,usedCache:Boolean(Z.load_state_path)}),_&&X.enabled&&G)X.saveCompletionState(j,G,W,H).catch((Y)=>{console.warn("[SessionCache] Save failed:",Y.message)});else 
if(W)e(W).catch(()=>{})}catch(R){V.enqueue({event:"error",data:{message:R?.message||String(R)}}),V.error(R),t.addCompletion({id:`completion-${Date.now()}`,generatorId:Q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,durationMs:Date.now()-O,tokensGenerated:z,success:!1,error:R?.message||String(R)}),e(W).catch(()=>{})}},cancel(){if(N)N();e(W).catch(()=>{})}})},O0=($)=>{let Z={model:$.plan.localPath,runtime:$.plan.info.runtime};return k1("sha256").update(JSON.stringify(Z)).digest("hex").slice(0,24)},K$=async($,Z,X,j=null)=>{let{config:W,localPath:H,artifact:Q}=$;if($.localExists&&!Z.has(H)){if($.info.download.exists=!0,typeof X==="function")X(0.5);return H}if(W.model.local_path&&!W.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let J=H;if(j){let N=j.getDownload(J);if(N){console.log(`[ensureModelFile] Waiting for global download: ${Q.repoId}`);try{if(await N,await R0(H,Q.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(0.5);return H}}catch(G){console.warn(`[ensureModelFile] Global download failed, will retry: ${G.message}`)}}}if(!Z.has(J))Z.set(J,(async()=>{if(Q.isSplit&&Q.splitCount>0){let N=/-(\d{5})-of-(\d{5})\.gguf$/,G=b.dirname(H),_=Q.splitCount,O=0;for(let z=1;z<=_;z+=1){let V=String(z).padStart(5,"0"),R=Q.filename.replace(N,`-${V}-of-${String(_).padStart(5,"0")}.gguf`),q=`${W.model.base_url.replace(/\/+$/,"")}/${Q.repoId}/resolve/${Q.revision}/${R}`,A=b.join(G,R);if(!await R0(A))await c0(q,Q.headers,A,null,(U)=>{if(U>=0&&Number.isFinite(U)){let Y=(O+U)/_,K=Math.round(Y*100);if(console.log(`Downloading model splits: ${Math.min(100,K)}%`),typeof X==="function")X(Y*0.5)}});O+=1}}else console.log("Downloading model: 0%"),await c0(Q.url,Q.headers,H,Q.size,(N)=>{if(N>=0&&Number.isFinite(N)){let G=Math.round(N*100);if(console.log(`Downloading model: ${Math.min(100,G)}%`),typeof 
X==="function")X(N*0.5)}});$.localExists=!0,$.info.download.exists=!0})());try{await Z.get(J)}finally{Z.delete(J)}return H},_$=async($,Z)=>{let X=O0($),j=$.contexts.get(X);if(j&&!j.released){if(j.releaseTimer)clearTimeout(j.releaseTimer),j.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${X}"`);if(j.releaseRequested=!1,j.refCount+=1,console.log(`[Context] Reusing existing context "${X}", refCount=${j.refCount}`),typeof Z==="function")Z(0);if(!j.context)await j.ready;if(typeof Z==="function")Z(1);return j}if(j)console.log(`[Context] Record exists but released=${j.released}, creating new context`);else console.log(`[Context] No existing record for "${X}", creating new context`);j={key:X,refCount:1,ready:null,released:!1},$.contexts.set(X,j),j.ready=(async()=>{let W=Date.now(),H=await K$($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let Q={model:H,n_threads:$.plan.info.runtime.n_threads,use_mmap:$.plan.info.runtime.use_mmap,use_mlock:$.plan.info.runtime.use_mlock,cpu_mask:$.plan.info.runtime.cpu_mask,cpu_strict:$.plan.info.runtime.cpu_strict,devices:$.plan.info.runtime.devices,n_ctx:$.plan.info.runtime.n_ctx,n_gpu_layers:$.plan.info.runtime.n_gpu_layers,n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch,n_ubatch:$.plan.info.runtime.n_ubatch,n_cpu_moe:$.plan.info.runtime.n_cpu_moe,flash_attn_type:$.plan.info.runtime.flash_attn_type,ctx_shift:$.plan.info.runtime.ctx_shift,kv_unified:$.plan.info.runtime.kv_unified,swa_full:$.plan.info.runtime.swa_full,lib_variant:$.plan.info.runtime.variant};if($.plan.info.runtime.flash_attn_type!=="off")Q.cache_type_k=$.plan.info.runtime.cache_type_k,Q.cache_type_v=$.plan.info.runtime.cache_type_v;console.log("[Context] Load Options:",Q);let J;try{if(J=await c3(Q,(N)=>{if(typeof Z==="function"){if(Z(0.5+N*0.25),N%5===0)console.log("[Context] Load Model Progress:",N)}}),$.plan.info.runtime.n_parallel){if(!await 
J.parallel.enable({n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch}))throw Error("Failed to enable parallel decoding mode for context")}if(typeof Z==="function")Z(1);return j.context=J,j.modelInfo=J.getModelInfo(),t.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,nCtx:$.plan.info.runtime?.n_ctx||null,nGpuLayers:$.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-W,success:!0}),j}catch(N){if(t.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-W,success:!1,error:N?.message||String(N)}),J)try{J.release()}catch(G){}throw N}})();try{return await j.ready,j}catch(W){throw $.contexts.delete(X),W}},f0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;Z.released=!0,$.contexts.delete(Z.key);try{Z.context?.parallel?.disable?.()}catch(j){}return await Z.context?.release?.(),!0},q$=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return f0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return f0($,Z);return console.log(`[Context] Scheduling release in ${W}ms for context "${Z.key}"`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] Release cancelled, refCount=${Z.refCount} for context "${Z.key}"`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing context "${Z.key}" after ${W}ms delay`),await f0($,Z)},W),!0},b1=($)=>{let Z=l0($);return 
Z.model.repo_id||Z.model.repository||Z.model.model||null},b4=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return K0($)};var v1=S(()=>{B0();m0();i3=n3(),{ReadableStream:u4,WritableStream:s3}=i3,d=b.join(E0.homedir(),".buttress","models"),g4=["mxfp4","q8_0","q6_k","q6","q5_k_m","q5_k_s","q5_k","q5_1","q5_0","q4_k_m","q4_k_s","q4_k","q4_1","q4_0","q3","q2"],w0={backend:{type:"ggml-llm",variant:null,variant_preference:["cuda","vulkan","snapdragon","default"],gpu_memory_fraction:0.85,cpu_memory_fraction:d0},model:{repo_id:null,revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:"auto",allow_local_file:!1,local_path:null,api_base:f4,base_url:p4},runtime:{cache_dir:d,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10737418240,max_entries:1000},context_release_delay_ms:1e4}}});import X0 from"node:path";import p1 from"node:os";import{stat as j2,mkdir as L$,open as A$,unlink as h1,readFile as z$,writeFile as B$}from"node:fs/promises";import{createHash as w$}from"node:crypto";import{initWhisper as R$}from"@fugood/whisper.node";import{getBackendDevicesInfo as W2,isLibVariantAvailable as Y2}from"@fugood/llama.node";import*as E$ from"node:stream/web";class O2{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue($,Z=null){return new Promise((X,j)=>{this.queue.push({task:$,resolve:X,reject:j,taskId:Z}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:$,resolve:Z,reject:X,taskId:j}=this.queue.shift();this.currentTaskId=j;try{let W=await $();Z(W)}catch(W){X(W)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}}async function 
L2($,Z,X={}){let{globalDownloadManager:j=null}=X,W=a0(Z),H=await v$(W),Q={id:$,type:"ggml-stt",config:W,plan:H,info:H.info,contextRecord:null,downloads:new Map,globalDownloadManager:j,queue:new O2,finalized:!1},J=async()=>{if(Q.finalized)return;Q.finalized=!0;let A=Q.contextRecord;if(!A)return;if(A.released)return;if(A.releaseRequested||A.releaseTimer)return;if(A.refCount=Math.max(0,A.refCount-1),A.refCount>0)return;await s0(Q,A)},N=async(A={})=>{let{onProgress:w}=A;try{let U=await I$(Q,w);return{modelInfo:U.modelInfo&&typeof U.modelInfo==="object"?{...U.modelInfo}:null,runtime:{...Q.plan.info.runtime},download:{...Q.plan.info.download}}}catch(U){throw console.error("[Context] Error initializing context:",U),U}},G=async()=>{if(Q.finalized)return!1;let A=Q.contextRecord;if(!A)return!1;return y$(Q,A)},_=async(A={})=>{let{audioPath:w,audioData:U,options:Y={}}=A,K=Q.contextRecord;if(!K)throw Error("Context not initialized");let L={...Y};if(Q.plan.info.runtime.max_threads&&L.maxThreads==null)L.maxThreads=Q.plan.info.runtime.max_threads;let B=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,E=Date.now();return Q.queue.enqueue(async()=>{await K.ready;try{let M;if(U){let F=C$(U),{promise:x}=K.context.transcribeData(F,L);M=await x}else{if(!w)throw Error("audioPath or audioData is required for transcription");let F=X0.resolve(w),{promise:x}=K.context.transcribe(F,L);M=await x}return J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-E,segmentCount:M?.segments?.length||0,textLength:M?.text?.length||0,success:!0}),M}catch(M){throw 
J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-E,success:!1,error:M?.message||String(M)}),M}},B)},O=async(A={})=>_(A),z=async(A={})=>_(A),V=()=>{let A=Q.contextRecord;if(!A)return!1;return!A.released&&(A.releaseRequested||A.releaseTimer||A.refCount>0)},R=()=>{Q.finalized=!1},q=()=>({id:Q.id,type:Q.type,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,hasContext:Boolean(Q.contextRecord?.context),contextRefCount:Q.contextRecord?.refCount||0,queueStatus:Q.queue.getStatus()});return{id:$,type:"ggml-stt",info:H.info,queue:Q.queue,initContext:N,transcribe:O,transcribeData:z,releaseContext:G,finalize:J,getStatus:q,hasPendingReleases:V,resetFinalized:R}}async function A2($,Z,X={}){let{onProgress:j,onComplete:W,onError:H}=X;try{let Q=a0($),J=await f1(Q),N=_2(Q,J),{repoId:G}=J;if(await r0(N,J.size)){if(console.log(`[Download] STT model already exists: ${G} at ${N}`),typeof W==="function")W({localPath:N,repoId:G,alreadyExists:!0});return{started:!1,localPath:N,repoId:G,alreadyExists:!0}}let O=Z.getDownload(N);if(O)return console.log(`[Download] Already downloading STT model: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:N,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof H==="function")H(V)}),{started:!1,localPath:N,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${G}`);let z=(async()=>{try{if(await q2(J.url,J.headers,N,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}}),console.log(`[Download] Completed STT model: ${G}`),typeof W==="function")W({localPath:N,repoId:G})}catch(V){if(console.error(`[Download] Failed STT model: 
${G}`,V.message),typeof H==="function")H(V);throw V}finally{Z.deleteDownload(N)}})();return Z.setDownload(N,z),{started:!0,localPath:N,repoId:G}}catch(Q){if(console.error("[Download] Failed to start STT download:",Q.message),typeof H==="function")H(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function z2($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...H}=Z,Q=null,J=null,N=null;if(W)try{let w=a0(W),U=await f1(w);Q=U.size??null,{processingBufferBytes:J}=C0({modelBytes:Q}),N=U.quantization||null}catch(w){}let G=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,_=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,O=await _0({...H,platform:process.platform,totalMemoryInBytes:p1.totalmem(),backend:"ggml-stt",includeBreakdown:j,gpuMemoryFraction:G,cpuMemoryFraction:_,dependencies:{getBackendDevicesInfo:W2,isLibVariantAvailable:Y2},modelBytes:Q,kvCacheBytes:J}),z=O.selected,V=X2(z);if(z)z.modelBytes=Q||null,z.processingBytes=J||null,z.quantization=N||null;let R=null,q=null;if($){let w=X2($);q={...$,score:w};let U="buttress",Y="buttress-higher-score";if(!O.ok)U="local",Y="buttress-unavailable";else if(!w&&w!==0)U="buttress",Y="missing-client-score";else if($.fit&&z?.fit){let K=$.fit.fitsInGpu||$.fit.fitsInCpu,L=z.fit.fitsInGpu||z.fit.fitsInCpu;if(K&&!L)U="local",Y="client-fits-in-memory";else if(L&&!K)U="buttress",Y="buttress-fits-in-memory";else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores"}else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores";R={buttressScore:V,clientScore:w,threshold:X,recommendation:U,reason:Y}}if(!O.ok&&!R)R={buttressScore:V,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let 
A=null;if(W)A={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,filename:W.model?.filename||null};return{type:"ggml-stt",timestamp:new Date().toISOString(),buttress:O,client:q,comparison:R,modelConfig:A}}var M$=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return E$},F$,Q2=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};Q2($[X],j)}else $[X]=j}),$},x$=".bin",J2="https://huggingface.co",N2="https://huggingface.co/api",L0,y1,u1,H2="fp16",V2=0.5,S$,U2=($)=>{if(!$)return null;let Z=$.toLowerCase();return S$.find((X)=>Z.includes(X))||null},m1,C1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},i0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","default"].includes(Z))return Z;return null},a0=($={})=>{let Z=JSON.parse(JSON.stringify(m1));if(Q2(Z,$),Z.backend.variant=i0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(C1(Z.backend.variant_preference||y1).map(i0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=[...y1];if(Z.runtime.prefer_variants=Array.from(new Set(C1(Z.runtime.prefer_variants).map(i0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(C1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return 
Z.model.base_url=Z.model.base_url||J2,Z.model.api_base=Z.model.api_base||N2,Z.runtime.cache_dir=Z.runtime.cache_dir?X0.resolve(Z.runtime.cache_dir):L0,Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||m1.runtime.context_release_delay_ms),Z},I1=($)=>{let Z=$.toLowerCase();return u1.find((j)=>Z.includes(j))||null},P$=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(i0).filter(Boolean)))},G2=async($)=>{await L$($,{recursive:!0})},T$=($=L0)=>X0.join($,".metadata-cache"),K2=($,Z,X=L0)=>{let j=w$("sha256").update($).digest("hex");return X0.join(T$(X),Z,`${j}.json`)},k$=async($,Z,X=L0)=>{try{let j=K2($,Z,X),W=await z$(j,"utf-8");return JSON.parse(W)}catch(j){return null}},$2=async($,Z,X,j=L0)=>{try{let W=K2($,Z,j);await G2(X0.dirname(W)),await B$(W,JSON.stringify(X),"utf-8")}catch(W){}},D$=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},Z2=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},_2=($,Z)=>{if($.model.local_path)return X0.resolve($.model.local_path);let X=Z.repoId.split("/"),j=X0.join($.runtime.cache_dir,...X,Z.revision);return X0.join(j,Z.filename)},r0=async($,Z)=>{try{let X=await j2($);if(!Z)return!0;return X.size===Z}catch(X){return!1}},q2=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await G2(X0.dirname(X));let H=await fetch($,{headers:Z});if(!H.ok||!H.body)throw Error(`Failed 
to download ${$}: ${H.status} ${H.statusText}`);let Q=await A$(X,"w"),J=Number(H.headers.get("content-length"))||j||0,N=0,G=0.05;try{await H.body.pipeTo(new F$({async write(_){if(await Q.write(_),N+=_.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,N/J);while(O>=G)W(G),G+=0.05}},async close(){if(await Q.close(),typeof W==="function")W(1)},async abort(_){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),_}}))}catch(_){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),_}if(j){let _=await j2(X);if(_.size!==j)throw await h1(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${_.size}`)}},f1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),H=await k$(W,"artifact-info",j);if(H)return H;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let U=await Z2($.model.url,{headers:Q}),Y=Number(U.headers.get("content-length"))||null,K=$.model.filename||$.model.url.split("/").pop(),L={repoId:Z,revision:X,filename:K,url:$.model.url,size:Y,quantization:I1(K||""),headers:Q};return await $2(W,"artifact-info",L,j),L}let{filename:J}=$.model,N=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await D$(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((U)=>U.rfilename||U.path||U.filename).filter((U)=>typeof U==="string"&&U.endsWith(x$));if(O.length===0)throw Error(`No model artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:u1,V=()=>{for(let U of z)if(U===H2){let Y=O.find((K)=>{let 
L=K.toLowerCase();return!u1.some((B)=>L.includes(B))});if(Y)return{filename:Y,quantization:null}}else{let Y=O.find((K)=>K.toLowerCase().includes(U));if(Y)return{filename:Y,quantization:U}}return null};if(!J){let U=V()||{filename:O[0],quantization:null},{filename:Y,quantization:K}=U;J=Y,N=K||I1(J)}else if(!N)N=I1(J);let R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=await Z2(R,{headers:Q}),A=Number(q.headers.get("content-length"))||null,w={repoId:Z,revision:X,filename:J,url:R,size:A,quantization:N,headers:Q,isSplit:!1,splitCount:0};return await $2(W,"artifact-info",w,j),w},b$=async($,{modelBytes:Z=null,processingBytes:X=null}={})=>{let j=P$($),[W,...H]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):m1.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):V2,N=await _0({platform:process.platform,totalMemoryInBytes:p1.totalmem(),backend:"ggml-stt",variant:W||null,preferVariants:H,variantPreference:$.backend.variant_preference,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:W2,isLibVariantAvailable:Y2},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this platform`)});if(!N.ok||!N.selected){let z=(N.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). 
Errors: ${z}`)}let _=(N.attempts||[]).map(G);return{selected:G(N.selected),attempts:_}},v$=async($)=>{let Z=await f1($),X=C0({modelBytes:Z.size>0?Z.size:0}),j=await b$($,{modelBytes:X.modelBytes,processingBytes:X.processingBufferBytes}),W=j.selected.hasGpu&&(j.selected.fit?.fitsInGpu!==void 0?j.selected.fit.fitsInGpu:!0);if($.model.use_gpu===!1)W=!1;let H=$.model.use_flash_attn&&String($.model.use_flash_attn).toLowerCase(),Q;if(H==="on"||H==="true")Q=!0;else if(H==="off"||H==="false")Q=!1;else Q=W;let J=$.runtime.cache_dir,N=_2($,Z),G=await r0(N,Z.size),_={ok:!0,backend:"ggml-stt",model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,modelType:U2(Z.filename),url:Z.url,sizeBytes:Z.size},runtime:{variant:j.selected.variant,use_gpu:W,use_flash_attn:Q,max_threads:$.runtime.max_threads?Number($.runtime.max_threads):null},resources:{...X,gpuCapacityBytes:j.selected.gpuTotalBytes,gpuUsableBytes:j.selected.gpuUsableBytes,cpuUsableBytes:j.selected.cpuUsableBytes,fit:j.selected.fit},devices:{selected:j.selected,attempts:j.attempts},download:{cacheDir:J,localPath:N,exists:G},timestamp:new Date().toISOString()};return{config:$,info:_,artifact:Z,memory:X,devices:j,localPath:N,localExists:G}},h$=async($,Z,X,j=null)=>{let{localPath:W,artifact:H,config:Q}=$;if($.localExists){if(typeof X==="function")X(1);return W}if(j){let G=j.getDownload(W);if(G){console.log(`[ensureModelFile] Waiting for global STT download: ${H.repoId}`);try{if(await G,await r0(W,H.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(1);return W}}catch(_){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${_.message}`)}}}let J=Z.get(W);if(J){if(await J,typeof X==="function")X(1);return W}let N=(async()=>{if(Q.model.allow_local_file){if(!await r0(W,H.size))throw Error(`Local model file not found: ${W}`);return W}return await q2(H.url,H.headers,W,H.size,X),W})();Z.set(W,N);try{return await 
N,W}finally{Z.delete(W)}},C$=($)=>{if(!$)return null;if($ instanceof ArrayBuffer)return $;if(ArrayBuffer.isView($))return $.buffer;if(typeof $==="string"){let Z=$.startsWith("data:")?$.split(",")[1]||"":$,X=Buffer.from(Z,"base64");return X.buffer.slice(X.byteOffset,X.byteOffset+X.byteLength)}throw Error("Unsupported audioData format, expected base64 string or ArrayBuffer")},I$=async($,Z)=>{if($.contextRecord&&!$.contextRecord.released){if($.contextRecord.releaseTimer)clearTimeout($.contextRecord.releaseTimer),$.contextRecord.releaseTimer=null,console.log("[Context] Cancelled pending STT release");if($.contextRecord.releaseRequested=!1,$.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${$.contextRecord.refCount}`),typeof Z==="function")Z(0);if(!$.contextRecord.context)await $.contextRecord.ready;if(typeof Z==="function")Z(1);return $.contextRecord}if($.contextRecord)console.log(`[Context] STT record exists but released=${$.contextRecord.released}, creating new context`);else console.log("[Context] No existing STT record, creating new context");let X={refCount:1,ready:null,released:!1};$.contextRecord=X,X.ready=(async()=>{let j=Date.now();try{if(typeof Z==="function")Z(0);let W=await h$($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let H=await R$({filePath:W,useFlashAttn:$.plan.info.runtime.flash_attn_type==="on",useGpu:$.plan.info.runtime.n_gpu_layers>0,nThreads:$.plan.info.runtime.n_threads},$.plan.info.runtime.variant);if(typeof Z==="function")Z(1);X.context=H;try{X.modelInfo=H.getModelInfo()}catch(Q){X.modelInfo=null}return J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,useGpu:$.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-j,success:!0}),X}catch(W){throw 
J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-j,success:!1,error:W?.message||String(W)}),W}})();try{if(await X.ready,typeof Z==="function")Z(1);return X}catch(j){throw $.contextRecord=null,j}},s0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;return Z.released=!0,$.contextRecord=null,await Z.context?.release?.(),!0},y$=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return s0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return s0($,Z);return console.log(`[Context] Scheduling STT release in ${W}ms`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] STT release cancelled, refCount=${Z.refCount}`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${W}ms delay`),await s0($,Z)},W),!0},g1=($)=>{let Z=a0($),X=Z.model.repo_id||Z.model.repository||Z.model.model||null;if(!X)return null;let j=U2(Z.model.filename);if(j)return`${X}:${j}`;return X},X2=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return K0($)};var 
c1=S(()=>{B0();m0();({WritableStream:F$}=M$()),L0=X0.join(p1.homedir(),".buttress","models"),y1=["cuda","vulkan","default"],u1=["q8_0","q5_1","q5_0","q4_1","q4_0"],S$=["large-v3-turbo","distil-large-v3","large-v3","large-v2","large-v1","large","distil-medium","medium.en","medium","small.en-tdrz","distil-small.en","small.en","small","base.en","base","tiny.en","tiny"],m1={backend:{type:"ggml-stt",variant:null,variant_preference:y1,gpu_memory_fraction:0.85,cpu_memory_fraction:V2},model:{repo_id:"BricksDisplay/whisper-ggml",revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:["q8_0",H2,"q5_1"],allow_local_file:!1,local_path:null,api_base:N2,base_url:J2,use_gpu:!0,use_flash_attn:"auto"},runtime:{cache_dir:L0,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}}});async function r($,Z=null,X={}){if($==="ggml-llm")return e4(Z,X);if($==="ggml-stt")return z2(Z,X);throw Error(`Unknown backend type: ${$}`)}var d1=S(()=>{v1();c1()});var C;var B2=S(()=>{C={name:"@fugood/buttress-backend-core",private:!0,type:"module",version:"2.23.0-beta.38",main:"src/index.js",types:"lib/types/index.d.ts",scripts:{build:"tsc --noResolve --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js"},dependencies:{"@fugood/buttress-hardware-guardrails":"^2.23.0-beta.37","@fugood/llama.node":"^1.4.11","@fugood/whisper.node":"^1.0.11","@huggingface/gguf":"^0.3.2","@iarna/toml":"^3.0.0",bytes:"^3.1.0"}}});import y from"node:os";import w2 from"node:fs";import R2 from"node:path";import{execSync as o0}from"node:child_process";import E2 from"@iarna/toml";async function F2({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${C.name} v${C.version}`),console.log(`Generating model capabilities comparison...
|
|
2
|
+
import{createRequire as T$}from"node:module";var P$=Object.defineProperty;var k$=($,Z)=>{for(var X in Z)P$($,X,{get:Z[X],enumerable:!0,configurable:!0,set:(j)=>Z[X]=()=>j})};var S=($,Z)=>()=>($&&(Z=$($=0)),Z);var J4=T$(import.meta.url);var N4=($,Z,X)=>Math.min(Math.max($,Z),X),H4=($)=>$?40:0,V4=($=0)=>{if(!$)return 0;return N4($/12884901888*20,0,20)},U4=($=0)=>{if(!$)return 0;return N4($/34359738368*10,0,10)},G4=($)=>$?10:0,_4=($="default",Z=null)=>{let X=String($).toLowerCase();if(!X)return 0;if(X.includes("cuda"))return 20;if(X.includes("vulkan"))return 10;if(X.includes("default"))return Z==="darwin"||Z==="ios"?15:5;return 0},_0=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:N=!0}={})=>{if(!N)return 0;let Q=H4(X)+_4(Z,$)+V4(j),J=U4(W),H=G4(N);return Math.min(100,Math.round(Q+J+H))},_1=({platform:$,variant:Z,hasGpu:X,gpuUsableBytes:j=0,cpuUsableBytes:W=0,ok:N=!0}={})=>({gpuPresence:H4(X),variant:_4(Z,$),gpuMemory:V4(j),cpuMemory:U4(W),availability:G4(N)});var L4,D0=0.85,b0=0.5,K4=($)=>{if(!$&&$!==0)return[];if(Array.isArray($))return $.filter((Z)=>Z!=null);return[$]},D$=($)=>{if(!$)return null;return String($).trim().toLowerCase()||null},b$=({variant:$,preferVariants:Z=[],variantPreference:X=[],defaultVariants:j=L4}={})=>{let W=[];if($)W.push($);W.push(...K4(Z)),W.push(...K4(X)),W.push(...j);let N=W.map(D$).filter(Boolean);return Array.from(new Set(N))},q4=($={})=>{let Z=String($.type||$.deviceType||$.kind||"").toLowerCase();if(Z.includes("gpu"))return!0;if(Z.includes("cuda"))return!0;if(Z.includes("metal"))return!0;if(Z.includes("vulkan"))return!0;if(Z.includes("snapdragon"))return!0;return!1},v$=($)=>{if(!Array.isArray($))return[];return $.map((Z)=>({...Z}))},h$=($,Z)=>{if($==="snapdragon")return Z.filter((X)=>X.deviceName!=="GPUOpenCL");return Z},O4=({platform:$,totalMemoryInBytes:Z,variant:X,devices:j,gpuMemoryFraction:W,cpuMemoryFraction:N,ok:Q,error:J})=>{let 
H=v$(h$(X,j)),G=H.some(q4),K=H.filter((w)=>q4(w)&&Number.isFinite(Number(w.maxMemorySize))).reduce((w,U)=>w+U.maxMemorySize,0),O=Z,z=G?Math.floor(K*W):0,V=O?Math.floor(O*N):0,R={platform:$,variant:X,hasGpu:G,gpuUsableBytes:z,cpuUsableBytes:V,ok:Q},q=_0(R),A=Q?_1(R):null;return{platform:$,ok:Q,variant:X,hasGpu:G,devices:H,gpuTotalBytes:K,gpuUsableBytes:z,cpuTotalBytes:O,cpuUsableBytes:V,score:q,breakdown:A,error:J,timestamp:new Date().toISOString()}},K1=({device:$,modelBytes:Z=0,kvCacheBytes:X=0}={})=>{if(!$)return{totalRequiredBytes:Z+X,fitsInGpu:!1,fitsInCpu:!1,limiting:"unknown-device"};let j=Math.max(0,Number(Z)||0)+Math.max(0,Number(X)||0),W=$.hasGpu&&j>0&&j<=$.gpuUsableBytes,N=j>0&&j<=$.cpuUsableBytes,Q="ok";if(!W&&$.hasGpu)Q="gpu-memory";if(!N)Q=W?"cpu-memory":"insufficient-memory";return{totalRequiredBytes:j,fitsInGpu:W,fitsInCpu:N,limiting:Q}},z0=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:N=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:H=null,kvCacheBytes:G=null,limitedKvCacheBytes:K=null,dependencies:O={},defaultVariants:z=L4}={})=>{let{getBackendDevicesInfo:V,isLibVariantAvailable:R}=O;if(typeof V!=="function"||typeof R!=="function")throw TypeError("GGML capability detection requires getBackendDevicesInfo and isLibVariantAvailable functions");let q=b$({variant:Z,preferVariants:X,variantPreference:j,defaultVariants:z}),A=[];for(let L of q)try{if(!await R(L))throw Error(`Variant ${L} not available on this platform`);let F=await V(L);A.push(O4({platform:$,totalMemoryInBytes:J,variant:L,devices:F,gpuMemoryFraction:W,cpuMemoryFraction:N,ok:!0}))}catch(B){let F=B instanceof Error?B.message:String(B);A.push(O4({platform:$,totalMemoryInBytes:J,variant:L,devices:[],gpuMemoryFraction:W,cpuMemoryFraction:N,ok:!1,error:F}))}let U=A.filter((L)=>L.ok)[0]||null,Y={ok:Boolean(U),selected:U?{...U,breakdown:Q?U.breakdown:void 0}:null,attempts:A};if(!Q&&Y.selected)delete 
Y.selected.breakdown;if(!Y||!H&&!G)return Y;let _=(L)=>{if(!L)return L;let B=K1({device:L,modelBytes:H||0,kvCacheBytes:G||0}),F=null;if(K!=null&&K!==G)F=K1({device:L,modelBytes:H||0,kvCacheBytes:K});return{...L,fit:B,...F&&{limitedFit:F}}};return Y.selected=_(Y.selected),Y.attempts=Array.isArray(Y.attempts)?Y.attempts.map(_):Y.attempts,Y},v0="ggml-llm";var h0=S(()=>{L4=["cuda","vulkan","snapdragon","default"]});var q1="ggml-stt",A4,O1=async({platform:$,variant:Z=null,preferVariants:X=[],variantPreference:j=[],gpuMemoryFraction:W=D0,cpuMemoryFraction:N=b0,includeBreakdown:Q=!1,totalMemoryInBytes:J,modelBytes:H=null,processingBytes:G=null,kvCacheBytes:K=null,dependencies:O={}}={})=>{let z=j&&j.length>0?j:A4;return z0({platform:$,variant:Z,preferVariants:X,variantPreference:z,gpuMemoryFraction:W,cpuMemoryFraction:N,includeBreakdown:Q,totalMemoryInBytes:J,modelBytes:H,kvCacheBytes:G??K,dependencies:O,defaultVariants:A4})};var L1=S(()=>{h0();A4=["cuda","vulkan","default"]});var C$,K0=async({platform:$,totalMemoryInBytes:Z,backend:X=v0,dependencies:j,...W}={})=>{let N=C$.get(X);if(!N)throw Error(`No capability detector registered for backend "${X}"`);return await N({...W,dependencies:j,totalMemoryInBytes:Z,platform:$})};var z4=S(()=>{h0();L1();C$=new Map([[v0,z0],[q1,O1]])});var B4,A1=($)=>{let Z=$?String($).toLowerCase():"f16";return B4[Z]||B4.f16},z1=($,Z,X,j,W,N={},{totalLayers:Q=null,swaLayers:J=0,swaContext:H=null,swaContextMultiplier:G=1,swaAdditionalTokens:K=0,swaFull:O=!1}={})=>{if(!$||!Z||!X||!j||!W)return 0;let z=Q!=null&&Q!==void 0?Number(Q):Number($),V=Math.max(0,Math.floor(z));if(!V)return 0;let R=A1(N.k),q=A1(N.v),A=Number(X)*(Number(j)*R+Number(W)*q);if(!A)return 0;let w=Math.max(0,Number(Z)||0),U=Math.min(V,Math.max(0,Math.floor(Number(J)||0))),Y=Math.max(0,V-U),_=H!=null&&Number.isFinite(Number(H))?Math.max(0,Number(H)):w,L=Math.max(1,Number(G)||1),B=Math.max(0,Number(K)||0),F=_*L+B,E=O?w:Math.min(w,F),M=Y*w+U*Math.max(0,Math.floor(E));return 
Math.round(A*M)},C0=({modelBytes:$=0,audioLengthSeconds:Z=30,sampleRate:X=16000,bytesPerSample:j=4}={})=>{let W=Math.max(0,Number($)||0),N=Math.max(0,Math.floor(Math.max(0,Z)*X*j)),Q=1048576,J=1073741824,H;if(W<209715200)H=125829120;else if(W<524288000)H=146800640;else if(W<2147483648)H=157286400;else H=167772160;let G;if(W<209715200)G=73400320;else if(W<524288000)G=141557760;else if(W<2147483648)G=230686720;else G=230686720;let K;if(W<104857600)K=20971520;else if(W<209715200)K=31457280;else if(W<524288000)K=89128960;else if(W<2147483648)K=225443840;else K=377487360;let O=H+G+K,z=W+O+N;return{modelBytes:W,audioBufferBytes:N,processingBufferBytes:O,totalBytes:z}};var B1=S(()=>{B4={f16:2,f32:4,q8_0:1,q6_k:0.75,q5_k:0.625,q5_k_m:0.625,q5_k_s:0.625,q5_1:0.625,q5_0:0.625,q4_k:0.5,q4_k_m:0.5,q4_k_s:0.5,q4_1:0.5,q4_0:0.5,iq4_nl:0.5}});var w1=($)=>$?String($).trim().toLowerCase():null,I$=($={},Z=null)=>{if(!$)return null;let X=w1(Z),j=X?`${X}.attention.sliding_window`:null,W=(j&&$[j]!=null?$[j]:null)??$["llama.attention.sliding_window"];if(W==null)return null;let N=Number(W);return Number.isFinite(N)?N:null},R4=($=0,Z=0,X=!1)=>{let j=Math.max(0,Math.floor(Number($)||0)),W=Math.max(0,Math.floor(Number(Z)||0));if(!j||W===1)return 0;if(W<=0)return j;let N=Math.max(0,W-1),Q=Math.floor(j/W),J=j%W,H=X?Math.max(0,J-1):Math.min(J,N);return Q*N+H},w4=({arch:$,nLayer:Z=0})=>({arch:w1($),enabled:!1,window:null,pattern:null,denseFirst:!1,type:null,kvLayers:Math.max(0,Math.floor(Number(Z)||0)),swaLayers:0}),y$,I0=({arch:$,metadata:Z={},nLayer:X=0}={})=>{let j=w1($||Z["general.architecture"]),W=Math.max(0,Math.floor(Number(X)||0)),N=I$(Z,j),Q=j?y$.get(j):null;if(!Q)return w4({arch:j,nLayer:X});let J=Q({nLayer:W,nSwa:N,metadata:Z});if(!J||!J.enabled||!J.window||J.window<=0)return w4({arch:j,nLayer:X});let 
H=Math.max(0,Math.floor(Number(J.pattern)||0)),G=J.kvLayers!=null&&Number.isFinite(Number(J.kvLayers))?Number(J.kvLayers):W,K=Math.max(0,Math.floor(G)),O=R4(K,H,Boolean(J.denseFirst));return{arch:j,enabled:O>0,window:J.window,pattern:H,denseFirst:Boolean(J.denseFirst),type:J.type||"standard",kvLayers:K,swaLayers:O}};var F4=S(()=>{y$=new Map([["llama4",({nSwa:$})=>{if($===0)return{enabled:!1};return{enabled:!0,window:$&&$>0?$:8192,pattern:4,type:"chunked"}}],["afmoe",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["phi3",()=>({enabled:!1})],["gemma2",({nSwa:$})=>{let Z=$&&$>0?$:4096;if(!Z)return{enabled:!1};return{enabled:!0,window:Z,pattern:2,type:"standard"}}],["gemma3",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"standard"}}],["gemma3n",({nLayer:$,nSwa:Z})=>{if(!Z||Z<=0)return{enabled:!1};return{enabled:!0,window:Z,pattern:5,type:"standard",kvLayers:Math.min(20,$)}}],["gemma-embedding",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:6,type:"symmetric"}}],["cohere2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["olmo2",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:4,type:"standard"}}],["exaone4",({nLayer:$,nSwa:Z})=>{let X=$>=64,j=null;if(Z&&Z>0)j=Z;else if(X)j=4096;if(!j)return{enabled:!1};return{enabled:!0,window:j,pattern:4,type:"standard"}}],["gpt-oss",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:$,pattern:2,type:"standard"}}],["smallthinker",({nSwa:$})=>{if(!$||$<=0)return{enabled:!1};return{enabled:!0,window:4096,pattern:4,denseFirst:!0,type:"standard"}}]])});var y0=($={})=>{let Z=$["general.architecture"],X=(z,V=null)=>{let R=$[z],q=Number(R);return 
Number.isFinite(q)?q:V},j=Z?X(`${Z}.context_length`,X("llama.context_length")):null,W=Z?X(`${Z}.block_count`,X("llama.block_count")):null,N=Z?X(`${Z}.embedding_length`,X("llama.embedding_length")):null,Q=Z?X(`${Z}.attention.head_count`,X("llama.attention.head_count")):null,J=Z?X(`${Z}.attention.head_count_kv`,X("llama.attention.head_count_kv",Q)):null,H=Z?X(`${Z}.attention.key_length`,X("llama.attention.key_length")):null,G=Z?X(`${Z}.attention.value_length`,X("llama.attention.value_length")):null,K=$["general.quantization_version"]||null,O=$["general.file_type"]||null;return{arch:Z,nCtxTrain:j,nLayer:W,nEmbd:N,nHead:Q,nHeadKv:J,nEmbdHeadK:H,nEmbdHeadV:G,quantVersion:K,fileType:O}},N0=({layerCount:$,headKvCount:Z,embdHeadKCount:X,embdHeadVCount:j,cacheTypes:W,swaConfig:N,kvUnified:Q=!1,nParallel:J=1,swaFull:H=!1})=>{let G=N?.window&&Q?Math.max(1,Number(J)||1):1;return(K)=>z1($,K,Z,X,j,W,{totalLayers:$,swaLayers:N?.swaLayers||0,swaContext:N?.window,swaFull:H,swaContextMultiplier:G})},u0=({maxCtx:$,availableMemory:Z,modelBytes:X,kvBytesForCtx:j})=>{let W=Math.max(1,Math.floor(Number($)||0));if(!j||Z<=X)return W;let N=1,Q=W,J=W;while(N<=Q){let H=Math.floor((N+Q)/2);if(X+j(H)<=Z)J=H,N=H+1;else Q=H-1}return J};var E4=S(()=>{B1()});var B0=S(()=>{z4();B1();h0();L1();F4();E4()});import{EventEmitter as u$}from"node:events";class 
F1{constructor($=m$){this.maxEntries=$,this.modelLoads=[],this.completions=[],this.transcriptions=[]}addModelLoad($){R1(this.modelLoads,$,this.maxEntries),t.emit("status:modelLoad",$),t.emit("status:change",{type:"modelLoad",entry:$})}addCompletion($){R1(this.completions,$,this.maxEntries),t.emit("status:completion",$),t.emit("status:change",{type:"completion",entry:$})}addTranscription($){R1(this.transcriptions,$,this.maxEntries),t.emit("status:transcription",$),t.emit("status:change",{type:"transcription",entry:$})}getModelLoadHistory(){return[...this.modelLoads].reverse()}getCompletionHistory(){return[...this.completions].reverse()}getTranscriptionHistory(){return[...this.transcriptions].reverse()}clear(){this.modelLoads=[],this.completions=[],this.transcriptions=[]}}function E1($){let Z=(X)=>$(X);return t.on("status:change",Z),()=>t.off("status:change",Z)}function x4($){M4+=1;let Z=M4,X=E1($);return{subscriberId:Z,unsubscribe:X}}function M1($){let Z=[];return{generators:Array.from($.entries()).filter(([,j])=>j.type==="ggml-llm").map(([j,W])=>{let{instance:N}=W,Q=[];if(N.contexts)Q=Array.from(N.contexts.entries()).map(([J,H])=>{let G={key:J,refCount:H.refCount,hasModel:Boolean(H.context)},K=H.context.parallel.getStatus();return G.parallelStatus=K,Z.push({generatorId:j,contextKey:J,...K}),G});return{id:j,type:W.type,refCount:W.refCount,repoId:N.info?.model?.repoId||null,quantization:N.info?.model?.quantization||null,variant:N.info?.runtime?.variant||null,nCtx:N.info?.runtime?.n_ctx||null,nParallel:N.info?.runtime?.n_parallel||null,contexts:Q}}),parallelStatuses:Z,history:{modelLoads:e.getModelLoadHistory(),completions:e.getCompletionHistory()}}}function 
x1($){return{generators:Array.from($.entries()).filter(([,X])=>X.type==="ggml-stt").map(([X,j])=>{let{instance:W}=j,N=W.getStatus?.()||{},Q=N.queueStatus||{processing:!1,queuedCount:0};return{id:X,type:j.type,refCount:j.refCount,repoId:W.info?.model?.repoId||null,quantization:W.info?.model?.quantization||null,modelType:W.info?.model?.modelType||null,variant:W.info?.runtime?.variant||null,hasContext:N.hasContext||!1,contextRefCount:N.contextRefCount||0,queueStatus:Q}}),history:{modelLoads:J0.getModelLoadHistory(),transcriptions:J0.getTranscriptionHistory()}}}function S4($){return{timestamp:new Date().toISOString(),ggmlLlm:M1($),ggmlStt:x1($)}}var m$=9999,t,R1=($,Z,X)=>{if($.push({...Z,timestamp:Z.timestamp||new Date().toISOString()}),$.length>X)$.shift()},e,J0,M4=0;var m0=S(()=>{t=new u$;t.setMaxListeners(100);e=new F1,J0=new F1});import b from"node:path";import F0 from"node:os";import{stat as E0,mkdir as f$,open as p$,unlink as r,readFile as v4,writeFile as h4,rename as C4,readdir as g$}from"node:fs/promises";import{createHash as T1}from"node:crypto";import{gguf as c$}from"@huggingface/gguf";import{loadModel as d$,getBackendDevicesInfo as I4,isLibVariantAvailable as y4}from"@fugood/llama.node";import l$ from"bytes";import*as n$ from"node:stream/web";class s4{constructor($,Z){this.config=$,this.plan=Z,this.baseDir=$.runtime.cache_dir,this.enabled=$.runtime.session_cache?.enabled!==!1,this.maxSizeBytes=H6($.runtime.session_cache?.max_size_bytes,10737418240),this.maxEntries=$.runtime.session_cache?.max_entries||1000,this.metadata={variant:Z.info?.runtime?.variant||null,n_gpu_layers:Z.info?.runtime?.n_gpu_layers||0,n_ctx:Z.info?.runtime?.n_ctx||0,modelPath:Z.localPath,cacheTypeK:Z.info?.runtime?.cache_type_k||"f16",cacheTypeV:Z.info?.runtime?.cache_type_v||"f16",kvUnified:Z.info?.runtime?.kv_unified??null,swaFull:Z.info?.runtime?.swa_full??null,flashAttnType:Z.info?.runtime?.flash_attn_type||"off"},this.cacheMap=null,this.initialized=!1}async 
initialize(){if(!this.enabled||this.initialized)return;try{await H0(n0(this.baseDir)),await H0(g0(this.baseDir)),await H0(n4(this.baseDir)),this.cacheMap=await e$(this.baseDir),this.initialized=!0,console.log(`[SessionCache] Initialized with ${Object.keys(this.cacheMap.entries).length} entries`)}catch($){console.warn(`[SessionCache] Failed to initialize: ${$.message}`),this.enabled=!1}}async findMatchingEntry($){if(!this.enabled||!this.cacheMap)return null;let Z=W6($,this.metadata,this.cacheMap);if(Z){let{entry:X}=Z;if(!await N6(X.stateFilePath))return console.log(`[SessionCache] Removing stale entry: ${X.id}`),delete this.cacheMap.entries[X.id],this.cacheMap.totalSize-=X.stateFileSize||0,await P1(this.cacheMap,this.baseDir).catch(()=>{}),null;return X.lastAccessedAt=new Date().toISOString(),await P1(this.cacheMap,this.baseDir).catch(()=>{}),{entry:X}}return null}async prepareCompletionOptions($,Z){if(!this.enabled)return{options:$,cacheEntry:null,promptPrefix:null};let X=await this.findMatchingEntry(Z);if(X){let{entry:j}=X;return console.log(`[SessionCache] Found matching entry: ${j.id} (${j.fullText.length} chars, loadStateSize=${j.loadStateSize})`),{options:{...$,load_state_path:j.stateFilePath},cacheEntry:j,promptPrefix:j.fullText}}return{options:$,cacheEntry:null,promptPrefix:null}}async saveCompletionState($,Z,X,j=0){if(!this.enabled)return null;let W=$6($,this.metadata);if(this.cacheMap.entries[W])return console.log(`[SessionCache] Entry already exists for prompt: ${W}`),await r(X).catch(()=>{}),this.cacheMap.entries[W];let N=$+Z,Q=Z6(W,this.baseDir);try{await H0(b.dirname(Q)),await C4(X,Q);let J=await E0(Q),H={id:W,promptText:$,completionText:Z,fullText:N,promptTokenCount:j,stateFilePath:Q,stateFileSize:J.size,metadata:{...this.metadata},createdAt:new Date().toISOString(),lastAccessedAt:new Date().toISOString()};return this.cacheMap.entries[W]=H,this.cacheMap.totalSize+=J.size,await Q6(this.cacheMap,$,W,this.metadata),await 
Y6(this.cacheMap,this.maxSizeBytes,this.maxEntries),await P1(this.cacheMap,this.baseDir),console.log(`[SessionCache] Saved entry: ${W} (${J.size} bytes, ${N.length} chars)`),H}catch(J){return console.warn(`[SessionCache] Failed to save state: ${J.message}`),await r(X).catch(()=>{}),null}}async generateTempStatePath(){return await H0(g0(this.baseDir)),X6(this.baseDir)}async cleanup(){await J6(this.baseDir)}}async function t4($,Z,X={}){let{globalDownloadManager:j=null}=X,W=l0(Z),N=await V6(W),Q=new s4(W,N);await Q.initialize();let J={id:$,type:"ggml-llm",config:W,plan:N,info:N.info,contexts:new Map,downloads:new Map,globalDownloadManager:j,sessionCache:Q,finalized:!1},H=async()=>{if(J.finalized)return;J.finalized=!0;let Y=Array.from(J.contexts.values()),_=Y.map((B)=>{if(B.released)return Promise.resolve(!1);if(B.releaseRequested||B.releaseTimer)return Promise.resolve(!1);if(B.refCount=Math.max(0,B.refCount-1),B.refCount>0)return Promise.resolve(!1);return p0(J,B)});if(await Promise.allSettled(_),Y.length===0||Y.every((B)=>B.released))await J.sessionCache.cleanup()},G=async(Y={})=>{let{onProgress:_}=Y,L=await K6(J,_);return{modelInfo:L.modelInfo?{...L.modelInfo}:null,runtime:{...J.plan.info.runtime},download:{...J.plan.info.download}}},K=async()=>{if(J.finalized)return!1;let Y=O0(J),_=J.contexts.get(Y);if(!_)return!1;return q6(J,_,!1)},O=async(Y={})=>{let{options:_={},useCache:L=!0}=Y,B=O0(J),F=J.contexts.get(B);if(!F)throw Error(`Context "${B}" not initialized`);await F.ready;let E=_.prompt||"";if(!E&&_.messages){let x=await F.context.getFormattedChat(_.messages,_.chat_template||_.chatTemplate,{jinja:_.jinja??!0,tools:_.tools,parallel_tool_calls:_.parallel_tool_calls,tool_choice:_.tool_choice,enable_thinking:_.enable_thinking,add_generation_prompt:_.add_generation_prompt,now:_.now,chat_template_kwargs:_.chat_template_kwargs});E=x?.prompt||x||""}if(L&&J.sessionCache.enabled&&E){let{options:x}=await J.sessionCache.prepareCompletionOptions(_,E),T=await 
J.sessionCache.generateTempStatePath(),p=(await F.context.tokenize(E))?.tokens?.length||0,m={...x,save_state_path:T};console.log(`[SessionCache] save_state_size=${p} (prompt tokens)`);let l={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return G6(F.context,m,J.sessionCache,E,T,p,J.id,l)}let M={repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null};return U6(F.context,_,J.id,M)},z=async(Y={})=>{let{text:_="",params:L={}}=Y,B=O0(J),F=J.contexts.get(B);if(!F)throw Error(`Context "${B}" not initialized`);await F.ready;let E=await F.context.tokenize(_,L);if(!E)return{tokens:[]};let M=Array.from(E.tokens??[]).map((x)=>Number(x));return{...E,tokens:M}},V=async(Y={})=>{let{tokens:_=[]}=Y,L=O0(J),B=J.contexts.get(L);if(!B)throw Error(`Context "${L}" not initialized`);await B.ready;let F=_.map((E)=>Number(E));return B.context.detokenize(F)},R=async(Y={})=>{let{messages:_=[],template:L,params:B}=Y,F=O0(J),E=J.contexts.get(F);if(!E)throw Error(`Context "${F}" not initialized`);return await E.ready,await E.context.getFormattedChat(_,L,B)},q=()=>Array.from(J.contexts.values()).some((Y)=>!Y.released&&(Y.releaseRequested||Y.releaseTimer||Y.refCount>0)),A=()=>{J.finalized=!1},w=()=>{let Y=[],_=Array.from(J.contexts.entries()).map(([L,B])=>{let F={key:L,refCount:B.refCount,hasModel:Boolean(B.context)},E=B.context.parallel.getStatus();return F.parallelStatus=E,Y.push({contextKey:L,...E}),F});return{id:J.id,type:J.type,repoId:J.plan.info.model?.repoId||null,quantization:J.plan.info.model?.quantization||null,variant:J.plan.info.runtime?.variant||null,nCtx:J.plan.info.runtime?.n_ctx||null,nParallel:J.plan.info.runtime?.n_parallel||null,contexts:_,parallelStatuses:Y}},U=(Y)=>{let 
_=Array.from(J.contexts.entries()).map(([L,B])=>B.context.parallel.subscribeToStatus((F)=>{Y({contextKey:L,...F})}));return{remove:()=>{_.forEach((L)=>{if(L?.remove)L.remove()})}}};return{id:$,type:"ggml-llm",info:N.info,contexts:J.contexts,initContext:G,completion:O,tokenize:z,detokenize:V,applyChatTemplate:R,releaseContext:K,finalize:H,getStatus:w,subscribeParallelStatus:U,hasPendingReleases:q,resetFinalized:A}}async function e4($,Z,X={}){let{onProgress:j,onComplete:W,onError:N}=X;try{let Q=l0($),J=await D1(Q),H=a4(Q,J),{repoId:G}=J;if(await R0(H,J.size)){if(console.log(`[Download] Model already exists: ${G} at ${H}`),typeof W==="function")W({localPath:H,repoId:G,alreadyExists:!0});return{started:!1,localPath:H,repoId:G,alreadyExists:!0}}let O=Z.getDownload(H);if(O)return console.log(`[Download] Already downloading: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:H,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof N==="function")N(V)}),{started:!1,localPath:H,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting download: ${G}`);let z=(async()=>{try{if(J.isSplit&&J.splitCount>0){let V=/-(\d{5})-of-(\d{5})\.gguf$/,R=b.dirname(H),q=J.splitCount,A=0;for(let w=1;w<=q;w+=1){let U=String(w).padStart(5,"0"),Y=J.filename.replace(V,`-${U}-of-${String(q).padStart(5,"0")}.gguf`),_=`${Q.model.base_url.replace(/\/+$/,"")}/${J.repoId}/resolve/${J.revision}/${Y}`,L=b.join(R,Y);if(!await R0(L))await c0(_,J.headers,L,null,(F)=>{if(F>=0&&Number.isFinite(F)){let E=(A+F)/q;if(console.log(`[Download] ${G}: ${Math.round(E*100)}%`),typeof j==="function")j(E)}});A+=1}}else await c0(J.url,J.headers,H,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}});if(console.log(`[Download] Completed: ${G}`),typeof W==="function")W({localPath:H,repoId:G})}catch(V){if(console.error(`[Download] Failed: ${G}`,V.message),typeof N==="function")N(V);throw V}finally{Z.deleteDownload(H)}})();return 
Z.setDownload(H,z),{started:!0,localPath:H,repoId:G}}catch(Q){if(console.error("[Download] Failed to start download:",Q.message),typeof N==="function")N(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function O6($){let Z=l0($),X=await D1(Z),j=await r4(X.url,X.headers,Z.runtime.cache_dir),{arch:W,nCtxTrain:N,nLayer:Q,nEmbd:J,nHead:H,nHeadKv:G,nEmbdHeadK:K,nEmbdHeadV:O,quantVersion:z,fileType:V}=y0(j),R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(H))?Number(H):0,w=Number.isFinite(Number(G))?Number(G):A,U=A>0&&q>0?q/A:128,Y=K!=null&&Number.isFinite(Number(K))?Number(K):U,_=O!=null&&Number.isFinite(Number(O))?Number(O):U,L=I0({arch:W,metadata:j,nLayer:R}),B=L&&Number.isFinite(Number(L.kvLayers))?Number(L.kvLayers):R,F=Math.max(0,Math.floor(Number(B)||0)),M=(Z.model.n_ctx?Number(Z.model.n_ctx):null)||N||4096,x={k:Z.model.cache_type_k,v:Z.model.cache_type_v},T=X.size>0?X.size:0,D=N0({layerCount:F,headKvCount:w,embdHeadKCount:Y,embdHeadVCount:_,cacheTypes:x,swaConfig:L,kvUnified:Z.model.kv_unified,nParallel:Z.model.n_parallel,swaFull:Z.model.swa_full}),p=Z.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,m=Z.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(Z.backend.cpu_memory_fraction))):d0,l=D(M),i=await o4(Z,{modelBytes:T,kvCacheBytes:l}),c=(i.selected.totalMemory||0)*p,k=Math.max(0,F0.totalmem()*m),f=i.selected.hasGpu?c:k,h=u0({maxCtx:M,availableMemory:f,modelBytes:T,kvBytesForCtx:D}),s=D(M),I=D(h);return{kvInfo:{nCtxTrain:N,nLayer:R,nEmbd:q,nHeadKv:w,nEmbdHeadK:Y,nEmbdHeadV:_,nHeadCount:A,nHeadKvCount:w,kvLayerCount:F,swa:L?.enabled?{window:L.window,pattern:L.pattern,denseFirst:L.denseFirst,type:L.type,layers:L.swaLayers}:null},modelBytes:T,kvCacheBytes:s,limitedKvCacheBytes:I,memoryLimitedCtx:h,quantization:{name:X.quantization||null,fileType:V,version:z}}}async function 
$2($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...N}=Z,Q=null,J=null,H=null,G=null,K=null,O=null;if(W)try{let{modelBytes:_,kvCacheBytes:L,limitedKvCacheBytes:B,memoryLimitedCtx:F,kvInfo:E,quantization:M}=await O6(W);Q=_,J=L,H=B,G=F,K=E,O=M}catch(_){}let z=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,V=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,R=await K0({...N,platform:process.platform,totalMemoryInBytes:F0.totalmem(),backend:"ggml-llm",includeBreakdown:j,gpuMemoryFraction:z,cpuMemoryFraction:V,dependencies:{getBackendDevicesInfo:I4,isLibVariantAvailable:y4},modelBytes:Q,kvCacheBytes:J,limitedKvCacheBytes:H}),q=R.selected,A=b4(q);q.modelBytes=Q||null,q.kvCacheBytes=J||null,q.memoryLimitedCtx=G||null,q.limitedKvCacheBytes=H||null,q.kvInfo=K||null,q.quantization=O||null;let w=null,U=null;if($){let _=b4($);U={...$,score:_};let L="buttress",B="buttress-higher-score";if(!R.ok)L="local",B="buttress-unavailable";else if(!_&&_!==0)L="buttress",B="missing-client-score";else{let{fit:F,limitedFit:E}=U,M=q?.fit,x=q?.limitedFit,T=F?.fitsInGpu||F?.fitsInCpu||E?.fitsInGpu||E?.fitsInCpu,D=M?.fitsInGpu||M?.fitsInCpu||x?.fitsInGpu||x?.fitsInCpu;if(T&&!D)L="local",B="client-fits-in-memory";else if(D&&!T)L="buttress",B="buttress-fits-in-memory";else if(_>A*X)L="local",B="client-better";else if(A>_*X)L="buttress",B="buttress-better";else L="either",B="comparable-scores"}w={buttressScore:A,clientScore:_,threshold:X,recommendation:L,reason:B}}if(!R.ok&&!w)w={buttressScore:A,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let Y=null;if(W)Y={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,nCtx:W.model?.n_ctx||null,cacheKType:W.model?.cache_type_k||"f16",cacheVType:W.model?.cache_type_v||"f16"};return{type:"ggml-llm",timestamp:new 
Date().toISOString(),buttress:R,client:U,comparison:w,modelConfig:Y}}var i$=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return n$},s$,u4,r$,m4=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};m4($[X],j)}else $[X]=j}),$},a$=".gguf",f4="https://huggingface.co",p4="https://huggingface.co/api",d,g4,d0=0.5,w0,S1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},f0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","snapdragon","default"].includes(Z))return Z;return null},l0=($={})=>{let Z=JSON.parse(JSON.stringify(w0));if(m4(Z,$),Z.backend.variant=f0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(S1(Z.backend.variant_preference).map(f0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=["cuda","vulkan","snapdragon","default"];if(Z.runtime.prefer_variants=Array.from(new Set(S1(Z.runtime.prefer_variants).map(f0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(S1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return 
Z.model.n_parallel=Math.max(1,Number(Z.model.n_parallel)||4),Z.model.n_batch=Math.max(1,Number(Z.model.n_batch)||512),Z.model.base_url=Z.model.base_url||f4,Z.model.api_base=Z.model.api_base||p4,Z.runtime.cache_dir=Z.runtime.cache_dir?b.resolve(Z.runtime.cache_dir):d,Z.runtime.session_cache={...w0.runtime.session_cache,...Z.runtime.session_cache||{}},Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||w0.runtime.context_release_delay_ms),Z},P4=($)=>{let Z=$.toLowerCase();return g4.find((j)=>Z.includes(j))||null},o$=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(f0).filter(Boolean)))},H0=async($)=>{await f$($,{recursive:!0})},t$=($=d)=>b.join($,".metadata-cache"),c4=($,Z,X=d)=>{let j=T1("sha256").update($).digest("hex");return b.join(t$(X),Z,`${j}.json`)},d4=async($,Z,X=d)=>{try{let j=c4($,Z,X),W=await v4(j,"utf-8");return console.log(`[Cache] Hit ${Z} cache:`,b.basename(j)),JSON.parse(W,(N,Q)=>{if(typeof Q==="string"&&Q.startsWith("__bigint__"))return BigInt(Q.slice(10));return Q})}catch(j){return null}},k1=async($,Z,X,j=d)=>{try{let W=c4($,Z,j);await H0(b.dirname(W)),await h4(W,JSON.stringify(X,(N,Q)=>{if(typeof Q==="bigint")return`__bigint__${Q.toString()}`;return Q}),"utf-8"),console.log(`[Cache] Wrote ${Z} cache:`,b.basename(W))}catch(W){console.warn(`[Cache] Failed to write ${Z} cache:`,W.message)}},n0=($=d)=>b.join($,".session-state-cache"),l4=($=d)=>b.join(n0($),"cache-map.json"),g0=($=d)=>b.join(n0($),"temp"),n4=($=d)=>b.join(n0($),"states"),k4=()=>({version:1,entries:{},totalSize:0}),e$=async($=d)=>{try{let Z=l4($),X=await v4(Z,"utf-8"),j=JSON.parse(X);if(!j.entries||typeof j.entries!=="object")return k4();return j}catch{return k4()}},P1=async($,Z=d)=>{let X=l4(Z),j=`${X}.tmp.${Date.now()}`;try{await H0(b.dirname(X)),await 
h4(j,JSON.stringify($,null,2),"utf-8"),await C4(j,X)}catch(W){throw await r(j).catch(()=>{}),W}},$6=($,Z)=>{let X=JSON.stringify({text:$,model:Z.modelPath,variant:Z.variant,n_gpu_layers:Z.n_gpu_layers,n_ctx:Z.n_ctx,cacheTypeK:Z.cacheTypeK,cacheTypeV:Z.cacheTypeV,kvUnified:Z.kvUnified,swaFull:Z.swaFull,flashAttnType:Z.flashAttnType});return T1("sha256").update(X).digest("hex").slice(0,24)},Z6=($,Z=d)=>b.join(n4(Z),`${$}.bin`),X6=($=d)=>{let Z=`${Date.now()}-${Math.random().toString(36).slice(2,10)}`;return b.join(g0($),`${Z}.bin`)},i4=($,Z)=>$.modelPath===Z.modelPath&&$.variant===Z.variant&&$.n_gpu_layers===Z.n_gpu_layers&&$.n_ctx>=Z.n_ctx&&$.cacheTypeK===Z.cacheTypeK&&$.cacheTypeV===Z.cacheTypeV&&$.kvUnified===Z.kvUnified&&$.swaFull===Z.swaFull&&$.flashAttnType===Z.flashAttnType,j6=($,Z)=>{let X=Math.min($.length,Z.length),j=0;while(j<X&&$[j]===Z[j])j+=1;return j},W6=($,Z,X)=>{let j=Object.values(X.entries);console.log(`[SessionCache] Finding match for promptText (${$.length} chars)`),console.log(`[SessionCache] Checking ${j.length} cache entries`);let N=j.filter((Q)=>i4(Q.metadata,Z)).reduce((Q,J)=>{let H=j6($,J.fullText);if(H>Q.prefixLen)return{entry:J,prefixLen:H};if(H===Q.prefixLen&&J.fullText.length>(Q.entry?.fullText?.length||0))return{entry:J,prefixLen:H};return Q},{entry:null,prefixLen:0});if(N.entry)return console.log(`[SessionCache] Prefix match found: ${N.entry.id} (${N.prefixLen}/${N.entry.fullText.length} chars)`),{entry:N.entry,prefixLength:N.prefixLen};return console.log("[SessionCache] No match found"),null},Y6=async($,Z,X)=>{let j=Object.values($.entries).sort((J,H)=>new Date(J.lastAccessedAt)-new Date(H.lastAccessedAt)),W=$.totalSize,N=Object.keys($.entries).length,Q=j.filter((J)=>{let H=W>Z,G=N>X;if(!H&&!G)return!1;return W-=J.stateFileSize||0,N-=1,!0});return await Promise.all(Q.map(async(J)=>{await r(J.stateFilePath).catch(()=>{}),delete $.entries[J.id],console.log(`[SessionCache] Evicted entry: 
${J.id}`)})),$.totalSize=Math.max(0,W),Q.map((J)=>J.id)},Q6=async($,Z,X,j)=>{let W=Object.entries($.entries).filter(([N,Q])=>{if(N===X)return!1;if(!i4(Q.metadata,j))return!1;return Z.startsWith(Q.promptText)&&Q.promptText.length<Z.length}).map(([,N])=>N);return await Promise.all(W.map(async(N)=>{await r(N.stateFilePath).catch(()=>{}),$.totalSize-=N.stateFileSize||0,delete $.entries[N.id],console.log(`[SessionCache] Evicted superseded prefix entry: ${N.id} (${N.promptText.length} prompt chars)`)})),W.map((N)=>N.id)},J6=async($=d)=>{let Z=g0($);try{let X=await g$(Z),j=Date.now(),W=3600000;await Promise.all(X.map(async(N)=>{let Q=b.join(Z,N),J=await E0(Q).catch(()=>null);if(J&&j-J.mtimeMs>3600000)await r(Q).catch(()=>{}),console.log(`[SessionCache] Cleaned up temp file: ${N}`)}))}catch{}},N6=async($)=>{try{return await E0($),!0}catch{return!1}},H6=($,Z)=>{if($==null)return Z;if(typeof $==="number")return $;if(typeof $==="string"){let X=l$.parse($);return X!=null?X:Z}return Z},T4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},D4=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},r4=async($,Z,X=d)=>{let j=JSON.stringify({url:$,headers:Z}),W=await d4(j,"range-metadata",X);if(W)return W;let N=!/^https?:/i.test($),{metadata:Q}=await c$($,{fetch,additionalFetchHeaders:Z,allowLocalFile:N});return await k1(j,"range-metadata",Q,X),Q},a4=($,Z)=>{if($.model.local_path)return b.resolve($.model.local_path);let X=Z.repoId.split("/"),j=b.join($.runtime.cache_dir,...X,Z.revision);return b.join(j,Z.filename)},R0=async($,Z)=>{try{let X=await E0($);if(!Z)return!0;return 
X.size===Z}catch(X){return!1}},c0=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await H0(b.dirname(X));let N=await fetch($,{headers:Z});if(!N.ok||!N.body)throw Error(`Failed to download ${$}: ${N.status} ${N.statusText}`);let Q=await p$(X,"w"),J=Number(N.headers.get("content-length"))||j||0,H=0,G=0.05;try{await N.body.pipeTo(new r$({async write(K){if(await Q.write(K),H+=K.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,H/J);while(O>=G)W(G),G+=0.05}},async close(){if(await Q.close(),typeof W==="function")W(1)},async abort(K){throw await Q.close().catch(()=>{}),await r(X).catch(()=>{}),K}}))}catch(K){throw await Q.close().catch(()=>{}),await r(X).catch(()=>{}),K}if(j){let K=await E0(X);if(K.size!==j)throw await r(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${K.size}`)}},D1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),N=await d4(W,"artifact-info",j);if(N)return N;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let Y=await D4($.model.url,{headers:Q}),_=Number(Y.headers.get("content-length"))||null,L=$.model.filename||$.model.url.split("/").pop(),B={repoId:Z,revision:X,filename:L,url:$.model.url,size:_,headers:Q};return await k1(W,"artifact-info",B,j),B}let{filename:J}=$.model,H=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await T4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((Y)=>Y.rfilename||Y.path||Y.filename).filter((Y)=>typeof 
Y==="string"&&Y.endsWith(a$));if(O.length===0)throw Error(`No GGUF artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:g4,V=()=>{let Y=z.find((_)=>{return O.find((B)=>B.toLowerCase().includes(_))});if(Y)return{filename:O.find((L)=>L.toLowerCase().includes(Y)),quantization:Y};return null};if(!J){let Y=V()||{filename:O[0],quantization:null},{filename:_,quantization:L}=Y;J=_,H=L||P4(J)}else if(!H)H=P4(J);let R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=/-(\d{5})-of-(\d{5})\.gguf$/,A=J.match(q),w=null;if(A){let[,,Y]=A,_=await T4(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),L=_?.siblings||_?.files||[],B=Number(Y);w=0;for(let F=1;F<=B;F+=1){let E=String(F).padStart(5,"0"),M=J.replace(q,`-${E}-of-${Y}.gguf`),x=L.find((D)=>(D.rfilename||D.path||D.filename)===M),T=Number(x?.size);if(Number.isFinite(T)&&T>0)w+=T}}else{let Y=await D4(R,{headers:Q});w=Number(Y.headers.get("content-length"))||null}let U={repoId:Z,revision:X,filename:J,url:R,size:w,quantization:H,headers:Q,isSplit:Boolean(A),splitCount:A?Number(A[2]):0};return await k1(W,"artifact-info",U,j),U},o4=async($,{modelBytes:Z=null,kvCacheBytes:X=null}={})=>{let j=o$($),[W,...N]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):w0.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,H=await K0({platform:process.platform,totalMemoryInBytes:F0.totalmem(),backend:"ggml-llm",variant:W||null,preferVariants:N,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:I4,isLibVariantAvailable:y4},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this 
platform`)});if(!H.ok||!H.selected){let z=(H.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). Errors: ${z}`)}let K=(H.attempts||[]).map(G);return{selected:G(H.selected),attempts:K}},V6=async($)=>{let Z=await D1($),X=await r4(Z.url,Z.headers,$.runtime.cache_dir),{arch:j,nCtxTrain:W,nLayer:N,nEmbd:Q,nHead:J,nHeadKv:H,nEmbdHeadK:G,nEmbdHeadV:K,quantVersion:O,fileType:z}=y0(X),V=Number.isFinite(Number(N))?Number(N):0,R=Number.isFinite(Number(Q))?Number(Q):0,q=Number.isFinite(Number(J))?Number(J):0,A=Number.isFinite(Number(H))?Number(H):q,w=q>0&&R>0?R/q:128,U=G!=null&&Number.isFinite(Number(G))?Number(G):w,Y=K!=null&&Number.isFinite(Number(K))?Number(K):w,_=I0({arch:j,metadata:X,nLayer:V}),L=_&&Number.isFinite(Number(_.kvLayers))?Number(_.kvLayers):V,B=Math.max(0,Math.floor(Number(L)||0)),F={use_mmap:$.model.use_mmap??$.runtime.use_mmap,use_mlock:$.model.use_mlock??$.runtime.use_mlock,n_threads:$.model.n_threads??$.runtime.n_threads,n_ctx:$.model.n_ctx??$.runtime.n_ctx,n_batch:$.model.n_batch??$.runtime.n_batch,n_ubatch:$.model.n_ubatch??$.runtime.n_ubatch,n_cpu_moe:$.model.n_cpu_moe??$.runtime.n_cpu_moe,n_parallel:$.model.n_parallel??$.runtime.n_parallel,cpu_mask:$.model.cpu_mask??$.runtime.cpu_mask,cpu_strict:$.model.cpu_strict??$.runtime.cpu_strict,devices:$.model.devices??$.runtime.devices,n_gpu_layers:$.model.n_gpu_layers??$.runtime.n_gpu_layers,flash_attn_type:$.model.flash_attn_type??$.runtime.flash_attn_type,cache_type_k:$.model.cache_type_k??$.runtime.cache_type_k,cache_type_v:$.model.cache_type_v??$.runtime.cache_type_v,kv_unified:$.model.kv_unified??$.runtime.kv_unified,swa_full:$.model.swa_full??$.runtime.swa_full,ctx_shift:$.model.ctx_shift??$.runtime.ctx_shift},E=F.n_ctx?Number(F.n_ctx):null,M=E||W||4096,x=[],T=[],D=!0;if(E&&W&&E>W){D=!1;let Q0=`Requested context length (${E}) exceeds model training context 
(${W})`;x.push(Q0),T.push(Q0),M=W}if(E&&!W)x.push("Model metadata missing training context length, using requested value");let p={k:F.cache_type_k,v:F.cache_type_v},m=Z.size>0?Z.size:0,l=N0({layerCount:B,headKvCount:A,embdHeadKCount:U,embdHeadVCount:Y,cacheTypes:p,swaConfig:_,kvUnified:F.kv_unified,nParallel:F.n_parallel,swaFull:F.swa_full}),i=l(M),v=await o4($,{modelBytes:m,kvCacheBytes:i}),c=v.selected.totalMemory||0,k=c*($.backend.gpu_memory_fraction||1),f=$.backend.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):d0,h=Math.max(0,F0.totalmem()*f),s=v.selected.hasGpu?k:h,I=u0({maxCtx:M,availableMemory:s,modelBytes:m,kvBytesForCtx:l});if(!E&&I){let Q0=W?Math.min(I,W):I,U1=Math.max(32,Q0);if(U1<M)x.push(`Context length capped to ${U1} by memory limits`);M=U1}if(M>I)M=I;let o=Math.floor(I);console.log(`[buttress] Memory-limited context length: ${o}`);let q0=l(M),k0=m+q0,G0=V?m/(V+1):m,T0=0;if(v.selected.hasGpu&&G0>0)T0=Math.min(V+1,Math.max(0,Math.floor(k/G0)));console.log(`[buttress] Auto GPU layer capacity (${v.selected.variant}): ${T0}/${V+1}`);let H1;if(F.n_gpu_layers==="auto"||F.n_gpu_layers==null)H1=T0;else H1=Math.max(0,Math.min(Number(F.n_gpu_layers)||0,V+1));let M$=(()=>{let Q0=F.flash_attn_type&&String(F.flash_attn_type).toLowerCase();if(Q0==="on"||Q0==="off")return Q0;if(Q0==="auto")return v.selected.hasGpu?"auto":"off";return v.selected.hasGpu?"auto":"off"})(),x$=$.runtime.cache_dir,V1=a4($,Z),Q4=await 
R0(V1,Z.size),S$={ok:D,backend:"ggml-llm",warnings:x,errors:T,model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,url:Z.url,sizeBytes:Z.size,metadata:{architecture:j,n_ctx_train:W,n_layer:V,n_embd:R,quantization_version:O,file_type:z,kv_layer_count:B,swa:_?.enabled?{window:_.window,pattern:_.pattern,dense_first:_.denseFirst,type:_.type,layers:_.swaLayers}:null}},runtime:{...F,variant:v.selected.variant,n_ctx:M,requested_ctx:E,n_gpu_layers:H1,auto_gpu_layers:T0,flash_attn_type:M$,cache_type_k:p.k,cache_type_v:p.v,estimated_max_n_ctx:o},resources:{modelBytes:m,kvCacheBytes:q0,totalEstimatedBytes:k0,gpuCapacityBytes:c,gpuUsableBytes:k,cpuUsableBytes:h,fit:v.selected.fit},devices:{selected:v.selected,attempts:v.attempts},download:{cacheDir:x$,localPath:V1,exists:Q4},timestamp:new Date().toISOString()};return{config:$,info:S$,artifact:Z,metadata:{arch:j,nCtxTrain:W,nLayer:V,nEmbd:R},devices:v,cacheTypes:p,localPath:V1,localExists:Q4}},U6=($,Z,X=null,j=null)=>{let W,N=Date.now(),Q=0;return new u4({async start(J){try{let H=await $.parallel.completion(Z,(V,R)=>{if(!R)return;if(R.token)Q+=1;J.enqueue({event:"token",data:{requestId:V,...R}})}),{requestId:G}=H;W=H.stop;let K=await H.promise;console.log("[Completion] Result:",K),J.enqueue({event:"result",data:{requestId:G,...K}}),J.close();let 
O=Date.now()-N,z=K.timings||{};e.addCompletion({id:`completion-${G}`,generatorId:X,requestId:G,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,cacheTokens:z.cache_n??0,promptTokens:z.prompt_n??0,tokensGenerated:z.predicted_n??Q,tokensPerSecond:z.predicted_per_second??0,promptPerSecond:z.prompt_per_second??0,durationMs:O,success:!0,interrupted:K.interrupted||!1,contextFull:K.context_full||K.contextFull||!1})}catch(H){J.enqueue({event:"error",data:{message:H?.message||String(H)}}),J.error(H),e.addCompletion({id:`completion-${Date.now()}`,generatorId:X,repoId:j?.repoId||null,quantization:j?.quantization||null,variant:j?.variant||null,durationMs:Date.now()-N,tokensGenerated:Q,success:!1,error:H?.message||String(H)})}},cancel(){if(W)W()}})},G6=($,Z,X,j,W,N,Q=null,J=null)=>{let H,G="",K=!1,O=Date.now(),z=0;return new u4({async start(V){try{let R=await $.parallel.completion(Z,(Y,_)=>{if(!_)return;if(_.token)G+=_.token,z+=1;V.enqueue({event:"token",data:{requestId:Y,..._}})}),{requestId:q}=R;H=R.stop;let A=await R.promise;if(A.text)G=A.text;else if(A.content)G=A.content;K=!A.interrupted&&!A.context_full,console.log("[Completion] Result:",A),V.enqueue({event:"result",data:{requestId:q,...A}}),V.close();let w=Date.now()-O,U=A.timings||{};if(e.addCompletion({id:`completion-${q}`,generatorId:Q,requestId:q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,cacheTokens:U.cache_n??0,promptTokens:U.prompt_n??N??0,tokensGenerated:U.predicted_n??z,tokensPerSecond:U.predicted_per_second??0,promptPerSecond:U.prompt_per_second??0,durationMs:w,success:!0,interrupted:A.interrupted||!1,contextFull:A.context_full||A.contextFull||!1,usedCache:Boolean(Z.load_state_path)}),K&&X.enabled&&G)X.saveCompletionState(j,G,W,N).catch((Y)=>{console.warn("[SessionCache] Save failed:",Y.message)});else 
if(W)r(W).catch(()=>{})}catch(R){V.enqueue({event:"error",data:{message:R?.message||String(R)}}),V.error(R),e.addCompletion({id:`completion-${Date.now()}`,generatorId:Q,repoId:J?.repoId||null,quantization:J?.quantization||null,variant:J?.variant||null,durationMs:Date.now()-O,tokensGenerated:z,success:!1,error:R?.message||String(R)}),r(W).catch(()=>{})}},cancel(){if(H)H();r(W).catch(()=>{})}})},O0=($)=>{let Z={model:$.plan.localPath,runtime:$.plan.info.runtime};return T1("sha256").update(JSON.stringify(Z)).digest("hex").slice(0,24)},_6=async($,Z,X,j=null)=>{let{config:W,localPath:N,artifact:Q}=$;if($.localExists&&!Z.has(N)){if($.info.download.exists=!0,typeof X==="function")X(0.5);return N}if(W.model.local_path&&!W.model.allow_local_file)throw Error("Local model path provided but `model.allow_local_file` is not enabled");let J=N;if(j){let H=j.getDownload(J);if(H){console.log(`[ensureModelFile] Waiting for global download: ${Q.repoId}`);try{if(await H,await R0(N,Q.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(0.5);return N}}catch(G){console.warn(`[ensureModelFile] Global download failed, will retry: ${G.message}`)}}}if(!Z.has(J))Z.set(J,(async()=>{if(Q.isSplit&&Q.splitCount>0){let H=/-(\d{5})-of-(\d{5})\.gguf$/,G=b.dirname(N),K=Q.splitCount,O=0;for(let z=1;z<=K;z+=1){let V=String(z).padStart(5,"0"),R=Q.filename.replace(H,`-${V}-of-${String(K).padStart(5,"0")}.gguf`),q=`${W.model.base_url.replace(/\/+$/,"")}/${Q.repoId}/resolve/${Q.revision}/${R}`,A=b.join(G,R);if(!await R0(A))await c0(q,Q.headers,A,null,(U)=>{if(U>=0&&Number.isFinite(U)){let Y=(O+U)/K,_=Math.round(Y*100);if(console.log(`Downloading model splits: ${Math.min(100,_)}%`),typeof X==="function")X(Y*0.5)}});O+=1}}else console.log("Downloading model: 0%"),await c0(Q.url,Q.headers,N,Q.size,(H)=>{if(H>=0&&Number.isFinite(H)){let G=Math.round(H*100);if(console.log(`Downloading model: ${Math.min(100,G)}%`),typeof 
X==="function")X(H*0.5)}});$.localExists=!0,$.info.download.exists=!0})());try{await Z.get(J)}finally{Z.delete(J)}return N},K6=async($,Z)=>{let X=O0($),j=$.contexts.get(X);if(j&&!j.released){if(j.releaseTimer)clearTimeout(j.releaseTimer),j.releaseTimer=null,console.log(`[Context] Cancelled pending release for context "${X}"`);if(j.releaseRequested=!1,j.refCount+=1,console.log(`[Context] Reusing existing context "${X}", refCount=${j.refCount}`),typeof Z==="function")Z(0);if(!j.context)await j.ready;if(typeof Z==="function")Z(1);return j}if(j)console.log(`[Context] Record exists but released=${j.released}, creating new context`);else console.log(`[Context] No existing record for "${X}", creating new context`);j={key:X,refCount:1,ready:null,released:!1},$.contexts.set(X,j),j.ready=(async()=>{let W=Date.now(),N=await _6($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let Q={model:N,n_threads:$.plan.info.runtime.n_threads,use_mmap:$.plan.info.runtime.use_mmap,use_mlock:$.plan.info.runtime.use_mlock,cpu_mask:$.plan.info.runtime.cpu_mask,cpu_strict:$.plan.info.runtime.cpu_strict,devices:$.plan.info.runtime.devices,n_ctx:$.plan.info.runtime.n_ctx,n_gpu_layers:$.plan.info.runtime.n_gpu_layers,n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch,n_ubatch:$.plan.info.runtime.n_ubatch,n_cpu_moe:$.plan.info.runtime.n_cpu_moe,flash_attn_type:$.plan.info.runtime.flash_attn_type,ctx_shift:$.plan.info.runtime.ctx_shift,kv_unified:$.plan.info.runtime.kv_unified,swa_full:$.plan.info.runtime.swa_full,lib_variant:$.plan.info.runtime.variant};if($.plan.info.runtime.flash_attn_type!=="off")Q.cache_type_k=$.plan.info.runtime.cache_type_k,Q.cache_type_v=$.plan.info.runtime.cache_type_v;console.log("[Context] Load Options:",Q);let J;try{if(J=await d$(Q,(H)=>{if(typeof Z==="function"){if(Z(0.5+H*0.25),H%5===0)console.log("[Context] Load Model Progress:",H)}}),$.plan.info.runtime.n_parallel){if(!await 
J.parallel.enable({n_parallel:$.plan.info.runtime.n_parallel,n_batch:$.plan.info.runtime.n_batch}))throw Error("Failed to enable parallel decoding mode for context")}if(typeof Z==="function")Z(1);return j.context=J,j.modelInfo=J.getModelInfo(),e.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,nCtx:$.plan.info.runtime?.n_ctx||null,nGpuLayers:$.plan.info.runtime?.n_gpu_layers||null,durationMs:Date.now()-W,success:!0}),j}catch(H){if(e.addModelLoad({id:`${$.id}-${X}`,generatorId:$.id,contextKey:X,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-W,success:!1,error:H?.message||String(H)}),J)try{J.release()}catch(G){}throw H}})();try{return await j.ready,j}catch(W){throw $.contexts.delete(X),W}},p0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;Z.released=!0,$.contexts.delete(Z.key);try{Z.context?.parallel?.disable?.()}catch(j){}return await Z.context?.release?.(),!0},q6=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return p0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return p0($,Z);return console.log(`[Context] Scheduling release in ${W}ms for context "${Z.key}"`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] Release cancelled, refCount=${Z.refCount} for context "${Z.key}"`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing context "${Z.key}" after ${W}ms delay`),await p0($,Z)},W),!0},b1=($)=>{let Z=l0($);return 
Z.model.repo_id||Z.model.repository||Z.model.model||null},b4=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return _0($)};var v1=S(()=>{B0();m0();s$=i$(),{ReadableStream:u4,WritableStream:r$}=s$,d=b.join(F0.homedir(),".buttress","models"),g4=["mxfp4","q8_0","q6_k","q6","q5_k_m","q5_k_s","q5_k","q5_1","q5_0","q4_k_m","q4_k_s","q4_k","q4_1","q4_0","q3","q2"],w0={backend:{type:"ggml-llm",variant:null,variant_preference:["cuda","vulkan","snapdragon","default"],gpu_memory_fraction:0.85,cpu_memory_fraction:d0},model:{repo_id:null,revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:[],n_ctx:null,n_gpu_layers:"auto",allow_local_file:!1,local_path:null,api_base:p4,base_url:f4},runtime:{cache_dir:d,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},session_cache:{enabled:!0,max_size_bytes:10737418240,max_entries:1000},context_release_delay_ms:1e4}}});import X0 from"node:path";import f1 from"node:os";import{stat as W2,mkdir as L6,open as A6,unlink as h1,readFile as z6,writeFile as B6}from"node:fs/promises";import{createHash as w6}from"node:crypto";import{initWhisper as R6}from"@fugood/whisper.node";import{getBackendDevicesInfo as Y2,isLibVariantAvailable as Q2}from"@fugood/llama.node";import*as F6 from"node:stream/web";class L2{constructor(){this.queue=[],this.processing=!1,this.currentTaskId=null}async enqueue($,Z=null){return new Promise((X,j)=>{this.queue.push({task:$,resolve:X,reject:j,taskId:Z}),this.processNext()})}async processNext(){if(this.processing||this.queue.length===0)return;this.processing=!0;let{task:$,resolve:Z,reject:X,taskId:j}=this.queue.shift();this.currentTaskId=j;try{let W=await $();Z(W)}catch(W){X(W)}finally{this.processing=!1,this.currentTaskId=null,this.processNext()}}getStatus(){return{processing:this.processing,queuedCount:this.queue.length,currentTaskId:this.currentTaskId}}}async function 
A2($,Z,X={}){
/*
 * A2($, Z, X): create a "ggml-stt" (speech-to-text) generator.
 *
 *   $  generator id, stored on the state object and echoed in the result.
 *   Z  raw config; normalised by a0() and turned into a plan by v6().
 *   X  options; only `globalDownloadManager` (default null) is read.
 *
 * The object returned at the end of this function exposes the closures defined
 * below as initContext (H), releaseContext (G), finalize (J), and
 * transcribe / transcribeData (both delegate to K), together with the id, the
 * plan info and the task queue.
 */
let{globalDownloadManager:j=null}=X,W=a0(Z),N=await v6(W),
/* Q: mutable per-generator state shared by every closure below. `contextRecord` starts out null and is never assigned in this function (presumably set by I6 - TODO confirm). */
Q={id:$,type:"ggml-stt",config:W,plan:N,info:N.info,contextRecord:null,downloads:new Map,globalDownloadManager:j,queue:new L2,finalized:!1},
/* J (finalize): runs at most once per `finalized` flag. It does nothing when there is no context record, when the record is already released, or when a release is already requested or scheduled. Otherwise it drops one reference (never below 0) and calls s0(Q, record) once the count reaches 0. */
J=async()=>{if(Q.finalized)return;Q.finalized=!0;let A=Q.contextRecord;if(!A)return;if(A.released)return;if(A.releaseRequested||A.releaseTimer)return;if(A.refCount=Math.max(0,A.refCount-1),A.refCount>0)return;await s0(Q,A)},
/* H (initContext): awaits I6(Q, onProgress) and returns shallow copies of the model info (null when it is not an object), the planned runtime and the download info. Failures are logged and rethrown. */
H=async(A={})=>{let{onProgress:w}=A;try{let U=await I6(Q,w);return{modelInfo:U.modelInfo&&typeof U.modelInfo==="object"?{...U.modelInfo}:null,runtime:{...Q.plan.info.runtime},download:{...Q.plan.info.download}}}catch(U){throw console.error("[Context] Error initializing context:",U),U}},
/* G (releaseContext): resolves to false once finalized or when there is no context record; otherwise returns whatever y6(Q, record) returns. */
G=async()=>{if(Q.finalized)return!1;let A=Q.contextRecord;if(!A)return!1;return y6(Q,A)},
/* K (transcribe): throws "Context not initialized" when there is no context record. The caller's options are copied, and `maxThreads` is filled from runtime.max_threads only when the caller left it unset. The work is pushed onto Q.queue under a generated id: the task awaits the record's `ready` promise, then uses context.transcribeData (audioData, converted by C6) or context.transcribe (audioPath, resolved to an absolute path). The outcome is reported through J0.addTranscription, and a failure is rethrown after being reported. */
K=async(A={})=>{let{audioPath:w,audioData:U,options:Y={}}=A,_=Q.contextRecord;if(!_)throw Error("Context not initialized");let L={...Y};if(Q.plan.info.runtime.max_threads&&L.maxThreads==null)L.maxThreads=Q.plan.info.runtime.max_threads;let B=`transcription-${Date.now()}-${Math.random().toString(36).slice(2,8)}`,F=Date.now();return Q.queue.enqueue(async()=>{await _.ready;try{let E;if(U){let M=C6(U),{promise:x}=_.context.transcribeData(M,L);E=await x}else{if(!w)throw Error("audioPath or audioData is required for transcription");let M=X0.resolve(w),{promise:x}=_.context.transcribe(M,L);E=await x}return J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-F,segmentCount:E?.segments?.length||0,textLength:E?.text?.length||0,success:!0}),E}catch(E){throw 
J0.addTranscription({id:B,generatorId:Q.id,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,durationMs:Date.now()-F,success:!1,error:E?.message||String(E)}),E}},B)},O=async(A={})=>K(A),z=async(A={})=>K(A),V=()=>{let A=Q.contextRecord;if(!A)return!1;return!A.released&&(A.releaseRequested||A.releaseTimer||A.refCount>0)},R=()=>{Q.finalized=!1},q=()=>({id:Q.id,type:Q.type,repoId:Q.plan.info.model?.repoId||null,quantization:Q.plan.info.model?.quantization||null,modelType:Q.plan.info.model?.modelType||null,variant:Q.plan.info.runtime?.variant||null,hasContext:Boolean(Q.contextRecord?.context),contextRefCount:Q.contextRecord?.refCount||0,queueStatus:Q.queue.getStatus()});return{id:$,type:"ggml-stt",info:N.info,queue:Q.queue,initContext:H,transcribe:O,transcribeData:z,releaseContext:G,finalize:J,getStatus:q,hasPendingReleases:V,resetFinalized:R}}async function z2($,Z,X={}){let{onProgress:j,onComplete:W,onError:N}=X;try{let Q=a0($),J=await p1(Q),H=q2(Q,J),{repoId:G}=J;if(await r0(H,J.size)){if(console.log(`[Download] STT model already exists: ${G} at ${H}`),typeof W==="function")W({localPath:H,repoId:G,alreadyExists:!0});return{started:!1,localPath:H,repoId:G,alreadyExists:!0}}let O=Z.getDownload(H);if(O)return console.log(`[Download] Already downloading STT model: ${G}`),O.then(()=>{if(typeof W==="function")W({localPath:H,repoId:G,joinedExisting:!0})}).catch((V)=>{if(typeof N==="function")N(V)}),{started:!1,localPath:H,repoId:G,alreadyDownloading:!0};console.log(`[Download] Starting STT model download: ${G}`);let z=(async()=>{try{if(await O2(J.url,J.headers,H,J.size,(V)=>{if(V>=0&&Number.isFinite(V)){if(console.log(`[Download] ${G}: ${Math.round(V*100)}%`),typeof j==="function")j(V)}}),console.log(`[Download] Completed STT model: ${G}`),typeof W==="function")W({localPath:H,repoId:G})}catch(V){if(console.error(`[Download] Failed STT model: 
${G}`,V.message),typeof N==="function")N(V);throw V}finally{Z.deleteDownload(H)}})();return Z.setDownload(H,z),{started:!0,localPath:H,repoId:G}}catch(Q){if(console.error("[Download] Failed to start STT download:",Q.message),typeof N==="function")N(Q);return{started:!1,localPath:null,repoId:null,error:Q.message}}}async function B2($=null,Z={}){let{threshold:X=1.1,includeBreakdown:j=!1,config:W,...N}=Z,Q=null,J=null,H=null;if(W)try{let w=a0(W),U=await p1(w);Q=U.size??null,{processingBufferBytes:J}=C0({modelBytes:Q}),H=U.quantization||null}catch(w){}let G=W?.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.gpu_memory_fraction))):void 0,K=W?.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number(W.backend.cpu_memory_fraction))):void 0,O=await K0({...N,platform:process.platform,totalMemoryInBytes:f1.totalmem(),backend:"ggml-stt",includeBreakdown:j,gpuMemoryFraction:G,cpuMemoryFraction:K,dependencies:{getBackendDevicesInfo:Y2,isLibVariantAvailable:Q2},modelBytes:Q,kvCacheBytes:J}),z=O.selected,V=j2(z);if(z)z.modelBytes=Q||null,z.processingBytes=J||null,z.quantization=H||null;let R=null,q=null;if($){let w=j2($);q={...$,score:w};let U="buttress",Y="buttress-higher-score";if(!O.ok)U="local",Y="buttress-unavailable";else if(!w&&w!==0)U="buttress",Y="missing-client-score";else if($.fit&&z?.fit){let _=$.fit.fitsInGpu||$.fit.fitsInCpu,L=z.fit.fitsInGpu||z.fit.fitsInCpu;if(_&&!L)U="local",Y="client-fits-in-memory";else if(L&&!_)U="buttress",Y="buttress-fits-in-memory";else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores"}else if(w>V*X)U="local",Y="client-better";else if(V>w*X)U="buttress",Y="buttress-better";else U="either",Y="comparable-scores";R={buttressScore:V,clientScore:w,threshold:X,recommendation:U,reason:Y}}if(!O.ok&&!R)R={buttressScore:V,clientScore:$?.score??null,threshold:X,recommendation:"local",reason:"buttress-unavailable"};let 
A=null;if(W)A={repoId:W.model?.repo_id||null,quantization:W.model?.quantization||null,filename:W.model?.filename||null};return{type:"ggml-stt",timestamp:new Date().toISOString(),buttress:O,client:q,comparison:R,modelConfig:A}}var E6=()=>{if(typeof globalThis<"u"&&globalThis.ReadableStream&&globalThis.WritableStream)return{ReadableStream:globalThis.ReadableStream,WritableStream:globalThis.WritableStream};return F6},M6,J2=($={},Z={})=>{return Object.entries(Z||{}).forEach(([X,j])=>{if(j&&typeof j==="object"&&!Array.isArray(j)){if(!$[X]||typeof $[X]!=="object")$[X]={};J2($[X],j)}else $[X]=j}),$},x6=".bin",N2="https://huggingface.co",H2="https://huggingface.co/api",L0,y1,u1,V2="fp16",U2=0.5,S6,G2=($)=>{if(!$)return null;let Z=$.toLowerCase();return S6.find((X)=>Z.includes(X))||null},m1,C1=($,Z=[])=>{if(!$&&$!==0)return[...Z];if(Array.isArray($))return $.filter((X)=>X!=null);return[$]},i0=($)=>{if(!$)return null;let Z=String($).toLowerCase();if(["cuda","vulkan","default"].includes(Z))return Z;return null},a0=($={})=>{let Z=JSON.parse(JSON.stringify(m1));if(J2(Z,$),Z.backend.variant=i0(Z.backend.variant),Z.backend.variant_preference=Array.from(new Set(C1(Z.backend.variant_preference||y1).map(i0).filter(Boolean))),Z.backend.variant_preference.length===0)Z.backend.variant_preference=[...y1];if(Z.runtime.prefer_variants=Array.from(new Set(C1(Z.runtime.prefer_variants).map(i0).filter(Boolean))),Z.model.preferred_quantizations=Array.from(new Set(C1(Z.model.preferred_quantizations||Z.model.quantizations).map((X)=>X?String(X).toLowerCase():null).filter(Boolean))),Z.model.quantization){let X=String(Z.model.quantization).toLowerCase();if(!Z.model.preferred_quantizations.includes(X))Z.model.preferred_quantizations.unshift(X)}return 
Z.model.base_url=Z.model.base_url||N2,Z.model.api_base=Z.model.api_base||H2,Z.runtime.cache_dir=Z.runtime.cache_dir?X0.resolve(Z.runtime.cache_dir):L0,Z.runtime.context_release_delay_ms=Math.max(0,Number(Z.runtime.context_release_delay_ms)||m1.runtime.context_release_delay_ms),Z},I1=($)=>{let Z=$.toLowerCase();return u1.find((j)=>Z.includes(j))||null},P6=($)=>{let Z=[];if($.backend.variant)Z.push($.backend.variant);if($.runtime.prefer_variants.length>0)Z.push(...$.runtime.prefer_variants);return Z.push(...$.backend.variant_preference),Z.push("default"),Array.from(new Set(Z.map(i0).filter(Boolean)))},_2=async($)=>{await L6($,{recursive:!0})},k6=($=L0)=>X0.join($,".metadata-cache"),K2=($,Z,X=L0)=>{let j=w6("sha256").update($).digest("hex");return X0.join(k6(X),Z,`${j}.json`)},T6=async($,Z,X=L0)=>{try{let j=K2($,Z,X),W=await z6(j,"utf-8");return JSON.parse(W)}catch(j){return null}},Z2=async($,Z,X,j=L0)=>{try{let W=K2($,Z,j);await _2(X0.dirname(W)),await B6(W,JSON.stringify(X),"utf-8")}catch(W){}},D6=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,Z);if(!X.ok){let j=await X.text().catch(()=>"");throw Error(`Failed to fetch ${$}: ${X.status} ${X.statusText} ${j}`.trim())}return X.json()},X2=async($,Z={})=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");let X=await fetch($,{...Z,method:"HEAD"});if(!X.ok)throw Error(`Failed to fetch headers for ${$}: ${X.status} ${X.statusText}`);return X},q2=($,Z)=>{if($.model.local_path)return X0.resolve($.model.local_path);let X=Z.repoId.split("/"),j=X0.join($.runtime.cache_dir,...X,Z.revision);return X0.join(j,Z.filename)},r0=async($,Z)=>{try{let X=await W2($);if(!Z)return!0;return X.size===Z}catch(X){return!1}},O2=async($,Z,X,j,W)=>{if(typeof fetch!=="function")throw Error("Global fetch is not available in this runtime");await _2(X0.dirname(X));let N=await fetch($,{headers:Z});if(!N.ok||!N.body)throw Error(`Failed 
to download ${$}: ${N.status} ${N.statusText}`);let Q=await A6(X,"w"),J=Number(N.headers.get("content-length"))||j||0,H=0,G=0.05;try{await N.body.pipeTo(new M6({async write(K){if(await Q.write(K),H+=K.byteLength,typeof W==="function"&&J>0){let O=Math.min(1,H/J);while(O>=G)W(G),G+=0.05}},async close(){if(await Q.close(),typeof W==="function")W(1)},async abort(K){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),K}}))}catch(K){throw await Q.close().catch(()=>{}),await h1(X).catch(()=>{}),K}if(j){let K=await W2(X);if(K.size!==j)throw await h1(X).catch(()=>{}),Error(`Downloaded file size mismatch, expected ${j} got ${K.size}`)}},p1=async($)=>{let Z=$.model.repo_id||$.model.repository||$.model.model;if(!Z)throw Error("`model.repo_id` is required in Buttress backend config");let X=$.model.revision||"main",j=$.runtime.cache_dir,W=JSON.stringify({repoId:Z,revision:X,filename:$.model.filename,url:$.model.url,quantization:$.model.quantization,preferred_quantizations:$.model.preferred_quantizations}),N=await T6(W,"artifact-info",j);if(N)return N;let Q={...$.runtime.http_headers||{}};if($.runtime.huggingface_token)Q.Authorization=`Bearer ${$.runtime.huggingface_token}`;if($.model.url){let U=await X2($.model.url,{headers:Q}),Y=Number(U.headers.get("content-length"))||null,_=$.model.filename||$.model.url.split("/").pop(),L={repoId:Z,revision:X,filename:_,url:$.model.url,size:Y,quantization:I1(_||""),headers:Q};return await Z2(W,"artifact-info",L,j),L}let{filename:J}=$.model,H=$.model.quantization&&String($.model.quantization).toLowerCase(),G=await D6(`${$.model.api_base}/models/${Z}?revision=${X}&blobs=true`,{headers:Q}),O=(G?.siblings||G?.files||[]).map((U)=>U.rfilename||U.path||U.filename).filter((U)=>typeof U==="string"&&U.endsWith(x6));if(O.length===0)throw Error(`No model artifacts found in repo ${Z}`);let z=$.model.preferred_quantizations.length>0?$.model.preferred_quantizations:u1,V=()=>{for(let U of z)if(U===V2){let Y=O.find((_)=>{let 
L=_.toLowerCase();return!u1.some((B)=>L.includes(B))});if(Y)return{filename:Y,quantization:null}}else{let Y=O.find((_)=>_.toLowerCase().includes(U));if(Y)return{filename:Y,quantization:U}}return null};if(!J){let U=V()||{filename:O[0],quantization:null},{filename:Y,quantization:_}=U;J=Y,H=_||I1(J)}else if(!H)H=I1(J);let R=`${$.model.base_url.replace(/\/+$/,"")}/${Z}/resolve/${X}/${J}`,q=await X2(R,{headers:Q}),A=Number(q.headers.get("content-length"))||null,w={repoId:Z,revision:X,filename:J,url:R,size:A,quantization:H,headers:Q,isSplit:!1,splitCount:0};return await Z2(W,"artifact-info",w,j),w},b6=async($,{modelBytes:Z=null,processingBytes:X=null}={})=>{let j=P6($),[W,...N]=j,Q=$.backend?.gpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.gpu_memory_fraction))):m1.backend.gpu_memory_fraction||1,J=$.backend?.cpu_memory_fraction!=null?Math.min(1,Math.max(0,Number($.backend.cpu_memory_fraction))):U2,H=await K0({platform:process.platform,totalMemoryInBytes:f1.totalmem(),backend:"ggml-stt",variant:W||null,preferVariants:N,variantPreference:$.backend.variant_preference,gpuMemoryFraction:Q,cpuMemoryFraction:J,dependencies:{getBackendDevicesInfo:Y2,isLibVariantAvailable:Q2},modelBytes:Z,kvCacheBytes:X}),G=(z)=>({...z,devices:Array.isArray(z.devices)?z.devices:[],ok:z.ok,hasGpu:Boolean(z.hasGpu),totalMemory:z.gpuTotalBytes||z.totalMemory||0,error:z.ok?null:Error(z.error||`Variant ${z.variant} not available on this platform`)});if(!H.ok||!H.selected){let z=(H.attempts||[]).map((V)=>`${V.variant}: ${V.error||"unknown error"}`).join("; ");throw Error(`Unable to initialize any backend variant (${j.join(", ")}). 
Errors: ${z}`)}let K=(H.attempts||[]).map(G);return{selected:G(H.selected),attempts:K}},v6=async($)=>{let Z=await p1($),X=C0({modelBytes:Z.size>0?Z.size:0}),j=await b6($,{modelBytes:X.modelBytes,processingBytes:X.processingBufferBytes}),W=j.selected.hasGpu&&(j.selected.fit?.fitsInGpu!==void 0?j.selected.fit.fitsInGpu:!0);if($.model.use_gpu===!1)W=!1;let N=$.model.use_flash_attn&&String($.model.use_flash_attn).toLowerCase(),Q;if(N==="on"||N==="true")Q=!0;else if(N==="off"||N==="false")Q=!1;else Q=W;let J=$.runtime.cache_dir,H=q2($,Z),G=await r0(H,Z.size),K={ok:!0,backend:"ggml-stt",model:{repoId:Z.repoId,revision:Z.revision,filename:Z.filename,quantization:Z.quantization,modelType:G2(Z.filename),url:Z.url,sizeBytes:Z.size},runtime:{variant:j.selected.variant,use_gpu:W,use_flash_attn:Q,max_threads:$.runtime.max_threads?Number($.runtime.max_threads):null},resources:{...X,gpuCapacityBytes:j.selected.gpuTotalBytes,gpuUsableBytes:j.selected.gpuUsableBytes,cpuUsableBytes:j.selected.cpuUsableBytes,fit:j.selected.fit},devices:{selected:j.selected,attempts:j.attempts},download:{cacheDir:J,localPath:H,exists:G},timestamp:new Date().toISOString()};return{config:$,info:K,artifact:Z,memory:X,devices:j,localPath:H,localExists:G}},h6=async($,Z,X,j=null)=>{let{localPath:W,artifact:N,config:Q}=$;if($.localExists){if(typeof X==="function")X(1);return W}if(j){let G=j.getDownload(W);if(G){console.log(`[ensureModelFile] Waiting for global STT download: ${N.repoId}`);try{if(await G,await r0(W,N.size)){if($.localExists=!0,$.info.download.exists=!0,typeof X==="function")X(1);return W}}catch(K){console.warn(`[ensureModelFile] Global STT download failed, will retry: ${K.message}`)}}}let J=Z.get(W);if(J){if(await J,typeof X==="function")X(1);return W}let H=(async()=>{if(Q.model.allow_local_file){if(!await r0(W,N.size))throw Error(`Local model file not found: ${W}`);return W}return await O2(N.url,N.headers,W,N.size,X),W})();Z.set(W,H);try{return await 
H,W}finally{Z.delete(W)}},C6=($)=>{if(!$)return null;if($ instanceof ArrayBuffer)return $;if(ArrayBuffer.isView($))return $.buffer;if(typeof $==="string"){let Z=$.startsWith("data:")?$.split(",")[1]||"":$,X=Buffer.from(Z,"base64");return X.buffer.slice(X.byteOffset,X.byteOffset+X.byteLength)}throw Error("Unsupported audioData format, expected base64 string or ArrayBuffer")},I6=async($,Z)=>{if($.contextRecord&&!$.contextRecord.released){if($.contextRecord.releaseTimer)clearTimeout($.contextRecord.releaseTimer),$.contextRecord.releaseTimer=null,console.log("[Context] Cancelled pending STT release");if($.contextRecord.releaseRequested=!1,$.contextRecord.refCount+=1,console.log(`[Context] Reusing existing STT context, refCount=${$.contextRecord.refCount}`),typeof Z==="function")Z(0);if(!$.contextRecord.context)await $.contextRecord.ready;if(typeof Z==="function")Z(1);return $.contextRecord}if($.contextRecord)console.log(`[Context] STT record exists but released=${$.contextRecord.released}, creating new context`);else console.log("[Context] No existing STT record, creating new context");let X={refCount:1,ready:null,released:!1};$.contextRecord=X,X.ready=(async()=>{let j=Date.now();try{if(typeof Z==="function")Z(0);let W=await h6($.plan,$.downloads,Z,$.globalDownloadManager);if(typeof Z==="function")Z(0.5);let N=await R6({filePath:W,useFlashAttn:$.plan.info.runtime.flash_attn_type==="on",useGpu:$.plan.info.runtime.n_gpu_layers>0,nThreads:$.plan.info.runtime.n_threads},$.plan.info.runtime.variant);if(typeof Z==="function")Z(1);X.context=N;try{X.modelInfo=N.getModelInfo()}catch(Q){X.modelInfo=null}return J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,useGpu:$.plan.info.runtime?.use_gpu||!1,durationMs:Date.now()-j,success:!0}),X}catch(W){throw 
J0.addModelLoad({id:$.id,generatorId:$.id,repoId:$.plan.info.model?.repoId||null,quantization:$.plan.info.model?.quantization||null,modelType:$.plan.info.model?.modelType||null,variant:$.plan.info.runtime?.variant||null,durationMs:Date.now()-j,success:!1,error:W?.message||String(W)}),W}})();try{if(await X.ready,typeof Z==="function")Z(1);return X}catch(j){throw $.contextRecord=null,j}},s0=async($,Z,X=!1)=>{if(Z.released)return!1;if(!X&&Z.refCount>0)return!1;return Z.released=!0,$.contextRecord=null,await Z.context?.release?.(),!0},y6=async($,Z,X=!1)=>{if(Z.releaseRequested=!0,Z.releaseTimer)clearTimeout(Z.releaseTimer),Z.releaseTimer=null;if(X)Z.refCount=0;else if(Z.refCount=Math.max(0,Z.refCount-1),Z.refCount>0)return Z.releaseRequested=!1,!1;let j=$.config.runtime.context_release_delay_ms;if(typeof j!=="number"||!Number.isFinite(j))return s0($,Z);let W=Math.max(0,Math.floor(j));if(X||W<=0)return s0($,Z);return console.log(`[Context] Scheduling STT release in ${W}ms`),Z.releaseTimer=setTimeout(async()=>{if(Z.releaseTimer=null,Z.refCount>0){console.log(`[Context] STT release cancelled, refCount=${Z.refCount}`),Z.releaseRequested=!1;return}console.log(`[Context] Releasing STT context after ${W}ms delay`),await s0($,Z)},W),!0},g1=($)=>{let Z=a0($),X=Z.model.repo_id||Z.model.repository||Z.model.model||null;if(!X)return null;let j=G2(Z.model.filename);if(j)return`${X}:${j}`;return X},j2=($)=>{if(!$)return 0;if(typeof $.score==="number"&&Number.isFinite($.score))return Number($.score);return _0($)};var 
c1=S(()=>{B0();m0();({WritableStream:M6}=E6()),L0=X0.join(f1.homedir(),".buttress","models"),y1=["cuda","vulkan","default"],u1=["q8_0","q5_1","q5_0","q4_1","q4_0"],S6=["large-v3-turbo","distil-large-v3","large-v3","large-v2","large-v1","large","distil-medium","medium.en","medium","small.en-tdrz","distil-small.en","small.en","small","base.en","base","tiny.en","tiny"],m1={backend:{type:"ggml-stt",variant:null,variant_preference:y1,gpu_memory_fraction:0.85,cpu_memory_fraction:U2},model:{repo_id:"BricksDisplay/whisper-ggml",revision:"main",filename:null,url:null,quantization:null,preferred_quantizations:["q8_0",V2,"q5_1"],allow_local_file:!1,local_path:null,api_base:H2,base_url:N2,use_gpu:!0,use_flash_attn:"auto"},runtime:{cache_dir:L0,prefer_variants:[],huggingface_token:process.env.HUGGINGFACE_TOKEN||null,http_headers:{},max_threads:null,context_release_delay_ms:1e4}}});async function a($,Z=null,X={}){if($==="ggml-llm")return $2(Z,X);if($==="ggml-stt")return B2(Z,X);throw Error(`Unknown backend type: ${$}`)}var d1=S(()=>{v1();c1()});var C;var w2=S(()=>{C={name:"@fugood/buttress-backend-core",private:!0,type:"module",version:"2.23.0-beta.40",main:"src/index.js",types:"lib/types/index.d.ts",scripts:{build:"tsc --noResolve --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js"},dependencies:{"@fugood/buttress-hardware-guardrails":"^2.23.0-beta.40","@fugood/llama.node":"^1.4.12","@fugood/whisper.node":"^1.0.11","@huggingface/gguf":"^0.3.2","@iarna/toml":"^3.0.0",bytes:"^3.1.0"}}});import y from"node:os";import R2 from"node:fs";import F2 from"node:path";import{execSync as o0}from"node:child_process";import E2 from"@iarna/toml";async function x2({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${C.name} v${C.version}`),console.log(`Generating model capabilities comparison...
|
|
3
3
|
`),X.push(`${C.name} v${C.version}`),X.push(`## Model Capabilities Comparison
|
|
4
|
-
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let
|
|
4
|
+
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let _=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))_[L]=j(_[L]||{},B);else _[L]=B}),_},W=Z||{},{server:N,generators:Q=[],...J}=W,H=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((_)=>_?.type==="ggml-llm");if(Y.length>0&&U){let _=Y.find((L)=>L.model?.repo_id===U);if(_)return H(_)}}return Object.keys(J).length>0?H({}):null},K=[];for(let U=0;U<$.length;U+=1){let Y=$[U];console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let _=G(Y);_={..._||{},model:{...J.runtime,..._?.model||{},repo_id:Y}};let L=await a("ggml-llm",null,{config:_,includeBreakdown:!0});K.push({modelId:Y,capabilities:L,modelInfo:L.buttress?.selected||null,modelConfig:L.modelConfig||null})}let O=(U)=>U?(U/1024/1024/1024).toFixed(2):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |"),X.push("|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|"),K.forEach(({modelId:U,modelInfo:Y,modelConfig:_})=>{let L=Y?.quantization?.name?.toUpperCase()||"N/A",B=O(Y?.modelBytes),F=_?.nCtx||Y?.kvInfo?.nCtxTrain||"N/A",E=N0(Y),M=Number(F),x=Y?.kvCacheBytes||(E&&Number.isFinite(M)&&M>0?E(M):E&&E(Y?.kvInfo?.nCtxTrain||0))||null,T=O(x),D=O(Y?.modelBytes&&x?Y.modelBytes+x:Y?.fit?.totalRequiredBytes),p=z(Y?.fit?.fitsInGpu),m=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${L} | ${B} | ${F} | ${T} | ${D} | ${p} | ${m} |`);let l=Y?.memoryLimitedCtx!=null||Y?.limitedFit!=null,i=!Y?.fit?.fitsInGpu||!Y?.fit?.fitsInCpu;if(l&&i){let 
v=Y?.memoryLimitedCtx||F,c=Number(v),k=Y?.limitedKvCacheBytes||E&&Number.isFinite(c)&&c>0&&E(c)||null,f=O(k),h=O(Y?.modelBytes&&k?Y.modelBytes+k:Y?.limitedFit?.totalRequiredBytes),s=z(Y?.limitedFit?.fitsInGpu),I=z(Y?.limitedFit?.fitsInCpu);if(v!==F||f!==T||h!==D)X.push(`| ↳ Limited | - | ${B} | ${v} | ${f} | ${h} | ${s} | ${I} |`)}}),X.push(`
|
|
5
5
|
---`),X.push(`
|
|
6
|
-
### System Information`);let V=null;if(process.platform!=="win32")try{V=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${y.hostname()}`),X.push(`- **OS:** ${y.type()} ${y.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${y.cpus().length}`),X.push(`- **Total System Memory:** ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`),
|
|
6
|
+
### System Information`);let V=null;if(process.platform!=="win32")try{V=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${y.hostname()}`),X.push(`- **OS:** ${y.type()} ${y.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${y.cpus().length}`),X.push(`- **Total System Memory:** ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`),K.length>0){let Y=K[0].capabilities.buttress?.selected;if(Y){let _=Y.cpuTotalBytes>0?(Y.cpuUsableBytes/Y.cpuTotalBytes*100).toFixed(0):0;if(X.push(`- **Usable CPU Memory:** ${(Y.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${_}% of ${(Y.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.hasGpu){let L=Y.devices.filter((B)=>B.type==="gpu");if(L.length>0){let B=L[0];X.push(`- **GPU Backend:** ${B.backend}`),X.push(`- **GPU Name:** ${B.deviceName}`),X.push(`- **GPU Total Memory:** ${(B.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Y.gpuTotalBytes>0?(Y.gpuUsableBytes/Y.gpuTotalBytes*100).toFixed(0):0;X.push(`- **GPU Usable Memory:** ${(Y.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Y.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else X.push("- **GPU:** Not available")}}X.push(`
|
|
7
7
|
### Command Used`);let R=process.argv.slice(2).join(" ");if(X.push(`\`\`\`bash
|
|
8
8
|
${process.argv[0]} ${process.argv[1]} ${R}
|
|
9
9
|
\`\`\``),X.push(`
|
|
10
10
|
### Package Information`),X.push(`- **Name:** ${C.name}`),X.push(`- **Version:** ${C.version}`),C.description)X.push(`- **Description:** ${C.description}`);if(Z&&Object.keys(Z).length>0){X.push(`
|
|
11
|
-
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=E2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=
|
|
11
|
+
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=E2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=F2.join(process.cwd(),A);R2.writeFileSync(w,X.join(`
|
|
12
12
|
`),"utf8"),console.log(`
|
|
13
|
-
Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function
|
|
13
|
+
Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function S2({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${C.name} v${C.version}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let X=Z||{},{server:j,generators:W=[],...N}=X,Q=(V={},R={})=>{let q=Array.isArray(V)?[...V]:{...V};return Object.entries(R||{}).forEach(([A,w])=>{if(w&&typeof w==="object"&&!Array.isArray(w))q[A]=Q(q[A]||{},w);else q[A]=w}),q},J=(V)=>Q(JSON.parse(JSON.stringify(N)),V||{}),G=((V)=>{if(Array.isArray(W)&&W.length>0){let R=W.filter((q)=>q?.type==="ggml-llm");if(R.length>0){if(V){let q=R.find((A)=>A.model?.repo_id===V);if(q)return J(q)}}}if(Object.keys(N).length>0)return J({});return null})($);if($)G={...G||{},model:{...G?.model||{},repo_id:$}};let K=await a("ggml-llm",null,{config:G,includeBreakdown:!0}),O=K.buttress?.selected||null,z=K.modelConfig||null;if($||z?.repoId){console.log(`
|
|
14
14
|
=== Model Information ===`);let V=$||z?.repoId;if(console.log(`Repository ID: ${V}`),z?.quantization)console.log(`Quantization: ${z.quantization}`);if(z?.nCtx)console.log(`Context Length: ${z.nCtx}`);if(O?.quantization?.name)console.log(`Model Quantization: ${O.quantization.name.toUpperCase()}`);let R=z?.cache_type_k||"f16",q=z?.cache_type_v||"f16";if(console.log(`KV Cache Type: K=${R}, V=${q}`),O?.modelBytes&&O?.kvCacheBytes){if(console.log(`Model Size: ${(O.modelBytes/1024/1024/1024).toFixed(2)} GB`),O.kvInfo)console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(O.kvInfo)})`);else console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);if(console.log(`Total Required Memory: ${((O.modelBytes+O.kvCacheBytes)/1024/1024/1024).toFixed(2)} GB`),O.memoryLimitedCtx!=null){let A=O.memoryLimitedCtx,w=O.kvInfo?.nCtxTrain;if(w)console.log(`
|
|
15
15
|
Memory-Limited Context: ${A} (Train: ${w})`);else console.log(`
|
|
16
|
-
Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(O.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(
|
|
16
|
+
Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(O.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(K.buttress?.selected?.fit){let{totalRequiredBytes:A}=K.buttress.selected.fit;console.log(`Total Required Memory: ${(A/1024/1024/1024).toFixed(2)} GB`)}}if(K.buttress?.selected){let{selected:V}=K.buttress;console.log(`
|
|
17
17
|
=== Hardware Information ===`);let R=null;if(process.platform!=="win32")try{R=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(R)console.log(`System: ${R}`);else console.log(`Hostname: ${y.hostname()}`),console.log(`OS: ${y.type()} ${y.release()}`);console.log(`Platform: ${V.platform}`),console.log(`CPU Cores: ${y.cpus().length}`),console.log(`Total System Memory: ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`);let q=V.cpuTotalBytes>0?(V.cpuUsableBytes/V.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(V.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${q}% of ${(V.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),V.hasGpu)console.log(`
|
|
18
18
|
--- GPU Details ---`),V.devices.filter((w)=>w.type==="gpu").forEach((w)=>{console.log(`GPU Backend: ${w.backend}`),console.log(`GPU Name: ${w.deviceName}`),console.log(`GPU Total Memory: ${(w.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let U=V.gpuTotalBytes>0?(V.gpuUsableBytes/V.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(V.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${U}% of ${(V.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),w.metadata){if(w.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(w.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
|
|
19
19
|
Backend Variant: ${V.variant}`),console.log(`Performance Score: ${V.score}`),V.fit){if(console.log(`
|
|
20
20
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${V.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${V.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${V.fit.limiting}`),V.limitedFit)console.log(`
|
|
21
21
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(V.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${V.limitedFit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU (Limited): ${V.limitedFit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor (Limited): ${V.limitedFit.limiting}`)}}console.log(`
|
|
22
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(
|
|
22
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(K,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}async function P2({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${C.name} v${C.version}`),console.log(`Generating STT model capabilities comparison...
|
|
23
23
|
`),X.push(`${C.name} v${C.version}`),X.push(`## STT Model Capabilities Comparison
|
|
24
|
-
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let
|
|
24
|
+
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let _=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))_[L]=j(_[L]||{},B);else _[L]=B}),_},W=Z||{},{server:N,generators:Q=[],...J}=W,H=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((_)=>_?.type==="ggml-stt");if(Y.length>0&&U){let _=Y.find((L)=>L.model?.repo_id===U);if(_)return H(_)}}return Object.keys(J).length>0?H({}):null},K=[];for(let U=0;U<$.length;U+=1){let Y=$[U],{repoId:_,filename:L}=M2(Y);console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let B=G(_);B={...B||{},model:{...B?.model||{},repo_id:_,...L&&{filename:L}}};let F=await a("ggml-stt",null,{config:B,includeBreakdown:!0});K.push({modelId:Y,repoId:_,filename:L,capabilities:F,modelInfo:F.buttress?.selected||null,modelConfig:F.modelConfig||null})}let O=(U)=>U?(U/1024/1024).toFixed(1):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),X.push("|-------|-----------|------------------------|---------------------|----------|----------|"),K.forEach(({modelId:U,modelInfo:Y})=>{let _=O(Y?.modelBytes),L=O(Y?.processingBytes||Y?.kvCacheBytes),B=O(Y?.fit?.totalRequiredBytes),F=z(Y?.fit?.fitsInGpu),E=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${_} | ${L} | ${B} | ${F} | ${E} |`)}),X.push(`
|
|
25
25
|
---`),X.push(`
|
|
26
|
-
### System Information`);let V=null;if(process.platform!=="win32")try{V=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${y.hostname()}`),X.push(`- **OS:** ${y.type()} ${y.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${y.cpus().length}`),X.push(`- **Total System Memory:** ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`),
|
|
26
|
+
### System Information`);let V=null;if(process.platform!=="win32")try{V=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${y.hostname()}`),X.push(`- **OS:** ${y.type()} ${y.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${y.cpus().length}`),X.push(`- **Total System Memory:** ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`),K.length>0){let Y=K[0].capabilities.buttress?.selected;if(Y){let _=Y.cpuTotalBytes>0?(Y.cpuUsableBytes/Y.cpuTotalBytes*100).toFixed(0):0;if(X.push(`- **Usable CPU Memory:** ${(Y.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${_}% of ${(Y.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.hasGpu){let L=Y.devices.filter((B)=>B.type==="gpu");if(L.length>0){let B=L[0];X.push(`- **GPU Backend:** ${B.backend}`),X.push(`- **GPU Name:** ${B.deviceName}`),X.push(`- **GPU Total Memory:** ${(B.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Y.gpuTotalBytes>0?(Y.gpuUsableBytes/Y.gpuTotalBytes*100).toFixed(0):0;X.push(`- **GPU Usable Memory:** ${(Y.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Y.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else X.push("- **GPU:** Not available")}}X.push(`
|
|
27
27
|
### Command Used`);let R=process.argv.slice(2).join(" ");if(X.push(`\`\`\`bash
|
|
28
28
|
${process.argv[0]} ${process.argv[1]} ${R}
|
|
29
29
|
\`\`\``),X.push(`
|
|
30
30
|
### Package Information`),X.push(`- **Name:** ${C.name}`),X.push(`- **Version:** ${C.version}`),C.description)X.push(`- **Description:** ${C.description}`);if(Z&&Object.keys(Z).length>0){X.push(`
|
|
31
|
-
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=E2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=
|
|
31
|
+
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=E2.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=F2.join(process.cwd(),A);R2.writeFileSync(w,X.join(`
|
|
32
32
|
`),"utf8"),console.log(`
|
|
33
|
-
STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function
|
|
33
|
+
STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function k2({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${C.name} v${C.version}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:X,filename:j}=M2($),W=Z||{},{server:N,generators:Q=[],...J}=W,H=(q={},A={})=>{let w=Array.isArray(q)?[...q]:{...q};return Object.entries(A||{}).forEach(([U,Y])=>{if(Y&&typeof Y==="object"&&!Array.isArray(Y))w[U]=H(w[U]||{},Y);else w[U]=Y}),w},G=(q)=>H(JSON.parse(JSON.stringify(J)),q||{}),O=((q)=>{if(Array.isArray(Q)&&Q.length>0){let A=Q.filter((w)=>w?.type==="ggml-stt");if(A.length>0){if(q){let w=A.find((U)=>U.model?.repo_id===q);if(w)return G(w)}}}if(Object.keys(J).length>0)return G({});return null})(X);if(X)O={...O||{},model:{...J.runtime,...O?.model||{},repo_id:X,...j&&{filename:j}}};let z=await a("ggml-stt",null,{config:O,includeBreakdown:!0}),V=z.buttress?.selected||null,R=z.modelConfig||null;if(X||R?.repoId){console.log(`
|
|
34
34
|
=== Model Information ===`);let q=X||R?.repoId;if(console.log(`Repository ID: ${q}`),j)console.log(`Filename: ${j}`);if(V?.modelBytes)console.log(`Model Size: ${(V.modelBytes/1024/1024).toFixed(1)} MB`);let A=V?.processingBytes||V?.kvCacheBytes;if(A)console.log(`Processing Buffer: ${(A/1024/1024).toFixed(1)} MB`);if(V?.modelBytes&&A)console.log(`Total Required Memory: ${((V.modelBytes+A)/1024/1024).toFixed(1)} MB`);else if(z.buttress?.selected?.fit){let{totalRequiredBytes:w}=z.buttress.selected.fit;console.log(`Total Required Memory: ${(w/1024/1024).toFixed(1)} MB`)}}if(z.buttress?.selected){let{selected:q}=z.buttress;console.log(`
|
|
35
35
|
=== Hardware Information ===`);let A=null;if(process.platform!=="win32")try{A=o0("uname -a",{encoding:"utf8"}).trim()}catch{}if(A)console.log(`System: ${A}`);else console.log(`Hostname: ${y.hostname()}`),console.log(`OS: ${y.type()} ${y.release()}`);console.log(`Platform: ${q.platform}`),console.log(`CPU Cores: ${y.cpus().length}`),console.log(`Total System Memory: ${(y.totalmem()/1024/1024/1024).toFixed(2)} GB`);let w=q.cpuTotalBytes>0?(q.cpuUsableBytes/q.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${w}% of ${(q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),q.hasGpu)console.log(`
|
|
36
|
-
--- GPU Details ---`),q.devices.filter((Y)=>Y.type==="gpu").forEach((Y)=>{console.log(`GPU Backend: ${Y.backend}`),console.log(`GPU Name: ${Y.deviceName}`),console.log(`GPU Total Memory: ${(Y.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let
|
|
36
|
+
--- GPU Details ---`),q.devices.filter((Y)=>Y.type==="gpu").forEach((Y)=>{console.log(`GPU Backend: ${Y.backend}`),console.log(`GPU Name: ${Y.deviceName}`),console.log(`GPU Total Memory: ${(Y.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let _=q.gpuTotalBytes>0?(q.gpuUsableBytes/q.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${_}% of ${(q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.metadata){if(Y.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(Y.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
|
|
37
37
|
Backend Variant: ${q.variant}`),console.log(`Performance Score: ${q.score}`),q.fit)console.log(`
|
|
38
38
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${q.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${q.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${q.fit.limiting}`)}console.log(`
|
|
39
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var M2=($)=>{if(!$)return{repoId:null,filename:null};let[Z,X]=$.split(":");return{repoId:Z,filename:X||null}};var T2=S(()=>{B0();d1();B2()});var e0={};P3(e0,{testGgmlSttCapabilities:()=>P2,testGgmlLlmCapabilities:()=>x2,status:()=>l$,startModelDownload:()=>t0,startGenerator:()=>p$,showSttModelsTable:()=>S2,showModelsTable:()=>F2,globalDownloadManager:()=>l1,ggmlStt:()=>c$,ggmlLlm:()=>g$,getModelIdentifier:()=>d$,getCapabilities:()=>r,generatorRegistry:()=>$0,finalizeGenerator:()=>f$});async function p$($,Z){let j={"ggml-llm":{create:o4,getId:b1},"ggml-stt":{create:L2,getId:g1}}[$];if(!j)throw Error(`Unsupported backend type: ${$}`);let W=j.getId(Z);if(!W)throw Error("Buttress generator config missing repo identifier");let H=`${$}:${W}`,Q=$0.get(H);if(Q)return Q.refCount+=1,Q.instance.resetFinalized?.(),{id:Q.id,info:Q.instance.info};let J=await j.create(H,Z,{globalDownloadManager:l1}),N={id:H,type:J.type,instance:J,refCount:1};return $0.set(H,N),{id:H,info:J.info}}async function f$($){let Z=$0.get($);if(!Z)return!1;if(Z.refCount-=1,Z.refCount<=0){if(await Z.instance.finalize(),!(Z.instance.hasPendingReleases?.()??!1))$0.delete($)}return!0}function d$($,Z){if($==="ggml-llm")return b1(Z);if($==="ggml-stt")return g1(Z);return null}async function t0($,Z,X={}){let W={"ggml-llm":t4,"ggml-stt":A2}[$];if(!W)return{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${$}`};return W(Z,l1,X)}var $0,l1,m$=($)=>{let Z=$0.get($);if(!Z)throw Error(`Unknown generator id "${$}"`);return Z},V0=($,Z)=>{let X=m$($);if(X.type!==Z)throw Error(`Generator "${$}" does not support ${Z} backend`);return X.instance},g$,c$,l$;var $1=S(()=>{v1();c1();d1();m0();T2();$0=new Map,l1={downloads:new Map,getDownload($){return 
this.downloads.get($)||null},setDownload($,Z){this.downloads.set($,Z)},deleteDownload($){this.downloads.delete($)},isDownloading($){return this.downloads.has($)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([$,Z])=>({localPath:$,promise:Z}))}};g$={async initContext($,Z){return V0($,"ggml-llm").initContext(Z)},async completion($,Z){return V0($,"ggml-llm").completion(Z)},async tokenize($,Z){return V0($,"ggml-llm").tokenize(Z)},async detokenize($,Z){return V0($,"ggml-llm").detokenize(Z)},async applyChatTemplate($,Z){return V0($,"ggml-llm").applyChatTemplate(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-llm")throw Error(`Generator "${$}" does not support ggml-llm backend`);return X.instance.releaseContext(Z)}},c$={async initContext($,Z){return V0($,"ggml-stt").initContext(Z)},async transcribe($,Z){return V0($,"ggml-stt").transcribe(Z)},async transcribeData($,Z){return V0($,"ggml-stt").transcribeData(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-stt")throw Error(`Generator "${$}" does not support ggml-stt backend`);return X.instance.releaseContext(Z)}};l$={getFullStatus:()=>S4($0),getGgmlLlmStatus:()=>F1($0),getGgmlSttStatus:()=>x1($0),subscribeToStatus:M1,subscribeToStatusWithId:x4,llmStatusTracker:t,sttStatusTracker:J0,statusEmitter:o}});import{node as n$}from"@elysiajs/node";import{Elysia as i$}from"elysia";var s$,U0=($)=>new i$({adapter:s$?n$():void 0,...$});var F0=S(()=>{s$=typeof process<"u"&&process.versions&&process.versions.node});import{t as j0}from"elysia";var r$,a$=({store:{serverInfo:$}})=>({id:$.id,name:$.name,version:$.version,generators:$.generators,authentication:$.authentication}),n1=($)=>{let Z=U0(),X=$.autodiscover.http?.path??"/buttress/info";return Z.get(X,a$,{response:r$}),Z};var 
k2=S(()=>{F0();r$=j0.Object({id:j0.String(),name:j0.String(),version:j0.String(),generators:j0.Array(j0.Object({type:j0.String()})),authentication:j0.Object({required:j0.Boolean(),type:j0.Literal("device-group")})})});import{t as W0,file as o$}from"elysia";import{writeFile as D2}from"node:fs/promises";import i1 from"node:path";var t$,s1;var b2=S(()=>{F0();t$=typeof process<"u"&&process.versions!=null&&process.versions.node!=null,s1=U0().post("/buttress/upload",async({body:{file:$},store:{config:Z}})=>{let X=`${Date.now()}-${$.name.replace(/[^\dA-Za-z]/g,"_")}`,j=i1.join(Z.server.temp_file_dir,X);try{if(t$)await D2(j,await $.stream());else await D2(j,await $.arrayBuffer());return{ok:!0,filename:X}}catch(W){return{ok:!1,error:String(W)}}},{body:W0.Object({file:W0.File()}),response:W0.Object({ok:W0.Boolean(),filename:W0.Optional(W0.String()),error:W0.Optional(W0.String())})}).get("/buttress/download/:filename",async({params:{filename:$},store:{config:Z},status:X})=>{let j=i1.join(Z.server.temp_file_dir,$);if(i1.relative(Z.server.temp_file_dir,j).includes(".."))return X(400),"Invalid file path";return o$(j)},{params:W0.Object({filename:W0.String()})})});import r1 from"node:path";import C2 from"node:fs/promises";import{fileURLToPath as e$}from"node:url";var v2,$6=async()=>{let $=[r1.join(v2,"..","public","status.html"),r1.join(v2,"..","..","public","status.html")];return(await Promise.all($.map((X)=>C2.access(X).then(()=>X,()=>null)))).find((X)=>X!==null)??null},Z6=($)=>{let{status:Z}=$;if(Z?.getFullStatus)return Z.getFullStatus();return{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},h2=async()=>{let $=await $6();if(!$)return console.error("[Status] Failed to find status.html in candidate paths"),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}});try{let Z=await C2.readFile($,"utf-8");return new Response(Z,{headers:{"Content-Type":"text/html; 
charset=utf-8"}})}catch(Z){return console.error("[Status] Failed to serve status page:",Z),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}})}},a1;var I2=S(()=>{F0();v2=r1.dirname(e$(import.meta.url)),a1=U0().get("/status",h2).get("/status/",h2).get("/buttress/status",({store:{backend:$}})=>Z6($))});import{t as P,sse as Z1}from"elysia";import{cors as X6}from"@elysiajs/cors";async function W6($,Z,X){let W=(Z.generators||[]).filter((q)=>q.type==="ggml-llm");if(W.length===0)throw Error('No ggml-llm generator configured. Add a [[generators]] with type = "ggml-llm" to your config.');let H=W[0],Q=X||H.model?.repo_id;if(X){let q=W.find((A)=>A.model?.repo_id===X);if(q)H=q}else Q=H.model?.repo_id;let J=Q,N=y2.get(J);if(N?.initialized)return N;let{generators:G,server:_,...O}=Z.global||{},z={...O,...H,model:{...H.model,repo_id:Q}};console.log(`[OpenAI] Creating generator for ${J}`);let{id:V}=await $.startGenerator("ggml-llm",z),R={id:V,config:z,repoId:Q,initialized:!1};return y2.set(J,R),await $.ggmlLlm.initContext(V,{}),R.initialized=!0,console.log(`[OpenAI] Generator ready: ${J}`),R}async function Y6($,Z,X,j){let W=$.getReader(),H="",Q=null,J=null,N="stop",G={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let O=!1;while(!O){let z=await W.read();if({done:O}=z,O)break;let{event:V,data:R}=z.value;if(V==="token"){if(R.content!=null)H+=R.content;else if(R.token!=null)H+=R.token}else if(V==="result"){if(R.text)H=R.text;else if(R.content)H=R.content;if(R.reasoning_content)Q=R.reasoning_content;if(R.tool_calls?.length>0)J=R.tool_calls.map((q,A)=>({id:q.id||`call_${Z}_${A}`,type:"function",function:{name:q.function?.name||"",arguments:q.function?.arguments||""}})),N="tool_calls";else N=R.interrupted?"length":"stop";G={prompt_tokens:R.prompt_tokens||R.promptTokens||0,completion_tokens:R.tokens_predicted||R.tokensPredicted||0,total_tokens:(R.prompt_tokens||R.promptTokens||0)+(R.tokens_predicted||R.tokensPredicted||0)}}else 
if(V==="error")throw Error(R.message)}}finally{W.cancel().catch(()=>{})}let _={role:"assistant",content:H||null};if(Q)_.reasoning_content=Q;if(J)_.tool_calls=J;return{id:Z,object:"chat.completion",created:X,model:j,choices:[{index:0,message:_,finish_reason:N}],usage:G}}function X1({global:$}){let Z=U0({prefix:"/oai-compat"});return Z.use(X6({origin:$?.openai_compat?.cors_allowed_origins??!1,methods:["GET","POST","OPTIONS"],allowedHeaders:["Content-Type","Authorization"],maxAge:86400,preflight:!0})),Z.get("/v1/models",({store:X})=>{let{config:j}=X,Q=(j.generators||[]).filter((J)=>J.type==="ggml-llm").map((J)=>{return{id:J.model?.repo_id||"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"}});if(Q.length===0)Q.push({id:"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"});return{object:"list",data:Q}}),Z.post("/v1/chat/completions",async function*({body:j,set:W,store:H}){let{config:Q,backend:J}=H,{messages:N=[],stream:G=!1,model:_,tools:O,temperature:z,stop:V,top_p:R,max_tokens:q,presence_penalty:A,frequency_penalty:w,tool_choice:U,stream_options:Y}=j;if(!N||N.length===0)return W.status=400,{error:{message:"messages is required and must not be empty",type:"invalid_request_error"}};try{let K=await W6(J,Q,_),L=j6(),B=Math.floor(Date.now()/1000),E=K.repoId||"ggml-llm",M={reasoning_format:"auto",messages:N,jinja:!0,add_generation_prompt:!0};if(z!=null)M.temperature=z;if(R!=null)M.top_p=R;if(q!=null)M.n_predict=q;if(V!=null)M.stop=Array.isArray(V)?V:[V];if(A!=null)M.presence_penalty=A;if(w!=null)M.frequency_penalty=w;if(O!=null)M.tools=O;if(U!=null)M.tool_choice=U;let F=await J.ggmlLlm.completion(K.id,{options:M});if(!G)return await Y6(F,L,B,E);let x=Y?.include_usage===!0,k=F.getReader(),D="",m="",p=new Map,l=new Map;try{let i=!1;while(!i){let v=await k.read();if({done:i}=v,i)break;let{event:c,data:T}=v.value;if(c==="token"){let f={};if(T.content!=null){let 
h=T.content;if(h.length>D.length)f.content=h.slice(D.length),D=h}if(T.reasoning_content!=null){let h=T.reasoning_content;if(h.length>m.length)f.reasoning_content=h.slice(m.length),m=h}if(T.tool_calls?.length>0){let h=[];if(T.tool_calls.forEach((s,I)=>{let a={index:I};if(!l.has(I))l.set(I,s.id||`call_${L}_${I}`),a.id=l.get(I),a.type="function";let q0=s.function?.arguments||"",T0=p.get(I)||"",G0={};if(!p.has(I)&&s.function?.name)G0.name=s.function.name;if(q0.length>T0.length)G0.arguments=q0.slice(T0.length),p.set(I,q0);if(Object.keys(G0).length>0)a.function=G0,h.push(a);else if(a.id)a.function={name:s.function?.name||"",arguments:""},h.push(a)}),h.length>0)f.tool_calls=h}if(Object.keys(f).length>0)yield Z1({data:JSON.stringify({id:L,object:"chat.completion.chunk",created:B,model:E,choices:[{index:0,delta:f,finish_reason:null}]})})}else if(c==="result"){let f={id:L,object:"chat.completion.chunk",created:B,model:E,choices:[{index:0,delta:{},finish_reason:T.interrupted?"length":"stop"}]};if(x)f.usage={prompt_tokens:T.prompt_tokens||T.promptTokens||0,completion_tokens:T.tokens_predicted||T.tokensPredicted||0,total_tokens:(T.prompt_tokens||T.promptTokens||0)+(T.tokens_predicted||T.tokensPredicted||0)};yield Z1({data:JSON.stringify(f)})}else if(c==="error")yield Z1({data:JSON.stringify({error:{message:T.message,type:"server_error"}})})}yield Z1({data:"[DONE]"})}finally{k.cancel().catch(()=>{})}}catch(K){return console.error("[OpenAI] Chat completion error:",K),W.status=500,{error:{message:K.message||"Internal server 
error",type:"server_error"}}}},{body:P.Object({model:P.Optional(P.String()),messages:P.Array(P.Any()),stream:P.Optional(P.Boolean()),temperature:P.Optional(P.Number()),top_p:P.Optional(P.Number()),max_tokens:P.Optional(P.Number()),stop:P.Optional(P.Union([P.String(),P.Array(P.String())])),presence_penalty:P.Optional(P.Number()),frequency_penalty:P.Optional(P.Number()),tools:P.Optional(P.Array(P.Any())),tool_choice:P.Optional(P.Any()),stream_options:P.Optional(P.Object({include_usage:P.Optional(P.Boolean())}))})}),Z}var j6=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,y2;var u2=S(()=>{F0();y2=new Map});var m2=S(()=>{k2();b2();I2();u2()});import J6 from"node:os";import N6 from"node:path";import H6 from"node-machine-id";import V6 from"bytes";import U6 from"ms";var x0=($={},Z={})=>{let X=Array.isArray($)?[...$]:{...$};return Object.entries(Z||{}).forEach(([j,W])=>{if(W&&typeof W==="object"&&!Array.isArray(W))X[j]=x0(X[j]||{},W);else X[j]=W}),X},A0=($)=>{if(!$)return null;if(typeof $==="object")return JSON.parse(JSON.stringify($));return null},o1=($,Z)=>{let X=A0($)||{},j=A0(Z)||{};return x0(X,j)},p2=($,Z)=>x0(JSON.parse(JSON.stringify($.global)),Z||{}),t1=($,Z,X,j)=>{if($.generators.length>0){let W=$.generators.filter((H)=>H?.type===X);if(W.length>0){if(j){let H=W.find((Q)=>Z.getModelIdentifier(X,Q)===j);if(H)return p2($,H)}}}if(Object.keys($.global).length>0)return p2($,{});return null},f2,G6=($)=>{if(!$)return null;if($===!0)return{...f2};return x0(f2,$)},l2=($,Z)=>{if(!$.generators||$.generators.length===0)return Z.map((j)=>({type:j}));let X=new Set;if($.generators.forEach((j)=>{if(j.type)X.add(j.type)}),X.size===0)return Z.map((j)=>({type:j}));return Array.from(X).map((j)=>({type:j}))},g2=($,Z,X)=>{if($===void 0)return X;if(typeof $==="number")return $;return Z($)??X},K6=2080,c2=60000,d2=52428800,e1=($)=>{let Z=H6.machineIdSync(),X={server:{id:`buttress-${Z}`,name:`Buttress Server 
(${Z.slice(-8)})`,port:K6,temp_file_dir:N6.join(J6.tmpdir(),".buttress"),session_timeout:c2,max_body_size:d2},autodiscover:!1},j=x0(X,A0($)||{}),W=Array.isArray(j.generators)?j.generators:[],{server:H,generators:Q,autodiscover:J,...N}=j;return{autodiscover:G6(J),server:{id:H.id,name:H.name,port:H.port,log_level:H.log_level,temp_file_dir:H.temp_file_dir,max_body_size:g2(H.max_body_size,V6.parse,d2),session_timeout:g2(H.session_timeout,U6,c2)},global:N,generators:W}};var S0=S(()=>{f2={udp:{port:8089,announcements:{enabled:!0,interval:5000},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:"/buttress/info",cors:!0}}});import{z as Z0}from"zod";var n2,i2;var s2=S(()=>{S0();n2={getCapabilities:Z0.tuple([Z0.object({type:Z0.string().optional().default("ggml-llm"),config:Z0.any().optional(),currentClientCapabilities:Z0.any().optional(),options:Z0.any().optional()}).nullable().optional()]),startGenerator:Z0.tuple([Z0.string(),Z0.any().optional()]),finalizeGenerator:Z0.tuple([Z0.string()])},i2={async getCapabilities({backend:$,config:Z},X=null){console.log("[Server] Get Capabilities:",X);let j=X||{type:"ggml-llm"},{type:W="ggml-llm",config:H,currentClientCapabilities:Q=null,options:J={}}=j,N=A0(H),G=$.getModelIdentifier(W,N),_=t1(Z,$,W,G),O=o1(_,H);if(Object.keys(O).length===0)throw Error("Buttress server missing generator configuration");if(O.backend=O.backend||{},!O.backend.type)O.backend.type=W;return $.getCapabilities(W,Q,{...J,config:O})},async startGenerator({backend:$,config:Z,session:X},j,W){console.log("[Server] Start Generator:",j,W);let H=A0(W),Q=$.getModelIdentifier(j,H),J=t1(Z,$,j,Q),N=o1(J,W);if(Object.keys(N).length===0)throw Error("Buttress server missing generator configuration");if(N.backend=N.backend||{},!N.backend.type)N.backend.type=j;let G=await $.startGenerator(j,N);return X.generators.add(G.id),G},async finalizeGenerator({backend:$,session:Z},X){return console.log("[Server] Finalize 
Generator:",X),Z.generators.delete(X),$.finalizeGenerator(X)}}});import{z as g}from"zod";import{ReadableStream as _6}from"node:stream/web";var r2,a2;var o2=S(()=>{r2={initContext:g.tuple([g.string(),g.any().optional()]),completion:g.tuple([g.string(),g.any().optional()]),tokenize:g.tuple([g.string(),g.any()]),detokenize:g.tuple([g.string(),g.any()]),applyChatTemplate:g.tuple([g.string(),g.any()]),releaseContext:g.tuple([g.string()])},a2={initContext({backend:$},Z,X){return new _6({async start(j){try{let W=await $.ggmlLlm.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new Promise((J)=>setTimeout(J,1000));let{download:H,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},completion({backend:$},Z,X){return console.log("[Server] Completion:",{id:Z,property:X}),$.ggmlLlm.completion(Z,X)},async tokenize({backend:$},Z,X){return console.log("[Server] Tokenize:",{id:Z,property:X}),$.ggmlLlm.tokenize(Z,X)},async detokenize({backend:$},Z,X){return console.log("[Server] Detokenize:",{id:Z,property:X}),$.ggmlLlm.detokenize(Z,X)},async applyChatTemplate({backend:$},Z,X){return console.log("[Server] Apply Chat Template:",{id:Z,property:X}),$.ggmlLlm.applyChatTemplate(Z,X)},async releaseContext({backend:$},Z,X){return console.log("[Server] Release Context:",{id:Z,force:X}),$.ggmlLlm.releaseContext(Z,{force:X})}}});import{z as n}from"zod";import{ReadableStream as q6}from"node:stream/web";import O6 from"node:path";var t2,e2;var $3=S(()=>{t2={initContext:n.tuple([n.string(),n.any().optional()]),transcribe:n.tuple([n.string(),n.string(),n.any().optional()]),transcribeData:n.tuple([n.string(),n.union([n.instanceof(Buffer),n.instanceof(Uint8Array)]),n.any().optional()]),releaseContext:n.tuple([n.string()])},e2={initContext({backend:$},Z,X){return new q6({async start(j){try{let W=await $.ggmlStt.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new 
Promise((J)=>setTimeout(J,1000));let{download:H,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},async transcribe({backend:$,config:{server:Z}},X,j,W){return console.log("[Server] Transcribe:",{id:X,audioPath:j,options:W}),$.ggmlStt.transcribe(X,{audioPath:O6.join(Z.temp_file_dir,j),options:W})},async transcribeData({backend:$},Z,X,j){return console.log("[Server] Transcribe Data:",{id:Z,audioDataLength:X?.length||0,options:j}),$.ggmlStt.transcribeData(Z,{audioData:X,options:j})},async releaseContext({backend:$},Z,X){return console.log("[Server] Release STT Context:",{id:Z,force:X}),$.ggmlStt.releaseContext(Z,{force:X})}}});var L6,Z3,X3;var j3=S(()=>{s2();o2();$3();L6={common:i2,ggmlLlm:a2,ggmlStt:e2},Z3={common:n2,ggmlLlm:r2,ggmlStt:t2},X3=L6});import{Buffer as j1}from"node:buffer";var W3=($)=>{try{return JSON.parse($,(Z,X)=>{if(!X)return X;if(X?.type==="Buffer"&&X?.data)return j1.from(X.data,"base64");if(X?.type==="Uint8Array"&&X?.data){let j=j1.from(X.data,"base64");return j.buffer.slice(j.byteOffset,j.byteOffset+j.byteLength)}if(X?.type==="Error"&&X?.name&&X?.message)return Error(X.name,X.message);return X})}catch{return $}},W1=($)=>{try{return JSON.stringify($,(Z,X)=>{if(X instanceof Error)return{type:"Error",name:X.name,message:X.message};if(X instanceof j1)return{type:"Buffer",data:X.toString("base64")};if(X instanceof Uint8Array)return{type:"Uint8Array",data:j1.from(X).toString("base64")};return X})}catch{return $}};var Y3=()=>{};var $4="1.0",Q3=8089;import A6 from"node:dgram";class Z4{name="udp";socket=null;announcementTimer=null;config;getServerInfo;port;constructor($,Z){this.config=$,this.getServerInfo=Z,this.port=$.port??Q3}async start(){if(this.socket=A6.createSocket({type:"udp4",reuseAddr:!0}),this.socket.on("message",($,Z)=>{this.handleMessage($,Z)}),this.socket.on("error",($)=>{console.error("[Autodiscover UDP] Socket error:",$.message)}),await new 
Promise(($,Z)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),$()}),this.socket.once("error",Z)}),this.config.announcements.enabled){let $=this.config.announcements.interval??5000;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},$),this.sendAnnouncement()}}async stop(){if(this.announcementTimer)clearInterval(this.announcementTimer),this.announcementTimer=null;if(this.socket)await new Promise(($)=>{this.socket.close(()=>$())}),this.socket=null}handleMessage($,Z){try{let X=JSON.parse($.toString());if(X.t==="QUERY"&&this.config.requests.enabled){let j=X.d,W=this.config.requests.responseDelay??0,H=W>0?Math.random()*W:0;setTimeout(()=>{this.sendResponse(j.id,Z)},H)}}catch{}}sendAnnouncement(){if(!this.socket)return;let $=this.getServerInfo(),Z={t:"ANNOUNCE",v:$4,d:{info:$}},X=Buffer.from(JSON.stringify(Z));this.socket.send(X,0,X.length,this.port,"255.255.255.255",(j)=>{if(j)console.error("[Autodiscover UDP] Announcement error:",j.message)})}sendResponse($,Z){if(!this.socket)return;let X=this.getServerInfo(),j={t:"RESPONSE",v:$4,d:{request_id:$,info:X}},W=Buffer.from(JSON.stringify(j));this.socket.send(W,0,W.length,Z.port,Z.address,(H)=>{if(H)console.error("[Autodiscover UDP] Response error:",H.message)})}}var J3=()=>{};class X4{config;getServerInfo;transports=[];started=!1;constructor($,Z){this.config=$;this.getServerInfo=Z;if($.udp?.announcements?.enabled||$.udp?.requests?.enabled)this.transports.push(new Z4($.udp,Z))}async start(){if(this.started)return;(await Promise.allSettled(this.transports.map((Z)=>Z.start()))).forEach((Z,X)=>{if(Z.status==="rejected")console.error(`[Autodiscover] Failed to start ${this.transports[X].name}:`,Z.reason)}),this.started=!0}async stop(){if(!this.started)return;await Promise.allSettled(this.transports.map(($)=>$.stop())),this.started=!1}}var N3=S(()=>{J3()});import z6 from"node:os";var Y1=()=>{let $=z6.networkInterfaces();return 
Object.values($).flat().find((X)=>X?.family==="IPv4"&&!X?.internal)?.address||null};var j4=()=>{};import u from"node:os";import H3 from"node:fs";import V3 from"node:path";import{execSync as Q1}from"node:child_process";import U3 from"@iarna/toml";async function W4({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.39"}`),console.log(`Generating model capabilities comparison...
|
|
40
|
-
`),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.
|
|
41
|
-
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let
|
|
39
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var M2=($)=>{if(!$)return{repoId:null,filename:null};let[Z,X]=$.split(":");return{repoId:Z,filename:X||null}};var T2=S(()=>{B0();d1();w2()});var e0={};k$(e0,{testGgmlSttCapabilities:()=>k2,testGgmlLlmCapabilities:()=>S2,status:()=>l6,startModelDownload:()=>t0,startGenerator:()=>f6,showSttModelsTable:()=>P2,showModelsTable:()=>x2,globalDownloadManager:()=>l1,ggmlStt:()=>c6,ggmlLlm:()=>g6,getModelIdentifier:()=>d6,getCapabilities:()=>a,generatorRegistry:()=>$0,finalizeGenerator:()=>p6});async function f6($,Z){let j={"ggml-llm":{create:t4,getId:b1},"ggml-stt":{create:A2,getId:g1}}[$];if(!j)throw Error(`Unsupported backend type: ${$}`);let W=j.getId(Z);if(!W)throw Error("Buttress generator config missing repo identifier");let N=`${$}:${W}`,Q=$0.get(N);if(Q)return Q.refCount+=1,Q.instance.resetFinalized?.(),{id:Q.id,info:Q.instance.info};let J=await j.create(N,Z,{globalDownloadManager:l1}),H={id:N,type:J.type,instance:J,refCount:1};return $0.set(N,H),{id:N,info:J.info}}async function p6($){let Z=$0.get($);if(!Z)return!1;if(Z.refCount-=1,Z.refCount<=0){if(await Z.instance.finalize(),!(Z.instance.hasPendingReleases?.()??!1))$0.delete($)}return!0}function d6($,Z){if($==="ggml-llm")return b1(Z);if($==="ggml-stt")return g1(Z);return null}async function t0($,Z,X={}){let W={"ggml-llm":e4,"ggml-stt":z2}[$];if(!W)return{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${$}`};return W(Z,l1,X)}var $0,l1,m6=($)=>{let Z=$0.get($);if(!Z)throw Error(`Unknown generator id "${$}"`);return Z},V0=($,Z)=>{let X=m6($);if(X.type!==Z)throw Error(`Generator "${$}" does not support ${Z} backend`);return X.instance},g6,c6,l6;var $1=S(()=>{v1();c1();d1();m0();T2();$0=new Map,l1={downloads:new Map,getDownload($){return 
this.downloads.get($)||null},setDownload($,Z){this.downloads.set($,Z)},deleteDownload($){this.downloads.delete($)},isDownloading($){return this.downloads.has($)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([$,Z])=>({localPath:$,promise:Z}))}};g6={async initContext($,Z){return V0($,"ggml-llm").initContext(Z)},async completion($,Z){return V0($,"ggml-llm").completion(Z)},async tokenize($,Z){return V0($,"ggml-llm").tokenize(Z)},async detokenize($,Z){return V0($,"ggml-llm").detokenize(Z)},async applyChatTemplate($,Z){return V0($,"ggml-llm").applyChatTemplate(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-llm")throw Error(`Generator "${$}" does not support ggml-llm backend`);return X.instance.releaseContext(Z)}},c6={async initContext($,Z){return V0($,"ggml-stt").initContext(Z)},async transcribe($,Z){return V0($,"ggml-stt").transcribe(Z)},async transcribeData($,Z){return V0($,"ggml-stt").transcribeData(Z)},async releaseContext($,Z){let X=$0.get($);if(!X)return{released:!0,alreadyReleased:!0};if(X.type!=="ggml-stt")throw Error(`Generator "${$}" does not support ggml-stt backend`);return X.instance.releaseContext(Z)}};l6={getFullStatus:()=>S4($0),getGgmlLlmStatus:()=>M1($0),getGgmlSttStatus:()=>x1($0),subscribeToStatus:E1,subscribeToStatusWithId:x4,llmStatusTracker:e,sttStatusTracker:J0,statusEmitter:t}});import{node as n6}from"@elysiajs/node";import{Elysia as i6}from"elysia";var s6,U0=($)=>new i6({adapter:s6?n6():void 0,...$});var M0=S(()=>{s6=typeof process<"u"&&process.versions&&process.versions.node});import{t as j0}from"elysia";var r6,a6=({store:{serverInfo:$}})=>({id:$.id,name:$.name,version:$.version,generators:$.generators,authentication:$.authentication}),n1=($)=>{let Z=U0(),X=$.autodiscover.http?.path??"/buttress/info";return Z.get(X,a6,{response:r6}),Z};var 
D2=S(()=>{M0();r6=j0.Object({id:j0.String(),name:j0.String(),version:j0.String(),generators:j0.Array(j0.Object({type:j0.String()})),authentication:j0.Object({required:j0.Boolean(),type:j0.Literal("device-group")})})});import{t as W0,file as o6}from"elysia";import{writeFile as b2}from"node:fs/promises";import i1 from"node:path";var t6,s1;var v2=S(()=>{M0();t6=typeof process<"u"&&process.versions!=null&&process.versions.node!=null,s1=U0().post("/buttress/upload",async({body:{file:$},store:{config:Z}})=>{let X=`${Date.now()}-${$.name.replace(/[^\dA-Za-z]/g,"_")}`,j=i1.join(Z.server.temp_file_dir,X);try{if(t6)await b2(j,await $.stream());else await b2(j,await $.arrayBuffer());return{ok:!0,filename:X}}catch(W){return{ok:!1,error:String(W)}}},{body:W0.Object({file:W0.File()}),response:W0.Object({ok:W0.Boolean(),filename:W0.Optional(W0.String()),error:W0.Optional(W0.String())})}).get("/buttress/download/:filename",async({params:{filename:$},store:{config:Z},status:X})=>{let j=i1.join(Z.server.temp_file_dir,$);if(i1.relative(Z.server.temp_file_dir,j).includes(".."))return X(400),"Invalid file path";return o6(j)},{params:W0.Object({filename:W0.String()})})});import r1 from"node:path";import I2 from"node:fs/promises";import{fileURLToPath as e6}from"node:url";var h2,$3=async()=>{let $=[r1.join(h2,"..","public","status.html"),r1.join(h2,"..","..","public","status.html")];return(await Promise.all($.map((X)=>I2.access(X).then(()=>X,()=>null)))).find((X)=>X!==null)??null},Z3=($)=>{let{status:Z}=$;if(Z?.getFullStatus)return Z.getFullStatus();return{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},C2=async()=>{let $=await $3();if(!$)return console.error("[Status] Failed to find status.html in candidate paths"),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}});try{let Z=await I2.readFile($,"utf-8");return new Response(Z,{headers:{"Content-Type":"text/html; 
charset=utf-8"}})}catch(Z){return console.error("[Status] Failed to serve status page:",Z),new Response("Status page not found",{status:404,headers:{"Content-Type":"text/plain"}})}},a1;var y2=S(()=>{M0();h2=r1.dirname(e6(import.meta.url)),a1=U0().get("/status",C2).get("/status/",C2).get("/buttress/status",({store:{backend:$}})=>Z3($))});import{t as P,sse as Z1}from"elysia";import{cors as X3}from"@elysiajs/cors";async function W3($,Z,X){let W=(Z.generators||[]).filter((q)=>q.type==="ggml-llm");if(W.length===0)throw Error('No ggml-llm generator configured. Add a [[generators]] with type = "ggml-llm" to your config.');let N=W[0],Q=X||N.model?.repo_id;if(X){let q=W.find((A)=>A.model?.repo_id===X);if(q)N=q}else Q=N.model?.repo_id;let J=Q,H=u2.get(J);if(H?.initialized)return H;let{generators:G,server:K,...O}=Z.global||{},z={...O,...N,model:{...N.model,repo_id:Q}};console.log(`[OpenAI] Creating generator for ${J}`);let{id:V}=await $.startGenerator("ggml-llm",z),R={id:V,config:z,repoId:Q,initialized:!1};return u2.set(J,R),await $.ggmlLlm.initContext(V,{}),R.initialized=!0,console.log(`[OpenAI] Generator ready: ${J}`),R}async function Y3($,Z,X,j){let W=$.getReader(),N="",Q=null,J=null,H="stop",G={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let O=!1;while(!O){let z=await W.read();if({done:O}=z,O)break;let{event:V,data:R}=z.value;if(V==="token"){if(R.content!=null)N+=R.content;else if(R.token!=null)N+=R.token}else if(V==="result"){if(R.text)N=R.text;else if(R.content)N=R.content;if(R.reasoning_content)Q=R.reasoning_content;if(R.tool_calls?.length>0)J=R.tool_calls.map((q,A)=>({id:q.id||`call_${Z}_${A}`,type:"function",function:{name:q.function?.name||"",arguments:q.function?.arguments||""}})),H="tool_calls";else H=R.interrupted?"length":"stop";G={prompt_tokens:R.prompt_tokens||R.promptTokens||0,completion_tokens:R.tokens_predicted||R.tokensPredicted||0,total_tokens:(R.prompt_tokens||R.promptTokens||0)+(R.tokens_predicted||R.tokensPredicted||0)}}else 
if(V==="error")throw Error(R.message)}}finally{W.cancel().catch(()=>{})}let K={role:"assistant",content:N||null};if(Q)K.reasoning_content=Q;if(J)K.tool_calls=J;return{id:Z,object:"chat.completion",created:X,model:j,choices:[{index:0,message:K,finish_reason:H}],usage:G}}function X1({global:$}){let Z=U0({prefix:"/oai-compat"});return Z.use(X3({origin:$?.openai_compat?.cors_allowed_origins??!1,methods:["GET","POST","OPTIONS"],allowedHeaders:["Content-Type","Authorization"],maxAge:86400,preflight:!0})),Z.get("/v1/models",({store:X})=>{let{config:j}=X,Q=(j.generators||[]).filter((J)=>J.type==="ggml-llm").map((J)=>{return{id:J.model?.repo_id||"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"}});if(Q.length===0)Q.push({id:"ggml-llm",object:"model",created:Math.floor(Date.now()/1000),owned_by:"local"});return{object:"list",data:Q}}),Z.post("/v1/chat/completions",async function*({body:j,set:W,store:N}){let{config:Q,backend:J}=N,{messages:H=[],stream:G=!1,model:K,tools:O,temperature:z,stop:V,top_p:R,max_tokens:q,presence_penalty:A,frequency_penalty:w,tool_choice:U,stream_options:Y}=j;if(!H||H.length===0)return W.status=400,{error:{message:"messages is required and must not be empty",type:"invalid_request_error"}};try{let _=await W3(J,Q,K),L=j3(),B=Math.floor(Date.now()/1000),F=_.repoId||"ggml-llm",E={reasoning_format:"auto",messages:H,jinja:!0,add_generation_prompt:!0};if(z!=null)E.temperature=z;if(R!=null)E.top_p=R;if(q!=null)E.n_predict=q;if(V!=null)E.stop=Array.isArray(V)?V:[V];if(A!=null)E.presence_penalty=A;if(w!=null)E.frequency_penalty=w;if(O!=null)E.tools=O;if(U!=null)E.tool_choice=U;let M=await J.ggmlLlm.completion(_.id,{options:E});if(!G)return await Y3(M,L,B,F);let x=Y?.include_usage===!0,T=M.getReader(),D="",p="",m=new Map,l=new Map;try{let i=!1;while(!i){let v=await T.read();if({done:i}=v,i)break;let{event:c,data:k}=v.value;if(c==="token"){let f={};if(k.content!=null){let 
h=k.content;if(h.length>D.length)f.content=h.slice(D.length),D=h}if(k.reasoning_content!=null){let h=k.reasoning_content;if(h.length>p.length)f.reasoning_content=h.slice(p.length),p=h}if(k.tool_calls?.length>0){let h=[];if(k.tool_calls.forEach((s,I)=>{let o={index:I};if(!l.has(I))l.set(I,s.id||`call_${L}_${I}`),o.id=l.get(I),o.type="function";let q0=s.function?.arguments||"",k0=m.get(I)||"",G0={};if(!m.has(I)&&s.function?.name)G0.name=s.function.name;if(q0.length>k0.length)G0.arguments=q0.slice(k0.length),m.set(I,q0);if(Object.keys(G0).length>0)o.function=G0,h.push(o);else if(o.id)o.function={name:s.function?.name||"",arguments:""},h.push(o)}),h.length>0)f.tool_calls=h}if(Object.keys(f).length>0)yield Z1({data:JSON.stringify({id:L,object:"chat.completion.chunk",created:B,model:F,choices:[{index:0,delta:f,finish_reason:null}]})})}else if(c==="result"){let f={id:L,object:"chat.completion.chunk",created:B,model:F,choices:[{index:0,delta:{},finish_reason:k.interrupted?"length":"stop"}]};if(x)f.usage={prompt_tokens:k.prompt_tokens||k.promptTokens||0,completion_tokens:k.tokens_predicted||k.tokensPredicted||0,total_tokens:(k.prompt_tokens||k.promptTokens||0)+(k.tokens_predicted||k.tokensPredicted||0)};yield Z1({data:JSON.stringify(f)})}else if(c==="error")yield Z1({data:JSON.stringify({error:{message:k.message,type:"server_error"}})})}yield Z1({data:"[DONE]"})}finally{T.cancel().catch(()=>{})}}catch(_){return console.error("[OpenAI] Chat completion error:",_),W.status=500,{error:{message:_.message||"Internal server 
error",type:"server_error"}}}},{body:P.Object({model:P.Optional(P.String()),messages:P.Array(P.Any()),stream:P.Optional(P.Boolean()),temperature:P.Optional(P.Number()),top_p:P.Optional(P.Number()),max_tokens:P.Optional(P.Number()),stop:P.Optional(P.Union([P.String(),P.Array(P.String())])),presence_penalty:P.Optional(P.Number()),frequency_penalty:P.Optional(P.Number()),tools:P.Optional(P.Array(P.Any())),tool_choice:P.Optional(P.Any()),stream_options:P.Optional(P.Object({include_usage:P.Optional(P.Boolean())}))})}),Z}var j3=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,u2;var m2=S(()=>{M0();u2=new Map});var f2=S(()=>{D2();v2();y2();m2()});import J3 from"node:os";import N3 from"node:path";import H3 from"node-machine-id";import V3 from"bytes";import U3 from"ms";var x0=($={},Z={})=>{let X=Array.isArray($)?[...$]:{...$};return Object.entries(Z||{}).forEach(([j,W])=>{if(W&&typeof W==="object"&&!Array.isArray(W))X[j]=x0(X[j]||{},W);else X[j]=W}),X},A0=($)=>{if(!$)return null;if(typeof $==="object")return JSON.parse(JSON.stringify($));return null},o1=($,Z)=>{let X=A0($)||{},j=A0(Z)||{};return x0(X,j)},p2=($,Z)=>x0(JSON.parse(JSON.stringify($.global)),Z||{}),t1=($,Z,X,j)=>{if($.generators.length>0){let W=$.generators.filter((N)=>N?.type===X);if(W.length>0){if(j){let N=W.find((Q)=>Z.getModelIdentifier(X,Q)===j);if(N)return p2($,N)}}}if(Object.keys($.global).length>0)return p2($,{});return null},g2,G3=($)=>{if(!$)return null;if($===!0)return{...g2};return x0(g2,$)},n2=($,Z)=>{if(!$.generators||$.generators.length===0)return Z.map((j)=>({type:j}));let X=new Set;if($.generators.forEach((j)=>{if(j.type)X.add(j.type)}),X.size===0)return Z.map((j)=>({type:j}));return Array.from(X).map((j)=>({type:j}))},c2=($,Z,X)=>{if($===void 0)return X;if(typeof $==="number")return $;return Z($)??X},_3=2080,d2=60000,l2=52428800,e1=($)=>{let Z=H3.machineIdSync(),X={server:{id:`buttress-${Z}`,name:`Buttress Server 
(${Z.slice(-8)})`,port:_3,temp_file_dir:N3.join(J3.tmpdir(),".buttress"),session_timeout:d2,max_body_size:l2},autodiscover:!1},j=x0(X,A0($)||{}),W=Array.isArray(j.generators)?j.generators:[],{server:N,generators:Q,autodiscover:J,...H}=j;return{autodiscover:G3(J),server:{id:N.id,name:N.name,port:N.port,log_level:N.log_level,temp_file_dir:N.temp_file_dir,max_body_size:c2(N.max_body_size,V3.parse,l2),session_timeout:c2(N.session_timeout,U3,d2)},global:H,generators:W}};var S0=S(()=>{g2={udp:{port:8089,announcements:{enabled:!0,interval:5000},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:"/buttress/info",cors:!0}}});import{z as Z0}from"zod";var i2,s2;var r2=S(()=>{S0();i2={getCapabilities:Z0.tuple([Z0.object({type:Z0.string().optional().default("ggml-llm"),config:Z0.any().optional(),currentClientCapabilities:Z0.any().optional(),options:Z0.any().optional()}).nullable().optional()]),startGenerator:Z0.tuple([Z0.string(),Z0.any().optional()]),finalizeGenerator:Z0.tuple([Z0.string()])},s2={async getCapabilities({backend:$,config:Z},X=null){console.log("[Server] Get Capabilities:",X);let j=X||{type:"ggml-llm"},{type:W="ggml-llm",config:N,currentClientCapabilities:Q=null,options:J={}}=j,H=A0(N),G=$.getModelIdentifier(W,H),K=t1(Z,$,W,G),O=o1(K,N);if(Object.keys(O).length===0)throw Error("Buttress server missing generator configuration");if(O.backend=O.backend||{},!O.backend.type)O.backend.type=W;return $.getCapabilities(W,Q,{...J,config:O})},async startGenerator({backend:$,config:Z,session:X},j,W){console.log("[Server] Start Generator:",j,W);let N=A0(W),Q=$.getModelIdentifier(j,N),J=t1(Z,$,j,Q),H=o1(J,W);if(Object.keys(H).length===0)throw Error("Buttress server missing generator configuration");if(H.backend=H.backend||{},!H.backend.type)H.backend.type=j;let G=await $.startGenerator(j,H);return X.generators.add(G.id),G},async finalizeGenerator({backend:$,session:Z},X){return console.log("[Server] Finalize 
Generator:",X),Z.generators.delete(X),$.finalizeGenerator(X)}}});import{z as g}from"zod";import{ReadableStream as K3}from"node:stream/web";var a2,o2;var t2=S(()=>{a2={initContext:g.tuple([g.string(),g.any().optional()]),completion:g.tuple([g.string(),g.any().optional()]),tokenize:g.tuple([g.string(),g.any()]),detokenize:g.tuple([g.string(),g.any()]),applyChatTemplate:g.tuple([g.string(),g.any()]),releaseContext:g.tuple([g.string()])},o2={initContext({backend:$},Z,X){return new K3({async start(j){try{let W=await $.ggmlLlm.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new Promise((J)=>setTimeout(J,1000));let{download:N,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},completion({backend:$},Z,X){return console.log("[Server] Completion:",{id:Z,property:X}),$.ggmlLlm.completion(Z,X)},async tokenize({backend:$},Z,X){return console.log("[Server] Tokenize:",{id:Z,property:X}),$.ggmlLlm.tokenize(Z,X)},async detokenize({backend:$},Z,X){return console.log("[Server] Detokenize:",{id:Z,property:X}),$.ggmlLlm.detokenize(Z,X)},async applyChatTemplate({backend:$},Z,X){return console.log("[Server] Apply Chat Template:",{id:Z,property:X}),$.ggmlLlm.applyChatTemplate(Z,X)},async releaseContext({backend:$},Z,X){return console.log("[Server] Release Context:",{id:Z,force:X}),$.ggmlLlm.releaseContext(Z,{force:X})}}});import{z as n}from"zod";import{ReadableStream as q3}from"node:stream/web";import O3 from"node:path";var e2,$$;var Z$=S(()=>{e2={initContext:n.tuple([n.string(),n.any().optional()]),transcribe:n.tuple([n.string(),n.string(),n.any().optional()]),transcribeData:n.tuple([n.string(),n.union([n.instanceof(Buffer),n.instanceof(Uint8Array)]),n.any().optional()]),releaseContext:n.tuple([n.string()])},$$={initContext({backend:$},Z,X){return new q3({async start(j){try{let W=await $.ggmlStt.initContext(Z,{...X,onProgress:(J)=>{j.enqueue({event:"progress",data:{progress:J}})}});await new 
Promise((J)=>setTimeout(J,1000));let{download:N,...Q}=W||{};j.enqueue({event:"result",data:{result:Q}}),j.close()}catch(W){j.error(W)}}})},async transcribe({backend:$,config:{server:Z}},X,j,W){return console.log("[Server] Transcribe:",{id:X,audioPath:j,options:W}),$.ggmlStt.transcribe(X,{audioPath:O3.join(Z.temp_file_dir,j),options:W})},async transcribeData({backend:$},Z,X,j){return console.log("[Server] Transcribe Data:",{id:Z,audioDataLength:X?.length||0,options:j}),$.ggmlStt.transcribeData(Z,{audioData:X,options:j})},async releaseContext({backend:$},Z,X){return console.log("[Server] Release STT Context:",{id:Z,force:X}),$.ggmlStt.releaseContext(Z,{force:X})}}});var L3,X$,j$;var W$=S(()=>{r2();t2();Z$();L3={common:s2,ggmlLlm:o2,ggmlStt:$$},X$={common:i2,ggmlLlm:a2,ggmlStt:e2},j$=L3});import{Buffer as j1}from"node:buffer";var Y$=($)=>{try{return JSON.parse($,(Z,X)=>{if(!X)return X;if(X?.type==="Buffer"&&X?.data)return j1.from(X.data,"base64");if(X?.type==="Uint8Array"&&X?.data){let j=j1.from(X.data,"base64");return j.buffer.slice(j.byteOffset,j.byteOffset+j.byteLength)}if(X?.type==="Error"&&X?.name&&X?.message)return Error(X.name,X.message);return X})}catch{return $}},W1=($)=>{try{return JSON.stringify($,(Z,X)=>{if(X instanceof Error)return{type:"Error",name:X.name,message:X.message};if(X instanceof j1)return{type:"Buffer",data:X.toString("base64")};if(X instanceof Uint8Array)return{type:"Uint8Array",data:j1.from(X).toString("base64")};return X})}catch{return $}};var Q$=()=>{};var $4="1.0",J$=8089;import A3 from"node:dgram";class Z4{name="udp";socket=null;announcementTimer=null;config;getServerInfo;port;constructor($,Z){this.config=$,this.getServerInfo=Z,this.port=$.port??J$}async start(){if(this.socket=A3.createSocket({type:"udp4",reuseAddr:!0}),this.socket.on("message",($,Z)=>{this.handleMessage($,Z)}),this.socket.on("error",($)=>{console.error("[Autodiscover UDP] Socket error:",$.message)}),await new 
Promise(($,Z)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),$()}),this.socket.once("error",Z)}),this.config.announcements.enabled){let $=this.config.announcements.interval??5000;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},$),this.sendAnnouncement()}}async stop(){if(this.announcementTimer)clearInterval(this.announcementTimer),this.announcementTimer=null;if(this.socket)await new Promise(($)=>{this.socket.close(()=>$())}),this.socket=null}handleMessage($,Z){try{let X=JSON.parse($.toString());if(X.t==="QUERY"&&this.config.requests.enabled){let j=X.d,W=this.config.requests.responseDelay??0,N=W>0?Math.random()*W:0;setTimeout(()=>{this.sendResponse(j.id,Z)},N)}}catch{}}sendAnnouncement(){if(!this.socket)return;let $=this.getServerInfo(),Z={t:"ANNOUNCE",v:$4,d:{info:$}},X=Buffer.from(JSON.stringify(Z));this.socket.send(X,0,X.length,this.port,"255.255.255.255",(j)=>{if(j)console.error("[Autodiscover UDP] Announcement error:",j.message)})}sendResponse($,Z){if(!this.socket)return;let X=this.getServerInfo(),j={t:"RESPONSE",v:$4,d:{request_id:$,info:X}},W=Buffer.from(JSON.stringify(j));this.socket.send(W,0,W.length,Z.port,Z.address,(N)=>{if(N)console.error("[Autodiscover UDP] Response error:",N.message)})}}var N$=()=>{};class X4{config;getServerInfo;transports=[];started=!1;constructor($,Z){this.config=$;this.getServerInfo=Z;if($.udp?.announcements?.enabled||$.udp?.requests?.enabled)this.transports.push(new Z4($.udp,Z))}async start(){if(this.started)return;(await Promise.allSettled(this.transports.map((Z)=>Z.start()))).forEach((Z,X)=>{if(Z.status==="rejected")console.error(`[Autodiscover] Failed to start ${this.transports[X].name}:`,Z.reason)}),this.started=!0}async stop(){if(!this.started)return;await Promise.allSettled(this.transports.map(($)=>$.stop())),this.started=!1}}var H$=S(()=>{N$()});import z3 from"node:os";var Y1=()=>{let $=z3.networkInterfaces();return 
Object.values($).flat().find((X)=>X?.family==="IPv4"&&!X?.internal)?.address||null};var j4=()=>{};import u from"node:os";import V$ from"node:fs";import U$ from"node:path";import{execSync as Q1}from"node:child_process";import G$ from"@iarna/toml";async function W4({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.40"}`),console.log(`Generating model capabilities comparison...
|
|
40
|
+
`),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.40"}`),X.push(`## Model Capabilities Comparison
|
|
41
|
+
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let _=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))_[L]=j(_[L]||{},B);else _[L]=B}),_},W=Z||{},{server:N,generators:Q=[],...J}=W,H=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((_)=>_?.type==="ggml-llm");if(Y.length>0&&U){let _=Y.find((L)=>L.model?.repo_id===U);if(_)return H(_)}}return Object.keys(J).length>0?H({}):null},K=[];for(let U=0;U<$.length;U+=1){let Y=$[U];console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let _=G(Y);_={..._||{},model:{...J.runtime,..._?.model||{},repo_id:Y}};let L=await a("ggml-llm",null,{config:_,includeBreakdown:!0});K.push({modelId:Y,capabilities:L,modelInfo:L.buttress?.selected||null,modelConfig:L.modelConfig||null})}let O=(U)=>U?(U/1024/1024/1024).toFixed(2):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |"),X.push("|----------|-----------|--------------|--------------------|----------------------------|-----------------|-----------------|"),K.forEach(({modelId:U,modelInfo:Y,modelConfig:_})=>{let L=O(Y?.modelBytes),B=_?.nCtx||Y?.kvInfo?.nCtxTrain||"N/A",F=N0(Y),E=Number(B),M=Y?.kvCacheBytes||(F&&Number.isFinite(E)&&E>0?F(E):F&&F(Y?.kvInfo?.nCtxTrain||0))||null,x=O(M),T=O(Y?.modelBytes&&M?Y.modelBytes+M:Y?.fit?.totalRequiredBytes),D=z(Y?.fit?.fitsInGpu),p=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${L} | ${B} | ${x} | ${T} | ${D} | ${p} |`);let m=Y?.memoryLimitedCtx!=null||Y?.limitedFit!=null,l=!Y?.fit?.fitsInGpu||!Y?.fit?.fitsInCpu;if(m&&l){let 
i=Y?.memoryLimitedCtx||B,v=Number(i),c=Y?.limitedKvCacheBytes||F&&Number.isFinite(v)&&v>0&&F(v)||null,k=O(c),f=O(Y?.modelBytes&&c?Y.modelBytes+c:Y?.limitedFit?.totalRequiredBytes),h=z(Y?.limitedFit?.fitsInGpu),s=z(Y?.limitedFit?.fitsInCpu);if(i!==B||k!==x||f!==T)X.push(`| ↳ Limited | ${L} | ${i} | ${k} | ${f} | ${h} | ${s} |`)}}),X.push(`
|
|
42
42
|
---`),X.push(`
|
|
43
|
-
### System Information`);let V=null;if(process.platform!=="win32")try{V=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${u.hostname()}`),X.push(`- **OS:** ${u.type()} ${u.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${u.cpus().length}`),X.push(`- **Total System Memory:** ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`),
|
|
43
|
+
### System Information`);let V=null;if(process.platform!=="win32")try{V=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${u.hostname()}`),X.push(`- **OS:** ${u.type()} ${u.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${u.cpus().length}`),X.push(`- **Total System Memory:** ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`),K.length>0){let Y=K[0].capabilities.buttress?.selected;if(Y){let _=Y.cpuTotalBytes>0?(Y.cpuUsableBytes/Y.cpuTotalBytes*100).toFixed(0):0;if(X.push(`- **Usable CPU Memory:** ${(Y.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${_}% of ${(Y.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.hasGpu){let L=Y.devices.filter((B)=>B.type==="gpu");if(L.length>0){let B=L[0];X.push(`- **GPU Backend:** ${B.backend}`),X.push(`- **GPU Name:** ${B.deviceName}`),X.push(`- **GPU Total Memory:** ${(B.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Y.gpuTotalBytes>0?(Y.gpuUsableBytes/Y.gpuTotalBytes*100).toFixed(0):0;X.push(`- **GPU Usable Memory:** ${(Y.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Y.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else X.push("- **GPU:** Not available")}}X.push(`
|
|
44
44
|
### Command Used`);let R=process.argv.slice(2).join(" ");if(X.push(`\`\`\`bash
|
|
45
45
|
${process.argv[0]} ${process.argv[1]} ${R}
|
|
46
46
|
\`\`\``),X.push(`
|
|
47
|
-
### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.
|
|
48
|
-
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=
|
|
47
|
+
### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.40"}`),Z&&Object.keys(Z).length>0){X.push(`
|
|
48
|
+
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=G$.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-llm-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=U$.join(process.cwd(),A);V$.writeFileSync(w,X.join(`
|
|
49
49
|
`),"utf8"),console.log(`
|
|
50
|
-
Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function
|
|
50
|
+
Model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate model table:",j.message),process.exit(1)}}async function K$({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.40"}`),console.log("Testing capabilities for backend: ggml-llm"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let X=Z||{},{server:j,generators:W=[],...N}=X,Q=(V={},R={})=>{let q=Array.isArray(V)?[...V]:{...V};return Object.entries(R||{}).forEach(([A,w])=>{if(w&&typeof w==="object"&&!Array.isArray(w))q[A]=Q(q[A]||{},w);else q[A]=w}),q},J=(V)=>Q(JSON.parse(JSON.stringify(N)),V||{}),G=((V)=>{if(Array.isArray(W)&&W.length>0){let R=W.filter((q)=>q?.type==="ggml-llm");if(R.length>0){if(V){let q=R.find((A)=>A.model?.repo_id===V);if(q)return J(q)}}}if(Object.keys(N).length>0)return J({});return null})($);if($)G={...G||{},model:{...G?.model||{},repo_id:$}};let K=await a("ggml-llm",null,{config:G,includeBreakdown:!0}),O=K.buttress?.selected||null,z=K.modelConfig||null;if($||z?.repoId){console.log(`
|
|
51
51
|
=== Model Information ===`);let V=$||z?.repoId;if(console.log(`Repository ID: ${V}`),z?.quantization)console.log(`Quantization: ${z.quantization}`);if(z?.nCtx)console.log(`Context Length: ${z.nCtx}`);if(O?.quantization){let{fileType:A}=O.quantization;if(A!=null)console.log(`Model File Type (GGUF): ${A}`)}let R=z?.cache_type_k||"f16",q=z?.cache_type_v||"f16";if(console.log(`KV Cache Type: K=${R}, V=${q}`),O?.modelBytes&&O?.kvCacheBytes){if(console.log(`Model Size: ${(O.modelBytes/1024/1024/1024).toFixed(2)} GB`),O.kvInfo)console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB (KV info: ${JSON.stringify(O.kvInfo)})`);else console.log(`KV Cache Size: ${(O.kvCacheBytes/1024/1024/1024).toFixed(2)} GB`);if(console.log(`Total Required Memory: ${((O.modelBytes+O.kvCacheBytes)/1024/1024/1024).toFixed(2)} GB`),O.memoryLimitedCtx!=null){let A=O.memoryLimitedCtx,w=O.kvInfo?.nCtxTrain;if(w)console.log(`
|
|
52
52
|
Memory-Limited Context: ${A} (Train: ${w})`);else console.log(`
|
|
53
|
-
Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(O.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(
|
|
53
|
+
Memory-Limited Context: ${A}`);if(O.limitedKvCacheBytes!=null)console.log(`Limited KV Cache Size: ${(O.limitedKvCacheBytes/1024/1024/1024).toFixed(2)} GB`)}}else if(K.buttress?.selected?.fit){let{totalRequiredBytes:A}=K.buttress.selected.fit;console.log(`Total Required Memory: ${(A/1024/1024/1024).toFixed(2)} GB`)}}if(K.buttress?.selected){let{selected:V}=K.buttress;console.log(`
|
|
54
54
|
=== Hardware Information ===`);let R=null;if(process.platform!=="win32")try{R=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(R)console.log(`System: ${R}`);else console.log(`Hostname: ${u.hostname()}`),console.log(`OS: ${u.type()} ${u.release()}`);console.log(`Platform: ${V.platform}`),console.log(`CPU Cores: ${u.cpus().length}`),console.log(`Total System Memory: ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`);let q=V.cpuTotalBytes>0?(V.cpuUsableBytes/V.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(V.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${q}% of ${(V.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),V.hasGpu)console.log(`
|
|
55
55
|
--- GPU Details ---`),V.devices.filter((w)=>w.type==="gpu").forEach((w)=>{console.log(`GPU Backend: ${w.backend}`),console.log(`GPU Name: ${w.deviceName}`),console.log(`GPU Total Memory: ${(w.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let U=V.gpuTotalBytes>0?(V.gpuUsableBytes/V.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(V.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${U}% of ${(V.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),w.metadata){if(w.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(w.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
|
|
56
56
|
Backend Variant: ${V.variant}`),console.log(`Performance Score: ${V.score}`),V.fit){if(console.log(`
|
|
57
57
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${V.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${V.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${V.fit.limiting}`),V.limitedFit)console.log(`
|
|
58
58
|
--- Memory-Limited Fit Analysis ---`),console.log(`Limited Total Required: ${(V.limitedFit.totalRequiredBytes/1024/1024/1024).toFixed(2)} GB`),console.log(`Fits in GPU (Limited): ${V.limitedFit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU (Limited): ${V.limitedFit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor (Limited): ${V.limitedFit.limiting}`)}}console.log(`
|
|
59
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(
|
|
60
|
-
`),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.
|
|
61
|
-
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let
|
|
59
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(K,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}async function Y4({modelIds:$=[],defaultConfig:Z=null}={}){let X=[];if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.40"}`),console.log(`Generating STT model capabilities comparison...
|
|
60
|
+
`),X.push(`${"@fugood/buttress-server"} v${"2.23.0-beta.40"}`),X.push(`## STT Model Capabilities Comparison
|
|
61
|
+
`),!$||$.length===0)console.error("Error: No model IDs provided"),process.exit(1);try{let j=(U={},Y={})=>{let _=Array.isArray(U)?[...U]:{...U};return Object.entries(Y||{}).forEach(([L,B])=>{if(B&&typeof B==="object"&&!Array.isArray(B))_[L]=j(_[L]||{},B);else _[L]=B}),_},W=Z||{},{server:N,generators:Q=[],...J}=W,H=(U)=>j(JSON.parse(JSON.stringify(J)),U||{}),G=(U)=>{if(Array.isArray(Q)&&Q.length>0){let Y=Q.filter((_)=>_?.type==="ggml-stt");if(Y.length>0&&U){let _=Y.find((L)=>L.model?.repo_id===U);if(_)return H(_)}}return Object.keys(J).length>0?H({}):null},K=[];for(let U=0;U<$.length;U+=1){let Y=$[U],{repoId:_,filename:L}=_$(Y);console.log(`[${U+1}/${$.length}] Analyzing ${Y}...`);let B=G(_);B={...B||{},model:{...B?.model||{},repo_id:_,...L&&{filename:L}}};let F=await a("ggml-stt",null,{config:B,includeBreakdown:!0});K.push({modelId:Y,repoId:_,filename:L,capabilities:F,modelInfo:F.buttress?.selected||null,modelConfig:F.modelConfig||null})}let O=(U)=>U?(U/1024/1024).toFixed(1):"N/A",z=(U)=>U?"✅":"\uD83D\uDEAB";X.push("| Model | Size (MB) | Processing Buffer (MB) | Total Required (MB) | Fits GPU | Fits CPU |"),X.push("|-------|-----------|------------------------|---------------------|----------|----------|"),K.forEach(({modelId:U,modelInfo:Y})=>{let _=O(Y?.modelBytes),L=O(Y?.processingBytes||Y?.kvCacheBytes),B=O(Y?.fit?.totalRequiredBytes),F=z(Y?.fit?.fitsInGpu),E=z(Y?.fit?.fitsInCpu);X.push(`| ${U} | ${_} | ${L} | ${B} | ${F} | ${E} |`)}),X.push(`
|
|
62
62
|
---`),X.push(`
|
|
63
|
-
### System Information`);let V=null;if(process.platform!=="win32")try{V=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${u.hostname()}`),X.push(`- **OS:** ${u.type()} ${u.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${u.cpus().length}`),X.push(`- **Total System Memory:** ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`),
|
|
63
|
+
### System Information`);let V=null;if(process.platform!=="win32")try{V=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(V)X.push(`- **System:** ${V}`);else X.push(`- **Hostname:** ${u.hostname()}`),X.push(`- **OS:** ${u.type()} ${u.release()}`);if(X.push(`- **Platform:** ${process.platform}`),X.push(`- **CPU Cores:** ${u.cpus().length}`),X.push(`- **Total System Memory:** ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`),K.length>0){let Y=K[0].capabilities.buttress?.selected;if(Y){let _=Y.cpuTotalBytes>0?(Y.cpuUsableBytes/Y.cpuTotalBytes*100).toFixed(0):0;if(X.push(`- **Usable CPU Memory:** ${(Y.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${_}% of ${(Y.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.hasGpu){let L=Y.devices.filter((B)=>B.type==="gpu");if(L.length>0){let B=L[0];X.push(`- **GPU Backend:** ${B.backend}`),X.push(`- **GPU Name:** ${B.deviceName}`),X.push(`- **GPU Total Memory:** ${(B.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let F=Y.gpuTotalBytes>0?(Y.gpuUsableBytes/Y.gpuTotalBytes*100).toFixed(0):0;X.push(`- **GPU Usable Memory:** ${(Y.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${F}% of ${(Y.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`)}}else X.push("- **GPU:** Not available")}}X.push(`
|
|
64
64
|
### Command Used`);let R=process.argv.slice(2).join(" ");if(X.push(`\`\`\`bash
|
|
65
65
|
${process.argv[0]} ${process.argv[1]} ${R}
|
|
66
66
|
\`\`\``),X.push(`
|
|
67
|
-
### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.
|
|
68
|
-
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=
|
|
67
|
+
### Package Information`),X.push(`- **Name:** ${"@fugood/buttress-server"}`),X.push(`- **Version:** ${"2.23.0-beta.40"}`),Z&&Object.keys(Z).length>0){X.push(`
|
|
68
|
+
### Configuration`),X.push("<details>"),X.push("<summary>Click to expand TOML configuration</summary>"),X.push("\n```toml");try{let U=G$.stringify(Z);X.push(U)}catch(U){X.push("# Error serializing config"),X.push(JSON.stringify(Z,null,2))}X.push("```"),X.push("</details>")}let A=`ggml-stt-model-capabilities-${new Date().toISOString().replace(/[.:]/g,"-").split("T")[0]}.md`,w=U$.join(process.cwd(),A);V$.writeFileSync(w,X.join(`
|
|
69
69
|
`),"utf8"),console.log(`
|
|
70
|
-
STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function
|
|
70
|
+
STT model capabilities table saved to: ${w}`),process.exit(0)}catch(j){console.error("Failed to generate STT model table:",j.message),process.exit(1)}}async function q$({modelId:$=null,defaultConfig:Z=null}={}){if(console.log(`${"@fugood/buttress-server"} v${"2.23.0-beta.40"}`),console.log("Testing capabilities for backend: ggml-stt"),$)console.log(`Model: ${$}`);console.log("--------------------------------");try{let{repoId:X,filename:j}=_$($),W=Z||{},{server:N,generators:Q=[],...J}=W,H=(q={},A={})=>{let w=Array.isArray(q)?[...q]:{...q};return Object.entries(A||{}).forEach(([U,Y])=>{if(Y&&typeof Y==="object"&&!Array.isArray(Y))w[U]=H(w[U]||{},Y);else w[U]=Y}),w},G=(q)=>H(JSON.parse(JSON.stringify(J)),q||{}),O=((q)=>{if(Array.isArray(Q)&&Q.length>0){let A=Q.filter((w)=>w?.type==="ggml-stt");if(A.length>0){if(q){let w=A.find((U)=>U.model?.repo_id===q);if(w)return G(w)}}}if(Object.keys(J).length>0)return G({});return null})(X);if(X)O={...O||{},model:{...J.runtime,...O?.model||{},repo_id:X,...j&&{filename:j}}};let z=await a("ggml-stt",null,{config:O,includeBreakdown:!0}),V=z.buttress?.selected||null,R=z.modelConfig||null;if(X||R?.repoId){console.log(`
|
|
71
71
|
=== Model Information ===`);let q=X||R?.repoId;if(console.log(`Repository ID: ${q}`),j)console.log(`Filename: ${j}`);if(V?.modelBytes)console.log(`Model Size: ${(V.modelBytes/1024/1024).toFixed(1)} MB`);let A=V?.processingBytes||V?.kvCacheBytes;if(A)console.log(`Processing Buffer: ${(A/1024/1024).toFixed(1)} MB`);if(V?.modelBytes&&A)console.log(`Total Required Memory: ${((V.modelBytes+A)/1024/1024).toFixed(1)} MB`);else if(z.buttress?.selected?.fit){let{totalRequiredBytes:w}=z.buttress.selected.fit;console.log(`Total Required Memory: ${(w/1024/1024).toFixed(1)} MB`)}}if(z.buttress?.selected){let{selected:q}=z.buttress;console.log(`
|
|
72
72
|
=== Hardware Information ===`);let A=null;if(process.platform!=="win32")try{A=Q1("uname -a",{encoding:"utf8"}).trim()}catch{}if(A)console.log(`System: ${A}`);else console.log(`Hostname: ${u.hostname()}`),console.log(`OS: ${u.type()} ${u.release()}`);console.log(`Platform: ${q.platform}`),console.log(`CPU Cores: ${u.cpus().length}`),console.log(`Total System Memory: ${(u.totalmem()/1024/1024/1024).toFixed(2)} GB`);let w=q.cpuTotalBytes>0?(q.cpuUsableBytes/q.cpuTotalBytes*100).toFixed(0):0;if(console.log(`Usable CPU Memory: ${(q.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${w}% of ${(q.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),q.hasGpu)console.log(`
|
|
73
|
-
--- GPU Details ---`),q.devices.filter((Y)=>Y.type==="gpu").forEach((Y)=>{console.log(`GPU Backend: ${Y.backend}`),console.log(`GPU Name: ${Y.deviceName}`),console.log(`GPU Total Memory: ${(Y.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let
|
|
73
|
+
--- GPU Details ---`),q.devices.filter((Y)=>Y.type==="gpu").forEach((Y)=>{console.log(`GPU Backend: ${Y.backend}`),console.log(`GPU Name: ${Y.deviceName}`),console.log(`GPU Total Memory: ${(Y.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let _=q.gpuTotalBytes>0?(q.gpuUsableBytes/q.gpuTotalBytes*100).toFixed(0):0;if(console.log(`GPU Usable Memory: ${(q.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${_}% of ${(q.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),Y.metadata){if(Y.metadata.hasBFloat16)console.log("Supports BFloat16: Yes");if(Y.metadata.hasUnifiedMemory)console.log("Unified Memory: Yes")}});else console.log("GPU: Not available");if(console.log(`
|
|
74
74
|
Backend Variant: ${q.variant}`),console.log(`Performance Score: ${q.score}`),q.fit)console.log(`
|
|
75
75
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${q.fit.fitsInGpu?"Yes":"No"}`),console.log(`Fits in CPU: ${q.fit.fitsInCpu?"Yes":"No"}`),console.log(`Limiting Factor: ${q.fit.limiting}`)}console.log(`
|
|
76
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var
|
|
77
|
-
bricks-buttress v${"2.23.0-beta.
|
|
76
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(z,null,2)),process.exit(0)}catch(X){console.error("Failed to get capabilities:",X.message),process.exit(1)}}var _$=($)=>{if(!$)return{repoId:null,filename:null};let[Z,X]=$.split(":");return{repoId:Z,filename:X||null}};var O$=S(()=>{$1();B0()});var S3={};import B3 from"node:fs";import w3 from"node:path";import R3 from"@iarna/toml";async function F3($){if(!$?.generators||!Array.isArray($.generators))return;let Z=$.generators.filter((K)=>{if(!K.model?.download)return!1;let{type:O}=K;if(!O||O!=="ggml-llm"&&O!=="ggml-stt")return console.warn(`[Download] Skipping unknown generator type: ${O}`),!1;return!0});if(Z.length===0)return;let{server:X,generators:j,...W}=$,N=Z.map((K)=>{let{type:O}=K,z=K.model?.repo_id;console.log(`[Download] Starting pre-download for ${O}: ${z}`);let V={...W,backend:K.backend||{},model:K.model||{},runtime:{...W.runtime,...K.runtime||{}}};return t0(O,V,{onProgress:()=>{},onComplete:({repoId:R,alreadyExists:q})=>{if(q)console.log(`[Download] Pre-download complete (already exists): ${R}`);else console.log(`[Download] Pre-download complete: ${R}`)},onError:(R)=>{console.error(`[Download] Pre-download failed for ${z}:`,R.message)}})}),Q=await Promise.all(N),J=Q.filter((K)=>K.started).length,H=Q.filter((K)=>K.alreadyExists).length,G=Q.filter((K)=>K.alreadyDownloading).length;console.log(`[Download] Pre-download summary: ${J} started, ${H} already exist, ${G} already downloading`)}var L$,A$,z$,J1,Y0=null,E3,M3,B$,N1,x3;var w$=S(async()=>{O$();S0();j4();await E$();if(process.argv.includes("--version")||process.argv.includes("-v"))console.log("2.23.0-beta.40"),process.exit(0);if(process.argv.includes("--help")||process.argv.includes("-h"))console.log(`
|
|
77
|
+
bricks-buttress v${"2.23.0-beta.40"}
|
|
78
78
|
|
|
79
79
|
Buttress server for remote inference with GGML backends.
|
|
80
80
|
|
|
@@ -107,5 +107,5 @@ Examples:
|
|
|
107
107
|
bricks-buttress --config ./config.toml
|
|
108
108
|
bricks-buttress --test-caps ggml-llm --test-models-default
|
|
109
109
|
bricks-buttress --test-caps ggml-stt --test-caps-model-id BricksDisplay/whisper-ggml:ggml-small.bin
|
|
110
|
-
`),process.exit(0);
|
|
111
|
-
`))$=J1;else{let Z=
|
|
110
|
+
`),process.exit(0);L$=process.argv.findIndex(($)=>$==="--port"||$==="-p"),A$=L$>=0?Number(process.argv[L$+1]):void 0,z$=process.argv.findIndex(($)=>$==="--config"||$==="-c"),J1=z$>=0?process.argv[z$+1]:null;if(J1){let $;if(J1.includes(`
|
|
111
|
+
`))$=J1;else{let Z=w3.resolve(J1);try{$=B3.readFileSync(Z,"utf8")}catch(X){console.error(`Failed to read Buttress config at ${Z}:`,X),process.exit(1)}}try{let Z=R3.parse($);if(Z.env&&typeof Z.env==="object")Object.entries(Z.env).forEach(([X,j])=>{if(process.env[X]===void 0)process.env[X]=String(j)}),delete Z.env;Y0=Z}catch(Z){console.error("Failed to parse TOML config:",Z),process.exit(1)}}E3=["ggml-org/gpt-oss-20b-GGUF","ggml-org/gpt-oss-120b-GGUF","unsloth/Nemotron-3-Nano-30B-A3B-GGUF","unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF","bartowski/Mistral-Nemo-Instruct-2407-GGUF","mistralai/Magistral-Small-2509-GGUF","mistralai/Ministral-3-14B-Reasoning-2512-GGUF","bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF","bartowski/mistralai_Devstral-2-123B-Instruct-2512-GGUF","ggml-org/gemma-3-12b-it-qat-GGUF","ggml-org/gemma-3-27b-it-qat-GGUF","unsloth/phi-4-GGUF"],M3=["BricksDisplay/whisper-ggml:ggml-small.bin","BricksDisplay/whisper-ggml:ggml-small-q8_0.bin","BricksDisplay/whisper-ggml:ggml-medium.bin","BricksDisplay/whisper-ggml:ggml-medium-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo.bin","BricksDisplay/whisper-ggml:ggml-large-v3-turbo-q8_0.bin","BricksDisplay/whisper-ggml:ggml-large-v3.bin"],B$=process.argv.findIndex(($)=>$==="--test-caps");if(B$>=0){let $=process.argv[B$+1]||"ggml-llm";if($!=="ggml-llm"&&$!=="ggml-stt")console.error("Only ggml-llm and ggml-stt backends are supported for testing capabilities"),process.exit(1);let Z=process.argv.findIndex((j)=>j==="--test-models"),X=process.argv.includes("--test-models-default");if($==="ggml-stt")if(Z>=0){let j=process.argv[Z+1];if(!j)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let W=j.split(",").map((N)=>N.trim());await Y4({modelIds:W,defaultConfig:Y0})}else if(X)await Y4({modelIds:M3,defaultConfig:Y0});else{let j=process.argv.findIndex((N)=>N==="--test-caps-model-id"),W=j>=0?process.argv[j+1]:null;await q$({modelId:W,defaultConfig:Y0})}else 
if(Z>=0){let j=process.argv[Z+1];if(!j)console.error("Error: --test-models requires a comma-separated list of model IDs"),process.exit(1);let W=j.split(",").map((N)=>N.trim());await W4({modelIds:W,defaultConfig:Y0})}else if(X)await W4({modelIds:E3,defaultConfig:Y0});else{let j=process.argv.findIndex((N)=>N==="--test-caps-model-id"),W=j>=0?process.argv[j+1]:null;await K$({modelId:W,defaultConfig:Y0})}}N1=e1(Y0);if(A$)N1.server.port=A$;if(!N1.server.port)N1.server.port=Number(process.env.BUTTRESS_PORT)||2080;x3=process.env.ENABLE_OPENAI_COMPAT_ENDPOINT==="1";F$({config:N1,enableOpenAICompat:x3}).then(async({port:$,openaiEnabled:Z,autoDiscover:X})=>{let j=Y1();if(console.log(`Buttress server listening on port ${$}`),console.log("--------------------------------"),await R$(),console.log(),console.log("Current supported Generators:"),console.log("- LLM (GGML)"),console.log("- STT (GGML)"),console.log(),console.log("Please configure `Buttress (Remote Inference)` in the Generator to connect to this server."),console.log(),console.log(`- Use http://${j}:${$} to connect to this server via LAN.`),console.log(`- Visit http://${j}:${$}/status to see status via LAN.`),console.log(),Z)console.log("OpenAI-compatible API [EXPERIMENTAL]:"),console.log(`- Base URL: http://${j}:${$}/oai-compat/v1`),console.log(`- Chat completions: POST http://${j}:${$}/oai-compat/v1/chat/completions`),console.log(`- Models: GET http://${j}:${$}/oai-compat/v1/models`),console.log();else console.log("OpenAI-compatible API [EXPERIMENTAL]: disabled"),console.log(" Set ENABLE_OPENAI_COMPAT_ENDPOINT=1 to enable"),console.log();if(X)console.log("Auto-discover enabled"),console.log();if(Y0)await F3(Y0)}).catch(($)=>{console.error("Failed to start Buttress server:",$),process.exitCode=1})});import{node as P3}from"@elysiajs/node";import{Elysia as k3,t as P0}from"elysia";import{ReadableStream as T3}from"node:stream/web";import D3 from"node:fs/promises";import{ZodError as b3}from"zod";var v3=async()=>{let 
$=`https://registry.npmjs.org/${"@fugood/buttress-server"}/latest`;try{let Z=new AbortController,X=setTimeout(()=>Z.abort(),3000),j=await fetch($,{headers:{Accept:"application/json"},signal:Z.signal});if(clearTimeout(X),!j.ok)return null;return(await j.json()).version||null}catch{return null}},h3=($,Z)=>{if(!Z)return!1;let X=$.split(/[.-]/),j=Z.split(/[.-]/);for(let W=0;W<Math.max(X.length,j.length);W+=1){let N=parseInt(X[W])||0,Q=parseInt(j[W])||0;if(Q>N)return!0;if(Q<N)return!1}return!1},C3=($)=>{console.log(""),console.log("\x1B[33m╭─────────────────────────────────────────────────╮\x1B[0m"),console.log("\x1B[33m│\x1B[0m Update available! \x1B[2m%s\x1B[0m → \x1B[32m%s\x1B[0m","2.23.0-beta.40".padEnd(12),$.padEnd(12),"\x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m Run to upgrade: \x1B[33m│\x1B[0m"),console.log("\x1B[33m│\x1B[0m \x1B[36mnpm install -g %s\x1B[0m \x1B[33m│\x1B[0m","@fugood/buttress-server".padEnd(27)),console.log("\x1B[33m╰─────────────────────────────────────────────────╯\x1B[0m"),console.log("")},R$=async()=>{try{let $=await v3();if($&&h3("2.23.0-beta.40",$))C3($)}catch($){}},I3,y3=async({backend:$,router:Z,config:X,enableOpenAICompat:j})=>{try{await D3.mkdir(X.server.temp_file_dir,{recursive:!0})}catch{}let W=Y1()||"0.0.0.0",N={id:X.server.id,name:X.server.name,version:"2.23.0-beta.40",address:W,port:X.server.port,url:`http://${W}:${X.server.port}`,generators:n2(X,X.generators.map((H)=>H.type)),authentication:{required:!0,type:"device-group"}},Q=new k3({serve:{maxRequestBodySize:X.server.max_body_size},websocket:{idleTimeout:Math.ceil(X.server.session_timeout/1000)},adapter:I3?P3():void 0}).state({sessions:new Map,backend:$||e0,config:X,serverInfo:N});if(Z)Q.use(Z);if(X.autodiscover?.http?.enabled)Q.use(n1(X));if(Q.use(s1),Q.use(a1),j)Q.use(X1(X));let J={INVALID_REQUEST:-32600,INVALID_PARAMS:-32602,METHOD_NOT_FOUND:-32601,INTERNAL_ERROR:-32603};return Q.ws("/buttress/rpc",{parse:(H,G)=>{if(typeof 
G==="string")try{return JSON.parse(G)}catch{return H.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_REQUEST,message:"Invalid request"},id:null})),null}return G},body:P0.Object({jsonrpc:P0.String(),method:P0.String(),params:P0.String(),id:P0.String()}),open(H){let G=H.id??H.raw?.id??H.remoteAddress;if(console.log(`[Request] New connection: ${G}`),!H.data.store.sessions.has(G))H.data.store.sessions.set(G,{streams:new Map,generators:new Set,timeout:null});else{let K=H.data.store.sessions.get(G);clearTimeout(K.timeout),K.timeout=null}},async message(H,{id:G,method:K,params:O}){let z=H.id??H.raw?.id??H.remoteAddress;console.log(`[Request] Received request from ${z}: ${K}`);let V=H.data.store.sessions.get(z),[R,q]=K.split("."),A=j$[R]?.[q];if(!A){H.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.METHOD_NOT_FOUND,message:"Method not found"},id:G}));return}try{if(K==="cancel"){if(V.streams.has(G))V.streams.get(G)?.cancel(),V.streams.delete(G);return}if(K==="ping"){H.send(JSON.stringify({jsonrpc:"2.0",result:"pong",id:G}));return}let w=Y$(O),U=X$[R]?.[q],Y=U?U.parse(w):w,_={...H.data.store,peerId:z,session:V},L=await A(_,...Y);if(L instanceof T3){V.streams.set(G,L),H.send(JSON.stringify({jsonrpc:"2.0",result:{type:"stream"},id:G}));try{let B=L.getReader();while(!0){let{value:F,done:E}=await B.read();if(E)break;let{event:M,data:x}=F;H.send(JSON.stringify({jsonrpc:"2.0",method:`notification/${M}`,params:W1(x),id:G}))}H.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_end",id:G}))}catch(B){console.error(B),H.send(JSON.stringify({jsonrpc:"2.0",method:"notification/_error",params:W1(B),id:G}))}V.streams.delete(G)}else H.send(JSON.stringify({jsonrpc:"2.0",result:W1(L),id:G}))}catch(w){if(w instanceof b3){H.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INVALID_PARAMS,message:"Invalid params",data:w.issues},id:G}));return}console.error(w),H.send(JSON.stringify({jsonrpc:"2.0",error:{code:J.INTERNAL_ERROR,message:String(w)},id:G}))}},async close(H){let 
G=H.id??H.raw?.id??H.remoteAddress;console.log(`[Request] Connection closed: ${G}`);let{backend:K,sessions:O}=H.data.store,z=O.get(G);if(!z)return;z.streams.forEach((V)=>V.cancel()),z.streams.clear(),z.timeout=setTimeout(()=>{O.delete(G),console.log(`[Request] Session timed out: ${G}`);let{generators:V}=z;V.forEach((R)=>{K.finalizeGenerator(R)})},X.server.session_timeout)}}),{app:Q,config:X}},F$=async({backend:$,router:Z,config:X,enableOpenAICompat:j=!1})=>{let{app:W,config:N}=await y3({backend:$,router:Z,config:X,enableOpenAICompat:j}),{server:{port:Q}}=N,J=[new Promise((G)=>W.listen(Q,G))],H=null;if(N.autodiscover)H=new X4(N.autodiscover,()=>W.store.serverInfo),J.push(H.start());return await Promise.all(J),{app:W,port:Q,openaiEnabled:j,autoDiscover:H}};var E$=S(async()=>{$1();f2();W$();Q$();S0();H$();j4();$1();S0();I3=typeof process<"u"&&process.versions&&process.versions.node;if(J4.main==J4.module)await w$().then(() => S3)});await E$();export{F$ as startServer,t0 as startModelDownload,e1 as processConfig,C3 as logUpdateMessage,y3 as createServer,h3 as compareVersions,v3 as checkForUpdates,R$ as checkAndNotifyUpdates};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/buttress-server",
|
|
3
|
-
"version": "2.23.0-beta.
|
|
3
|
+
"version": "2.23.0-beta.40",
|
|
4
4
|
"main": "dist/index.mjs",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
"dependencies": {
|
|
29
29
|
"@elysiajs/cors": "^1.1.1",
|
|
30
30
|
"@elysiajs/node": "^1.4.2",
|
|
31
|
-
"@fugood/llama.node": "^1.4.
|
|
31
|
+
"@fugood/llama.node": "^1.4.12",
|
|
32
32
|
"@fugood/whisper.node": "^1.0.11",
|
|
33
33
|
"@huggingface/gguf": "^0.3.2",
|
|
34
34
|
"@iarna/toml": "^3.0.0",
|
|
@@ -43,5 +43,5 @@
|
|
|
43
43
|
"oxc-transform": "^0.105.0",
|
|
44
44
|
"typescript": "^5.9.3"
|
|
45
45
|
},
|
|
46
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "5b18821cb5d33f7ae4f3461e8ee21ca42892e2c4"
|
|
47
47
|
}
|