Package not found. Please check the package name and try again.
@fugood/buttress-server 2.24.0-beta.39 → 2.24.0-beta.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.mjs +2 -2
- package/package.json +3 -3
package/lib/index.mjs
CHANGED
|
@@ -6,7 +6,7 @@ from huggingface_hub import snapshot_download
|
|
|
6
6
|
path = snapshot_download("${s}", revision="${o.model.revision||`main`}")
|
|
7
7
|
print(path)
|
|
8
8
|
`.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let l=c.stdout.trim().split(`
|
|
9
|
-
`).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return rn(t,n);if(e===`ggml-stt`)return Gn(t,n);if(e===`mlx-llm`)return Tr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.0-beta.
|
|
9
|
+
`).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return rn(t,n);if(e===`ggml-stt`)return Gn(t,n);if(e===`mlx-llm`)return Tr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.0-beta.40`,Dr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0-beta.40`,"@fugood/llama.node":`^1.7.0-rc.5`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Or=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function kr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
|
|
10
10
|
`),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
|
|
11
11
|
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=u(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=He(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,f=u(l),p=u(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=d(t?.fit?.fitsInGpu),h=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${f} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=u(i),l=u(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=d(t?.limitedFit?.fitsInGpu),h=d(t?.limitedFit?.fitsInCpu);(e!==o||c!==f||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
|
|
12
12
|
---`),n.push(`
|
|
@@ -33,7 +33,7 @@ print(path)
|
|
|
33
33
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
34
34
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
35
35
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
|
|
36
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Nr=e({finalizeGenerator:()=>Lr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Vr,ggmlLlm:()=>Rr,ggmlStt:()=>Br,globalDownloadManager:()=>Pr,mlxLlm:()=>zr,showModelsTable:()=>kr,showSttModelsTable:()=>jr,startGenerator:()=>Ir,startModelDownload:()=>Ur,status:()=>Hr,testGgmlLlmCapabilities:()=>Ar,testGgmlSttCapabilities:()=>Mr});const Z=new Map,Pr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},Fr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=Fr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Ir(e,t){let n={"ggml-llm":{create:Qt,getId:$t},"ggml-stt":{create:Vn,getId:Hn},"mlx-llm":{create:Cr,getId:wr}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Pr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Lr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Rr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},zr={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Br={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Vr(e,t){return e===`ggml-llm`?$t(t):e===`ggml-stt`?Hn(t):e===`mlx-llm`?wr(t):null}const Hr={getFullStatus:()=>$e(Z),getGgmlLlmStatus:()=>Xe(Z),getGgmlSttStatus:()=>Ze(Z),getMlxLlmStatus:()=>Qe(Z),subscribeToStatus:Je,subscribeToStatusWithId:Ye,llmStatusTracker:U,sttStatusTracker:W,statusEmitter:H};async function Ur(e,t,n={}){let r={"ggml-llm":en,"ggml-stt":Un,"mlx-llm":Er}[e];return r?r(t,Pr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Wr=`@fugood/buttress-server`,Gr=`2.24.0-beta.39`,Kr={name:Wr,version:Gr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`lib/index.mjs`},files:[`lib`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.4`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`f74322101339ff5efb796a89fcd8f0a52c2465ce`};const qr=()=>({version:Gr,name:Wr,description:Kr.description}),Jr=typeof process<`u`&&process.versions&&process.versions.node,Yr=e=>new n({adapter:Jr?t():void 0,...e}),Xr=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),Zr=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var Qr=e=>{let t=Yr(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,Zr,{response:Xr}),t};const $r=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var ei=Yr().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return $r?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const ti=_.dirname(D(import.meta.url)),ni=async()=>{let e=[_.join(ti,`..`,`public`,`status.html`),_.join(ti,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},ri=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},ii=async()=>{let e=await ni();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var ai=Yr().get(`/status`,ii).get(`/status/`,ii).get(`/buttress/status`,({store:{backend:e}})=>ri(e));const oi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,si=[`ggml-llm`,`mlx-llm`],ci=new Map;function li(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function ui(e,t,n){let r=(t.generators||[]).filter(e=>si.includes(e.type));if(r.length===0)throw Error(`No LLM generator configured. Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let i=r[0],a=n||i.model?.repo_id;if(n){let e=r.find(e=>e.model?.repo_id===n);e&&(i=e)}else a=i.model?.repo_id;let o=i.type||`ggml-llm`,s=a,c=ci.get(s);if(c?.initialized)return c;let{generators:l,server:u,...d}=t.global||{},f={...d,...i,model:{...i.model,repo_id:a}};console.log(`[OpenAI] Creating ${o} generator for ${s}`);let{id:p}=await e.startGenerator(o,f),m={id:p,type:o,config:f,repoId:a,initialized:!1};return ci.set(s,m),await li(e,o).initContext(p,{}),m.initialized=!0,console.log(`[OpenAI] Generator ready: ${s}`),m}function di(e){let t=e.prompt_tokens??e.promptTokens??0,n=e.tokens_predicted??e.tokensPredicted??0;return{prompt_tokens:t,completion_tokens:n,total_tokens:t+n}}async function fi(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=di(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function pi({global:e}){let t=Yr({prefix:`/oai-compat`});return t.use(P({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>si.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await ui(a,r,c),t=oi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await li(a,e.type).completion(e.id,{options:b});if(!s)return await fi(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=di(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const mi=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=mi(n[e]||{},t):n[e]=t}),n},hi=e=>e&&typeof e==`object`?structuredClone(e):null,gi=(e,t)=>mi(hi(e)||{},hi(t)||{}),_i=(e,t)=>mi(structuredClone(e.global),t||{}),vi=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return _i(e,a)}}return Object.keys(e.global).length>0?_i(e,{}):null},yi={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},bi=e=>e?e===!0?{...yi}:mi(yi,e):null,xi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Si=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Ci=6e4,wi=1024*1024*50,Ti=e=>{let t=F.machineIdSync(),n=mi({server:{id:`buttress-${t}`,name:`Buttress Server (${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Ci,max_body_size:wi},autodiscover:!1},hi(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:bi(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Si(i.max_body_size,w.parse,wi),session_timeout:Si(i.session_timeout,te,Ci)},global:s,generators:r}},Ei={getCapabilities:N.tuple([N.object({type:N.string().optional().default(`ggml-llm`),config:N.any().optional(),currentClientCapabilities:N.any().optional(),options:N.any().optional()}).nullable().optional()]),startGenerator:N.tuple([N.string(),N.any().optional()]),finalizeGenerator:N.tuple([N.string()])};var Di={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=hi(i),c=gi(vi(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=hi(i),o=gi(vi(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const Oi={initContext:N.tuple([N.string(),N.any().optional()]),completion:N.tuple([N.string(),N.any().optional()]),tokenize:N.tuple([N.string(),N.any()]),detokenize:N.tuple([N.string(),N.any()]),applyChatTemplate:N.tuple([N.string(),N.any()]),releaseContext:N.tuple([N.string()])};function ki(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function Ai(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function ji(e,t){return{initContext:ki(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Ai(e,`${t}Release Context`)}}var Mi=ji(e=>e.ggmlLlm,``);const Ni={initContext:N.tuple([N.string(),N.any().optional()]),transcribe:N.tuple([N.string(),N.string(),N.any().optional()]),transcribeData:N.tuple([N.string(),N.union([N.instanceof(Buffer),N.instanceof(Uint8Array)]),N.any().optional()]),releaseContext:N.tuple([N.string()])},Pi=e=>e.ggmlStt,Fi={common:Di,ggmlLlm:Mi,ggmlStt:{initContext:ki(Pi),async transcribe({backend:e,config:{server:t}},n,r,i){return console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:Ai(Pi,`Release STT Context`)},mlxLlm:ji(e=>e.mlxLlm,`MLX `)},Ii={common:Ei,ggmlLlm:Oi,ggmlStt:Ni,mlxLlm:Oi};var Li=Fi;const Ri=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return I.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=I.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},zi=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof I?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:I.from(t).toString(`base64`)}:t)}catch{return e}};var Bi=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=ne.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Vi=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Bi(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const Hi=()=>{let e=v.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=qr(),Ui=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Wi({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
36
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Nr=e({finalizeGenerator:()=>Lr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Vr,ggmlLlm:()=>Rr,ggmlStt:()=>Br,globalDownloadManager:()=>Pr,mlxLlm:()=>zr,showModelsTable:()=>kr,showSttModelsTable:()=>jr,startGenerator:()=>Ir,startModelDownload:()=>Ur,status:()=>Hr,testGgmlLlmCapabilities:()=>Ar,testGgmlSttCapabilities:()=>Mr});const Z=new Map,Pr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},Fr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=Fr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Ir(e,t){let n={"ggml-llm":{create:Qt,getId:$t},"ggml-stt":{create:Vn,getId:Hn},"mlx-llm":{create:Cr,getId:wr}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Pr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Lr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Rr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},zr={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Br={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Vr(e,t){return e===`ggml-llm`?$t(t):e===`ggml-stt`?Hn(t):e===`mlx-llm`?wr(t):null}const Hr={getFullStatus:()=>$e(Z),getGgmlLlmStatus:()=>Xe(Z),getGgmlSttStatus:()=>Ze(Z),getMlxLlmStatus:()=>Qe(Z),subscribeToStatus:Je,subscribeToStatusWithId:Ye,llmStatusTracker:U,sttStatusTracker:W,statusEmitter:H};async function Ur(e,t,n={}){let r={"ggml-llm":en,"ggml-stt":Un,"mlx-llm":Er}[e];return r?r(t,Pr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Wr=`@fugood/buttress-server`,Gr=`2.24.0-beta.40`,Kr={name:Wr,version:Gr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`lib/index.mjs`},files:[`lib`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.5`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`c07ad6f9f378a3040385f99bdf0033c834c7d96c`};const qr=()=>({version:Gr,name:Wr,description:Kr.description}),Jr=typeof process<`u`&&process.versions&&process.versions.node,Yr=e=>new n({adapter:Jr?t():void 0,...e}),Xr=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),Zr=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var Qr=e=>{let t=Yr(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,Zr,{response:Xr}),t};const $r=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var ei=Yr().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return $r?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const ti=_.dirname(D(import.meta.url)),ni=async()=>{let e=[_.join(ti,`..`,`public`,`status.html`),_.join(ti,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},ri=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},ii=async()=>{let e=await ni();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var ai=Yr().get(`/status`,ii).get(`/status/`,ii).get(`/buttress/status`,({store:{backend:e}})=>ri(e));const oi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`,si=[`ggml-llm`,`mlx-llm`],ci=new Map;function li(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function ui(e,t,n){let r=(t.generators||[]).filter(e=>si.includes(e.type));if(r.length===0)throw Error(`No LLM generator configured. Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let i=r[0],a=n||i.model?.repo_id;if(n){let e=r.find(e=>e.model?.repo_id===n);e&&(i=e)}else a=i.model?.repo_id;let o=i.type||`ggml-llm`,s=a,c=ci.get(s);if(c?.initialized)return c;let{generators:l,server:u,...d}=t.global||{},f={...d,...i,model:{...i.model,repo_id:a}};console.log(`[OpenAI] Creating ${o} generator for ${s}`);let{id:p}=await e.startGenerator(o,f),m={id:p,type:o,config:f,repoId:a,initialized:!1};return ci.set(s,m),await li(e,o).initContext(p,{}),m.initialized=!0,console.log(`[OpenAI] Generator ready: ${s}`),m}function di(e){let t=e.prompt_tokens??e.promptTokens??0,n=e.tokens_predicted??e.tokensPredicted??0;return{prompt_tokens:t,completion_tokens:n,total_tokens:t+n}}async function fi(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=di(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function pi({global:e}){let t=Yr({prefix:`/oai-compat`});return t.use(P({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>si.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await ui(a,r,c),t=oi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await li(a,e.type).completion(e.id,{options:b});if(!s)return await fi(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=di(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const mi=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=mi(n[e]||{},t):n[e]=t}),n},hi=e=>e&&typeof e==`object`?structuredClone(e):null,gi=(e,t)=>mi(hi(e)||{},hi(t)||{}),_i=(e,t)=>mi(structuredClone(e.global),t||{}),vi=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return _i(e,a)}}return Object.keys(e.global).length>0?_i(e,{}):null},yi={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},bi=e=>e?e===!0?{...yi}:mi(yi,e):null,xi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Si=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Ci=6e4,wi=1024*1024*50,Ti=e=>{let t=F.machineIdSync(),n=mi({server:{id:`buttress-${t}`,name:`Buttress Server (${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Ci,max_body_size:wi},autodiscover:!1},hi(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:bi(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Si(i.max_body_size,w.parse,wi),session_timeout:Si(i.session_timeout,te,Ci)},global:s,generators:r}},Ei={getCapabilities:N.tuple([N.object({type:N.string().optional().default(`ggml-llm`),config:N.any().optional(),currentClientCapabilities:N.any().optional(),options:N.any().optional()}).nullable().optional()]),startGenerator:N.tuple([N.string(),N.any().optional()]),finalizeGenerator:N.tuple([N.string()])};var Di={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=hi(i),c=gi(vi(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=hi(i),o=gi(vi(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const Oi={initContext:N.tuple([N.string(),N.any().optional()]),completion:N.tuple([N.string(),N.any().optional()]),tokenize:N.tuple([N.string(),N.any()]),detokenize:N.tuple([N.string(),N.any()]),applyChatTemplate:N.tuple([N.string(),N.any()]),releaseContext:N.tuple([N.string()])};function ki(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function Ai(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function ji(e,t){return{initContext:ki(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Ai(e,`${t}Release Context`)}}var Mi=ji(e=>e.ggmlLlm,``);const Ni={initContext:N.tuple([N.string(),N.any().optional()]),transcribe:N.tuple([N.string(),N.string(),N.any().optional()]),transcribeData:N.tuple([N.string(),N.union([N.instanceof(Buffer),N.instanceof(Uint8Array)]),N.any().optional()]),releaseContext:N.tuple([N.string()])},Pi=e=>e.ggmlStt,Fi={common:Di,ggmlLlm:Mi,ggmlStt:{initContext:ki(Pi),async transcribe({backend:e,config:{server:t}},n,r,i){return console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:Ai(Pi,`Release STT Context`)},mlxLlm:ji(e=>e.mlxLlm,`MLX `)},Ii={common:Ei,ggmlLlm:Oi,ggmlStt:Ni,mlxLlm:Oi};var Li=Fi;const Ri=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return I.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=I.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},zi=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof I?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:I.from(t).toString(`base64`)}:t)}catch{return e}};var Bi=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=ne.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Vi=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Bi(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const Hi=()=>{let e=v.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=qr(),Ui=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Wi({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
37
37
|
`),n.push(`${$.name} v${$.version}`),n.push(`## Model Capabilities Comparison
|
|
38
38
|
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=u(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=He(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=u(c),f=t?.recurrentMemoryBytes||0,p=f>0?u(f):`-`,m=u(t?.modelBytes&&(c!=null||f>0)?t.modelBytes+(c||0)+f:t?.fit?.totalRequiredBytes),h=d(t?.fit?.fitsInGpu),g=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=u(s),h=u(t?.modelBytes&&(s!=null||f>0)?t.modelBytes+(s||0)+f:t?.limitedFit?.totalRequiredBytes),g=d(t?.limitedFit?.fitsInGpu),_=d(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
|
|
39
39
|
---`),n.push(`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/buttress-server",
|
|
3
|
-
"version": "2.24.0-beta.
|
|
3
|
+
"version": "2.24.0-beta.40",
|
|
4
4
|
"main": "lib/index.mjs",
|
|
5
5
|
"types": "lib/index.d.mts",
|
|
6
6
|
"type": "module",
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"dependencies": {
|
|
30
30
|
"@elysiajs/cors": "^1.1.1",
|
|
31
31
|
"@elysiajs/node": "^1.4.2",
|
|
32
|
-
"@fugood/llama.node": "^1.7.0-rc.
|
|
32
|
+
"@fugood/llama.node": "^1.7.0-rc.5",
|
|
33
33
|
"@fugood/whisper.node": "^1.0.18",
|
|
34
34
|
"@huggingface/gguf": "^0.3.2",
|
|
35
35
|
"@iarna/toml": "^3.0.0",
|
|
@@ -43,5 +43,5 @@
|
|
|
43
43
|
"tsdown": "^0.20.1",
|
|
44
44
|
"typescript": "^5.9.3"
|
|
45
45
|
},
|
|
46
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "c07ad6f9f378a3040385f99bdf0033c834c7d96c"
|
|
47
47
|
}
|