@fugood/buttress-server 2.24.0-beta.50 → 2.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.mjs +2 -2
- package/package.json +2 -2
package/lib/index.mjs
CHANGED
|
@@ -6,7 +6,7 @@ from huggingface_hub import snapshot_download
|
|
|
6
6
|
path = snapshot_download("${s}", revision="${o.model.revision||`main`}")
|
|
7
7
|
print(path)
|
|
8
8
|
`.trim(),a={...process.env};o.runtime.huggingface_token&&(a.HF_TOKEN=o.runtime.huggingface_token);let c=await q(t,[`-c`,n],{timeout:6e5,env:a});r?.(1);let l=c.stdout.trim().split(`
|
|
9
|
-
`).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return cn(t,n);if(e===`ggml-stt`)return Yn(t,n);if(e===`mlx-llm`)return kr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.0
|
|
9
|
+
`).pop();i?.({localPath:l,repoId:s,alreadyExists:!1})}catch(e){throw a?.(e),e}finally{t?.deleteDownload(l)}})();return t?.setDownload(l,u),{started:!0,localPath:null,repoId:s}}async function J(e,t=null,n={}){if(e===`ggml-llm`)return cn(t,n);if(e===`ggml-stt`)return Yn(t,n);if(e===`mlx-llm`)return kr(t,n);throw Error(`Unknown backend type: ${e}`)}var Y=`@fugood/buttress-backend-core`,X=`2.24.0`,jr={name:Y,private:!0,type:`module`,version:X,main:`src/index.js`,types:`lib/types/index.d.ts`,scripts:{build:`tsc --noCheck --declaration --emitDeclarationOnly --allowJs --outDir lib/types src/index.js`},dependencies:{"@fugood/buttress-hardware-guardrails":`^2.24.0`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`}};const Mr=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function Nr({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${Y} v${X}`),console.log(`Generating model capabilities comparison...
|
|
10
10
|
`),n.push(`${Y} v${X}`),n.push(`## Model Capabilities Comparison
|
|
11
11
|
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Quantization | Size (GB) | Context Size | KV Cache Size (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|--------------|-----------|--------------|--------------------|-----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=t?.quantization?.name?.toUpperCase()||`N/A`,a=u(t?.modelBytes),o=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,s=Ue(t),c=Number(o),l=t?.kvCacheBytes||(s&&Number.isFinite(c)&&c>0?s(c):s&&s(t?.kvInfo?.nCtxTrain||0))||null,f=u(l),p=u(t?.modelBytes&&l?t.modelBytes+l:t?.fit?.totalRequiredBytes),m=d(t?.fit?.fitsInGpu),h=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${o} | ${f} | ${p} | ${m} | ${h} |`);let g=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,_=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(g&&_){let 
e=t?.memoryLimitedCtx||o,r=Number(e),i=t?.limitedKvCacheBytes||s&&Number.isFinite(r)&&r>0&&s(r)||null,c=u(i),l=u(t?.modelBytes&&i?t.modelBytes+i:t?.limitedFit?.totalRequiredBytes),m=d(t?.limitedFit?.fitsInGpu),h=d(t?.limitedFit?.fitsInCpu);(e!==o||c!==f||l!==p)&&n.push(`| ↳ Limited | - | ${a} | ${e} | ${c} | ${l} | ${m} | ${h} |`)}}),n.push(`
|
|
12
12
|
---`),n.push(`
|
|
@@ -33,7 +33,7 @@ print(path)
|
|
|
33
33
|
=== Hardware Information ===`);let t=null;if(process.platform!==`win32`)try{t=O(`uname -a`,{encoding:`utf8`}).trim()}catch{}t?console.log(`System: ${t}`):(console.log(`Hostname: ${v.hostname()}`),console.log(`OS: ${v.type()} ${v.release()}`)),console.log(`Platform: ${e.platform}`),console.log(`CPU Cores: ${v.cpus().length}`),console.log(`Total System Memory: ${(v.totalmem()/1024/1024/1024).toFixed(2)} GB`);let n=e.cpuTotalBytes>0?(e.cpuUsableBytes/e.cpuTotalBytes*100).toFixed(0):0;console.log(`Usable CPU Memory: ${(e.cpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.cpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),e.hasGpu?(console.log(`
|
|
34
34
|
--- GPU Details ---`),e.devices.filter(e=>e.type===`gpu`).forEach(t=>{console.log(`GPU Backend: ${t.backend}`),console.log(`GPU Name: ${t.deviceName}`),console.log(`GPU Total Memory: ${(t.maxMemorySize/1024/1024/1024).toFixed(2)} GB`);let n=e.gpuTotalBytes>0?(e.gpuUsableBytes/e.gpuTotalBytes*100).toFixed(0):0;console.log(`GPU Usable Memory: ${(e.gpuUsableBytes/1024/1024/1024).toFixed(2)} GB (${n}% of ${(e.gpuTotalBytes/1024/1024/1024).toFixed(2)} GB)`),t.metadata&&(t.metadata.hasBFloat16&&console.log(`Supports BFloat16: Yes`),t.metadata.hasUnifiedMemory&&console.log(`Unified Memory: Yes`))})):console.log(`GPU: Not available`),console.log(`\nBackend Variant: ${e.variant}`),console.log(`Performance Score: ${e.score}`),e.fit&&(console.log(`
|
|
35
35
|
--- Model Fit Analysis ---`),console.log(`Fits in GPU: ${e.fit.fitsInGpu?`Yes`:`No`}`),console.log(`Fits in CPU: ${e.fit.fitsInCpu?`Yes`:`No`}`),console.log(`Limiting Factor: ${e.fit.limiting}`))}console.log(`
|
|
36
|
-
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Lr=e({finalizeGenerator:()=>Vr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Gr,ggmlLlm:()=>Hr,ggmlStt:()=>Wr,globalDownloadManager:()=>Rr,mlxLlm:()=>Ur,showModelsTable:()=>Nr,showSttModelsTable:()=>Fr,startGenerator:()=>Br,startModelDownload:()=>qr,status:()=>Kr,testGgmlLlmCapabilities:()=>Pr,testGgmlSttCapabilities:()=>Ir});const Z=new Map,Rr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},zr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=zr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Br(e,t){let n={"ggml-llm":{create:nn,getId:rn},"ggml-stt":{create:Gn,getId:Kn},"mlx-llm":{create:Dr,getId:Or}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Rr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Vr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Hr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},Ur={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Wr={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Gr(e,t){return e===`ggml-llm`?rn(t):e===`ggml-stt`?Kn(t):e===`mlx-llm`?Or(t):null}const Kr={getFullStatus:()=>et(Z),getGgmlLlmStatus:()=>Ze(Z),getGgmlSttStatus:()=>Qe(Z),getMlxLlmStatus:()=>$e(Z),subscribeToStatus:Ye,subscribeToStatusWithId:Xe,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};async function qr(e,t,n={}){let r={"ggml-llm":an,"ggml-stt":qn,"mlx-llm":Ar}[e];return r?r(t,Rr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Jr=`@fugood/buttress-server`,Yr=`2.24.0-beta.50`,Xr={name:Jr,version:Yr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`lib/index.mjs`},files:[`lib`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run 
build`,dev:`bun src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`8361b40d24b4714d744e1a10873057facd48c7f7`};const Zr=()=>({version:Yr,name:Jr,description:Xr.description}),Qr=typeof process<`u`&&process.versions&&process.versions.node,$r=e=>new n({adapter:Qr?t():void 0,...e}),ei=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),ti=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var ni=e=>{let t=$r(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,ti,{response:ei}),t};const ri=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var ii=$r().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return ri?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const ai=_.dirname(D(import.meta.url)),oi=async()=>{let 
e=[_.join(ai,`..`,`public`,`status.html`),_.join(ai,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},si=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},ci=async()=>{let e=await oi();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var li=$r().get(`/status`,ci).get(`/status/`,ci).get(`/buttress/status`,({store:{backend:e}})=>si(e));const ui=[`ggml-llm`,`mlx-llm`],di=new Map;function fi(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function pi(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>ui.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. 
Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let a=i[0],o=n||a.model?.repo_id;if(n){let e=i.find(e=>e.model?.repo_id===n);e&&(a=e)}else o=a.model?.repo_id;let s=a.type||`ggml-llm`,c=o,l=di.get(c);if(l?.initialized)return l;let{generators:u,server:d,...f}=t.global||{},p={...f,...a,model:{...a.model,repo_id:o}};console.log(`${r} Creating ${s} generator for ${c}`);let{id:m}=await e.startGenerator(s,p),h={id:m,type:s,config:p,repoId:o,initialized:!1};return di.set(c,h),await fi(e,s).initContext(m,{}),h.initialized=!0,console.log(`${r} Generator ready: ${c}`),h}function mi(e){let t=e.timings||{},n=t.prompt_n??t.promptN??0,r=t.cache_n??t.cacheN??0,i=t.predicted_n??t.predictedN??0;return{promptTokens:n||e.prompt_tokens||e.promptTokens||0,cachedTokens:r,completionTokens:i||e.tokens_evaluated||e.tokensEvaluated||e.tokens_predicted||e.tokensPredicted||0}}function hi(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e),i=t+n;return{prompt_tokens:i,completion_tokens:r,total_tokens:i+r}}const gi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`;async function _i(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=hi(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return 
o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function vi({global:e}){let t=$r({prefix:`/oai-compat`});return t.use(M({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await pi(a,r,c,`[OpenAI]`),t=gi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await fi(a,e.type).completion(e.id,{options:b});if(!s)return await _i(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let 
t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=hi(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const yi=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function bi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof 
e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else 
Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}function xi(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}function Si(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}async function Ci(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Si(u),l=u.stopping_word||u.stoppingWord||null,c=xi(u,o.length>0);else if(n===`error`)throw Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}function wi({global:e}){let t=$r({prefix:`/anthropic-messages`});return 
t.use(M({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=bi(o),n=await pi(a,r,o.model,`[Anthropic]`),s=yi(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await fi(a,n.type).completion(n.id,{options:l});if(!o.stream)return await Ci(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield 
i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Si(r),w=r.stopping_word||r.stoppingWord||null,C=xi(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield 
i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=bi(a),t=await pi(i,r,a.model,`[Anthropic]`),n=fi(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server 
error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any())})}),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>{let t=e.model?.repo_id||e.type;return{id:t,type:`model`,display_name:t,created_at:new Date().toISOString()}});return n.length===0&&n.push({id:`ggml-llm`,type:`model`,display_name:`ggml-llm`,created_at:new Date().toISOString()}),{data:n,has_more:!1,first_id:n[0]?.id,last_id:n.at(-1)?.id}}),t}const Ti=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Ti(n[e]||{},t):n[e]=t}),n},Ei=e=>e&&typeof e==`object`?structuredClone(e):null,Di=(e,t)=>Ti(Ei(e)||{},Ei(t)||{}),Oi=(e,t)=>Ti(structuredClone(e.global),t||{}),ki=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Oi(e,a)}}return Object.keys(e.global).length>0?Oi(e,{}):null},Ai={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},ji=e=>e?e===!0?{...Ai}:Ti(Ai,e):null,Mi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Ni=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Pi=6e4,Fi=1024*1024*50,Ii=e=>{let t=N.machineIdSync(),n=Ti({server:{id:`buttress-${t}`,name:`Buttress Server 
(${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Pi,max_body_size:Fi},autodiscover:!1},Ei(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:ji(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Ni(i.max_body_size,w.parse,Fi),session_timeout:Ni(i.session_timeout,P,Pi)},global:s,generators:r}},Li={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ri={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=Ei(i),c=Di(ki(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=Ei(i),o=Di(ki(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const 
zi={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};function Bi(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}function Vi(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}function Hi(e,t){return{initContext:Bi(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Vi(e,`${t}Release Context`)}}var Ui=Hi(e=>e.ggmlLlm,``);const Wi={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},Gi=e=>e.ggmlStt,Ki={common:Ri,ggmlLlm:Ui,ggmlStt:{initContext:Bi(Gi),async transcribe({backend:e,config:{server:t}},n,r,i){return 
console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:Vi(Gi,`Release STT Context`)},mlxLlm:Hi(e=>e.mlxLlm,`MLX `)},qi={common:Li,ggmlLlm:zi,ggmlStt:Wi,mlxLlm:zi};var Ji=Ki;const Yi=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return F.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=F.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},Xi=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof F?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:F.from(t).toString(`base64`)}:t)}catch{return e}};var Zi=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=re.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let 
n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Qi=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Zi(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const $i=()=>{let e=v.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=Zr(),ea=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function ta({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
36
|
+
=== Full Capabilities JSON ===`),console.log(JSON.stringify(u,null,2)),process.exit(0)}catch(e){console.error(`Failed to get capabilities:`,e.message),process.exit(1)}}var Lr=e({finalizeGenerator:()=>Vr,generatorRegistry:()=>Z,getCapabilities:()=>J,getModelIdentifier:()=>Gr,ggmlLlm:()=>Hr,ggmlStt:()=>Wr,globalDownloadManager:()=>Rr,mlxLlm:()=>Ur,showModelsTable:()=>Nr,showSttModelsTable:()=>Fr,startGenerator:()=>Br,startModelDownload:()=>qr,status:()=>Kr,testGgmlLlmCapabilities:()=>Pr,testGgmlSttCapabilities:()=>Ir});const Z=new Map,Rr={downloads:new Map,getDownload(e){return this.downloads.get(e)||null},setDownload(e,t){this.downloads.set(e,t)},deleteDownload(e){this.downloads.delete(e)},isDownloading(e){return this.downloads.has(e)},getActiveDownloads(){return Array.from(this.downloads.entries()).map(([e,t])=>({localPath:e,promise:t}))}},zr=e=>{let t=Z.get(e);if(!t)throw Error(`Unknown generator id "${e}"`);return t},Q=(e,t)=>{let n=zr(e);if(n.type!==t)throw Error(`Generator "${e}" does not support ${t} backend`);return n.instance};async function Br(e,t){let n={"ggml-llm":{create:nn,getId:rn},"ggml-stt":{create:Gn,getId:Kn},"mlx-llm":{create:Dr,getId:Or}}[e];if(!n)throw Error(`Unsupported backend type: ${e}`);let r=n.getId(t);if(!r)throw Error(`Buttress generator config missing repo identifier`);let i=`${e}:${r}`,a=Z.get(i);if(a)return a.refCount+=1,a.instance.resetFinalized?.(),{id:a.id,info:a.instance.info};let o=await n.create(i,t,{globalDownloadManager:Rr}),s={id:i,type:o.type,instance:o,refCount:1};return Z.set(i,s),{id:i,info:o.info}}async function Vr(e){let t=Z.get(e);return t?(--t.refCount,t.refCount<=0&&(await t.instance.finalize(),(t.instance.hasPendingReleases?.()??!1)||Z.delete(e)),!0):!1}const Hr={async initContext(e,t){return Q(e,`ggml-llm`).initContext(t)},async completion(e,t){return Q(e,`ggml-llm`).completion(t)},async tokenize(e,t){return Q(e,`ggml-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`ggml-llm`).detokenize(t)},async 
applyChatTemplate(e,t){return Q(e,`ggml-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-llm`)throw Error(`Generator "${e}" does not support ggml-llm backend`);return n.instance.releaseContext(t)}},Ur={async initContext(e,t){return Q(e,`mlx-llm`).initContext(t)},async completion(e,t){return Q(e,`mlx-llm`).completion(t)},async tokenize(e,t){return Q(e,`mlx-llm`).tokenize(t)},async detokenize(e,t){return Q(e,`mlx-llm`).detokenize(t)},async applyChatTemplate(e,t){return Q(e,`mlx-llm`).applyChatTemplate(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`mlx-llm`)throw Error(`Generator "${e}" does not support mlx-llm backend`);return n.instance.releaseContext(t)}},Wr={async initContext(e,t){return Q(e,`ggml-stt`).initContext(t)},async transcribe(e,t){return Q(e,`ggml-stt`).transcribe(t)},async transcribeData(e,t){return Q(e,`ggml-stt`).transcribeData(t)},async releaseContext(e,t){let n=Z.get(e);if(!n)return{released:!0,alreadyReleased:!0};if(n.type!==`ggml-stt`)throw Error(`Generator "${e}" does not support ggml-stt backend`);return n.instance.releaseContext(t)}};function Gr(e,t){return e===`ggml-llm`?rn(t):e===`ggml-stt`?Kn(t):e===`mlx-llm`?Or(t):null}const Kr={getFullStatus:()=>et(Z),getGgmlLlmStatus:()=>Ze(Z),getGgmlSttStatus:()=>Qe(Z),getMlxLlmStatus:()=>$e(Z),subscribeToStatus:Ye,subscribeToStatusWithId:Xe,llmStatusTracker:H,sttStatusTracker:U,statusEmitter:V};async function qr(e,t,n={}){let r={"ggml-llm":an,"ggml-stt":qn,"mlx-llm":Ar}[e];return r?r(t,Rr,n):{started:!1,localPath:null,repoId:null,error:`Unknown backend type: ${e}`}}var Jr=`@fugood/buttress-server`,Yr=`2.24.0`,Xr={name:Jr,version:Yr,main:`lib/index.mjs`,types:`lib/index.d.mts`,type:`module`,bin:{"bricks-buttress":`lib/index.mjs`},files:[`lib`,`config`,`public`],scripts:{typecheck:`tsc --noEmit`,build:`tsdown -c rolldown.config.js`,prepublish:`bun run build`,dev:`bun 
src/index.ts`,start:`bun lib/index.mjs`,"start-with-node":`node lib/index.mjs`},keywords:[`BRICKS`,`buttress`,`server`],license:`MIT`,dependencies:{"@elysiajs/cors":`^1.1.1`,"@elysiajs/node":`^1.4.2`,"@fugood/llama.node":`^1.7.0-rc.11`,"@fugood/whisper.node":`^1.0.18`,"@huggingface/gguf":`^0.3.2`,"@iarna/toml":`^3.0.0`,bytes:`^3.1.0`,elysia:`^1.4.19`,ms:`^2.1.1`,"node-machine-id":`^1.1.12`,zod:`^3.25.76`},devDependencies:{tsdown:`^0.20.1`,typescript:`^5.9.3`},gitHead:`e1d49be3eff48a93731e8bf76dfc180bce57d3e4`};const Zr=()=>({version:Yr,name:Jr,description:Xr.description}),Qr=typeof process<`u`&&process.versions&&process.versions.node,$r=e=>new n({adapter:Qr?t():void 0,...e}),ei=a.Object({id:a.String(),name:a.String(),version:a.String(),generators:a.Array(a.Object({type:a.String()})),authentication:a.Object({required:a.Boolean(),type:a.Literal(`device-group`)})}),ti=({store:{serverInfo:e}})=>({id:e.id,name:e.name,version:e.version,generators:e.generators,authentication:e.authentication});var ni=e=>{let t=$r(),n=e.autodiscover.http?.path??`/buttress/info`;return t.get(n,ti,{response:ei}),t};const ri=typeof process<`u`&&process.versions!=null&&process.versions.node!=null;var ii=$r().post(`/buttress/upload`,async({body:{file:e},store:{config:t}})=>{let n=`${Date.now()}-${e.name.replace(/[^\dA-Za-z]/g,`_`)}`,r=_.join(t.server.temp_file_dir,n);try{return ri?await g(r,await e.stream()):await g(r,await e.arrayBuffer()),{ok:!0,filename:n}}catch(e){return{ok:!1,error:String(e)}}},{body:a.Object({file:a.File()}),response:a.Object({ok:a.Boolean(),filename:a.Optional(a.String()),error:a.Optional(a.String())})}).get(`/buttress/download/:filename`,async({params:{filename:e},store:{config:t},status:n})=>{let i=_.join(t.server.temp_file_dir,e);return _.relative(t.server.temp_file_dir,i).includes(`..`)?(n(400),`Invalid file path`):r(i)},{params:a.Object({filename:a.String()})});const ai=_.dirname(D(import.meta.url)),oi=async()=>{let 
e=[_.join(ai,`..`,`public`,`status.html`),_.join(ai,`..`,`..`,`public`,`status.html`)];return(await Promise.all(e.map(e=>c.access(e).then(()=>e,()=>null)))).find(e=>e!==null)??null},si=e=>{let{status:t}=e;return t?.getFullStatus?t.getFullStatus():{timestamp:new Date().toISOString(),ggmlLlm:{generators:[],history:{}},ggmlStt:{generators:[],history:{}}}},ci=async()=>{let e=await oi();if(!e)return console.error(`[Status] Failed to find status.html in candidate paths`),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}});try{let t=await c.readFile(e,`utf-8`);return new Response(t,{headers:{"Content-Type":`text/html; charset=utf-8`}})}catch(e){return console.error(`[Status] Failed to serve status page:`,e),new Response(`Status page not found`,{status:404,headers:{"Content-Type":`text/plain`}})}};var li=$r().get(`/status`,ci).get(`/status/`,ci).get(`/buttress/status`,({store:{backend:e}})=>si(e));const ui=[`ggml-llm`,`mlx-llm`],di=new Map;function fi(e,t){return t===`mlx-llm`?e.mlxLlm:e.ggmlLlm}async function pi(e,t,n,r=`[LLM]`){let i=(t.generators||[]).filter(e=>ui.includes(e.type));if(i.length===0)throw Error(`No LLM generator configured. 
Add a [[generators]] with type = "ggml-llm" or "mlx-llm" to your config.`);let a=i[0],o=n||a.model?.repo_id;if(n){let e=i.find(e=>e.model?.repo_id===n);e&&(a=e)}else o=a.model?.repo_id;let s=a.type||`ggml-llm`,c=o,l=di.get(c);if(l?.initialized)return l;let{generators:u,server:d,...f}=t.global||{},p={...f,...a,model:{...a.model,repo_id:o}};console.log(`${r} Creating ${s} generator for ${c}`);let{id:m}=await e.startGenerator(s,p),h={id:m,type:s,config:p,repoId:o,initialized:!1};return di.set(c,h),await fi(e,s).initContext(m,{}),h.initialized=!0,console.log(`${r} Generator ready: ${c}`),h}function mi(e){let t=e.timings||{},n=t.prompt_n??t.promptN??0,r=t.cache_n??t.cacheN??0,i=t.predicted_n??t.predictedN??0;return{promptTokens:n||e.prompt_tokens||e.promptTokens||0,cachedTokens:r,completionTokens:i||e.tokens_evaluated||e.tokensEvaluated||e.tokens_predicted||e.tokensPredicted||0}}function hi(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e),i=t+n;return{prompt_tokens:i,completion_tokens:r,total_tokens:i+r}}const gi=()=>`chatcmpl-${Date.now()}-${Math.random().toString(36).slice(2,9)}`;async function _i(e,t,n,r){let i=e.getReader(),a=``,o=null,s=null,c=`stop`,l={prompt_tokens:0,completion_tokens:0,total_tokens:0};try{let e=!1;for(;!e;){let n=await i.read();if({done:e}=n,e)break;let{event:r,data:u}=n.value;if(r===`token`)u.content!=null&&(a=u.content),u.reasoning_content!=null&&(o=u.reasoning_content);else if(r===`result`)u.content==null?u.text&&(a=u.text):a=u.content,u.reasoning_content!=null&&(o=u.reasoning_content),u.tool_calls?.length>0?(s=u.tool_calls.map((e,n)=>({id:e.id||`call_${t}_${n}`,type:`function`,function:{name:e.function?.name||``,arguments:e.function?.arguments||``}})),c=`tool_calls`):c=u.interrupted?`length`:`stop`,l=hi(u);else if(r===`error`)throw Error(u.message)}}finally{i.cancel().catch(()=>{})}let u={role:`assistant`,content:a||null};return 
o&&(u.reasoning_content=o),s&&(u.tool_calls=s),{id:t,object:`chat.completion`,created:n,model:r,choices:[{index:0,message:u,finish_reason:c}],usage:l}}function vi({global:e}){let t=$r({prefix:`/oai-compat`});return t.use(M({origin:e?.openai_compat?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`],maxAge:86400,preflight:!0})),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>({id:e.model?.repo_id||e.type,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}));return n.length===0&&n.push({id:`ggml-llm`,object:`model`,created:Math.floor(Date.now()/1e3),owned_by:`local`}),{object:`list`,data:n}}),t.post(`/v1/chat/completions`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,{messages:o=[],stream:s=!1,model:c,tools:l,temperature:u,stop:d,top_p:f,max_tokens:p,presence_penalty:m,frequency_penalty:h,tool_choice:g,stream_options:_,enable_thinking:v}=e;if(!o||o.length===0)return t.status=400,{error:{message:`messages is required and must not be empty`,type:`invalid_request_error`}};try{let e=await pi(a,r,c,`[OpenAI]`),t=gi(),n=Math.floor(Date.now()/1e3),y=e.repoId||`ggml-llm`,b={reasoning_format:`auto`,messages:o,jinja:!0,add_generation_prompt:!0};u!=null&&(b.temperature=u),f!=null&&(b.top_p=f),p!=null&&(b.n_predict=p),d!=null&&(b.stop=Array.isArray(d)?d:[d]),m!=null&&(b.presence_penalty=m),h!=null&&(b.frequency_penalty=h),l!=null&&(b.tools=l),g!=null&&(b.tool_choice=g),b.enable_thinking=v??!1;let x=await fi(a,e.type).completion(e.id,{options:b});if(!s)return await _i(x,t,n,y);let S=_?.include_usage===!0,C=x.getReader(),w=``,T=``,E=new Map,D=new Map;try{let e=!1;for(;!e;){let r=await C.read();if({done:e}=r,e)break;let{event:a,data:o}=r.value;if(a===`token`){let e={};if(o.content!=null){let t=o.content;t.length>w.length&&(e.content=t.slice(w.length),w=t)}if(o.reasoning_content!=null){let 
t=o.reasoning_content;t.length>T.length&&(e.reasoning_content=t.slice(T.length),T=t)}if(o.tool_calls?.length>0){let n=[];o.tool_calls.forEach((e,r)=>{let i={index:r};D.has(r)||(D.set(r,e.id||`call_${t}_${r}`),i.id=D.get(r),i.type=`function`);let a=e.function?.arguments||``,o=E.get(r)||``,s={};!E.has(r)&&e.function?.name&&(s.name=e.function.name),a.length>o.length&&(s.arguments=a.slice(o.length),E.set(r,a)),Object.keys(s).length>0?(i.function=s,n.push(i)):i.id&&(i.function={name:e.function?.name||``,arguments:``},n.push(i))}),n.length>0&&(e.tool_calls=n)}Object.keys(e).length>0&&(yield i({data:JSON.stringify({id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:e,finish_reason:null}]})}))}else if(a===`result`){let e=`stop`;o.tool_calls?.length>0||D.size>0?e=`tool_calls`:o.interrupted&&(e=`length`);let r={id:t,object:`chat.completion.chunk`,created:n,model:y,choices:[{index:0,delta:{},finish_reason:e}]};S&&(r.usage=hi(o)),yield i({data:JSON.stringify(r)})}else a===`error`&&(yield i({data:JSON.stringify({error:{message:o.message,type:`server_error`}})}))}yield i({data:`[DONE]`})}finally{C.cancel().catch(()=>{})}}catch(e){return console.error(`[OpenAI] Chat completion error:`,e),t.status=500,{error:{message:e.message||`Internal server error`,type:`server_error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),max_tokens:a.Optional(a.Number()),stop:a.Optional(a.Union([a.String(),a.Array(a.String())])),presence_penalty:a.Optional(a.Number()),frequency_penalty:a.Optional(a.Number()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),stream_options:a.Optional(a.Object({include_usage:a.Optional(a.Boolean())})),enable_thinking:a.Optional(a.Boolean())})}),t}const yi=()=>`msg_${Date.now()}${Math.random().toString(36).slice(2,11)}`;function bi(e){let t={},n=[];if(e.system!=null){let t=``;if(typeof 
e.system==`string`)t=e.system;else if(Array.isArray(e.system))for(let n of e.system)n?.type===`text`&&typeof n.text==`string`&&(t+=n.text);t&&n.push({role:`system`,content:t})}if(!Array.isArray(e.messages))throw Error(`'messages' is required and must be an array`);for(let t of e.messages){let e=t?.role||`user`;if(t?.content==null){if(e===`assistant`)continue;n.push(t);continue}if(typeof t.content==`string`){n.push({role:e,content:t.content});continue}if(!Array.isArray(t.content)){n.push(t);continue}let r=[],i=[],a=[],o=``,s=!1;for(let e of t.content){let t=e?.type||``;if(t===`text`)i.push({type:`text`,text:e.text||``});else if(t===`thinking`)o+=e.thinking||``;else if(t===`image`){let t=e.source||{};if(t.type===`base64`){let e=t.media_type||`image/jpeg`,n=t.data||``;i.push({type:`image_url`,image_url:{url:`data:${e};base64,${n}`}})}else t.type===`url`&&i.push({type:`image_url`,image_url:{url:t.url||``}})}else if(t===`tool_use`)r.push({id:e.id||``,type:`function`,function:{name:e.name||``,arguments:JSON.stringify(e.input??{})}}),s=!0;else if(t===`tool_result`){let t=e.tool_use_id||``,n=``,r=e.content;if(typeof r==`string`)n=r;else if(Array.isArray(r))for(let e of r)e?.type===`text`&&(n+=e.text||``);a.push({role:`tool`,tool_call_id:t,content:n})}}if(i.length>0||s||o){let t={role:e};i.length>0?t.content=i:(s||o)&&(t.content=``),r.length>0&&(t.tool_calls=r),o&&(t.reasoning_content=o),n.push(t)}for(let e of a)n.push(e)}if(t.messages=n,Array.isArray(e.tools)&&(t.tools=e.tools.map(e=>({type:`function`,function:{name:e.name||``,description:e.description||``,parameters:e.input_schema||{}}}))),e.tool_choice&&typeof e.tool_choice==`object`){let n=e.tool_choice.type;n===`auto`?t.tool_choice=`auto`:n===`any`||n===`tool`?t.tool_choice=`required`:n===`none`&&(t.tool_choice=`none`)}else 
Array.isArray(t.tools)&&t.tools.length>0&&(t.tool_choice=`auto`);e.stop_sequences!=null&&(t.stop=Array.isArray(e.stop_sequences)?e.stop_sequences:[e.stop_sequences]),t.max_tokens=e.max_tokens??4096;for(let n of[`temperature`,`top_p`,`top_k`,`stream`])e[n]!=null&&(t[n]=e[n]);return e.thinking&&typeof e.thinking==`object`&&e.thinking.type===`enabled`&&(t.enable_thinking=!0,e.thinking.budget_tokens!=null&&(t.thinking_budget_tokens=e.thinking.budget_tokens)),t}function xi(e,t){return t?`tool_use`:e.stopping_word||e.stoppingWord?`stop_sequence`:e.interrupted||e.truncated?`max_tokens`:`end_turn`}function Si(e){let{promptTokens:t,cachedTokens:n,completionTokens:r}=mi(e);return{cache_read_input_tokens:n,input_tokens:t,output_tokens:r}}async function Ci(e,t,n){let r=e.getReader(),i=``,a=``,o=[],s={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},c=`end_turn`,l=null;try{let e=!1;for(;!e;){let t=await r.read();if({done:e}=t,e)break;let{event:n,data:u}=t.value;if(n===`token`)u.content!=null&&(i=u.content),u.reasoning_content!=null&&(a=u.reasoning_content);else if(n===`result`)u.content==null?u.text&&u.reasoning_content==null&&(i=u.text):i=u.content,u.reasoning_content!=null&&(a=u.reasoning_content),Array.isArray(u.tool_calls)&&(o=u.tool_calls),s=Si(u),l=u.stopping_word||u.stoppingWord||null,c=xi(u,o.length>0);else if(n===`error`)throw Error(u.message||`completion error`)}}finally{r.cancel().catch(()=>{})}let u=[];a&&u.push({type:`thinking`,thinking:a,signature:``}),i&&u.push({type:`text`,text:i});for(let e of o){let t={};try{t=JSON.parse(e.function?.arguments||`{}`)}catch{t={}}u.push({type:`tool_use`,id:e.id||`toolu_${Math.random().toString(36).slice(2,11)}`,name:e.function?.name||``,input:t})}return{id:t,type:`message`,role:`assistant`,content:u,model:n,stop_reason:c,stop_sequence:l,usage:s}}function wi({global:e}){let t=$r({prefix:`/anthropic-messages`});return 
t.use(M({origin:e?.anthropic_messages?.cors_allowed_origins??!1,methods:[`GET`,`POST`,`OPTIONS`],allowedHeaders:[`Content-Type`,`Authorization`,`x-api-key`,`anthropic-version`],maxAge:86400,preflight:!0})),t.post(`/v1/messages`,async function*({body:e,set:t,store:n}){let{config:r,backend:a}=n,o=e;try{if(!Array.isArray(o.messages)||o.messages.length===0)return t.status=400,{type:`error`,error:{type:`invalid_request_error`,message:`messages is required and must not be empty`}};let e=bi(o),n=await pi(a,r,o.model,`[Anthropic]`),s=yi(),c=n.repoId||`ggml-llm`,l={reasoning_format:`auto`,messages:e.messages,jinja:!0,add_generation_prompt:!0};e.temperature!=null&&(l.temperature=e.temperature),e.top_p!=null&&(l.top_p=e.top_p),e.top_k!=null&&(l.top_k=e.top_k),e.max_tokens!=null&&(l.n_predict=e.max_tokens),e.stop!=null&&(l.stop=e.stop),e.tools!=null&&(l.tools=e.tools),e.tool_choice!=null&&(l.tool_choice=e.tool_choice),l.enable_thinking=e.enable_thinking??!1,e.thinking_budget_tokens!=null&&(l.thinking_budget_tokens=e.thinking_budget_tokens);let u=await fi(a,n.type).completion(n.id,{options:l});if(!o.stream)return await Ci(u,s,c);let d=u.getReader(),f=``,p=``,m=new Map,h=new Map,g=new Map,_=new Set,v=!1,y=!1,b=0,x=0,S={cache_read_input_tokens:0,input_tokens:0,output_tokens:0},C=`end_turn`,w=null,T=!1,E=e=>(v?1:0)+(y?1:0)+e;try{let e=!1;for(;!e;){let t=await d.read();if({done:e}=t,e)break;let{event:n,data:r}=t.value;if(n===`token`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}if(r.reasoning_content!=null){let e=r.reasoning_content;e.length>p.length&&(v||(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:0,content_block:{type:`thinking`,thinking:``}})}),v=!0,b=1),yield 
i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`thinking_delta`,thinking:e.slice(p.length)}})}),p=e)}if(r.content!=null){let e=r.content;e.length>f.length&&(y||=(yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:b,content_block:{type:`text`,text:``}})}),!0),yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:b,delta:{type:`text_delta`,text:e.slice(f.length)}})}),f=e)}if(Array.isArray(r.tool_calls)&&r.tool_calls.length>0){for(let e=0;e<r.tool_calls.length;e+=1){let t=r.tool_calls[e],n=E(e),a=t?.function?.arguments||``,o=m.get(e)||``;if(!_.has(e)){let r=t?.id||`toolu_${s}_${e}`,a=t?.function?.name||g.get(e)||``;h.set(e,r),g.set(e,a),_.add(e),yield i({event:`content_block_start`,data:JSON.stringify({type:`content_block_start`,index:n,content_block:{type:`tool_use`,id:r,name:a,input:{}}})})}a.length>o.length&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:n,delta:{type:`input_json_delta`,partial_json:a.slice(o.length)}})}),m.set(e,a))}x=r.tool_calls.length}}else if(n===`result`){if(!T){let e=Si(r);yield i({event:`message_start`,data:JSON.stringify({type:`message_start`,message:{id:s,type:`message`,role:`assistant`,content:[],model:c,stop_reason:null,stop_sequence:null,usage:e}})}),T=!0}Array.isArray(r.tool_calls)&&(x=Math.max(x,r.tool_calls.length)),S=Si(r),w=r.stopping_word||r.stoppingWord||null,C=xi(r,_.size>0)}else if(n===`error`){yield i({event:`error`,data:JSON.stringify({type:`error`,error:{type:`api_error`,message:r.message||`completion error`}})});return}}v&&(yield i({event:`content_block_delta`,data:JSON.stringify({type:`content_block_delta`,index:0,delta:{type:`signature_delta`,signature:``}})}),yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:0})})),y&&(yield 
i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:b})}));for(let e of[..._].sort((e,t)=>e-t))yield i({event:`content_block_stop`,data:JSON.stringify({type:`content_block_stop`,index:E(e)})});yield i({event:`message_delta`,data:JSON.stringify({type:`message_delta`,delta:{stop_reason:C,stop_sequence:w},usage:{output_tokens:S.output_tokens}})}),yield i({event:`message_stop`,data:JSON.stringify({type:`message_stop`})})}finally{d.cancel().catch(()=>{})}}catch(e){return console.error(`[Anthropic] Messages error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),max_tokens:a.Optional(a.Number()),stream:a.Optional(a.Boolean()),temperature:a.Optional(a.Number()),top_p:a.Optional(a.Number()),top_k:a.Optional(a.Number()),stop_sequences:a.Optional(a.Union([a.String(),a.Array(a.String())])),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any()),thinking:a.Optional(a.Any()),metadata:a.Optional(a.Any())})}),t.post(`/v1/messages/count_tokens`,async({body:e,set:t,store:n})=>{let{config:r,backend:i}=n,a=e;try{let e=bi(a),t=await pi(i,r,a.model,`[Anthropic]`),n=fi(i,t.type),o={messages:e.messages,add_generation_prompt:!0,jinja:!0};e.tools!=null&&(o.tools=e.tools);let s=await n.applyChatTemplate(t.id,o),c=typeof s==`string`?s:s?.prompt||``,l=await n.tokenize(t.id,{text:c,add_special:!0,parse_special:!0});return{input_tokens:(Array.isArray(l)?l:l?.tokens||[]).length}}catch(e){return console.error(`[Anthropic] count_tokens error:`,e),t.status=500,{type:`error`,error:{type:`api_error`,message:e?.message||`Internal server 
error`}}}},{body:a.Object({model:a.Optional(a.String()),messages:a.Array(a.Any()),system:a.Optional(a.Any()),tools:a.Optional(a.Array(a.Any())),tool_choice:a.Optional(a.Any())})}),t.get(`/v1/models`,({store:e})=>{let{config:t}=e,n=(t.generators||[]).filter(e=>ui.includes(e.type)).map(e=>{let t=e.model?.repo_id||e.type;return{id:t,type:`model`,display_name:t,created_at:new Date().toISOString()}});return n.length===0&&n.push({id:`ggml-llm`,type:`model`,display_name:`ggml-llm`,created_at:new Date().toISOString()}),{data:n,has_more:!1,first_id:n[0]?.id,last_id:n.at(-1)?.id}}),t}const Ti=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=Ti(n[e]||{},t):n[e]=t}),n},Ei=e=>e&&typeof e==`object`?structuredClone(e):null,Di=(e,t)=>Ti(Ei(e)||{},Ei(t)||{}),Oi=(e,t)=>Ti(structuredClone(e.global),t||{}),ki=(e,t,n,r)=>{if(e.generators.length>0){let i=e.generators.filter(e=>e?.type===n);if(i.length>0&&r){let a=i.find(e=>t.getModelIdentifier(n,e)===r);if(a)return Oi(e,a)}}return Object.keys(e.global).length>0?Oi(e,{}):null},Ai={udp:{port:8089,announcements:{enabled:!0,interval:5e3},requests:{enabled:!0,responseDelay:100}},http:{enabled:!0,path:`/buttress/info`,cors:!0}},ji=e=>e?e===!0?{...Ai}:Ti(Ai,e):null,Mi=(e,t)=>{if(!e.generators||e.generators.length===0)return t.map(e=>({type:e}));let n=new Set;return e.generators.forEach(e=>{e.type&&n.add(e.type)}),n.size===0?t.map(e=>({type:e})):Array.from(n).map(e=>({type:e}))},Ni=(e,t,n)=>e===void 0?n:typeof e==`number`?e:t(e)??n,Pi=6e4,Fi=1024*1024*50,Ii=e=>{let t=N.machineIdSync(),n=Ti({server:{id:`buttress-${t}`,name:`Buttress Server 
(${t.slice(-8)})`,port:2080,temp_file_dir:_.join(v.tmpdir(),`.buttress`),session_timeout:Pi,max_body_size:Fi},autodiscover:!1},Ei(e)||{}),r=Array.isArray(n.generators)?n.generators:[],{server:i,generators:a,autodiscover:o,...s}=n;return{autodiscover:ji(o),server:{id:i.id,name:i.name,port:i.port,log_level:i.log_level,temp_file_dir:i.temp_file_dir,max_body_size:Ni(i.max_body_size,w.parse,Fi),session_timeout:Ni(i.session_timeout,P,Pi)},global:s,generators:r}},Li={getCapabilities:j.tuple([j.object({type:j.string().optional().default(`ggml-llm`),config:j.any().optional(),currentClientCapabilities:j.any().optional(),options:j.any().optional()}).nullable().optional()]),startGenerator:j.tuple([j.string(),j.any().optional()]),finalizeGenerator:j.tuple([j.string()])};var Ri={async getCapabilities({backend:e,config:t},n=null){console.log(`[Server] Get Capabilities:`,n);let{type:r=`ggml-llm`,config:i,currentClientCapabilities:a=null,options:o={}}=n||{type:`ggml-llm`},s=Ei(i),c=Di(ki(t,e,r,e.getModelIdentifier(r,s)),i);if(Object.keys(c).length===0)throw Error(`Buttress server missing generator configuration`);return c.backend=c.backend||{},c.backend.type||(c.backend.type=r),e.getCapabilities(r,a,{...o,config:c})},async startGenerator({backend:e,config:t,session:n},r,i){console.log(`[Server] Start Generator:`,r,i);let a=Ei(i),o=Di(ki(t,e,r,e.getModelIdentifier(r,a)),i);if(Object.keys(o).length===0)throw Error(`Buttress server missing generator configuration`);o.backend=o.backend||{},o.backend.type||(o.backend.type=r);let s=await e.startGenerator(r,o);return n.generators.add(s.id),s},async finalizeGenerator({backend:e,session:t},n){return console.log(`[Server] Finalize Generator:`,n),t.generators.delete(n),e.finalizeGenerator(n)}};const 
/* zi: per-method argument-tuple schemas for the LLM RPC surface (first element is always a string id).
   `j` looks like the zod namespace - TODO confirm against the bundle's imports.
   NOTE(review): releaseContext accepts only the id here, while the handler built by Vi below also reads a
   second `force` argument - confirm whether `force` is meant to be accepted by this schema. */
zi={initContext:j.tuple([j.string(),j.any().optional()]),completion:j.tuple([j.string(),j.any().optional()]),tokenize:j.tuple([j.string(),j.any()]),detokenize:j.tuple([j.string(),j.any()]),applyChatTemplate:j.tuple([j.string(),j.any()]),releaseContext:j.tuple([j.string()])};
/**
 * Bi(e): build a streaming `initContext` handler.
 * `e` maps the backend object to one of its sub-APIs (it is called as `e(backend)`).
 * The returned handler takes ({backend, session}, id, options) and returns `new s({...})`
 * (`s` is presumably a ReadableStream constructor - verify). Inside `start`:
 *   - initContext(id, {...options, onProgress}) is awaited; every progress callback is enqueued
 *     as {event:`progress`, data:{progress}};
 *   - the id is recorded in session.initializedContexts;
 *   - the handler then waits 1000 ms (reason not visible in this chunk);
 *   - the result, minus its `download` property, is enqueued as {event:`result`, data:{result}}
 *     and the stream is closed.
 * Any thrown error is forwarded to the stream via controller.error().
 */
function Bi(e){return function({backend:t,session:n},r,i){return new s({async start(a){try{let o=await e(t).initContext(r,{...i,onProgress:e=>{a.enqueue({event:`progress`,data:{progress:e}})}});n.initializedContexts.add(r),await new Promise(e=>setTimeout(e,1e3));let{download:s,...c}=o||{};a.enqueue({event:`result`,data:{result:c}}),a.close()}catch(e){a.error(e)}}})}}
/**
 * Vi(e, t): build a `releaseContext` handler; `t` is the label used in log lines.
 * The handler only releases contexts that this session registered in session.initializedContexts:
 * the id is removed from the set first, then `e(backend).releaseContext(id, {force})` is returned.
 * For any other id it logs a "skipped" line and returns {released:false, skipped:true}.
 */
function Vi(e,t){return async function({backend:n,session:r},i,a){return console.log(`[Server] ${t}:`,{id:i,force:a}),r.initializedContexts.has(i)?(r.initializedContexts.delete(i),e(n).releaseContext(i,{force:a})):(console.log(`[Server] ${t} skipped - not initialized by this session:`,{id:i}),{released:!1,skipped:!0})}}
/**
 * Hi(e, t): assemble the handler table for an LLM-style backend.
 * `e` selects the sub-API from the backend; `t` is a prefix inserted into every log label.
 * completion / tokenize / detokenize / applyChatTemplate log {id, property} and delegate unchanged;
 * initContext comes from Bi and releaseContext from Vi.
 */
function Hi(e,t){return{initContext:Bi(e),completion({backend:n},r,i){return console.log(`[Server] ${t}Completion:`,{id:r,property:i}),e(n).completion(r,i)},async tokenize({backend:n},r,i){return console.log(`[Server] ${t}Tokenize:`,{id:r,property:i}),e(n).tokenize(r,i)},async detokenize({backend:n},r,i){return console.log(`[Server] ${t}Detokenize:`,{id:r,property:i}),e(n).detokenize(r,i)},async applyChatTemplate({backend:n},r,i){return console.log(`[Server] ${t}Apply Chat Template:`,{id:r,property:i}),e(n).applyChatTemplate(r,i)},releaseContext:Vi(e,`${t}Release Context`)}}
/* Ui: handler table bound to backend.ggmlLlm, with an empty log prefix. */
var Ui=Hi(e=>e.ggmlLlm,``);
/* Wi: argument-tuple schemas for the speech-to-text surface; transcribeData takes a Buffer or Uint8Array payload. */
const Wi={initContext:j.tuple([j.string(),j.any().optional()]),transcribe:j.tuple([j.string(),j.string(),j.any().optional()]),transcribeData:j.tuple([j.string(),j.union([j.instanceof(Buffer),j.instanceof(Uint8Array)]),j.any().optional()]),releaseContext:j.tuple([j.string()])},
/* Gi: selector for the speech-to-text sub-API of the backend. */
Gi=e=>e.ggmlStt,
/* Ki: handler tables grouped by namespace; the body of `transcribe` continues on the following line. */
Ki={common:Ri,ggmlLlm:Ui,ggmlStt:{initContext:Bi(Gi),async transcribe({backend:e,config:{server:t}},n,r,i){return 
console.log(`[Server] Transcribe:`,{id:n,audioPath:r,options:i}),e.ggmlStt.transcribe(n,{audioPath:_.join(t.temp_file_dir,r),options:i})},async transcribeData({backend:e},t,n,r){return console.log(`[Server] Transcribe Data:`,{id:t,audioDataLength:n?.length||0,options:r}),e.ggmlStt.transcribeData(t,{audioData:n,options:r})},releaseContext:Vi(Gi,`Release STT Context`)},mlxLlm:Hi(e=>e.mlxLlm,`MLX `)},qi={common:Li,ggmlLlm:zi,ggmlStt:Wi,mlxLlm:zi};var Ji=Ki;const Yi=e=>{try{return JSON.parse(e,(e,t)=>{if(!t)return t;if(t?.type===`Buffer`&&t?.data)return F.from(t.data,`base64`);if(t?.type===`Uint8Array`&&t?.data){let e=F.from(t.data,`base64`);return e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)}return t?.type===`Error`&&t?.name&&t?.message?Error(t.name,t.message):t})}catch{return e}},Xi=e=>{try{return JSON.stringify(e,(e,t)=>t instanceof Error?{type:`Error`,name:t.name,message:t.message}:t instanceof F?{type:`Buffer`,data:t.toString(`base64`)}:t instanceof Uint8Array?{type:`Uint8Array`,data:F.from(t).toString(`base64`)}:t)}catch{return e}};var Zi=class{name=`udp`;socket=null;announcementTimer=null;config;getServerInfo;port;constructor(e,t){this.config=e,this.getServerInfo=t,this.port=e.port??8089}async start(){if(this.socket=re.createSocket({type:`udp4`,reuseAddr:!0}),this.socket.on(`message`,(e,t)=>{this.handleMessage(e,t)}),this.socket.on(`error`,e=>{console.error(`[Autodiscover UDP] Socket error:`,e.message)}),await new Promise((e,t)=>{this.socket.bind(this.port,()=>{this.socket.setBroadcast(!0),console.log(`[Autodiscover UDP] Listening on port ${this.port}`),e()}),this.socket.once(`error`,t)}),this.config.announcements.enabled){let e=this.config.announcements.interval??5e3;this.announcementTimer=setInterval(()=>{this.sendAnnouncement()},e),this.sendAnnouncement()}}async stop(){this.announcementTimer&&=(clearInterval(this.announcementTimer),null),this.socket&&=(await new Promise(e=>{this.socket.close(()=>e())}),null)}handleMessage(e,t){try{let 
n=JSON.parse(e.toString());if(n.t===`QUERY`&&this.config.requests.enabled){let e=n.d,r=this.config.requests.responseDelay??0,i=r>0?Math.random()*r:0;setTimeout(()=>{this.sendResponse(e.id,t)},i)}}catch{}}sendAnnouncement(){if(!this.socket)return;let e={t:`ANNOUNCE`,v:`1.0`,d:{info:this.getServerInfo()}},t=Buffer.from(JSON.stringify(e));this.socket.send(t,0,t.length,this.port,`255.255.255.255`,e=>{e&&console.error(`[Autodiscover UDP] Announcement error:`,e.message)})}sendResponse(e,t){if(!this.socket)return;let n={t:`RESPONSE`,v:`1.0`,d:{request_id:e,info:this.getServerInfo()}},r=Buffer.from(JSON.stringify(n));this.socket.send(r,0,r.length,t.port,t.address,e=>{e&&console.error(`[Autodiscover UDP] Response error:`,e.message)})}},Qi=class{transports=[];started=!1;constructor(e,t){this.config=e,this.getServerInfo=t,(e.udp?.announcements?.enabled||e.udp?.requests?.enabled)&&this.transports.push(new Zi(e.udp,t))}async start(){this.started||=((await Promise.allSettled(this.transports.map(e=>e.start()))).forEach((e,t)=>{e.status===`rejected`&&console.error(`[Autodiscover] Failed to start ${this.transports[t].name}:`,e.reason)}),!0)}async stop(){this.started&&=(await Promise.allSettled(this.transports.map(e=>e.stop())),!1)}};const $i=()=>{let e=v.networkInterfaces();return Object.values(e).flat().find(e=>e?.family===`IPv4`&&!e?.internal)?.address||null},$=Zr(),ea=e=>{if(!e)return{repoId:null,filename:null};let[t,n]=e.split(`:`);return{repoId:t,filename:n||null}};async function ta({modelIds:e=[],defaultConfig:t=null}={}){let n=[];console.log(`${$.name} v${$.version}`),console.log(`Generating model capabilities comparison...
|
|
37
37
|
`),n.push(`${$.name} v${$.version}`),n.push(`## Model Capabilities Comparison
|
|
38
38
|
`),(!e||e.length===0)&&(console.error(`Error: No model IDs provided`),process.exit(1));try{let r=(e={},t={})=>{let n=Array.isArray(e)?[...e]:{...e};return Object.entries(t||{}).forEach(([e,t])=>{t&&typeof t==`object`&&!Array.isArray(t)?n[e]=r(n[e]||{},t):n[e]=t}),n},{server:i,generators:a=[],...o}=t||{},s=e=>r(structuredClone(o),e||{}),c=e=>{if(Array.isArray(a)&&a.length>0){let t=a.filter(e=>e?.type===`ggml-llm`);if(t.length>0&&e){let n=t.find(t=>t.model?.repo_id===e);if(n)return s(n)}}return Object.keys(o).length>0?s({}):null},l=[];for(let t=0;t<e.length;t+=1){let n=e[t];console.log(`[${t+1}/${e.length}] Analyzing ${n}...`);let r=c(n);r={...r||{},model:{...o.runtime,...r?.model||{},repo_id:n}};let i=await J(`ggml-llm`,null,{config:r,includeBreakdown:!0});l.push({modelId:n,capabilities:i,modelInfo:i.buttress?.selected||null,modelConfig:i.modelConfig||null})}let u=e=>e?(e/1024/1024/1024).toFixed(2):`N/A`,d=e=>e?`✅`:`🚫`;n.push(`| Model ID | Size (GB) | Context Size | KV Cache Size (GB) | Recurrent Mem (GB) | Total Required Memory (GB) | Fits GPU (Full) | Fits CPU (Full) |`),n.push(`|----------|-----------|--------------|--------------------|--------------------|----------------------------|-----------------|-----------------|`),l.forEach(({modelId:e,modelInfo:t,modelConfig:r})=>{let i=u(t?.modelBytes),a=r?.nCtx||t?.kvInfo?.nCtxTrain||`N/A`,o=Ue(t),s=Number(a),c=t?.kvCacheBytes||(o&&Number.isFinite(s)&&s>0?o(s):o&&o(t?.kvInfo?.nCtxTrain||0))||null,l=u(c),f=t?.recurrentMemoryBytes||0,p=f>0?u(f):`-`,m=u(t?.modelBytes&&(c!=null||f>0)?t.modelBytes+(c||0)+f:t?.fit?.totalRequiredBytes),h=d(t?.fit?.fitsInGpu),g=d(t?.fit?.fitsInCpu);n.push(`| ${e} | ${i} | ${a} | ${l} | ${p} | ${m} | ${h} | ${g} |`);let _=t?.memoryLimitedCtx!=null||t?.limitedFit!=null,v=!t?.fit?.fitsInGpu||!t?.fit?.fitsInCpu;if(_&&v){let 
e=t?.memoryLimitedCtx||a,r=Number(e),s=t?.limitedKvCacheBytes||o&&Number.isFinite(r)&&r>0&&o(r)||null,c=u(s),h=u(t?.modelBytes&&(s!=null||f>0)?t.modelBytes+(s||0)+f:t?.limitedFit?.totalRequiredBytes),g=d(t?.limitedFit?.fitsInGpu),_=d(t?.limitedFit?.fitsInCpu);(e!==a||c!==l||h!==m)&&n.push(`| ↳ Limited | ${i} | ${e} | ${c} | ${p} | ${h} | ${g} | ${_} |`)}}),n.push(`
|
|
39
39
|
---`),n.push(`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/buttress-server",
|
|
3
|
-
"version": "2.24.0-beta.50",
|
|
3
|
+
"version": "2.24.0",
|
|
4
4
|
"main": "lib/index.mjs",
|
|
5
5
|
"types": "lib/index.d.mts",
|
|
6
6
|
"type": "module",
|
|
@@ -43,5 +43,5 @@
|
|
|
43
43
|
"tsdown": "^0.20.1",
|
|
44
44
|
"typescript": "^5.9.3"
|
|
45
45
|
},
|
|
46
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "e1d49be3eff48a93731e8bf76dfc180bce57d3e4"
|
|
47
47
|
}
|