@symerian/symi 3.5.4 → 3.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{agent-Bl0Wfrd4.js → agent-8EuVoCbs.js} +11 -11
- package/dist/{agent-BsRlH9Ez.js → agent-B87OYNFx.js} +9 -9
- package/dist/{agents-Ch38fapn.js → agents-CNbjbSD0.js} +11 -11
- package/dist/{audit-CcxTZUi-.js → audit-Bk6kay2Z.js} +9 -9
- package/dist/{audit-CojL8qpM.js → audit-CtrXFZb5.js} +9 -9
- package/dist/{auth-choice-CQnJ5pRD.js → auth-choice-BL4lqaJL.js} +8 -8
- package/dist/{auth-choice-O55PGflm.js → auth-choice-CB0rQByJ.js} +8 -8
- package/dist/{auth-choice-options-DfT-D0Cb.js → auth-choice-options-DmXqtnP9.js} +1 -1
- package/dist/{auth-choice-options-osb2GMd4.js → auth-choice-options-od_KpHe8.js} +1 -1
- package/dist/{auth-choice-prompt-6_u7zS1M.js → auth-choice-prompt-BnvC700A.js} +1 -1
- package/dist/{auth-choice-prompt-SCWccuw0.js → auth-choice-prompt-kt2FcieM.js} +1 -1
- package/dist/{auth-token-Dvw-1mFR.js → auth-token-DyhvQgv4.js} +1 -1
- package/dist/{auth-token-t_CY5BPB.js → auth-token-lPWiklDD.js} +1 -1
- package/dist/{banner-TDzek44y.js → banner-D-ssxnQN.js} +1 -1
- package/dist/{bonjour-discovery-BjSUj2V6.js → bonjour-discovery-CyLfNUkJ.js} +1 -1
- package/dist/{bonjour-discovery-FJyieAXp.js → bonjour-discovery-DuObXMh2.js} +1 -1
- package/dist/breakdown-B_Dhhm3K.js +753 -0
- package/dist/breakdown-Cszrv-Lf.js +753 -0
- package/dist/{browser-cli-BXH0KbhG.js → browser-cli-DGjYBquX.js} +9 -9
- package/dist/{browser-cli-UZqxfaV_.js → browser-cli-rZb2WjFF.js} +9 -9
- package/dist/build-info.json +3 -3
- package/dist/bundled/boot-md/handler.js +10 -10
- package/dist/bundled/session-memory/handler.js +9 -9
- package/dist/{call-CKm3T_ar.js → call-DKQC0JT1.js} +1 -1
- package/dist/{call-DrkOiHjS.js → call-ogggp9QZ.js} +1 -1
- package/dist/canvas-host/a2ui/.bundle.hash +1 -1
- package/dist/{channel-options-BEM8ruys.js → channel-options-DLV6zIpW.js} +2 -2
- package/dist/{channel-options-zAbgRuGY.js → channel-options-_DJXj921.js} +2 -2
- package/dist/{channels-cli-cRWPvOQD.js → channels-cli-2OTAQPvZ.js} +54 -53
- package/dist/{channels-cli-DdKnAzW9.js → channels-cli-CUsX2v-1.js} +54 -53
- package/dist/{chrome-C_I81hbq.js → chrome-3jl2ulOE.js} +4 -4
- package/dist/{chrome-zElD4rpN.js → chrome-D1eO2jfe.js} +1 -1
- package/dist/{chrome-rzK8edU7.js → chrome-DJChpTwP.js} +1 -1
- package/dist/{chrome-BKUACyeO.js → chrome-OTJg3QKn.js} +4 -4
- package/dist/{clack-prompter-CuIw5vQW.js → clack-prompter-_kEnSVz4.js} +4 -4
- package/dist/{clack-prompter-Bc38EIYW.js → clack-prompter-v9MYo-5u.js} +4 -4
- package/dist/{cli--UhNWFtQ.js → cli-D4cMWhob.js} +36 -35
- package/dist/{cli-BJDlZfYi.js → cli-Qq7pr2SX.js} +36 -35
- package/dist/{command-registry-COpZWfkh.js → command-registry-Br9vlIL_.js} +11 -11
- package/dist/{commands-registry-DPZevjMh.js → commands-registry-BuOUfpmg.js} +31 -0
- package/dist/{commands-registry-VfAQOVZO.js → commands-registry-C2Hvwjym.js} +31 -0
- package/dist/{commands-registry-BisT2Fcd.js → commands-registry-CJPBJPlh.js} +31 -0
- package/dist/{commands-registry-DFYDtKr_.js → commands-registry-DG2kGyg5.js} +31 -0
- package/dist/{completion-cli-EltMSZer.js → completion-cli-CKLoZFMQ.js} +3 -3
- package/dist/{completion-cli-DVF5x7Kp.js → completion-cli-P5zBWNnc.js} +1 -1
- package/dist/{config-D7SS4vdk.js → config-B4jkreCN.js} +13 -2
- package/dist/{config-_DaupjQd.js → config-D7TcU4qN.js} +13 -2
- package/dist/{config-cli-kN07V9j6.js → config-cli-CcJOubCt.js} +3 -3
- package/dist/{config-cli-DaPePEIB.js → config-cli-NvqRZAqr.js} +3 -3
- package/dist/{config-guard-SNMCV6sc.js → config-guard-CLfXzB_z.js} +2 -2
- package/dist/{config-guard-CN1qhDyB.js → config-guard-D58THqAB.js} +8 -8
- package/dist/{config-validation-DS5Jr_iO.js → config-validation-DQ-lm2Qx.js} +1 -1
- package/dist/{config-validation-DGWvkKov.js → config-validation-DhsYEtLL.js} +1 -1
- package/dist/{configure-id9imsuH.js → configure-B1RJ6bi3.js} +22 -22
- package/dist/{configure-C9Z4clKW.js → configure-C7BfUqKR.js} +22 -22
- package/dist/{control-service-CJWzOFkK.js → control-service-DOGU9T2P.js} +4 -4
- package/dist/{control-service-DvK2HPC2.js → control-service-DPzhv357.js} +4 -4
- package/dist/cost-cli-C9LvOtGz.js +138 -0
- package/dist/cost-cli-CmfkZsmZ.js +133 -0
- package/dist/{cron-cli-BIIzbF-W.js → cron-cli-DtDqHIZn.js} +7 -7
- package/dist/{cron-cli-BNx0V18_.js → cron-cli-L4iwVTNV.js} +7 -7
- package/dist/{daemon-cli-CGfWQN7c.js → daemon-cli-39fFIU2G.js} +15 -15
- package/dist/{daemon-cli-DCbqVfyu.js → daemon-cli-B5TdtWCQ.js} +15 -15
- package/dist/daemon-cli.js +13 -2
- package/dist/{daemon-runtime-BD_EkuZP.js → daemon-runtime-7YBz5otm.js} +3 -3
- package/dist/{daemon-runtime-CAdn50ea.js → daemon-runtime-DaJ4Tf2a.js} +3 -3
- package/dist/{deliver-6fYMGY7T.js → deliver-0nDhDdKd.js} +2 -2
- package/dist/{deliver-DyO3QD8O.js → deliver-BiWlR84Y.js} +5 -5
- package/dist/{deliver-DB4v0Tyl.js → deliver-C81eqdrP.js} +2 -2
- package/dist/{deliver-Cjyb6h4g.js → deliver-f3cIWxXT.js} +5 -5
- package/dist/{deps-Bt6gnwqB.js → deps-D5me2CAW.js} +1 -1
- package/dist/{devices-cli-CK5iNr60.js → devices-cli-DuVhACtW.js} +4 -4
- package/dist/{devices-cli-Dg4sVTTI.js → devices-cli-Ec6d58OB.js} +4 -4
- package/dist/{diagnostics-Ee2qfR9V.js → diagnostics-BZH08r5U.js} +1 -1
- package/dist/{diagnostics-4PsqURzT.js → diagnostics-CdawaB0U.js} +1 -1
- package/dist/{directory-cli-CIX9qXWA.js → directory-cli-BQrGwwjf.js} +6 -6
- package/dist/{directory-cli-CR-4DW4i.js → directory-cli-FR_KoYon.js} +6 -6
- package/dist/{dns-cli-Cd3EpJKL.js → dns-cli-CH2QMf70.js} +4 -4
- package/dist/{dns-cli-DzE58v7c.js → dns-cli-Cua_RGWw.js} +4 -4
- package/dist/{docs-cli-DD59E5fQ.js → docs-cli-9yu6OeWj.js} +2 -2
- package/dist/{docs-cli-BcqjkjOA.js → docs-cli-DwrSCuWF.js} +2 -2
- package/dist/{doctor-completion-Dw70YhMd.js → doctor-completion-Be3Of_A2.js} +2 -2
- package/dist/{doctor-completion-Dv6aw9I2.js → doctor-completion-Dco2Lmvy.js} +2 -2
- package/dist/{doctor-config-flow-BOfsrB_o.js → doctor-config-flow-CKJlpwtv.js} +5 -5
- package/dist/{doctor-config-flow-dUAjyD2W.js → doctor-config-flow-CcwEh3WQ.js} +5 -5
- package/dist/{enable-avpl_Lwo.js → enable-BI6rQ1Fb.js} +1 -1
- package/dist/{enable-B5wvwTYB.js → enable-xfczksnk.js} +1 -1
- package/dist/entry.js +1 -1
- package/dist/{exec-approvals-cli-Cv9lE0Ov.js → exec-approvals-cli-1K1PMf0f.js} +10 -10
- package/dist/{exec-approvals-cli-CLD1HXqE.js → exec-approvals-cli-N45JsIvc.js} +10 -10
- package/dist/extensionAPI.js +8 -8
- package/dist/{gateway-cli-DF-S-bKo.js → gateway-cli-BQAh5DqM.js} +86 -85
- package/dist/{gateway-cli-C1yf0Kx4.js → gateway-cli-XV8T-ix8.js} +84 -83
- package/dist/{gateway-rpc-CBb3_pT9.js → gateway-rpc-c2_A2rwu.js} +2 -2
- package/dist/{gateway-rpc-DrcgCoEA.js → gateway-rpc-gDrdC_J6.js} +2 -2
- package/dist/{glass-ui-ws-CgTry9OG.js → glass-ui-ws-1FOCi0nn.js} +62 -61
- package/dist/{glass-ui-ws-BZJKBK5D.js → glass-ui-ws-CqGzDNV7.js} +63 -62
- package/dist/{health-B__mwl7J.js → health-B5BrlZ8G.js} +6 -6
- package/dist/{health-BJl_ZtRW.js → health-B7yvi1O9.js} +6 -6
- package/dist/{hooks-cli-B_d8Cv16.js → hooks-cli-BPa-ssdB.js} +42 -41
- package/dist/{hooks-cli-CpDJa8fo.js → hooks-cli-D7iTDry_.js} +42 -41
- package/dist/{hooks-status-CRdYwf7p.js → hooks-status-B5vDwm19.js} +1 -1
- package/dist/{hooks-status-i4MgV8tU.js → hooks-status-CQ33gaVT.js} +2 -2
- package/dist/index.js +47 -46
- package/dist/{inspect-BZEGJ1Wu.js → inspect-C3_zr-N9.js} +2 -2
- package/dist/{inspect-ChCXJY8c.js → inspect-D_lN8pwg.js} +2 -2
- package/dist/{install-safe-path-PVqsVjI9.js → install-safe-path-79R0iPyC.js} +2 -2
- package/dist/{install-safe-path-INk2Z_kc.js → install-safe-path-BBe6qFNr.js} +2 -2
- package/dist/{installs-lGgvyXAJ.js → installs-BVJQl3Tj.js} +3 -3
- package/dist/{installs-b6XWplET.js → installs-Bb1phV9x.js} +3 -3
- package/dist/{lifecycle-core-iiq9e4fE.js → lifecycle-core-DdbL6ELJ.js} +7 -7
- package/dist/{lifecycle-core-SWNKAc8k.js → lifecycle-core-O50xtu5c.js} +7 -7
- package/dist/llm-slug-generator.js +9 -9
- package/dist/{logs-cli-BGjeoamB.js → logs-cli-DvbMaSKU.js} +6 -6
- package/dist/{logs-cli-BEWpB_ac.js → logs-cli-T5v8efuM.js} +6 -6
- package/dist/{manager-rvtFoeFT.js → manager-Bt-1HGg9.js} +1 -1
- package/dist/{manager-DcZUW1bz.js → manager-CC13EPO9.js} +1 -1
- package/dist/{manager-PTSjHNVq.js → manager-Ck1YSfr6.js} +1 -1
- package/dist/{manager-xeIkDkmx.js → manager-DghM7T-b.js} +1 -1
- package/dist/{memory-D8JRYEYq.js → memory-CgWOtF4K.js} +4 -4
- package/dist/{memory-cli-DZtv3G9o.js → memory-cli-DGheAg5i.js} +7 -7
- package/dist/{memory-cli-rORHUdUV.js → memory-cli-vN3fGs79.js} +7 -7
- package/dist/{memory-CeB8eMPH.js → memory-d0YI5Lr0.js} +4 -4
- package/dist/{model-catalog-MHTLXFwi.js → model-catalog-CzKsiNZA.js} +2 -2
- package/dist/{model-catalog-DQTmHZK7.js → model-catalog-IWi6-nY9.js} +2 -2
- package/dist/{model-picker-BVNI6Imm.js → model-picker-CS_h9RHv.js} +2 -2
- package/dist/{model-picker-DHLA0BEe.js → model-picker-DN-co6Oy.js} +2 -2
- package/dist/{models-D3haEpaG.js → models-CBw1pGIq.js} +17 -17
- package/dist/{models-cli-DidUOdbx.js → models-cli-BBReZHNq.js} +46 -45
- package/dist/{models-cli-DiyNuQ5w.js → models-cli-Bmh1S3iG.js} +49 -48
- package/dist/{models-config-CRnTzdFs.js → models-config-B5Xxy-c-.js} +1 -1
- package/dist/{models-config-Br7EjqgG.js → models-config-CiR_RUxw.js} +1 -1
- package/dist/{node-cli-Yqf38Nio.js → node-cli-2HDdeLgi.js} +20 -20
- package/dist/{node-cli-DnwByBU2.js → node-cli-CH1yLZuO.js} +20 -20
- package/dist/{node-service-CRSkbk2b.js → node-service-CfHbECCi.js} +2 -2
- package/dist/{node-service-E8k7BOQV.js → node-service-D44noKnR.js} +2 -2
- package/dist/{nodes-cli-BY6HrZlh.js → nodes-cli-ARs9ZPma.js} +9 -9
- package/dist/{nodes-cli-OTk4gEj-.js → nodes-cli-DIm6cMS_.js} +9 -9
- package/dist/{note-Bn5K9itM.js → note-Cltpxj6i.js} +1 -1
- package/dist/{note-DtkOgLxt.js → note-ytwb4wwn.js} +1 -1
- package/dist/{npm-registry-spec-CZCkONwR.js → npm-registry-spec-CONWlhd6.js} +1 -1
- package/dist/{npm-registry-spec-L8BUQ0nu.js → npm-registry-spec-DPqOMFd9.js} +1 -1
- package/dist/{onboard-oIJ6OEGu.js → onboard-Csj4sOF5.js} +18 -18
- package/dist/{onboard-C1PSYsJY.js → onboard-LVUmCZhX.js} +18 -18
- package/dist/{onboard-channels-DvNWHvx3.js → onboard-channels-mrSIlZDB.js} +7 -7
- package/dist/{onboard-channels-BSt89ffO.js → onboard-channels-vDtX0e0w.js} +7 -7
- package/dist/{onboard-custom-y7PsUrmE.js → onboard-custom-BCcuL4Uq.js} +2 -2
- package/dist/{onboard-custom-TvRg0dYq.js → onboard-custom-C-wfNtOM.js} +2 -2
- package/dist/{onboard-helpers-CtpOchu0.js → onboard-helpers-BKfeJ1xj.js} +4 -4
- package/dist/{onboard-helpers-HjgAKoSl.js → onboard-helpers-CCI7SimM.js} +4 -4
- package/dist/{onboard-hooks-BU0EsBMH.js → onboard-hooks-CXRw9BJY.js} +2 -2
- package/dist/{onboard-hooks-Cmy0Qz0F.js → onboard-hooks-DfDF9AU4.js} +2 -2
- package/dist/{onboard-remote-B7bM-1-l.js → onboard-remote-D4R-Yq2Q.js} +3 -3
- package/dist/{onboard-remote-B5kHPh_e.js → onboard-remote-DKXBH_lQ.js} +3 -3
- package/dist/{onboard-skills-BsPcuuer.js → onboard-skills-BPmu8XLA.js} +2 -2
- package/dist/{onboard-skills-V7pX2pkl.js → onboard-skills-jVyIFojF.js} +3 -3
- package/dist/{onboarding-Ce0PyZFq.js → onboarding-D2VmoKU1.js} +16 -16
- package/dist/{onboarding-D4SGY_zG.js → onboarding-DojADc8N.js} +16 -16
- package/dist/{onboarding.finalize-TkbmJzg5.js → onboarding.finalize-Bi1wSEtw.js} +28 -28
- package/dist/{onboarding.finalize-BDPrr9CT.js → onboarding.finalize-C4BwVw6u.js} +26 -26
- package/dist/{onboarding.gateway-config-9dgVRLeb.js → onboarding.gateway-config-B301LxTx.js} +6 -6
- package/dist/{onboarding.gateway-config-DXQhxsQS.js → onboarding.gateway-config-ChCxv8dy.js} +6 -6
- package/dist/{openai-model-default-D_rz8Pew.js → openai-model-default-CKfNKTZD.js} +1 -1
- package/dist/{openai-model-default-BkGyzajy.js → openai-model-default-ChdgeMqX.js} +1 -1
- package/dist/{outbound-send-deps-PNzbuoSX.js → outbound-send-deps-JQjmNLPx.js} +1 -1
- package/dist/{pairing-cli-WQ1MKO2B.js → pairing-cli-D7_UlTsI.js} +3 -3
- package/dist/{pairing-cli-D2zvrKTM.js → pairing-cli-UgiAsygW.js} +3 -3
- package/dist/{path-env-BHRjFlqO.js → path-env-BAyW1s3c.js} +1 -1
- package/dist/{path-env-i-ZjtVbs.js → path-env-C1hgM8gP.js} +1 -1
- package/dist/{pi-embedded-helpers-nFK_hP2q.js → pi-embedded-helpers-BBiyNXkS.js} +1 -1
- package/dist/{pi-embedded-helpers-DVolpQ34.js → pi-embedded-helpers-CDzBxa-P.js} +1 -1
- package/dist/{pi-embedded-BPuUM-gD.js → pi-embedded-zSv7VKNQ.js} +1265 -428
- package/dist/{pi-tools.policy-uHgu_nx0.js → pi-tools.policy-CU8U7--z.js} +2 -2
- package/dist/{pi-tools.policy-CTzYYKRt.js → pi-tools.policy-WaLKhqJQ.js} +2 -2
- package/dist/{plugin-auto-enable-C4b23B35.js → plugin-auto-enable-Bt2sVz8w.js} +2 -2
- package/dist/{plugin-auto-enable-BkhVwtbW.js → plugin-auto-enable-PBDcVF_-.js} +2 -2
- package/dist/{plugin-registry-CxUYRmLh.js → plugin-registry-Dq6OJ45H.js} +2 -2
- package/dist/{plugin-registry-V5jShvbP.js → plugin-registry-L1h7mxjX.js} +2 -2
- package/dist/plugin-sdk/auto-reply/reply/commands-info.d.ts +1 -0
- package/dist/plugin-sdk/auto-reply/status.d.ts +13 -0
- package/dist/plugin-sdk/config/types.models.d.ts +5 -0
- package/dist/plugin-sdk/config/zod-schema.core.d.ts +11 -0
- package/dist/plugin-sdk/config/zod-schema.d.ts +5 -0
- package/dist/plugin-sdk/cost/benchmark.d.ts +36 -0
- package/dist/plugin-sdk/cost/breakdown.d.ts +63 -0
- package/dist/plugin-sdk/cost/gpu-inference.d.ts +52 -0
- package/dist/plugin-sdk/cost/rates.d.ts +32 -0
- package/dist/plugin-sdk/cost/serving-discovery.d.ts +54 -0
- package/dist/plugin-sdk/cost/summary.d.ts +40 -0
- package/dist/plugin-sdk/index.js +19 -8
- package/dist/{plugins-cli-BthQwo7n.js → plugins-cli-BEIKMoVH.js} +43 -42
- package/dist/{plugins-cli-CMAwepRk.js → plugins-cli-C9ehLXaT.js} +43 -42
- package/dist/{program-context-BVEz8AgF.js → program-context-Bh5wzWdC.js} +48 -40
- package/dist/{program-iw_XyVhs.js → program-iTd2Dei7.js} +46 -45
- package/dist/{prompt-select-styled-CF4qV0_M.js → prompt-select-styled-CNFytCD3.js} +32 -32
- package/dist/{prompt-select-styled-D3RAY7pg.js → prompt-select-styled-fXotfbCt.js} +31 -31
- package/dist/{provider-auth-helpers-BTBjOyhO.js → provider-auth-helpers-DBrH-cce.js} +6 -6
- package/dist/{provider-auth-helpers-Dxo0v0UO.js → provider-auth-helpers-Dho1n21x.js} +6 -6
- package/dist/{push-apns-DIIbGT1M.js → push-apns-DYYY9NOE.js} +1 -1
- package/dist/{push-apns-B8Wg3Hqw.js → push-apns-DpyCFRTu.js} +1 -1
- package/dist/{pw-ai-kkF0QaDF.js → pw-ai-DBAtSFTB.js} +2 -2
- package/dist/{pw-ai-BFS9ezWe.js → pw-ai-DOAsQ5NX.js} +2 -2
- package/dist/{pw-ai-_prsAw5O.js → pw-ai-DQZa9DUQ.js} +2 -2
- package/dist/{pw-ai-Cx-Ko_FL.js → pw-ai-DY_6l11g.js} +2 -2
- package/dist/{qr-cli-CcAzgz_N.js → qr-cli-D2A-IU0Y.js} +1 -1
- package/dist/{qr-cli-KmGkQDBF.js → qr-cli-DHpiebkG.js} +1 -1
- package/dist/{redact-identifier-CXvHJXk9.js → redact-identifier-BOxAUdff.js} +1 -1
- package/dist/{register.agent-8XsVRuWC.js → register.agent-BYpBNbHe.js} +61 -60
- package/dist/{register.agent-D-OIszeY.js → register.agent-DceNHGi_.js} +63 -62
- package/dist/register.configure-09lIIUvq.js +131 -0
- package/dist/register.configure-UUZqL0Ck.js +135 -0
- package/dist/{register.maintenance-CBYqsXkf.js → register.maintenance-CsoEyVyl.js} +63 -62
- package/dist/{register.maintenance-CjM8sJqH.js → register.maintenance-DdX8GZpv.js} +64 -63
- package/dist/{register.message-kSIASKVK.js → register.message-CAM62MaW.js} +41 -40
- package/dist/{register.message-DnljWvZP.js → register.message-RYn7LD6-.js} +40 -39
- package/dist/{register.onboard-Vj1-Ike1.js → register.onboard-CoRtNlNa.js} +28 -28
- package/dist/{register.onboard-eDNn1pdd.js → register.onboard-DhR5o8KO.js} +28 -28
- package/dist/{register.setup-CmhspsS8.js → register.setup-CEjhFwYz.js} +28 -28
- package/dist/{register.setup-DMaoMWRD.js → register.setup-CI_RT1Wa.js} +28 -28
- package/dist/{register.status-health-sessions-CJqCoqAi.js → register.status-health-sessions-8N1xEKl7.js} +35 -35
- package/dist/{register.status-health-sessions-r3F9H8b0.js → register.status-health-sessions-DNlsQ-xt.js} +36 -36
- package/dist/{register.subclis-RqX1a2aL.js → register.subclis-D1qkm3UK.js} +37 -29
- package/dist/{replies-BYF1gbJf.js → replies-0nzkXt6o.js} +1 -1
- package/dist/{replies-B91-OngF.js → replies-C5CBlnFS.js} +1 -1
- package/dist/{replies-DAYTg6Mb.js → replies-D0FYSIJg.js} +1 -1
- package/dist/{replies-UP2sglaR.js → replies-_DV8VSSj.js} +1 -1
- package/dist/{resolve-route-Zww0Y-HJ.js → resolve-route-CnTdHVgw.js} +1 -1
- package/dist/{resolve-route-CmZ7XjmB.js → resolve-route-D0NrCHnr.js} +1 -1
- package/dist/{routes-BfeVDhod.js → routes-CPMVuvoz.js} +3 -3
- package/dist/{routes-y2Ww0pum.js → routes-DIyAmLYR.js} +3 -3
- package/dist/{rpc-DZCrGGH5.js → rpc-B2BLRS45.js} +2 -2
- package/dist/{rpc-RIkh5F9v.js → rpc-gUsL9Mqf.js} +2 -2
- package/dist/{run-main-6pE2fYXT.js → run-main-hKg6FDt6.js} +61 -60
- package/dist/{sandbox-DQdjlC-w.js → sandbox-61-kshSF.js} +5 -5
- package/dist/{sandbox-BjQ6n3zl.js → sandbox-J92UBQK8.js} +5 -5
- package/dist/{sandbox-cli-CTYAHFyM.js → sandbox-cli-D9v0jhBb.js} +8 -8
- package/dist/{sandbox-cli-C0IVf8AS.js → sandbox-cli-DJgF-r8G.js} +8 -8
- package/dist/{security-cli-B8VyQJcl.js → security-cli-CZix5cXq.js} +13 -13
- package/dist/{security-cli-D0oErb9f.js → security-cli-J9J0qvDy.js} +13 -13
- package/dist/{send-Cxu1tl56.js → send-B0jHSNMF.js} +1 -1
- package/dist/{send-Cuk_Rjss.js → send-C75uSv6p.js} +1 -1
- package/dist/{send-BBfSp-8Q.js → send-Dq252-bi.js} +1 -1
- package/dist/{server-context-BFCh7pUb.js → server-context-D-82OTsT.js} +5 -5
- package/dist/{server-context-YlgRz1wC.js → server-context-SbbjNkPL.js} +5 -5
- package/dist/{server-methods-BWCAXrQJ.js → server-methods-CXoipzKe.js} +31 -31
- package/dist/{server-methods-BxsThjMp.js → server-methods-bIWvtXlT.js} +31 -31
- package/dist/{server-node-events-AZFI44cm.js → server-node-events-BD8wnswX.js} +40 -39
- package/dist/{server-node-events-C10Bhdag.js → server-node-events-nQkNuQOx.js} +39 -38
- package/dist/{service-vsb7_8MA.js → service-BF50XyKr.js} +2 -2
- package/dist/{service-BBw78ATM.js → service-Ccv3Zi5_.js} +2 -2
- package/dist/{session-cost-usage-CqLUlh7S.js → session-cost-usage-CJWHHY-C.js} +1 -1
- package/dist/{session-cost-usage-D6Ocwmq1.js → session-cost-usage-D3mgssM_.js} +1 -1
- package/dist/{session-utils-BZnPW904.js → session-utils-Cs1jlD-q.js} +3 -3
- package/dist/{session-utils-BGw_xwXK.js → session-utils-Zpe3t68b.js} +3 -3
- package/dist/{sessions-uS7rV-lI.js → sessions-BSmT7vGX.js} +3 -3
- package/dist/{sessions-DHPf5TzB.js → sessions-Dn6VXn4p.js} +1 -1
- package/dist/{sessions-optZRB57.js → sessions-Dxf5Kjig.js} +1 -1
- package/dist/{shared-CfBMF1VB.js → shared-BJRsBGGr.js} +2 -2
- package/dist/{shared-CVXZtpIq.js → shared-BmjlTHYU.js} +2 -2
- package/dist/{shared-BucaCyaw.js → shared-Ck6cf10x.js} +1 -1
- package/dist/{shared-k1QjV3ys.js → shared-Crfo8y70.js} +1 -1
- package/dist/{skill-commands-CKm7eCw6.js → skill-commands-D0xAWG0l.js} +1 -1
- package/dist/{skill-commands-DAtpPeXi.js → skill-commands-DKkiQJxU.js} +1 -1
- package/dist/{skill-commands-BoMrW3WV.js → skill-commands-JIccKWkf.js} +1 -1
- package/dist/{skill-commands-yPirQFOb.js → skill-commands-KnANH5Qm.js} +1 -1
- package/dist/{skills-cli-CcWFIcOR.js → skills-cli-BZF5mcG5.js} +7 -7
- package/dist/{skills-cli-DcJBgk5R.js → skills-cli-C9F-zLWe.js} +3 -3
- package/dist/{skills-install-CcW780Kq.js → skills-install-QOHQrX6P.js} +4 -4
- package/dist/{skills-install-CbRsCzAG.js → skills-install-X12kifP1.js} +4 -4
- package/dist/{skills-status-CLzaY4r2.js → skills-status-6LVKpabC.js} +1 -1
- package/dist/{ssrf-q6hBiAOi.js → ssrf-BCSnhba8.js} +13 -2
- package/dist/{ssrf-oI58TMOb.js → ssrf-DNhyFMRW.js} +13 -2
- package/dist/{status-BxD8CjLr.js → status-CIZltjd7.js} +2 -2
- package/dist/{status-Cv8N2kIe.js → status-CTDVhXdb.js} +20 -20
- package/dist/{status-CZc22Lqb.js → status-CWzbgPAZ.js} +2 -2
- package/dist/{status-B5sjPQS6.js → status-Dgg3SlRY.js} +20 -20
- package/dist/{status.update-B52bM2Tg.js → status.update-CdTtmVpp.js} +2 -2
- package/dist/{status.update-Bl_ewksT.js → status.update-_3qHPt0O.js} +2 -2
- package/dist/{subagent-registry-BnHgIGX-.js → subagent-registry-DQ_BPb-T.js} +130 -40
- package/dist/{synthesis-DvfrgkQU.js → synthesis-CDd1xqI3.js} +36 -35
- package/dist/{synthesis-fD8J2vag.js → synthesis-CSsmBmJc.js} +9 -9
- package/dist/{synthesis-o7Zdrwxz.js → synthesis-DIqhHKjV.js} +36 -35
- package/dist/{synthesis-7UL3pCpj.js → synthesis-XbEFEFK1.js} +8 -8
- package/dist/{system-cli-BAn07VkE.js → system-cli-DB9etQxq.js} +5 -5
- package/dist/{system-cli-BTZ8T0-Z.js → system-cli-szRqg1_v.js} +5 -5
- package/dist/{systemd-DCzA-V_E.js → systemd-Dic80Qni.js} +1 -1
- package/dist/{systemd-Cmn7zMqD.js → systemd-Dtydk5-z.js} +1 -1
- package/dist/{systemd-hints-CvIqgg9d.js → systemd-hints-DmDUq9JT.js} +4 -4
- package/dist/{systemd-hints-BlGehfA3.js → systemd-hints-bP8dBJ78.js} +4 -4
- package/dist/{systemd-linger-BUWYteOW.js → systemd-linger-BBxRNFXz.js} +2 -2
- package/dist/{systemd-linger-Bj4nK9ZB.js → systemd-linger-DjoQVcOq.js} +2 -2
- package/dist/{tui-XLWiN8Hm.js → tui-CEhOf9fk.js} +6 -6
- package/dist/{tui-B6mIUNPO.js → tui-Dp58oYvX.js} +6 -6
- package/dist/{tui-cli-byFYgg3c.js → tui-cli-DFaOwBwg.js} +12 -12
- package/dist/{tui-cli-CTK5_3rU.js → tui-cli-wxh3YpaV.js} +12 -12
- package/dist/{unified-runner-DnikxMFg.js → unified-runner-Bm3r8UOg.js} +130 -40
- package/dist/{unified-runner-BIiKFnNF.js → unified-runner-DN0asRIR.js} +1266 -429
- package/dist/{update-DxY1UB0k.js → update-BDo7wxMf.js} +1 -1
- package/dist/{update-EhDcBqN1.js → update-CcacbR32.js} +1 -1
- package/dist/{update-check-pFwV6W-z.js → update-check-A86nJKtZ.js} +1 -1
- package/dist/{update-check-H0DwvuqN.js → update-check-oEbS0voJ.js} +1 -1
- package/dist/{update-cli-C8IVRiDZ.js → update-cli-CK7nw0t0.js} +74 -73
- package/dist/{update-cli-BAdPKO1o.js → update-cli-kysdsvn3.js} +73 -72
- package/dist/{update-runner-CERd2XdT.js → update-runner-CQMv-ibM.js} +3 -3
- package/dist/{update-runner-7oHPAIND.js → update-runner-DHi359Tb.js} +3 -3
- package/dist/{usage-format-_37Bn-Jb.js → usage-format-DQJz-0qd.js} +0 -1
- package/dist/{usage-format-rxT5cEAf.js → usage-format-DeczTGI-.js} +0 -1
- package/dist/{webhooks-cli-3UJupWMP.js → webhooks-cli-C3g4MxCE.js} +3 -3
- package/dist/{webhooks-cli-BFpGMNrf.js → webhooks-cli-C3uyehJS.js} +3 -3
- package/dist/{with-timeout-Bn-ltLg1.js → with-timeout-B0ZJa7f2.js} +1 -1
- package/dist/{with-timeout-EXM-FP4_.js → with-timeout-GbJ1Yzsh.js} +1 -1
- package/package.json +1 -1
- package/dist/register.configure-Bcdw9qlF.js +0 -130
- package/dist/register.configure-KE6Kvt4O.js +0 -134
- /package/dist/{agents.config-BG9fpad0.js → agents.config-Cyc_RegL.js} +0 -0
- /package/dist/{agents.config-CxrFhtC1.js → agents.config-DK6nN-Qh.js} +0 -0
- /package/dist/{argv-oa_KBmv1.js → argv-Bl0th4dq.js} +0 -0
- /package/dist/{auth-choice-legacy-DwMt7oDR.js → auth-choice-legacy-BZWIpO92.js} +0 -0
- /package/dist/{auth-choice-legacy-ZsU_kwPI.js → auth-choice-legacy-iEyARXI2.js} +0 -0
- /package/dist/{bindings-CLS_owm_.js → bindings-BAOPW5eK.js} +0 -0
- /package/dist/{bindings-BpMZcmYL.js → bindings-c7qLJdIA.js} +0 -0
- /package/dist/{brew-B3VMQVQk.js → brew-B4FH-mEH.js} +0 -0
- /package/dist/{brew-BsE_w2tW.js → brew-CI00FKf8.js} +0 -0
- /package/dist/{catalog-CriSxQMH.js → catalog-BaM8d1-T.js} +0 -0
- /package/dist/{catalog-BXxi5-YC.js → catalog-BnLcGVie.js} +0 -0
- /package/dist/{channel-selection-BiYW5XI-.js → channel-selection-BrFl0vVK.js} +0 -0
- /package/dist/{channel-selection-DF4bza2Q.js → channel-selection-DjrS6oiz.js} +0 -0
- /package/dist/{channels-status-issues-DuRF3XF3.js → channels-status-issues-BwbA2OBt.js} +0 -0
- /package/dist/{channels-status-issues-C2AVfv0-.js → channels-status-issues-iWQDGUQe.js} +0 -0
- /package/dist/{cli-utils-tNdOjVQI.js → cli-utils-CbnnSB38.js} +0 -0
- /package/dist/{clipboard-B1jgJ11s.js → clipboard-BSTxQ2tO.js} +0 -0
- /package/dist/{clipboard-Sks8Qw-6.js → clipboard-CHn5r7Kj.js} +0 -0
- /package/dist/{command-options-C2yg82U0.js → command-options-DgcCGXMQ.js} +0 -0
- /package/dist/{command-options-D_KEIae-.js → command-options-Vxb4M7Aj.js} +0 -0
- /package/dist/{consolidate-DPAffHCg.js → consolidate-BSUbtSDt.js} +0 -0
- /package/dist/{consolidate-CF6vEjPe.js → consolidate-DhoiF4n7.js} +0 -0
- /package/dist/{constants-B4wXIPMk.js → constants-BLIhifCJ.js} +0 -0
- /package/dist/{constants-ChnKXZKi.js → constants-BP_JoSwO.js} +0 -0
- /package/dist/{control-ui-assets-CkPR0Fz2.js → control-ui-assets-CnX0W3vy.js} +0 -0
- /package/dist/{control-ui-assets-BVCcPRi3.js → control-ui-assets-DfBRXG5y.js} +0 -0
- /package/dist/{delivery-queue-DqDe6994.js → delivery-queue-CIwn40ej.js} +0 -0
- /package/dist/{delivery-queue-RUd4NhRw.js → delivery-queue-duM0RT18.js} +0 -0
- /package/dist/{dm-policy-shared-DmpXsl2K.js → dm-policy-shared-BUwZ6Ajx.js} +0 -0
- /package/dist/{dm-policy-shared-ZfPtrTeh.js → dm-policy-shared-EuuPwRNE.js} +0 -0
- /package/dist/{entry-status-BqhxPFzK.js → entry-status-D0BSpWt_.js} +0 -0
- /package/dist/{format-BskCnX0I.js → format-BcXs5iuf.js} +0 -0
- /package/dist/{gmail-setup-utils-BlGvgPlF.js → gmail-setup-utils-DJgxyDTX.js} +0 -0
- /package/dist/{gmail-setup-utils-DsRU2ywQ.js → gmail-setup-utils-_TEJA4pL.js} +0 -0
- /package/dist/{health-format-CzrMGubG.js → health-format-Dp4xmTKn.js} +0 -0
- /package/dist/{health-format-C0DWlpEn.js → health-format-Li703vy6.js} +0 -0
- /package/dist/{help-format-DhRyo3Pj.js → help-format-8IYCRvx1.js} +0 -0
- /package/dist/{help-format-BjEVi4c_.js → help-format-BTNd5kFC.js} +0 -0
- /package/dist/{helpers-CcArejFh.js → helpers-BnXDQL2q.js} +0 -0
- /package/dist/{helpers-Dm_utoC3.js → helpers-CoScWBmD.js} +0 -0
- /package/dist/{helpers-DJRLZbrj.js → helpers-DGBa5iKd.js} +0 -0
- /package/dist/{helpers-DUclfUl-.js → helpers-lLtg0Hgi.js} +0 -0
- /package/dist/{ipv4--8f4a_51.js → ipv4-CCdAVmwu.js} +0 -0
- /package/dist/{ipv4-Cy7l-phR.js → ipv4-DFUXU3PT.js} +0 -0
- /package/dist/{links-D6ASL_as.js → links-B9CbwY46.js} +0 -0
- /package/dist/{links-BkFDs4sT.js → links-CQZxjjCO.js} +0 -0
- /package/dist/{logging-4WqMHwAZ.js → logging-BRejMykf.js} +0 -0
- /package/dist/{logging-iIX8duxv.js → logging-BmJ5ky59.js} +0 -0
- /package/dist/{logging-Czh4x_NQ.js → logging-CLWZ3KQI.js} +0 -0
- /package/dist/{model-D50Lt_sm.js → model-BMFj7NXy.js} +0 -0
- /package/dist/{model-param-b-CfiuLqZG.js → model-param-b-C6mMtCJr.js} +0 -0
- /package/dist/{onboard-config-BKb2QQqE.js → onboard-config-B9Iy9-4f.js} +0 -0
- /package/dist/{onboard-config-DZIbmz6s.js → onboard-config-DwsP5yz1.js} +0 -0
- /package/dist/{outbound-send-deps-BR6jeE9h.js → outbound-send-deps-DpyxnI3f.js} +0 -0
- /package/dist/{parse-log-line-CeJ1RDHv.js → parse-log-line-BQPvB6cZ.js} +0 -0
- /package/dist/{parse-log-line-DxFrnzNo.js → parse-log-line-Cg7e_EO6.js} +0 -0
- /package/dist/{parse-port-DdAxyRwV.js → parse-port-CLBKavYE.js} +0 -0
- /package/dist/{parse-port-W56FnCN_.js → parse-port-dzRz6QnZ.js} +0 -0
- /package/dist/{parse-timeout-DKY8YAUj.js → parse-timeout-CuYdP9TL.js} +0 -0
- /package/dist/{path-safety-LPRW6mO-.js → path-safety-ZDtSDPro.js} +0 -0
- /package/dist/{path-safety-grmMl9nR.js → path-safety-cWA8NpOb.js} +0 -0
- /package/dist/{plugins-allowlist-BuJjTWyS.js → plugins-allowlist-C5r7biDp.js} +0 -0
- /package/dist/{plugins-allowlist-BvQZxkGy.js → plugins-allowlist-CD4oIJNk.js} +0 -0
- /package/dist/{ports-CqKpyZSl.js → ports-BE4iY9ho.js} +0 -0
- /package/dist/{ports-BDzMH5Eu.js → ports-C2swmreL.js} +0 -0
- /package/dist/{program-context-C5Y6Svzj.js → program-context-CGKRxOBU.js} +0 -0
- /package/dist/{progress-deSwGz9x.js → progress-BOQ0hkeM.js} +0 -0
- /package/dist/{progress-B3K1UFtC.js → progress-DQTvTiEg.js} +0 -0
- /package/dist/{prompt-style-DeaIm7_1.js → prompt-style-CzRuIYtp.js} +0 -0
- /package/dist/{prompt-style-DS51QPPi.js → prompt-style-pphr4yLK.js} +0 -0
- /package/dist/{prompts-LvxbwPs6.js → prompts-B98rp78m.js} +0 -0
- /package/dist/{prompts-l_pUOv42.js → prompts-CqJkfsHC.js} +0 -0
- /package/dist/{qmd-manager-BbWeOOTD.js → qmd-manager-C6wRtPm_.js} +0 -0
- /package/dist/{qmd-manager-CYKlEJ2U.js → qmd-manager-CYEAMLE7.js} +0 -0
- /package/dist/{runtime-guard-GWXNB3ke.js → runtime-guard-Bgv3O9s4.js} +0 -0
- /package/dist/{runtime-guard-DWXoHW1Q.js → runtime-guard-ofQGcCUc.js} +0 -0
- /package/dist/{runtime-status-BqrK6ivx.js → runtime-status-B-PXkhrX.js} +0 -0
- /package/dist/{runtime-status-D4pjzLyv.js → runtime-status-Zb9OQ0VK.js} +0 -0
- /package/dist/{session-dirs-CXErIzZq.js → session-dirs-BsQ-g02q.js} +0 -0
- /package/dist/{session-dirs-DJ4qiP3r.js → session-dirs-DpiMgw4q.js} +0 -0
- /package/dist/{skill-scanner-zh2dJM3x.js → skill-scanner-D5p8L-xO.js} +0 -0
- /package/dist/{skill-scanner-Ur9nMoo4.js → skill-scanner-EhsZUzzk.js} +0 -0
- /package/dist/{stagger-DkaCfsKB.js → stagger-DQs772BN.js} +0 -0
- /package/dist/{status-Bz_-dDs6.js → status-DtyqId3v.js} +0 -0
- /package/dist/{status-CExNNC5N.js → status-dOhj12u0.js} +0 -0
- /package/dist/{system-run-command-CeR1OLFk.js → system-run-command-2oJPKfVr.js} +0 -0
- /package/dist/{system-run-command-Craglj8T.js → system-run-command-D_OmCQOL.js} +0 -0
- /package/dist/{table-xKEHbVsK.js → table-Dm8jYqmW.js} +0 -0
- /package/dist/{table-jBELNhV1.js → table-Ds5CZCyv.js} +0 -0
- /package/dist/{tool-loop-detection-D3y39HsT.js → tool-loop-detection-2Mxxm8YE.js} +0 -0
- /package/dist/{tool-loop-detection-DljhV89j.js → tool-loop-detection-Bk07vuCX.js} +0 -0
- /package/dist/{widearea-dns-C8Rsixsi.js → widearea-dns-BwIBBqZo.js} +0 -0
- /package/dist/{widearea-dns-BSrnDwJD.js → widearea-dns-DA_m7oeT.js} +0 -0
- /package/dist/{workspace-DKEYzTNc.js → workspace-DD9pYbiv.js} +0 -0
- /package/dist/{workspace-BkWAKK8O.js → workspace-Wkr6wXcS.js} +0 -0
- /package/dist/{wsl-cy5jpbkx.js → wsl-Ci3VOaSK.js} +0 -0
- /package/dist/{wsl-hSA0zCrh.js → wsl-S50RbgL8.js} +0 -0
|
@@ -0,0 +1,753 @@
|
|
|
1
|
+
import { n as loadCostUsageSummary } from "./session-cost-usage-CJWHHY-C.js";
|
|
2
|
+
import { t as estimateUsageCost } from "./usage-format-DQJz-0qd.js";
|
|
3
|
+
|
|
4
|
+
//#region src/cost/benchmark.ts
|
|
5
|
+
/**
|
|
6
|
+
* Claude reference rates and benchmark comparison.
|
|
7
|
+
*
|
|
8
|
+
* Hardcoded USD rates for Claude Opus 4.7, Sonnet 4.6, and
|
|
9
|
+
* Haiku 4.5 (per million tokens, Anthropic public list as of 2026-Q1).
|
|
10
|
+
* Override via `models.gpuRates` is for GPU rates only — Claude rates can
|
|
11
|
+
* be overridden via the per-model `cost: { input, output }` configuration
|
|
12
|
+
* on the user's `anthropic` provider entry, which already exists.
|
|
13
|
+
*
|
|
14
|
+
* Display: pick a target tier via `agents.defaults.cost.benchmark`, default
|
|
15
|
+
* `sonnet`. Setting `"all"` shows comparisons against all three tiers.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Reference Claude rates in USD per million tokens, keyed by tier
 * (`opus`, `sonnet`, `haiku`).
 *
 * `benchmarkUsage` passes the selected tier entry to `estimateUsageCost` as
 * its `cost` argument. The table and each tier entry are frozen so that a
 * consumer holding a reference cannot alter the shared rates by accident.
 *
 * NOTE(review): these values are hardcoded; confirm them against the current
 * Anthropic price list for the model versions named in `CLAUDE_LABEL`.
 */
const CLAUDE_RATES = Object.freeze({
  opus: Object.freeze({
    input: 15,
    output: 75,
    cacheRead: 1.5,
    cacheWrite: 18.75,
  }),
  sonnet: Object.freeze({
    input: 3,
    output: 15,
    cacheRead: 0.3,
    cacheWrite: 3.75,
  }),
  haiku: Object.freeze({
    input: 0.8,
    output: 4,
    cacheRead: 0.08,
    cacheWrite: 1,
  }),
});
|
|
38
|
+
/**
 * Full display names for each benchmark tier, used by `formatPair` when a
 * single tier is shown. Frozen because it is a shared constant lookup.
 */
const CLAUDE_LABEL = Object.freeze({
  opus: "Claude Opus 4.7",
  sonnet: "Claude Sonnet 4.6",
  haiku: "Claude Haiku 4.5",
});
|
|
43
|
+
/**
 * Estimates what a usage record would cost at the Claude reference rates.
 *
 * @param {object} args
 * @param {object} [args.usage] - Usage record forwarded unchanged to
 *   `estimateUsageCost`. When falsy, nothing is benchmarked.
 * @param {string} args.target - A tier key of `CLAUDE_RATES` (`"opus"`,
 *   `"sonnet"`, `"haiku"`), or `"all"` to benchmark all three in that order.
 * @returns {Array<{tier: string, costUsd: number}>} One entry per requested
 *   tier for which `estimateUsageCost` produced a value; empty when there is
 *   no usage, the target is not a known tier, or no estimate was produced.
 */
function benchmarkUsage(args) {
  if (!args.usage) return [];
  const tiers = args.target === "all" ? ["opus", "sonnet", "haiku"] : [args.target];
  const results = [];
  for (const tier of tiers) {
    // Only accept the table's own keys: a plain `CLAUDE_RATES[tier]` lookup
    // would resolve inherited members such as "constructor" and hand a
    // non-rate value to the estimator.
    if (!Object.hasOwn(CLAUDE_RATES, tier)) continue;
    const cost = estimateUsageCost({
      usage: args.usage,
      cost: CLAUDE_RATES[tier],
    });
    // The estimator signals "no estimate" with undefined; skip those tiers.
    if (cost === void 0) continue;
    results.push({
      tier,
      costUsd: cost,
    });
  }
  return results;
}
|
|
64
|
+
/**
 * Build the "vs Claude" suffix appended to a cost line.
 *
 * @param args.ourCostUsd Our own cost for the same usage.
 * @param args.benchmarks Entries produced by `benchmarkUsage`.
 * @returns `undefined` when there are no benchmarks, the `formatPair` text for
 *   exactly one, otherwise a `vs A / B / C[ / save N% vs X]` summary.
 */
function formatBenchmarkSuffix(args) {
	const { benchmarks, ourCostUsd } = args;
	if (benchmarks.length === 0) return void 0;
	if (benchmarks.length === 1) return formatPair(ourCostUsd, benchmarks[0]);
	const segments = benchmarks.map((entry) => `${shortLabel(entry.tier)} $${formatCostShort(entry.costUsd)}`);
	// Savings are quoted against the cheapest tier only; ties keep the earlier entry.
	let cheapest = benchmarks[0];
	for (const entry of benchmarks.slice(1)) {
		if (entry.costUsd < cheapest.costUsd) cheapest = entry;
	}
	const weAreCheaper = cheapest.costUsd > ourCostUsd && cheapest.costUsd > 0;
	if (weAreCheaper) {
		const savedPct = (cheapest.costUsd - ourCostUsd) / cheapest.costUsd * 100;
		// Savings below one percent are not worth mentioning.
		if (savedPct >= 1) segments.push(`save ${savedPct.toFixed(0)}% vs ${shortLabel(cheapest.tier)}`);
	}
	return `vs ${segments.join(" / ")}`;
}
|
|
79
|
+
/**
 * Describe one benchmark next to our own cost.
 *
 * @param ourCost Our cost in USD.
 * @param b       `{ tier, costUsd }` benchmark entry.
 * @returns "<label> would be $X", followed by "(save N%)" when we are cheaper
 *   or "(+N%)" when we are more expensive, if the gap is at least one percent.
 */
function formatPair(ourCost, b) {
	const base = `${CLAUDE_LABEL[b.tier]} would be $${formatCostShort(b.costUsd)}`;
	// No percentage can be computed against a zero (or non-positive) benchmark.
	if (!(b.costUsd > 0)) return base;
	if (b.costUsd > ourCost) {
		const savedPct = (b.costUsd - ourCost) / b.costUsd * 100;
		if (savedPct >= 1) return `${base} (save ${savedPct.toFixed(0)}%)`;
	} else if (ourCost > b.costUsd) {
		const extraPct = (ourCost - b.costUsd) / b.costUsd * 100;
		if (extraPct >= 1) return `${base} (+${extraPct.toFixed(0)}%)`;
	}
	return base;
}
|
|
90
|
+
/** Short tier name for compact output; anything other than opus/sonnet maps to "Haiku". */
function shortLabel(tier) {
	switch (tier) {
		case "opus": return "Opus";
		case "sonnet": return "Sonnet";
		default: return "Haiku";
	}
}
|
|
95
|
+
/** Format a USD amount: two decimals from $1 upward, four decimals below that. */
function formatCostShort(value) {
	const decimals = value >= 1 ? 2 : 4;
	return value.toFixed(decimals);
}
|
|
99
|
+
/** True when `value` is exactly one of the three tier names. */
function isClaudeTier(value) {
	return ["opus", "sonnet", "haiku"].includes(value);
}
|
|
102
|
+
/** True for `"all"` or any single Claude tier name. */
function isBenchmarkTarget(value) {
	return value === "all" || isClaudeTier(value);
}
|
|
105
|
+
/**
 * Normalise a configured benchmark target.
 *
 * @param value Raw config value of any type.
 * @returns `value` when it is a recognised target string, otherwise `"sonnet"`.
 */
function resolveBenchmarkTarget(value) {
	const recognised = typeof value === "string" && isBenchmarkTarget(value);
	return recognised ? value : "sonnet";
}
|
|
109
|
+
|
|
110
|
+
//#endregion
|
|
111
|
+
//#region src/cost/summary.ts
|
|
112
|
+
/**
 * Load the usage summary and attach Claude-equivalent costs to it.
 *
 * @param params Forwarded unchanged to `loadCostUsageSummary`.
 * @returns The loaded summary plus: `daily` entries each extended with
 *   `benchmarkUsd`, `benchmarkTotals` for the whole window, and
 *   `savingsTotals` (benchmark total minus our `totals.totalCost`, per tier).
 */
async function loadCostBenchmarkSummary(params) {
	const usageSummary = await loadCostUsageSummary(params);
	const withBenchmark = (day) => {
		const benchmarkUsd = computeBenchmarkPerTier(day);
		return { ...day, benchmarkUsd };
	};
	const daily = usageSummary.daily.map((day) => withBenchmark(day));
	const benchmarkTotals = computeBenchmarkPerTier(usageSummary.totals);
	const spent = usageSummary.totals.totalCost;
	// Positive numbers mean Claude would have cost more than we actually spent.
	const savingsTotals = {
		opus: benchmarkTotals.opus - spent,
		sonnet: benchmarkTotals.sonnet - spent,
		haiku: benchmarkTotals.haiku - spent
	};
	return { ...usageSummary, daily, benchmarkTotals, savingsTotals };
}
|
|
131
|
+
/**
 * Price one totals record at every Claude tier.
 *
 * @param totals Object carrying `input`, `output`, `cacheRead`, `cacheWrite`.
 * @returns `{ opus, sonnet, haiku }`; a tier missing from the `benchmarkUsage`
 *   result is reported as 0.
 */
function computeBenchmarkPerTier(totals) {
	const { input, output, cacheRead, cacheWrite } = totals;
	const priced = benchmarkUsage({
		usage: { input, output, cacheRead, cacheWrite },
		target: "all"
	});
	const byTier = new Map();
	for (const { tier, costUsd } of priced) byTier.set(tier, costUsd);
	const costOf = (tier) => byTier.get(tier) ?? 0;
	return { opus: costOf("opus"), sonnet: costOf("sonnet"), haiku: costOf("haiku") };
}
|
|
148
|
+
/**
 * Translate a named reporting window into an epoch-millisecond range.
 *
 * @param window One of "today", "week", "month", "all".
 * @param now    End of the range in epoch milliseconds (defaults to the current time).
 * @returns `{ startMs, endMs }`, or `undefined` for an unrecognised window name.
 *   "today" starts at local midnight, "week" is the trailing 7 days,
 *   "month" the trailing 30 days, and "all" starts at epoch 0.
 */
function resolveWindow(window, now = Date.now()) {
	const MINUTE_MS = 6e4;
	if (window === "today") {
		const midnight = new Date(now);
		midnight.setHours(0, 0, 0, 0);
		return { startMs: midnight.getTime(), endMs: now };
	}
	if (window === "week") return { startMs: now - 7 * 24 * 60 * MINUTE_MS, endMs: now };
	if (window === "month") return { startMs: now - 30 * 24 * 60 * MINUTE_MS, endMs: now };
	if (window === "all") return { startMs: 0, endMs: now };
	return void 0;
}
|
|
173
|
+
/**
 * Render a benchmark summary as a plain-text report.
 *
 * @param summary Result of `loadCostBenchmarkSummary` (reads `totals`,
 *   `benchmarkTotals` and `daily`).
 * @param label   Optional heading; emitted as `# <label>` when truthy.
 * @returns Newline-joined report: token totals, spend, Claude equivalents and,
 *   when daily data exists, the last seven daily rows.
 */
function formatSummaryReport(summary, label) {
	const totals = summary.totals;
	const claude = summary.benchmarkTotals;
	const usd = (amount) => amount.toFixed(4);
	const report = label ? [`# ${label}`] : [];
	report.push(
		`Tokens: ${formatTokens(totals.totalTokens)} (${formatTokens(totals.input)} in / ${formatTokens(totals.output)} out)`,
		`Spend: $${usd(totals.totalCost)}`,
		`Claude equivalents:`,
		` Opus: $${usd(claude.opus)} (saved $${usd(claude.opus - totals.totalCost)})`,
		` Sonnet: $${usd(claude.sonnet)} (saved $${usd(claude.sonnet - totals.totalCost)})`,
		` Haiku: $${usd(claude.haiku)} (saved $${usd(claude.haiku - totals.totalCost)})`
	);
	if (summary.daily.length > 0) {
		report.push("", "Daily:");
		// Only the seven most recent entries are listed.
		for (const day of summary.daily.slice(-7)) {
			report.push(` ${day.date}: ${formatTokens(day.totalTokens).padStart(8)} tokens · $${usd(day.totalCost).padStart(8)} · vs Sonnet $${usd(day.benchmarkUsd.sonnet)}`);
		}
	}
	return report.join("\n");
}
|
|
191
|
+
/**
 * Format a token count compactly: plain integer below 1k, `N.Nk` below 10k,
 * `Nk` below 1m, and `N.Nm` from there on.
 *
 * The unit thresholds sit at the point where the smaller unit would round up
 * into the next one, so values such as 999 950 print as "1.0m" instead of
 * "1000k" and 9 999 prints as "10k" instead of "10.0k".
 *
 * @param n Token count.
 * @returns The formatted count; "0" for non-finite, zero or negative input.
 */
function formatTokens(n) {
	if (!Number.isFinite(n) || n <= 0) return "0";
	// 999.5k is the first value that would render as "1000k".
	if (n >= 999500) return `${(n / 1e6).toFixed(1)}m`;
	// 999.5 would round to "1000"; 9950 is the first value that reads as 10k.
	if (n >= 999.5) return `${(n / 1e3).toFixed(n >= 9950 ? 0 : 1)}k`;
	return String(Math.round(n));
}
|
|
197
|
+
|
|
198
|
+
//#endregion
|
|
199
|
+
//#region src/cost/rates.ts
|
|
200
|
+
/** VRAM per GPU in GB, keyed by GPU type id. Also serves as the set of known GPU types. */
const GPU_VRAM_GB = {
	"a100-40gb": 40,
	"a100-80gb": 80,
	"h100-pcie": 80,
	"h100-sxm": 80,
	"h200": 141,
	"l40": 48,
	"l40s": 48,
	"a40": 48,
	"a6000": 48,
	"a5000": 24,
	"v100": 32,
	"mi300x": 192,
	"rtx-4090": 24
};
|
|
215
|
+
/** Human-readable GPU name for each GPU type id, used in the hardware line. */
const GPU_LABEL = {
	"a100-40gb": "A100-40GB",
	"a100-80gb": "A100-80GB",
	"h100-pcie": "H100-PCIe",
	"h100-sxm": "H100-SXM",
	"h200": "H200",
	"l40": "L40",
	"l40s": "L40S",
	"a40": "A40",
	"a6000": "RTX-A6000",
	"a5000": "RTX-A5000",
	"v100": "V100",
	"mi300x": "MI300X",
	"rtx-4090": "RTX-4090"
};
|
|
230
|
+
/** Fallback rental price in USD per GPU-hour, used when config supplies no valid rate. */
const DEFAULT_HOURLY_RATES_USD = {
	"a100-40gb": 2.06,
	"a100-80gb": 2.21,
	"h100-pcie": 4.25,
	"h100-sxm": 4.76,
	"h200": 6.5,
	"l40": 1.27,
	"l40s": 1.35,
	"a40": 0.95,
	"a6000": 0.96,
	"a5000": 0.61,
	"v100": 0.8,
	"mi300x": 4.89,
	"rtx-4090": 0.74
};
|
|
245
|
+
/**
 * Ballpark decode (output) throughput in tokens/sec for a 27-30B FP8 model on
 * one GPU of each type. Serves as the fallback while no live
 * `vllm:generation_tokens_total` delta has been observed.
 *
 * The figures are conservative published-benchmark medians. Throughput scales
 * roughly inversely with parameter count and bytes per parameter.
 * `models.gpuRates` overrides prices only; throughput overrides are planned
 * for Phase 4+ and are not wired to config yet.
 */
const DEFAULT_DECODE_TPS = {
	"a100-40gb": 60,
	"a100-80gb": 80,
	"h100-pcie": 120,
	"h100-sxm": 130,
	"h200": 160,
	"l40": 45,
	"l40s": 50,
	"a40": 40,
	"a6000": 35,
	"a5000": 25,
	"v100": 25,
	"mi300x": 110,
	"rtx-4090": 55
};
|
|
270
|
+
/**
 * Multiplier applied to decode throughput to estimate prefill throughput.
 * Prefill typically runs 30-60× faster than decode on the same setup;
 * 40× is used as a balanced midpoint.
 */
const PREFILL_DECODE_RATIO = 40;
|
|
275
|
+
/**
 * Estimate decode throughput for a model on a given GPU type from the default table.
 *
 * @param gpuType       Key into `DEFAULT_DECODE_TPS`.
 * @param paramsB       Model size in billions of parameters (optional).
 * @param bytesPerParam Bytes per parameter; treated as 2 when nullish.
 * @returns The table value when the model size is unknown or non-positive,
 *   otherwise the table value scaled by `27 / (paramsB × bytesPerParam)`,
 *   with a floor of 5 tokens/sec.
 */
function resolveDefaultDecodeTps(gpuType, paramsB, bytesPerParam) {
	const tableTps = DEFAULT_DECODE_TPS[gpuType];
	if (!paramsB) return tableTps;
	// Footprint of the reference model behind the table (27B × 1 byte for FP8).
	const REFERENCE_FOOTPRINT = 27;
	const footprint = paramsB * (bytesPerParam ?? 2);
	return footprint <= 0 ? tableTps : Math.max(5, tableTps * (REFERENCE_FOOTPRINT / footprint));
}
|
|
284
|
+
/**
 * Pick the hourly USD rate for a GPU type.
 *
 * @param gpuType   GPU type id.
 * @param overrides Optional per-type rates from config.
 * @returns The override when it is a non-negative number, otherwise the entry
 *   from `DEFAULT_HOURLY_RATES_USD` (which may be `undefined` for unknown types).
 */
function resolveHourlyRate(gpuType, overrides) {
	const custom = overrides ? overrides[gpuType] : void 0;
	const customIsValid = typeof custom === "number" && custom >= 0;
	return customIsValid ? custom : DEFAULT_HOURLY_RATES_USD[gpuType];
}
|
|
288
|
+
|
|
289
|
+
//#endregion
|
|
290
|
+
//#region src/cost/gpu-inference.ts
|
|
291
|
+
/**
|
|
292
|
+
* Heuristic GPU type + count inference for self-hosted serving endpoints.
|
|
293
|
+
*
|
|
294
|
+
* Two signals: the baseUrl hostname (often encodes "a100", "h100", etc.) and
|
|
295
|
+
* the model id (encodes parameter count + quantization, e.g. "qwen36-27b-fp8").
|
|
296
|
+
* Output is a confidence-tagged guess so downstream code (and the user) can
|
|
297
|
+
* decide whether to trust the inference or override it.
|
|
298
|
+
*
|
|
299
|
+
* Bytes-per-parameter table:
|
|
300
|
+
* fp32 → 4
|
|
301
|
+
* fp16/bf16 → 2 (default when unspecified)
|
|
302
|
+
* fp8/int8/q8 → 1
|
|
303
|
+
* int4/q4/fp4 → 0.5
|
|
304
|
+
*
|
|
305
|
+
* GPU count formula:
|
|
306
|
+
* model_bytes = params × bytes_per_param
|
|
307
|
+
* target_util = 0.6 (40% headroom for KV cache, activations, scheduler)
|
|
308
|
+
* count = max(1, ceil(model_bytes / (vram × target_util)))
|
|
309
|
+
*/
|
|
310
|
+
/**
 * Ordered URL patterns for GPU type detection; the first match wins.
 * Specific variants must therefore precede their generic fallbacks
 * (e.g. `h100-pcie` before `h100`, `l40s` before `l40`). A bare family name
 * such as `h100` or `a100` maps to an assumed variant at "medium" confidence.
 */
const URL_GPU_PATTERNS = [
	[/h200/i, "h200", "high"],
	[/h100[-_]?pcie/i, "h100-pcie", "high"],
	[/h100[-_]?sxm/i, "h100-sxm", "high"],
	[/h100/i, "h100-sxm", "medium"],
	[/a100[-_]?80/i, "a100-80gb", "high"],
	[/a100[-_]?40/i, "a100-40gb", "high"],
	[/a100/i, "a100-80gb", "medium"],
	[/l40s/i, "l40s", "high"],
	[/l40/i, "l40", "high"],
	[/mi300x/i, "mi300x", "high"],
	[/a6000/i, "a6000", "high"],
	[/a5000/i, "a5000", "high"],
	[/a40/i, "a40", "high"],
	[/v100/i, "v100", "high"],
	[/rtx[-_]?4090|gpu[-_]?4090/i, "rtx-4090", "high"]
].map(([re, type, confidence]) => ({ re, type, confidence }));
|
|
387
|
+
/**
 * Guess the GPU type from a serving endpoint URL.
 *
 * @param baseUrl Endpoint URL; the whole string is searched, not just the host.
 * @returns `{ gpuType, confidence, match }` for the first pattern in
 *   `URL_GPU_PATTERNS` that matches, or `{ confidence: "low" }` when none does.
 */
function inferGpuFromUrl(baseUrl) {
	const haystack = baseUrl.toLowerCase();
	for (const { re, type, confidence } of URL_GPU_PATTERNS) {
		const hit = haystack.match(re);
		if (!hit) continue;
		return { gpuType: type, confidence, match: hit[0] };
	}
	return { confidence: "low" };
}
|
|
399
|
+
/**
 * Extract parameter count and quantization width from a model id.
 *
 * @param modelId Model identifier such as "qwen36-27b-fp8".
 * @returns `{ paramsB, bytesPerParam }`. `paramsB` is the first `<number>b`
 *   token followed by a separator or end of string; `bytesPerParam` is
 *   0.5 / 1 / 2 / 4 for 4-bit / 8-bit / 16-bit / 32-bit markers. Either field
 *   is `undefined` when nothing matched.
 */
function parseModelSize(modelId) {
	const id = modelId.toLowerCase();
	const sizeHit = /(\d+(?:\.\d+)?)b(?:[-_./]|$)/.exec(id);
	// Checked in order; the first width whose marker appears in the id wins.
	// The extra `[-_]` patterns catch ids where `_` defeats the `\b` boundary.
	const quantRules = [
		[0.5, [/\b(fp4|q4|int4)\b/, /[-_]q4[-_]/]],
		[1, [/\b(fp8|int8|q8|w8a8|w8a16)\b/, /[-_]fp8[-_]?/]],
		[2, [/\b(fp16|bf16|float16|half)\b/]],
		[4, [/\b(fp32|float32)\b/]]
	];
	const matchedRule = quantRules.find(([, markers]) => markers.some((marker) => marker.test(id)));
	return {
		paramsB: sizeHit ? Number.parseFloat(sizeHit[1]) : void 0,
		bytesPerParam: matchedRule ? matchedRule[0] : void 0
	};
}
|
|
413
|
+
/**
 * Estimate how many GPUs are needed to hold a model's weights.
 *
 * @param args.paramsB           Model size in billions of parameters.
 * @param args.bytesPerParam     Bytes per parameter.
 * @param args.vramGb            VRAM per GPU in GB.
 * @param args.targetUtilization Fraction of VRAM usable for weights (default 0.6).
 * @returns `ceil(modelBytes / usableVram)`, never less than 1; 1 when the
 *   model size or width is unknown.
 */
function inferGpuCount(args) {
	const { paramsB, bytesPerParam, vramGb, targetUtilization } = args;
	if (!paramsB || !bytesPerParam) return 1;
	const utilization = targetUtilization ?? 0.6;
	const weightBytes = paramsB * 1e9 * bytesPerParam;
	const usableBytes = vramGb * 1e9 * utilization;
	return Math.max(1, Math.ceil(weightBytes / usableBytes));
}
|
|
420
|
+
/**
 * Combine config overrides, URL heuristics and model-id parsing into a single
 * GPU guess.
 *
 * GPU type: a config override wins when it names a known type (own key of
 * `GPU_VRAM_GB`); otherwise the URL is pattern-matched. GPU count: a finite,
 * positive override wins (floored, but never below 1); otherwise the count is
 * derived from model size and VRAM when the type is known, else 1.
 *
 * @param args.baseUrl  Optional serving endpoint URL.
 * @param args.modelId  Optional model identifier.
 * @param args.override Optional `{ gpuType, gpuCount }` from config.
 * @returns `{ gpuType, gpuCount, confidence, paramsB, bytesPerParam, reasoning }`
 *   where `reasoning` lists, in order, how each value was obtained.
 */
function inferGpu(args) {
	const reasoning = [];
	const overrideType = args.override?.gpuType;
	const overrideCount = args.override?.gpuCount;
	let gpuType;
	let confidence = "low";
	// Own-property check so inherited keys such as "constructor" are never accepted as a GPU type.
	if (overrideType && Object.hasOwn(GPU_VRAM_GB, overrideType)) {
		gpuType = overrideType;
		confidence = "high";
		reasoning.push(`gpuType=${gpuType} from config override`);
	} else if (args.baseUrl) {
		const fromUrl = inferGpuFromUrl(args.baseUrl);
		if (fromUrl.gpuType) {
			gpuType = fromUrl.gpuType;
			confidence = fromUrl.confidence;
			reasoning.push(`gpuType=${gpuType} from URL pattern '${fromUrl.match}'`);
		} else reasoning.push("no GPU type pattern matched in URL");
	}
	const sized = args.modelId ? parseModelSize(args.modelId) : {};
	if (sized.paramsB) reasoning.push(`paramsB=${sized.paramsB} from model id '${args.modelId}'`);
	if (sized.bytesPerParam) reasoning.push(`bytesPerParam=${sized.bytesPerParam} from quantization marker`);
	else if (args.modelId) reasoning.push("no quantization marker — defaulting to bf16 (2 bytes)");
	let gpuCount = 1;
	if (typeof overrideCount === "number" && Number.isFinite(overrideCount) && overrideCount > 0) {
		// Clamp to at least one GPU: a fractional override like 0.5 would otherwise
		// floor to 0 and zero out every downstream cost.
		gpuCount = Math.max(1, Math.floor(overrideCount));
		reasoning.push(`gpuCount=${gpuCount} from config override`);
	} else if (gpuType) {
		const bpp = sized.bytesPerParam ?? 2;
		gpuCount = inferGpuCount({
			paramsB: sized.paramsB,
			bytesPerParam: bpp,
			vramGb: GPU_VRAM_GB[gpuType]
		});
		if (sized.paramsB) reasoning.push(`gpuCount=${gpuCount} (model ${(sized.paramsB * bpp).toFixed(0)}GB / ${GPU_VRAM_GB[gpuType]}GB × 0.6 util)`);
	}
	return {
		gpuType,
		gpuCount,
		confidence,
		paramsB: sized.paramsB,
		// Deliberately the raw parsed value (possibly undefined), not the bf16 default.
		bytesPerParam: sized.bytesPerParam,
		reasoning
	};
}
|
|
464
|
+
|
|
465
|
+
//#endregion
|
|
466
|
+
//#region src/cost/serving-discovery.ts
|
|
467
|
+
/** How long a successful probe result stays cached (one minute). */
const SUCCESS_TTL_MS = 60 * 1e3;
/** How long a probe that found nothing stays cached (five minutes). */
const NULL_TTL_MS = 5 * 60 * 1e3;
/** Per-request timeout applied when the caller gives none. */
const DEFAULT_TIMEOUT_MS = 4 * 1e3;
/** Probe results keyed by normalised base URL: `{ info, expiresAt }`. */
const cache = /* @__PURE__ */ new Map();
/** Last token-counter snapshot per base URL, used to derive observed tokens/sec. */
const previousSnapshots = /* @__PURE__ */ new Map();
|
|
472
|
+
/**
 * Probe a self-hosted serving endpoint for engine, model and load information.
 *
 * Requests `/version`, `/v1/models` and `/metrics` in parallel under the URL
 * root (base URL with a trailing `/vN` removed). Each request failure is
 * tolerated individually. Results are cached per normalised base URL, and
 * consecutive metric snapshots are compared to derive `observedTps`.
 *
 * @param baseUrl Endpoint base URL; surrounding whitespace and trailing slashes are ignored.
 * @param opts.fetchImpl    Fetch implementation (defaults to `globalThis.fetch`).
 * @param opts.now          Clock returning epoch milliseconds (defaults to `Date.now`).
 * @param opts.timeoutMs    Per-request timeout (defaults to `DEFAULT_TIMEOUT_MS`).
 * @param opts.forceRefresh Skip the cache lookup when truthy.
 * @returns The info object, or `null` when the URL is blank or no probe
 *   yielded version, model, cache-config or load data.
 */
async function probeServingEndpoint(baseUrl, opts = {}) {
	const fetchImpl = opts.fetchImpl ?? globalThis.fetch;
	const clock = opts.now ?? (() => Date.now());
	const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
	const cacheKey = baseUrl.trim().replace(/\/+$/, "");
	if (cacheKey === "") return null;
	const remembered = cache.get(cacheKey);
	if (remembered && !opts.forceRefresh && remembered.expiresAt > clock()) return remembered.info;
	const rootUrl = cacheKey.replace(/\/v\d+$/, "");
	const probeAt = clock();
	// Each request gets its own timeout signal.
	const request = (path) => fetchImpl(`${rootUrl}${path}`, {
		signal: AbortSignal.timeout(timeoutMs),
		headers: { Accept: "application/json, text/plain, */*" }
	});
	const [versionRes, modelsRes, metricsRes] = await Promise.allSettled([
		request("/version"),
		request("/v1/models"),
		request("/metrics")
	]);
	const succeeded = (outcome) => outcome.status === "fulfilled" && outcome.value.ok;
	const info = {
		engine: "unknown",
		probedAt: probeAt,
		baseUrl: cacheKey,
		rootUrl
	};
	if (succeeded(versionRes)) try {
		const body = await versionRes.value.text();
		try {
			const json = JSON.parse(body);
			if (typeof json.version === "string") info.version = json.version;
		} catch {
			// Not usable JSON: accept a short bare token as a plain-text version string.
			const bare = body.trim();
			if (bare && bare.length < 64 && /^[\w.\-+]+$/.test(bare)) info.version = bare;
		}
	} catch {}
	if (succeeded(modelsRes)) try {
		const payload = await modelsRes.value.json();
		// Only the first listed model is inspected.
		const first = payload.data?.[0];
		if (first) {
			if (typeof first.id === "string") info.modelId = first.id;
			if (typeof first.max_model_len === "number") info.contextWindow = first.max_model_len;
			if (typeof first.owned_by === "string") {
				const owner = first.owned_by.toLowerCase();
				if (owner.includes("vllm")) info.engine = "vllm";
				else if (owner.includes("sglang")) info.engine = "sglang";
				else if (info.engine === "unknown") info.engine = "openai-compat";
			}
		}
	} catch {}
	let snapshot = { probedAt: probeAt };
	if (succeeded(metricsRes)) try {
		const metrics = parsePrometheusMetrics(await metricsRes.value.text());
		info.kvCacheUsagePerc = metrics.kvCacheUsagePerc;
		info.queueDepth = metrics.queueDepth;
		info.numRequestsRunning = metrics.numRequestsRunning;
		info.cacheConfig = metrics.cacheConfig;
		snapshot = {
			probedAt: probeAt,
			promptTokensTotal: metrics.promptTokensTotal,
			generationTokensTotal: metrics.generationTokensTotal
		};
		// Metric prefixes identify the engine only if /v1/models did not already.
		if (info.engine === "unknown" && metrics.foundVllmPrefix) info.engine = "vllm";
		if (info.engine === "unknown" && metrics.foundSglangPrefix) info.engine = "sglang";
	} catch {}
	const earlier = previousSnapshots.get(cacheKey);
	if (earlier && typeof earlier.generationTokensTotal === "number" && typeof snapshot.generationTokensTotal === "number") {
		const elapsedSec = (snapshot.probedAt - earlier.probedAt) / 1e3;
		const produced = snapshot.generationTokensTotal - earlier.generationTokensTotal;
		// A negative delta means the counter went backwards; no rate is reported then.
		if (elapsedSec > 0 && produced >= 0) info.observedTps = produced / elapsedSec;
	}
	if (snapshot.generationTokensTotal !== void 0 || snapshot.promptTokensTotal !== void 0) previousSnapshots.set(cacheKey, snapshot);
	const sawSignal = [
		info.version,
		info.modelId,
		info.cacheConfig,
		info.kvCacheUsagePerc,
		info.queueDepth,
		info.numRequestsRunning
	].some((field) => field !== void 0);
	const outcome = sawSignal ? info : null;
	// Empty results are cached too (with their own TTL) so a dead endpoint is not re-probed on every call.
	cache.set(cacheKey, {
		info: outcome,
		expiresAt: clock() + (sawSignal ? SUCCESS_TTL_MS : NULL_TTL_MS)
	});
	return outcome;
}
|
|
555
|
+
/**
 * Extract the vLLM metrics this module uses from Prometheus text-exposition output.
 *
 * Sample lines have the form `name[{labels}] value [timestamp]`; the optional
 * trailing integer timestamp is accepted and ignored. Comment lines, blank
 * lines and samples with a non-finite value are skipped.
 *
 * @param text Raw body of a `/metrics` response.
 * @returns An object with `foundVllmPrefix` / `foundSglangPrefix` flags plus,
 *   when present: `promptTokensTotal` and `generationTokensTotal` (summed over
 *   all label sets), `kvCacheUsagePerc`, `queueDepth`, `numRequestsRunning`
 *   (last sample wins) and `cacheConfig`.
 */
function parsePrometheusMetrics(text) {
	const result = {
		foundVllmPrefix: false,
		foundSglangPrefix: false
	};
	const sampleRe = /^([^\s{]+)(\{[^}]*\})?\s+([0-9eE+\-.]+)(?:\s+-?\d+)?$/;
	for (const raw of text.split("\n")) {
		const line = raw.trim();
		if (!line || line.startsWith("#")) continue;
		if (line.startsWith("vllm:")) result.foundVllmPrefix = true;
		if (line.startsWith("sglang:")) result.foundSglangPrefix = true;
		const match = line.match(sampleRe);
		if (!match) continue;
		const metric = match[1];
		const labels = match[2];
		const value = Number(match[3]);
		if (!Number.isFinite(value)) continue;
		switch (metric) {
			case "vllm:prompt_tokens_total":
				result.promptTokensTotal = (result.promptTokensTotal ?? 0) + value;
				break;
			case "vllm:generation_tokens_total":
				result.generationTokensTotal = (result.generationTokensTotal ?? 0) + value;
				break;
			case "vllm:kv_cache_usage_perc":
				result.kvCacheUsagePerc = value;
				break;
			case "vllm:num_requests_waiting":
				// Series carrying a `reason` label are skipped; only the unlabelled-by-reason sample sets the depth.
				if (!labels || !labels.includes("reason=")) result.queueDepth = value;
				break;
			case "vllm:num_requests_running":
				result.numRequestsRunning = value;
				break;
			case "vllm:cache_config_info": {
				// The configuration lives entirely in the labels; the sample value is ignored.
				if (!labels) break;
				const parsed = parseLabels(labels);
				const cfg = {};
				// Numeric fields are stored only when they parse to a finite number.
				const setNumeric = (key, rawValue) => {
					if (rawValue === void 0) return;
					const n = Number(rawValue);
					if (Number.isFinite(n)) cfg[key] = n;
				};
				setNumeric("blockSize", parsed.block_size);
				setNumeric("numGpuBlocks", parsed.num_gpu_blocks);
				if (parsed.cache_dtype !== void 0) cfg.cacheDtype = parsed.cache_dtype;
				setNumeric("gpuMemoryUtilization", parsed.gpu_memory_utilization);
				// The flag is compared against the literal string "True".
				if (parsed.enable_prefix_caching !== void 0) cfg.enablePrefixCaching = parsed.enable_prefix_caching === "True";
				result.cacheConfig = cfg;
				break;
			}
		}
	}
	return result;
}
|
|
607
|
+
/**
 * Parse a Prometheus label set into a plain object.
 *
 * @param rawLabels Label text such as `{a="1",b="2"}`; the outer braces are optional.
 * @returns Map of label name to raw string value. When a name repeats, the
 *   last value wins. Values containing `"` are not supported.
 */
function parseLabels(rawLabels) {
	let body = rawLabels;
	if (body.startsWith("{")) body = body.slice(1);
	if (body.endsWith("}")) body = body.slice(0, -1);
	const labels = {};
	for (const [, name, value] of body.matchAll(/(\w+)="([^"]*)"/g)) labels[name] = value;
	return labels;
}
|
|
615
|
+
/**
 * Render the one-line serving-engine status.
 *
 * @param info Probe result from `probeServingEndpoint`, or null/undefined.
 * @returns A "🖥"-prefixed line with engine name/version, observed tokens/sec
 *   (above 0.5), KV-cache percentage, queue depth and running requests (each
 *   only when positive), or `null` when there is nothing to show.
 */
function formatEngineLine(info) {
	if (!info) return null;
	const { engine, version, observedTps, kvCacheUsagePerc, queueDepth, numRequestsRunning } = info;
	let engineLabel = null;
	if (engine === "vllm") engineLabel = version ? `vLLM ${version}` : "vLLM";
	else if (engine === "sglang") engineLabel = version ? `SGLang ${version}` : "SGLang";
	else if (engine === "openai-compat") engineLabel = "OpenAI-compat";
	else if (version) engineLabel = `engine ${version}`;
	const segments = [];
	if (engineLabel) segments.push(engineLabel);
	if (typeof observedTps === "number" && observedTps > .5) segments.push(`${Math.round(observedTps)} tps`);
	if (typeof kvCacheUsagePerc === "number") {
		// The raw value is a 0..1 fraction; it is shown as a whole percentage.
		const pct = Math.round(kvCacheUsagePerc * 100);
		if (pct > 0) segments.push(`KV ${pct}%`);
	}
	if (typeof queueDepth === "number" && queueDepth > 0) segments.push(`queue ${queueDepth}`);
	if (typeof numRequestsRunning === "number" && numRequestsRunning > 0) segments.push(`running ${numRequestsRunning}`);
	return segments.length === 0 ? null : `🖥 ${segments.join(" · ")}`;
}
|
|
635
|
+
|
|
636
|
+
//#endregion
|
|
637
|
+
//#region src/cost/breakdown.ts
|
|
638
|
+
/**
 * Assemble the cost breakdown for one model call.
 *
 * Steps, in order: resolve the benchmark target from `config.models.benchmark`,
 * probe the serving endpoint (best effort), infer GPU type and count, pick an
 * hourly rate, pick decode/prefill throughput, derive the instance cost from
 * GPU time, choose the effective cost, compare it against Claude tiers, and
 * finally format the hardware line.
 *
 * @param args.config            Application config (optional).
 * @param args.provider          Provider key used to look up per-model cost config.
 * @param args.modelId           Configured model id.
 * @param args.baseUrl           Serving endpoint URL; probing is skipped when blank.
 * @param args.usage             Token usage with `input` / `output` counts (optional).
 * @param args.probeTimeoutMs    Probe timeout in ms (default 3000).
 * @param args.forceProbeRefresh Bypass the probe cache when truthy.
 * @returns The breakdown object. Fields that could not be determined are left
 *   `undefined`; `serving` is `null` when the probe was skipped, failed or found nothing.
 */
async function buildCostBreakdown(args) {
	const configuredBenchmark = args.config?.models?.benchmark;
	// "none" disables the Claude comparison; any other value is normalised to a valid target.
	const benchmarkTarget = configuredBenchmark === "none" ? "none" : resolveBenchmarkTarget(configuredBenchmark);
	const breakdown = {
		serving: null,
		benchmarks: [],
		benchmarkTarget
	};
	if (args.baseUrl && args.baseUrl.trim()) try {
		breakdown.serving = await probeServingEndpoint(args.baseUrl, {
			timeoutMs: args.probeTimeoutMs ?? 3e3,
			forceRefresh: Boolean(args.forceProbeRefresh)
		});
	} catch {
		// Probing is best effort: a failure must never break cost reporting.
		breakdown.serving = null;
	}
	breakdown.engineLine = formatEngineLine(breakdown.serving) ?? void 0;
	const modelCost = resolveModelCost(args.config, args.provider, args.modelId);
	// Prefer the model id reported by the endpoint over the configured one.
	const detectedModelId = breakdown.serving?.modelId ?? args.modelId;
	breakdown.gpu = inferGpu({
		baseUrl: args.baseUrl,
		modelId: detectedModelId,
		override: {
			gpuType: modelCost?.gpuType,
			gpuCount: modelCost?.gpuCount
		}
	});
	const overrides = args.config?.models?.gpuRates;
	let rate;
	let source;
	if (typeof modelCost?.hourlyRate === "number" && modelCost.hourlyRate >= 0) {
		rate = modelCost.hourlyRate;
		source = "model-override";
	} else if (breakdown.gpu.gpuType) {
		rate = resolveHourlyRate(breakdown.gpu.gpuType, overrides);
		if (rate !== void 0) {
			// Same validity rule as resolveHourlyRate, so the tag always matches where
			// the rate came from: an invalid config entry falls back to the default
			// table and must be tagged as such.
			const configured = overrides?.[breakdown.gpu.gpuType];
			const cameFromConfig = typeof configured === "number" && configured >= 0;
			source = cameFromConfig ? "config-override" : "default-table";
		}
	}
	breakdown.hourlyRate = rate;
	breakdown.hourlyRateSource = source;
	if (breakdown.gpu?.gpuType) {
		const observed = breakdown.serving?.observedTps;
		if (typeof observed === "number" && observed > .5) {
			breakdown.decodeTps = observed;
			breakdown.prefillTps = observed * PREFILL_DECODE_RATIO;
			breakdown.throughputSource = "observed";
		} else {
			const decode = resolveDefaultDecodeTps(breakdown.gpu.gpuType, breakdown.gpu.paramsB, breakdown.gpu.bytesPerParam);
			breakdown.decodeTps = decode;
			breakdown.prefillTps = decode * PREFILL_DECODE_RATIO;
			breakdown.throughputSource = "default-table";
		}
	}
	if (args.usage && breakdown.hourlyRate !== void 0 && breakdown.gpu?.gpuType && breakdown.decodeTps !== void 0 && breakdown.prefillTps !== void 0) {
		// Instance cost = GPU seconds spent on prefill and decode × per-second price of all GPUs.
		const perSecond = breakdown.hourlyRate * breakdown.gpu.gpuCount / 3600;
		const inputTokens = args.usage.input ?? 0;
		const outputTokens = args.usage.output ?? 0;
		const prefillSeconds = breakdown.prefillTps > 0 ? inputTokens / breakdown.prefillTps : 0;
		const decodeSeconds = breakdown.decodeTps > 0 ? outputTokens / breakdown.decodeTps : 0;
		const totalCost = (prefillSeconds + decodeSeconds) * perSecond;
		breakdown.instanceCostUsd = totalCost;
		const totalTokens = inputTokens + outputTokens;
		if (totalTokens > 0) breakdown.perTokenAvgUsd = totalCost / totalTokens;
	}
	if (args.usage) {
		const costForEstimate = modelCost ? {
			input: modelCost.input ?? 0,
			output: modelCost.output ?? 0,
			cacheRead: modelCost.cacheRead ?? 0,
			cacheWrite: modelCost.cacheWrite ?? 0
		} : void 0;
		const rawCost = estimateUsageCost({
			usage: args.usage,
			cost: costForEstimate
		});
		// A positive configured per-token cost wins; otherwise fall back to the GPU-time estimate.
		if (typeof rawCost === "number" && rawCost > 0) breakdown.effectiveCostUsd = rawCost;
		else if (breakdown.instanceCostUsd !== void 0) breakdown.effectiveCostUsd = breakdown.instanceCostUsd;
		if (benchmarkTarget !== "none" && breakdown.effectiveCostUsd !== void 0) {
			breakdown.benchmarks = benchmarkUsage({
				usage: args.usage,
				target: benchmarkTarget
			});
			breakdown.costLineSuffix = formatBenchmarkSuffix({
				ourCostUsd: breakdown.effectiveCostUsd,
				benchmarks: breakdown.benchmarks
			});
		}
	}
	breakdown.hardwareLine = formatHardwareLine(breakdown);
	return breakdown;
}
|
|
725
|
+
/**
 * Look up the `cost` block configured for a provider's model.
 *
 * @param config   Application config; reads `config.models.providers`.
 * @param provider Provider key.
 * @param modelId  Model id to match against each entry's `id`.
 * @returns The matching model's `cost` object, or `undefined` when any input is
 *   missing, the provider has no model list, or no model matches.
 */
function resolveModelCost(config, provider, modelId) {
	const haveInputs = Boolean(config && provider && modelId);
	if (!haveInputs) return void 0;
	const providerTable = config.models?.providers;
	if (!providerTable) return void 0;
	const modelList = providerTable[provider]?.models;
	if (!modelList) return void 0;
	const entry = modelList.find((candidate) => candidate.id === modelId);
	return entry?.cost;
}
|
|
733
|
+
/**
 * Render the one-line hardware summary for a breakdown.
 *
 * @param breakdown Reads `gpu`, `hourlyRate` and `hourlyRateSource`.
 * @returns A "🧰"-prefixed line with GPU count and label, the total hourly
 *   price when known, and a tag saying where the information came from; or
 *   `undefined` when no GPU type is known.
 */
function formatHardwareLine(breakdown) {
	const gpu = breakdown.gpu;
	if (!gpu?.gpuType) return void 0;
	const gpuCount = gpu.gpuCount;
	const segments = [`${gpuCount}× ${GPU_LABEL[gpu.gpuType]}`];
	if (typeof breakdown.hourlyRate === "number") segments.push(`≈ $${(breakdown.hourlyRate * gpuCount).toFixed(2)}/hr`);
	// An explicit rate override takes precedence over the inference confidence in the tag.
	let provenance;
	if (breakdown.hourlyRateSource === "model-override") provenance = "model override";
	else if (breakdown.hourlyRateSource === "config-override") provenance = "config override";
	else if (gpu.confidence === "high") provenance = "inferred";
	else if (gpu.confidence === "medium") provenance = "inferred · medium-conf";
	else provenance = "inferred · low-conf";
	segments.push(`(${provenance})`);
	return `🧰 ${segments.join(" · ")}`;
}
|
|
751
|
+
|
|
752
|
+
//#endregion
|
|
753
|
+
export { resolveWindow as i, formatSummaryReport as n, loadCostBenchmarkSummary as r, buildCostBreakdown as t };
|