metrillm-mcp 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +773 -191
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -28,10 +28,22 @@ import { Ollama } from "ollama";
|
|
|
28
28
|
|
|
29
29
|
// ../src/utils.ts
|
|
30
30
|
import vm from "vm";
|
|
31
|
-
import { execFile } from "child_process";
|
|
31
|
+
import { execFile, spawn } from "child_process";
|
|
32
32
|
/**
 * Best-effort: ask the operating system to open `url` with its default handler.
 *
 * Windows goes through `cmd /c start`, macOS through `open`, everything else
 * through `xdg-open`. Launch failures are deliberately ignored.
 *
 * NOTE(review): on Windows the URL is handed to `cmd`; characters such as `&`
 * may be interpreted by the command processor — confirm callers only pass
 * trusted, already-encoded URLs.
 *
 * @param {string} url - Address to open.
 * @returns {void}
 */
function openUrl(url) {
  // Opening a browser is a convenience, so a failed launch must never throw.
  const ignoreLaunchError = () => {
  };
  if (process.platform !== "win32") {
    const opener = process.platform === "darwin" ? "open" : "xdg-open";
    execFile(opener, [url]).on("error", ignoreLaunchError);
    return;
  }
  // The empty string is the window-title argument that `start` expects first.
  const launcher = spawn("cmd", ["/c", "start", "", url], {
    windowsHide: true,
    stdio: "ignore"
  });
  launcher.on("error", ignoreLaunchError);
  // Do not keep this process alive waiting for the launcher.
  launcher.unref();
}
|
|
36
48
|
function avg(nums) {
|
|
37
49
|
if (nums.length === 0) return 0;
|
|
@@ -110,7 +122,8 @@ function stripThinkTags(text) {
|
|
|
110
122
|
}
|
|
111
123
|
/**
 * Reports whether a model reply carries reasoning ("thinking") content.
 *
 * @param {string} response - Visible response text.
 * @param {string} [thinkingField] - Separate reasoning field, when the backend provides one.
 * @returns {boolean} True when the reasoning field is non-blank, the response
 *   contains a `<think` / `<thinking` tag, or it starts with a
 *   "Thinking Process:" / "Thought Process:" heading.
 */
function hasThinkingContent(response, thinkingField) {
  const hasDedicatedField = Boolean(thinkingField) && thinkingField.trim().length > 0;
  if (hasDedicatedField) return true;
  const tagPattern = /<think(?:ing)?[\s>]/i;
  const headingPattern = /^\s*(?:thinking|thought)\s+process\s*:/i;
  return tagPattern.test(response) || headingPattern.test(response);
}
|
|
115
128
|
function estimateTokenCount(text) {
|
|
116
129
|
if (!text) return 0;
|
|
@@ -514,7 +527,8 @@ function extractCodeBlock(text, preferredFunctionName) {
|
|
|
514
527
|
var client = new Ollama();
|
|
515
528
|
var DEFAULT_OLLAMA_HOST = "http://127.0.0.1:11434";
|
|
516
529
|
var OLLAMA_INIT_TIMEOUT_MS = 12e4;
|
|
517
|
-
var
|
|
530
|
+
var DEFAULT_STREAM_STALL_TIMEOUT_MS = 3e4;
|
|
531
|
+
var SHARED_STREAM_STALL_TIMEOUT_ENV = "METRILLM_STREAM_STALL_TIMEOUT_MS";
|
|
518
532
|
function getOllamaBaseUrl() {
|
|
519
533
|
const configured = process.env.OLLAMA_HOST?.trim();
|
|
520
534
|
if (!configured) return DEFAULT_OLLAMA_HOST;
|
|
@@ -564,35 +578,81 @@ var defaultKeepAlive;
|
|
|
564
578
|
/**
 * Stores the keep-alive value that `buildGenerateRequest` falls back to when a
 * request does not carry its own `keep_alive`.
 *
 * @param {*} keepAlive - New module-wide default (stored as given).
 * @returns {void}
 */
function setDefaultKeepAlive(keepAlive) {
  defaultKeepAlive = keepAlive;
}
|
|
581
|
+
/**
 * Tells whether the caller supplied any sampling override (`top_p` or `seed`).
 *
 * @param {{top_p?: number, seed?: number}|null|undefined} options
 * @returns {boolean}
 */
function hasSamplingOverrides(options) {
  if (options == null) return false;
  if (options.top_p !== undefined) return true;
  return options.seed !== undefined;
}
|
|
584
|
+
/**
 * Heuristic: does this error look like the backend rejecting `seed` / `top_p`?
 *
 * The lowercased message must both name a sampling option and contain a
 * rejection keyword.
 *
 * @param {unknown} err - Thrown value; non-Error values are stringified.
 * @returns {boolean}
 */
function isUnsupportedSamplingOptionError(err) {
  const text = (err instanceof Error ? err.message : String(err)).toLowerCase();
  const namesSamplingOption = /\b(seed|top_p|topp)\b/.test(text);
  const looksLikeRejection = /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/.test(text);
  return namesSamplingOption && looksLikeRejection;
}
|
|
591
|
+
/**
 * Parses a string made only of decimal digits into a safe integer.
 *
 * @param {string} value - Candidate text (no sign, whitespace, or fraction allowed).
 * @returns {number|null} The parsed integer, or null when the text is not
 *   digits-only or exceeds the safe-integer range.
 */
function parseNonNegativeInt(value) {
  const isDigitsOnly = /^\d+$/.test(value);
  if (!isDigitsOnly) return null;
  const numeric = Number.parseInt(value, 10);
  return Number.isSafeInteger(numeric) && numeric >= 0 ? numeric : null;
}
|
|
597
|
+
/**
 * Resolves the stream stall timeout (milliseconds) for Ollama streaming.
 *
 * Precedence: explicit `override`, then the shared environment variable, then
 * the built-in default. A value of 0 disables the stall timer (returns
 * undefined). Invalid values fall back to the default.
 *
 * The result is passed to `setTimeout` by the stream reader. Node coerces any
 * delay above 2^31-1 ms to 1 ms, which would abort a stream almost immediately
 * when a very large timeout is requested, so results are clamped to that maximum.
 *
 * @param {number} [override] - Per-call timeout; fractional values are truncated.
 * @returns {number|undefined} Timeout in ms, or undefined when disabled.
 */
function resolveStreamStallTimeoutMs(override) {
  const MAX_TIMER_DELAY_MS = 2147483647;
  const clampToTimerRange = (ms) => Math.min(ms, MAX_TIMER_DELAY_MS);
  if (override !== void 0) {
    if (!Number.isFinite(override) || override < 0) return DEFAULT_STREAM_STALL_TIMEOUT_MS;
    return override === 0 ? void 0 : clampToTimerRange(Math.trunc(override));
  }
  const configured = process.env[SHARED_STREAM_STALL_TIMEOUT_ENV]?.trim();
  if (!configured) return DEFAULT_STREAM_STALL_TIMEOUT_MS;
  const parsed = parseNonNegativeInt(configured);
  if (parsed === null) return DEFAULT_STREAM_STALL_TIMEOUT_MS;
  return parsed === 0 ? void 0 : clampToTimerRange(parsed);
}
|
|
608
|
+
/**
 * Assembles the streaming request payload passed to `client.generate`.
 *
 * @param {string} model - Model name.
 * @param {string} prompt - Prompt text.
 * @param {object} [options] - Optional `keep_alive`, `think`, `temperature`,
 *   `top_p`, `seed`, `num_predict`.
 * @param {boolean} includeSampling - When false, `top_p` and `seed` are left
 *   out even if provided (used for the retry after a backend rejects them).
 * @returns {object} Request with `stream: true`; temperature defaults to 0 and
 *   `num_predict` to 512.
 */
function buildGenerateRequest(model, prompt, options, includeSampling) {
  // Keys are added in a fixed order so the serialized request stays stable.
  const request = {
    model,
    prompt,
    stream: true,
    keep_alive: options?.keep_alive ?? defaultKeepAlive
  };
  if (options?.think !== undefined) {
    request.think = options.think;
  }
  const modelOptions = { temperature: options?.temperature ?? 0 };
  if (includeSampling) {
    if (options?.top_p !== undefined) modelOptions.top_p = options.top_p;
    if (options?.seed !== undefined) modelOptions.seed = options.seed;
  }
  modelOptions.num_predict = options?.num_predict ?? 512;
  request.options = modelOptions;
  return request;
}
|
|
567
623
|
/**
 * Runs a generation without streaming callbacks by delegating to
 * `generateStream`.
 *
 * @param {string} model - Model name.
 * @param {string} prompt - Prompt text.
 * @param {object} [options] - Generation options forwarded unchanged.
 * @returns {Promise<*>} Whatever `generateStream` resolves to.
 */
async function generate(model, prompt, options) {
  const noCallbacks = undefined;
  return generateStream(model, prompt, noCallbacks, options);
}
|
|
570
626
|
async function generateStream(model, prompt, callbacks, options) {
|
|
571
|
-
const
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
stream: true,
|
|
576
|
-
keep_alive: options?.keep_alive ?? defaultKeepAlive,
|
|
577
|
-
...options?.think !== void 0 ? { think: options.think } : {},
|
|
578
|
-
options: {
|
|
579
|
-
temperature: options?.temperature ?? 0,
|
|
580
|
-
num_predict: options?.num_predict ?? 512
|
|
581
|
-
}
|
|
582
|
-
}),
|
|
627
|
+
const stallTimeoutMs = resolveStreamStallTimeoutMs(options?.stall_timeout_ms);
|
|
628
|
+
let abortedByStallTimeout = false;
|
|
629
|
+
const initializeStream = (includeSampling) => withTimeout(
|
|
630
|
+
client.generate(buildGenerateRequest(model, prompt, options, includeSampling)),
|
|
583
631
|
OLLAMA_INIT_TIMEOUT_MS,
|
|
584
632
|
"Ollama generate initialization"
|
|
585
633
|
);
|
|
634
|
+
let stream;
|
|
635
|
+
try {
|
|
636
|
+
stream = await initializeStream(true);
|
|
637
|
+
} catch (err) {
|
|
638
|
+
if (hasSamplingOverrides(options) && isUnsupportedSamplingOptionError(err)) {
|
|
639
|
+
stream = await initializeStream(false);
|
|
640
|
+
} else {
|
|
641
|
+
throw err;
|
|
642
|
+
}
|
|
643
|
+
}
|
|
586
644
|
let fullResponse = "";
|
|
587
645
|
let fullThinking = "";
|
|
588
646
|
let result = null;
|
|
589
647
|
let firstChunkSeen = false;
|
|
590
648
|
let stallTimer = null;
|
|
591
649
|
const resetStallTimer = () => {
|
|
650
|
+
if (stallTimeoutMs === void 0) return;
|
|
592
651
|
if (stallTimer) clearTimeout(stallTimer);
|
|
593
652
|
stallTimer = setTimeout(() => {
|
|
653
|
+
abortedByStallTimeout = true;
|
|
594
654
|
client.abort();
|
|
595
|
-
},
|
|
655
|
+
}, stallTimeoutMs);
|
|
596
656
|
};
|
|
597
657
|
try {
|
|
598
658
|
resetStallTimer();
|
|
@@ -627,6 +687,9 @@ async function generateStream(model, prompt, callbacks, options) {
|
|
|
627
687
|
if (stallTimer) clearTimeout(stallTimer);
|
|
628
688
|
}
|
|
629
689
|
if (!result) {
|
|
690
|
+
if (abortedByStallTimeout && stallTimeoutMs !== void 0) {
|
|
691
|
+
throw new Error(`Ollama stream timed out after ${stallTimeoutMs}ms`);
|
|
692
|
+
}
|
|
630
693
|
throw new Error("Stream ended without done signal");
|
|
631
694
|
}
|
|
632
695
|
callbacks?.onDone?.(result);
|
|
@@ -656,42 +719,191 @@ function abortOngoingRequests() {
|
|
|
656
719
|
import os from "os";
|
|
657
720
|
import path from "path";
|
|
658
721
|
import { promises as fs } from "fs";
|
|
722
|
+
import { execFile as execFile2 } from "child_process";
|
|
659
723
|
var DEFAULT_LM_STUDIO_BASE_URL = "http://127.0.0.1:1234";
|
|
660
724
|
var LM_STUDIO_INIT_TIMEOUT_MS = 15e3;
|
|
661
725
|
var LM_STUDIO_METADATA_TIMEOUT_MS = 2e3;
|
|
662
|
-
var
|
|
726
|
+
var DEFAULT_STREAM_STALL_TIMEOUT_MS2 = 3e4;
|
|
727
|
+
var LM_STUDIO_CLI_TIMEOUT_MS = 8e3;
|
|
728
|
+
var SHARED_STREAM_STALL_TIMEOUT_ENV2 = "METRILLM_STREAM_STALL_TIMEOUT_MS";
|
|
663
729
|
var DEFAULT_LM_STUDIO_HOME_DIR = path.join(os.homedir(), ".lmstudio");
|
|
664
730
|
var DEFAULT_LM_STUDIO_MODELS_DIR = path.join(DEFAULT_LM_STUDIO_HOME_DIR, "models");
|
|
665
731
|
var LM_STUDIO_HOME_DIR_ENV = "LM_STUDIO_HOME_DIR";
|
|
666
732
|
var LM_STUDIO_MODELS_DIR_ENV = "LM_STUDIO_MODELS_DIR";
|
|
733
|
+
var LM_STUDIO_CLI_PATH_ENV = "LM_STUDIO_CLI_PATH";
|
|
667
734
|
var defaultKeepAlive2;
|
|
668
735
|
var activeAbortControllers = /* @__PURE__ */ new Set();
|
|
669
736
|
var directorySizeCache = /* @__PURE__ */ new Map();
|
|
670
737
|
var modelDefinitionCache = /* @__PURE__ */ new Map();
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
738
|
+
// System prompt attached to LM Studio chat requests when thinking is explicitly disabled.
var NON_THINKING_SYSTEM_PROMPT =
  "You are in non-thinking mode for benchmark reproducibility. " +
  "Return only the final answer. " +
  "Do not output internal reasoning, chain-of-thought, or scratchpad. " +
  "Never output tags or sections like <think>, </think>, [THINK], [/THINK], or Thinking Process.";
|
|
744
|
+
/**
 * Detects plain-text reasoning markers in a response: a leading
 * "Thinking Process:" / "Thought Process:" heading, or a bracketed
 * `[THINK]` / `[/THINK]` / `[THINKING]` / `[/THINKING]` marker anywhere.
 *
 * @param {string} response - Response text to inspect.
 * @returns {boolean}
 */
function hasThinkingLeakText(response) {
  const startsWithHeading = /^\s*(?:thinking|thought)\s+process\s*:/i.test(response);
  if (startsWithHeading) return true;
  return /\[(?:\/)?THINK(?:ING)?\]/i.test(response);
}
|
|
747
|
+
/**
 * Throws when non-thinking mode was requested (`think === false`) but the
 * model still produced reasoning output.
 *
 * Reasoning is detected from a non-blank `reasoning` string, a `<think` /
 * `<thinking` tag in the response, or the text markers recognized by
 * `hasThinkingLeakText`.
 *
 * @param {string} model - Model identifier, used in the error message.
 * @param {boolean|undefined} think - Requested thinking mode.
 * @param {string} response - Visible response text.
 * @param {string} reasoning - Separate reasoning text.
 * @throws {Error} With remediation guidance when thinking content leaked.
 */
function assertThinkingModeRespected(model, think, response, reasoning) {
  if (think !== false) return;
  const leaked = reasoning.trim().length > 0 || /<think(?:ing)?[\s>]/i.test(response) || hasThinkingLeakText(response);
  if (!leaked) return;
  const guidance = [
    `LM Studio model "${model}" still emitted thinking content while non-thinking mode is requested.`,
    "In LM Studio, add this at the top of the model chat template: {%- set enable_thinking = false %}.",
    "If this model does not expose a Prompt/Chat Template editor in LM Studio (e.g. some GPT-OSS builds), non-thinking mode cannot be enforced from the API.",
    "Use --thinking for this model, or benchmark a model/runtime that supports explicit non-thinking control.",
    "Then eject/reload the model and run the benchmark again."
  ];
  throw new Error(guidance.join(" "));
}
|
|
761
|
+
/**
 * Maps the boolean `think` flag onto the value sent as the `reasoning` field
 * of the native chat body.
 *
 * @param {boolean|undefined} think
 * @returns {"high"|undefined} "high" only when thinking is explicitly enabled.
 */
function buildNativeThinkingOption(think) {
  return think === true ? "high" : undefined;
}
|
|
765
|
+
/**
 * Tells whether the caller supplied any sampling override (`top_p` or `seed`)
 * for an LM Studio request.
 *
 * @param {{top_p?: number, seed?: number}|null|undefined} options
 * @returns {boolean}
 */
function hasSamplingOverrides2(options) {
  if (options == null) return false;
  if (options.top_p !== undefined) return true;
  return options.seed !== undefined;
}
|
|
768
|
+
/**
 * Heuristic: does an HTTP error response look like a rejection of `seed` / `top_p`?
 *
 * Only 400 and 422 responses are considered. The lowercased body must both
 * name a sampling option and contain a rejection keyword.
 *
 * @param {number} status - HTTP status code.
 * @param {string} text - Response body text.
 * @returns {boolean}
 */
function isUnsupportedSamplingMessage(status, text) {
  const isValidationStatus = status === 400 || status === 422;
  if (!isValidationStatus) return false;
  const normalized = text.toLowerCase();
  const namesSamplingOption = /\b(seed|top_p|topp)\b/.test(normalized);
  return namesSamplingOption && /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/.test(normalized);
}
|
|
775
|
+
/**
 * Extracts a human-readable error message from an LM Studio error response body.
 *
 * Handles two JSON shapes: `{"error": {"message": "..."}}` and
 * `{"error": "..."}`. The second shape previously fell through and returned
 * the raw JSON text. Bodies that are not JSON, or that carry no usable
 * message, are returned trimmed.
 *
 * @param {string} body - Raw response body.
 * @returns {string} Trimmed message, or "" for a blank body.
 */
function extractLMStudioErrorMessage(body) {
  const trimmed = body.trim();
  if (!trimmed) return "";
  let parsed;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    // Not JSON: the body itself is the best available message.
    return trimmed;
  }
  const error = parsed?.error;
  const message = typeof error === "string" ? error : error?.message;
  if (typeof message === "string" && message.trim().length > 0) {
    return message.trim();
  }
  return trimmed;
}
|
|
788
|
+
/**
 * Recognizes a "failed to load model" message that also mentions a resource
 * guardrail. Matching is case-insensitive.
 *
 * @param {string} message - Backend error message.
 * @returns {boolean}
 */
function isModelLoadGuardrailError(message) {
  const text = message.toLowerCase();
  const resourceHints = [
    "insufficient system resources",
    "overload your system",
    "loading guardrails"
  ];
  return text.includes("failed to load model") && resourceHints.some((hint) => text.includes(hint));
}
|
|
793
|
+
/**
 * Builds the Error for a failed LM Studio HTTP request.
 *
 * A backend message recognized as a model-loading guardrail failure yields a
 * dedicated message with remediation advice. Anything else yields a generic
 * "<kind> failed (<status> <statusText>)" message plus the backend text.
 *
 * @param {string} kind - Operation label (e.g. "generate").
 * @param {string} model - Model identifier.
 * @param {number} status - HTTP status code.
 * @param {string} statusText - HTTP status text.
 * @param {string} body - Raw response body.
 * @returns {Error} The error to throw (not thrown here).
 */
function buildLMStudioRequestError(kind, model, status, statusText, body) {
  const backendMessage = extractLMStudioErrorMessage(body);
  if (!isModelLoadGuardrailError(backendMessage)) {
    const detail = backendMessage ? ` ${backendMessage}` : "";
    const generic = `LM Studio ${kind} failed (${status} ${statusText})${detail}`;
    return new Error(generic.trim());
  }
  const guardrailParts = [
    `LM Studio could not load model "${model}" due to insufficient system resources (model loading guardrails).`,
    "In LM Studio: unload other models, reduce loaded context length, or relax model loading guardrails in Settings.",
    `Backend error: ${backendMessage}`
  ];
  return new Error(guardrailParts.join(" "));
}
|
|
807
|
+
/**
 * Assembles the JSON body for LM Studio's native chat endpoint.
 *
 * @param {string} model - Model identifier.
 * @param {string} prompt - Prompt text, sent as `input`.
 * @param {object} [options] - Optional `temperature`, `top_p`, `seed`,
 *   `num_predict`, `think`.
 * @param {boolean} stream - Whether a streamed response is requested.
 * @param {boolean} includeSampling - When false, `top_p` and `seed` are left
 *   out even if provided.
 * @returns {object} Body with temperature defaulting to 0 and `max_tokens` to
 *   512. `reasoning` is added when thinking is enabled, and `system_prompt`
 *   when thinking is explicitly disabled.
 */
function buildNativeChatBody(model, prompt, options, stream, includeSampling) {
  const reasoning = buildNativeThinkingOption(options?.think);
  // Keys are added in a fixed order so the serialized body stays stable.
  const body = {
    model,
    input: prompt,
    temperature: options?.temperature ?? 0
  };
  if (includeSampling) {
    if (options?.top_p !== undefined) body.top_p = options.top_p;
    if (options?.seed !== undefined) body.seed = options.seed;
  }
  body.max_tokens = options?.num_predict ?? 512;
  body.stream = stream;
  if (reasoning !== undefined) {
    body.reasoning = reasoning;
  }
  if (options?.think === false) {
    body.system_prompt = NON_THINKING_SYSTEM_PROMPT;
  }
  return body;
}
|
|
680
|
-
function
|
|
821
|
+
/**
 * Accepts a stat value only when it is a finite, non-negative number.
 *
 * @param {unknown} value - Raw stat value.
 * @returns {number|undefined} The value unchanged, or undefined when unusable.
 */
function getNativeStatNumber(value) {
  const isUsable = typeof value === "number" && Number.isFinite(value) && value >= 0;
  return isUsable ? value : undefined;
}
|
|
825
|
+
/**
 * Recursively pulls text out of a loosely-typed value.
 *
 * Strings are returned as-is, arrays are flattened and concatenated, and
 * objects are probed through `text`, `content`, `delta`, `value` (first
 * non-empty wins). Recursion stops below depth 3.
 *
 * @param {unknown} value - Value to flatten.
 * @param {number} [depth=0] - Current recursion depth.
 * @returns {string} Extracted text, or "" when nothing usable is found.
 */
function flattenNativeText(value, depth = 0) {
  if (depth > 3 || value == null) return "";
  if (typeof value === "string") return value;
  const nextDepth = depth + 1;
  if (Array.isArray(value)) {
    let joined = "";
    for (const element of value) {
      joined += flattenNativeText(element, nextDepth);
    }
    return joined;
  }
  if (typeof value !== "object") return "";
  for (const key of ["text", "content", "delta", "value"]) {
    const extracted = flattenNativeText(value[key], nextDepth);
    if (extracted) return extracted;
  }
  return "";
}
|
|
837
|
+
/**
 * Splits a native `output` array into visible response text and reasoning text.
 *
 * Each object item contributes its trimmed text (`text`, falling back to
 * `content`). Items whose lowercased `type` contains "reason" go to
 * `reasoning`; all others go to `response`. Pieces are concatenated with no
 * separator.
 *
 * @param {unknown} output4 - Candidate output array.
 * @returns {{response: string, reasoning: string}}
 */
function collectNativeOutput(output4) {
  const collected = { response: "", reasoning: "" };
  if (!Array.isArray(output4)) return collected;
  for (const entry of output4) {
    if (entry === null || typeof entry !== "object") continue;
    const piece = flattenNativeText(entry.text ?? entry.content).trim();
    if (!piece) continue;
    const kind = asNonEmptyString(entry.type)?.toLowerCase() ?? "";
    const bucket = kind.includes("reason") ? "reasoning" : "response";
    collected[bucket] += piece;
  }
  return collected;
}
|
|
857
|
+
/**
 * Locates the stats object in a native response payload.
 *
 * @param {unknown} payload - Parsed response payload.
 * @returns {*} `payload.stats` when truthy, otherwise `payload.result.stats`
 *   (which may be undefined). Undefined for non-object payloads.
 */
function extractNativeStats(payload) {
  const isObject = typeof payload === "object" && payload !== null;
  if (!isObject) return undefined;
  return payload.stats || payload.result?.stats;
}
|
|
864
|
+
/**
 * Extracts response and reasoning text from a non-streaming native payload.
 *
 * `payload.result.output` is tried first. `payload.output` is used only when
 * the former yields neither response nor reasoning text.
 *
 * @param {unknown} payload - Parsed response payload.
 * @returns {{response: string, reasoning: string}}
 */
function extractNativeResponse(payload) {
  if (payload === null || typeof payload !== "object") {
    return { response: "", reasoning: "" };
  }
  const nested = collectNativeOutput(payload.result?.output);
  const nestedHasText = Boolean(nested.response || nested.reasoning);
  return nestedHasText ? nested : collectNativeOutput(payload.output);
}
|
|
874
|
+
/**
 * Extracts the incremental text carried by one native streaming event.
 *
 * Text comes from `delta`, then `content`, then `text`. An event whose
 * lowercased `type` contains "reason" is reported as reasoning; every other
 * event with text is reported as response.
 *
 * @param {unknown} payload - Parsed stream event.
 * @returns {{response: string, reasoning: string}}
 */
function extractNativeDelta(payload) {
  if (payload === null || typeof payload !== "object") {
    return { response: "", reasoning: "" };
  }
  const eventType = asNonEmptyString(payload.type)?.toLowerCase() ?? "";
  const chunk = flattenNativeText(payload.delta) || flattenNativeText(payload.content) || flattenNativeText(payload.text);
  if (!chunk) {
    return { response: "", reasoning: "" };
  }
  // Message/text/content events and unrecognized types are all treated as response text.
  return eventType.includes("reason") ? { response: "", reasoning: chunk } : { response: chunk, reasoning: "" };
}
|
|
892
|
+
/**
 * Parses a string made only of decimal digits into a safe integer.
 *
 * @param {string} value - Candidate text (no sign, whitespace, or fraction allowed).
 * @returns {number|null} The parsed integer, or null when the text is not
 *   digits-only or exceeds the safe-integer range.
 */
function parseNonNegativeInt2(value) {
  const isDigitsOnly = /^\d+$/.test(value);
  if (!isDigitsOnly) return null;
  const numeric = Number.parseInt(value, 10);
  return Number.isSafeInteger(numeric) && numeric >= 0 ? numeric : null;
}
|
|
686
|
-
function
|
|
898
|
+
/**
 * Resolves the stream stall timeout (milliseconds) for LM Studio streaming.
 *
 * Precedence: explicit `override`, then the shared environment variable, then
 * the built-in default. A value of 0 disables the timeout (returns undefined).
 * Invalid values fall back to the default.
 *
 * Results are clamped to 2^31-1 ms, the largest delay Node timers accept;
 * larger delays are coerced to 1 ms, which would turn a "very long" timeout
 * into an immediate one. (Presumably this value feeds a `setTimeout`, as in
 * the Ollama stream reader — confirm in generateStream2.)
 *
 * @param {number} [override] - Per-call timeout; fractional values are truncated.
 * @returns {number|undefined} Timeout in ms, or undefined when disabled.
 */
function resolveStreamStallTimeoutMs2(override) {
  const MAX_TIMER_DELAY_MS = 2147483647;
  const clampToTimerRange = (ms) => Math.min(ms, MAX_TIMER_DELAY_MS);
  if (override !== void 0) {
    if (!Number.isFinite(override) || override < 0) return DEFAULT_STREAM_STALL_TIMEOUT_MS2;
    return override === 0 ? void 0 : clampToTimerRange(Math.trunc(override));
  }
  const configured = process.env[SHARED_STREAM_STALL_TIMEOUT_ENV2]?.trim();
  if (!configured) return DEFAULT_STREAM_STALL_TIMEOUT_MS2;
  const parsed = parseNonNegativeInt2(configured);
  if (parsed === null) return DEFAULT_STREAM_STALL_TIMEOUT_MS2;
  return parsed === 0 ? void 0 : clampToTimerRange(parsed);
}
|
|
697
909
|
function getLMStudioBaseUrl() {
|
|
@@ -714,25 +926,29 @@ function getLMStudioHeaders() {
|
|
|
714
926
|
}
|
|
715
927
|
return headers;
|
|
716
928
|
}
|
|
717
|
-
function
|
|
718
|
-
if (typeof
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
if (
|
|
725
|
-
const
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
}
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
929
|
+
/**
 * Normalizes a reported token count to a non-negative integer.
 *
 * @param {unknown} value - Raw count from the backend.
 * @returns {number} Truncated count, or 0 when the value is not a finite
 *   positive number.
 */
function getUsageTokenCount(value) {
  const isPositiveNumber = typeof value === "number" && Number.isFinite(value) && value > 0;
  return isPositiveNumber ? Math.trunc(value) : 0;
}
|
|
934
|
+
/**
 * Estimates a token count when the backend reports none.
 *
 * Each Han/Hiragana/Katakana/Hangul character counts as one token. The
 * remaining non-whitespace characters count as one token per four
 * characters, rounded up. The result is the larger of that sum and
 * `estimateTokenCount`, and at least 1 for non-blank text.
 *
 * @param {string} text - Generated text.
 * @returns {number} Estimated token count (0 for blank text).
 */
function estimateCompletionTokensFallback(text) {
  const normalized = text.trim();
  if (!normalized) return 0;
  const cjkPattern = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/gu;
  const cjkCount = (normalized.match(cjkPattern) ?? []).length;
  const latinLikeChars = normalized.replace(cjkPattern, "").replace(/\s+/g, "").length;
  const scriptAwareEstimate = cjkCount + Math.ceil(latinLikeChars / 4);
  const whitespaceEstimate = estimateTokenCount(normalized);
  return Math.max(1, whitespaceEstimate, scriptAwareEstimate);
}
|
|
733
|
-
function
|
|
734
|
-
const
|
|
735
|
-
|
|
948
|
+
/**
 * Chooses the completion token count: the backend-reported value when it is
 * positive, otherwise an estimate over the reasoning and response text.
 *
 * @param {unknown} reportedTokenCount - Count reported by the backend, if any.
 * @param {string} response - Visible response text.
 * @param {string} reasoning - Reasoning text.
 * @returns {number}
 */
function resolveCompletionTokenCount(reportedTokenCount, response, reasoning) {
  const reported = getUsageTokenCount(reportedTokenCount);
  if (reported > 0) return reported;
  const combinedText = `${reasoning} ${response}`;
  return estimateCompletionTokensFallback(combinedText);
}
|
|
737
953
|
function asNonEmptyString(value) {
|
|
738
954
|
if (typeof value !== "string") return void 0;
|
|
@@ -761,7 +977,7 @@ async function pathIsDirectory(targetPath) {
|
|
|
761
977
|
try {
|
|
762
978
|
const stat = await fs.stat(targetPath);
|
|
763
979
|
return stat.isDirectory();
|
|
764
|
-
} catch {
|
|
980
|
+
} catch (_err) {
|
|
765
981
|
return false;
|
|
766
982
|
}
|
|
767
983
|
}
|
|
@@ -982,11 +1198,17 @@ async function resolveLocalModelMetadata(modelId, apiModel, modelsRootDir) {
|
|
|
982
1198
|
const size = await readDirectorySizeBytes(source.fullPath);
|
|
983
1199
|
if (size > bestSize) bestSize = size;
|
|
984
1200
|
if (size > 0) {
|
|
985
|
-
return {
|
|
1201
|
+
return {
|
|
1202
|
+
size,
|
|
1203
|
+
parameterSize: definition.parameterSize
|
|
1204
|
+
};
|
|
986
1205
|
}
|
|
987
1206
|
}
|
|
988
1207
|
if (bestSize > 0) {
|
|
989
|
-
return {
|
|
1208
|
+
return {
|
|
1209
|
+
size: bestSize,
|
|
1210
|
+
parameterSize: definition.parameterSize
|
|
1211
|
+
};
|
|
990
1212
|
}
|
|
991
1213
|
const fallback = await resolvePublisherModelMetadata(modelId, apiModel, modelsRootDir);
|
|
992
1214
|
if (fallback.size > 0) {
|
|
@@ -995,7 +1217,10 @@ async function resolveLocalModelMetadata(modelId, apiModel, modelsRootDir) {
|
|
|
995
1217
|
parameterSize: definition.parameterSize ?? fallback.parameterSize
|
|
996
1218
|
};
|
|
997
1219
|
}
|
|
998
|
-
return {
|
|
1220
|
+
return {
|
|
1221
|
+
size: 0,
|
|
1222
|
+
parameterSize: definition.parameterSize ?? fallback.parameterSize
|
|
1223
|
+
};
|
|
999
1224
|
}
|
|
1000
1225
|
function parseSizeBytes(model) {
|
|
1001
1226
|
if (!model) return 0;
|
|
@@ -1033,6 +1258,21 @@ function inferParameterSizeFromModelId(modelId) {
|
|
|
1033
1258
|
}
|
|
1034
1259
|
return void 0;
|
|
1035
1260
|
}
|
|
1261
|
+
/**
 * Determines the model format label from the API model record.
 *
 * @param {object} [apiModel] - Model record from the LM Studio API.
 * @param {*} _localMetadata - Unused.
 * @param {*} _modelId - Unused.
 * @returns {string|undefined} `apiModel.compatibility_type` as filtered by
 *   `asNonEmptyString`.
 */
function resolveModelFormat(apiModel, _localMetadata, _modelId) {
  const compatibilityType = apiModel?.compatibility_type;
  return asNonEmptyString(compatibilityType);
}
|
|
1264
|
+
/**
 * Builds the model descriptor returned by the LM Studio listing and lookup functions.
 *
 * Size prefers the API-reported value and falls back to local metadata.
 * Parameter size prefers local metadata and falls back to inference from the
 * model id. Family is the first available of `arch`, `type`, `publisher`.
 *
 * @param {string} id - Model identifier.
 * @param {object} [apiModel] - Model record from the LM Studio API.
 * @param {{size?: number, parameterSize?: string}} [localMetadata] - Locally
 *   resolved metadata.
 * @returns {object} Entry with name, size, parameterSize, quantization,
 *   runtimeStatus, modelFormat, family.
 */
function buildModelEntry(id, apiModel, localMetadata) {
  const apiSize = parseSizeBytes(apiModel);
  const size = apiSize > 0 ? apiSize : localMetadata?.size ?? 0;
  const parameterSize = localMetadata?.parameterSize ?? inferParameterSizeFromModelId(id);
  const quantization = asNonEmptyString(apiModel?.quantization);
  const runtimeStatus = asNonEmptyString(apiModel?.state);
  const modelFormat = resolveModelFormat(apiModel, localMetadata, id);
  const family = asNonEmptyString(apiModel?.arch) ?? asNonEmptyString(apiModel?.type) ?? asNonEmptyString(apiModel?.publisher);
  return { name: id, size, parameterSize, quantization, runtimeStatus, modelFormat, family };
}
|
|
1036
1276
|
function isLoadedState(state) {
|
|
1037
1277
|
if (!state) return false;
|
|
1038
1278
|
const normalized = state.trim().toLowerCase();
|
|
@@ -1040,6 +1280,128 @@ function isLoadedState(state) {
|
|
|
1040
1280
|
if (normalized === "loaded" || normalized === "ready") return true;
|
|
1041
1281
|
return normalized.includes("loaded");
|
|
1042
1282
|
}
|
|
1283
|
+
/**
 * Promise wrapper around `execFile` that captures stdout and stderr.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments.
 * @param {number} timeoutMs - Timeout passed to `execFile`.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves with the
 *   captured output. On failure it rejects with the original error, which
 *   gains `stdout` and `stderr` properties so callers can still inspect
 *   partial output.
 */
function execFileText(cmd, args, timeoutMs) {
  const execOptions = {
    timeout: timeoutMs,
    maxBuffer: 1024 * 1024,
    env: process.env
  };
  return new Promise((resolve, reject) => {
    const onExit = (err, stdout, stderr) => {
      if (!err) {
        resolve({ stdout, stderr });
        return;
      }
      reject(Object.assign(err, { stdout, stderr }));
    };
    execFile2(cmd, args, execOptions, onExit);
  });
}
|
|
1306
|
+
/**
 * Tells whether an error means the executable could not be found (ENOENT).
 *
 * @param {unknown} err - Error thrown by a process launch.
 * @returns {boolean}
 */
function isCommandMissingError(err) {
  if (!(err instanceof Error)) return false;
  return err.code === "ENOENT";
}
|
|
1309
|
+
/**
 * Runs the LM Studio `lms` CLI, trying each known location in turn.
 *
 * Search order: the path from the CLI-path environment variable, `lms` on
 * PATH, then `<LM Studio home>/bin/lms`. A candidate that does not exist is
 * skipped. Any other failure is rethrown immediately.
 *
 * @param {string[]} args - CLI arguments.
 * @returns {Promise<{stdout: string, stderr: string}>}
 * @throws The last "command missing" error when no candidate could be launched.
 */
async function runLmsCli(args) {
  const searchOrder = [
    asNonEmptyString(process.env[LM_STUDIO_CLI_PATH_ENV]),
    "lms",
    path.join(getLMStudioHomeDir(), "bin", "lms")
  ];
  // Drop the unset entry and duplicates while keeping first-seen order.
  const executables = [...new Set(searchOrder.filter(Boolean))];
  let lastError;
  for (const executable of executables) {
    try {
      return await execFileText(executable, args, LM_STUDIO_CLI_TIMEOUT_MS);
    } catch (err) {
      if (!isCommandMissingError(err)) throw err;
      lastError = err;
    }
  }
  throw lastError ?? new Error("LM Studio CLI is not available.");
}
|
|
1331
|
+
/**
 * Normalizes a CLI-reported identifier for case-insensitive comparison.
 *
 * Values come from parsed CLI JSON, so they are not guaranteed to be strings.
 * Any non-string (null, undefined, number, object) now normalizes to ""
 * instead of throwing, so one odd field cannot abort a whole lookup.
 *
 * @param {unknown} value - Raw identifier.
 * @returns {string} Trimmed, lowercased text, or "" for non-strings.
 */
function normalizeCliToken(value) {
  if (typeof value !== "string") return "";
  return value.trim().toLowerCase();
}
|
|
1334
|
+
/**
 * Checks whether a loaded-model entry from the CLI refers to `model`.
 *
 * The normalized model name is compared against the entry's `identifier`,
 * `indexedModelIdentifier`, `path`, and `modelKey`.
 *
 * @param {object} entry - One entry from the CLI's loaded-model list.
 * @param {string} model - Model name to look for.
 * @returns {boolean} False when `model` normalizes to an empty string.
 */
function matchesLoadedModelCliEntry(entry, model) {
  const target = normalizeCliToken(model);
  if (!target) return false;
  const aliases = [entry.identifier, entry.indexedModelIdentifier, entry.path, entry.modelKey];
  for (const alias of aliases) {
    if (normalizeCliToken(alias) === target) return true;
  }
  return false;
}
|
|
1344
|
+
/**
 * Lists currently loaded models by running `lms ps --json`.
 *
 * @returns {Promise<Array>} The parsed JSON array, or an empty array when the
 *   CLI output is valid JSON but not an array.
 * @throws When the CLI fails or its output is not valid JSON.
 */
async function listLoadedModelsFromCli() {
  const result = await runLmsCli(["ps", "--json"]);
  const entries = JSON.parse(result.stdout);
  if (!Array.isArray(entries)) return [];
  return entries;
}
|
|
1349
|
+
/**
 * Parses an "Estimated Total Memory: <n> <unit>" line into a byte count.
 *
 * Binary units (KiB, MiB, GiB, TiB) use powers of 1024 and decimal units (KB,
 * MB, GB, TB) use powers of 1000. Unit matching is case-insensitive.
 *
 * @param {string} output4 - CLI output to search.
 * @returns {number|null} Rounded byte count, or null when no positive
 *   estimate is present.
 */
function parseEstimatedBytes(output4) {
  const found = /Estimated Total Memory:\s*([0-9]+(?:\.[0-9]+)?)\s*(KiB|MiB|GiB|TiB|KB|MB|GB|TB)/i.exec(output4);
  if (found === null) return null;
  const amount = Number.parseFloat(found[1] ?? "");
  if (!Number.isFinite(amount) || amount <= 0) return null;
  const unit = (found[2] ?? "").toUpperCase();
  // The first letter selects the power (K=1 … T=4); an "I" marks a binary unit.
  const power = "KMGT".indexOf(unit.charAt(0)) + 1;
  if (power === 0) return null;
  const base = unit.includes("I") ? 1024 : 1e3;
  return Math.round(amount * base ** power);
}
|
|
1369
|
+
/**
 * Estimates the memory footprint of a currently loaded model via the `lms` CLI.
 *
 * Finds the loaded entry matching `model`, then runs
 * `lms load --estimate-only -y [--context-length N] <key>` for each distinct
 * key (`path`, `indexedModelIdentifier`, `modelKey`) until one yields a
 * parsable estimate. Output attached to a failed CLI call is parsed too.
 *
 * @param {string} model - Model name to look up among loaded models.
 * @returns {Promise<number|null>} Estimated bytes, or null when the model is
 *   not loaded, the CLI is unavailable, or no estimate could be parsed.
 */
async function estimateLoadedModelMemoryBytes(model) {
  let loadedEntry;
  try {
    const loadedModels = await listLoadedModelsFromCli();
    loadedEntry = loadedModels.find((entry) => matchesLoadedModelCliEntry(entry, model));
  } catch {
    // A missing or failing CLI means no estimate is available; this is not an error.
    return null;
  }
  if (!loadedEntry) return null;
  const { contextLength } = loadedEntry;
  const hasContextLength = typeof contextLength === "number" && Number.isFinite(contextLength) && contextLength > 0;
  const contextArgs = hasContextLength ? ["--context-length", String(Math.trunc(contextLength))] : [];
  const modelKeys = [loadedEntry.path, loadedEntry.indexedModelIdentifier, loadedEntry.modelKey].filter(
    (key, position, all) => Boolean(key?.trim()) && all.indexOf(key) === position
  );
  for (const modelKey of modelKeys) {
    const cliArgs = ["load", "--estimate-only", "-y", ...contextArgs, modelKey];
    let combinedOutput;
    try {
      const { stdout, stderr } = await runLmsCli(cliArgs);
      combinedOutput = `${stdout}\n${stderr}`;
    } catch (err) {
      // The estimate may have been printed even though the command failed.
      combinedOutput = err instanceof Error ? `${String(err.stdout ?? "")}\n${String(err.stderr ?? "")}` : "";
    }
    const estimated = parseEstimatedBytes(combinedOutput);
    if (estimated !== null) return estimated;
  }
  return null;
}
|
|
1043
1405
|
async function fetchApiModels() {
|
|
1044
1406
|
try {
|
|
1045
1407
|
const resp = await fetchWithTimeout(
|
|
@@ -1092,7 +1454,7 @@ async function getLMStudioVersion() {
|
|
|
1092
1454
|
const localVersion = await resolveLocalLMStudioVersion();
|
|
1093
1455
|
try {
|
|
1094
1456
|
const resp = await fetchWithTimeout(
|
|
1095
|
-
"/v1/models",
|
|
1457
|
+
"/api/v1/models",
|
|
1096
1458
|
{ method: "GET", headers: getLMStudioHeaders() },
|
|
1097
1459
|
5e3,
|
|
1098
1460
|
"LM Studio version check"
|
|
@@ -1107,7 +1469,7 @@ async function getLMStudioVersion() {
|
|
|
1107
1469
|
}
|
|
1108
1470
|
async function listModels2() {
|
|
1109
1471
|
const resp = await fetchWithTimeout(
|
|
1110
|
-
"/v1/models",
|
|
1472
|
+
"/api/v1/models",
|
|
1111
1473
|
{ method: "GET", headers: getLMStudioHeaders() },
|
|
1112
1474
|
LM_STUDIO_INIT_TIMEOUT_MS,
|
|
1113
1475
|
"LM Studio list models"
|
|
@@ -1125,25 +1487,25 @@ async function listModels2() {
|
|
|
1125
1487
|
apiById.set(id, model);
|
|
1126
1488
|
}
|
|
1127
1489
|
const modelsRootDir = await resolveModelsRootDir();
|
|
1128
|
-
const
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1490
|
+
const localMetadataEntries = await Promise.all(
|
|
1491
|
+
ids.map(async (id) => {
|
|
1492
|
+
const localMetadata = await resolveLocalModelMetadata(id, apiById.get(id), modelsRootDir);
|
|
1493
|
+
return [id, localMetadata];
|
|
1494
|
+
})
|
|
1495
|
+
);
|
|
1496
|
+
const localMetadataById = new Map(
|
|
1497
|
+
localMetadataEntries
|
|
1498
|
+
);
|
|
1499
|
+
return ids.map((id) => buildModelEntry(id, apiById.get(id), localMetadataById.get(id)));
|
|
1500
|
+
}
|
|
1501
|
+
/**
 * Resolves a single LM Studio model descriptor by id.
 *
 * Combines the matching API record (if any) with locally resolved metadata.
 *
 * @param {string} modelId - Model identifier; surrounding whitespace is ignored.
 * @returns {Promise<object|null>} The model entry, or null for a blank id.
 */
async function resolveModel(modelId) {
  const id = modelId.trim();
  if (id.length === 0) return null;
  const apiModels = await fetchApiModels();
  const matchesId = (candidate) => asNonEmptyString(candidate.id) === id;
  const apiModel = apiModels?.find(matchesId);
  const modelsRootDir = await resolveModelsRootDir();
  const localMetadata = await resolveLocalModelMetadata(id, apiModel, modelsRootDir);
  return buildModelEntry(id, apiModel, localMetadata);
}
|
|
1148
1510
|
async function listRunningModels2() {
|
|
1149
1511
|
const apiModels = await fetchApiModels();
|
|
@@ -1164,39 +1526,54 @@ async function generate2(model, prompt, options) {
|
|
|
1164
1526
|
activeAbortControllers.add(controller);
|
|
1165
1527
|
try {
|
|
1166
1528
|
const baseUrl = getLMStudioBaseUrl();
|
|
1167
|
-
const url = new URL("/v1/chat
|
|
1168
|
-
const
|
|
1529
|
+
const url = new URL("/api/v1/chat", baseUrl);
|
|
1530
|
+
const doRequest = (includeSampling) => fetch(url, {
|
|
1169
1531
|
method: "POST",
|
|
1170
1532
|
headers: getLMStudioHeaders(),
|
|
1171
|
-
body: JSON.stringify(
|
|
1172
|
-
model,
|
|
1173
|
-
messages: [{ role: "user", content: prompt }],
|
|
1174
|
-
temperature: options?.temperature ?? 0,
|
|
1175
|
-
max_tokens: options?.num_predict ?? 512,
|
|
1176
|
-
stream: false,
|
|
1177
|
-
...buildThinkingConfig(options?.think)
|
|
1178
|
-
}),
|
|
1533
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, includeSampling)),
|
|
1179
1534
|
signal: controller.signal
|
|
1180
1535
|
});
|
|
1536
|
+
let resp = await doRequest(true);
|
|
1181
1537
|
if (!resp.ok) {
|
|
1182
1538
|
const body = await resp.text().catch(() => "");
|
|
1183
|
-
|
|
1539
|
+
if (hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
|
|
1540
|
+
resp = await doRequest(false);
|
|
1541
|
+
} else {
|
|
1542
|
+
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
if (!resp.ok) {
|
|
1546
|
+
const body = await resp.text().catch(() => "");
|
|
1547
|
+
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
1184
1548
|
}
|
|
1185
1549
|
const payload = await resp.json();
|
|
1186
|
-
const
|
|
1187
|
-
const response =
|
|
1188
|
-
const reasoning =
|
|
1189
|
-
|
|
1550
|
+
const nativeResponse = extractNativeResponse(payload);
|
|
1551
|
+
const response = nativeResponse.response;
|
|
1552
|
+
const reasoning = nativeResponse.reasoning;
|
|
1553
|
+
assertThinkingModeRespected(model, options?.think, response, reasoning);
|
|
1554
|
+
const stats = extractNativeStats(payload);
|
|
1190
1555
|
const totalDuration = Math.max(0, Date.now() - start) * 1e6;
|
|
1556
|
+
const outputTokens = getUsageTokenCount(stats?.total_output_tokens) || resolveCompletionTokenCount(void 0, response, reasoning);
|
|
1557
|
+
const throughput = getNativeStatNumber(stats?.tokens_per_second);
|
|
1558
|
+
const timeToFirstTokenSeconds = getNativeStatNumber(stats?.time_to_first_token_seconds);
|
|
1559
|
+
const modelLoadTimeSeconds = getNativeStatNumber(stats?.model_load_time_seconds);
|
|
1560
|
+
const evalCountEstimated = getUsageTokenCount(stats?.total_output_tokens) <= 0;
|
|
1561
|
+
const evalDuration = throughput !== void 0 && throughput > 0 && outputTokens > 0 ? Math.max(1, Math.round(outputTokens / throughput * 1e9)) : totalDuration;
|
|
1562
|
+
const promptEvalDuration = timeToFirstTokenSeconds !== void 0 ? Math.max(0, Math.round(timeToFirstTokenSeconds * 1e9)) : 0;
|
|
1563
|
+
const loadDuration = Math.max(
|
|
1564
|
+
0,
|
|
1565
|
+
Math.round((modelLoadTimeSeconds ?? 0) * 1e9)
|
|
1566
|
+
);
|
|
1191
1567
|
return {
|
|
1192
1568
|
response,
|
|
1193
1569
|
...reasoning ? { thinking: reasoning } : {},
|
|
1194
1570
|
totalDuration,
|
|
1195
|
-
loadDuration
|
|
1196
|
-
promptEvalCount:
|
|
1197
|
-
promptEvalDuration
|
|
1198
|
-
evalCount:
|
|
1199
|
-
evalDuration
|
|
1571
|
+
loadDuration,
|
|
1572
|
+
promptEvalCount: getUsageTokenCount(stats?.input_tokens),
|
|
1573
|
+
promptEvalDuration,
|
|
1574
|
+
evalCount: outputTokens,
|
|
1575
|
+
evalDuration,
|
|
1576
|
+
...evalCountEstimated ? { evalCountEstimated: true } : {}
|
|
1200
1577
|
};
|
|
1201
1578
|
} catch (err) {
|
|
1202
1579
|
if (err instanceof Error && err.name === "AbortError") {
|
|
@@ -1211,10 +1588,10 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1211
1588
|
const start = Date.now();
|
|
1212
1589
|
const controller = new AbortController();
|
|
1213
1590
|
activeAbortControllers.add(controller);
|
|
1214
|
-
const stallTimeoutMs =
|
|
1591
|
+
const stallTimeoutMs = resolveStreamStallTimeoutMs2(options?.stall_timeout_ms);
|
|
1215
1592
|
let abortedByStallTimeout = false;
|
|
1216
1593
|
const baseUrl = getLMStudioBaseUrl();
|
|
1217
|
-
const url = new URL("/v1/chat
|
|
1594
|
+
const url = new URL("/api/v1/chat", baseUrl);
|
|
1218
1595
|
let stallTimer = null;
|
|
1219
1596
|
const resetStallTimer = () => {
|
|
1220
1597
|
if (stallTimeoutMs === void 0) return;
|
|
@@ -1226,23 +1603,24 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1226
1603
|
};
|
|
1227
1604
|
try {
|
|
1228
1605
|
resetStallTimer();
|
|
1229
|
-
const
|
|
1606
|
+
const doRequest = (includeSampling) => fetch(url, {
|
|
1230
1607
|
method: "POST",
|
|
1231
1608
|
headers: getLMStudioHeaders(),
|
|
1232
|
-
body: JSON.stringify(
|
|
1233
|
-
model,
|
|
1234
|
-
messages: [{ role: "user", content: prompt }],
|
|
1235
|
-
temperature: options?.temperature ?? 0,
|
|
1236
|
-
max_tokens: options?.num_predict ?? 512,
|
|
1237
|
-
stream: true,
|
|
1238
|
-
stream_options: { include_usage: true },
|
|
1239
|
-
...buildThinkingConfig(options?.think)
|
|
1240
|
-
}),
|
|
1609
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, includeSampling)),
|
|
1241
1610
|
signal: controller.signal
|
|
1242
1611
|
});
|
|
1612
|
+
let resp = await doRequest(true);
|
|
1613
|
+
if (!resp.ok) {
|
|
1614
|
+
const body = await resp.text().catch(() => "");
|
|
1615
|
+
if (hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
|
|
1616
|
+
resp = await doRequest(false);
|
|
1617
|
+
} else {
|
|
1618
|
+
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
1619
|
+
}
|
|
1620
|
+
}
|
|
1243
1621
|
if (!resp.ok) {
|
|
1244
1622
|
const body = await resp.text().catch(() => "");
|
|
1245
|
-
throw
|
|
1623
|
+
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
1246
1624
|
}
|
|
1247
1625
|
if (!resp.body) {
|
|
1248
1626
|
throw new Error("LM Studio stream response body is empty");
|
|
@@ -1253,10 +1631,10 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1253
1631
|
let doneReceived = false;
|
|
1254
1632
|
let fullResponse = "";
|
|
1255
1633
|
let fullThinking = "";
|
|
1256
|
-
let
|
|
1634
|
+
let stats;
|
|
1257
1635
|
let firstChunkSeen = false;
|
|
1258
|
-
let
|
|
1259
|
-
let
|
|
1636
|
+
let firstGeneratedTokenTime = null;
|
|
1637
|
+
let lastGeneratedTokenTime = null;
|
|
1260
1638
|
const processDataLine = (rawLine) => {
|
|
1261
1639
|
const line = rawLine.trim();
|
|
1262
1640
|
if (!line.startsWith("data:")) return;
|
|
@@ -1272,18 +1650,27 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1272
1650
|
} catch {
|
|
1273
1651
|
return;
|
|
1274
1652
|
}
|
|
1275
|
-
const
|
|
1276
|
-
const content =
|
|
1277
|
-
const reasoning =
|
|
1278
|
-
const
|
|
1279
|
-
if (
|
|
1653
|
+
const delta = extractNativeDelta(payload);
|
|
1654
|
+
const content = delta.response;
|
|
1655
|
+
const reasoning = delta.reasoning;
|
|
1656
|
+
const chunkStats = extractNativeStats(payload);
|
|
1657
|
+
if (chunkStats) stats = chunkStats;
|
|
1658
|
+
const aggregate = extractNativeResponse(payload);
|
|
1659
|
+
if (aggregate.response) {
|
|
1660
|
+
fullResponse = aggregate.response;
|
|
1661
|
+
}
|
|
1662
|
+
if (aggregate.reasoning) {
|
|
1663
|
+
fullThinking = aggregate.reasoning;
|
|
1664
|
+
}
|
|
1665
|
+
if (reasoning || content) {
|
|
1666
|
+
const now = Date.now();
|
|
1667
|
+
if (firstGeneratedTokenTime === null) firstGeneratedTokenTime = now;
|
|
1668
|
+
lastGeneratedTokenTime = now;
|
|
1669
|
+
}
|
|
1280
1670
|
if (reasoning) {
|
|
1281
1671
|
fullThinking += reasoning;
|
|
1282
1672
|
}
|
|
1283
1673
|
if (content) {
|
|
1284
|
-
const now = Date.now();
|
|
1285
|
-
if (firstTokenTime === null) firstTokenTime = now;
|
|
1286
|
-
lastTokenTime = now;
|
|
1287
1674
|
fullResponse += content;
|
|
1288
1675
|
callbacks?.onToken?.(content);
|
|
1289
1676
|
}
|
|
@@ -1303,6 +1690,14 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1303
1690
|
processDataLine(rawLine);
|
|
1304
1691
|
}
|
|
1305
1692
|
}
|
|
1693
|
+
buffered += decoder.decode();
|
|
1694
|
+
if (buffered.length > 0) {
|
|
1695
|
+
const lines = buffered.split("\n");
|
|
1696
|
+
buffered = lines.pop() ?? "";
|
|
1697
|
+
for (const rawLine of lines) {
|
|
1698
|
+
processDataLine(rawLine);
|
|
1699
|
+
}
|
|
1700
|
+
}
|
|
1306
1701
|
if (buffered.trim().length > 0) {
|
|
1307
1702
|
processDataLine(buffered);
|
|
1308
1703
|
}
|
|
@@ -1311,17 +1706,27 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1311
1706
|
throw new Error("LM Studio stream ended without content");
|
|
1312
1707
|
}
|
|
1313
1708
|
const totalDuration = Math.max(0, Date.now() - start) * 1e6;
|
|
1314
|
-
const
|
|
1709
|
+
const outputTokens = getUsageTokenCount(stats?.total_output_tokens) || resolveCompletionTokenCount(void 0, fullResponse, fullThinking);
|
|
1710
|
+
const throughput = getNativeStatNumber(stats?.tokens_per_second);
|
|
1711
|
+
const timeToFirstTokenSeconds = getNativeStatNumber(stats?.time_to_first_token_seconds);
|
|
1712
|
+
const modelLoadTimeSeconds = getNativeStatNumber(stats?.model_load_time_seconds);
|
|
1713
|
+
const evalCountEstimated = getUsageTokenCount(stats?.total_output_tokens) <= 0;
|
|
1714
|
+
const evalDurationMs = throughput !== void 0 && throughput > 0 && outputTokens > 0 ? outputTokens / throughput * 1e3 : firstGeneratedTokenTime !== null && lastGeneratedTokenTime !== null && lastGeneratedTokenTime > firstGeneratedTokenTime ? lastGeneratedTokenTime - firstGeneratedTokenTime : Date.now() - start;
|
|
1315
1715
|
const result = {
|
|
1316
1716
|
response: fullResponse,
|
|
1317
1717
|
...fullThinking ? { thinking: fullThinking } : {},
|
|
1318
1718
|
totalDuration,
|
|
1319
|
-
loadDuration:
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1719
|
+
loadDuration: Math.max(
|
|
1720
|
+
0,
|
|
1721
|
+
Math.round((modelLoadTimeSeconds ?? 0) * 1e9)
|
|
1722
|
+
),
|
|
1723
|
+
promptEvalCount: getUsageTokenCount(stats?.input_tokens),
|
|
1724
|
+
promptEvalDuration: timeToFirstTokenSeconds !== void 0 ? Math.max(0, Math.round(timeToFirstTokenSeconds * 1e9)) : firstGeneratedTokenTime !== null ? (firstGeneratedTokenTime - start) * 1e6 : 0,
|
|
1725
|
+
evalCount: outputTokens,
|
|
1726
|
+
evalDuration: Math.max(1, Math.round(evalDurationMs * 1e6)),
|
|
1727
|
+
...evalCountEstimated ? { evalCountEstimated: true } : {}
|
|
1324
1728
|
};
|
|
1729
|
+
assertThinkingModeRespected(model, options?.think, fullResponse, fullThinking);
|
|
1325
1730
|
callbacks?.onDone?.(result);
|
|
1326
1731
|
return result;
|
|
1327
1732
|
} catch (err) {
|
|
@@ -1487,6 +1892,19 @@ function getRuntimeName() {
|
|
|
1487
1892
|
function getRuntimeModelFormat() {
|
|
1488
1893
|
return activeRuntime.modelFormat ?? "gguf";
|
|
1489
1894
|
}
|
|
1895
|
+
/**
 * Resolve a model name to a model descriptor for the currently active runtime.
 *
 * - When the active runtime's name is "lm-studio", the lookup is delegated to
 *   `resolveModel` and its result is returned unchanged.
 * - Otherwise the runtime's model list is searched for an entry whose `name`
 *   is exactly equal to `model`.
 * - When no entry matches, a placeholder descriptor is returned with `size: 0`
 *   and the runtime's `modelFormat` (falling back to "gguf").
 *
 * @param {string} model - Model name to look up.
 * @returns {Promise<object>} The matched descriptor, or the placeholder
 *   `{ name, size: 0, modelFormat }`.
 */
async function resolveRuntimeModel(model) {
  if (activeRuntime.name === "lm-studio") {
    return resolveModel(model);
  }
  const knownModels = await activeRuntime.listModels();
  const matchedModel = knownModels.find((candidate) => candidate.name === model);
  if (matchedModel) return matchedModel;
  // Unknown to the runtime: still hand back a descriptor so callers get a
  // uniform shape; size 0 marks the size as not known.
  return {
    name: model,
    size: 0,
    modelFormat: activeRuntime.modelFormat ?? "gguf"
  };
}
|
|
1490
1908
|
|
|
1491
1909
|
// ../src/commands/bench.ts
|
|
1492
1910
|
import { createHash as createHash3 } from "crypto";
|
|
@@ -1495,11 +1913,33 @@ import chalk8 from "chalk";
|
|
|
1495
1913
|
// ../src/core/hardware.ts
|
|
1496
1914
|
import si from "systeminformation";
|
|
1497
1915
|
import os2 from "os";
|
|
1498
|
-
import { execFile as
|
|
1916
|
+
import { execFile as execFile3 } from "child_process";
|
|
1499
1917
|
import { readFile } from "fs/promises";
|
|
1918
|
+
/**
 * Collapse every run of whitespace in `value` to a single space and trim
 * leading/trailing whitespace.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(value) {
  return value.replace(/\s+/g, " ").trim();
}
|
|
1921
|
+
/**
 * Report whether `value` contains one of the known GPU-related keywords
 * (radeon, graphics, geforce, rtx, gtx, arc, iris, uhd, quadro, tesla,
 * adreno, mali, powervr) as a whole word, case-insensitively.
 *
 * @param {string} value - Text to inspect.
 * @returns {boolean} True when a keyword is present as a whole word.
 */
function looksLikeGpuDescriptor(value) {
  return /\b(radeon|graphics|geforce|rtx|gtx|arc|iris|uhd|quadro|tesla|adreno|mali|powervr)\b/i.test(value);
}
|
|
1924
|
+
/**
 * Split a CPU label such as "AMD Ryzen 7 with Radeon Graphics" into the CPU
 * part and the GPU part named after "with" / "w/".
 *
 * The label is whitespace-normalized first. The suffix is only treated as a
 * GPU when `looksLikeGpuDescriptor` accepts it; otherwise the whole
 * normalized label is returned as the CPU.
 *
 * @param {string} cpuLabel - Raw CPU label.
 * @returns {{ cpu: string, inferredGpu: (string|null) }} The CPU text and the
 *   inferred GPU text, or `inferredGpu: null` when none was recognized.
 */
function splitCpuAndInferredGpu(cpuLabel) {
  const normalized = normalizeWhitespace(cpuLabel);
  const withGpuMatch = normalized.match(/\s+(?:w\/\s*|with\s+)(.+)$/i);
  // Guard on the match itself, not on the truthiness of its index, so that
  // "no match" is never confused with a match at offset 0.
  if (!withGpuMatch) {
    return { cpu: normalized, inferredGpu: null };
  }
  const inferredGpu = normalizeWhitespace(withGpuMatch[1] ?? "");
  if (!looksLikeGpuDescriptor(inferredGpu)) {
    return { cpu: normalized, inferredGpu: null };
  }
  const cpu = normalizeWhitespace(normalized.slice(0, withGpuMatch.index));
  return {
    // Fall back to the full label if nothing precedes the "with" suffix.
    cpu: cpu || normalized,
    inferredGpu: inferredGpu || null
  };
}
|
|
1500
1940
|
function execCommand(cmd, args, timeoutMs = 3e3) {
|
|
1501
1941
|
return new Promise((resolve) => {
|
|
1502
|
-
const child =
|
|
1942
|
+
const child = execFile3(cmd, args, { timeout: timeoutMs }, (err, stdout) => {
|
|
1503
1943
|
if (err) return resolve("");
|
|
1504
1944
|
resolve(stdout.trim());
|
|
1505
1945
|
});
|
|
@@ -1633,11 +2073,14 @@ async function getHardwareInfo() {
|
|
|
1633
2073
|
]);
|
|
1634
2074
|
const gpuController = graphics.controllers[0];
|
|
1635
2075
|
const gpuNames = graphics.controllers.map((g) => g.model).filter(Boolean).join(", ");
|
|
2076
|
+
const cpuLabelRaw = normalizeWhitespace(`${cpu.manufacturer} ${cpu.brand}`);
|
|
2077
|
+
const { cpu: cpuLabel, inferredGpu } = splitCpuAndInferredGpu(cpuLabelRaw);
|
|
2078
|
+
const defaultIntegratedGpu = process.platform === "darwin" ? "Integrated / Apple Silicon" : "Integrated / Unknown";
|
|
1636
2079
|
const gpuCoresRaw = gpuController?.cores;
|
|
1637
2080
|
const gpuCores = gpuCoresRaw ? parseInt(String(gpuCoresRaw), 10) : null;
|
|
1638
2081
|
const memType = memLayout.length > 0 ? memLayout[0].type : null;
|
|
1639
2082
|
return {
|
|
1640
|
-
cpu:
|
|
2083
|
+
cpu: cpuLabel,
|
|
1641
2084
|
cpuCores: cpu.cores,
|
|
1642
2085
|
cpuPCores: cpu.performanceCores || null,
|
|
1643
2086
|
cpuECores: cpu.efficiencyCores || null,
|
|
@@ -1647,7 +2090,7 @@ async function getHardwareInfo() {
|
|
|
1647
2090
|
memoryType: memType || null,
|
|
1648
2091
|
swapTotalGB: +(mem.swaptotal / 1024 / 1024 / 1024).toFixed(1),
|
|
1649
2092
|
swapUsedGB: +(mem.swapused / 1024 / 1024 / 1024).toFixed(1),
|
|
1650
|
-
gpu: gpuNames ||
|
|
2093
|
+
gpu: normalizeWhitespace(gpuNames) || inferredGpu || defaultIntegratedGpu,
|
|
1651
2094
|
gpuCores: gpuCores && !isNaN(gpuCores) ? gpuCores : null,
|
|
1652
2095
|
gpuVramMB: gpuController?.vram ?? null,
|
|
1653
2096
|
os: `${osInfo.distro} ${osInfo.release}`,
|
|
@@ -1682,6 +2125,10 @@ import chalk from "chalk";
|
|
|
1682
2125
|
|
|
1683
2126
|
// ../src/ui/terminal.ts
|
|
1684
2127
|
var supportsUnicode = process.platform !== "win32" || Boolean(process.env.WT_SESSION) || Boolean(process.env.TERM_PROGRAM);
|
|
2128
|
+
// Matches ANSI CSI sequences of the form ESC [ <digits and semicolons> <letter>,
// which covers the colour/style codes emitted for terminal output.
var ANSI_RE = /\x1b\[[0-9;]*[A-Za-z]/g;

/**
 * Remove every sequence matched by `ANSI_RE` from `value`.
 *
 * @param {string} value - Text that may contain ANSI escape sequences.
 * @returns {string} The text with those sequences removed.
 */
function stripAnsi(value) {
  return value.replace(ANSI_RE, "");
}
|
|
1685
2132
|
|
|
1686
2133
|
// ../src/ui/progress.ts
|
|
1687
2134
|
var FUN_PHRASES = [
|
|
@@ -1816,6 +2263,33 @@ function errorMsg(text) {
|
|
|
1816
2263
|
console.log(chalk.red(` ${CROSS_MARK} ${text}`));
|
|
1817
2264
|
}
|
|
1818
2265
|
|
|
2266
|
+
// ../src/benchmarks/profile.ts
|
|
2267
|
+
// Fixed benchmark profile: a version tag plus the sampling values that
// withBenchmarkProfile() applies as defaults and that
// buildBenchmarkProfileMetadata() reports.
var BENCHMARK_PROFILE_VERSION = "v1";
var BENCHMARK_PROFILE_SEED = 42;
var BENCHMARK_PROFILE_TOP_P = 1;
var BENCHMARK_PROFILE_TEMPERATURE = 0;
|
|
2271
|
+
/**
 * Build generation options that start from the benchmark profile's sampling
 * defaults (temperature, top_p, seed) and then apply `opts` on top.
 *
 * Keys present in `opts` win over the profile defaults because they are
 * spread last.
 *
 * @param {object} [opts={}] - Caller-supplied generation options.
 * @returns {object} A new options object; `opts` is not mutated.
 */
function withBenchmarkProfile(opts = {}) {
  return {
    temperature: BENCHMARK_PROFILE_TEMPERATURE,
    top_p: BENCHMARK_PROFILE_TOP_P,
    seed: BENCHMARK_PROFILE_SEED,
    ...opts
  };
}
|
|
2279
|
+
/**
 * Describe the benchmark profile as a metadata record.
 *
 * @param {boolean} thinkEnabled - Truthy to report thinking mode as
 *   "enabled", falsy to report "disabled".
 * @returns {object} Record with the profile version, the sampling values
 *   (temperature, topP, seed), the thinking mode, a null
 *   `contextWindowTokens`, and `contextPolicy: "runtime-default"`.
 */
function buildBenchmarkProfileMetadata(thinkEnabled) {
  return {
    version: BENCHMARK_PROFILE_VERSION,
    sampling: {
      temperature: BENCHMARK_PROFILE_TEMPERATURE,
      topP: BENCHMARK_PROFILE_TOP_P,
      seed: BENCHMARK_PROFILE_SEED
    },
    thinkingMode: thinkEnabled ? "enabled" : "disabled",
    contextWindowTokens: null,
    contextPolicy: "runtime-default"
  };
}
|
|
2292
|
+
|
|
1819
2293
|
// ../src/benchmarks/performance.ts
|
|
1820
2294
|
var WARMUP_PROMPT = "Say hello in one word.";
|
|
1821
2295
|
var BENCH_PROMPTS = [
|
|
@@ -1855,11 +2329,15 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1855
2329
|
optionalProbeWithAvailability(() => getSwapUsedGB(), 0),
|
|
1856
2330
|
optionalProbe(() => detectBatteryPowered(), void 0)
|
|
1857
2331
|
]);
|
|
2332
|
+
const runningModelsBeforeWarmup = await optionalProbe(() => listRunningModels3(), []);
|
|
2333
|
+
const modelWasAlreadyLoaded = runningModelsBeforeWarmup.some((m) => m.name === model);
|
|
1858
2334
|
const warmup = await withTimeout(
|
|
1859
2335
|
generateStream3(model, WARMUP_PROMPT, void 0, {
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
2336
|
+
...withBenchmarkProfile({
|
|
2337
|
+
num_predict: 32,
|
|
2338
|
+
think: options.think,
|
|
2339
|
+
stall_timeout_ms: options.streamStallTimeoutMs
|
|
2340
|
+
})
|
|
1863
2341
|
}),
|
|
1864
2342
|
warmupTimeoutMs,
|
|
1865
2343
|
"Model warmup",
|
|
@@ -1870,15 +2348,6 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1870
2348
|
const loadTime = warmup.loadDuration / 1e6;
|
|
1871
2349
|
const runningModels = await listRunningModels3();
|
|
1872
2350
|
const thisModel = runningModels.find((m) => m.name === model);
|
|
1873
|
-
let installedModelSizeBytes = 0;
|
|
1874
|
-
try {
|
|
1875
|
-
const availableModels = await listModels3();
|
|
1876
|
-
const listedModel = availableModels.find((m) => m.name === model);
|
|
1877
|
-
if (listedModel && Number.isFinite(listedModel.size) && listedModel.size > 0) {
|
|
1878
|
-
installedModelSizeBytes = listedModel.size;
|
|
1879
|
-
}
|
|
1880
|
-
} catch {
|
|
1881
|
-
}
|
|
1882
2351
|
spinner.succeed("Model loaded");
|
|
1883
2352
|
const tpsValues = [];
|
|
1884
2353
|
const firstChunkValues = [];
|
|
@@ -1892,6 +2361,7 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1892
2361
|
let thinkingDetected = false;
|
|
1893
2362
|
let totalThinkingTokens = 0;
|
|
1894
2363
|
const cpuLoadSamples = [];
|
|
2364
|
+
let tokensPerSecondEstimated = false;
|
|
1895
2365
|
for (let i = 0; i < BENCH_PROMPTS.length; i++) {
|
|
1896
2366
|
spinner.start(`Running performance test ${i + 1}/${BENCH_PROMPTS.length}...`);
|
|
1897
2367
|
let firstChunkTime = null;
|
|
@@ -1914,11 +2384,11 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1914
2384
|
}
|
|
1915
2385
|
}
|
|
1916
2386
|
},
|
|
1917
|
-
{
|
|
2387
|
+
withBenchmarkProfile({
|
|
1918
2388
|
num_predict: 256,
|
|
1919
2389
|
think: options.think,
|
|
1920
2390
|
stall_timeout_ms: options.streamStallTimeoutMs
|
|
1921
|
-
}
|
|
2391
|
+
})
|
|
1922
2392
|
),
|
|
1923
2393
|
promptTimeoutMs,
|
|
1924
2394
|
"Performance benchmark",
|
|
@@ -1929,6 +2399,9 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1929
2399
|
tpsValues.push(tps);
|
|
1930
2400
|
totalEvalCount += result.evalCount;
|
|
1931
2401
|
totalEvalDurationNs += result.evalDuration;
|
|
2402
|
+
if (result.evalCountEstimated) {
|
|
2403
|
+
tokensPerSecondEstimated = true;
|
|
2404
|
+
}
|
|
1932
2405
|
if (firstChunkTime !== null) {
|
|
1933
2406
|
firstChunkValues.push(firstChunkTime);
|
|
1934
2407
|
}
|
|
@@ -1974,10 +2447,18 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1974
2447
|
]);
|
|
1975
2448
|
let memoryUsedGB;
|
|
1976
2449
|
let memoryPercent;
|
|
1977
|
-
|
|
2450
|
+
let memoryFootprintEstimated = false;
|
|
2451
|
+
const runtimeReportsComparableLoadedSize = runtimeName !== "lm-studio";
|
|
2452
|
+
const estimatedLoadedModelSizeBytes = runtimeName === "lm-studio" && modelWasAlreadyLoaded ? await optionalProbe(() => estimateLoadedModelMemoryBytes(model), null) : null;
|
|
2453
|
+
const loadedModelSizeBytes = runtimeReportsComparableLoadedSize && thisModel && thisModel.size > 0 ? thisModel.size : 0;
|
|
2454
|
+
const memoryFootprintAvailable = runtimeReportsComparableLoadedSize ? loadedModelSizeBytes > 0 || !modelWasAlreadyLoaded : (estimatedLoadedModelSizeBytes ?? 0) > 0 || !modelWasAlreadyLoaded;
|
|
1978
2455
|
if (loadedModelSizeBytes > 0) {
|
|
1979
2456
|
memoryUsedGB = loadedModelSizeBytes / 1024 ** 3;
|
|
1980
2457
|
memoryPercent = memoryUsedGB / memAfter.totalGB * 100;
|
|
2458
|
+
} else if ((estimatedLoadedModelSizeBytes ?? 0) > 0) {
|
|
2459
|
+
memoryUsedGB = (estimatedLoadedModelSizeBytes ?? 0) / 1024 ** 3;
|
|
2460
|
+
memoryPercent = memoryUsedGB / memAfter.totalGB * 100;
|
|
2461
|
+
memoryFootprintEstimated = true;
|
|
1981
2462
|
} else {
|
|
1982
2463
|
memoryUsedGB = Math.max(0, memAfter.usedGB - memBefore.usedGB);
|
|
1983
2464
|
memoryPercent = Math.max(0, memAfter.percent - memBefore.percent);
|
|
@@ -1999,6 +2480,7 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1999
2480
|
return {
|
|
2000
2481
|
metrics: {
|
|
2001
2482
|
tokensPerSecond: totalEvalDurationNs > 0 ? totalEvalCount / (totalEvalDurationNs / 1e9) : avg(tpsValues),
|
|
2483
|
+
...tokensPerSecondEstimated ? { tokensPerSecondEstimated: true } : {},
|
|
2002
2484
|
...firstChunkMs !== void 0 ? { firstChunkMs } : {},
|
|
2003
2485
|
ttft: ttft >= 0 ? ttft : 3e4,
|
|
2004
2486
|
// Fallback: 30s if no TTFT measured
|
|
@@ -2009,6 +2491,8 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
2009
2491
|
completionTokens: totalCompletionTokens,
|
|
2010
2492
|
memoryUsedGB: +memoryUsedGB.toFixed(1),
|
|
2011
2493
|
memoryPercent: +memoryPercent.toFixed(1),
|
|
2494
|
+
memoryFootprintAvailable,
|
|
2495
|
+
...memoryFootprintEstimated ? { memoryFootprintEstimated: true } : {},
|
|
2012
2496
|
memoryHostUsedGB: memAfter.usedGB,
|
|
2013
2497
|
memoryHostPercent: memAfter.percent,
|
|
2014
2498
|
tpsStdDev: tpsValues.length >= 2 ? stddev(tpsValues) : void 0,
|
|
@@ -2401,7 +2885,7 @@ Answer:`;
|
|
|
2401
2885
|
const startTime = Date.now();
|
|
2402
2886
|
try {
|
|
2403
2887
|
const result = await withTimeout(
|
|
2404
|
-
generate3(model, prompt, {
|
|
2888
|
+
generate3(model, prompt, withBenchmarkProfile({ num_predict: 1024, think: opts?.think })),
|
|
2405
2889
|
timeoutMs,
|
|
2406
2890
|
"Reasoning question",
|
|
2407
2891
|
abortOngoingRequests3
|
|
@@ -2730,7 +3214,7 @@ Answer:`;
|
|
|
2730
3214
|
const startTime = Date.now();
|
|
2731
3215
|
try {
|
|
2732
3216
|
const result = await withTimeout(
|
|
2733
|
-
generate3(model, prompt, {
|
|
3217
|
+
generate3(model, prompt, withBenchmarkProfile({ num_predict: 1024, think: opts?.think })),
|
|
2734
3218
|
timeoutMs,
|
|
2735
3219
|
"Math problem",
|
|
2736
3220
|
abortOngoingRequests3
|
|
@@ -2773,7 +3257,7 @@ Answer:`;
|
|
|
2773
3257
|
|
|
2774
3258
|
// ../src/benchmarks/coding.ts
|
|
2775
3259
|
import vm2 from "vm";
|
|
2776
|
-
import { spawn } from "child_process";
|
|
3260
|
+
import { spawn as spawn2 } from "child_process";
|
|
2777
3261
|
import { Worker } from "worker_threads";
|
|
2778
3262
|
|
|
2779
3263
|
// ../src/datasets/coding.json
|
|
@@ -6446,7 +6930,7 @@ async function runTestsInSubprocess(code, task) {
|
|
|
6446
6930
|
const total = task.tests.length;
|
|
6447
6931
|
return new Promise((resolve) => {
|
|
6448
6932
|
const wallTimeoutMs = computeIsolatedWallTimeoutMs(task);
|
|
6449
|
-
const child =
|
|
6933
|
+
const child = spawn2(
|
|
6450
6934
|
process.execPath,
|
|
6451
6935
|
[
|
|
6452
6936
|
"--max-old-space-size=96",
|
|
@@ -6613,7 +7097,7 @@ Reply with ONLY the function code, no explanation.`;
|
|
|
6613
7097
|
const startTime = Date.now();
|
|
6614
7098
|
try {
|
|
6615
7099
|
const result = await withTimeout(
|
|
6616
|
-
generate3(model, prompt, {
|
|
7100
|
+
generate3(model, prompt, withBenchmarkProfile({ num_predict: 2048, think: opts?.think })),
|
|
6617
7101
|
timeoutMs,
|
|
6618
7102
|
"Coding task",
|
|
6619
7103
|
abortOngoingRequests3
|
|
@@ -6968,7 +7452,7 @@ async function runInstructionFollowingBench(model, opts) {
|
|
|
6968
7452
|
const startTime = Date.now();
|
|
6969
7453
|
try {
|
|
6970
7454
|
const result = await withTimeout(
|
|
6971
|
-
generate3(model, prompt, {
|
|
7455
|
+
generate3(model, prompt, withBenchmarkProfile({ num_predict: 1024, think: opts?.think })),
|
|
6972
7456
|
timeoutMs,
|
|
6973
7457
|
"Instruction following task",
|
|
6974
7458
|
abortOngoingRequests3
|
|
@@ -7354,7 +7838,7 @@ async function runStructuredOutputBench(model, opts) {
|
|
|
7354
7838
|
const startTime = Date.now();
|
|
7355
7839
|
try {
|
|
7356
7840
|
const result = await withTimeout(
|
|
7357
|
-
generate3(model, q.prompt, {
|
|
7841
|
+
generate3(model, q.prompt, withBenchmarkProfile({ num_predict: 1024, think: opts?.think })),
|
|
7358
7842
|
timeoutMs,
|
|
7359
7843
|
"Structured output task",
|
|
7360
7844
|
abortOngoingRequests3
|
|
@@ -7613,7 +8097,7 @@ async function runMultilingualBench(model, opts) {
|
|
|
7613
8097
|
const startTime = Date.now();
|
|
7614
8098
|
try {
|
|
7615
8099
|
const result = await withTimeout(
|
|
7616
|
-
generate3(model, q.prompt, {
|
|
8100
|
+
generate3(model, q.prompt, withBenchmarkProfile({ num_predict: 1024, think: opts?.think })),
|
|
7617
8101
|
timeoutMs,
|
|
7618
8102
|
"Multilingual task",
|
|
7619
8103
|
abortOngoingRequests3
|
|
@@ -7749,13 +8233,15 @@ function computePerformanceScore(perf, hardware) {
|
|
|
7749
8233
|
const tuning = deriveHardwareFitTuning(hardware);
|
|
7750
8234
|
const safeTokensPerSecond = sanitizeNonNegative(perf.tokensPerSecond, 0);
|
|
7751
8235
|
const safeTtft = sanitizeNonNegative(perf.ttft, tuning.ttft.hardMaxMs * 2);
|
|
7752
|
-
const effectiveMemPercent = sanitizeNonNegative(
|
|
7753
|
-
perf.memoryHostPercent ?? perf.memoryPercent,
|
|
7754
|
-
100
|
|
7755
|
-
);
|
|
7756
8236
|
const speed = Math.round(scoreSpeed(safeTokensPerSecond, tuning));
|
|
7757
8237
|
const ttft = Math.round(scoreTTFT(safeTtft, tuning));
|
|
7758
|
-
const memory = Math.round(
|
|
8238
|
+
const memory = perf.memoryFootprintAvailable === false ? Math.round((speed + ttft) / (50 + 20) * 30) : Math.round(
|
|
8239
|
+
scoreMemory(
|
|
8240
|
+
// Score memory from the model's measured footprint/delta rather than
|
|
8241
|
+
// unrelated host RAM usage from other running workloads.
|
|
8242
|
+
sanitizeNonNegative(perf.memoryPercent, 100)
|
|
8243
|
+
)
|
|
8244
|
+
);
|
|
7759
8245
|
return {
|
|
7760
8246
|
total: clamp(speed + ttft + memory, 0, 100),
|
|
7761
8247
|
speed,
|
|
@@ -7864,11 +8350,9 @@ function computeFitness(perf, quality, hardware, benchEnv) {
|
|
|
7864
8350
|
const safeTokensPerSecond = sanitizeNonNegative(perf.tokensPerSecond, 0);
|
|
7865
8351
|
const safeTtft = sanitizeNonNegative(perf.ttft, tuning.ttft.hardMaxMs * 10);
|
|
7866
8352
|
const safeLoadTime = sanitizeNonNegative(perf.loadTime, tuning.loadTimeHardMaxMs * 10);
|
|
7867
|
-
const
|
|
7868
|
-
|
|
7869
|
-
|
|
7870
|
-
);
|
|
7871
|
-
const modelMemoryDeltaPercent = sanitizeNonNegative(perf.memoryPercent, 100);
|
|
8353
|
+
const modelMemoryFootprintAvailable = perf.memoryFootprintAvailable !== false;
|
|
8354
|
+
const modelMemoryDeltaPercent = modelMemoryFootprintAvailable ? sanitizeNonNegative(perf.memoryPercent, 100) : void 0;
|
|
8355
|
+
const hostMemoryPercent = perf.memoryHostPercent !== void 0 && Number.isFinite(perf.memoryHostPercent) && perf.memoryHostPercent >= 0 ? perf.memoryHostPercent : void 0;
|
|
7872
8356
|
const disqualifiers = [];
|
|
7873
8357
|
if (safeTokensPerSecond < tuning.speed.hardMin) {
|
|
7874
8358
|
disqualifiers.push(
|
|
@@ -7885,12 +8369,12 @@ function computeFitness(perf, quality, hardware, benchEnv) {
|
|
|
7885
8369
|
`Model load time too high: ${Math.round(safeLoadTime)}ms (maximum: ${tuning.loadTimeHardMaxMs}ms for ${tuning.profile} profile)`
|
|
7886
8370
|
);
|
|
7887
8371
|
}
|
|
7888
|
-
const hostCritical = hostMemoryPercent > 95;
|
|
7889
|
-
const modelDeltaCritical = modelMemoryDeltaPercent > 90;
|
|
7890
|
-
const modelDeltaSignificant = modelMemoryDeltaPercent >= 10;
|
|
7891
|
-
if (modelDeltaCritical
|
|
8372
|
+
const hostCritical = hostMemoryPercent !== void 0 && hostMemoryPercent > 95;
|
|
8373
|
+
const modelDeltaCritical = modelMemoryDeltaPercent !== void 0 && modelMemoryDeltaPercent > 90;
|
|
8374
|
+
const modelDeltaSignificant = modelMemoryDeltaPercent !== void 0 && modelMemoryDeltaPercent >= 10;
|
|
8375
|
+
if (modelDeltaCritical) {
|
|
7892
8376
|
disqualifiers.push(
|
|
7893
|
-
`Memory usage critical:
|
|
8377
|
+
`Memory usage critical: model delta +${modelMemoryDeltaPercent.toFixed(0)}%`
|
|
7894
8378
|
);
|
|
7895
8379
|
}
|
|
7896
8380
|
const verdictScore = globalScore ?? hardwareFitScore;
|
|
@@ -7925,9 +8409,28 @@ function computeFitness(perf, quality, hardware, benchEnv) {
|
|
|
7925
8409
|
`Token speed is unstable (stddev ${perf.tpsStdDev.toFixed(1)} tok/s, mean ${safeTokensPerSecond.toFixed(1)} tok/s) \u2014 may indicate thermal throttling or memory pressure.`
|
|
7926
8410
|
);
|
|
7927
8411
|
}
|
|
7928
|
-
if (
|
|
8412
|
+
if (perf.tokensPerSecondEstimated) {
|
|
8413
|
+
warnings.push(
|
|
8414
|
+
"Token throughput is estimated from LM Studio output because native token stats were unavailable. Compare tok/s across backends cautiously."
|
|
8415
|
+
);
|
|
8416
|
+
}
|
|
8417
|
+
if (perf.memoryFootprintEstimated) {
|
|
8418
|
+
warnings.push(
|
|
8419
|
+
"Model memory footprint is estimated via LM Studio CLI rather than measured from a fresh load."
|
|
8420
|
+
);
|
|
8421
|
+
}
|
|
8422
|
+
if (!modelMemoryFootprintAvailable) {
|
|
7929
8423
|
warnings.push(
|
|
7930
|
-
|
|
8424
|
+
"Model memory footprint was unavailable for this run, so RAM fit scoring was normalized from speed and TTFT only."
|
|
8425
|
+
);
|
|
8426
|
+
}
|
|
8427
|
+
if (hostCritical && !modelMemoryFootprintAvailable) {
|
|
8428
|
+
warnings.push(
|
|
8429
|
+
`Host memory is already high (${hostMemoryPercent.toFixed(0)}%) and model footprint was unavailable. Results may be influenced by other running workloads.`
|
|
8430
|
+
);
|
|
8431
|
+
} else if (hostCritical && modelMemoryDeltaPercent !== void 0 && !modelDeltaSignificant) {
|
|
8432
|
+
warnings.push(
|
|
8433
|
+
`Host memory is already high (${hostMemoryPercent.toFixed(0)}%) but model delta is limited (+${modelMemoryDeltaPercent.toFixed(0)}%). Results may be influenced by other running workloads.`
|
|
7931
8434
|
);
|
|
7932
8435
|
}
|
|
7933
8436
|
if (hardware?.powerMode === "low-power") {
|
|
@@ -8015,12 +8518,46 @@ function getLevel(score) {
|
|
|
8015
8518
|
if (score >= 25) return "Weak";
|
|
8016
8519
|
return "Poor";
|
|
8017
8520
|
}
|
|
8521
|
+
/**
 * Build the human-readable CPU core summary from a hardware record.
 *
 * Reads `hw.cpuCores`, `hw.cpuPCores` and `hw.cpuECores` and picks a wording:
 *  - both P and E counts known: "<total> total (<P> performance + <E> efficiency)"
 *  - only P known and total exceeds it: "<total> threads (<P> cores)"
 *  - only P known otherwise: "<total> total (<P> performance)"
 *  - only E known: "<total> total (<E> efficiency)"
 *  - neither known: the total as a string
 *
 * @param {{ cpuCores: number, cpuPCores?: (number|null), cpuECores?: (number|null) }} hw
 *   Hardware record.
 * @returns {string} The core summary label.
 */
function formatCpuCoresLabel(hw) {
  const total = hw.cpuCores;
  // Treat a missing (undefined) count the same as null so the label never
  // renders the literal text "undefined".
  const pCores = hw.cpuPCores ?? null;
  const eCores = hw.cpuECores ?? null;
  if (pCores !== null && eCores !== null) {
    return `${total} total (${pCores} performance + ${eCores} efficiency)`;
  }
  if (pCores !== null && total > pCores) {
    return `${total} threads (${pCores} cores)`;
  }
  if (pCores !== null) {
    return `${total} total (${pCores} performance)`;
  }
  if (eCores !== null) {
    return `${total} total (${eCores} efficiency)`;
  }
  return String(total);
}
|
|
8536
|
+
/**
 * Count execution problems recorded in a benchmark category's detail rows.
 *
 * Each row's `actual` text is classified by prefix:
 *  - starts with "TIMEOUT" (case-insensitive, whole word): counted as a timeout
 *  - starts with "ERROR:" (case-insensitive): counted as an error, and also as
 *    a crash when the text mentions a model crash
 * Rows with no `actual` text, or with any other text, are not counted.
 * Crashes are a subset of errors, so `errors >= crashes`.
 *
 * @param {string} name - Category name, echoed back in the result.
 * @param {Array<{ actual?: string }>} details - Detail rows; a missing list is
 *   treated as empty.
 * @returns {{ name: string, crashes: number, timeouts: number, errors: number }}
 */
function summarizeCategoryIssues(name, details) {
  let crashes = 0;
  let timeouts = 0;
  let errors = 0;
  // A category without detail rows simply has nothing to report.
  for (const detail of details ?? []) {
    const actual = detail?.actual ?? "";
    if (/^TIMEOUT\b/i.test(actual)) {
      timeouts++;
      continue;
    }
    if (/^ERROR:/i.test(actual)) {
      errors++;
      if (/model has crashed|has crashed without additional information|model crashed/i.test(actual)) {
        crashes++;
      }
    }
  }
  return { name, crashes, timeouts, errors };
}
|
|
8018
8555
|
function printHardwareTable(hw) {
|
|
8019
8556
|
const table = new Table({
|
|
8020
8557
|
head: [chalk3.bold("Hardware"), chalk3.bold("Value")],
|
|
8021
8558
|
style: { head: [], border: [] }
|
|
8022
8559
|
});
|
|
8023
|
-
const coresDetail =
|
|
8560
|
+
const coresDetail = formatCpuCoresLabel(hw);
|
|
8024
8561
|
const cpuLine = hw.cpuFreqGHz ? `${hw.cpu} @ ${hw.cpuFreqGHz} GHz` : hw.cpu;
|
|
8025
8562
|
const ramLine = hw.memoryType ? `${hw.totalMemoryGB} GB ${hw.memoryType} (${hw.freeMemoryGB} GB free)` : `${hw.totalMemoryGB} GB (${hw.freeMemoryGB} GB free)`;
|
|
8026
8563
|
const swapColor = hw.swapUsedGB > hw.swapTotalGB * 0.5 ? chalk3.yellow : chalk3.green;
|
|
@@ -8056,7 +8593,10 @@ function printPerformanceTable(perf, benchEnvironment) {
|
|
|
8056
8593
|
const ttftColor = perf.ttft < 1e3 ? chalk3.green : perf.ttft < 3e3 ? chalk3.yellow : chalk3.red;
|
|
8057
8594
|
const memColor = perf.memoryPercent < 50 ? chalk3.green : perf.memoryPercent < 80 ? chalk3.yellow : chalk3.red;
|
|
8058
8595
|
table.push(
|
|
8059
|
-
[
|
|
8596
|
+
[
|
|
8597
|
+
"Tokens/sec",
|
|
8598
|
+
perf.tokensPerSecondEstimated ? chalk3.yellow(`${perf.tokensPerSecond.toFixed(1)} tok/s (estimated)`) : tpsColor(`${perf.tokensPerSecond.toFixed(1)} tok/s`)
|
|
8599
|
+
],
|
|
8060
8600
|
[
|
|
8061
8601
|
"First Chunk Latency",
|
|
8062
8602
|
perf.firstChunkMs !== void 0 ? formatDuration(perf.firstChunkMs) : chalk3.dim("N/A (stream metric unavailable)")
|
|
@@ -8071,8 +8611,8 @@ function printPerformanceTable(perf, benchEnvironment) {
|
|
|
8071
8611
|
["Completion Tokens", String(perf.completionTokens)],
|
|
8072
8612
|
[
|
|
8073
8613
|
"Model Memory Footprint",
|
|
8074
|
-
memColor(
|
|
8075
|
-
`${perf.memoryUsedGB.toFixed(1)} GB (+${perf.memoryPercent.toFixed(0)}%)`
|
|
8614
|
+
perf.memoryFootprintAvailable === false ? chalk3.dim("N/A (model already loaded; runtime metric unavailable)") : memColor(
|
|
8615
|
+
`${perf.memoryUsedGB.toFixed(1)} GB (+${perf.memoryPercent.toFixed(0)}%)${perf.memoryFootprintEstimated ? " (estimated)" : ""}`
|
|
8076
8616
|
)
|
|
8077
8617
|
],
|
|
8078
8618
|
[
|
|
@@ -8133,6 +8673,18 @@ function printQualityTable(quality, timePenalties) {
|
|
|
8133
8673
|
]);
|
|
8134
8674
|
}
|
|
8135
8675
|
console.log(table.toString());
|
|
8676
|
+
const issueSummaries = categories.map((cat) => summarizeCategoryIssues(cat.name, cat.result.details)).filter((summary) => summary.errors > 0 || summary.timeouts > 0);
|
|
8677
|
+
if (issueSummaries.length > 0) {
|
|
8678
|
+
console.log(chalk3.yellow("Execution issues detected during quality benchmark:"));
|
|
8679
|
+
for (const summary of issueSummaries) {
|
|
8680
|
+
const parts = [];
|
|
8681
|
+
if (summary.crashes > 0) parts.push(`${summary.crashes} crash${summary.crashes > 1 ? "es" : ""}`);
|
|
8682
|
+
const nonCrashErrors = summary.errors - summary.crashes;
|
|
8683
|
+
if (nonCrashErrors > 0) parts.push(`${nonCrashErrors} error${nonCrashErrors > 1 ? "s" : ""}`);
|
|
8684
|
+
if (summary.timeouts > 0) parts.push(`${summary.timeouts} timeout${summary.timeouts > 1 ? "s" : ""}`);
|
|
8685
|
+
console.log(chalk3.yellow(` \u2022 ${summary.name}: ${parts.join(", ")} (scored as incorrect)`));
|
|
8686
|
+
}
|
|
8687
|
+
}
|
|
8136
8688
|
}
|
|
8137
8689
|
function printSummaryTable(results) {
|
|
8138
8690
|
const termWidth = process.stdout.columns || 80;
|
|
@@ -8146,7 +8698,7 @@ function printSummaryTable(results) {
|
|
|
8146
8698
|
chalk3.bold("Model"),
|
|
8147
8699
|
chalk3.bold("tok/s"),
|
|
8148
8700
|
chalk3.bold("TTFT"),
|
|
8149
|
-
chalk3.bold("
|
|
8701
|
+
chalk3.bold("Model RAM%"),
|
|
8150
8702
|
chalk3.bold("Profile"),
|
|
8151
8703
|
chalk3.bold("HW Fit"),
|
|
8152
8704
|
chalk3.bold("Quality"),
|
|
@@ -8159,17 +8711,23 @@ function printSummaryTable(results) {
|
|
|
8159
8711
|
style: { head: [], border: [] },
|
|
8160
8712
|
wordWrap: true
|
|
8161
8713
|
});
|
|
8714
|
+
/**
 * Formats the "Model RAM%" cell for one row of the summary table.
 *
 * @param {object} result - Benchmark result; only `result.performance` is read.
 * @returns {string} "N/A" when the footprint is flagged unavailable or the
 *   percentage is not a finite number; otherwise the percentage rounded to an
 *   integer, with a trailing "~" when the footprint is flagged as estimated.
 */
const formatSummaryModelMemory = (result) => {
  const { memoryFootprintAvailable, memoryFootprintEstimated, memoryPercent } = result.performance;
  if (memoryFootprintAvailable === false) return "N/A";
  // A missing percentage would make toFixed throw, and NaN would render as
  // "NaN%"; show the same placeholder used for an unavailable metric instead.
  if (typeof memoryPercent !== "number" || !Number.isFinite(memoryPercent)) return "N/A";
  const value = `${memoryPercent.toFixed(0)}%`;
  return memoryFootprintEstimated ? `${value}~` : value;
};
|
|
8162
8719
|
for (const r of results) {
|
|
8163
8720
|
const vColor = r.fitness.verdict === "EXCELLENT" ? chalk3.green.bold : r.fitness.verdict === "GOOD" ? chalk3.blue.bold : r.fitness.verdict === "MARGINAL" ? chalk3.yellow.bold : chalk3.red.bold;
|
|
8164
8721
|
const flags = [];
|
|
8165
8722
|
if (r.hardware.powerMode === "low-power") flags.push(chalk3.red("ECO"));
|
|
8166
8723
|
if (r.modelInfo?.thinkingDetected) flags.push(chalk3.magenta("THINK"));
|
|
8167
8724
|
const modelName = compact && r.model.length > 20 ? r.model.slice(0, 18) + ".." : r.model;
|
|
8725
|
+
const throughputLabel = r.performance.tokensPerSecondEstimated ? `~${r.performance.tokensPerSecond.toFixed(1)}` : `${r.performance.tokensPerSecond.toFixed(1)}`;
|
|
8168
8726
|
const row = [
|
|
8169
8727
|
modelName,
|
|
8170
|
-
|
|
8728
|
+
throughputLabel,
|
|
8171
8729
|
formatDuration(r.performance.ttft),
|
|
8172
|
-
r
|
|
8730
|
+
formatSummaryModelMemory(r),
|
|
8173
8731
|
r.fitness.tuning.profile,
|
|
8174
8732
|
scoreColor(r.fitness.hardwareFitScore)(
|
|
8175
8733
|
`${compactBar(r.fitness.hardwareFitScore)} ${r.fitness.hardwareFitScore}%`
|
|
@@ -8194,9 +8752,8 @@ function printSummaryTable(results) {
|
|
|
8194
8752
|
// ../src/ui/verdict.ts
|
|
8195
8753
|
import chalk4 from "chalk";
|
|
8196
8754
|
var BOX_INNER = 60;
|
|
8197
|
-
var ANSI_RE = /\x1b\[[0-9;]*m/g;
|
|
8198
8755
|
function visibleLength(str) {
|
|
8199
|
-
return str
|
|
8756
|
+
return stripAnsi(str).length;
|
|
8200
8757
|
}
|
|
8201
8758
|
function wrapText(text, maxWidth) {
|
|
8202
8759
|
if (visibleLength(text) <= maxWidth) return [text];
|
|
@@ -8566,6 +9123,15 @@ function assertUploaderConfig(config) {
|
|
|
8566
9123
|
);
|
|
8567
9124
|
}
|
|
8568
9125
|
}
|
|
9126
|
+
/**
 * Resolves the memory percentage to report for an uploaded benchmark result.
 *
 * @param {object} result - Benchmark result; only `result.performance` is read.
 * @returns {number|null} `performance.memoryPercent` when it is a finite
 *   number and the footprint is not flagged unavailable; otherwise `null`.
 */
function resolveUploadedMemoryPercent(result) {
  const { memoryFootprintAvailable, memoryPercent } = result.performance;
  if (memoryFootprintAvailable === false) return null;
  // Never hand back undefined/NaN/Infinity: the caller gets either a real
  // number or an explicit null.
  return typeof memoryPercent === "number" && Number.isFinite(memoryPercent) ? memoryPercent : null;
}
|
|
9129
|
+
/**
 * Resolves the model format string to report for an uploaded benchmark result.
 *
 * @param {object} result - Benchmark result; only `result.metadata` is read.
 * @returns {string} The trimmed `metadata.modelFormat` when it is a non-blank
 *   string; otherwise "gguf" when `metadata.runtimeBackend` is "ollama" or
 *   absent, and "unknown" for any other backend.
 */
function resolveUploadedModelFormat(result) {
  const { modelFormat, runtimeBackend } = result.metadata;
  // Type-guard before trimming so a non-string value falls through to the
  // backend-based default instead of throwing.
  if (typeof modelFormat === "string") {
    const trimmed = modelFormat.trim();
    // Return the trimmed value, i.e. the same string that was tested.
    if (trimmed) return trimmed;
  }
  return (runtimeBackend ?? "ollama") === "ollama" ? "gguf" : "unknown";
}
|
|
8569
9135
|
async function uploadBenchResult(result, options = {}) {
|
|
8570
9136
|
const config = resolveUploaderConfig();
|
|
8571
9137
|
assertUploaderConfig(config);
|
|
@@ -8578,7 +9144,7 @@ async function uploadBenchResult(result, options = {}) {
|
|
|
8578
9144
|
thinking_detected: result.modelInfo?.thinkingDetected ?? null,
|
|
8579
9145
|
tokens_per_second: result.performance.tokensPerSecond,
|
|
8580
9146
|
ttft_ms: result.performance.ttft,
|
|
8581
|
-
memory_percent: result
|
|
9147
|
+
memory_percent: resolveUploadedMemoryPercent(result),
|
|
8582
9148
|
thinking_tokens_estimate: result.performance.thinkingTokensEstimate ?? null,
|
|
8583
9149
|
verdict: result.fitness.verdict,
|
|
8584
9150
|
global_score: result.fitness.globalScore,
|
|
@@ -8595,7 +9161,7 @@ async function uploadBenchResult(result, options = {}) {
|
|
|
8595
9161
|
benchmark_spec_version: result.metadata.benchmarkSpecVersion,
|
|
8596
9162
|
runtime_version: result.metadata.runtimeVersion,
|
|
8597
9163
|
runtime_backend: result.metadata.runtimeBackend ?? "ollama",
|
|
8598
|
-
model_format: result
|
|
9164
|
+
model_format: resolveUploadedModelFormat(result),
|
|
8599
9165
|
raw_log_hash: result.metadata.rawLogHash,
|
|
8600
9166
|
result
|
|
8601
9167
|
};
|
|
@@ -8848,6 +9414,7 @@ async function promptSubmitterProfile(deps, defaults = {}) {
|
|
|
8848
9414
|
}
|
|
8849
9415
|
console.log(chalk6.yellow("Nickname must be between 2 and 40 characters."));
|
|
8850
9416
|
}
|
|
9417
|
+
console.log(chalk6.dim("Your email is never stored \u2014 only a SHA-256 hash is saved to match your leaderboard entries."));
|
|
8851
9418
|
while (true) {
|
|
8852
9419
|
const emailHint = defaults.email ? ` [${defaults.email}]` : "";
|
|
8853
9420
|
const emailAnswer = await ask(`Email${emailHint} > `);
|
|
@@ -9008,7 +9575,7 @@ async function promptThinkingMode() {
|
|
|
9008
9575
|
}
|
|
9009
9576
|
|
|
9010
9577
|
// ../src/commands/bench.ts
|
|
9011
|
-
var BENCHMARK_SPEC_VERSION = "0.2.
|
|
9578
|
+
var BENCHMARK_SPEC_VERSION = "0.2.1";
|
|
9012
9579
|
var PROMPT_PACK_VERSION = "0.1.0";
|
|
9013
9580
|
async function benchCommand(options) {
|
|
9014
9581
|
if (options.backend !== void 0) {
|
|
@@ -9101,6 +9668,11 @@ async function benchCommand(options) {
|
|
|
9101
9668
|
if (!silent && thinkEnabled) {
|
|
9102
9669
|
infoMsg("Thinking mode enabled \u2014 models that support it will use extended reasoning.");
|
|
9103
9670
|
}
|
|
9671
|
+
if (!silent) {
|
|
9672
|
+
infoMsg(
|
|
9673
|
+
`Benchmark profile ${BENCHMARK_PROFILE_VERSION}: temperature=0, top_p=1, seed=42, context=runtime default.`
|
|
9674
|
+
);
|
|
9675
|
+
}
|
|
9104
9676
|
try {
|
|
9105
9677
|
const results = [];
|
|
9106
9678
|
const failedModels = [];
|
|
@@ -9130,7 +9702,7 @@ ${tl}${h.repeat(innerWidth)}${tr}`));
|
|
|
9130
9702
|
minSuccessfulPrompts: options.perfMinSuccessfulPrompts,
|
|
9131
9703
|
failOnPromptError: options.perfStrict,
|
|
9132
9704
|
think: thinkEnabled,
|
|
9133
|
-
streamStallTimeoutMs: options.
|
|
9705
|
+
streamStallTimeoutMs: options.streamStallTimeoutMs
|
|
9134
9706
|
});
|
|
9135
9707
|
const perf = perfResult.metrics;
|
|
9136
9708
|
const benchEnvironment = perfResult.benchEnvironment;
|
|
@@ -9168,13 +9740,22 @@ ${tl}${h.repeat(innerWidth)}${tr}`));
|
|
|
9168
9740
|
printVerdict(modelName, fitness);
|
|
9169
9741
|
}
|
|
9170
9742
|
const matchedModel = allModels.find((m) => m.name === modelName);
|
|
9171
|
-
|
|
9172
|
-
|
|
9173
|
-
|
|
9174
|
-
|
|
9175
|
-
|
|
9176
|
-
|
|
9177
|
-
|
|
9743
|
+
let resolvedModel = matchedModel;
|
|
9744
|
+
if (matchedModel?.modelFormat === void 0) {
|
|
9745
|
+
try {
|
|
9746
|
+
resolvedModel = await resolveRuntimeModel(modelName) ?? matchedModel;
|
|
9747
|
+
} catch {
|
|
9748
|
+
resolvedModel = matchedModel;
|
|
9749
|
+
}
|
|
9750
|
+
}
|
|
9751
|
+
const modelMetadataSource = resolvedModel ?? matchedModel;
|
|
9752
|
+
const modelInfo = modelMetadataSource ? {
|
|
9753
|
+
parameterSize: modelMetadataSource.parameterSize,
|
|
9754
|
+
quantization: modelMetadataSource.quantization,
|
|
9755
|
+
family: modelMetadataSource.family,
|
|
9756
|
+
// Persist actual observed thinking behavior from the benchmark run.
|
|
9757
|
+
thinkingDetected: perfResult.thinkingDetected
|
|
9758
|
+
} : { thinkingDetected: perfResult.thinkingDetected };
|
|
9178
9759
|
const partialResult = {
|
|
9179
9760
|
model: modelName,
|
|
9180
9761
|
modelInfo,
|
|
@@ -9189,7 +9770,8 @@ ${tl}${h.repeat(innerWidth)}${tr}`));
|
|
|
9189
9770
|
promptPackVersion: PROMPT_PACK_VERSION,
|
|
9190
9771
|
runtimeVersion,
|
|
9191
9772
|
runtimeBackend: getRuntimeName(),
|
|
9192
|
-
modelFormat:
|
|
9773
|
+
modelFormat: resolvedModel?.modelFormat ?? (getRuntimeName() === "ollama" ? getRuntimeModelFormat() : "unknown"),
|
|
9774
|
+
benchmarkProfile: buildBenchmarkProfileMetadata(thinkEnabled)
|
|
9193
9775
|
}
|
|
9194
9776
|
};
|
|
9195
9777
|
const rawLogHash = createHash3("sha256").update(JSON.stringify(partialResult)).digest("hex");
|
|
@@ -9550,7 +10132,7 @@ async function handleShareResult(args) {
|
|
|
9550
10132
|
// src/index.ts
|
|
9551
10133
|
var server = new McpServer({
|
|
9552
10134
|
name: "metrillm",
|
|
9553
|
-
version: "0.1
|
|
10135
|
+
version: "0.2.1"
|
|
9554
10136
|
});
|
|
9555
10137
|
for (const def of toolDefinitions) {
|
|
9556
10138
|
switch (def.name) {
|