metrillm-mcp 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +558 -155
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -28,10 +28,22 @@ import { Ollama } from "ollama";
|
|
|
28
28
|
|
|
29
29
|
// ../src/utils.ts
|
|
30
30
|
import vm from "vm";
|
|
31
|
-
import { execFile } from "child_process";
|
|
31
|
+
import { execFile, spawn } from "child_process";
|
|
32
32
|
/**
 * Opens `url` with the operating system's default handler.
 *
 * Fire-and-forget: launch errors are intentionally ignored so a missing
 * opener never breaks the caller.
 *
 * @param {string} url - Address to hand to the platform opener.
 * @returns {void}
 */
function openUrl(url) {
  if (process.platform === "win32") {
    // Do not route the URL through `cmd /c start`: cmd.exe re-parses its
    // command line, so an unquoted `&` (present in most query strings) ends
    // the `start` command and runs the remainder as a second command.
    // rundll32 receives the URL as a plain argument instead.
    const opener = spawn("rundll32", ["url.dll,FileProtocolHandler", url], {
      windowsHide: true,
      stdio: "ignore"
    });
    opener.on("error", () => {
      // Deliberately ignored: opening the URL is best-effort.
    });
    // Let the parent process exit without waiting for the opener.
    opener.unref();
    return;
  }
  const command = process.platform === "darwin" ? "open" : "xdg-open";
  const opener = execFile(command, [url]);
  opener.on("error", () => {
    // Deliberately ignored: opening the URL is best-effort.
  });
}
|
|
36
48
|
function avg(nums) {
|
|
37
49
|
if (nums.length === 0) return 0;
|
|
@@ -515,7 +527,8 @@ function extractCodeBlock(text, preferredFunctionName) {
|
|
|
515
527
|
var client = new Ollama();
|
|
516
528
|
var DEFAULT_OLLAMA_HOST = "http://127.0.0.1:11434";
|
|
517
529
|
var OLLAMA_INIT_TIMEOUT_MS = 12e4;
|
|
518
|
-
var
|
|
530
|
+
var DEFAULT_STREAM_STALL_TIMEOUT_MS = 3e4;
|
|
531
|
+
var SHARED_STREAM_STALL_TIMEOUT_ENV = "METRILLM_STREAM_STALL_TIMEOUT_MS";
|
|
519
532
|
function getOllamaBaseUrl() {
|
|
520
533
|
const configured = process.env.OLLAMA_HOST?.trim();
|
|
521
534
|
if (!configured) return DEFAULT_OLLAMA_HOST;
|
|
@@ -575,6 +588,23 @@ function isUnsupportedSamplingOptionError(err) {
|
|
|
575
588
|
if (!mentionsSampling) return false;
|
|
576
589
|
return /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/.test(lower);
|
|
577
590
|
}
|
|
591
|
+
/**
 * Parses a string made only of decimal digits into a safe integer.
 *
 * @param {string} value - Candidate text.
 * @returns {number | null} The parsed integer, or `null` when the text is not
 *   digits-only or the number is outside the safe-integer range.
 */
function parseNonNegativeInt(value) {
  const digitsOnly = /^\d+$/;
  if (digitsOnly.test(value)) {
    const candidate = Number.parseInt(value, 10);
    // Very long digit strings parse to values that are no longer exact.
    if (Number.isSafeInteger(candidate) && candidate >= 0) {
      return candidate;
    }
  }
  return null;
}
|
|
597
|
+
/**
 * Resolves the stall timeout (in milliseconds) for a streamed generation.
 *
 * Precedence: an explicit `override`, then the environment variable named by
 * `SHARED_STREAM_STALL_TIMEOUT_ENV`, then `DEFAULT_STREAM_STALL_TIMEOUT_MS`.
 * Invalid values (non-finite, negative, non-numeric text) fall back to the
 * default.
 *
 * @param {number | undefined} override - Per-call timeout; `undefined` defers
 *   to the environment.
 * @returns {number | undefined} Whole milliseconds, or `undefined` when the
 *   timeout is disabled (a value of zero).
 */
function resolveStreamStallTimeoutMs(override) {
  if (override !== void 0) {
    if (!Number.isFinite(override) || override < 0) return DEFAULT_STREAM_STALL_TIMEOUT_MS;
    // Truncate before the zero check: a fractional value below 1 would
    // otherwise become a 0 ms timer that fires immediately instead of
    // disabling the timeout the way an explicit 0 does.
    const wholeMs = Math.trunc(override);
    return wholeMs === 0 ? void 0 : wholeMs;
  }
  const configured = process.env[SHARED_STREAM_STALL_TIMEOUT_ENV]?.trim();
  if (!configured) return DEFAULT_STREAM_STALL_TIMEOUT_MS;
  const parsed = parseNonNegativeInt(configured);
  if (parsed === null) return DEFAULT_STREAM_STALL_TIMEOUT_MS;
  return parsed === 0 ? void 0 : parsed;
}
|
|
578
608
|
function buildGenerateRequest(model, prompt, options, includeSampling) {
|
|
579
609
|
return {
|
|
580
610
|
model,
|
|
@@ -594,6 +624,8 @@ async function generate(model, prompt, options) {
|
|
|
594
624
|
return generateStream(model, prompt, void 0, options);
|
|
595
625
|
}
|
|
596
626
|
async function generateStream(model, prompt, callbacks, options) {
|
|
627
|
+
const stallTimeoutMs = resolveStreamStallTimeoutMs(options?.stall_timeout_ms);
|
|
628
|
+
let abortedByStallTimeout = false;
|
|
597
629
|
const initializeStream = (includeSampling) => withTimeout(
|
|
598
630
|
client.generate(buildGenerateRequest(model, prompt, options, includeSampling)),
|
|
599
631
|
OLLAMA_INIT_TIMEOUT_MS,
|
|
@@ -615,10 +647,12 @@ async function generateStream(model, prompt, callbacks, options) {
|
|
|
615
647
|
let firstChunkSeen = false;
|
|
616
648
|
let stallTimer = null;
|
|
617
649
|
const resetStallTimer = () => {
|
|
650
|
+
if (stallTimeoutMs === void 0) return;
|
|
618
651
|
if (stallTimer) clearTimeout(stallTimer);
|
|
619
652
|
stallTimer = setTimeout(() => {
|
|
653
|
+
abortedByStallTimeout = true;
|
|
620
654
|
client.abort();
|
|
621
|
-
},
|
|
655
|
+
}, stallTimeoutMs);
|
|
622
656
|
};
|
|
623
657
|
try {
|
|
624
658
|
resetStallTimer();
|
|
@@ -653,6 +687,9 @@ async function generateStream(model, prompt, callbacks, options) {
|
|
|
653
687
|
if (stallTimer) clearTimeout(stallTimer);
|
|
654
688
|
}
|
|
655
689
|
if (!result) {
|
|
690
|
+
if (abortedByStallTimeout && stallTimeoutMs !== void 0) {
|
|
691
|
+
throw new Error(`Ollama stream timed out after ${stallTimeoutMs}ms`);
|
|
692
|
+
}
|
|
656
693
|
throw new Error("Stream ended without done signal");
|
|
657
694
|
}
|
|
658
695
|
callbacks?.onDone?.(result);
|
|
@@ -682,14 +719,18 @@ function abortOngoingRequests() {
|
|
|
682
719
|
import os from "os";
|
|
683
720
|
import path from "path";
|
|
684
721
|
import { promises as fs } from "fs";
|
|
722
|
+
import { execFile as execFile2 } from "child_process";
|
|
685
723
|
var DEFAULT_LM_STUDIO_BASE_URL = "http://127.0.0.1:1234";
|
|
686
724
|
var LM_STUDIO_INIT_TIMEOUT_MS = 15e3;
|
|
687
725
|
var LM_STUDIO_METADATA_TIMEOUT_MS = 2e3;
|
|
688
|
-
var
|
|
726
|
+
var DEFAULT_STREAM_STALL_TIMEOUT_MS2 = 3e4;
|
|
727
|
+
var LM_STUDIO_CLI_TIMEOUT_MS = 8e3;
|
|
728
|
+
var SHARED_STREAM_STALL_TIMEOUT_ENV2 = "METRILLM_STREAM_STALL_TIMEOUT_MS";
|
|
689
729
|
var DEFAULT_LM_STUDIO_HOME_DIR = path.join(os.homedir(), ".lmstudio");
|
|
690
730
|
var DEFAULT_LM_STUDIO_MODELS_DIR = path.join(DEFAULT_LM_STUDIO_HOME_DIR, "models");
|
|
691
731
|
var LM_STUDIO_HOME_DIR_ENV = "LM_STUDIO_HOME_DIR";
|
|
692
732
|
var LM_STUDIO_MODELS_DIR_ENV = "LM_STUDIO_MODELS_DIR";
|
|
733
|
+
var LM_STUDIO_CLI_PATH_ENV = "LM_STUDIO_CLI_PATH";
|
|
693
734
|
var defaultKeepAlive2;
|
|
694
735
|
var activeAbortControllers = /* @__PURE__ */ new Set();
|
|
695
736
|
var directorySizeCache = /* @__PURE__ */ new Map();
|
|
@@ -717,14 +758,9 @@ function assertThinkingModeRespected(model, think, response, reasoning) {
|
|
|
717
758
|
);
|
|
718
759
|
}
|
|
719
760
|
}
|
|
720
|
-
function
|
|
721
|
-
if (think
|
|
722
|
-
|
|
723
|
-
return {
|
|
724
|
-
include_reasoning: think,
|
|
725
|
-
reasoning_effort: effort,
|
|
726
|
-
reasoning: { effort }
|
|
727
|
-
};
|
|
761
|
+
/**
 * Maps the `think` flag to the reasoning value sent in the native chat body.
 *
 * @param {boolean | undefined} think - Requested thinking mode.
 * @returns {"high" | undefined} `"high"` only for a strict `true`; otherwise
 *   `undefined`.
 */
function buildNativeThinkingOption(think) {
  return think === true ? "high" : void 0;
}
|
|
729
765
|
function hasSamplingOverrides2(options) {
|
|
730
766
|
return options?.top_p !== void 0 || options?.seed !== void 0;
|
|
@@ -768,38 +804,106 @@ function buildLMStudioRequestError(kind, model, status, statusText, body) {
|
|
|
768
804
|
const suffix = backendMessage ? ` ${backendMessage}` : "";
|
|
769
805
|
return new Error(`LM Studio ${kind} failed (${status} ${statusText})${suffix}`.trim());
|
|
770
806
|
}
|
|
771
|
-
function
|
|
772
|
-
const
|
|
773
|
-
{ role: "system", content: NON_THINKING_SYSTEM_PROMPT },
|
|
774
|
-
{ role: "user", content: prompt }
|
|
775
|
-
] : [{ role: "user", content: prompt }];
|
|
807
|
+
/**
 * Builds the JSON body for a native chat request.
 *
 * @param {string} model - Model identifier.
 * @param {string} prompt - Text sent as `input`.
 * @param {object | undefined} options - Generation options; reads
 *   `temperature`, `top_p`, `seed`, `num_predict` and `think`.
 * @param {boolean} stream - Whether a streamed response is requested.
 * @param {boolean} includeSampling - When false, `top_p` and `seed` are left
 *   out even if provided.
 * @returns {object} Request body; optional keys are present only when set.
 */
function buildNativeChatBody(model, prompt, options, stream, includeSampling) {
  // Keys are added in the same order as the serialized payload expects.
  const body = {
    model,
    input: prompt,
    temperature: options?.temperature ?? 0
  };
  if (includeSampling) {
    if (options?.top_p !== void 0) body.top_p = options.top_p;
    if (options?.seed !== void 0) body.seed = options.seed;
  }
  body.max_tokens = options?.num_predict ?? 512;
  body.stream = stream;
  const reasoning = buildNativeThinkingOption(options?.think);
  if (reasoning !== void 0) body.reasoning = reasoning;
  // Only an explicit `think: false` adds the non-thinking system prompt.
  if (options?.think === false) body.system_prompt = NON_THINKING_SYSTEM_PROMPT;
  return body;
}
|
|
788
|
-
function
|
|
821
|
+
/**
 * Accepts a stat value only when it is a finite, non-negative number.
 *
 * @param {unknown} value - Raw stat field.
 * @returns {number | undefined} The value unchanged, or `undefined` when it
 *   is not usable.
 */
function getNativeStatNumber(value) {
  const usable = typeof value === "number" && Number.isFinite(value) && value >= 0;
  return usable ? value : void 0;
}
|
|
825
|
+
/**
 * Recursively extracts text from a string, array or object payload fragment.
 *
 * Strings are returned as-is, arrays are concatenated, and objects yield the
 * first non-empty result among their `text`, `content`, `delta` and `value`
 * fields, in that order. Anything else yields an empty string.
 *
 * @param {unknown} value - Fragment to flatten.
 * @param {number} [depth=0] - Current recursion depth; values deeper than 3
 *   yield an empty string.
 * @returns {string} The extracted text, possibly empty.
 */
function flattenNativeText(value, depth = 0) {
  if (value == null || depth > 3) return "";
  const kind = typeof value;
  if (kind === "string") return value;
  if (kind !== "object") return "";
  const nextDepth = depth + 1;
  if (Array.isArray(value)) {
    let joined = "";
    for (const item of value) {
      joined += flattenNativeText(item, nextDepth);
    }
    return joined;
  }
  for (const field of ["text", "content", "delta", "value"]) {
    const found = flattenNativeText(value[field], nextDepth);
    if (found) return found;
  }
  return "";
}
|
|
837
|
+
/**
 * Splits an array of output items into response text and reasoning text.
 *
 * Each item's text is taken from `text` (falling back to `content`) and
 * trimmed. Items whose lower-cased `type` contains "reason" are appended to
 * `reasoning`; all others are appended to `response`.
 *
 * @param {unknown} output4 - Output list; a non-array yields empty strings.
 * @returns {{ response: string, reasoning: string }} Concatenated texts.
 */
function collectNativeOutput(output4) {
  const collected = { response: "", reasoning: "" };
  if (!Array.isArray(output4)) return collected;
  for (const entry of output4) {
    if (entry === null || typeof entry !== "object") continue;
    const text = flattenNativeText(entry.text ?? entry.content).trim();
    if (!text) continue;
    const kind = asNonEmptyString(entry.type)?.toLowerCase() ?? "";
    const bucket = kind.includes("reason") ? "reasoning" : "response";
    collected[bucket] += text;
  }
  return collected;
}
|
|
857
|
+
/**
 * Finds the stats object in a response payload.
 *
 * @param {unknown} payload - Parsed response or stream event.
 * @returns {object | undefined} `payload.stats` when truthy, otherwise
 *   `payload.result.stats`; `undefined` for non-object payloads.
 */
function extractNativeStats(payload) {
  if (payload === null || typeof payload !== "object") return void 0;
  // Top-level stats win; the nested location is the fallback.
  return payload.stats || payload.result?.stats;
}
|
|
864
|
+
/**
 * Extracts response and reasoning text from a full response payload.
 *
 * `payload.result.output` is tried first; `payload.output` is used only when
 * the nested output produced no text at all.
 *
 * @param {unknown} payload - Parsed response or stream event.
 * @returns {{ response: string, reasoning: string }} Extracted texts, both
 *   empty for non-object payloads.
 */
function extractNativeResponse(payload) {
  if (payload === null || typeof payload !== "object") {
    return { response: "", reasoning: "" };
  }
  const nested = collectNativeOutput(payload.result?.output);
  const nestedHasText = Boolean(nested.response || nested.reasoning);
  return nestedHasText ? nested : collectNativeOutput(payload.output);
}
|
|
874
|
+
/**
 * Extracts the incremental text carried by one stream event.
 *
 * The text comes from the first non-empty field among `delta`, `content` and
 * `text`. Events whose lower-cased `type` contains "reason" are reported as
 * reasoning; every other event with text is reported as response.
 *
 * @param {unknown} payload - Parsed stream event.
 * @returns {{ response: string, reasoning: string }} At most one field is
 *   non-empty.
 */
function extractNativeDelta(payload) {
  if (payload === null || typeof payload !== "object") {
    return { response: "", reasoning: "" };
  }
  const eventType = asNonEmptyString(payload.type)?.toLowerCase() ?? "";
  let text = "";
  for (const field of ["delta", "content", "text"]) {
    text = flattenNativeText(payload[field]);
    if (text) break;
  }
  if (!text) {
    return { response: "", reasoning: "" };
  }
  return eventType.includes("reason")
    ? { response: "", reasoning: text }
    : { response: text, reasoning: "" };
}
|
|
892
|
+
/**
 * Parses digits-only text into a safe, non-negative integer.
 *
 * @param {string} value - Candidate text.
 * @returns {number | null} The integer, or `null` when the text contains
 *   anything other than decimal digits or exceeds the safe-integer range.
 */
function parseNonNegativeInt2(value) {
  const isDigitsOnly = /^\d+$/.test(value);
  if (!isDigitsOnly) return null;
  const number = Number.parseInt(value, 10);
  const acceptable = Number.isSafeInteger(number) && number >= 0;
  return acceptable ? number : null;
}
|
|
794
|
-
function
|
|
898
|
+
/**
 * Resolves the stall timeout (in milliseconds) for a streamed generation.
 *
 * Precedence: an explicit `override`, then the environment variable named by
 * `SHARED_STREAM_STALL_TIMEOUT_ENV2`, then `DEFAULT_STREAM_STALL_TIMEOUT_MS2`.
 * Invalid values (non-finite, negative, non-numeric text) fall back to the
 * default.
 *
 * @param {number | undefined} override - Per-call timeout; `undefined` defers
 *   to the environment.
 * @returns {number | undefined} Whole milliseconds, or `undefined` when the
 *   timeout is disabled (a value of zero).
 */
function resolveStreamStallTimeoutMs2(override) {
  if (override !== void 0) {
    if (!Number.isFinite(override) || override < 0) return DEFAULT_STREAM_STALL_TIMEOUT_MS2;
    // Truncate before the zero check: a fractional value below 1 would
    // otherwise become a 0 ms timer that fires immediately instead of
    // disabling the timeout the way an explicit 0 does.
    const wholeMs = Math.trunc(override);
    return wholeMs === 0 ? void 0 : wholeMs;
  }
  const configured = process.env[SHARED_STREAM_STALL_TIMEOUT_ENV2]?.trim();
  if (!configured) return DEFAULT_STREAM_STALL_TIMEOUT_MS2;
  const parsed = parseNonNegativeInt2(configured);
  if (parsed === null) return DEFAULT_STREAM_STALL_TIMEOUT_MS2;
  return parsed === 0 ? void 0 : parsed;
}
|
|
805
909
|
function getLMStudioBaseUrl() {
|
|
@@ -822,25 +926,29 @@ function getLMStudioHeaders() {
|
|
|
822
926
|
}
|
|
823
927
|
return headers;
|
|
824
928
|
}
|
|
825
|
-
function
|
|
826
|
-
if (typeof
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
if (
|
|
833
|
-
const
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
}
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
929
|
+
/**
 * Normalizes a reported token count to a non-negative whole number.
 *
 * @param {unknown} value - Raw usage field.
 * @returns {number} The truncated count, or 0 when the value is not a finite
 *   positive number.
 */
function getUsageTokenCount(value) {
  const usable = typeof value === "number" && Number.isFinite(value) && value > 0;
  return usable ? Math.trunc(value) : 0;
}
|
|
934
|
+
/**
 * Estimates a token count for text when no count was reported.
 *
 * Two estimates are computed and the larger one is returned (minimum 1 for
 * non-blank text): the result of `estimateTokenCount`, and a character-based
 * figure that counts each Han/Hiragana/Katakana/Hangul character as one token
 * plus one token per four remaining non-whitespace characters.
 *
 * @param {string} text - Generated text.
 * @returns {number} Estimated token count; 0 for blank text.
 */
function estimateCompletionTokensFallback(text) {
  const trimmed = text.trim();
  if (!trimmed) return 0;
  const cjkPattern = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/gu;
  const cjkCount = (trimmed.match(cjkPattern) ?? []).length;
  const remainingChars = trimmed.replace(cjkPattern, "").replace(/\s+/g, "").length;
  const charBasedEstimate = cjkCount + Math.ceil(remainingChars / 4);
  return Math.max(1, estimateTokenCount(trimmed), charBasedEstimate);
}
|
|
841
|
-
function
|
|
842
|
-
const
|
|
843
|
-
|
|
948
|
+
/**
 * Chooses the completion token count: the reported value when positive,
 * otherwise an estimate computed from the reasoning and response text.
 *
 * @param {unknown} reportedTokenCount - Count reported by the backend, if any.
 * @param {string} response - Response text.
 * @param {string} reasoning - Reasoning text.
 * @returns {number} Token count.
 */
function resolveCompletionTokenCount(reportedTokenCount, response, reasoning) {
  const reported = getUsageTokenCount(reportedTokenCount);
  return reported > 0
    ? reported
    : estimateCompletionTokensFallback(`${reasoning} ${response}`);
}
|
|
845
953
|
function asNonEmptyString(value) {
|
|
846
954
|
if (typeof value !== "string") return void 0;
|
|
@@ -1090,11 +1198,17 @@ async function resolveLocalModelMetadata(modelId, apiModel, modelsRootDir) {
|
|
|
1090
1198
|
const size = await readDirectorySizeBytes(source.fullPath);
|
|
1091
1199
|
if (size > bestSize) bestSize = size;
|
|
1092
1200
|
if (size > 0) {
|
|
1093
|
-
return {
|
|
1201
|
+
return {
|
|
1202
|
+
size,
|
|
1203
|
+
parameterSize: definition.parameterSize
|
|
1204
|
+
};
|
|
1094
1205
|
}
|
|
1095
1206
|
}
|
|
1096
1207
|
if (bestSize > 0) {
|
|
1097
|
-
return {
|
|
1208
|
+
return {
|
|
1209
|
+
size: bestSize,
|
|
1210
|
+
parameterSize: definition.parameterSize
|
|
1211
|
+
};
|
|
1098
1212
|
}
|
|
1099
1213
|
const fallback = await resolvePublisherModelMetadata(modelId, apiModel, modelsRootDir);
|
|
1100
1214
|
if (fallback.size > 0) {
|
|
@@ -1103,7 +1217,10 @@ async function resolveLocalModelMetadata(modelId, apiModel, modelsRootDir) {
|
|
|
1103
1217
|
parameterSize: definition.parameterSize ?? fallback.parameterSize
|
|
1104
1218
|
};
|
|
1105
1219
|
}
|
|
1106
|
-
return {
|
|
1220
|
+
return {
|
|
1221
|
+
size: 0,
|
|
1222
|
+
parameterSize: definition.parameterSize ?? fallback.parameterSize
|
|
1223
|
+
};
|
|
1107
1224
|
}
|
|
1108
1225
|
function parseSizeBytes(model) {
|
|
1109
1226
|
if (!model) return 0;
|
|
@@ -1141,6 +1258,21 @@ function inferParameterSizeFromModelId(modelId) {
|
|
|
1141
1258
|
}
|
|
1142
1259
|
return void 0;
|
|
1143
1260
|
}
|
|
1261
|
+
/**
 * Resolves a model's format from the API record's `compatibility_type`.
 *
 * @param {object | undefined} apiModel - Model record from the API.
 * @param {unknown} _localMetadata - Unused.
 * @param {unknown} _modelId - Unused.
 * @returns {string | undefined} Result of `asNonEmptyString` on the field.
 */
function resolveModelFormat(apiModel, _localMetadata, _modelId) {
  const compatibilityType = apiModel?.compatibility_type;
  return asNonEmptyString(compatibilityType);
}
|
|
1264
|
+
/**
 * Combines API data and locally resolved metadata into one model entry.
 *
 * @param {string} id - Model identifier, used as the entry name.
 * @param {object | undefined} apiModel - Model record from the API.
 * @param {{ size?: number, parameterSize?: string } | undefined} localMetadata
 *   - Locally resolved metadata.
 * @returns {object} Entry with `name`, `size`, `parameterSize`,
 *   `quantization`, `runtimeStatus`, `modelFormat` and `family`.
 */
function buildModelEntry(id, apiModel, localMetadata) {
  const reportedSize = parseSizeBytes(apiModel);
  // A positive API size wins; otherwise use the local size, then 0.
  const size = reportedSize > 0 ? reportedSize : (localMetadata?.size ?? 0);
  const parameterSize = localMetadata?.parameterSize ?? inferParameterSizeFromModelId(id);
  const quantization = asNonEmptyString(apiModel?.quantization);
  const runtimeStatus = asNonEmptyString(apiModel?.state);
  const modelFormat = resolveModelFormat(apiModel, localMetadata, id);
  // First available of arch, type, publisher.
  let family;
  for (const field of ["arch", "type", "publisher"]) {
    family = asNonEmptyString(apiModel?.[field]);
    if (family != null) break;
  }
  return {
    name: id,
    size,
    parameterSize,
    quantization,
    runtimeStatus,
    modelFormat,
    family
  };
}
|
|
1144
1276
|
function isLoadedState(state) {
|
|
1145
1277
|
if (!state) return false;
|
|
1146
1278
|
const normalized = state.trim().toLowerCase();
|
|
@@ -1148,6 +1280,128 @@ function isLoadedState(state) {
|
|
|
1148
1280
|
if (normalized === "loaded" || normalized === "ready") return true;
|
|
1149
1281
|
return normalized.includes("loaded");
|
|
1150
1282
|
}
|
|
1283
|
+
/**
 * Promise wrapper around `execFile` that captures stdout and stderr.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {number} timeoutMs - Timeout handed to `execFile`.
 * @returns {Promise<{ stdout: string, stderr: string }>} Captured output.
 *   On failure the promise rejects with the original error, extended with
 *   `stdout` and `stderr` properties.
 */
function execFileText(cmd, args, timeoutMs) {
  const execOptions = {
    timeout: timeoutMs,
    maxBuffer: 1024 * 1024,
    env: process.env
  };
  return new Promise((resolve, reject) => {
    execFile2(cmd, args, execOptions, (err, stdout, stderr) => {
      if (!err) {
        resolve({ stdout, stderr });
        return;
      }
      // Keep whatever output was produced so callers can still inspect it.
      reject(Object.assign(err, { stdout, stderr }));
    });
  });
}
|
|
1306
|
+
/**
 * Tells whether an error is an `Error` whose `code` is "ENOENT".
 *
 * @param {unknown} err - Value caught from a failed command.
 * @returns {boolean} True only for `Error` instances with code "ENOENT".
 */
function isCommandMissingError(err) {
  if (!(err instanceof Error)) return false;
  return "code" in err && err.code === "ENOENT";
}
|
|
1309
|
+
/**
 * Runs the `lms` CLI, trying several executable locations in order.
 *
 * Candidates: the path from the environment variable named by
 * `LM_STUDIO_CLI_PATH_ENV` (when set), then `lms` resolved through PATH, then
 * `<LM Studio home>/bin/lms`. Duplicates are tried once. A candidate that
 * fails with ENOENT is skipped; any other failure is rethrown immediately.
 *
 * @param {string[]} args - Arguments passed to the CLI.
 * @returns {Promise<{ stdout: string, stderr: string }>} Captured output.
 * @throws The last ENOENT error when no candidate exists, or the first
 *   non-ENOENT error from a candidate.
 */
async function runLmsCli(args) {
  const configuredPath = asNonEmptyString(process.env[LM_STUDIO_CLI_PATH_ENV]);
  const bundledPath = path.join(getLMStudioHomeDir(), "bin", "lms");
  const ordered = [configuredPath, "lms", bundledPath].filter(Boolean);
  // A Set keeps first-seen order while dropping repeated paths.
  const candidates = [...new Set(ordered)];
  let lastError;
  for (const executable of candidates) {
    try {
      return await execFileText(executable, args, LM_STUDIO_CLI_TIMEOUT_MS);
    } catch (err) {
      if (!isCommandMissingError(err)) throw err;
      lastError = err;
    }
  }
  throw lastError ?? new Error("LM Studio CLI is not available.");
}
|
|
1331
|
+
/**
 * Normalizes an identifier for comparison: trimmed and lower-cased.
 *
 * @param {string | null | undefined} value - Raw identifier.
 * @returns {string} Normalized text; empty for `null`/`undefined`.
 */
function normalizeCliToken(value) {
  const text = value == null ? "" : value;
  return text.trim().toLowerCase();
}
|
|
1334
|
+
/**
 * Checks whether a loaded-model CLI entry refers to the requested model.
 *
 * The normalized model name is compared with the entry's `identifier`,
 * `indexedModelIdentifier`, `path` and `modelKey`.
 *
 * @param {object} entry - One entry from the CLI's loaded-model list.
 * @param {string} model - Requested model name.
 * @returns {boolean} True when any of the four fields matches; false when the
 *   model name is blank.
 */
function matchesLoadedModelCliEntry(entry, model) {
  const wanted = normalizeCliToken(model);
  if (!wanted) return false;
  const fields = [
    entry.identifier,
    entry.indexedModelIdentifier,
    entry.path,
    entry.modelKey
  ];
  for (const field of fields) {
    if (normalizeCliToken(field) === wanted) return true;
  }
  return false;
}
|
|
1344
|
+
/**
 * Lists the entries printed by `lms ps --json`.
 *
 * @returns {Promise<unknown[]>} The parsed JSON array, or an empty array when
 *   the output parses to something other than an array.
 * @throws When the CLI call fails or its stdout is not valid JSON.
 */
async function listLoadedModelsFromCli() {
  const result = await runLmsCli(["ps", "--json"]);
  const decoded = JSON.parse(result.stdout);
  if (!Array.isArray(decoded)) return [];
  return decoded;
}
|
|
1349
|
+
/**
 * Parses an "Estimated Total Memory: <number> <unit>" line into bytes.
 *
 * Binary units (KiB, MiB, GiB, TiB) use powers of 1024 and decimal units
 * (KB, MB, GB, TB) use powers of 1000. Matching is case-insensitive.
 *
 * @param {string} output4 - CLI output to search.
 * @returns {number | null} Rounded byte count, or `null` when no positive
 *   estimate is found.
 */
function parseEstimatedBytes(output4) {
  const found = output4.match(/Estimated Total Memory:\s*([0-9]+(?:\.[0-9]+)?)\s*(KiB|MiB|GiB|TiB|KB|MB|GB|TB)/i);
  if (found === null) return null;
  const amount = Number.parseFloat(found[1] ?? "");
  if (!Number.isFinite(amount) || amount <= 0) return null;
  const unit = (found[2] ?? "").toUpperCase();
  // The first letter selects the power; an "I" marks a binary unit.
  const exponent = { K: 1, M: 2, G: 3, T: 4 }[unit.charAt(0)];
  if (!exponent) return null;
  const base = unit.includes("I") ? 1024 : 1e3;
  return Math.round(amount * base ** exponent);
}
|
|
1369
|
+
/**
 * Estimates the memory used by a loaded model by asking the `lms` CLI.
 *
 * The model is first looked up in the CLI's loaded-model list. Then
 * `lms load --estimate-only -y` is run for each distinct key of that entry
 * (`path`, `indexedModelIdentifier`, `modelKey`) until one run prints a
 * parseable estimate. Output attached to a failed run is parsed as well.
 *
 * @param {string} model - Requested model name.
 * @returns {Promise<number | null>} Estimated bytes, or `null` when the model
 *   is not listed as loaded, the listing fails, or no estimate is found.
 */
async function estimateLoadedModelMemoryBytes(model) {
  let loadedEntry;
  try {
    const loaded = await listLoadedModelsFromCli();
    loadedEntry = loaded.find((entry) => matchesLoadedModelCliEntry(entry, model));
  } catch {
    // Listing is best-effort; a CLI failure means "no estimate".
    loadedEntry = void 0;
  }
  if (!loadedEntry) return null;

  const { contextLength } = loadedEntry;
  const hasContextLength = typeof contextLength === "number" && Number.isFinite(contextLength) && contextLength > 0;
  const contextArgs = hasContextLength ? ["--context-length", String(Math.trunc(contextLength))] : [];

  const rawKeys = [loadedEntry.path, loadedEntry.indexedModelIdentifier, loadedEntry.modelKey];
  const modelKeys = rawKeys.filter(
    (key, position) => Boolean(key?.trim()) && rawKeys.indexOf(key) === position
  );

  for (const modelKey of modelKeys) {
    let combinedOutput;
    try {
      const { stdout, stderr } = await runLmsCli(["load", "--estimate-only", "-y", ...contextArgs, modelKey]);
      combinedOutput = `${stdout}\n${stderr}`;
    } catch (err) {
      // A failed run may still have printed the estimate.
      combinedOutput = err instanceof Error ? `${String(err.stdout ?? "")}\n${String(err.stderr ?? "")}` : "";
    }
    const estimated = parseEstimatedBytes(combinedOutput);
    if (estimated !== null) return estimated;
  }
  return null;
}
|
|
1151
1405
|
async function fetchApiModels() {
|
|
1152
1406
|
try {
|
|
1153
1407
|
const resp = await fetchWithTimeout(
|
|
@@ -1200,7 +1454,7 @@ async function getLMStudioVersion() {
|
|
|
1200
1454
|
const localVersion = await resolveLocalLMStudioVersion();
|
|
1201
1455
|
try {
|
|
1202
1456
|
const resp = await fetchWithTimeout(
|
|
1203
|
-
"/v1/models",
|
|
1457
|
+
"/api/v1/models",
|
|
1204
1458
|
{ method: "GET", headers: getLMStudioHeaders() },
|
|
1205
1459
|
5e3,
|
|
1206
1460
|
"LM Studio version check"
|
|
@@ -1215,7 +1469,7 @@ async function getLMStudioVersion() {
|
|
|
1215
1469
|
}
|
|
1216
1470
|
async function listModels2() {
|
|
1217
1471
|
const resp = await fetchWithTimeout(
|
|
1218
|
-
"/v1/models",
|
|
1472
|
+
"/api/v1/models",
|
|
1219
1473
|
{ method: "GET", headers: getLMStudioHeaders() },
|
|
1220
1474
|
LM_STUDIO_INIT_TIMEOUT_MS,
|
|
1221
1475
|
"LM Studio list models"
|
|
@@ -1233,25 +1487,25 @@ async function listModels2() {
|
|
|
1233
1487
|
apiById.set(id, model);
|
|
1234
1488
|
}
|
|
1235
1489
|
const modelsRootDir = await resolveModelsRootDir();
|
|
1236
|
-
const
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1490
|
+
const localMetadataEntries = await Promise.all(
|
|
1491
|
+
ids.map(async (id) => {
|
|
1492
|
+
const localMetadata = await resolveLocalModelMetadata(id, apiById.get(id), modelsRootDir);
|
|
1493
|
+
return [id, localMetadata];
|
|
1494
|
+
})
|
|
1495
|
+
);
|
|
1496
|
+
const localMetadataById = new Map(
|
|
1497
|
+
localMetadataEntries
|
|
1498
|
+
);
|
|
1499
|
+
return ids.map((id) => buildModelEntry(id, apiById.get(id), localMetadataById.get(id)));
|
|
1500
|
+
}
|
|
1501
|
+
/**
 * Resolves a single model entry by its identifier.
 *
 * @param {string} modelId - Model identifier; surrounding whitespace is
 *   ignored.
 * @returns {Promise<object | null>} The entry built from the matching API
 *   record (if any) and local metadata, or `null` for a blank identifier.
 */
async function resolveModel(modelId) {
  const id = modelId.trim();
  if (id.length === 0) return null;
  const apiModels = await fetchApiModels();
  const apiModel = apiModels?.find((entry) => asNonEmptyString(entry.id) === id);
  const rootDir = await resolveModelsRootDir();
  const localMetadata = await resolveLocalModelMetadata(id, apiModel, rootDir);
  return buildModelEntry(id, apiModel, localMetadata);
}
|
|
1256
1510
|
async function listRunningModels2() {
|
|
1257
1511
|
const apiModels = await fetchApiModels();
|
|
@@ -1272,11 +1526,11 @@ async function generate2(model, prompt, options) {
|
|
|
1272
1526
|
activeAbortControllers.add(controller);
|
|
1273
1527
|
try {
|
|
1274
1528
|
const baseUrl = getLMStudioBaseUrl();
|
|
1275
|
-
const url = new URL("/v1/chat
|
|
1529
|
+
const url = new URL("/api/v1/chat", baseUrl);
|
|
1276
1530
|
const doRequest = (includeSampling) => fetch(url, {
|
|
1277
1531
|
method: "POST",
|
|
1278
1532
|
headers: getLMStudioHeaders(),
|
|
1279
|
-
body: JSON.stringify(
|
|
1533
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, includeSampling)),
|
|
1280
1534
|
signal: controller.signal
|
|
1281
1535
|
});
|
|
1282
1536
|
let resp = await doRequest(true);
|
|
@@ -1293,21 +1547,33 @@ async function generate2(model, prompt, options) {
|
|
|
1293
1547
|
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
1294
1548
|
}
|
|
1295
1549
|
const payload = await resp.json();
|
|
1296
|
-
const
|
|
1297
|
-
const response =
|
|
1298
|
-
const reasoning =
|
|
1550
|
+
const nativeResponse = extractNativeResponse(payload);
|
|
1551
|
+
const response = nativeResponse.response;
|
|
1552
|
+
const reasoning = nativeResponse.reasoning;
|
|
1299
1553
|
assertThinkingModeRespected(model, options?.think, response, reasoning);
|
|
1300
|
-
const
|
|
1554
|
+
const stats = extractNativeStats(payload);
|
|
1301
1555
|
const totalDuration = Math.max(0, Date.now() - start) * 1e6;
|
|
1556
|
+
const outputTokens = getUsageTokenCount(stats?.total_output_tokens) || resolveCompletionTokenCount(void 0, response, reasoning);
|
|
1557
|
+
const throughput = getNativeStatNumber(stats?.tokens_per_second);
|
|
1558
|
+
const timeToFirstTokenSeconds = getNativeStatNumber(stats?.time_to_first_token_seconds);
|
|
1559
|
+
const modelLoadTimeSeconds = getNativeStatNumber(stats?.model_load_time_seconds);
|
|
1560
|
+
const evalCountEstimated = getUsageTokenCount(stats?.total_output_tokens) <= 0;
|
|
1561
|
+
const evalDuration = throughput !== void 0 && throughput > 0 && outputTokens > 0 ? Math.max(1, Math.round(outputTokens / throughput * 1e9)) : totalDuration;
|
|
1562
|
+
const promptEvalDuration = timeToFirstTokenSeconds !== void 0 ? Math.max(0, Math.round(timeToFirstTokenSeconds * 1e9)) : 0;
|
|
1563
|
+
const loadDuration = Math.max(
|
|
1564
|
+
0,
|
|
1565
|
+
Math.round((modelLoadTimeSeconds ?? 0) * 1e9)
|
|
1566
|
+
);
|
|
1302
1567
|
return {
|
|
1303
1568
|
response,
|
|
1304
1569
|
...reasoning ? { thinking: reasoning } : {},
|
|
1305
1570
|
totalDuration,
|
|
1306
|
-
loadDuration
|
|
1307
|
-
promptEvalCount:
|
|
1308
|
-
promptEvalDuration
|
|
1309
|
-
evalCount:
|
|
1310
|
-
evalDuration
|
|
1571
|
+
loadDuration,
|
|
1572
|
+
promptEvalCount: getUsageTokenCount(stats?.input_tokens),
|
|
1573
|
+
promptEvalDuration,
|
|
1574
|
+
evalCount: outputTokens,
|
|
1575
|
+
evalDuration,
|
|
1576
|
+
...evalCountEstimated ? { evalCountEstimated: true } : {}
|
|
1311
1577
|
};
|
|
1312
1578
|
} catch (err) {
|
|
1313
1579
|
if (err instanceof Error && err.name === "AbortError") {
|
|
@@ -1322,10 +1588,10 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1322
1588
|
const start = Date.now();
|
|
1323
1589
|
const controller = new AbortController();
|
|
1324
1590
|
activeAbortControllers.add(controller);
|
|
1325
|
-
const stallTimeoutMs =
|
|
1591
|
+
const stallTimeoutMs = resolveStreamStallTimeoutMs2(options?.stall_timeout_ms);
|
|
1326
1592
|
let abortedByStallTimeout = false;
|
|
1327
1593
|
const baseUrl = getLMStudioBaseUrl();
|
|
1328
|
-
const url = new URL("/v1/chat
|
|
1594
|
+
const url = new URL("/api/v1/chat", baseUrl);
|
|
1329
1595
|
let stallTimer = null;
|
|
1330
1596
|
const resetStallTimer = () => {
|
|
1331
1597
|
if (stallTimeoutMs === void 0) return;
|
|
@@ -1340,7 +1606,7 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1340
1606
|
const doRequest = (includeSampling) => fetch(url, {
|
|
1341
1607
|
method: "POST",
|
|
1342
1608
|
headers: getLMStudioHeaders(),
|
|
1343
|
-
body: JSON.stringify(
|
|
1609
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, includeSampling)),
|
|
1344
1610
|
signal: controller.signal
|
|
1345
1611
|
});
|
|
1346
1612
|
let resp = await doRequest(true);
|
|
@@ -1365,10 +1631,10 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1365
1631
|
let doneReceived = false;
|
|
1366
1632
|
let fullResponse = "";
|
|
1367
1633
|
let fullThinking = "";
|
|
1368
|
-
let
|
|
1634
|
+
let stats;
|
|
1369
1635
|
let firstChunkSeen = false;
|
|
1370
|
-
let
|
|
1371
|
-
let
|
|
1636
|
+
let firstGeneratedTokenTime = null;
|
|
1637
|
+
let lastGeneratedTokenTime = null;
|
|
1372
1638
|
const processDataLine = (rawLine) => {
|
|
1373
1639
|
const line = rawLine.trim();
|
|
1374
1640
|
if (!line.startsWith("data:")) return;
|
|
@@ -1384,18 +1650,27 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1384
1650
|
} catch {
|
|
1385
1651
|
return;
|
|
1386
1652
|
}
|
|
1387
|
-
const
|
|
1388
|
-
const content =
|
|
1389
|
-
const reasoning =
|
|
1390
|
-
const
|
|
1391
|
-
if (
|
|
1653
|
+
const delta = extractNativeDelta(payload);
|
|
1654
|
+
const content = delta.response;
|
|
1655
|
+
const reasoning = delta.reasoning;
|
|
1656
|
+
const chunkStats = extractNativeStats(payload);
|
|
1657
|
+
if (chunkStats) stats = chunkStats;
|
|
1658
|
+
const aggregate = extractNativeResponse(payload);
|
|
1659
|
+
if (aggregate.response) {
|
|
1660
|
+
fullResponse = aggregate.response;
|
|
1661
|
+
}
|
|
1662
|
+
if (aggregate.reasoning) {
|
|
1663
|
+
fullThinking = aggregate.reasoning;
|
|
1664
|
+
}
|
|
1665
|
+
if (reasoning || content) {
|
|
1666
|
+
const now = Date.now();
|
|
1667
|
+
if (firstGeneratedTokenTime === null) firstGeneratedTokenTime = now;
|
|
1668
|
+
lastGeneratedTokenTime = now;
|
|
1669
|
+
}
|
|
1392
1670
|
if (reasoning) {
|
|
1393
1671
|
fullThinking += reasoning;
|
|
1394
1672
|
}
|
|
1395
1673
|
if (content) {
|
|
1396
|
-
const now = Date.now();
|
|
1397
|
-
if (firstTokenTime === null) firstTokenTime = now;
|
|
1398
|
-
lastTokenTime = now;
|
|
1399
1674
|
fullResponse += content;
|
|
1400
1675
|
callbacks?.onToken?.(content);
|
|
1401
1676
|
}
|
|
@@ -1415,6 +1690,14 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1415
1690
|
processDataLine(rawLine);
|
|
1416
1691
|
}
|
|
1417
1692
|
}
|
|
1693
|
+
buffered += decoder.decode();
|
|
1694
|
+
if (buffered.length > 0) {
|
|
1695
|
+
const lines = buffered.split("\n");
|
|
1696
|
+
buffered = lines.pop() ?? "";
|
|
1697
|
+
for (const rawLine of lines) {
|
|
1698
|
+
processDataLine(rawLine);
|
|
1699
|
+
}
|
|
1700
|
+
}
|
|
1418
1701
|
if (buffered.trim().length > 0) {
|
|
1419
1702
|
processDataLine(buffered);
|
|
1420
1703
|
}
|
|
@@ -1423,16 +1706,25 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1423
1706
|
throw new Error("LM Studio stream ended without content");
|
|
1424
1707
|
}
|
|
1425
1708
|
const totalDuration = Math.max(0, Date.now() - start) * 1e6;
|
|
1426
|
-
const
|
|
1709
|
+
const outputTokens = getUsageTokenCount(stats?.total_output_tokens) || resolveCompletionTokenCount(void 0, fullResponse, fullThinking);
|
|
1710
|
+
const throughput = getNativeStatNumber(stats?.tokens_per_second);
|
|
1711
|
+
const timeToFirstTokenSeconds = getNativeStatNumber(stats?.time_to_first_token_seconds);
|
|
1712
|
+
const modelLoadTimeSeconds = getNativeStatNumber(stats?.model_load_time_seconds);
|
|
1713
|
+
const evalCountEstimated = getUsageTokenCount(stats?.total_output_tokens) <= 0;
|
|
1714
|
+
const evalDurationMs = throughput !== void 0 && throughput > 0 && outputTokens > 0 ? outputTokens / throughput * 1e3 : firstGeneratedTokenTime !== null && lastGeneratedTokenTime !== null && lastGeneratedTokenTime > firstGeneratedTokenTime ? lastGeneratedTokenTime - firstGeneratedTokenTime : Date.now() - start;
|
|
1427
1715
|
const result = {
|
|
1428
1716
|
response: fullResponse,
|
|
1429
1717
|
...fullThinking ? { thinking: fullThinking } : {},
|
|
1430
1718
|
totalDuration,
|
|
1431
|
-
loadDuration:
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1719
|
+
loadDuration: Math.max(
|
|
1720
|
+
0,
|
|
1721
|
+
Math.round((modelLoadTimeSeconds ?? 0) * 1e9)
|
|
1722
|
+
),
|
|
1723
|
+
promptEvalCount: getUsageTokenCount(stats?.input_tokens),
|
|
1724
|
+
promptEvalDuration: timeToFirstTokenSeconds !== void 0 ? Math.max(0, Math.round(timeToFirstTokenSeconds * 1e9)) : firstGeneratedTokenTime !== null ? (firstGeneratedTokenTime - start) * 1e6 : 0,
|
|
1725
|
+
evalCount: outputTokens,
|
|
1726
|
+
evalDuration: Math.max(1, Math.round(evalDurationMs * 1e6)),
|
|
1727
|
+
...evalCountEstimated ? { evalCountEstimated: true } : {}
|
|
1436
1728
|
};
|
|
1437
1729
|
assertThinkingModeRespected(model, options?.think, fullResponse, fullThinking);
|
|
1438
1730
|
callbacks?.onDone?.(result);
|
|
@@ -1600,6 +1892,19 @@ function getRuntimeName() {
|
|
|
1600
1892
|
function getRuntimeModelFormat() {
|
|
1601
1893
|
return activeRuntime.modelFormat ?? "gguf";
|
|
1602
1894
|
}
|
|
1895
|
+
/**
 * Look up the descriptor for `model` in the currently active runtime.
 *
 * When the active runtime is named "lm-studio" the lookup is delegated to
 * `resolveModel`. Otherwise the runtime's model list is searched for an entry
 * whose `name` equals `model` exactly. If nothing matches, a placeholder
 * descriptor is returned with `size: 0` and the runtime's model format
 * (falling back to "gguf").
 *
 * @param {string} model - Model name to resolve.
 * @returns {Promise<object>} The matching descriptor, or the placeholder.
 */
async function resolveRuntimeModel(model) {
  const delegatesToLmStudio = activeRuntime.name === "lm-studio";
  if (delegatesToLmStudio) {
    return resolveModel(model);
  }

  const listed = await activeRuntime.listModels();
  const hit = listed.find((entry) => entry.name === model);
  if (!hit) {
    // Unknown to the runtime: synthesize a minimal descriptor.
    return {
      name: model,
      size: 0,
      modelFormat: activeRuntime.modelFormat ?? "gguf"
    };
  }
  return hit;
}
|
|
1603
1908
|
|
|
1604
1909
|
// ../src/commands/bench.ts
|
|
1605
1910
|
import { createHash as createHash3 } from "crypto";
|
|
@@ -1608,11 +1913,33 @@ import chalk8 from "chalk";
|
|
|
1608
1913
|
// ../src/core/hardware.ts
|
|
1609
1914
|
import si from "systeminformation";
|
|
1610
1915
|
import os2 from "os";
|
|
1611
|
-
import { execFile as
|
|
1916
|
+
import { execFile as execFile3 } from "child_process";
|
|
1612
1917
|
import { readFile } from "fs/promises";
|
|
1918
|
+
/**
 * Collapse every run of whitespace in `value` into a single space and strip
 * leading and trailing whitespace.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(value) {
  const collapsed = value.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
1921
|
+
/**
 * Report whether `value` contains one of a fixed list of GPU keywords as a
 * whole word, ignoring case.
 *
 * @param {string} value - Text to inspect.
 * @returns {boolean} True when a GPU keyword is present.
 */
function looksLikeGpuDescriptor(value) {
  const gpuKeywords = [
    "radeon",
    "graphics",
    "geforce",
    "rtx",
    "gtx",
    "arc",
    "iris",
    "uhd",
    "quadro",
    "tesla",
    "adreno",
    "mali",
    "powervr"
  ];
  // \b on both sides keeps keywords from matching inside longer words.
  const pattern = new RegExp(`\\b(${gpuKeywords.join("|")})\\b`, "i");
  return pattern.test(value);
}
|
|
1924
|
+
/**
 * Split a CPU label of the form "<cpu> w/ <gpu>" or "<cpu> with <gpu>" into
 * its CPU part and an inferred GPU name.
 *
 * The label is whitespace-normalized first. The split happens only when the
 * text after "w/" / "with" passes `looksLikeGpuDescriptor`; otherwise the
 * whole normalized label is returned as the CPU and `inferredGpu` is null.
 *
 * @param {string} cpuLabel - Raw CPU label.
 * @returns {{ cpu: string, inferredGpu: string | null }}
 */
function splitCpuAndInferredGpu(cpuLabel) {
  const label = normalizeWhitespace(cpuLabel);
  const unsplit = { cpu: label, inferredGpu: null };

  const suffix = /\s+(?:w\/\s*|with\s+)(.+)$/i.exec(label);
  // A missing match and a match at index 0 are both treated as "no split".
  if (suffix === null || !suffix.index) {
    return unsplit;
  }

  const gpuText = normalizeWhitespace(suffix[1] ?? "");
  if (!looksLikeGpuDescriptor(gpuText)) {
    return unsplit;
  }

  const cpuText = normalizeWhitespace(label.slice(0, suffix.index));
  return {
    cpu: cpuText || label,
    inferredGpu: gpuText || null
  };
}
|
|
1613
1940
|
function execCommand(cmd, args, timeoutMs = 3e3) {
|
|
1614
1941
|
return new Promise((resolve) => {
|
|
1615
|
-
const child =
|
|
1942
|
+
const child = execFile3(cmd, args, { timeout: timeoutMs }, (err, stdout) => {
|
|
1616
1943
|
if (err) return resolve("");
|
|
1617
1944
|
resolve(stdout.trim());
|
|
1618
1945
|
});
|
|
@@ -1746,11 +2073,14 @@ async function getHardwareInfo() {
|
|
|
1746
2073
|
]);
|
|
1747
2074
|
const gpuController = graphics.controllers[0];
|
|
1748
2075
|
const gpuNames = graphics.controllers.map((g) => g.model).filter(Boolean).join(", ");
|
|
2076
|
+
const cpuLabelRaw = normalizeWhitespace(`${cpu.manufacturer} ${cpu.brand}`);
|
|
2077
|
+
const { cpu: cpuLabel, inferredGpu } = splitCpuAndInferredGpu(cpuLabelRaw);
|
|
2078
|
+
const defaultIntegratedGpu = process.platform === "darwin" ? "Integrated / Apple Silicon" : "Integrated / Unknown";
|
|
1749
2079
|
const gpuCoresRaw = gpuController?.cores;
|
|
1750
2080
|
const gpuCores = gpuCoresRaw ? parseInt(String(gpuCoresRaw), 10) : null;
|
|
1751
2081
|
const memType = memLayout.length > 0 ? memLayout[0].type : null;
|
|
1752
2082
|
return {
|
|
1753
|
-
cpu:
|
|
2083
|
+
cpu: cpuLabel,
|
|
1754
2084
|
cpuCores: cpu.cores,
|
|
1755
2085
|
cpuPCores: cpu.performanceCores || null,
|
|
1756
2086
|
cpuECores: cpu.efficiencyCores || null,
|
|
@@ -1760,7 +2090,7 @@ async function getHardwareInfo() {
|
|
|
1760
2090
|
memoryType: memType || null,
|
|
1761
2091
|
swapTotalGB: +(mem.swaptotal / 1024 / 1024 / 1024).toFixed(1),
|
|
1762
2092
|
swapUsedGB: +(mem.swapused / 1024 / 1024 / 1024).toFixed(1),
|
|
1763
|
-
gpu: gpuNames ||
|
|
2093
|
+
gpu: normalizeWhitespace(gpuNames) || inferredGpu || defaultIntegratedGpu,
|
|
1764
2094
|
gpuCores: gpuCores && !isNaN(gpuCores) ? gpuCores : null,
|
|
1765
2095
|
gpuVramMB: gpuController?.vram ?? null,
|
|
1766
2096
|
os: `${osInfo.distro} ${osInfo.release}`,
|
|
@@ -1795,6 +2125,10 @@ import chalk from "chalk";
|
|
|
1795
2125
|
|
|
1796
2126
|
// ../src/ui/terminal.ts
|
|
1797
2127
|
var supportsUnicode = process.platform !== "win32" || Boolean(process.env.WT_SESSION) || Boolean(process.env.TERM_PROGRAM);
|
|
2128
|
+
// Matches an escape sequence of the form ESC [ <digits/semicolons> <letter>.
var ANSI_RE = /\x1b\[[0-9;]*[A-Za-z]/g;

/**
 * Remove every sequence matched by ANSI_RE from `value`.
 *
 * @param {string} value - Text that may contain escape sequences.
 * @returns {string} The text with those sequences removed.
 */
function stripAnsi(value) {
  const withoutEscapes = value.replace(ANSI_RE, () => "");
  return withoutEscapes;
}
|
|
1798
2132
|
|
|
1799
2133
|
// ../src/ui/progress.ts
|
|
1800
2134
|
var FUN_PHRASES = [
|
|
@@ -1995,6 +2329,8 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
1995
2329
|
optionalProbeWithAvailability(() => getSwapUsedGB(), 0),
|
|
1996
2330
|
optionalProbe(() => detectBatteryPowered(), void 0)
|
|
1997
2331
|
]);
|
|
2332
|
+
const runningModelsBeforeWarmup = await optionalProbe(() => listRunningModels3(), []);
|
|
2333
|
+
const modelWasAlreadyLoaded = runningModelsBeforeWarmup.some((m) => m.name === model);
|
|
1998
2334
|
const warmup = await withTimeout(
|
|
1999
2335
|
generateStream3(model, WARMUP_PROMPT, void 0, {
|
|
2000
2336
|
...withBenchmarkProfile({
|
|
@@ -2012,15 +2348,6 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
2012
2348
|
const loadTime = warmup.loadDuration / 1e6;
|
|
2013
2349
|
const runningModels = await listRunningModels3();
|
|
2014
2350
|
const thisModel = runningModels.find((m) => m.name === model);
|
|
2015
|
-
let installedModelSizeBytes = 0;
|
|
2016
|
-
try {
|
|
2017
|
-
const availableModels = await listModels3();
|
|
2018
|
-
const listedModel = availableModels.find((m) => m.name === model);
|
|
2019
|
-
if (listedModel && Number.isFinite(listedModel.size) && listedModel.size > 0) {
|
|
2020
|
-
installedModelSizeBytes = listedModel.size;
|
|
2021
|
-
}
|
|
2022
|
-
} catch {
|
|
2023
|
-
}
|
|
2024
2351
|
spinner.succeed("Model loaded");
|
|
2025
2352
|
const tpsValues = [];
|
|
2026
2353
|
const firstChunkValues = [];
|
|
@@ -2034,6 +2361,7 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
2034
2361
|
let thinkingDetected = false;
|
|
2035
2362
|
let totalThinkingTokens = 0;
|
|
2036
2363
|
const cpuLoadSamples = [];
|
|
2364
|
+
let tokensPerSecondEstimated = false;
|
|
2037
2365
|
for (let i = 0; i < BENCH_PROMPTS.length; i++) {
|
|
2038
2366
|
spinner.start(`Running performance test ${i + 1}/${BENCH_PROMPTS.length}...`);
|
|
2039
2367
|
let firstChunkTime = null;
|
|
@@ -2071,6 +2399,9 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
2071
2399
|
tpsValues.push(tps);
|
|
2072
2400
|
totalEvalCount += result.evalCount;
|
|
2073
2401
|
totalEvalDurationNs += result.evalDuration;
|
|
2402
|
+
if (result.evalCountEstimated) {
|
|
2403
|
+
tokensPerSecondEstimated = true;
|
|
2404
|
+
}
|
|
2074
2405
|
if (firstChunkTime !== null) {
|
|
2075
2406
|
firstChunkValues.push(firstChunkTime);
|
|
2076
2407
|
}
|
|
@@ -2116,10 +2447,18 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
2116
2447
|
]);
|
|
2117
2448
|
let memoryUsedGB;
|
|
2118
2449
|
let memoryPercent;
|
|
2119
|
-
|
|
2450
|
+
let memoryFootprintEstimated = false;
|
|
2451
|
+
const runtimeReportsComparableLoadedSize = runtimeName !== "lm-studio";
|
|
2452
|
+
const estimatedLoadedModelSizeBytes = runtimeName === "lm-studio" && modelWasAlreadyLoaded ? await optionalProbe(() => estimateLoadedModelMemoryBytes(model), null) : null;
|
|
2453
|
+
const loadedModelSizeBytes = runtimeReportsComparableLoadedSize && thisModel && thisModel.size > 0 ? thisModel.size : 0;
|
|
2454
|
+
const memoryFootprintAvailable = runtimeReportsComparableLoadedSize ? loadedModelSizeBytes > 0 || !modelWasAlreadyLoaded : (estimatedLoadedModelSizeBytes ?? 0) > 0 || !modelWasAlreadyLoaded;
|
|
2120
2455
|
if (loadedModelSizeBytes > 0) {
|
|
2121
2456
|
memoryUsedGB = loadedModelSizeBytes / 1024 ** 3;
|
|
2122
2457
|
memoryPercent = memoryUsedGB / memAfter.totalGB * 100;
|
|
2458
|
+
} else if ((estimatedLoadedModelSizeBytes ?? 0) > 0) {
|
|
2459
|
+
memoryUsedGB = (estimatedLoadedModelSizeBytes ?? 0) / 1024 ** 3;
|
|
2460
|
+
memoryPercent = memoryUsedGB / memAfter.totalGB * 100;
|
|
2461
|
+
memoryFootprintEstimated = true;
|
|
2123
2462
|
} else {
|
|
2124
2463
|
memoryUsedGB = Math.max(0, memAfter.usedGB - memBefore.usedGB);
|
|
2125
2464
|
memoryPercent = Math.max(0, memAfter.percent - memBefore.percent);
|
|
@@ -2141,6 +2480,7 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
2141
2480
|
return {
|
|
2142
2481
|
metrics: {
|
|
2143
2482
|
tokensPerSecond: totalEvalDurationNs > 0 ? totalEvalCount / (totalEvalDurationNs / 1e9) : avg(tpsValues),
|
|
2483
|
+
...tokensPerSecondEstimated ? { tokensPerSecondEstimated: true } : {},
|
|
2144
2484
|
...firstChunkMs !== void 0 ? { firstChunkMs } : {},
|
|
2145
2485
|
ttft: ttft >= 0 ? ttft : 3e4,
|
|
2146
2486
|
// Fallback: 30s if no TTFT measured
|
|
@@ -2151,6 +2491,8 @@ async function runPerformanceBench(model, options = {}) {
|
|
|
2151
2491
|
completionTokens: totalCompletionTokens,
|
|
2152
2492
|
memoryUsedGB: +memoryUsedGB.toFixed(1),
|
|
2153
2493
|
memoryPercent: +memoryPercent.toFixed(1),
|
|
2494
|
+
memoryFootprintAvailable,
|
|
2495
|
+
...memoryFootprintEstimated ? { memoryFootprintEstimated: true } : {},
|
|
2154
2496
|
memoryHostUsedGB: memAfter.usedGB,
|
|
2155
2497
|
memoryHostPercent: memAfter.percent,
|
|
2156
2498
|
tpsStdDev: tpsValues.length >= 2 ? stddev(tpsValues) : void 0,
|
|
@@ -2915,7 +3257,7 @@ Answer:`;
|
|
|
2915
3257
|
|
|
2916
3258
|
// ../src/benchmarks/coding.ts
|
|
2917
3259
|
import vm2 from "vm";
|
|
2918
|
-
import { spawn } from "child_process";
|
|
3260
|
+
import { spawn as spawn2 } from "child_process";
|
|
2919
3261
|
import { Worker } from "worker_threads";
|
|
2920
3262
|
|
|
2921
3263
|
// ../src/datasets/coding.json
|
|
@@ -6588,7 +6930,7 @@ async function runTestsInSubprocess(code, task) {
|
|
|
6588
6930
|
const total = task.tests.length;
|
|
6589
6931
|
return new Promise((resolve) => {
|
|
6590
6932
|
const wallTimeoutMs = computeIsolatedWallTimeoutMs(task);
|
|
6591
|
-
const child =
|
|
6933
|
+
const child = spawn2(
|
|
6592
6934
|
process.execPath,
|
|
6593
6935
|
[
|
|
6594
6936
|
"--max-old-space-size=96",
|
|
@@ -7891,13 +8233,15 @@ function computePerformanceScore(perf, hardware) {
|
|
|
7891
8233
|
const tuning = deriveHardwareFitTuning(hardware);
|
|
7892
8234
|
const safeTokensPerSecond = sanitizeNonNegative(perf.tokensPerSecond, 0);
|
|
7893
8235
|
const safeTtft = sanitizeNonNegative(perf.ttft, tuning.ttft.hardMaxMs * 2);
|
|
7894
|
-
const effectiveMemPercent = sanitizeNonNegative(
|
|
7895
|
-
perf.memoryHostPercent ?? perf.memoryPercent,
|
|
7896
|
-
100
|
|
7897
|
-
);
|
|
7898
8236
|
const speed = Math.round(scoreSpeed(safeTokensPerSecond, tuning));
|
|
7899
8237
|
const ttft = Math.round(scoreTTFT(safeTtft, tuning));
|
|
7900
|
-
const memory = Math.round(
|
|
8238
|
+
const memory = perf.memoryFootprintAvailable === false ? Math.round((speed + ttft) / (50 + 20) * 30) : Math.round(
|
|
8239
|
+
scoreMemory(
|
|
8240
|
+
// Score memory from the model's measured footprint/delta rather than
|
|
8241
|
+
// unrelated host RAM usage from other running workloads.
|
|
8242
|
+
sanitizeNonNegative(perf.memoryPercent, 100)
|
|
8243
|
+
)
|
|
8244
|
+
);
|
|
7901
8245
|
return {
|
|
7902
8246
|
total: clamp(speed + ttft + memory, 0, 100),
|
|
7903
8247
|
speed,
|
|
@@ -8006,11 +8350,9 @@ function computeFitness(perf, quality, hardware, benchEnv) {
|
|
|
8006
8350
|
const safeTokensPerSecond = sanitizeNonNegative(perf.tokensPerSecond, 0);
|
|
8007
8351
|
const safeTtft = sanitizeNonNegative(perf.ttft, tuning.ttft.hardMaxMs * 10);
|
|
8008
8352
|
const safeLoadTime = sanitizeNonNegative(perf.loadTime, tuning.loadTimeHardMaxMs * 10);
|
|
8009
|
-
const
|
|
8010
|
-
|
|
8011
|
-
|
|
8012
|
-
);
|
|
8013
|
-
const modelMemoryDeltaPercent = sanitizeNonNegative(perf.memoryPercent, 100);
|
|
8353
|
+
const modelMemoryFootprintAvailable = perf.memoryFootprintAvailable !== false;
|
|
8354
|
+
const modelMemoryDeltaPercent = modelMemoryFootprintAvailable ? sanitizeNonNegative(perf.memoryPercent, 100) : void 0;
|
|
8355
|
+
const hostMemoryPercent = perf.memoryHostPercent !== void 0 && Number.isFinite(perf.memoryHostPercent) && perf.memoryHostPercent >= 0 ? perf.memoryHostPercent : void 0;
|
|
8014
8356
|
const disqualifiers = [];
|
|
8015
8357
|
if (safeTokensPerSecond < tuning.speed.hardMin) {
|
|
8016
8358
|
disqualifiers.push(
|
|
@@ -8027,12 +8369,12 @@ function computeFitness(perf, quality, hardware, benchEnv) {
|
|
|
8027
8369
|
`Model load time too high: ${Math.round(safeLoadTime)}ms (maximum: ${tuning.loadTimeHardMaxMs}ms for ${tuning.profile} profile)`
|
|
8028
8370
|
);
|
|
8029
8371
|
}
|
|
8030
|
-
const hostCritical = hostMemoryPercent > 95;
|
|
8031
|
-
const modelDeltaCritical = modelMemoryDeltaPercent > 90;
|
|
8032
|
-
const modelDeltaSignificant = modelMemoryDeltaPercent >= 10;
|
|
8033
|
-
if (modelDeltaCritical
|
|
8372
|
+
const hostCritical = hostMemoryPercent !== void 0 && hostMemoryPercent > 95;
|
|
8373
|
+
const modelDeltaCritical = modelMemoryDeltaPercent !== void 0 && modelMemoryDeltaPercent > 90;
|
|
8374
|
+
const modelDeltaSignificant = modelMemoryDeltaPercent !== void 0 && modelMemoryDeltaPercent >= 10;
|
|
8375
|
+
if (modelDeltaCritical) {
|
|
8034
8376
|
disqualifiers.push(
|
|
8035
|
-
`Memory usage critical:
|
|
8377
|
+
`Memory usage critical: model delta +${modelMemoryDeltaPercent.toFixed(0)}%`
|
|
8036
8378
|
);
|
|
8037
8379
|
}
|
|
8038
8380
|
const verdictScore = globalScore ?? hardwareFitScore;
|
|
@@ -8067,9 +8409,28 @@ function computeFitness(perf, quality, hardware, benchEnv) {
|
|
|
8067
8409
|
`Token speed is unstable (stddev ${perf.tpsStdDev.toFixed(1)} tok/s, mean ${safeTokensPerSecond.toFixed(1)} tok/s) \u2014 may indicate thermal throttling or memory pressure.`
|
|
8068
8410
|
);
|
|
8069
8411
|
}
|
|
8070
|
-
if (
|
|
8412
|
+
if (perf.tokensPerSecondEstimated) {
|
|
8413
|
+
warnings.push(
|
|
8414
|
+
"Token throughput is estimated from LM Studio output because native token stats were unavailable. Compare tok/s across backends cautiously."
|
|
8415
|
+
);
|
|
8416
|
+
}
|
|
8417
|
+
if (perf.memoryFootprintEstimated) {
|
|
8071
8418
|
warnings.push(
|
|
8072
|
-
|
|
8419
|
+
"Model memory footprint is estimated via LM Studio CLI rather than measured from a fresh load."
|
|
8420
|
+
);
|
|
8421
|
+
}
|
|
8422
|
+
if (!modelMemoryFootprintAvailable) {
|
|
8423
|
+
warnings.push(
|
|
8424
|
+
"Model memory footprint was unavailable for this run, so RAM fit scoring was normalized from speed and TTFT only."
|
|
8425
|
+
);
|
|
8426
|
+
}
|
|
8427
|
+
if (hostCritical && !modelMemoryFootprintAvailable) {
|
|
8428
|
+
warnings.push(
|
|
8429
|
+
`Host memory is already high (${hostMemoryPercent.toFixed(0)}%) and model footprint was unavailable. Results may be influenced by other running workloads.`
|
|
8430
|
+
);
|
|
8431
|
+
} else if (hostCritical && modelMemoryDeltaPercent !== void 0 && !modelDeltaSignificant) {
|
|
8432
|
+
warnings.push(
|
|
8433
|
+
`Host memory is already high (${hostMemoryPercent.toFixed(0)}%) but model delta is limited (+${modelMemoryDeltaPercent.toFixed(0)}%). Results may be influenced by other running workloads.`
|
|
8073
8434
|
);
|
|
8074
8435
|
}
|
|
8075
8436
|
if (hardware?.powerMode === "low-power") {
|
|
@@ -8157,6 +8518,21 @@ function getLevel(score) {
|
|
|
8157
8518
|
if (score >= 25) return "Weak";
|
|
8158
8519
|
return "Poor";
|
|
8159
8520
|
}
|
|
8521
|
+
/**
 * Build the human-readable CPU core summary for a hardware record.
 *
 * Core counts that are `null` OR `undefined` are treated as unknown, so a
 * record that lacks the performance/efficiency fields falls through to the
 * plain total instead of printing "undefined".
 *
 * @param {{ cpuCores: number, cpuPCores?: number | null, cpuECores?: number | null }} hw
 *   Hardware record; only these three fields are read.
 * @returns {string} One of:
 *   "<n> total (<p> performance + <e> efficiency)",
 *   "<n> threads (<p> cores)" when only P-cores are known and n > p,
 *   "<n> total (<p> performance)", "<n> total (<e> efficiency)", or "<n>".
 */
function formatCpuCoresLabel(hw) {
  const { cpuCores, cpuPCores, cpuECores } = hw;
  // `!= null` intentionally matches both null and undefined.
  const hasPerformance = cpuPCores != null;
  const hasEfficiency = cpuECores != null;

  if (hasPerformance && hasEfficiency) {
    return `${cpuCores} total (${cpuPCores} performance + ${cpuECores} efficiency)`;
  }
  if (hasPerformance && cpuCores > cpuPCores) {
    return `${cpuCores} threads (${cpuPCores} cores)`;
  }
  if (hasPerformance) {
    return `${cpuCores} total (${cpuPCores} performance)`;
  }
  if (hasEfficiency) {
    return `${cpuCores} total (${cpuECores} efficiency)`;
  }
  return String(cpuCores);
}
|
|
8160
8536
|
function summarizeCategoryIssues(name, details) {
|
|
8161
8537
|
let crashes = 0;
|
|
8162
8538
|
let timeouts = 0;
|
|
@@ -8181,7 +8557,7 @@ function printHardwareTable(hw) {
|
|
|
8181
8557
|
head: [chalk3.bold("Hardware"), chalk3.bold("Value")],
|
|
8182
8558
|
style: { head: [], border: [] }
|
|
8183
8559
|
});
|
|
8184
|
-
const coresDetail =
|
|
8560
|
+
const coresDetail = formatCpuCoresLabel(hw);
|
|
8185
8561
|
const cpuLine = hw.cpuFreqGHz ? `${hw.cpu} @ ${hw.cpuFreqGHz} GHz` : hw.cpu;
|
|
8186
8562
|
const ramLine = hw.memoryType ? `${hw.totalMemoryGB} GB ${hw.memoryType} (${hw.freeMemoryGB} GB free)` : `${hw.totalMemoryGB} GB (${hw.freeMemoryGB} GB free)`;
|
|
8187
8563
|
const swapColor = hw.swapUsedGB > hw.swapTotalGB * 0.5 ? chalk3.yellow : chalk3.green;
|
|
@@ -8217,7 +8593,10 @@ function printPerformanceTable(perf, benchEnvironment) {
|
|
|
8217
8593
|
const ttftColor = perf.ttft < 1e3 ? chalk3.green : perf.ttft < 3e3 ? chalk3.yellow : chalk3.red;
|
|
8218
8594
|
const memColor = perf.memoryPercent < 50 ? chalk3.green : perf.memoryPercent < 80 ? chalk3.yellow : chalk3.red;
|
|
8219
8595
|
table.push(
|
|
8220
|
-
[
|
|
8596
|
+
[
|
|
8597
|
+
"Tokens/sec",
|
|
8598
|
+
perf.tokensPerSecondEstimated ? chalk3.yellow(`${perf.tokensPerSecond.toFixed(1)} tok/s (estimated)`) : tpsColor(`${perf.tokensPerSecond.toFixed(1)} tok/s`)
|
|
8599
|
+
],
|
|
8221
8600
|
[
|
|
8222
8601
|
"First Chunk Latency",
|
|
8223
8602
|
perf.firstChunkMs !== void 0 ? formatDuration(perf.firstChunkMs) : chalk3.dim("N/A (stream metric unavailable)")
|
|
@@ -8232,8 +8611,8 @@ function printPerformanceTable(perf, benchEnvironment) {
|
|
|
8232
8611
|
["Completion Tokens", String(perf.completionTokens)],
|
|
8233
8612
|
[
|
|
8234
8613
|
"Model Memory Footprint",
|
|
8235
|
-
memColor(
|
|
8236
|
-
`${perf.memoryUsedGB.toFixed(1)} GB (+${perf.memoryPercent.toFixed(0)}%)`
|
|
8614
|
+
perf.memoryFootprintAvailable === false ? chalk3.dim("N/A (model already loaded; runtime metric unavailable)") : memColor(
|
|
8615
|
+
`${perf.memoryUsedGB.toFixed(1)} GB (+${perf.memoryPercent.toFixed(0)}%)${perf.memoryFootprintEstimated ? " (estimated)" : ""}`
|
|
8237
8616
|
)
|
|
8238
8617
|
],
|
|
8239
8618
|
[
|
|
@@ -8319,7 +8698,7 @@ function printSummaryTable(results) {
|
|
|
8319
8698
|
chalk3.bold("Model"),
|
|
8320
8699
|
chalk3.bold("tok/s"),
|
|
8321
8700
|
chalk3.bold("TTFT"),
|
|
8322
|
-
chalk3.bold("
|
|
8701
|
+
chalk3.bold("Model RAM%"),
|
|
8323
8702
|
chalk3.bold("Profile"),
|
|
8324
8703
|
chalk3.bold("HW Fit"),
|
|
8325
8704
|
chalk3.bold("Quality"),
|
|
@@ -8332,17 +8711,23 @@ function printSummaryTable(results) {
|
|
|
8332
8711
|
style: { head: [], border: [] },
|
|
8333
8712
|
wordWrap: true
|
|
8334
8713
|
});
|
|
8714
|
+
const formatSummaryModelMemory = (result) => {
|
|
8715
|
+
if (result.performance.memoryFootprintAvailable === false) return "N/A";
|
|
8716
|
+
const value = `${result.performance.memoryPercent.toFixed(0)}%`;
|
|
8717
|
+
return result.performance.memoryFootprintEstimated ? `${value}~` : value;
|
|
8718
|
+
};
|
|
8335
8719
|
for (const r of results) {
|
|
8336
8720
|
const vColor = r.fitness.verdict === "EXCELLENT" ? chalk3.green.bold : r.fitness.verdict === "GOOD" ? chalk3.blue.bold : r.fitness.verdict === "MARGINAL" ? chalk3.yellow.bold : chalk3.red.bold;
|
|
8337
8721
|
const flags = [];
|
|
8338
8722
|
if (r.hardware.powerMode === "low-power") flags.push(chalk3.red("ECO"));
|
|
8339
8723
|
if (r.modelInfo?.thinkingDetected) flags.push(chalk3.magenta("THINK"));
|
|
8340
8724
|
const modelName = compact && r.model.length > 20 ? r.model.slice(0, 18) + ".." : r.model;
|
|
8725
|
+
const throughputLabel = r.performance.tokensPerSecondEstimated ? `~${r.performance.tokensPerSecond.toFixed(1)}` : `${r.performance.tokensPerSecond.toFixed(1)}`;
|
|
8341
8726
|
const row = [
|
|
8342
8727
|
modelName,
|
|
8343
|
-
|
|
8728
|
+
throughputLabel,
|
|
8344
8729
|
formatDuration(r.performance.ttft),
|
|
8345
|
-
r
|
|
8730
|
+
formatSummaryModelMemory(r),
|
|
8346
8731
|
r.fitness.tuning.profile,
|
|
8347
8732
|
scoreColor(r.fitness.hardwareFitScore)(
|
|
8348
8733
|
`${compactBar(r.fitness.hardwareFitScore)} ${r.fitness.hardwareFitScore}%`
|
|
@@ -8367,9 +8752,8 @@ function printSummaryTable(results) {
|
|
|
8367
8752
|
// ../src/ui/verdict.ts
|
|
8368
8753
|
import chalk4 from "chalk";
|
|
8369
8754
|
var BOX_INNER = 60;
|
|
8370
|
-
var ANSI_RE = /\x1b\[[0-9;]*m/g;
|
|
8371
8755
|
function visibleLength(str) {
|
|
8372
|
-
return str
|
|
8756
|
+
return stripAnsi(str).length;
|
|
8373
8757
|
}
|
|
8374
8758
|
function wrapText(text, maxWidth) {
|
|
8375
8759
|
if (visibleLength(text) <= maxWidth) return [text];
|
|
@@ -8739,6 +9123,15 @@ function assertUploaderConfig(config) {
|
|
|
8739
9123
|
);
|
|
8740
9124
|
}
|
|
8741
9125
|
}
|
|
9126
|
+
/**
 * Pick the memory percentage to include in an upload payload.
 *
 * @param {{ performance: { memoryFootprintAvailable?: boolean, memoryPercent: number } }} result
 * @returns {number | null} `null` when the footprint is explicitly flagged
 *   unavailable (`memoryFootprintAvailable === false`); otherwise
 *   `performance.memoryPercent` as-is.
 */
function resolveUploadedMemoryPercent(result) {
  const perf = result.performance;
  if (perf.memoryFootprintAvailable === false) {
    return null;
  }
  return perf.memoryPercent;
}
|
|
9129
|
+
/**
 * Pick the model format to include in an upload payload.
 *
 * A non-blank string in `metadata.modelFormat` wins and is returned trimmed,
 * so the uploaded value is the same one that was validated. A blank or
 * non-string value is treated as absent. Without a usable format, the
 * backend decides: "gguf" for the "ollama" backend (also the default when
 * `runtimeBackend` is missing), "unknown" for anything else.
 *
 * @param {{ metadata: { modelFormat?: string, runtimeBackend?: string } }} result
 * @returns {string} The format string to upload.
 */
function resolveUploadedModelFormat(result) {
  const { modelFormat, runtimeBackend } = result.metadata;

  const declaredFormat = typeof modelFormat === "string" ? modelFormat.trim() : "";
  if (declaredFormat) return declaredFormat;

  return (runtimeBackend ?? "ollama") === "ollama" ? "gguf" : "unknown";
}
|
|
8742
9135
|
async function uploadBenchResult(result, options = {}) {
|
|
8743
9136
|
const config = resolveUploaderConfig();
|
|
8744
9137
|
assertUploaderConfig(config);
|
|
@@ -8751,7 +9144,7 @@ async function uploadBenchResult(result, options = {}) {
|
|
|
8751
9144
|
thinking_detected: result.modelInfo?.thinkingDetected ?? null,
|
|
8752
9145
|
tokens_per_second: result.performance.tokensPerSecond,
|
|
8753
9146
|
ttft_ms: result.performance.ttft,
|
|
8754
|
-
memory_percent: result
|
|
9147
|
+
memory_percent: resolveUploadedMemoryPercent(result),
|
|
8755
9148
|
thinking_tokens_estimate: result.performance.thinkingTokensEstimate ?? null,
|
|
8756
9149
|
verdict: result.fitness.verdict,
|
|
8757
9150
|
global_score: result.fitness.globalScore,
|
|
@@ -8768,7 +9161,7 @@ async function uploadBenchResult(result, options = {}) {
|
|
|
8768
9161
|
benchmark_spec_version: result.metadata.benchmarkSpecVersion,
|
|
8769
9162
|
runtime_version: result.metadata.runtimeVersion,
|
|
8770
9163
|
runtime_backend: result.metadata.runtimeBackend ?? "ollama",
|
|
8771
|
-
model_format: result
|
|
9164
|
+
model_format: resolveUploadedModelFormat(result),
|
|
8772
9165
|
raw_log_hash: result.metadata.rawLogHash,
|
|
8773
9166
|
result
|
|
8774
9167
|
};
|
|
@@ -9021,6 +9414,7 @@ async function promptSubmitterProfile(deps, defaults = {}) {
|
|
|
9021
9414
|
}
|
|
9022
9415
|
console.log(chalk6.yellow("Nickname must be between 2 and 40 characters."));
|
|
9023
9416
|
}
|
|
9417
|
+
console.log(chalk6.dim("Your email is never stored \u2014 only a SHA-256 hash is saved to match your leaderboard entries."));
|
|
9024
9418
|
while (true) {
|
|
9025
9419
|
const emailHint = defaults.email ? ` [${defaults.email}]` : "";
|
|
9026
9420
|
const emailAnswer = await ask(`Email${emailHint} > `);
|
|
@@ -9308,7 +9702,7 @@ ${tl}${h.repeat(innerWidth)}${tr}`));
|
|
|
9308
9702
|
minSuccessfulPrompts: options.perfMinSuccessfulPrompts,
|
|
9309
9703
|
failOnPromptError: options.perfStrict,
|
|
9310
9704
|
think: thinkEnabled,
|
|
9311
|
-
streamStallTimeoutMs: options.
|
|
9705
|
+
streamStallTimeoutMs: options.streamStallTimeoutMs
|
|
9312
9706
|
});
|
|
9313
9707
|
const perf = perfResult.metrics;
|
|
9314
9708
|
const benchEnvironment = perfResult.benchEnvironment;
|
|
@@ -9346,13 +9740,22 @@ ${tl}${h.repeat(innerWidth)}${tr}`));
|
|
|
9346
9740
|
printVerdict(modelName, fitness);
|
|
9347
9741
|
}
|
|
9348
9742
|
const matchedModel = allModels.find((m) => m.name === modelName);
|
|
9349
|
-
|
|
9350
|
-
|
|
9351
|
-
|
|
9352
|
-
|
|
9353
|
-
|
|
9354
|
-
|
|
9355
|
-
|
|
9743
|
+
let resolvedModel = matchedModel;
|
|
9744
|
+
if (matchedModel?.modelFormat === void 0) {
|
|
9745
|
+
try {
|
|
9746
|
+
resolvedModel = await resolveRuntimeModel(modelName) ?? matchedModel;
|
|
9747
|
+
} catch {
|
|
9748
|
+
resolvedModel = matchedModel;
|
|
9749
|
+
}
|
|
9750
|
+
}
|
|
9751
|
+
const modelMetadataSource = resolvedModel ?? matchedModel;
|
|
9752
|
+
const modelInfo = modelMetadataSource ? {
|
|
9753
|
+
parameterSize: modelMetadataSource.parameterSize,
|
|
9754
|
+
quantization: modelMetadataSource.quantization,
|
|
9755
|
+
family: modelMetadataSource.family,
|
|
9756
|
+
// Persist actual observed thinking behavior from the benchmark run.
|
|
9757
|
+
thinkingDetected: perfResult.thinkingDetected
|
|
9758
|
+
} : { thinkingDetected: perfResult.thinkingDetected };
|
|
9356
9759
|
const partialResult = {
|
|
9357
9760
|
model: modelName,
|
|
9358
9761
|
modelInfo,
|
|
@@ -9367,7 +9770,7 @@ ${tl}${h.repeat(innerWidth)}${tr}`));
|
|
|
9367
9770
|
promptPackVersion: PROMPT_PACK_VERSION,
|
|
9368
9771
|
runtimeVersion,
|
|
9369
9772
|
runtimeBackend: getRuntimeName(),
|
|
9370
|
-
modelFormat:
|
|
9773
|
+
modelFormat: resolvedModel?.modelFormat ?? (getRuntimeName() === "ollama" ? getRuntimeModelFormat() : "unknown"),
|
|
9371
9774
|
benchmarkProfile: buildBenchmarkProfileMetadata(thinkEnabled)
|
|
9372
9775
|
}
|
|
9373
9776
|
};
|