metrillm 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +122 -49
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -5500,9 +5500,17 @@ function hasSamplingOverrides2(options) {
|
|
|
5500
5500
|
/**
 * Report whether a failed response looks like the backend rejecting the
 * optional sampling fields.
 *
 * @param {number} status - HTTP status code of the failed response.
 * @param {string} text - Raw response body text.
 * @returns {boolean} True only for a 400/422 whose body names `seed`,
 *   `top_p` or `topp` and also matches UNSUPPORTED_SAMPLING_FIELD_PATTERN.
 */
function isUnsupportedSamplingMessage(status, text) {
  const isValidationStatus = status === 400 || status === 422;
  if (!isValidationStatus) {
    return false;
  }
  // Matching is done on the lower-cased body so casing does not matter.
  const message = text.toLowerCase();
  const namesSamplingField = /\b(seed|top_p|topp)\b/.test(message);
  return namesSamplingField && UNSUPPORTED_SAMPLING_FIELD_PATTERN.test(message);
}
|
|
5506
|
+
/**
 * Report whether a failed response indicates that the output-limit field
 * used for the given mode was not accepted.
 *
 * In "legacy" mode the field that was sent is `max_tokens`; in any other
 * mode it is `max_output_tokens`. The other spelling is the alternate field.
 *
 * @param {number} status - HTTP status code of the failed response.
 * @param {string} text - Raw response body text.
 * @param {string} mode - Output-limit mode the request was built with.
 * @returns {boolean} True for a 400/422 whose body either names the sent
 *   field together with an UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN match, or
 *   names the alternate field together with "required" / "missing".
 */
function isUnsupportedOutputLimitMessage(status, text, mode) {
  if (!(status === 400 || status === 422)) {
    return false;
  }
  const message = text.toLowerCase();
  const [sentField, otherField] = mode === "legacy"
    ? ["max_tokens", "max_output_tokens"]
    : ["max_output_tokens", "max_tokens"];
  // Case 1: the field we sent is reported as unsupported.
  if (message.includes(sentField) && UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN.test(message)) {
    return true;
  }
  // Case 2: the backend asks for the alternate field instead.
  return message.includes(otherField) && /\b(required|missing)\b/.test(message);
}
|
|
5507
5515
|
function extractLMStudioErrorMessage(body) {
|
|
5508
5516
|
const trimmed = body.trim();
|
|
@@ -5536,20 +5544,65 @@ function buildLMStudioRequestError(kind, model, status, statusText, body) {
|
|
|
5536
5544
|
const suffix = backendMessage ? ` ${backendMessage}` : "";
|
|
5537
5545
|
return new Error(`LM Studio ${kind} failed (${status} ${statusText})${suffix}`.trim());
|
|
5538
5546
|
}
|
|
5539
|
-
function
|
|
5547
|
+
/**
 * Build the error raised when the backend rejected both output-limit fields.
 *
 * @param {string} kind - Request kind used in the message (callers in this
 *   file pass "generate" or "stream").
 * @param {string} model - Model identifier quoted in the message.
 * @param {string} body - Raw body of the last failed response; passed to
 *   extractLMStudioErrorMessage to obtain an optional backend detail.
 * @returns {Error} Error whose message is the sentences joined by a space.
 */
function buildUnsupportedOutputLimitNegotiationError(kind, model, body) {
  const backendDetail = extractLMStudioErrorMessage(body);
  const sentences = [
    `LM Studio ${kind} failed for "${model}" because this backend rejected both max_output_tokens and max_tokens.`,
    "MetriLLM cannot safely continue without an explicit output limit because benchmarks rely on bounded generation."
  ];
  // The backend detail is appended only when one could be extracted.
  if (backendDetail) {
    sentences.push(`Backend error: ${backendDetail}`);
  }
  return new Error(sentences.join(" "));
}
|
|
5557
|
+
/**
 * Build the JSON payload for a native chat request.
 *
 * @param {string} model - Model identifier.
 * @param {string} prompt - Prompt text, sent as `input`.
 * @param {object} [options] - Optional settings; reads `think`,
 *   `num_predict`, `temperature`, `top_p` and `seed`.
 * @param {boolean} stream - Value of the `stream` field.
 * @param {boolean} includeSampling - When false, `top_p` and `seed` are
 *   omitted even if present in `options`.
 * @param {string} outputLimitMode - "preferred" emits `max_output_tokens`,
 *   "legacy" emits `max_tokens`; any other value emits neither field.
 * @returns {object} Request body ready for JSON.stringify.
 */
function buildNativeChatBody(model, prompt, options, stream, includeSampling, outputLimitMode) {
  const reasoning = buildNativeThinkingOption(options?.think);
  // `??` (not `!== undefined`) so an explicit null also falls back to 512;
  // otherwise the request would carry a null limit. 0 is still preserved.
  const outputLimit = options?.num_predict ?? 512;
  return {
    model,
    input: prompt,
    temperature: options?.temperature ?? 0,
    ...includeSampling && options?.top_p !== void 0 ? { top_p: options.top_p } : {},
    ...includeSampling && options?.seed !== void 0 ? { seed: options.seed } : {},
    ...outputLimitMode === "preferred" ? { max_output_tokens: outputLimit } : {},
    ...outputLimitMode === "legacy" ? { max_tokens: outputLimit } : {},
    stream,
    ...reasoning !== void 0 ? { reasoning } : {},
    // Only an explicit `think: false` adds the non-thinking system prompt.
    ...options?.think === false ? { system_prompt: NON_THINKING_SYSTEM_PROMPT } : {}
  };
}
|
|
5573
|
+
/**
 * Send a request and, on failure, retry with reduced or alternate fields
 * until the backend accepts it or no further adjustment is possible.
 *
 * Two adjustments are attempted, in this order, for each failed response:
 *   1. drop the sampling overrides (once) when the failure looks like an
 *      unsupported-sampling rejection;
 *   2. switch the output-limit field between "preferred" and "legacy" when
 *      the failure looks like an unsupported-output-limit rejection.
 * Any other failure is thrown immediately.
 *
 * @param {string} kind - Request kind used in error messages.
 * @param {string} model - Model identifier used in error messages.
 * @param {*} cacheKey - Key under which the working output-limit mode is
 *   remembered in outputLimitModeCache (callers in this file pass the base URL).
 * @param {object} [options] - Generation options, checked for sampling overrides.
 * @param {(includeSampling: boolean, outputLimitMode: string) => Promise<Response>} makeRequest
 *   Performs one attempt with the given settings.
 * @returns {Promise<Response>} The first successful (`ok`) response.
 * @throws {Error} When both output-limit modes were rejected, when a
 *   failure is not negotiable, or when the retry budget is exhausted.
 */
async function negotiateRequest(kind, model, cacheKey, options, makeRequest) {
  let includeSampling = true;
  let outputLimitMode = outputLimitModeCache.get(cacheKey) ?? "preferred";
  const triedOutputLimitModes = new Set([outputLimitMode]);
  let resp = await makeRequest(includeSampling, outputLimitMode);
  let retries = 0;
  while (!resp.ok && retries < MAX_NEGOTIATE_RETRIES) {
    retries++;
    // A body that cannot be read is treated as empty rather than fatal.
    const body = await resp.text().catch(() => "");
    if (includeSampling && hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
      includeSampling = false;
      resp = await makeRequest(includeSampling, outputLimitMode);
      continue;
    }
    if (isUnsupportedOutputLimitMessage(resp.status, body, outputLimitMode)) {
      // Only switch to a mode that has not been attempted yet. If the other
      // mode was already tried, both spellings have been rejected and
      // re-sending it would just waste a request.
      const alternateMode = outputLimitMode === "preferred" ? "legacy" : "preferred";
      if (triedOutputLimitModes.has(alternateMode)) {
        throw buildUnsupportedOutputLimitNegotiationError(kind, model, body);
      }
      outputLimitMode = alternateMode;
      triedOutputLimitModes.add(outputLimitMode);
      resp = await makeRequest(includeSampling, outputLimitMode);
      continue;
    }
    throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
  }
  // Reached with a failed response only when the retry budget ran out.
  if (!resp.ok) {
    const body = await resp.text().catch(() => "");
    throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
  }
  // Remember the mode that worked so later calls start with it.
  outputLimitModeCache.set(cacheKey, outputLimitMode);
  return resp;
}
|
|
5553
5606
|
function getNativeStatNumber(value) {
|
|
5554
5607
|
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) return void 0;
|
|
5555
5608
|
return value;
|
|
@@ -6258,25 +6311,18 @@ async function generate2(model, prompt, options) {
|
|
|
6258
6311
|
try {
|
|
6259
6312
|
const baseUrl = getLMStudioBaseUrl();
|
|
6260
6313
|
const url = new URL("/api/v1/chat", baseUrl);
|
|
6261
|
-
const
|
|
6262
|
-
|
|
6263
|
-
|
|
6264
|
-
|
|
6265
|
-
|
|
6266
|
-
|
|
6267
|
-
|
|
6268
|
-
|
|
6269
|
-
|
|
6270
|
-
|
|
6271
|
-
|
|
6272
|
-
|
|
6273
|
-
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
6274
|
-
}
|
|
6275
|
-
}
|
|
6276
|
-
if (!resp.ok) {
|
|
6277
|
-
const body = await resp.text().catch(() => "");
|
|
6278
|
-
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
6279
|
-
}
|
|
6314
|
+
const resp = await negotiateRequest(
|
|
6315
|
+
"generate",
|
|
6316
|
+
model,
|
|
6317
|
+
baseUrl,
|
|
6318
|
+
options,
|
|
6319
|
+
(sampling, limitMode) => fetch(url, {
|
|
6320
|
+
method: "POST",
|
|
6321
|
+
headers: getLMStudioHeaders(),
|
|
6322
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, sampling, limitMode)),
|
|
6323
|
+
signal: controller.signal
|
|
6324
|
+
})
|
|
6325
|
+
);
|
|
6280
6326
|
const payload = await resp.json();
|
|
6281
6327
|
const nativeResponse = extractNativeResponse(payload);
|
|
6282
6328
|
const response = nativeResponse.response;
|
|
@@ -6334,25 +6380,18 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
6334
6380
|
};
|
|
6335
6381
|
try {
|
|
6336
6382
|
resetStallTimer();
|
|
6337
|
-
const
|
|
6338
|
-
|
|
6339
|
-
|
|
6340
|
-
|
|
6341
|
-
|
|
6342
|
-
|
|
6343
|
-
|
|
6344
|
-
|
|
6345
|
-
|
|
6346
|
-
|
|
6347
|
-
|
|
6348
|
-
|
|
6349
|
-
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
6350
|
-
}
|
|
6351
|
-
}
|
|
6352
|
-
if (!resp.ok) {
|
|
6353
|
-
const body = await resp.text().catch(() => "");
|
|
6354
|
-
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
6355
|
-
}
|
|
6383
|
+
const resp = await negotiateRequest(
|
|
6384
|
+
"stream",
|
|
6385
|
+
model,
|
|
6386
|
+
baseUrl,
|
|
6387
|
+
options,
|
|
6388
|
+
(sampling, limitMode) => fetch(url, {
|
|
6389
|
+
method: "POST",
|
|
6390
|
+
headers: getLMStudioHeaders(),
|
|
6391
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, sampling, limitMode)),
|
|
6392
|
+
signal: controller.signal
|
|
6393
|
+
})
|
|
6394
|
+
);
|
|
6356
6395
|
if (!resp.body) {
|
|
6357
6396
|
throw new Error("LM Studio stream response body is empty");
|
|
6358
6397
|
}
|
|
@@ -6481,7 +6520,7 @@ function abortOngoingRequests2() {
|
|
|
6481
6520
|
}
|
|
6482
6521
|
activeAbortControllers.clear();
|
|
6483
6522
|
}
|
|
6484
|
-
var DEFAULT_LM_STUDIO_BASE_URL, LM_STUDIO_INIT_TIMEOUT_MS, LM_STUDIO_METADATA_TIMEOUT_MS, DEFAULT_STREAM_STALL_TIMEOUT_MS2, LM_STUDIO_CLI_TIMEOUT_MS, SHARED_STREAM_STALL_TIMEOUT_ENV2, DEFAULT_LM_STUDIO_HOME_DIR, DEFAULT_LM_STUDIO_MODELS_DIR, LM_STUDIO_HOME_DIR_ENV, LM_STUDIO_MODELS_DIR_ENV, LM_STUDIO_CLI_PATH_ENV, activeAbortControllers, directorySizeCache, modelDefinitionCache, NON_THINKING_SYSTEM_PROMPT;
|
|
6523
|
+
var DEFAULT_LM_STUDIO_BASE_URL, LM_STUDIO_INIT_TIMEOUT_MS, LM_STUDIO_METADATA_TIMEOUT_MS, DEFAULT_STREAM_STALL_TIMEOUT_MS2, LM_STUDIO_CLI_TIMEOUT_MS, SHARED_STREAM_STALL_TIMEOUT_ENV2, DEFAULT_LM_STUDIO_HOME_DIR, DEFAULT_LM_STUDIO_MODELS_DIR, LM_STUDIO_HOME_DIR_ENV, LM_STUDIO_MODELS_DIR_ENV, LM_STUDIO_CLI_PATH_ENV, activeAbortControllers, directorySizeCache, modelDefinitionCache, outputLimitModeCache, NON_THINKING_SYSTEM_PROMPT, UNSUPPORTED_SAMPLING_FIELD_PATTERN, UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN, MAX_NEGOTIATE_RETRIES;
|
|
6485
6524
|
var init_lm_studio_client = __esm({
|
|
6486
6525
|
"src/core/lm-studio-client.ts"() {
|
|
6487
6526
|
init_utils();
|
|
@@ -6499,12 +6538,16 @@ var init_lm_studio_client = __esm({
|
|
|
6499
6538
|
activeAbortControllers = /* @__PURE__ */ new Set();
|
|
6500
6539
|
directorySizeCache = /* @__PURE__ */ new Map();
|
|
6501
6540
|
modelDefinitionCache = /* @__PURE__ */ new Map();
|
|
6541
|
+
outputLimitModeCache = /* @__PURE__ */ new Map();
|
|
6502
6542
|
NON_THINKING_SYSTEM_PROMPT = [
|
|
6503
6543
|
"You are in non-thinking mode for benchmark reproducibility.",
|
|
6504
6544
|
"Return only the final answer.",
|
|
6505
6545
|
"Do not output internal reasoning, chain-of-thought, or scratchpad.",
|
|
6506
6546
|
"Never output tags or sections like <think>, </think>, [THINK], [/THINK], or Thinking Process."
|
|
6507
6547
|
].join(" ");
|
|
6548
|
+
UNSUPPORTED_SAMPLING_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/;
|
|
6549
|
+
UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|unexpected|additional|extra|invalid field/;
|
|
6550
|
+
MAX_NEGOTIATE_RETRIES = 5;
|
|
6508
6551
|
}
|
|
6509
6552
|
});
|
|
6510
6553
|
|
|
@@ -36469,6 +36512,30 @@ function parseRuntimeBackend(value) {
|
|
|
36469
36512
|
}
|
|
36470
36513
|
return void 0;
|
|
36471
36514
|
}
|
|
36515
|
+
/**
 * Normalize the auto-share portion of a parsed config value.
 *
 * - `autoShare: true`  -> `true`
 * - `autoShare: "ask"` -> `"ask"`
 * - `autoShare: false` -> `"ask"`
 * - anything else (including a non-object `parsed`) -> `{ autoShare: true }`
 *
 * For the three recognized values the result also carries
 * `autoSharePreferenceSet`, which is `true` only when the stored flag is
 * strictly `true` and `undefined` otherwise.
 *
 * @param {*} parsed - Result of JSON.parse on the config file content.
 * @returns {{autoShare: (true|"ask"), autoSharePreferenceSet?: (true|undefined)}}
 */
function parseAutoShareConfig(parsed) {
  // JSON.parse may yield null or a primitive; treat those like an empty
  // config instead of throwing on property access.
  if (parsed === null || typeof parsed !== "object") {
    return { autoShare: true };
  }
  let autoShare;
  if (parsed.autoShare === true) {
    autoShare = true;
  } else if (parsed.autoShare === "ask" || parsed.autoShare === false) {
    autoShare = "ask";
  } else {
    // Unrecognized or missing value: fall back to the default and do not
    // carry the preference flag over.
    return { autoShare: true };
  }
  return {
    autoShare,
    autoSharePreferenceSet: parsed.autoSharePreferenceSet === true ? true : void 0
  };
}
|
|
36472
36539
|
function resultFilename(result2) {
|
|
36473
36540
|
const ts = result2.timestamp.replace(/[:.]/g, "-").replace("T", "_").replace(/[^0-9_-]/g, "");
|
|
36474
36541
|
const model = result2.model.replace(/[^a-zA-Z0-9._-]/g, "_");
|
|
@@ -36486,14 +36553,14 @@ async function loadConfig() {
|
|
|
36486
36553
|
try {
|
|
36487
36554
|
const content = await readFile(CONFIG_PATH, "utf8");
|
|
36488
36555
|
const parsed = JSON.parse(content);
|
|
36489
|
-
const
|
|
36556
|
+
const autoShareConfig = parseAutoShareConfig(parsed);
|
|
36490
36557
|
const telemetry = typeof parsed.telemetry === "boolean" ? parsed.telemetry : void 0;
|
|
36491
36558
|
const submitterNickname = typeof parsed.submitterNickname === "string" && isValidNickname(parsed.submitterNickname) ? normalizeNickname(parsed.submitterNickname) : void 0;
|
|
36492
36559
|
const submitterEmail = typeof parsed.submitterEmail === "string" && isValidEmail(parsed.submitterEmail) ? normalizeEmail(parsed.submitterEmail) : void 0;
|
|
36493
36560
|
const runtimeBackend = parseRuntimeBackend(parsed.runtimeBackend);
|
|
36494
36561
|
return {
|
|
36495
36562
|
...DEFAULT_CONFIG,
|
|
36496
|
-
|
|
36563
|
+
...autoShareConfig,
|
|
36497
36564
|
telemetry,
|
|
36498
36565
|
submitterNickname,
|
|
36499
36566
|
submitterEmail,
|
|
@@ -36515,7 +36582,7 @@ var init_store = __esm({
|
|
|
36515
36582
|
RESULTS_DIR = join(BASE_DIR, "results");
|
|
36516
36583
|
CONFIG_PATH = join(BASE_DIR, "config.json");
|
|
36517
36584
|
DEFAULT_CONFIG = {
|
|
36518
|
-
autoShare:
|
|
36585
|
+
autoShare: true
|
|
36519
36586
|
};
|
|
36520
36587
|
}
|
|
36521
36588
|
});
|
|
@@ -48592,7 +48659,7 @@ async function promptShare(result2, deps = {}) {
|
|
|
48592
48659
|
if (!stdin.isTTY || !stdout.isTTY) return "skip";
|
|
48593
48660
|
const choice = await (deps.selectChoice ?? selectShareChoice)(result2);
|
|
48594
48661
|
if (choice === "always") {
|
|
48595
|
-
await saveConfig({ ...config, autoShare: true });
|
|
48662
|
+
await saveConfig({ ...config, autoShare: true, autoSharePreferenceSet: true });
|
|
48596
48663
|
return "share";
|
|
48597
48664
|
}
|
|
48598
48665
|
if (choice === "skip") {
|
|
@@ -53938,6 +54005,7 @@ async function selectWithArrows(title, options, config = {}) {
|
|
|
53938
54005
|
};
|
|
53939
54006
|
readline4.emitKeypressEvents(stdin);
|
|
53940
54007
|
stdin.resume();
|
|
54008
|
+
stdin.ref?.();
|
|
53941
54009
|
if (stdin.isTTY) {
|
|
53942
54010
|
stdin.setRawMode(true);
|
|
53943
54011
|
}
|
|
@@ -54020,6 +54088,7 @@ ${source_default.dim(message)}
|
|
|
54020
54088
|
};
|
|
54021
54089
|
readline4.emitKeypressEvents(stdin);
|
|
54022
54090
|
stdin.resume();
|
|
54091
|
+
stdin.ref?.();
|
|
54023
54092
|
if (stdin.isTTY) stdin.setRawMode(true);
|
|
54024
54093
|
stdout.write("\x1B[?25l");
|
|
54025
54094
|
stdin.on("keypress", onKeypress);
|
|
@@ -54274,7 +54343,11 @@ async function runSettingsMenu(deps = {}) {
|
|
|
54274
54343
|
if (action === "toggle-auto-share") {
|
|
54275
54344
|
const nextAutoShare = autoShareEnabled ? "ask" : true;
|
|
54276
54345
|
try {
|
|
54277
|
-
await saveUserConfig({
|
|
54346
|
+
await saveUserConfig({
|
|
54347
|
+
...config,
|
|
54348
|
+
autoShare: nextAutoShare,
|
|
54349
|
+
autoSharePreferenceSet: true
|
|
54350
|
+
});
|
|
54278
54351
|
successMsg(`Auto-share ${nextAutoShare === true ? "enabled" : "disabled"}.`);
|
|
54279
54352
|
} catch (err) {
|
|
54280
54353
|
errorMsg("Could not update auto-share setting.");
|