metrillm 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +93 -45
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -5500,9 +5500,17 @@ function hasSamplingOverrides2(options) {
|
|
|
5500
5500
|
function isUnsupportedSamplingMessage(status, text) {
|
|
5501
5501
|
if (status !== 400 && status !== 422) return false;
|
|
5502
5502
|
const lower = text.toLowerCase();
|
|
5503
|
-
|
|
5504
|
-
|
|
5505
|
-
|
|
5503
|
+
if (!/\b(seed|top_p|topp)\b/.test(lower)) return false;
|
|
5504
|
+
return UNSUPPORTED_SAMPLING_FIELD_PATTERN.test(lower);
|
|
5505
|
+
}
|
|
5506
|
+
function isUnsupportedOutputLimitMessage(status, text, mode) {
|
|
5507
|
+
if (status !== 400 && status !== 422) return false;
|
|
5508
|
+
const lower = text.toLowerCase();
|
|
5509
|
+
const fieldName = mode === "legacy" ? "max_tokens" : "max_output_tokens";
|
|
5510
|
+
const alternateFieldName = mode === "legacy" ? "max_output_tokens" : "max_tokens";
|
|
5511
|
+
const mentionsUnsupportedCurrentField = lower.includes(fieldName) && UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN.test(lower);
|
|
5512
|
+
const mentionsRequiredAlternateField = lower.includes(alternateFieldName) && /\b(required|missing)\b/.test(lower);
|
|
5513
|
+
return mentionsUnsupportedCurrentField || mentionsRequiredAlternateField;
|
|
5506
5514
|
}
|
|
5507
5515
|
function extractLMStudioErrorMessage(body) {
|
|
5508
5516
|
const trimmed = body.trim();
|
|
@@ -5536,20 +5544,65 @@ function buildLMStudioRequestError(kind, model, status, statusText, body) {
|
|
|
5536
5544
|
const suffix = backendMessage ? ` ${backendMessage}` : "";
|
|
5537
5545
|
return new Error(`LM Studio ${kind} failed (${status} ${statusText})${suffix}`.trim());
|
|
5538
5546
|
}
|
|
5539
|
-
function
|
|
5547
|
+
function buildUnsupportedOutputLimitNegotiationError(kind, model, body) {
|
|
5548
|
+
const backendMessage = extractLMStudioErrorMessage(body);
|
|
5549
|
+
return new Error(
|
|
5550
|
+
[
|
|
5551
|
+
`LM Studio ${kind} failed for "${model}" because this backend rejected both max_output_tokens and max_tokens.`,
|
|
5552
|
+
"MetriLLM cannot safely continue without an explicit output limit because benchmarks rely on bounded generation.",
|
|
5553
|
+
backendMessage ? `Backend error: ${backendMessage}` : void 0
|
|
5554
|
+
].filter(Boolean).join(" ")
|
|
5555
|
+
);
|
|
5556
|
+
}
|
|
5557
|
+
function buildNativeChatBody(model, prompt, options, stream, includeSampling, outputLimitMode) {
|
|
5540
5558
|
const reasoning = buildNativeThinkingOption(options?.think);
|
|
5559
|
+
const outputLimit = options?.num_predict !== void 0 ? options.num_predict : 512;
|
|
5541
5560
|
return {
|
|
5542
5561
|
model,
|
|
5543
5562
|
input: prompt,
|
|
5544
5563
|
temperature: options?.temperature ?? 0,
|
|
5545
5564
|
...includeSampling && options?.top_p !== void 0 ? { top_p: options.top_p } : {},
|
|
5546
5565
|
...includeSampling && options?.seed !== void 0 ? { seed: options.seed } : {},
|
|
5547
|
-
|
|
5566
|
+
...outputLimitMode === "preferred" ? { max_output_tokens: outputLimit } : {},
|
|
5567
|
+
...outputLimitMode === "legacy" ? { max_tokens: outputLimit } : {},
|
|
5548
5568
|
stream,
|
|
5549
5569
|
...reasoning !== void 0 ? { reasoning } : {},
|
|
5550
5570
|
...options?.think === false ? { system_prompt: NON_THINKING_SYSTEM_PROMPT } : {}
|
|
5551
5571
|
};
|
|
5552
5572
|
}
|
|
5573
|
+
async function negotiateRequest(kind, model, cacheKey, options, makeRequest) {
|
|
5574
|
+
let includeSampling = true;
|
|
5575
|
+
let outputLimitMode = outputLimitModeCache.get(cacheKey) ?? "preferred";
|
|
5576
|
+
const triedOutputLimitModes = /* @__PURE__ */ new Set([outputLimitMode]);
|
|
5577
|
+
let resp = await makeRequest(includeSampling, outputLimitMode);
|
|
5578
|
+
let retries = 0;
|
|
5579
|
+
while (!resp.ok && retries < MAX_NEGOTIATE_RETRIES) {
|
|
5580
|
+
retries++;
|
|
5581
|
+
const body = await resp.text().catch(() => "");
|
|
5582
|
+
if (includeSampling && hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
|
|
5583
|
+
includeSampling = false;
|
|
5584
|
+
resp = await makeRequest(includeSampling, outputLimitMode);
|
|
5585
|
+
continue;
|
|
5586
|
+
}
|
|
5587
|
+
if (isUnsupportedOutputLimitMessage(resp.status, body, outputLimitMode)) {
|
|
5588
|
+
const nextMode = outputLimitMode === "preferred" ? "legacy" : !triedOutputLimitModes.has("preferred") ? "preferred" : null;
|
|
5589
|
+
if (!nextMode) {
|
|
5590
|
+
throw buildUnsupportedOutputLimitNegotiationError(kind, model, body);
|
|
5591
|
+
}
|
|
5592
|
+
outputLimitMode = nextMode;
|
|
5593
|
+
triedOutputLimitModes.add(outputLimitMode);
|
|
5594
|
+
resp = await makeRequest(includeSampling, outputLimitMode);
|
|
5595
|
+
continue;
|
|
5596
|
+
}
|
|
5597
|
+
throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
|
|
5598
|
+
}
|
|
5599
|
+
if (!resp.ok) {
|
|
5600
|
+
const body = await resp.text().catch(() => "");
|
|
5601
|
+
throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
|
|
5602
|
+
}
|
|
5603
|
+
outputLimitModeCache.set(cacheKey, outputLimitMode);
|
|
5604
|
+
return resp;
|
|
5605
|
+
}
|
|
5553
5606
|
function getNativeStatNumber(value) {
|
|
5554
5607
|
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) return void 0;
|
|
5555
5608
|
return value;
|
|
@@ -6210,14 +6263,17 @@ async function listModels2() {
|
|
|
6210
6263
|
throw new Error(`LM Studio list models failed (${resp.status} ${resp.statusText})`);
|
|
6211
6264
|
}
|
|
6212
6265
|
const data = await resp.json();
|
|
6213
|
-
const
|
|
6266
|
+
const primaryIds = (data.data ?? []).map((m) => m.id?.trim()).filter((id) => Boolean(id));
|
|
6214
6267
|
const apiModels = await fetchApiModels();
|
|
6215
6268
|
const apiById = /* @__PURE__ */ new Map();
|
|
6269
|
+
const secondaryIds = [];
|
|
6216
6270
|
for (const model of apiModels ?? []) {
|
|
6217
6271
|
const id = asNonEmptyString(model.id);
|
|
6218
6272
|
if (!id) continue;
|
|
6219
6273
|
apiById.set(id, model);
|
|
6274
|
+
secondaryIds.push(id);
|
|
6220
6275
|
}
|
|
6276
|
+
const ids = Array.from(/* @__PURE__ */ new Set([...primaryIds, ...secondaryIds]));
|
|
6221
6277
|
const modelsRootDir = await resolveModelsRootDir();
|
|
6222
6278
|
const localMetadataEntries = await Promise.all(
|
|
6223
6279
|
ids.map(async (id) => {
|
|
@@ -6255,25 +6311,18 @@ async function generate2(model, prompt, options) {
|
|
|
6255
6311
|
try {
|
|
6256
6312
|
const baseUrl = getLMStudioBaseUrl();
|
|
6257
6313
|
const url = new URL("/api/v1/chat", baseUrl);
|
|
6258
|
-
const
|
|
6259
|
-
|
|
6260
|
-
|
|
6261
|
-
|
|
6262
|
-
|
|
6263
|
-
|
|
6264
|
-
|
|
6265
|
-
|
|
6266
|
-
|
|
6267
|
-
|
|
6268
|
-
|
|
6269
|
-
|
|
6270
|
-
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
6271
|
-
}
|
|
6272
|
-
}
|
|
6273
|
-
if (!resp.ok) {
|
|
6274
|
-
const body = await resp.text().catch(() => "");
|
|
6275
|
-
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
6276
|
-
}
|
|
6314
|
+
const resp = await negotiateRequest(
|
|
6315
|
+
"generate",
|
|
6316
|
+
model,
|
|
6317
|
+
baseUrl,
|
|
6318
|
+
options,
|
|
6319
|
+
(sampling, limitMode) => fetch(url, {
|
|
6320
|
+
method: "POST",
|
|
6321
|
+
headers: getLMStudioHeaders(),
|
|
6322
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, sampling, limitMode)),
|
|
6323
|
+
signal: controller.signal
|
|
6324
|
+
})
|
|
6325
|
+
);
|
|
6277
6326
|
const payload = await resp.json();
|
|
6278
6327
|
const nativeResponse = extractNativeResponse(payload);
|
|
6279
6328
|
const response = nativeResponse.response;
|
|
@@ -6331,25 +6380,18 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
6331
6380
|
};
|
|
6332
6381
|
try {
|
|
6333
6382
|
resetStallTimer();
|
|
6334
|
-
const
|
|
6335
|
-
|
|
6336
|
-
|
|
6337
|
-
|
|
6338
|
-
|
|
6339
|
-
|
|
6340
|
-
|
|
6341
|
-
|
|
6342
|
-
|
|
6343
|
-
|
|
6344
|
-
|
|
6345
|
-
|
|
6346
|
-
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
6347
|
-
}
|
|
6348
|
-
}
|
|
6349
|
-
if (!resp.ok) {
|
|
6350
|
-
const body = await resp.text().catch(() => "");
|
|
6351
|
-
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
6352
|
-
}
|
|
6383
|
+
const resp = await negotiateRequest(
|
|
6384
|
+
"stream",
|
|
6385
|
+
model,
|
|
6386
|
+
baseUrl,
|
|
6387
|
+
options,
|
|
6388
|
+
(sampling, limitMode) => fetch(url, {
|
|
6389
|
+
method: "POST",
|
|
6390
|
+
headers: getLMStudioHeaders(),
|
|
6391
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, sampling, limitMode)),
|
|
6392
|
+
signal: controller.signal
|
|
6393
|
+
})
|
|
6394
|
+
);
|
|
6353
6395
|
if (!resp.body) {
|
|
6354
6396
|
throw new Error("LM Studio stream response body is empty");
|
|
6355
6397
|
}
|
|
@@ -6478,7 +6520,7 @@ function abortOngoingRequests2() {
|
|
|
6478
6520
|
}
|
|
6479
6521
|
activeAbortControllers.clear();
|
|
6480
6522
|
}
|
|
6481
|
-
var DEFAULT_LM_STUDIO_BASE_URL, LM_STUDIO_INIT_TIMEOUT_MS, LM_STUDIO_METADATA_TIMEOUT_MS, DEFAULT_STREAM_STALL_TIMEOUT_MS2, LM_STUDIO_CLI_TIMEOUT_MS, SHARED_STREAM_STALL_TIMEOUT_ENV2, DEFAULT_LM_STUDIO_HOME_DIR, DEFAULT_LM_STUDIO_MODELS_DIR, LM_STUDIO_HOME_DIR_ENV, LM_STUDIO_MODELS_DIR_ENV, LM_STUDIO_CLI_PATH_ENV, activeAbortControllers, directorySizeCache, modelDefinitionCache, NON_THINKING_SYSTEM_PROMPT;
|
|
6523
|
+
var DEFAULT_LM_STUDIO_BASE_URL, LM_STUDIO_INIT_TIMEOUT_MS, LM_STUDIO_METADATA_TIMEOUT_MS, DEFAULT_STREAM_STALL_TIMEOUT_MS2, LM_STUDIO_CLI_TIMEOUT_MS, SHARED_STREAM_STALL_TIMEOUT_ENV2, DEFAULT_LM_STUDIO_HOME_DIR, DEFAULT_LM_STUDIO_MODELS_DIR, LM_STUDIO_HOME_DIR_ENV, LM_STUDIO_MODELS_DIR_ENV, LM_STUDIO_CLI_PATH_ENV, activeAbortControllers, directorySizeCache, modelDefinitionCache, outputLimitModeCache, NON_THINKING_SYSTEM_PROMPT, UNSUPPORTED_SAMPLING_FIELD_PATTERN, UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN, MAX_NEGOTIATE_RETRIES;
|
|
6482
6524
|
var init_lm_studio_client = __esm({
|
|
6483
6525
|
"src/core/lm-studio-client.ts"() {
|
|
6484
6526
|
init_utils();
|
|
@@ -6496,12 +6538,16 @@ var init_lm_studio_client = __esm({
|
|
|
6496
6538
|
activeAbortControllers = /* @__PURE__ */ new Set();
|
|
6497
6539
|
directorySizeCache = /* @__PURE__ */ new Map();
|
|
6498
6540
|
modelDefinitionCache = /* @__PURE__ */ new Map();
|
|
6541
|
+
outputLimitModeCache = /* @__PURE__ */ new Map();
|
|
6499
6542
|
NON_THINKING_SYSTEM_PROMPT = [
|
|
6500
6543
|
"You are in non-thinking mode for benchmark reproducibility.",
|
|
6501
6544
|
"Return only the final answer.",
|
|
6502
6545
|
"Do not output internal reasoning, chain-of-thought, or scratchpad.",
|
|
6503
6546
|
"Never output tags or sections like <think>, </think>, [THINK], [/THINK], or Thinking Process."
|
|
6504
6547
|
].join(" ");
|
|
6548
|
+
UNSUPPORTED_SAMPLING_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/;
|
|
6549
|
+
UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|unexpected|additional|extra|invalid field/;
|
|
6550
|
+
MAX_NEGOTIATE_RETRIES = 5;
|
|
6505
6551
|
}
|
|
6506
6552
|
});
|
|
6507
6553
|
|
|
@@ -53935,6 +53981,7 @@ async function selectWithArrows(title, options, config = {}) {
|
|
|
53935
53981
|
};
|
|
53936
53982
|
readline4.emitKeypressEvents(stdin);
|
|
53937
53983
|
stdin.resume();
|
|
53984
|
+
stdin.ref?.();
|
|
53938
53985
|
if (stdin.isTTY) {
|
|
53939
53986
|
stdin.setRawMode(true);
|
|
53940
53987
|
}
|
|
@@ -54017,6 +54064,7 @@ ${source_default.dim(message)}
|
|
|
54017
54064
|
};
|
|
54018
54065
|
readline4.emitKeypressEvents(stdin);
|
|
54019
54066
|
stdin.resume();
|
|
54067
|
+
stdin.ref?.();
|
|
54020
54068
|
if (stdin.isTTY) stdin.setRawMode(true);
|
|
54021
54069
|
stdout.write("\x1B[?25l");
|
|
54022
54070
|
stdin.on("keypress", onKeypress);
|