metrillm-mcp 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -735,6 +735,7 @@ var defaultKeepAlive2;
735
735
  var activeAbortControllers = /* @__PURE__ */ new Set();
736
736
  var directorySizeCache = /* @__PURE__ */ new Map();
737
737
  var modelDefinitionCache = /* @__PURE__ */ new Map();
738
+ var outputLimitModeCache = /* @__PURE__ */ new Map();
738
739
  var NON_THINKING_SYSTEM_PROMPT = [
739
740
  "You are in non-thinking mode for benchmark reproducibility.",
740
741
  "Return only the final answer.",
@@ -765,12 +766,22 @@ function buildNativeThinkingOption(think) {
765
766
  function hasSamplingOverrides2(options) {
766
767
  return options?.top_p !== void 0 || options?.seed !== void 0;
767
768
  }
769
+ var UNSUPPORTED_SAMPLING_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/;
770
+ var UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|unexpected|additional|extra|invalid field/;
768
771
  function isUnsupportedSamplingMessage(status, text) {
769
772
  if (status !== 400 && status !== 422) return false;
770
773
  const lower = text.toLowerCase();
771
- const mentionsSampling = /\b(seed|top_p|topp)\b/.test(lower);
772
- if (!mentionsSampling) return false;
773
- return /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/.test(lower);
774
+ if (!/\b(seed|top_p|topp)\b/.test(lower)) return false;
775
+ return UNSUPPORTED_SAMPLING_FIELD_PATTERN.test(lower);
776
+ }
777
+ function isUnsupportedOutputLimitMessage(status, text, mode) {
778
+ if (status !== 400 && status !== 422) return false;
779
+ const lower = text.toLowerCase();
780
+ const fieldName = mode === "legacy" ? "max_tokens" : "max_output_tokens";
781
+ const alternateFieldName = mode === "legacy" ? "max_output_tokens" : "max_tokens";
782
+ const mentionsUnsupportedCurrentField = lower.includes(fieldName) && UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN.test(lower);
783
+ const mentionsRequiredAlternateField = lower.includes(alternateFieldName) && /\b(required|missing)\b/.test(lower);
784
+ return mentionsUnsupportedCurrentField || mentionsRequiredAlternateField;
774
785
  }
775
786
  function extractLMStudioErrorMessage(body) {
776
787
  const trimmed = body.trim();
@@ -804,20 +815,66 @@ function buildLMStudioRequestError(kind, model, status, statusText, body) {
804
815
  const suffix = backendMessage ? ` ${backendMessage}` : "";
805
816
  return new Error(`LM Studio ${kind} failed (${status} ${statusText})${suffix}`.trim());
806
817
  }
807
- function buildNativeChatBody(model, prompt, options, stream, includeSampling) {
818
+ function buildUnsupportedOutputLimitNegotiationError(kind, model, body) {
819
+ const backendMessage = extractLMStudioErrorMessage(body);
820
+ return new Error(
821
+ [
822
+ `LM Studio ${kind} failed for "${model}" because this backend rejected both max_output_tokens and max_tokens.`,
823
+ "MetriLLM cannot safely continue without an explicit output limit because benchmarks rely on bounded generation.",
824
+ backendMessage ? `Backend error: ${backendMessage}` : void 0
825
+ ].filter(Boolean).join(" ")
826
+ );
827
+ }
828
+ function buildNativeChatBody(model, prompt, options, stream, includeSampling, outputLimitMode) {
808
829
  const reasoning = buildNativeThinkingOption(options?.think);
830
+ const outputLimit = options?.num_predict !== void 0 ? options.num_predict : 512;
809
831
  return {
810
832
  model,
811
833
  input: prompt,
812
834
  temperature: options?.temperature ?? 0,
813
835
  ...includeSampling && options?.top_p !== void 0 ? { top_p: options.top_p } : {},
814
836
  ...includeSampling && options?.seed !== void 0 ? { seed: options.seed } : {},
815
- max_tokens: options?.num_predict ?? 512,
837
+ ...outputLimitMode === "preferred" ? { max_output_tokens: outputLimit } : {},
838
+ ...outputLimitMode === "legacy" ? { max_tokens: outputLimit } : {},
816
839
  stream,
817
840
  ...reasoning !== void 0 ? { reasoning } : {},
818
841
  ...options?.think === false ? { system_prompt: NON_THINKING_SYSTEM_PROMPT } : {}
819
842
  };
820
843
  }
844
+ var MAX_NEGOTIATE_RETRIES = 5;
845
+ async function negotiateRequest(kind, model, cacheKey, options, makeRequest) {
846
+ let includeSampling = true;
847
+ let outputLimitMode = outputLimitModeCache.get(cacheKey) ?? "preferred";
848
+ const triedOutputLimitModes = /* @__PURE__ */ new Set([outputLimitMode]);
849
+ let resp = await makeRequest(includeSampling, outputLimitMode);
850
+ let retries = 0;
851
+ while (!resp.ok && retries < MAX_NEGOTIATE_RETRIES) {
852
+ retries++;
853
+ const body = await resp.text().catch(() => "");
854
+ if (includeSampling && hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
855
+ includeSampling = false;
856
+ resp = await makeRequest(includeSampling, outputLimitMode);
857
+ continue;
858
+ }
859
+ if (isUnsupportedOutputLimitMessage(resp.status, body, outputLimitMode)) {
860
+ const nextMode = outputLimitMode === "preferred" ? "legacy" : !triedOutputLimitModes.has("preferred") ? "preferred" : null;
861
+ if (!nextMode) {
862
+ throw buildUnsupportedOutputLimitNegotiationError(kind, model, body);
863
+ }
864
+ outputLimitMode = nextMode;
865
+ triedOutputLimitModes.add(outputLimitMode);
866
+ resp = await makeRequest(includeSampling, outputLimitMode);
867
+ continue;
868
+ }
869
+ throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
870
+ }
871
+ if (!resp.ok) {
872
+ const body = await resp.text().catch(() => "");
873
+ throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
874
+ }
875
+ outputLimitModeCache.set(cacheKey, outputLimitMode);
876
+ return resp;
877
+ }
821
878
  function getNativeStatNumber(value) {
822
879
  if (typeof value !== "number" || !Number.isFinite(value) || value < 0) return void 0;
823
880
  return value;
@@ -1478,14 +1535,17 @@ async function listModels2() {
1478
1535
  throw new Error(`LM Studio list models failed (${resp.status} ${resp.statusText})`);
1479
1536
  }
1480
1537
  const data = await resp.json();
1481
- const ids = (data.data ?? []).map((m) => m.id?.trim()).filter((id) => Boolean(id));
1538
+ const primaryIds = (data.data ?? []).map((m) => m.id?.trim()).filter((id) => Boolean(id));
1482
1539
  const apiModels = await fetchApiModels();
1483
1540
  const apiById = /* @__PURE__ */ new Map();
1541
+ const secondaryIds = [];
1484
1542
  for (const model of apiModels ?? []) {
1485
1543
  const id = asNonEmptyString(model.id);
1486
1544
  if (!id) continue;
1487
1545
  apiById.set(id, model);
1546
+ secondaryIds.push(id);
1488
1547
  }
1548
+ const ids = Array.from(/* @__PURE__ */ new Set([...primaryIds, ...secondaryIds]));
1489
1549
  const modelsRootDir = await resolveModelsRootDir();
1490
1550
  const localMetadataEntries = await Promise.all(
1491
1551
  ids.map(async (id) => {
@@ -1527,25 +1587,18 @@ async function generate2(model, prompt, options) {
1527
1587
  try {
1528
1588
  const baseUrl = getLMStudioBaseUrl();
1529
1589
  const url = new URL("/api/v1/chat", baseUrl);
1530
- const doRequest = (includeSampling) => fetch(url, {
1531
- method: "POST",
1532
- headers: getLMStudioHeaders(),
1533
- body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, includeSampling)),
1534
- signal: controller.signal
1535
- });
1536
- let resp = await doRequest(true);
1537
- if (!resp.ok) {
1538
- const body = await resp.text().catch(() => "");
1539
- if (hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
1540
- resp = await doRequest(false);
1541
- } else {
1542
- throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
1543
- }
1544
- }
1545
- if (!resp.ok) {
1546
- const body = await resp.text().catch(() => "");
1547
- throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
1548
- }
1590
+ const resp = await negotiateRequest(
1591
+ "generate",
1592
+ model,
1593
+ baseUrl,
1594
+ options,
1595
+ (sampling, limitMode) => fetch(url, {
1596
+ method: "POST",
1597
+ headers: getLMStudioHeaders(),
1598
+ body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, sampling, limitMode)),
1599
+ signal: controller.signal
1600
+ })
1601
+ );
1549
1602
  const payload = await resp.json();
1550
1603
  const nativeResponse = extractNativeResponse(payload);
1551
1604
  const response = nativeResponse.response;
@@ -1603,25 +1656,18 @@ async function generateStream2(model, prompt, callbacks, options) {
1603
1656
  };
1604
1657
  try {
1605
1658
  resetStallTimer();
1606
- const doRequest = (includeSampling) => fetch(url, {
1607
- method: "POST",
1608
- headers: getLMStudioHeaders(),
1609
- body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, includeSampling)),
1610
- signal: controller.signal
1611
- });
1612
- let resp = await doRequest(true);
1613
- if (!resp.ok) {
1614
- const body = await resp.text().catch(() => "");
1615
- if (hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
1616
- resp = await doRequest(false);
1617
- } else {
1618
- throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
1619
- }
1620
- }
1621
- if (!resp.ok) {
1622
- const body = await resp.text().catch(() => "");
1623
- throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
1624
- }
1659
+ const resp = await negotiateRequest(
1660
+ "stream",
1661
+ model,
1662
+ baseUrl,
1663
+ options,
1664
+ (sampling, limitMode) => fetch(url, {
1665
+ method: "POST",
1666
+ headers: getLMStudioHeaders(),
1667
+ body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, sampling, limitMode)),
1668
+ signal: controller.signal
1669
+ })
1670
+ );
1625
1671
  if (!resp.body) {
1626
1672
  throw new Error("LM Studio stream response body is empty");
1627
1673
  }