metrillm-mcp 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -735,6 +735,7 @@ var defaultKeepAlive2;
735
735
  var activeAbortControllers = /* @__PURE__ */ new Set();
736
736
  var directorySizeCache = /* @__PURE__ */ new Map();
737
737
  var modelDefinitionCache = /* @__PURE__ */ new Map();
738
// Maps a cache key to the output-limit mode ("preferred" = max_output_tokens,
// "legacy" = max_tokens) that last produced a successful response.
// negotiateRequest reads it to choose the first mode to try and writes it after success;
// the visible callers (generate2 / generateStream2) pass the LM Studio base URL as the key.
+ var outputLimitModeCache = /* @__PURE__ */ new Map();
738
739
  var NON_THINKING_SYSTEM_PROMPT = [
739
740
  "You are in non-thinking mode for benchmark reproducibility.",
740
741
  "Return only the final answer.",
@@ -765,12 +766,22 @@ function buildNativeThinkingOption(think) {
765
766
  function hasSamplingOverrides2(options) {
766
767
  return options?.top_p !== void 0 || options?.seed !== void 0;
767
768
  }
769
// Phrases that isUnsupportedSamplingMessage looks for in a lowercased error body
// (after it has confirmed the body names seed/top_p/topp).
+ var UNSUPPORTED_SAMPLING_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/;
770
// Phrases that isUnsupportedOutputLimitMessage looks for alongside the output-limit field name.
// NOTE(review): this list has "invalid field" where the sampling list has bare "invalid" —
// presumably so value errors (e.g. an invalid number) do not trigger a field switch; confirm intent.
+ var UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|unexpected|additional|extra|invalid field/;
768
771
  // Decides whether a failed response means the backend rejected the seed/top_p sampling fields.
  //   status - HTTP status; only 400 and 422 are considered.
  //   text   - response body text; lowercased before matching.
  // Returns true only when the body names seed, top_p or topp as a whole word AND contains one
  // of the "unsupported"-style phrases.
  function isUnsupportedSamplingMessage(status, text) {
769
772
  if (status !== 400 && status !== 422) return false;
770
773
  const lower = text.toLowerCase();
771
- const mentionsSampling = /\b(seed|top_p|topp)\b/.test(lower);
772
- if (!mentionsSampling) return false;
773
- return /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/.test(lower);
774
// The added rows below perform the same two checks as the removed rows above; the phrase list
// now lives in the shared UNSUPPORTED_SAMPLING_FIELD_PATTERN constant instead of an inline regex.
+ if (!/\b(seed|top_p|topp)\b/.test(lower)) return false;
775
+ return UNSUPPORTED_SAMPLING_FIELD_PATTERN.test(lower);
776
+ }
777
// Decides whether a failed response means the output-limit field sent under `mode` was rejected,
// so the caller can retry with the other field name.
//   status - HTTP status; only 400 and 422 are considered.
//   text   - response body text; lowercased before matching.
//   mode   - "legacy" means max_tokens was sent; any other value is treated as max_output_tokens.
// Returns true when the body names the sent field together with an "unsupported"-style phrase,
// or names the other field together with the whole word "required" or "missing".
+ function isUnsupportedOutputLimitMessage(status, text, mode) {
778
+ if (status !== 400 && status !== 422) return false;
779
+ const lower = text.toLowerCase();
780
+ const fieldName = mode === "legacy" ? "max_tokens" : "max_output_tokens";
781
+ const alternateFieldName = mode === "legacy" ? "max_output_tokens" : "max_tokens";
782
// Substring matching is unambiguous here: "max_output_tokens" does not contain "max_tokens".
+ const mentionsUnsupportedCurrentField = lower.includes(fieldName) && UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN.test(lower);
783
+ const mentionsRequiredAlternateField = lower.includes(alternateFieldName) && /\b(required|missing)\b/.test(lower);
784
+ return mentionsUnsupportedCurrentField || mentionsRequiredAlternateField;
774
785
  }
775
786
  function extractLMStudioErrorMessage(body) {
776
787
  const trimmed = body.trim();
@@ -804,20 +815,66 @@ function buildLMStudioRequestError(kind, model, status, statusText, body) {
804
815
  const suffix = backendMessage ? ` ${backendMessage}` : "";
805
816
  return new Error(`LM Studio ${kind} failed (${status} ${statusText})${suffix}`.trim());
806
817
  }
807
- function buildNativeChatBody(model, prompt, options, stream, includeSampling) {
818
// Builds (returns, does not throw) the Error used when both output-limit field names were rejected.
//   kind  - request label interpolated into the message (callers pass "generate" or "stream").
//   model - model name interpolated into the message.
//   body  - raw response body; passed to extractLMStudioErrorMessage to obtain the backend message.
+ function buildUnsupportedOutputLimitNegotiationError(kind, model, body) {
819
+ const backendMessage = extractLMStudioErrorMessage(body);
820
+ return new Error(
821
+ [
822
+ `LM Studio ${kind} failed for "${model}" because this backend rejected both max_output_tokens and max_tokens.`,
823
+ "MetriLLM cannot safely continue without an explicit output limit because benchmarks rely on bounded generation.",
824
// Evaluates to undefined when there is no backend message; filter(Boolean) below drops it.
+ backendMessage ? `Backend error: ${backendMessage}` : void 0
825
+ ].filter(Boolean).join(" ")
826
+ );
827
+ }
828
// Builds the JSON-serialisable request body for the LM Studio chat request.
//   model, prompt    - copied into `model` and `input`.
//   options          - optional; temperature, top_p, seed, num_predict and think are read from it.
//   stream           - copied into `stream` unchanged.
//   includeSampling  - when falsy, top_p and seed are left out even if set in options.
//   outputLimitMode  - "preferred" emits max_output_tokens, "legacy" emits max_tokens.
// NOTE(review): any other outputLimitMode value (including undefined) emits NO output-limit
// field at all; both callers visible in this diff always pass a mode.
+ function buildNativeChatBody(model, prompt, options, stream, includeSampling, outputLimitMode) {
808
829
  const reasoning = buildNativeThinkingOption(options?.think);
830
// Defaults to 512 only when num_predict is undefined.
// NOTE(review): the removed `?? 512` below also replaced null with 512; this ternary passes
// null through to the request — confirm that is intended.
+ const outputLimit = options?.num_predict !== void 0 ? options.num_predict : 512;
809
831
  return {
810
832
  model,
811
833
  input: prompt,
812
834
  temperature: options?.temperature ?? 0,
813
835
  ...includeSampling && options?.top_p !== void 0 ? { top_p: options.top_p } : {},
814
836
  ...includeSampling && options?.seed !== void 0 ? { seed: options.seed } : {},
815
- max_tokens: options?.num_predict ?? 512,
837
// Exactly one of the two field names is emitted, chosen by outputLimitMode.
+ ...outputLimitMode === "preferred" ? { max_output_tokens: outputLimit } : {},
838
+ ...outputLimitMode === "legacy" ? { max_tokens: outputLimit } : {},
816
839
  stream,
817
840
  ...reasoning !== void 0 ? { reasoning } : {},
818
841
  // The non-thinking system prompt is attached only when think is exactly false.
  ...options?.think === false ? { system_prompt: NON_THINKING_SYSTEM_PROMPT } : {}
819
842
  };
820
843
  }
844
// Upper bound on iterations of the retry loop in negotiateRequest.
+ var MAX_NEGOTIATE_RETRIES = 5;
845
// Issues a request through makeRequest and, while the response is not ok, retries with the
// sampling fields dropped and/or the other output-limit field name.
//   kind        - request label used in error messages (callers pass "generate" or "stream").
//   model       - model name used in error messages.
//   cacheKey    - key into outputLimitModeCache (callers pass the LM Studio base URL).
//   options     - generation options; used here only to check for sampling overrides.
//   makeRequest - (includeSampling, outputLimitMode) => promise of a fetch-style response
//                 (its ok, status, statusText and text() are used).
// Returns the first ok response and records the mode that produced it in outputLimitModeCache.
// Throws the Error from buildUnsupportedOutputLimitNegotiationError when no further mode can be
// tried, or from buildLMStudioRequestError for any other failure.
+ async function negotiateRequest(kind, model, cacheKey, options, makeRequest) {
846
+ let includeSampling = true;
847
// Start from the mode that last succeeded for this key, otherwise "preferred".
+ let outputLimitMode = outputLimitModeCache.get(cacheKey) ?? "preferred";
848
+ const triedOutputLimitModes = /* @__PURE__ */ new Set([outputLimitMode]);
849
+ let resp = await makeRequest(includeSampling, outputLimitMode);
850
+ let retries = 0;
851
+ while (!resp.ok && retries < MAX_NEGOTIATE_RETRIES) {
852
+ retries++;
853
// A body that cannot be read is treated as empty text, so no pattern below will match it.
+ const body = await resp.text().catch(() => "");
854
// Sampling fields are dropped at most once, and only if the caller actually set them.
+ if (includeSampling && hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
855
+ includeSampling = false;
856
+ resp = await makeRequest(includeSampling, outputLimitMode);
857
+ continue;
858
+ }
859
+ if (isUnsupportedOutputLimitMessage(resp.status, body, outputLimitMode)) {
860
// preferred -> legacy always; legacy -> preferred only if preferred has not been tried yet.
// NOTE(review): the preferred -> legacy step does not consult triedOutputLimitModes, so a run
// that started from a cached "legacy" can send legacy a second time before giving up.
+ const nextMode = outputLimitMode === "preferred" ? "legacy" : !triedOutputLimitModes.has("preferred") ? "preferred" : null;
861
+ if (!nextMode) {
862
+ throw buildUnsupportedOutputLimitNegotiationError(kind, model, body);
863
+ }
864
+ outputLimitMode = nextMode;
865
+ triedOutputLimitModes.add(outputLimitMode);
866
+ resp = await makeRequest(includeSampling, outputLimitMode);
867
+ continue;
868
+ }
869
// Not a negotiable failure: surface it as-is.
+ throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
870
+ }
871
// Reached with a failed response only if the retry budget ran out; the body of this last
// response has not been read yet. NOTE(review): appears to be a defensive fallback.
+ if (!resp.ok) {
872
+ const body = await resp.text().catch(() => "");
873
+ throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
874
+ }
875
+ outputLimitModeCache.set(cacheKey, outputLimitMode);
876
+ return resp;
877
+ }
821
878
  function getNativeStatNumber(value) {
822
879
  if (typeof value !== "number" || !Number.isFinite(value) || value < 0) return void 0;
823
880
  return value;
@@ -1530,25 +1587,18 @@ async function generate2(model, prompt, options) {
1530
1587
  try {
1531
1588
  const baseUrl = getLMStudioBaseUrl();
1532
1589
  const url = new URL("/api/v1/chat", baseUrl);
1533
- const doRequest = (includeSampling) => fetch(url, {
1534
- method: "POST",
1535
- headers: getLMStudioHeaders(),
1536
- body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, includeSampling)),
1537
- signal: controller.signal
1538
- });
1539
- let resp = await doRequest(true);
1540
- if (!resp.ok) {
1541
- const body = await resp.text().catch(() => "");
1542
- if (hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
1543
- resp = await doRequest(false);
1544
- } else {
1545
- throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
1546
- }
1547
- }
1548
- if (!resp.ok) {
1549
- const body = await resp.text().catch(() => "");
1550
- throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
1551
- }
1590
+ const resp = await negotiateRequest(
1591
+ "generate",
1592
+ model,
1593
+ baseUrl,
1594
+ options,
1595
+ (sampling, limitMode) => fetch(url, {
1596
+ method: "POST",
1597
+ headers: getLMStudioHeaders(),
1598
+ body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, sampling, limitMode)),
1599
+ signal: controller.signal
1600
+ })
1601
+ );
1552
1602
  const payload = await resp.json();
1553
1603
  const nativeResponse = extractNativeResponse(payload);
1554
1604
  const response = nativeResponse.response;
@@ -1606,25 +1656,18 @@ async function generateStream2(model, prompt, callbacks, options) {
1606
1656
  };
1607
1657
  try {
1608
1658
  resetStallTimer();
1609
- const doRequest = (includeSampling) => fetch(url, {
1610
- method: "POST",
1611
- headers: getLMStudioHeaders(),
1612
- body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, includeSampling)),
1613
- signal: controller.signal
1614
- });
1615
- let resp = await doRequest(true);
1616
- if (!resp.ok) {
1617
- const body = await resp.text().catch(() => "");
1618
- if (hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
1619
- resp = await doRequest(false);
1620
- } else {
1621
- throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
1622
- }
1623
- }
1624
- if (!resp.ok) {
1625
- const body = await resp.text().catch(() => "");
1626
- throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
1627
- }
1659
+ const resp = await negotiateRequest(
1660
+ "stream",
1661
+ model,
1662
+ baseUrl,
1663
+ options,
1664
+ (sampling, limitMode) => fetch(url, {
1665
+ method: "POST",
1666
+ headers: getLMStudioHeaders(),
1667
+ body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, sampling, limitMode)),
1668
+ signal: controller.signal
1669
+ })
1670
+ );
1628
1671
  if (!resp.body) {
1629
1672
  throw new Error("LM Studio stream response body is empty");
1630
1673
  }
@@ -9034,7 +9077,7 @@ var BASE_DIR = join(homedir(), ".metrillm");
9034
9077
  var RESULTS_DIR = join(BASE_DIR, "results");
9035
9078
  var CONFIG_PATH = join(BASE_DIR, "config.json");
9036
9079
  // Baseline config that loadConfig spreads first, before values parsed from the config file.
  // NOTE(review): 0.2.6 changes the autoShare default from "ask" to true. Presumably true means
  // results are shared without prompting — confirm, and if so call out this opt-out default
  // in the release notes.
  var DEFAULT_CONFIG = {
9037
- autoShare: "ask"
9080
+ autoShare: true
9038
9081
  };
9039
9082
  async function ensureDirs() {
9040
9083
  await mkdir(RESULTS_DIR, { recursive: true });
@@ -9045,6 +9088,30 @@ function parseRuntimeBackend(value) {
9045
9088
  }
9046
9089
  return void 0;
9047
9090
  }
9091
// Normalises the autoShare-related fields of a parsed config object.
//   parsed - result of JSON.parse on the config file contents.
// Returns { autoShare, autoSharePreferenceSet } where autoShare is true or "ask".
// autoSharePreferenceSet is true only when the stored value is exactly true; otherwise it is
// undefined (never false).
// NOTE(review): a config file containing the JSON literal null would make `parsed` null and the
// first property access throw; the caller invokes this inside a try block whose handler is not
// visible here — confirm it covers that case.
+ function parseAutoShareConfig(parsed) {
9092
+ const autoSharePreferenceSet = parsed.autoSharePreferenceSet === true;
9093
+ if (parsed.autoShare === true) {
9094
+ return {
9095
+ autoShare: true,
9096
+ autoSharePreferenceSet: autoSharePreferenceSet || void 0
9097
+ };
9098
+ }
9099
+ if (parsed.autoShare === "ask") {
9100
+ return {
9101
+ autoShare: "ask",
9102
+ autoSharePreferenceSet: autoSharePreferenceSet || void 0
9103
+ };
9104
+ }
9105
// A stored false is mapped to "ask" rather than kept as false.
+ if (parsed.autoShare === false) {
9106
+ return {
9107
+ autoShare: "ask",
9108
+ autoSharePreferenceSet: autoSharePreferenceSet || void 0
9109
+ };
9110
+ }
9111
// Missing or unrecognised autoShare values fall back to true, and any stored
// autoSharePreferenceSet flag is not carried over on this path.
+ return {
9112
+ autoShare: true
9113
+ };
9114
+ }
9048
9115
  function resultFilename(result) {
9049
9116
  const ts = result.timestamp.replace(/[:.]/g, "-").replace("T", "_").replace(/[^0-9_-]/g, "");
9050
9117
  const model = result.model.replace(/[^a-zA-Z0-9._-]/g, "_");
@@ -9062,14 +9129,14 @@ async function loadConfig() {
9062
9129
  try {
9063
9130
  const content = await readFile2(CONFIG_PATH, "utf8");
9064
9131
  const parsed = JSON.parse(content);
9065
- const autoShare = parsed.autoShare === true ? true : "ask";
9132
+ const autoShareConfig = parseAutoShareConfig(parsed);
9066
9133
  const telemetry = typeof parsed.telemetry === "boolean" ? parsed.telemetry : void 0;
9067
9134
  const submitterNickname = typeof parsed.submitterNickname === "string" && isValidNickname(parsed.submitterNickname) ? normalizeNickname(parsed.submitterNickname) : void 0;
9068
9135
  const submitterEmail = typeof parsed.submitterEmail === "string" && isValidEmail(parsed.submitterEmail) ? normalizeEmail(parsed.submitterEmail) : void 0;
9069
9136
  const runtimeBackend = parseRuntimeBackend(parsed.runtimeBackend);
9070
9137
  return {
9071
9138
  ...DEFAULT_CONFIG,
9072
- autoShare,
9139
+ ...autoShareConfig,
9073
9140
  telemetry,
9074
9141
  submitterNickname,
9075
9142
  submitterEmail,
@@ -9354,7 +9421,7 @@ async function promptShare(result, deps = {}) {
9354
9421
  if (!input.isTTY || !output.isTTY) return "skip";
9355
9422
  const choice = await (deps.selectChoice ?? selectShareChoice)(result);
9356
9423
  if (choice === "always") {
9357
- await saveConfig({ ...config, autoShare: true });
9424
+ await saveConfig({ ...config, autoShare: true, autoSharePreferenceSet: true });
9358
9425
  return "share";
9359
9426
  }
9360
9427
  if (choice === "skip") {