metrillm-mcp 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +86 -43
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -735,6 +735,7 @@ var defaultKeepAlive2;
|
|
|
735
735
|
var activeAbortControllers = /* @__PURE__ */ new Set();
|
|
736
736
|
var directorySizeCache = /* @__PURE__ */ new Map();
|
|
737
737
|
var modelDefinitionCache = /* @__PURE__ */ new Map();
|
|
738
|
+
var outputLimitModeCache = /* @__PURE__ */ new Map();
|
|
738
739
|
var NON_THINKING_SYSTEM_PROMPT = [
|
|
739
740
|
"You are in non-thinking mode for benchmark reproducibility.",
|
|
740
741
|
"Return only the final answer.",
|
|
@@ -765,12 +766,22 @@ function buildNativeThinkingOption(think) {
|
|
|
765
766
|
/**
 * Reports whether the caller supplied any sampling override.
 *
 * @param {object|null|undefined} options - Generation options; may be absent.
 * @returns {boolean} `true` when `options.top_p` or `options.seed` is anything
 *   other than `undefined` (an explicit `null` therefore counts as an override).
 */
function hasSamplingOverrides2(options) {
  const samplingValues = [options?.top_p, options?.seed];
  return samplingValues.some((value) => value !== void 0);
}
|
|
769
|
+
// Wording that marks a sampling field (seed / top_p) as rejected by the backend.
var UNSUPPORTED_SAMPLING_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|invalid|unexpected|additional|extra/;
// Wording that marks an output-limit field as rejected. Unlike the sampling
// pattern, a bare "invalid" does not match here; only "invalid field" does.
var UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN = /unrecognized|unknown|not support|unsupported|unexpected|additional|extra|invalid field/;
/**
 * Decides whether an error response says the backend refuses `seed` / `top_p`.
 *
 * @param {number} status - HTTP status of the failed response.
 * @param {string} text - Response body; only read when the status is 400 or 422.
 * @returns {boolean} `true` only for a 400/422 whose lower-cased body names a
 *   sampling field as a whole word and also matches
 *   `UNSUPPORTED_SAMPLING_FIELD_PATTERN`.
 */
function isUnsupportedSamplingMessage(status, text) {
  const isValidationStatus = status === 400 || status === 422;
  if (!isValidationStatus) return false;
  const message = text.toLowerCase();
  const namesSamplingField = /\b(seed|top_p|topp)\b/.test(message);
  return namesSamplingField ? UNSUPPORTED_SAMPLING_FIELD_PATTERN.test(message) : false;
}
|
|
777
|
+
/**
 * Decides whether an error response means the output-limit field sent in the
 * current mode was refused.
 *
 * @param {number} status - HTTP status of the failed response.
 * @param {string} text - Response body; only read when the status is 400 or 422.
 * @param {string} mode - `"legacy"` means `max_tokens` was sent; any other
 *   value is treated as `max_output_tokens` having been sent.
 * @returns {boolean} `true` for a 400/422 whose lower-cased body either names
 *   the field that was sent together with `UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN`
 *   wording, or names the other field together with "required"/"missing".
 */
function isUnsupportedOutputLimitMessage(status, text, mode) {
  if (status !== 400 && status !== 422) return false;
  const message = text.toLowerCase();
  const [sentField, otherField] = mode === "legacy"
    ? ["max_tokens", "max_output_tokens"]
    : ["max_output_tokens", "max_tokens"];
  // The backend complains about the field that was sent.
  if (message.includes(sentField) && UNSUPPORTED_OUTPUT_LIMIT_FIELD_PATTERN.test(message)) {
    return true;
  }
  // The backend asks for the field that was not sent.
  return message.includes(otherField) && /\b(required|missing)\b/.test(message);
}
|
|
775
786
|
function extractLMStudioErrorMessage(body) {
|
|
776
787
|
const trimmed = body.trim();
|
|
@@ -804,20 +815,66 @@ function buildLMStudioRequestError(kind, model, status, statusText, body) {
|
|
|
804
815
|
const suffix = backendMessage ? ` ${backendMessage}` : "";
|
|
805
816
|
return new Error(`LM Studio ${kind} failed (${status} ${statusText})${suffix}`.trim());
|
|
806
817
|
}
|
|
807
|
-
function
|
|
818
|
+
/**
 * Creates the error reported when the backend has refused both output-limit
 * fields.
 *
 * @param {string} kind - Request kind interpolated into the message.
 * @param {string} model - Model identifier interpolated into the message.
 * @param {string} body - Raw error body, passed to `extractLMStudioErrorMessage`.
 * @returns {Error} Error whose message is the space-joined sentences below; the
 *   "Backend error" sentence is added only when a backend message was extracted.
 */
function buildUnsupportedOutputLimitNegotiationError(kind, model, body) {
  const backendMessage = extractLMStudioErrorMessage(body);
  const sentences = [
    `LM Studio ${kind} failed for "${model}" because this backend rejected both max_output_tokens and max_tokens.`,
    "MetriLLM cannot safely continue without an explicit output limit because benchmarks rely on bounded generation."
  ];
  if (backendMessage) {
    sentences.push(`Backend error: ${backendMessage}`);
  }
  return new Error(sentences.join(" "));
}
|
|
828
|
+
/**
 * Assembles the JSON payload for an LM Studio native chat request.
 *
 * @param {string} model - Model identifier, sent as `model`.
 * @param {string} prompt - Prompt text, sent as `input`.
 * @param {object|null|undefined} options - Generation options; the fields read
 *   here are `think`, `num_predict`, `temperature`, `top_p` and `seed`.
 * @param {boolean} stream - Sent as `stream`.
 * @param {boolean} includeSampling - When falsy, `top_p` and `seed` are left
 *   out even if present in `options`.
 * @param {string} outputLimitMode - `"preferred"` sends the limit as
 *   `max_output_tokens`, `"legacy"` sends it as `max_tokens`; any other value
 *   sends no limit field.
 * @returns {object} The request payload.
 */
function buildNativeChatBody(model, prompt, options, stream, includeSampling, outputLimitMode) {
  const reasoning = buildNativeThinkingOption(options?.think);
  // `??` rather than an `undefined` check: an explicit `null` must fall back to
  // the default too, otherwise the request would carry a `null` output limit.
  const outputLimit = options?.num_predict ?? 512;
  return {
    model,
    input: prompt,
    temperature: options?.temperature ?? 0,
    ...includeSampling && options?.top_p !== void 0 ? { top_p: options.top_p } : {},
    ...includeSampling && options?.seed !== void 0 ? { seed: options.seed } : {},
    ...outputLimitMode === "preferred" ? { max_output_tokens: outputLimit } : {},
    ...outputLimitMode === "legacy" ? { max_tokens: outputLimit } : {},
    stream,
    ...reasoning !== void 0 ? { reasoning } : {},
    // With thinking explicitly disabled, also send the non-thinking system prompt.
    ...options?.think === false ? { system_prompt: NON_THINKING_SYSTEM_PROMPT } : {}
  };
}
|
|
844
|
+
// Upper bound on follow-up requests made while negotiating request fields.
var MAX_NEGOTIATE_RETRIES = 5;
/**
 * Sends a request and, on failure, retries with adjusted fields: sampling
 * overrides are dropped if the backend refuses them, and the output-limit
 * field is switched between the "preferred" and "legacy" modes if the backend
 * refuses the one that was sent.
 *
 * @param {string} kind - Request kind used in error messages.
 * @param {string} model - Model identifier used in error messages.
 * @param {*} cacheKey - Key into `outputLimitModeCache`; the mode that finally
 *   succeeded is stored under it and used as the starting mode next time.
 * @param {object|null|undefined} options - Generation options, consulted only
 *   to see whether sampling overrides were supplied.
 * @param {(includeSampling: boolean, outputLimitMode: string) => Promise<Response>} makeRequest -
 *   Performs one attempt with the given negotiation state.
 * @returns {Promise<Response>} The first response whose `ok` flag is set.
 * @throws {Error} When both output-limit modes were refused, when a failure is
 *   not one this function can negotiate around, or when the retry budget is
 *   exhausted.
 */
async function negotiateRequest(kind, model, cacheKey, options, makeRequest) {
  let includeSampling = true;
  let outputLimitMode = outputLimitModeCache.get(cacheKey) ?? "preferred";
  const triedOutputLimitModes = new Set([outputLimitMode]);
  let resp = await makeRequest(includeSampling, outputLimitMode);
  let retries = 0;
  while (!resp.ok && retries < MAX_NEGOTIATE_RETRIES) {
    retries++;
    // Best effort: an unreadable body is treated as empty so that the error
    // raised below still reports the HTTP status.
    const body = await resp.text().catch(() => "");
    if (includeSampling && hasSamplingOverrides2(options) && isUnsupportedSamplingMessage(resp.status, body)) {
      includeSampling = false;
      resp = await makeRequest(includeSampling, outputLimitMode);
      continue;
    }
    if (isUnsupportedOutputLimitMessage(resp.status, body, outputLimitMode)) {
      // Switch to the other mode only if it has not been attempted yet. When
      // negotiation starts from a cached "legacy" mode, this avoids sending a
      // second "legacy" request after "preferred" has been refused as well.
      const otherMode = outputLimitMode === "preferred" ? "legacy" : "preferred";
      if (triedOutputLimitModes.has(otherMode)) {
        throw buildUnsupportedOutputLimitNegotiationError(kind, model, body);
      }
      outputLimitMode = otherMode;
      triedOutputLimitModes.add(outputLimitMode);
      resp = await makeRequest(includeSampling, outputLimitMode);
      continue;
    }
    throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
  }
  if (!resp.ok) {
    // Retry budget exhausted while the latest attempt still failed.
    const body = await resp.text().catch(() => "");
    throw buildLMStudioRequestError(kind, model, resp.status, resp.statusText, body);
  }
  outputLimitModeCache.set(cacheKey, outputLimitMode);
  return resp;
}
|
|
821
878
|
function getNativeStatNumber(value) {
|
|
822
879
|
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) return void 0;
|
|
823
880
|
return value;
|
|
@@ -1530,25 +1587,18 @@ async function generate2(model, prompt, options) {
|
|
|
1530
1587
|
try {
|
|
1531
1588
|
const baseUrl = getLMStudioBaseUrl();
|
|
1532
1589
|
const url = new URL("/api/v1/chat", baseUrl);
|
|
1533
|
-
const
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
1546
|
-
}
|
|
1547
|
-
}
|
|
1548
|
-
if (!resp.ok) {
|
|
1549
|
-
const body = await resp.text().catch(() => "");
|
|
1550
|
-
throw buildLMStudioRequestError("generate", model, resp.status, resp.statusText, body);
|
|
1551
|
-
}
|
|
1590
|
+
const resp = await negotiateRequest(
|
|
1591
|
+
"generate",
|
|
1592
|
+
model,
|
|
1593
|
+
baseUrl,
|
|
1594
|
+
options,
|
|
1595
|
+
(sampling, limitMode) => fetch(url, {
|
|
1596
|
+
method: "POST",
|
|
1597
|
+
headers: getLMStudioHeaders(),
|
|
1598
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, false, sampling, limitMode)),
|
|
1599
|
+
signal: controller.signal
|
|
1600
|
+
})
|
|
1601
|
+
);
|
|
1552
1602
|
const payload = await resp.json();
|
|
1553
1603
|
const nativeResponse = extractNativeResponse(payload);
|
|
1554
1604
|
const response = nativeResponse.response;
|
|
@@ -1606,25 +1656,18 @@ async function generateStream2(model, prompt, callbacks, options) {
|
|
|
1606
1656
|
};
|
|
1607
1657
|
try {
|
|
1608
1658
|
resetStallTimer();
|
|
1609
|
-
const
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
1622
|
-
}
|
|
1623
|
-
}
|
|
1624
|
-
if (!resp.ok) {
|
|
1625
|
-
const body = await resp.text().catch(() => "");
|
|
1626
|
-
throw buildLMStudioRequestError("stream", model, resp.status, resp.statusText, body);
|
|
1627
|
-
}
|
|
1659
|
+
const resp = await negotiateRequest(
|
|
1660
|
+
"stream",
|
|
1661
|
+
model,
|
|
1662
|
+
baseUrl,
|
|
1663
|
+
options,
|
|
1664
|
+
(sampling, limitMode) => fetch(url, {
|
|
1665
|
+
method: "POST",
|
|
1666
|
+
headers: getLMStudioHeaders(),
|
|
1667
|
+
body: JSON.stringify(buildNativeChatBody(model, prompt, options, true, sampling, limitMode)),
|
|
1668
|
+
signal: controller.signal
|
|
1669
|
+
})
|
|
1670
|
+
);
|
|
1628
1671
|
if (!resp.body) {
|
|
1629
1672
|
throw new Error("LM Studio stream response body is empty");
|
|
1630
1673
|
}
|