copilot-api-plus 1.2.16 → 1.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -1863,6 +1863,13 @@ async function* wrapGeneratorWithRelease(gen, releaseSlot) {
1863
1863
  */
1864
1864
  const reasoningUnsupportedModels = /* @__PURE__ */ new Set();
1865
1865
  /**
1866
+ * Models whose reasoning_effort must be capped at a lower level.
1867
+ * e.g. claude-opus-4.7 rejects "high" but accepts "medium".
1868
+ * When a model returns 400 with "is not supported by model", it is added
1869
+ * here with "medium" as its effort cap (the only downgrade level applied).
1870
+ */
1871
+ const reasoningEffortCap = /* @__PURE__ */ new Map();
1872
+ /**
1866
1873
  * Compute an appropriate thinking_budget from model capabilities.
1867
1874
  * Returns undefined if the model does not support thinking.
1868
1875
  */
@@ -1892,7 +1899,9 @@ function isToolChoiceForced(toolChoice) {
1892
1899
  * 1. If the client already set reasoning_effort or thinking_budget → keep as-is
1893
1900
  * 2. If tool_choice forces tool use → skip (API rejects the combination)
1894
1901
  * 3. If model capabilities declare max_thinking_budget → inject thinking_budget
1895
- * 4. Otherwise → inject reasoning_effort="high" (works on claude-*-4.6)
1902
+ * 4. Otherwise → inject reasoning_effort at the highest level the model supports:
1903
+ * - "high" by default (maximum thinking for most models)
1904
+ * - Capped to "medium" if the model previously rejected a higher level
1896
1905
  *
1897
1906
  * The fallback to reasoning_effort ensures thinking works even when the
1898
1907
  * /models endpoint doesn't expose thinking budget fields.
@@ -1905,16 +1914,17 @@ function injectThinking(payload, resolvedModel) {
1905
1914
  ...payload,
1906
1915
  thinking_budget: budget
1907
1916
  };
1908
- if (!reasoningUnsupportedModels.has(resolvedModel)) return {
1917
+ if (reasoningUnsupportedModels.has(resolvedModel)) return payload;
1918
+ const effort = reasoningEffortCap.get(resolvedModel) ?? "high";
1919
+ return {
1909
1920
  ...payload,
1910
- reasoning_effort: "high"
1921
+ reasoning_effort: effort
1911
1922
  };
1912
- return payload;
1913
1923
  }
1914
1924
  function logThinkingInjection(original, injected, resolvedModel) {
1915
1925
  if (original.reasoning_effort || original.thinking_budget) consola.debug(`Thinking: translated (reasoning_effort=${original.reasoning_effort ?? "none"} / thinking_budget=${original.thinking_budget ?? "none"})`);
1916
1926
  else if (injected.thinking_budget && injected.thinking_budget !== original.thinking_budget) consola.debug(`Thinking: injected thinking_budget=${injected.thinking_budget} for "${resolvedModel}"`);
1917
- else if (injected.reasoning_effort === "high") consola.debug(`Thinking: injected reasoning_effort=high for "${resolvedModel}"`);
1927
+ else if (injected.reasoning_effort && injected.reasoning_effort !== original.reasoning_effort) consola.debug(`Thinking: injected reasoning_effort=${injected.reasoning_effort} for "${resolvedModel}"`);
1918
1928
  else if (reasoningUnsupportedModels.has(resolvedModel)) consola.debug(`Thinking: skipped — "${resolvedModel}" does not support reasoning`);
1919
1929
  }
1920
1930
  const createChatCompletions = async (payload) => {
@@ -1934,10 +1944,24 @@ const createChatCompletions = async (payload) => {
1934
1944
  releaseSlot();
1935
1945
  return result;
1936
1946
  } catch (error) {
1937
- if (wasInjected && error instanceof HTTPError && error.response.status === 400 && error.message.includes("Unrecognized request argument")) {
1938
- reasoningUnsupportedModels.add(resolvedModel);
1939
- consola.info(`Model "${resolvedModel}" does not support reasoning_effort — disabled for future requests`);
1940
- return retryWithoutReasoning(routedPayload, releaseSlot);
1947
+ if (error instanceof HTTPError && error.response.status === 400) {
1948
+ const errMsg = error.message;
1949
+ if (wasInjected && errMsg.includes("Unrecognized request argument")) {
1950
+ reasoningUnsupportedModels.add(resolvedModel);
1951
+ consola.info(`Model "${resolvedModel}" does not support reasoning_effort — disabled for future requests`);
1952
+ return retryWithoutReasoning(routedPayload, releaseSlot);
1953
+ }
1954
+ if (errMsg.includes("is not supported by model")) {
1955
+ const currentEffort = thinkingPayload.reasoning_effort;
1956
+ if (currentEffort && currentEffort !== "medium" && currentEffort !== "low") {
1957
+ reasoningEffortCap.set(resolvedModel, "medium");
1958
+ consola.info(`Model "${resolvedModel}" rejected reasoning_effort="${currentEffort}" — downgrading to "medium" for future requests`);
1959
+ return retryWithDowngradedReasoning({
1960
+ ...routedPayload,
1961
+ reasoning_effort: "medium"
1962
+ }, releaseSlot);
1963
+ }
1964
+ }
1941
1965
  }
1942
1966
  releaseSlot();
1943
1967
  throw error;
@@ -1959,6 +1983,21 @@ async function retryWithoutReasoning(payload, releaseSlot) {
1959
1983
  }
1960
1984
  }
1961
1985
  /**
1986
+ * Retry a request with a downgraded reasoning_effort after the model
1987
+ * rejected the higher value (e.g. "high" → "medium").
1988
+ */
1989
+ async function retryWithDowngradedReasoning(payload, releaseSlot) {
1990
+ try {
1991
+ const result = await dispatchRequest(payload);
1992
+ if (Symbol.asyncIterator in result) return wrapGeneratorWithRelease(result, releaseSlot);
1993
+ releaseSlot();
1994
+ return result;
1995
+ } catch (retryError) {
1996
+ releaseSlot();
1997
+ throw retryError;
1998
+ }
1999
+ }
2000
+ /**
1962
2001
  * Dispatch request to either single-account or multi-account path.
1963
2002
  */
1964
2003
  function dispatchRequest(payload) {