@oh-my-pi/pi-catalog 16.0.0 → 16.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.0.2] - 2026-06-16
6
+
7
+ ### Fixed
8
+
9
+ - Fixed Kimi output caps for Umans AI Coding Plan and Venice so discovery metadata cannot use context-sized token ceilings as request caps.
10
+ - Marked Umans Anthropic-compatible models as client-tool escaped so cached and bundled metadata do not expose `web_search` as a provider server tool.
11
+
12
+ ## [16.0.1] - 2026-06-15
13
+
14
+ ### Added
15
+
16
+ - Added the Umans AI Coding Plan provider catalog with Anthropic-compatible model metadata and dynamic discovery ([#2636](https://github.com/can1357/oh-my-pi/pull/2636) by [@oldschoola](https://github.com/oldschoola)).
17
+
5
18
  ## [16.0.0] - 2026-06-15
6
19
 
7
20
  ### Breaking Changes
@@ -65,6 +65,10 @@ export declare const KNOWN_HOSTS: {
65
65
  readonly providers: readonly ["alibaba-coding-plan"];
66
66
  readonly urlMarkers: readonly ["dashscope"];
67
67
  };
68
+ readonly umans: {
69
+ readonly providers: readonly ["umans"];
70
+ readonly urlMarkers: readonly ["api.code.umans.ai"];
71
+ };
68
72
  readonly xiaomi: {
69
73
  readonly providers: readonly ["xiaomi"];
70
74
  readonly providerPrefixes: readonly ["xiaomi-token-plan-"];
@@ -272,6 +272,16 @@ export declare const CATALOG_PROVIDERS: readonly [{
272
272
  readonly catalogDiscovery: {
273
273
  readonly label: "Together";
274
274
  };
275
+ }, {
276
+ readonly id: "umans";
277
+ readonly defaultModel: "umans-coder";
278
+ readonly envVars: readonly ["UMANS_AI_CODING_PLAN_API_KEY"];
279
+ readonly createModelManagerOptions: (config: ModelManagerConfig) => import("..").ModelManagerOptions<"anthropic-messages", unknown>;
280
+ readonly dynamicModelsAuthoritative: true;
281
+ readonly catalogDiscovery: {
282
+ readonly label: "Umans AI Coding Plan";
283
+ readonly allowUnauthenticated: true;
284
+ };
275
285
  }, {
276
286
  readonly id: "venice";
277
287
  readonly defaultModel: "llama-3.3-70b";
@@ -40,6 +40,12 @@ type SimpleProviderConfig = {
40
40
  fetch?: FetchImpl;
41
41
  };
42
42
  export declare function createSimpleOpenAICompletionsOptions(providerId: Parameters<typeof getBundledModels>[0], defaultBaseUrl: string, config?: SimpleProviderConfig): ModelManagerOptions<"openai-completions">;
43
+ export interface UmansModelManagerConfig {
44
+ apiKey?: string;
45
+ baseUrl?: string;
46
+ fetch?: FetchImpl;
47
+ }
48
+ export declare function umansModelManagerOptions(config?: UmansModelManagerConfig): ModelManagerOptions<"anthropic-messages">;
43
49
  export interface OpenAIModelManagerConfig {
44
50
  apiKey?: string;
45
51
  baseUrl?: string;
@@ -166,6 +172,14 @@ export declare function isFireworksKimiK2ModelId(modelId: string): boolean;
166
172
  */
167
173
  export declare function clampFireworksKimiMaxTokens(modelId: string, candidate: number): number;
168
174
  export declare function clampFireworksKimiMaxTokens(modelId: string, candidate: number | null): number | null;
175
+ /**
176
+ * Kimi K2.7 Code's documented recommended output budget. Some provider
177
+ * discovery rows report the context-sized `max_completion_tokens` instead.
178
+ */
179
+ export declare const KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS = 32768;
180
+ export declare function isKimiK27CodeModelId(modelId: string): boolean;
181
+ export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number): number;
182
+ export declare function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | null): number | null;
169
183
  /**
170
184
  * Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
171
185
  * DeepSeek-native binary `thinking` toggle when both are present.
@@ -285,6 +285,13 @@ export interface AnthropicCompat {
285
285
  * Default: auto-detected from provider/baseUrl and `model.reasoning`.
286
286
  */
287
287
  replayUnsignedThinking?: boolean;
288
+ /**
289
+ * Prefix Anthropic built-in tool names (`web_search`, `code_execution`, ...)
290
+ * when they are ordinary client tools. Some Anthropic-compatible gateways
291
+ * intercept those exact names as server tools and return raw search/result
292
+ * blocks instead of normal `tool_use` calls.
293
+ */
294
+ escapeBuiltinToolNames?: boolean;
288
295
  }
289
296
  /**
290
297
  * OpenRouter provider routing preferences.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-catalog",
4
- "version": "16.0.0",
4
+ "version": "16.0.2",
5
5
  "description": "Model catalog for omp: bundled model database, provider discovery descriptors, model identity, classification, and equivalence",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -34,11 +34,11 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@bufbuild/protobuf": "^2.12.0",
37
- "@oh-my-pi/pi-utils": "16.0.0",
37
+ "@oh-my-pi/pi-utils": "16.0.2",
38
38
  "zod": "^4"
39
39
  },
40
40
  "devDependencies": {
41
- "@oh-my-pi/pi-ai": "16.0.0",
41
+ "@oh-my-pi/pi-ai": "16.0.2",
42
42
  "@types/bun": "^1.3.14"
43
43
  },
44
44
  "engines": {
@@ -67,6 +67,7 @@ export function buildAnthropicCompat(spec: ModelSpec<"anthropic-messages">): Res
67
67
  // arguments (#2005). Known non-signing hosts (Z.AI, DeepSeek) are also
68
68
  // preserved for compatibility.
69
69
  replayUnsignedThinking: isZai || modelMatchesHost(spec, "deepseekFamily") || (spec.reasoning && !official),
70
+ escapeBuiltinToolNames: modelMatchesHost(spec, "umans"),
70
71
  };
71
72
  applyCompatOverrides(compat, spec.compat);
72
73
  return compat;
package/src/hosts.ts CHANGED
@@ -42,6 +42,7 @@ export const KNOWN_HOSTS = {
42
42
  zhipu: { providers: ["zhipu-coding-plan"], urlMarkers: ["open.bigmodel.cn"] },
43
43
  kilo: { providers: ["kilo"], urlMarkers: ["api.kilo.ai"] },
44
44
  alibabaDashscope: { providers: ["alibaba-coding-plan"], urlMarkers: ["dashscope"] },
45
+ umans: { providers: ["umans"], urlMarkers: ["api.code.umans.ai"] },
45
46
  xiaomi: { providers: ["xiaomi"], providerPrefixes: ["xiaomi-token-plan-"], urlMarkers: ["xiaomimimo.com"] },
46
47
  xai: { providers: ["xai"], urlMarkers: ["api.x.ai"] },
47
48
  mistral: { providers: ["mistral"], urlMarkers: ["mistral.ai"] },
@@ -27,6 +27,7 @@ const DEFAULT_MODEL_PROVIDER_ORDER = [
27
27
  // Generic gateways and editor/proxy providers. These are useful when picked
28
28
  // explicitly, but should not win ambiguous automatic role selection.
29
29
  "alibaba-coding-plan",
30
+ "umans",
30
31
  "google-antigravity",
31
32
  "opencode-zen",
32
33
  "gitlab-duo",
package/src/models.json CHANGED
@@ -60012,8 +60012,8 @@
60012
60012
  "text"
60013
60013
  ],
60014
60014
  "cost": {
60015
- "input": 0.09,
60016
- "output": 0.18,
60015
+ "input": 0.098,
60016
+ "output": 0.196,
60017
60017
  "cacheRead": 0.02,
60018
60018
  "cacheWrite": 0
60019
60019
  },
@@ -65288,7 +65288,7 @@
65288
65288
  "cacheWrite": 0
65289
65289
  },
65290
65290
  "contextWindow": 262144,
65291
- "maxTokens": 262144,
65291
+ "maxTokens": 81920,
65292
65292
  "thinking": {
65293
65293
  "mode": "effort",
65294
65294
  "efforts": [
@@ -65311,12 +65311,12 @@
65311
65311
  "image"
65312
65312
  ],
65313
65313
  "cost": {
65314
- "input": 0.39,
65315
- "output": 2.34,
65314
+ "input": 0.385,
65315
+ "output": 2.4499999999999997,
65316
65316
  "cacheRead": 0.195,
65317
65317
  "cacheWrite": 0
65318
65318
  },
65319
- "contextWindow": 262144,
65319
+ "contextWindow": 256000,
65320
65320
  "maxTokens": 65536,
65321
65321
  "thinking": {
65322
65322
  "mode": "effort",
@@ -65932,13 +65932,13 @@
65932
65932
  "text"
65933
65933
  ],
65934
65934
  "cost": {
65935
- "input": 0.063,
65936
- "output": 0.21,
65937
- "cacheRead": 0.020999999999999998,
65935
+ "input": 0.06599999999999999,
65936
+ "output": 0.26,
65937
+ "cacheRead": 0.029,
65938
65938
  "cacheWrite": 0
65939
65939
  },
65940
65940
  "contextWindow": 262144,
65941
- "maxTokens": 64000,
65941
+ "maxTokens": 262144,
65942
65942
  "thinking": {
65943
65943
  "mode": "effort",
65944
65944
  "efforts": [
@@ -68887,6 +68887,207 @@
68887
68887
  }
68888
68888
  }
68889
68889
  },
68890
+ "umans": {
68891
+ "umans-coder": {
68892
+ "id": "umans-coder",
68893
+ "name": "Umans Coder",
68894
+ "api": "anthropic-messages",
68895
+ "provider": "umans",
68896
+ "baseUrl": "https://api.code.umans.ai",
68897
+ "reasoning": true,
68898
+ "input": [
68899
+ "text",
68900
+ "image"
68901
+ ],
68902
+ "cost": {
68903
+ "input": 0,
68904
+ "output": 0,
68905
+ "cacheRead": 0,
68906
+ "cacheWrite": 0
68907
+ },
68908
+ "contextWindow": 262144,
68909
+ "maxTokens": 32768,
68910
+ "thinking": {
68911
+ "mode": "budget",
68912
+ "efforts": [
68913
+ "minimal",
68914
+ "low",
68915
+ "medium",
68916
+ "high",
68917
+ "xhigh"
68918
+ ]
68919
+ },
68920
+ "compat": {
68921
+ "escapeBuiltinToolNames": true
68922
+ }
68923
+ },
68924
+ "umans-flash": {
68925
+ "id": "umans-flash",
68926
+ "name": "Umans Flash",
68927
+ "api": "anthropic-messages",
68928
+ "provider": "umans",
68929
+ "baseUrl": "https://api.code.umans.ai",
68930
+ "reasoning": true,
68931
+ "input": [
68932
+ "text",
68933
+ "image"
68934
+ ],
68935
+ "cost": {
68936
+ "input": 0,
68937
+ "output": 0,
68938
+ "cacheRead": 0,
68939
+ "cacheWrite": 0
68940
+ },
68941
+ "contextWindow": 262144,
68942
+ "maxTokens": 32768,
68943
+ "thinking": {
68944
+ "mode": "budget",
68945
+ "efforts": [
68946
+ "minimal",
68947
+ "low",
68948
+ "medium",
68949
+ "high",
68950
+ "xhigh"
68951
+ ]
68952
+ },
68953
+ "compat": {
68954
+ "escapeBuiltinToolNames": true
68955
+ }
68956
+ },
68957
+ "umans-glm-5.1": {
68958
+ "id": "umans-glm-5.1",
68959
+ "name": "Umans GLM 5.1",
68960
+ "api": "anthropic-messages",
68961
+ "provider": "umans",
68962
+ "baseUrl": "https://api.code.umans.ai",
68963
+ "reasoning": true,
68964
+ "input": [
68965
+ "text",
68966
+ "image"
68967
+ ],
68968
+ "cost": {
68969
+ "input": 0,
68970
+ "output": 0,
68971
+ "cacheRead": 0,
68972
+ "cacheWrite": 0
68973
+ },
68974
+ "contextWindow": 202752,
68975
+ "maxTokens": 131072,
68976
+ "thinking": {
68977
+ "mode": "budget",
68978
+ "efforts": [
68979
+ "minimal",
68980
+ "low",
68981
+ "medium",
68982
+ "high",
68983
+ "xhigh"
68984
+ ]
68985
+ },
68986
+ "compat": {
68987
+ "escapeBuiltinToolNames": true
68988
+ }
68989
+ },
68990
+ "umans-kimi-k2.6": {
68991
+ "id": "umans-kimi-k2.6",
68992
+ "name": "Umans Kimi K2.6",
68993
+ "api": "anthropic-messages",
68994
+ "provider": "umans",
68995
+ "baseUrl": "https://api.code.umans.ai",
68996
+ "reasoning": true,
68997
+ "input": [
68998
+ "text",
68999
+ "image"
69000
+ ],
69001
+ "cost": {
69002
+ "input": 0,
69003
+ "output": 0,
69004
+ "cacheRead": 0,
69005
+ "cacheWrite": 0
69006
+ },
69007
+ "contextWindow": 262144,
69008
+ "maxTokens": 32768,
69009
+ "thinking": {
69010
+ "mode": "budget",
69011
+ "efforts": [
69012
+ "minimal",
69013
+ "low",
69014
+ "medium",
69015
+ "high",
69016
+ "xhigh"
69017
+ ]
69018
+ },
69019
+ "compat": {
69020
+ "escapeBuiltinToolNames": true
69021
+ }
69022
+ },
69023
+ "umans-kimi-k2.7": {
69024
+ "id": "umans-kimi-k2.7",
69025
+ "name": "Umans Kimi K2.7 Code",
69026
+ "api": "anthropic-messages",
69027
+ "provider": "umans",
69028
+ "baseUrl": "https://api.code.umans.ai",
69029
+ "reasoning": true,
69030
+ "input": [
69031
+ "text",
69032
+ "image"
69033
+ ],
69034
+ "cost": {
69035
+ "input": 0,
69036
+ "output": 0,
69037
+ "cacheRead": 0,
69038
+ "cacheWrite": 0
69039
+ },
69040
+ "contextWindow": 262144,
69041
+ "maxTokens": 32768,
69042
+ "thinking": {
69043
+ "mode": "budget",
69044
+ "efforts": [
69045
+ "minimal",
69046
+ "low",
69047
+ "medium",
69048
+ "high",
69049
+ "xhigh"
69050
+ ],
69051
+ "requiresEffort": true
69052
+ },
69053
+ "compat": {
69054
+ "escapeBuiltinToolNames": true
69055
+ }
69056
+ },
69057
+ "umans-qwen3.6-35b-a3b": {
69058
+ "id": "umans-qwen3.6-35b-a3b",
69059
+ "name": "Umans Qwen3.6 35B A3B",
69060
+ "api": "anthropic-messages",
69061
+ "provider": "umans",
69062
+ "baseUrl": "https://api.code.umans.ai",
69063
+ "reasoning": true,
69064
+ "input": [
69065
+ "text",
69066
+ "image"
69067
+ ],
69068
+ "cost": {
69069
+ "input": 0,
69070
+ "output": 0,
69071
+ "cacheRead": 0,
69072
+ "cacheWrite": 0
69073
+ },
69074
+ "contextWindow": 262144,
69075
+ "maxTokens": 32768,
69076
+ "thinking": {
69077
+ "mode": "budget",
69078
+ "efforts": [
69079
+ "minimal",
69080
+ "low",
69081
+ "medium",
69082
+ "high",
69083
+ "xhigh"
69084
+ ]
69085
+ },
69086
+ "compat": {
69087
+ "escapeBuiltinToolNames": true
69088
+ }
69089
+ }
69090
+ },
68890
69091
  "venice": {
68891
69092
  "aion-labs-aion-2-0": {
68892
69093
  "id": "aion-labs-aion-2-0",
@@ -70356,6 +70557,28 @@
70356
70557
  ]
70357
70558
  }
70358
70559
  },
70560
+ "kimi-k2-7-code": {
70561
+ "id": "kimi-k2-7-code",
70562
+ "name": "kimi-k2-7-code",
70563
+ "api": "openai-completions",
70564
+ "provider": "venice",
70565
+ "baseUrl": "https://api.venice.ai/api/v1",
70566
+ "reasoning": false,
70567
+ "input": [
70568
+ "text"
70569
+ ],
70570
+ "cost": {
70571
+ "input": 0,
70572
+ "output": 0,
70573
+ "cacheRead": 0,
70574
+ "cacheWrite": 0
70575
+ },
70576
+ "contextWindow": 256000,
70577
+ "maxTokens": 32768,
70578
+ "compat": {
70579
+ "supportsUsageInStreaming": false
70580
+ }
70581
+ },
70359
70582
  "kimi-k2-thinking": {
70360
70583
  "id": "kimi-k2-thinking",
70361
70584
  "name": "Kimi K2 Thinking",
@@ -72105,7 +72328,8 @@
72105
72328
  "supportsForcedToolChoice": true,
72106
72329
  "supportsSamplingParams": true,
72107
72330
  "requiresToolResultId": false,
72108
- "replayUnsignedThinking": false
72331
+ "replayUnsignedThinking": false,
72332
+ "escapeBuiltinToolNames": false
72109
72333
  }
72110
72334
  },
72111
72335
  "alibaba/qwen3-max-preview": {
@@ -37,6 +37,7 @@ import {
37
37
  qwenPortalModelManagerOptions,
38
38
  syntheticModelManagerOptions,
39
39
  togetherModelManagerOptions,
40
+ umansModelManagerOptions,
40
41
  veniceModelManagerOptions,
41
42
  vercelAiGatewayModelManagerOptions,
42
43
  vllmModelManagerOptions,
@@ -313,6 +314,14 @@ export const CATALOG_PROVIDERS = [
313
314
  createModelManagerOptions: (config: ModelManagerConfig) => togetherModelManagerOptions(config),
314
315
  catalogDiscovery: { label: "Together" },
315
316
  },
317
+ {
318
+ id: "umans",
319
+ defaultModel: "umans-coder",
320
+ envVars: ["UMANS_AI_CODING_PLAN_API_KEY"],
321
+ createModelManagerOptions: (config: ModelManagerConfig) => umansModelManagerOptions(config),
322
+ dynamicModelsAuthoritative: true,
323
+ catalogDiscovery: { label: "Umans AI Coding Plan", allowUnauthenticated: true },
324
+ },
316
325
  {
317
326
  id: "venice",
318
327
  defaultModel: "llama-3.3-70b",
@@ -568,6 +568,159 @@ function createSimpleAnthropicProviderOptions(
568
568
  };
569
569
  }
570
570
 
571
+ // ---------------------------------------------------------------------------
572
+ // Umans AI Coding Plan
573
+ // ---------------------------------------------------------------------------
574
+
575
/** Base URL of the Umans AI Coding Plan API, used when no override is configured. */
const UMANS_BASE_URL = "https://api.code.umans.ai";
/** Path of the model-discovery endpoint, appended to the discovery base URL. */
const UMANS_MODELS_INFO_PATH = "/models/info";
/** Maps the reasoning level names read from discovery payloads to `Effort` values. */
const UMANS_REASONING_EFFORT_BY_LEVEL: Record<string, Effort> = {
	minimal: Effort.Minimal,
	low: Effort.Low,
	medium: Effort.Medium,
	high: Effort.High,
	xhigh: Effort.XHigh,
};
/** Effort list used when a discovery payload yields no recognised reasoning levels. */
const UMANS_DEFAULT_REASONING_EFFORTS = [Effort.Minimal, Effort.Low, Effort.Medium, Effort.High, Effort.XHigh] as const;
585
+
586
/** Optional settings accepted by `umansModelManagerOptions`. */
export interface UmansModelManagerConfig {
	/** API key; when set it is sent as the `x-api-key` header on discovery requests. */
	apiKey?: string;
	/** Overrides the default Umans base URL. */
	baseUrl?: string;
	/** Custom fetch implementation; the global `fetch` is used when omitted. */
	fetch?: FetchImpl;
}
591
+
592
/**
 * One entry of the Umans discovery payload. Every field is typed `unknown`
 * because the payload is remote input and is narrowed at the point of use.
 */
interface UmansModelInfo {
	/** Fallback source for the model's display name. */
	name?: unknown;
	/** Preferred source for the model's display name. */
	display_name?: unknown;
	/** Capability record (reasoning, vision, tools, token limits), narrowed by the mapper. */
	capabilities?: unknown;
}
597
+
598
/**
 * Normalises a configured base URL through `normalizeAnthropicBaseUrl`, passing
 * `UMANS_BASE_URL` as the default, then drops a trailing `/v1` segment if present.
 *
 * @param baseUrl - User-supplied base URL, or `undefined` to use the default.
 * @returns The normalised base URL without a trailing `/v1`.
 */
function normalizeUmansBaseUrl(baseUrl: string | undefined): string {
	const versionSuffix = "/v1";
	const normalized = normalizeAnthropicBaseUrl(baseUrl, UMANS_BASE_URL);
	return normalized.endsWith(versionSuffix) ? normalized.slice(0, -versionSuffix.length) : normalized;
}
602
+
603
/**
 * Interprets a discovery `supports_vision` value.
 *
 * @param value - Raw capability value from the discovery payload.
 * @returns `true` for the boolean `true` or any non-empty string; `false` otherwise.
 */
function umansSupportsVision(value: unknown): boolean {
	if (value === true) return true;
	return typeof value === "string" && value.length > 0;
}
606
+
607
/**
 * Interprets a discovery `reasoning` capability.
 *
 * @param value - Raw capability value: either a record or a bare boolean.
 * @returns `true` when a record has `supported === true`, or when a non-record value is `true`.
 */
function umansReasoningSupported(value: unknown): boolean {
	if (isRecord(value)) {
		return value.supported === true;
	}
	return value === true;
}
610
+
611
/**
 * Converts the `levels` array of a discovery `reasoning` capability into a
 * de-duplicated list of `Effort` values, preserving payload order.
 *
 * @param value - Raw `reasoning` capability from the discovery payload.
 * @returns The recognised efforts, or `UMANS_DEFAULT_REASONING_EFFORTS` when the
 * capability is not a record, has no `levels` array, or contains no recognised level.
 */
function mapUmansReasoningEfforts(value: unknown): readonly Effort[] {
	if (!isRecord(value) || !Array.isArray(value.levels)) {
		return UMANS_DEFAULT_REASONING_EFFORTS;
	}
	const efforts: Effort[] = [];
	for (const level of value.levels) {
		if (typeof level !== "string") continue;
		// The lookup table is a plain object and `level` comes from the remote
		// payload. Without an own-property check, inherited keys such as
		// "constructor" or "toString" would resolve to prototype members and be
		// pushed as if they were Effort values.
		if (!Object.prototype.hasOwnProperty.call(UMANS_REASONING_EFFORT_BY_LEVEL, level)) continue;
		const effort = UMANS_REASONING_EFFORT_BY_LEVEL[level];
		if (effort !== undefined && !efforts.includes(effort)) {
			efforts.push(effort);
		}
	}
	return efforts.length > 0 ? efforts : UMANS_DEFAULT_REASONING_EFFORTS;
}
625
+
626
/**
 * Builds the `ThinkingConfig` for a model from its discovery `reasoning` capability.
 *
 * @param value - Raw `reasoning` capability from the discovery payload.
 * @returns `undefined` when reasoning is not reported as supported; otherwise a
 * budget-mode config whose efforts come from `mapUmansReasoningEfforts`. When the
 * capability is a record, `can_disable === false` sets `requiresEffort`, and a
 * recognised `default_level` that is among the efforts sets `defaultLevel`.
 */
function mapUmansThinkingConfig(value: unknown): ThinkingConfig | undefined {
	if (!umansReasoningSupported(value)) return undefined;
	const efforts = mapUmansReasoningEfforts(value);
	const thinking: ThinkingConfig = { mode: "budget", efforts };
	if (!isRecord(value)) return thinking;

	if (value.can_disable === false) {
		thinking.requiresEffort = true;
	}
	const requestedLevel = value.default_level;
	// Own-property check keeps payload strings such as "constructor" from
	// resolving to inherited members of the plain-object lookup table.
	if (
		typeof requestedLevel === "string" &&
		Object.prototype.hasOwnProperty.call(UMANS_REASONING_EFFORT_BY_LEVEL, requestedLevel)
	) {
		const defaultLevel = UMANS_REASONING_EFFORT_BY_LEVEL[requestedLevel];
		if (defaultLevel !== undefined && efforts.includes(defaultLevel)) {
			thinking.defaultLevel = defaultLevel;
		}
	}
	return thinking;
}
643
+
644
/**
 * Maps one Umans discovery entry to an `anthropic-messages` model spec.
 *
 * Fields from the bundled `reference` (if any) are spread first and then
 * overridden by values derived from the discovery entry.
 *
 * @param modelId - Key of the entry in the discovery payload.
 * @param raw - The discovery entry.
 * @param baseUrl - Base URL recorded on the resulting spec.
 * @param reference - Bundled spec for the same id, used for fallback values.
 * @returns The mapped spec, or `null` when `modelId` is empty.
 */
function mapUmansModelInfo(
	modelId: string,
	raw: UmansModelInfo,
	baseUrl: string,
	reference: ModelSpec<"anthropic-messages"> | undefined,
): ModelSpec<"anthropic-messages"> | null {
	if (!modelId) return null;
	const capabilities = isRecord(raw.capabilities) ? raw.capabilities : {};
	const supportsTools = capabilities.supports_tools;
	const thinking = mapUmansThinkingConfig(capabilities.reasoning);
	return {
		...reference,
		id: modelId,
		// Prefer `display_name`, then `name`, then the id itself.
		name: toModelName(raw.display_name, toModelName(raw.name, modelId)),
		api: "anthropic-messages",
		provider: "umans",
		baseUrl,
		// Always force the escape flag on, whatever the reference compat says.
		compat: { ...reference?.compat, escapeBuiltinToolNames: true },
		reasoning: thinking !== undefined,
		// NOTE(review): when discovery reports no reasoning support, a `thinking`
		// value spread in from `reference` is left in place alongside
		// `reasoning: false` — confirm this is intended.
		...(thinking ? { thinking } : {}),
		input: umansSupportsVision(capabilities.supports_vision) ? ["text", "image"] : ["text"],
		// Only an explicit `false` disables tools; anything else leaves the field as spread.
		...(supportsTools === false ? { supportsTools: false } : {}),
		cost: reference?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: toPositiveNumber(capabilities.context_window, reference?.contextWindow ?? null),
		// `recommended_max_tokens` is tried first, then `max_completion_tokens`,
		// then the reference value (presumably `toPositiveNumber` returns its
		// second argument when the first is unusable — verify against the helper).
		maxTokens: toPositiveNumber(
			capabilities.recommended_max_tokens,
			toPositiveNumber(capabilities.max_completion_tokens, reference?.maxTokens ?? null),
		),
	};
}
674
+
675
/**
 * Fetches the Umans model-discovery payload and maps it to model specs.
 *
 * @param options.baseUrl - Base URL recorded on each spec and used to derive the discovery URL.
 * @param options.apiKey - Sent as `x-api-key` when truthy; the request is made without it otherwise.
 * @param options.fetch - Fetch implementation; defaults to the global `fetch`.
 * @param options.references - Bundled specs keyed by model id, used for fallback values.
 * @returns The mapped specs sorted by id, or `null` when the response is not OK
 * or the payload is not a record.
 * @throws Error with the original failure as `cause` when the request or JSON parsing throws.
 */
async function fetchUmansModelsInfo(options: {
	baseUrl: string;
	apiKey?: string;
	fetch?: FetchImpl;
	references: Map<string, ModelSpec<"anthropic-messages">>;
}): Promise<ModelSpec<"anthropic-messages">[] | null> {
	const discoveryBaseUrl = toAnthropicDiscoveryBaseUrl(options.baseUrl);
	const requestHeaders: Record<string, string> = { Accept: "application/json" };
	if (options.apiKey) {
		requestHeaders["x-api-key"] = options.apiKey;
	}
	const fetchImpl = options.fetch ?? fetch;
	let payload: unknown;
	try {
		const response = await fetchImpl(`${discoveryBaseUrl}${UMANS_MODELS_INFO_PATH}`, {
			method: "GET",
			headers: requestHeaders,
		});
		if (!response.ok) {
			return null;
		}
		payload = await response.json();
	} catch (error) {
		throw new Error("Failed to fetch Umans models info", { cause: error });
	}
	if (!isRecord(payload)) {
		return null;
	}
	// The payload is keyed by model id; entries that are not records are ignored.
	const models: ModelSpec<"anthropic-messages">[] = [];
	for (const [modelId, value] of Object.entries(payload)) {
		if (!isRecord(value)) continue;
		const mapped = mapUmansModelInfo(modelId, value, options.baseUrl, options.references.get(modelId));
		if (mapped) {
			models.push(mapped);
		}
	}
	return models.sort((left, right) => left.id.localeCompare(right.id));
}
713
+
714
/**
 * Creates model-manager options for the Umans AI Coding Plan provider.
 *
 * The returned options mark dynamically discovered models as authoritative and
 * discover them through `fetchUmansModelsInfo`, using the bundled `umans`
 * models as references.
 *
 * @param config - Optional API key, base URL override and fetch implementation.
 * @returns Options for an `anthropic-messages` model manager.
 */
export function umansModelManagerOptions(config?: UmansModelManagerConfig): ModelManagerOptions<"anthropic-messages"> {
	const apiKey = config?.apiKey;
	const baseUrl = normalizeUmansBaseUrl(config?.baseUrl);
	const references = createBundledReferenceMap<"anthropic-messages">("umans");
	return {
		providerId: "umans",
		dynamicModelsAuthoritative: true,
		fetchDynamicModels: () => fetchUmansModelsInfo({ baseUrl, apiKey, fetch: config?.fetch, references }),
	};
}
571
724
  // ---------------------------------------------------------------------------
572
725
  // 1. OpenAI
573
726
  // ---------------------------------------------------------------------------
@@ -1083,6 +1236,23 @@ export function clampFireworksKimiMaxTokens(modelId: string, candidate: number |
1083
1236
  return isFireworksKimiK2ModelId(modelId) ? Math.min(candidate, FIREWORKS_KIMI_MAX_TOKENS) : candidate;
1084
1237
  }
1085
1238
 
1239
/**
 * Kimi K2.7 Code's documented recommended output budget. Some provider
 * discovery rows report the context-sized `max_completion_tokens` instead.
 */
export const KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS = 32_768;

/**
 * Reports whether a model id names Kimi K2.7 Code.
 *
 * The match is case-insensitive and anchored: the name must start the id or
 * follow a `/`, and must end the id. Separators between the `kimi`, `k2`, `7`
 * and `code` parts may be `-`, `.`, `_` or absent, and `k2p7` is also accepted.
 *
 * @param modelId - Model id to test.
 */
export function isKimiK27CodeModelId(modelId: string): boolean {
	return /(?:^|\/)kimi[-._]?k2(?:[._-]?|p)7[-._]?code$/i.test(modelId);
}

/**
 * Caps an output-token limit at `KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS` for
 * Kimi K2.7 Code model ids. Other ids and `null` candidates pass through unchanged.
 *
 * @param modelId - Model id the limit belongs to.
 * @param candidate - Reported output-token limit, or `null` when unknown.
 * @returns The possibly reduced limit, or `null` when `candidate` is `null`.
 */
export function clampKimiK27CodeMaxTokens(modelId: string, candidate: number): number;
export function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | null): number | null;
export function clampKimiK27CodeMaxTokens(modelId: string, candidate: number | null): number | null {
	if (candidate === null) return null;
	if (!isKimiK27CodeModelId(modelId)) return candidate;
	return Math.min(candidate, KIMI_K27_CODE_RECOMMENDED_MAX_TOKENS);
}
1255
+
1086
1256
  /**
1087
1257
  * Fireworks DeepSeek V4 accepts effort via `reasoning_effort` but rejects the
1088
1258
  * DeepSeek-native binary `thinking` toggle when both are present.
@@ -2127,6 +2297,7 @@ export function veniceModelManagerOptions(
2127
2297
  const model = mapWithBundledReference(entry, defaults, reference);
2128
2298
  return {
2129
2299
  ...model,
2300
+ maxTokens: clampKimiK27CodeMaxTokens(defaults.id, model.maxTokens),
2130
2301
  compat: { ...model.compat, supportsUsageInStreaming: false },
2131
2302
  };
2132
2303
  },
@@ -2307,7 +2478,7 @@ export function xiaomiModelManagerOptions(
2307
2478
  provider: providerId,
2308
2479
  baseUrl: url,
2309
2480
  apiKey,
2310
- filterModel: (_entry, model) => !model.id.includes("-tts"),
2481
+ filterModel: (_entry, model) => !model.id.includes("-tts") && !model.id.includes("-asr"),
2311
2482
  mapModel: (entry, defaults) => {
2312
2483
  const reference = references.get(defaults.id);
2313
2484
  const model = mapWithBundledReference(entry, defaults, reference);
@@ -3245,6 +3416,8 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor
3245
3416
  const MODELS_DEV_PROVIDER_DESCRIPTORS_CODING_PLANS: readonly ModelsDevProviderDescriptor[] = [
3246
3417
  // --- zAI ---
3247
3418
  anthropicMessagesDescriptor("zai-coding-plan", "zai", "https://api.z.ai/api/anthropic"),
3419
+ // --- Umans AI Coding Plan ---
3420
+ anthropicMessagesDescriptor("umans-ai-coding-plan", "umans", UMANS_BASE_URL),
3248
3421
  // --- Xiaomi ---
3249
3422
  openAiCompletionsDescriptor("xiaomi", "xiaomi", "https://api.xiaomimimo.com/v1", {
3250
3423
  defaultContextWindow: 262144,
@@ -3393,7 +3566,12 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_SPECIALIZED: readonly ModelsDevProviderDes
3393
3566
  // --- Synthetic ---
3394
3567
  openAiCompletionsDescriptor("synthetic", "synthetic", "https://api.synthetic.new/openai/v1"),
3395
3568
  // --- Venice AI ---
3396
- openAiCompletionsDescriptor("venice", "venice", "https://api.venice.ai/api/v1"),
3569
+ openAiCompletionsDescriptor("venice", "venice", "https://api.venice.ai/api/v1", {
3570
+ transformModel: model => {
3571
+ const maxTokens = clampKimiK27CodeMaxTokens(model.id, model.maxTokens);
3572
+ return maxTokens === model.maxTokens ? model : { ...model, maxTokens };
3573
+ },
3574
+ }),
3397
3575
  // --- Ollama Cloud ---
3398
3576
  simpleModelsDevDescriptor("ollama-cloud", "ollama-cloud", "ollama-chat", "https://ollama.com"),
3399
3577
  // --- Xiaomi Token Plan ---
package/src/types.ts CHANGED
@@ -311,6 +311,13 @@ export interface AnthropicCompat {
311
311
  * Default: auto-detected from provider/baseUrl and `model.reasoning`.
312
312
  */
313
313
  replayUnsignedThinking?: boolean;
314
+ /**
315
+ * Prefix Anthropic built-in tool names (`web_search`, `code_execution`, ...)
316
+ * when they are ordinary client tools. Some Anthropic-compatible gateways
317
+ * intercept those exact names as server tools and return raw search/result
318
+ * blocks instead of normal `tool_use` calls.
319
+ */
320
+ escapeBuiltinToolNames?: boolean;
314
321
  }
315
322
 
316
323
  /**