jeo-code 0.6.21 → 0.6.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +25 -1
  2. package/README.ja.md +6 -2
  3. package/README.ko.md +6 -2
  4. package/README.md +6 -2
  5. package/README.zh.md +6 -2
  6. package/package.json +1 -1
  7. package/src/agent/config-schema.ts +12 -0
  8. package/src/agent/session.ts +10 -3
  9. package/src/agent/state.ts +19 -14
  10. package/src/ai/index.ts +1 -0
  11. package/src/ai/model-catalog.ts +121 -1
  12. package/src/ai/model-discovery.ts +55 -3
  13. package/src/ai/model-manager.ts +43 -11
  14. package/src/ai/model-registry.ts +2 -0
  15. package/src/ai/provider-status.ts +26 -7
  16. package/src/ai/providers/anthropic-compatible.ts +27 -0
  17. package/src/ai/providers/anthropic.ts +7 -3
  18. package/src/ai/providers/antigravity.ts +31 -6
  19. package/src/ai/providers/gemini.ts +45 -4
  20. package/src/ai/providers/kimi.ts +18 -0
  21. package/src/ai/providers/lmstudio.ts +8 -0
  22. package/src/ai/providers/ollama.ts +17 -5
  23. package/src/ai/providers/openai-compatible-catalog.ts +72 -0
  24. package/src/ai/providers/openai-compatible.ts +31 -0
  25. package/src/ai/providers/openai.ts +23 -7
  26. package/src/ai/providers/xai.ts +18 -0
  27. package/src/ai/register-providers.ts +18 -0
  28. package/src/ai/think-tags.ts +84 -0
  29. package/src/ai/types.ts +6 -1
  30. package/src/auth/flows/index.ts +3 -3
  31. package/src/auth/index.ts +4 -1
  32. package/src/auth/oauth.ts +3 -3
  33. package/src/auth/refresh.ts +5 -0
  34. package/src/auth/storage.ts +12 -1
  35. package/src/commands/auth.ts +19 -2
  36. package/src/commands/launch/flags.ts +5 -1
  37. package/src/commands/launch/input.ts +13 -0
  38. package/src/commands/launch.ts +78 -12
  39. package/src/commands/setup.ts +3 -2
  40. package/src/tui/app.ts +51 -31
  41. package/src/tui/components/ascii-art.ts +11 -7
  42. package/src/tui/components/autocomplete.ts +16 -0
  43. package/src/tui/components/forge.ts +1 -1
  44. package/src/tui/components/transcript.ts +7 -0
  45. package/src/tui/components/width.ts +21 -0
@@ -1,12 +1,14 @@
1
1
  import { providerRegistry } from "./provider-registry";
2
2
  import { OAUTH_FLOW_REGISTRY } from "../auth/flows";
3
3
  import { readGlobalConfig } from "../agent/state";
4
- import { resolveCredential, type AuthProvider, type Credential } from "../auth";
4
+ import { resolveCredential, isOAuthProvider, type AuthProvider, type Credential } from "../auth";
5
5
  import "./register-providers"; // side-effect: registers built-in adapters into providerRegistry
6
6
  import type { CallOptions, Message, ProviderAdapter, ProviderName } from "./types";
7
7
  import { expandAlias, resolveModelId, effectiveAliasesFor } from "./model-registry";
8
8
  import { findCatalogEntry, type ModelCatalogEntry } from "./model-catalog-compat";
9
9
  import { toProviderModel, CODEX_MODELS } from "./model-catalog";
10
+ import { xaiCredential } from "./providers/xai";
11
+ import { OPENAI_COMPAT_NAMES, isOpenAICompatProvider } from "./providers/openai-compatible-catalog";
10
12
  import { withRetry, defaultRetryable, type RetryOptions } from "../util/retry";
11
13
  import { jeoEnv } from "../util/env";
12
14
  import type { Config } from "../agent/state";
@@ -20,20 +22,39 @@ export function resolveProvider(model: string): ProviderName {
20
22
  const entry = findCatalogEntry(model);
21
23
  if (entry) return entry.provider;
22
24
  const m = (model ?? "").toLowerCase();
25
+ // Explicit `<provider>/` prefixes ALWAYS win over substring heuristics — a model id
26
+ // can legitimately contain another provider's name (e.g. `synthetic/hf:moonshotai/Kimi-K2.5`
27
+ // or `openrouter/openai/gpt-4o-mini`), so prefix routing is resolved first.
23
28
  if (m.startsWith("ollama/")) return "ollama";
29
+ if (m.startsWith("lmstudio/")) return "lmstudio";
24
30
  if (m.startsWith("antigravity/")) return "antigravity";
25
- // OpenAI: explicit prefix, any GPT, or a reasoning model (o1/o3/o4-mini, o1-preview…).
26
- if (m.startsWith("openai/") || m.includes("gpt") || /(^|\/)o\d/.test(m)) return "openai";
27
- if (m.startsWith("google/") || m.includes("gemini")) return "gemini";
31
+ if (m.startsWith("xai/")) return "xai";
32
+ if (m.startsWith("kimi/")) return "kimi";
33
+ for (const p of OPENAI_COMPAT_NAMES) if (m.startsWith(`${p}/`)) return p;
34
+ if (m.startsWith("openai/")) return "openai";
35
+ if (m.startsWith("google/")) return "gemini";
36
+ // Loose substring heuristics for BARE (unprefixed) ids only.
37
+ if (m.includes("grok")) return "xai";
38
+ if (m.includes("kimi") || m.includes("moonshot")) return "kimi";
39
+ if (m.includes("gpt") || /(^|\/)o\d/.test(m)) return "openai";
40
+ if (m.includes("gemini")) return "gemini";
28
41
  return "anthropic";
29
42
  }
30
- const PROVIDER_ID_PREFIX: Record<ProviderName, string> = {
43
+ // Static routing prefixes for the built-in (non-catalog) providers. Catalog
44
+ // OpenAI-compatible providers use `<name>/` directly (see providerIdPrefix).
45
+ const STATIC_ID_PREFIX: Partial<Record<ProviderName, string>> = {
31
46
  anthropic: "anthropic/",
32
47
  openai: "openai/",
33
48
  gemini: "google/",
34
49
  antigravity: "antigravity/",
35
50
  ollama: "ollama/",
51
+ lmstudio: "lmstudio/",
52
+ xai: "xai/",
53
+ kimi: "kimi/",
36
54
  };
55
+ function providerIdPrefix(provider: ProviderName): string {
56
+ return isOpenAICompatProvider(provider) ? `${provider}/` : (STATIC_ID_PREFIX[provider] ?? `${provider}/`);
57
+ }
37
58
 
38
59
  /**
39
60
  * Pin-time provider qualification: when a picked live model id would route to a
@@ -45,7 +66,7 @@ const PROVIDER_ID_PREFIX: Record<ProviderName, string> = {
45
66
  export function qualifyModelId(model: string, provider: ProviderName): string {
46
67
  const id = (model ?? "").trim();
47
68
  if (!id) return id;
48
- return resolveProvider(id) === provider ? id : `${PROVIDER_ID_PREFIX[provider]}${id}`;
69
+ return resolveProvider(id) === provider ? id : `${providerIdPrefix(provider)}${id}`;
49
70
  }
50
71
 
51
72
  /**
@@ -59,7 +80,11 @@ export function providerModelFor(model: string): string {
59
80
  model.startsWith("openai/") ||
60
81
  model.startsWith("anthropic/") ||
61
82
  model.startsWith("google/") ||
62
- model.startsWith("antigravity/")
83
+ model.startsWith("antigravity/") ||
84
+ model.startsWith("lmstudio/") ||
85
+ model.startsWith("xai/") ||
86
+ model.startsWith("kimi/") ||
87
+ isOpenAICompatProvider(model.split("/")[0])
63
88
  ) {
64
89
  return model;
65
90
  }
@@ -135,7 +160,7 @@ export interface ModelManager {
135
160
  resolveProvider: typeof resolveProvider;
136
161
  }
137
162
 
138
- const ALIAS_DEFAULTS = { fast: "ollama/qwen2.5:0.5b", local: "ollama/qwen2.5:0.5b", sonnet: "claude-sonnet-4-5", opus: "claude-opus-4-5", haiku: "claude-haiku-4-5", gpt: "gpt-5.5", flash: "gemini-2.5-flash" };
163
+ const ALIAS_DEFAULTS = { fast: "ollama/qwen2.5:0.5b", local: "ollama/qwen2.5:0.5b", sonnet: "claude-sonnet-4-5", opus: "claude-opus-4-5", haiku: "claude-haiku-4-5", gpt: "gpt-5.5", flash: "gemini-2.5-flash", grok: "grok-4.3" };
139
164
 
140
165
  /**
141
166
  * Build retry options from a config `retry` budget (gjc parity). `requestMaxRetries`
@@ -243,7 +268,7 @@ export function effectiveCredentialForProvider(
243
268
  if (credential.kind === "oauth") {
244
269
  const apiKey = config.providers[provider];
245
270
  if (apiKey) return { kind: "api_key", provider, token: apiKey };
246
- if (OAUTH_FLOW_REGISTRY[provider]?.verifiedEndToEnd === false) {
271
+ if (isOAuthProvider(provider) && OAUTH_FLOW_REGISTRY[provider].verifiedEndToEnd === false) {
247
272
  throw new Error(
248
273
  `Provider '${provider}' has only an OAuth token, but its OAuth backend is not compatible with the bundled adapter. Set ${provider.toUpperCase()}_API_KEY (or run 'jeo setup') to use ${model}.`,
249
274
  );
@@ -291,7 +316,8 @@ async function resolveCall(options: Partial<CallOptions>, kind: "request" | "str
291
316
  const baseUrl =
292
317
  options.baseUrl ??
293
318
  (provider === "openai" ? config.openaiBaseUrl : undefined) ??
294
- (provider === "ollama" ? config.ollamaBaseUrl : undefined);
319
+ (provider === "ollama" ? config.ollamaBaseUrl : undefined) ??
320
+ (provider === "lmstudio" ? config.lmstudioBaseUrl : undefined);
295
321
 
296
322
  const callOptions: CallOptions = {
297
323
  // Map a catalog canonical (e.g. claude-3-5-sonnet) to the exact wire id the
@@ -317,10 +343,16 @@ async function resolveCall(options: Partial<CallOptions>, kind: "request" | "str
317
343
  // generous gjc default of 100 only applies when the user configures it.
318
344
  const retry: RetryOptions = { ...resolveRetryOptions(config.retry, kind), ...(options.onRetry ? { onRetry: options.onRetry } : {}) };
319
345
 
320
- if (provider === "ollama") {
346
+ if (provider === "ollama" || provider === "lmstudio") {
321
347
  return { adapter, callOptions, credential: { kind: "none", provider: "openai" }, retry };
322
348
  }
323
349
 
350
+ if (provider === "xai") {
351
+ const key = config.providers?.xai;
352
+ if (!key) throw new Error("No credential for provider 'xai'. Set XAI_API_KEY (or providers.xai in config).");
353
+ return { adapter, callOptions, credential: xaiCredential(key), retry };
354
+ }
355
+
324
356
  if (provider === "antigravity") {
325
357
  // Prefer the dedicated Antigravity login (its client is what the agent
326
358
  // backend authorizes); fall back to a gemini-cli OAuth token for users with
@@ -14,6 +14,8 @@ export const BUILTIN_ALIASES: ModelAliases = {
14
14
  haiku: "claude-haiku-4-5",
15
15
  gpt: "gpt-5.5",
16
16
  flash: "gemini-2.5-flash",
17
+ grok: "grok-4.3",
18
+ kimi: "kimi-k2-0711-preview",
17
19
  };
18
20
 
19
21
  // Expand an alias to a concrete model id. Unknown input passes through unchanged.
@@ -5,11 +5,13 @@
5
5
  * its effective base URL, and whether it is ready to serve a request.
6
6
  */
7
7
  import { readGlobalConfig, type Config, type StoredOAuth } from "../agent/state";
8
- import type { AuthProvider, Credential } from "../auth";
8
+ import { isOAuthProvider, API_KEY_ONLY_PROVIDERS, type AuthProvider, type Credential } from "../auth";
9
9
  import { OAUTH_FLOW_REGISTRY } from "../auth/flows";
10
10
  import type { ProviderName } from "./types";
11
11
 
12
- export const PROVIDER_NAMES: readonly ProviderName[] = ["anthropic", "openai", "gemini", "antigravity", "ollama"];
12
+ import { OPENAI_COMPAT_NAMES, openaiCompatDef } from "./providers/openai-compatible-catalog";
13
+
14
+ export const PROVIDER_NAMES: readonly ProviderName[] = ["anthropic", "openai", "gemini", "antigravity", "ollama", "lmstudio", "xai", "kimi", ...OPENAI_COMPAT_NAMES];
13
15
 
14
16
  /** Cloud providers that authenticate via API key / OAuth. Ollama and LM Studio are keyless; API-key-only providers are listed separately in API_KEY_ONLY_PROVIDERS. */
15
17
  export const CLOUD_PROVIDERS: readonly AuthProvider[] = ["anthropic", "openai", "gemini", "antigravity"];
@@ -29,9 +31,12 @@ export interface ProviderStatus {
29
31
  ready: boolean;
30
32
  }
31
33
 
32
- /** The uppercase `<PROVIDER>_API_KEY` env var name for a cloud provider. */
34
+ /** The env var that supplies a provider's API key. Catalog providers carry their
35
+ * own (e.g. HF_TOKEN, NANO_GPT_API_KEY); built-ins use `<PROVIDER>_API_KEY`. */
33
36
  export function providerEnvVar(name: ProviderName): string | undefined {
34
- if (name === "ollama" || name === "antigravity") return undefined;
37
+ if (name === "ollama" || name === "lmstudio" || name === "antigravity") return undefined;
38
+ const def = openaiCompatDef(name);
39
+ if (def) return def.apiKeyEnv;
35
40
  return `${name.toUpperCase()}_API_KEY`;
36
41
  }
37
42
 
@@ -74,10 +79,24 @@ function effectiveCredential(provider: AuthProvider, cred: Credential, cfg: Conf
74
79
  /** Resolve the status of a single provider. */
75
80
  export async function describeProvider(name: ProviderName, config?: Config): Promise<ProviderStatus> {
76
81
  const cfg = config ?? (await readGlobalConfig());
77
- if (name === "ollama") {
78
- const baseUrl = cfg.ollamaBaseUrl ?? "http://localhost:11434";
82
+ if (name === "ollama" || name === "lmstudio") {
83
+ const baseUrl = name === "ollama"
84
+ ? (cfg.ollamaBaseUrl ?? "http://localhost:11434")
85
+ : (cfg.lmstudioBaseUrl ?? "http://localhost:1234/v1");
79
86
  return { name, kind: "keyless", label: credentialLabel("keyless"), baseUrl, ready: true };
80
87
  }
88
+ if ((API_KEY_ONLY_PROVIDERS as readonly string[]).includes(name)) {
89
+ // API-key-only providers (xai/kimi): no OAuth flow — ready when their key is set.
90
+ const key = cfg.providers?.[name as AuthProvider];
91
+ const envVar = providerEnvVar(name);
92
+ return {
93
+ name,
94
+ kind: key ? "api_key" : "none",
95
+ label: key ? credentialLabel("api_key") : `none (set ${envVar})`,
96
+ envVar,
97
+ ready: !!key,
98
+ };
99
+ }
81
100
  const ownProvider = name as AuthProvider;
82
101
  const ownCred = configuredCredential(ownProvider, cfg);
83
102
  // Antigravity prefers its own login but accepts a gemini-cli OAuth fallback.
@@ -97,7 +116,7 @@ export async function describeProvider(name: ProviderName, config?: Config): Pro
97
116
  : hasGeminiFallback
98
117
  ? "OAuth catalog via Gemini CLI; calls need 'jeo auth login antigravity'"
99
118
  : "none (run 'jeo auth login antigravity')";
100
- } else if (kind === "oauth" && OAUTH_FLOW_REGISTRY[credentialProvider]?.verifiedEndToEnd === false) {
119
+ } else if (kind === "oauth" && isOAuthProvider(credentialProvider) && OAUTH_FLOW_REGISTRY[credentialProvider].verifiedEndToEnd === false) {
101
120
  ready = false;
102
121
  label = "OAuth (API key needed)";
103
122
  } else if (name === "gemini" && kind === "oauth") {
@@ -0,0 +1,27 @@
1
+ import type { ProviderAdapter, CallOptions, ProviderName } from "../types";
2
+ import { anthropicAdapter } from "./anthropic";
3
+
4
+ /**
5
+ * Factory for Anthropic-Messages-compatible providers (z.ai, MiniMax, …). They speak
6
+ * the same `/v1/messages` wire protocol as Anthropic, authenticated via an `x-api-key` header, so each
7
+ * is a thin shim over `anthropicAdapter`: strip the `<name>/` routing prefix and pin the
8
+ * base URL (resolved upstream into `options.baseUrl`). The credential is an api_key —
9
+ * `anthropicAdapter` emits the plain `x-api-key` Messages headers for api_key creds
10
+ * (no Claude-Code OAuth cloaking / billing / betas), so it works as a generic client.
11
+ */
12
+ export function makeAnthropicCompatibleAdapter(opts: { name: ProviderName; baseUrl: string }): ProviderAdapter {
13
+ const prefix = `${opts.name}/`;
14
+ const prep = (o: CallOptions): CallOptions => ({
15
+ ...o,
16
+ model: o.model.startsWith(prefix) ? o.model.slice(prefix.length) : o.model,
17
+ baseUrl: o.baseUrl ?? opts.baseUrl,
18
+ });
19
+ return {
20
+ name: opts.name,
21
+ supportsNativeTools: anthropicAdapter.supportsNativeTools,
22
+ call: (messages, options, credential) => anthropicAdapter.call(messages, prep(options), credential),
23
+ async *stream(messages, options, credential) {
24
+ yield* anthropicAdapter.stream!(messages, prep(options), credential);
25
+ },
26
+ };
27
+ }
@@ -72,11 +72,13 @@ function anthropicSystemBlocks(
72
72
  return blocks;
73
73
  }
74
74
 
75
- /** Anthropic extended-thinking budget by reasoning effort (kept under max_tokens). Off for
76
- * low/minimal/unset effort so /fast and minimal thinking stay non-thinking (cheaper/faster). */
75
+ /** Anthropic extended-thinking budget by reasoning effort (kept under max_tokens). Cross-provider
76
+ * parity (matches Gemini's tiers): low/medium/high all enable thinking with scaling depth; only
77
+ * minimal/unset stay non-thinking so /fast and minimal thinking remain cheaper/faster. */
77
78
  function anthropicThinkingBudget(effort: CallOptions["reasoningEffort"], maxTokens: number): number | undefined {
78
79
  let budget: number;
79
80
  switch (effort) {
81
+ case "low": budget = 4000; break;
80
82
  case "medium": budget = 10000; break;
81
83
  case "high": budget = 24000; break;
82
84
  default: return undefined;
@@ -160,7 +162,9 @@ export function anthropicRequest(
160
162
  includeTemperature: boolean,
161
163
  ): { url: string; headers: Record<string, string>; body: string } {
162
164
  return {
163
- url: ANTHROPIC_URL,
165
+ // Anthropic-compatible providers (z.ai, MiniMax, …) accept the Messages wire
166
+ // format at their own host; an explicit baseUrl pins `${base}/v1/messages`.
167
+ url: options.baseUrl ? `${options.baseUrl.replace(/\/$/, "")}/v1/messages` : ANTHROPIC_URL,
164
168
  headers: headersFor(credential, stream),
165
169
  body: anthropicPayload(messages, options, stream, includeTemperature, credential),
166
170
  };
@@ -8,6 +8,18 @@ import { geminiThinkingBudget } from "./gemini";
8
8
 
9
9
  const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.googleapis.com";
10
10
  const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
11
+
12
+ /** Anthropic-style thinking budget for Claude served via CCA. gemini's budget fn
13
+ * returns undefined for claude ids, which left antigravity Claude with NO thinking
14
+ * requested (the opus "no reasoning" gap). Mirrors anthropic's effort→budget tiers. */
15
+ function antigravityClaudeThinkingBudget(effort: CallOptions["reasoningEffort"]): number | undefined {
16
+ switch (effort) {
17
+ case "low": return 4000;
18
+ case "medium": return 10000;
19
+ case "high": return 24000;
20
+ default: return undefined;
21
+ }
22
+ }
11
23
  const ENDPOINTS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
12
24
 
13
25
  export function getAntigravityUserAgent(): string {
@@ -129,13 +141,24 @@ export function antigravityRequest(messages: Message[], options: CallOptions, cr
129
141
  const systemPrompt = options.systemPrompt ?? messages.find(m => m.role === "system")?.content;
130
142
  const generationConfig: Record<string, unknown> = {};
131
143
  if (options.temperature !== undefined) generationConfig.temperature = options.temperature;
144
+ const isClaude = model.toLowerCase().includes("claude");
132
145
  // Upstream Antigravity strips maxOutputTokens for non-Claude models; do the same.
133
- if (model.toLowerCase().includes("claude")) generationConfig.maxOutputTokens = options.maxTokens ?? 4000;
134
- // Apply the thinking level: antigravity serves Gemini models through CCA, so reuse the
135
- // Gemini thinkingConfig budget (off at minimal, scaling with reasoning effort). Without
136
- // this the thinking level only changed token budget, never actual reasoning depth.
137
- const agThinkingBudget = geminiThinkingBudget(model, options.reasoningEffort);
138
- if (agThinkingBudget !== undefined) generationConfig.thinkingConfig = { thinkingBudget: agThinkingBudget };
146
+ if (isClaude) generationConfig.maxOutputTokens = options.maxTokens ?? 4000;
147
+ // Apply the thinking level. CCA emits `thought` parts ONLY when thinkingConfig has
148
+ // includeThoughts set. Gemini scales via geminiThinkingBudget; Claude-via-CCA needs an
149
+ // Anthropic-style budget (gemini's fn returns undefined for claude) PLUS the
150
+ // interleaved-thinking beta header below — without both, antigravity Claude (e.g. opus)
151
+ // never streamed reasoning while native sonnet did.
152
+ const agThinkingBudget = isClaude
153
+ ? antigravityClaudeThinkingBudget(options.reasoningEffort)
154
+ : geminiThinkingBudget(model, options.reasoningEffort);
155
+ const claudeThinkingOn = isClaude && agThinkingBudget !== undefined;
156
+ if (agThinkingBudget !== undefined) {
157
+ generationConfig.thinkingConfig = { includeThoughts: true, thinkingBudget: agThinkingBudget };
158
+ // Claude (via CCA) enforces max_tokens > thinking.budget_tokens — bump the output cap
159
+ // above the budget (mirrors the native Anthropic provider) or CCA returns HTTP 400.
160
+ if (claudeThinkingOn) generationConfig.maxOutputTokens = Math.max((options.maxTokens ?? 4000), agThinkingBudget + 1024);
161
+ }
139
162
 
140
163
  const request: Record<string, unknown> = {
141
164
  contents: antigravityContents(messages),
@@ -165,6 +188,8 @@ export function antigravityRequest(messages: Message[], options: CallOptions, cr
165
188
  "content-type": "application/json",
166
189
  accept: "text/event-stream",
167
190
  "User-Agent": getAntigravityUserAgent(),
191
+ // Claude reasoning over CCA requires the Anthropic interleaved-thinking beta (gjc parity).
192
+ ...(claudeThinkingOn ? { "anthropic-beta": "interleaved-thinking-2025-05-14" } : {}),
168
193
  },
169
194
  body,
170
195
  };
@@ -3,6 +3,7 @@ import type { CallOptions, Message, ProviderAdapter } from "../types";
3
3
  import { readSse } from "../sse";
4
4
  import { providerHttpError } from "./errors";
5
5
  import { jeoEnv } from "../../util/env";
6
+ import { serializeToolCalls } from "../../agent/tool-schemas";
6
7
 
7
8
  /** Gemini 2.5+/latest models think by default and BILL thought tokens against
8
9
  * `maxOutputTokens` — a small-budget call can burn its entire budget on thoughts
@@ -12,7 +13,13 @@ import { jeoEnv } from "../../util/env";
12
13
  * Older models (1.5/2.0) reject `thinkingConfig` entirely → undefined (omit). */
13
14
  export function geminiThinkingBudget(model: string, effort?: CallOptions["reasoningEffort"], maxTokens?: number): number | undefined {
14
15
  const m = model.toLowerCase();
15
- const thinkingCapable = /gemini-(2\.5|[3-9])|flash-latest|pro-latest/.test(m);
16
+ // Reasoning-capable when Gemini >= 2.5 (any 2.5+ minor) or major >= 3 (digit-count
17
+ // agnostic so gemini-10+ never silently loses thinking the way opus-4-8 did), plus
18
+ // the rolling *-latest aliases. Mirrors `inferCatalogMetadata` in model-catalog.ts.
19
+ const ver = m.match(/gemini-(\d+)(?:\.(\d+))?/);
20
+ const major = ver ? Number(ver[1]) : 0;
21
+ const minor = ver ? Number(ver[2] ?? 0) : 0;
22
+ const thinkingCapable = (major >= 3 || (major === 2 && minor >= 5)) || /flash-latest|pro-latest/.test(m);
16
23
  if (!thinkingCapable) return undefined;
17
24
  const floor = m.includes("pro") ? 128 : 0; // pro-class cannot fully disable thinking
18
25
  let budget: number;
@@ -63,12 +70,23 @@ export function buildGeminiPayload(messages: Message[], options: CallOptions): {
63
70
  temperature: options.temperature ?? 0.2,
64
71
  maxOutputTokens: options.maxTokens ?? 4000,
65
72
  };
66
- if (options.jsonMode) generationConfig.responseMimeType = "application/json";
73
+ // Function-calling and responseMimeType:json are mutually exclusive in the Gemini
74
+ // API — when native tools are declared, the functionCall parts replace JSON-in-prose.
75
+ if (options.jsonMode && !options.tools?.length) generationConfig.responseMimeType = "application/json";
67
76
  const thinkingBudget = geminiThinkingBudget(geminiModel, options.reasoningEffort, options.maxTokens);
68
- if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { thinkingBudget };
77
+ // includeThoughts: required for Gemini to STREAM thought summaries (the `thought:true`
78
+ // parts thoughtOf() routes to onReasoning) — without it the model thinks silently.
79
+ if (thinkingBudget !== undefined) generationConfig.thinkingConfig = { includeThoughts: true, thinkingBudget };
69
80
 
70
81
  const payload: Record<string, unknown> = { contents, generationConfig };
71
82
  if (systemPrompt) payload.systemInstruction = { parts: [{ text: systemPrompt }] };
83
+ if (options.tools?.length) {
84
+ // NATIVE function-calling (gjc/antigravity parity): declare the toolset so the
85
+ // model emits functionCall parts instead of hand-formatting the JSON tool protocol
86
+ // (which weaker models mangle — wasted steps + apology prose leaking into replies).
87
+ payload.tools = [{ functionDeclarations: options.tools.map(t => ({ name: t.name, description: t.description, parameters: t.parameters })) }];
88
+ payload.toolConfig = { functionCallingConfig: { mode: "AUTO" } };
89
+ }
72
90
  return { geminiModel, payload };
73
91
  }
74
92
 
@@ -119,7 +137,7 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
119
137
  }
120
138
 
121
139
  interface GeminiChunk {
122
- candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[];
140
+ candidates?: { content?: { parts?: { text?: string; thought?: boolean; functionCall?: { name?: string; args?: Record<string, unknown> } }[] }; finishReason?: string }[];
123
141
  promptFeedback?: { blockReason?: string };
124
142
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
125
143
  }
@@ -138,6 +156,18 @@ function textOf(chunk: GeminiChunk): string {
138
156
  function thoughtOf(chunk: GeminiChunk): string {
139
157
  return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
140
158
  }
159
+ /** Native Gemini functionCall parts → {tool, arguments} (gjc/antigravity parity). Kept
160
+ * separate from textOf so the re-serialized canonical JSON envelope drives the loop. */
161
+ function geminiFunctionCallsOf(chunk: GeminiChunk): { tool: string; arguments: Record<string, unknown> }[] {
162
+ const parts = chunk.candidates?.[0]?.content?.parts ?? [];
163
+ const out: { tool: string; arguments: Record<string, unknown> }[] = [];
164
+ for (const p of parts) {
165
+ if (p.functionCall && typeof p.functionCall.name === "string") {
166
+ out.push({ tool: p.functionCall.name, arguments: (p.functionCall.args ?? {}) as Record<string, unknown> });
167
+ }
168
+ }
169
+ return out;
170
+ }
141
171
 
142
172
  /** When Gemini returns HTTP 200 with no text, surface the real cause (safety block /
143
173
  * RECITATION / MAX_TOKENS) instead of a silent empty string that downstream JSON
@@ -173,6 +203,7 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
173
203
  let lastUsage: GeminiChunk["usageMetadata"];
174
204
  let yieldedAny = false;
175
205
  let lastEmptyReason: string | undefined;
206
+ const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
176
207
  for await (const data of readSse(response.body)) {
177
208
  let chunk: CcaChunk;
178
209
  try {
@@ -192,7 +223,10 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
192
223
  lastEmptyReason = blockedReason(inner) ?? lastEmptyReason;
193
224
  }
194
225
  if (inner.usageMetadata) lastUsage = inner.usageMetadata;
226
+ fnCalls.push(...geminiFunctionCallsOf(inner));
195
227
  }
228
+ const envelope = serializeToolCalls(fnCalls);
229
+ if (envelope) { yieldedAny = true; yield envelope; }
196
230
  if (!yieldedAny) {
197
231
  throw new Error(`Gemini (Cloud Code Assist) returned no content${lastEmptyReason ? ` (${lastEmptyReason})` : ""}.`);
198
232
  }
@@ -206,6 +240,7 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
206
240
 
207
241
  export const geminiAdapter: ProviderAdapter = {
208
242
  name: "gemini",
243
+ supportsNativeTools: true,
209
244
  async call(messages, options, credential) {
210
245
  // OAuth (gemini-cli login) → Cloud Code Assist; no GEMINI_API_KEY required.
211
246
  if (credential.kind === "oauth") {
@@ -220,6 +255,8 @@ export const geminiAdapter: ProviderAdapter = {
220
255
  if (result.usageMetadata) {
221
256
  options.onUsage?.({ inputTokens: result.usageMetadata.promptTokenCount, outputTokens: result.usageMetadata.candidatesTokenCount });
222
257
  }
258
+ const envelope = serializeToolCalls(geminiFunctionCallsOf(result));
259
+ if (envelope) return envelope;
223
260
  const text = textOf(result);
224
261
  if (!text) {
225
262
  const reason = blockedReason(result);
@@ -240,6 +277,7 @@ export const geminiAdapter: ProviderAdapter = {
240
277
  let lastUsage: GeminiChunk["usageMetadata"];
241
278
  let yieldedAny = false;
242
279
  let lastEmptyReason: string | undefined;
280
+ const fnCalls: { tool: string; arguments: Record<string, unknown> }[] = [];
243
281
  for await (const data of readSse(response.body)) {
244
282
  let chunk: GeminiChunk;
245
283
  try {
@@ -259,7 +297,10 @@ export const geminiAdapter: ProviderAdapter = {
259
297
  // Gemini emits cumulative usageMetadata on most chunks; capture the last and
260
298
  // report ONCE after the stream so an accumulating sink can't over-count.
261
299
  if (chunk.usageMetadata) lastUsage = chunk.usageMetadata;
300
+ fnCalls.push(...geminiFunctionCallsOf(chunk));
262
301
  }
302
+ const envelope = serializeToolCalls(fnCalls);
303
+ if (envelope) { yieldedAny = true; yield envelope; }
263
304
  if (!yieldedAny && lastEmptyReason) {
264
305
  throw new Error(`Gemini returned no content (${lastEmptyReason}).`);
265
306
  }
@@ -0,0 +1,18 @@
1
+ import type { Credential } from "../../auth";
2
+ import { makeOpenAICompatibleAdapter } from "./openai-compatible";
3
+
4
+ /**
5
+ * Kimi (Moonshot) — OpenAI-compatible cloud API at https://api.moonshot.ai/v1, keyed
6
+ * by KIMI_API_KEY (or `providers.kimi`). The credential (an api_key bearer) is passed
7
+ * through; thinking models (kimi-thinking-preview) stream reasoning via
8
+ * `reasoning_content`/`<think>`, which the openai adapter routes to onReasoning.
9
+ */
10
+ export const KIMI_BASE_URL = "https://api.moonshot.ai/v1";
11
+
12
+ export const kimiAdapter = makeOpenAICompatibleAdapter({ name: "kimi", baseUrl: KIMI_BASE_URL });
13
+
14
+ /** Credential carrier for Kimi calls — an api_key bearer (the adapter only reads the
15
+ * token); a keyless `none` when no key is set. */
16
+ export function kimiCredential(key: string | undefined): Credential {
17
+ return key ? { kind: "api_key", provider: "openai", token: key } : { kind: "none", provider: "openai" };
18
+ }
@@ -0,0 +1,8 @@
1
+ import { makeOpenAICompatibleAdapter } from "./openai-compatible";
2
+
3
+ /** LM Studio — local, keyless, OpenAI-compatible server (default http://localhost:1234/v1). */
4
+ export const lmstudioAdapter = makeOpenAICompatibleAdapter({
5
+ name: "lmstudio",
6
+ baseUrl: "http://localhost:1234/v1",
7
+ keyless: true,
8
+ });
@@ -1,6 +1,7 @@
1
1
  import type { CallOptions, Message, ProviderAdapter } from "../types";
2
2
  import { readLines } from "../sse";
3
3
  import { providerHttpError } from "./errors";
4
+ import { createThinkSplitter } from "../think-tags";
4
5
 
5
6
  /**
6
7
  * Resolve the Ollama base URL. `OLLAMA_HOST` is documented as a bare host:port
@@ -61,24 +62,35 @@ export const ollamaAdapter: ProviderAdapter = {
61
62
  if (!response.body) return;
62
63
  let yieldedAny = false;
63
64
  let doneReason: string | undefined;
65
+ // Route inline <think>…</think> (local reasoning models) to the reasoning channel.
66
+ const think = createThinkSplitter(options.onReasoning);
64
67
  for await (const line of readLines(response.body)) {
65
- let chunk: { message?: { content?: string }; done?: boolean; done_reason?: string; prompt_eval_count?: number; eval_count?: number; total_duration?: number };
68
+ let chunk: { message?: { content?: string; thinking?: string }; done?: boolean; done_reason?: string; prompt_eval_count?: number; eval_count?: number; total_duration?: number };
66
69
  try {
67
70
  chunk = JSON.parse(line);
68
71
  } catch {
69
72
  continue;
70
73
  }
71
- const delta = chunk.message?.content;
72
- if (delta) {
73
- yieldedAny = true;
74
- yield delta;
74
+ const raw = chunk.message?.content;
75
+ if (raw) {
76
+ const visible = think.push(raw);
77
+ if (visible) {
78
+ yieldedAny = true;
79
+ yield visible;
80
+ }
75
81
  }
82
+ // Native separated thinking (Ollama `message.thinking`, present when the model
83
+ // runs in think mode) → reasoning channel. Inline <think> is handled above.
84
+ const reason = chunk.message?.thinking;
85
+ if (reason) options.onReasoning?.(reason);
76
86
  if (chunk.done) {
77
87
  if (chunk.done_reason) doneReason = chunk.done_reason;
78
88
  options.onUsage?.({ inputTokens: chunk.prompt_eval_count, outputTokens: chunk.eval_count, durationMs: chunk.total_duration ? Math.round(chunk.total_duration / 1e6) : undefined });
79
89
  break;
80
90
  }
81
91
  }
92
+ const trailing = think.flush();
93
+ if (trailing) { yieldedAny = true; yield trailing; }
82
94
  if (!yieldedAny) throw emptyCompletionError(doneReason);
83
95
  },
84
96
  };
@@ -0,0 +1,72 @@
1
+ import type { ProviderName } from "../types";
2
+
3
+ /**
4
+ * gjc-style data-driven provider catalog. Every entry here is a cloud API that speaks
5
+ * the OpenAI-compatible wire protocol (`/chat/completions` + `/models`) unless its row sets
6
+ * `protocol: "anthropic"` (Anthropic Messages), so adding a new provider is ONE table row —
7
+ * `register-providers` builds its adapter via `makeOpenAICompatibleAdapter` (or `makeAnthropicCompatibleAdapter` for Anthropic rows), and routing / discovery / status / auth all derive
8
+ * their per-provider behavior from this table instead of hardcoded string branches.
9
+ *
10
+ * Constraints kept deliberately uniform so the generic paths stay simple:
11
+ * - `name` is the routing prefix (`<name>/…`) AND the config/auth key.
12
+ * - `apiKeyEnv` is `<NAME>_API_KEY` (matches `providerEnvVar`'s convention), except where a row spells out the provider's own variable (`HF_TOKEN`, `NANO_GPT_API_KEY`).
13
+ * - api-key-only (no OAuth flow); reasoning rides `reasoning_content`/`<think>`.
14
+ */
15
+ export interface OpenAICompatProviderDef {
16
+ /** Routing prefix + config/auth key (must be a ProviderName literal). */
17
+ readonly name: ProviderName;
18
+ /** Display name (companyLabel). */
19
+ readonly label: string;
20
+ /** Default API base URL (…/v1) — `${base}/chat/completions` + `${base}/models`. */
21
+ readonly baseUrl: string;
22
+ /** Env var (normally `<NAME>_API_KEY`) that seeds `config.providers[name]`. */
23
+ readonly apiKeyEnv: string;
24
+ /** Default model id (provider-prefixed) used by `--provider <name>`. */
25
+ readonly defaultModel: string;
26
+ /** Wire protocol: "openai" (/chat/completions, default) or "anthropic" (/v1/messages). */
27
+ readonly protocol?: "openai" | "anthropic";
28
+ }
29
+
30
+ export const OPENAI_COMPAT_PROVIDERS: readonly OpenAICompatProviderDef[] = [
31
+ { name: "groq", label: "Groq", baseUrl: "https://api.groq.com/openai/v1", apiKeyEnv: "GROQ_API_KEY", defaultModel: "groq/llama-3.3-70b-versatile" },
32
+ { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1", apiKeyEnv: "DEEPSEEK_API_KEY", defaultModel: "deepseek/deepseek-chat" },
33
+ { name: "mistral", label: "Mistral", baseUrl: "https://api.mistral.ai/v1", apiKeyEnv: "MISTRAL_API_KEY", defaultModel: "mistral/mistral-large-latest" },
34
+ { name: "openrouter", label: "OpenRouter", baseUrl: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", defaultModel: "openrouter/openai/gpt-4o-mini" },
35
+ { name: "together", label: "Together", baseUrl: "https://api.together.xyz/v1", apiKeyEnv: "TOGETHER_API_KEY", defaultModel: "together/meta-llama/Llama-3.3-70B-Instruct-Turbo" },
36
+ { name: "cerebras", label: "Cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKeyEnv: "CEREBRAS_API_KEY", defaultModel: "cerebras/llama-3.3-70b" },
37
+ { name: "fireworks", label: "Fireworks", baseUrl: "https://api.fireworks.ai/inference/v1", apiKeyEnv: "FIREWORKS_API_KEY", defaultModel: "fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct" },
38
+ { name: "nvidia", label: "NVIDIA", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", defaultModel: "nvidia/meta/llama-3.3-70b-instruct" },
39
+ // Additional gjc-parity OpenAI-compatible clouds (authoritative base URLs + env vars).
40
+ { name: "alibaba-coding-plan", label: "Alibaba Coding Plan", baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", apiKeyEnv: "ALIBABA_CODING_PLAN_API_KEY", defaultModel: "alibaba-coding-plan/qwen3.5-plus" },
41
+ { name: "huggingface", label: "Hugging Face", baseUrl: "https://router.huggingface.co/v1", apiKeyEnv: "HF_TOKEN", defaultModel: "huggingface/deepseek-ai/DeepSeek-R1" },
42
+ { name: "nanogpt", label: "NanoGPT", baseUrl: "https://nano-gpt.com/api/v1", apiKeyEnv: "NANO_GPT_API_KEY", defaultModel: "nanogpt/openai/gpt-5.4" },
43
+ { name: "qwen-portal", label: "Qwen Portal", baseUrl: "https://portal.qwen.ai/v1", apiKeyEnv: "QWEN_PORTAL_API_KEY", defaultModel: "qwen-portal/coder-model" },
44
+ { name: "synthetic", label: "Synthetic", baseUrl: "https://api.synthetic.new/openai/v1", apiKeyEnv: "SYNTHETIC_API_KEY", defaultModel: "synthetic/hf:moonshotai/Kimi-K2.5" },
45
+ { name: "venice", label: "Venice", baseUrl: "https://api.venice.ai/api/v1", apiKeyEnv: "VENICE_API_KEY", defaultModel: "venice/llama-3.3-70b" },
46
+ { name: "zenmux", label: "ZenMux", baseUrl: "https://zenmux.ai/api/v1", apiKeyEnv: "ZENMUX_API_KEY", defaultModel: "zenmux/anthropic/claude-opus-4.6" },
47
+ { name: "qianfan", label: "Qianfan", baseUrl: "https://qianfan.baidubce.com/v2", apiKeyEnv: "QIANFAN_API_KEY", defaultModel: "qianfan/deepseek-v3.2" },
48
+ { name: "xiaomi", label: "Xiaomi", baseUrl: "https://api.xiaomimimo.com/v1", apiKeyEnv: "XIAOMI_API_KEY", defaultModel: "xiaomi/mimo-v2-flash" },
49
+ { name: "xiaomi-token-plan-ams", label: "Xiaomi Token Plan (Europe)", baseUrl: "https://token-plan-ams.xiaomimimo.com/v1", apiKeyEnv: "XIAOMI_TOKEN_PLAN_AMS_API_KEY", defaultModel: "xiaomi-token-plan-ams/mimo-v2.5" },
50
+ { name: "xiaomi-token-plan-cn", label: "Xiaomi Token Plan (China)", baseUrl: "https://token-plan-cn.xiaomimimo.com/v1", apiKeyEnv: "XIAOMI_TOKEN_PLAN_CN_API_KEY", defaultModel: "xiaomi-token-plan-cn/mimo-v2.5" },
51
+ { name: "xiaomi-token-plan-sgp", label: "Xiaomi Token Plan (Singapore)", baseUrl: "https://token-plan-sgp.xiaomimimo.com/v1", apiKeyEnv: "XIAOMI_TOKEN_PLAN_SGP_API_KEY", defaultModel: "xiaomi-token-plan-sgp/mimo-v2.5" },
52
+ { name: "minimax-code", label: "MiniMax Code", baseUrl: "https://api.minimax.io/v1", apiKeyEnv: "MINIMAX_CODE_API_KEY", defaultModel: "minimax-code/minimax-m3" },
53
+ { name: "minimax-code-cn", label: "MiniMax Code (China)", baseUrl: "https://api.minimaxi.com/v1", apiKeyEnv: "MINIMAX_CODE_CN_API_KEY", defaultModel: "minimax-code-cn/minimax-m3" },
54
+ // Anthropic-Messages-protocol providers (served via makeAnthropicCompatibleAdapter).
55
+ { name: "zai", label: "z.ai", baseUrl: "https://api.z.ai/api/anthropic", apiKeyEnv: "ZAI_API_KEY", defaultModel: "zai/glm-5.2", protocol: "anthropic" },
56
+ { name: "minimax", label: "MiniMax", baseUrl: "https://api.minimax.io/anthropic", apiKeyEnv: "MINIMAX_API_KEY", defaultModel: "minimax/minimax-m3", protocol: "anthropic" },
57
+ ];
58
+
59
+ const BY_NAME = new Map<string, OpenAICompatProviderDef>(OPENAI_COMPAT_PROVIDERS.map(p => [p.name, p]));
60
+
61
+ /** All catalog provider names (for PROVIDER_NAMES / AuthProvider unions). */
62
+ export const OPENAI_COMPAT_NAMES: readonly ProviderName[] = OPENAI_COMPAT_PROVIDERS.map(p => p.name);
63
+
64
+ /** Catalog entry for a provider name, or undefined when it is not catalog-driven. */
65
+ export function openaiCompatDef(name: string): OpenAICompatProviderDef | undefined {
66
+ return BY_NAME.get(name);
67
+ }
68
+
69
+ /** True when `name` is a catalog-driven provider (OpenAI-compatible or `protocol: "anthropic"`). */
70
+ export function isOpenAICompatProvider(name: string): boolean {
71
+ return BY_NAME.has(name);
72
+ }
@@ -0,0 +1,31 @@
1
+ import type { ProviderAdapter, CallOptions, ProviderName } from "../types";
2
+ import type { Credential } from "../../auth";
3
+ import { openaiAdapter } from "./openai";
4
+
5
+ /**
6
+ * Factory for OpenAI-compatible providers (LM Studio, xAI/Grok, …). They all speak
7
+ * the same `/chat/completions` wire protocol, so each is a thin shim over
8
+ * `openaiAdapter`: strip the `<name>/` routing prefix, pin the base URL, and pass the
9
+ * credential (or force keyless for local servers that ignore auth). `keyless` keeps
10
+ * the openai adapter on plain /chat/completions (an oauth credential would divert to
11
+ * the Codex Responses backend).
12
+ */
13
+ const KEYLESS: Credential = { kind: "none", provider: "openai" };
14
+
15
+ export function makeOpenAICompatibleAdapter(opts: { name: ProviderName; baseUrl: string; keyless?: boolean }): ProviderAdapter {
16
+ const prefix = `${opts.name}/`;
17
+ const prep = (o: CallOptions): CallOptions => ({
18
+ ...o,
19
+ model: o.model.startsWith(prefix) ? o.model.slice(prefix.length) : o.model,
20
+ baseUrl: o.baseUrl ?? opts.baseUrl,
21
+ });
22
+ const credFor = (c: Credential): Credential => (opts.keyless ? KEYLESS : c);
23
+ return {
24
+ name: opts.name,
25
+ supportsNativeTools: openaiAdapter.supportsNativeTools,
26
+ call: (messages, options, credential) => openaiAdapter.call(messages, prep(options), credFor(credential)),
27
+ async *stream(messages, options, credential) {
28
+ yield* openaiAdapter.stream!(messages, prep(options), credFor(credential));
29
+ },
30
+ };
31
+ }