@oh-my-pi/pi-ai 9.1.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "9.1.0",
3
+ "version": "9.2.0",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -63,7 +63,7 @@
63
63
  "@connectrpc/connect-node": "^2.1.1",
64
64
  "@google/genai": "^1.38.0",
65
65
  "@mistralai/mistralai": "^1.13.0",
66
- "@oh-my-pi/pi-utils": "9.1.0",
66
+ "@oh-my-pi/pi-utils": "9.2.0",
67
67
  "@sinclair/typebox": "^0.34.48",
68
68
  "@smithy/node-http-handler": "^4.4.8",
69
69
  "ajv": "^8.17.1",
package/src/index.ts CHANGED
@@ -8,6 +8,7 @@ export * from "./providers/google";
8
8
  export * from "./providers/google-gemini-cli";
9
9
  export * from "./providers/google-gemini-cli-usage";
10
10
  export * from "./providers/google-vertex";
11
+ export * from "./providers/kimi";
11
12
  export * from "./providers/openai-completions";
12
13
  export * from "./providers/openai-responses";
13
14
  export * from "./stream";
@@ -424,9 +424,7 @@ export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<s
424
424
  "X-App": "cli",
425
425
  };
426
426
 
427
- if (oauthToken || !isAnthropicBaseUrl(options.baseUrl)) {
428
- headers.Authorization = `Bearer ${options.apiKey}`;
429
- } else {
427
+ if (!oauthToken) {
430
428
  headers["X-Api-Key"] = options.apiKey;
431
429
  }
432
430
 
@@ -466,11 +464,19 @@ function createClient(
466
464
  defaultHeaders: defaultHeadersBase,
467
465
  };
468
466
 
469
- if (oauthToken || !isAnthropicBaseUrl(model.baseUrl)) {
467
+ if (isAnthropicBaseUrl(model.baseUrl)) {
468
+ // For Anthropic API, let SDK handle auth
469
+ if (oauthToken) {
470
+ clientOptions.apiKey = null;
471
+ clientOptions.authToken = apiKey;
472
+ } else {
473
+ clientOptions.apiKey = apiKey;
474
+ }
475
+ } else {
476
+ // For non-Anthropic URLs (e.g., Kimi), use authToken
477
+ // The SDK will add Authorization: Bearer header, which is what we want
470
478
  clientOptions.apiKey = null;
471
479
  clientOptions.authToken = apiKey;
472
- } else {
473
- clientOptions.apiKey = apiKey;
474
480
  }
475
481
 
476
482
  const client = new Anthropic(clientOptions);
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Kimi Code provider - wraps OpenAI or Anthropic API based on format setting.
3
+ *
4
+ * Kimi offers both OpenAI-compatible and Anthropic-compatible APIs:
5
+ * - OpenAI: https://api.kimi.com/coding/v1/chat/completions
6
+ * - Anthropic: https://api.kimi.com/coding/v1/messages
7
+ *
8
+ * The Anthropic API is generally more stable and recommended.
9
+ * Note: Kimi calculates TPM rate limits based on max_tokens, not actual output.
10
+ */
11
+
12
+ import type { Api, Context, Model, SimpleStreamOptions } from "../types";
13
+ import { AssistantMessageEventStream } from "../utils/event-stream";
14
+ import { getKimiCommonHeaders } from "../utils/oauth/kimi";
15
+ import { streamAnthropic } from "./anthropic";
16
+ import { streamOpenAICompletions } from "./openai-completions";
17
+
18
/** Wire protocols the Kimi Code provider can speak. */
export type KimiApiFormat = "anthropic" | "openai";

/**
 * Base URL handed to the Anthropic client when talking to Kimi.
 * It deliberately stops before `/v1`: per the original note, the Anthropic SDK
 * appends `/v1/messages` itself.
 */
const KIMI_ANTHROPIC_BASE_URL = "https://api.kimi.com/coding";
22
+
23
/**
 * Fallback thinking-token budget per reasoning level, used by the Anthropic
 * format when the caller supplies no override for that level.
 * Intended to mirror the defaults in stream.ts.
 */
const DEFAULT_THINKING_BUDGETS = {
	minimal: 1024,
	low: 4096,
	medium: 8192,
	high: 16384,
	xhigh: 32768,
} as const;
31
+
32
/** Options for the Kimi Code provider: the simple stream options plus a wire-format selector. */
export interface KimiOptions extends SimpleStreamOptions {
	/** Which Kimi API to call: "openai" or "anthropic". Treated as "anthropic" when omitted. */
	format?: KimiApiFormat;
}
36
+
37
/**
 * Stream a completion from Kimi Code, routing to the Anthropic- or
 * OpenAI-compatible API according to `options.format` (default: "anthropic").
 *
 * The stream is returned synchronously. Fetching the Kimi headers and forwarding
 * the inner provider's events happen in a detached async task; any exception
 * raised there is delivered as a single `error` event instead of being thrown.
 *
 * @param model - Kimi model definition, declared against the OpenAI-completions API.
 * @param context - Conversation context handed unchanged to the underlying provider.
 * @param options - Stream options; `format` selects the wire protocol.
 * @returns The stream that receives every event emitted by the underlying provider.
 */
export function streamKimi(
	model: Model<"openai-completions">,
	context: Context,
	options?: KimiOptions,
): AssistantMessageEventStream {
	const stream = new AssistantMessageEventStream();
	const format = options?.format ?? "anthropic";

	// Detached on purpose: the try/catch below converts every failure into an error event.
	void (async () => {
		try {
			const kimiHeaders = await getKimiCommonHeaders();
			// Caller-supplied headers take precedence over the Kimi defaults.
			const mergedHeaders = { ...kimiHeaders, ...options?.headers };

			// NOTE(review): options.toolChoice is not forwarded on either path — confirm whether that is intended.
			if (format === "anthropic") {
				// Synthetic Anthropic model that points at Kimi's endpoint.
				const anthropicModel: Model<"anthropic-messages"> = {
					id: model.id,
					name: model.name,
					api: "anthropic-messages",
					provider: model.provider,
					baseUrl: KIMI_ANTHROPIC_BASE_URL,
					headers: mergedHeaders,
					contextWindow: model.contextWindow,
					maxTokens: model.maxTokens,
					reasoning: model.reasoning,
					input: model.input,
					cost: model.cost,
				};

				// Kimi rate-limits on the requested max_tokens, so the default is capped
				// and never raised to make room for thinking.
				const maxTokens = options?.maxTokens ?? Math.min(model.maxTokens, 32000);

				// Derive the thinking budget from the requested reasoning level.
				const reasoning = options?.reasoning;
				const thinkingEnabled = !!reasoning && model.reasoning;
				let thinkingBudget: number | undefined = reasoning
					? (options?.thinkingBudgets?.[reasoning] ?? DEFAULT_THINKING_BUDGETS[reasoning])
					: undefined;

				// Anthropic's Messages API documents budget_tokens < max_tokens; the default
				// "xhigh" budget (32768) exceeds the default 32000 cap. Shrink the budget
				// instead of growing max_tokens, reserving some room for the visible answer.
				// NOTE(review): presumably Kimi's compatible endpoint enforces the same rule — confirm.
				const minOutputTokens = 1024;
				if (thinkingEnabled && thinkingBudget !== undefined && thinkingBudget >= maxTokens) {
					thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
				}

				const innerStream = streamAnthropic(anthropicModel, context, {
					apiKey: options?.apiKey,
					temperature: options?.temperature,
					maxTokens,
					signal: options?.signal,
					headers: mergedHeaders,
					sessionId: options?.sessionId,
					onPayload: options?.onPayload,
					thinkingEnabled,
					thinkingBudgetTokens: thinkingBudget,
				});

				for await (const event of innerStream) {
					stream.push(event);
				}
			} else {
				// OpenAI format: the original model is used as-is, with the Kimi headers attached.
				const innerStream = streamOpenAICompletions(model, context, {
					apiKey: options?.apiKey,
					temperature: options?.temperature,
					maxTokens: options?.maxTokens ?? model.maxTokens,
					signal: options?.signal,
					headers: mergedHeaders,
					sessionId: options?.sessionId,
					onPayload: options?.onPayload,
					reasoningEffort: options?.reasoning,
				});

				for await (const event of innerStream) {
					stream.push(event);
				}
			}
		} catch (err) {
			stream.push({
				type: "error",
				reason: "error",
				error: createErrorMessage(model, err),
			});
		}
	})();

	return stream;
}
121
+
122
/**
 * Build the assistant-shaped message attached to an `error` stream event.
 *
 * The failure text (the `Error` message, or the stringified value for anything
 * else) becomes the only content part; usage and cost are all zero, the stop
 * reason is "error", and the timestamp is taken at call time.
 *
 * @param model - Model whose `api`, `provider` and `id` label the message.
 * @param err - The caught value to report.
 */
function createErrorMessage(model: Model<Api>, err: unknown) {
	const text = err instanceof Error ? err.message : String(err);
	const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
	const zeroUsage = {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		totalTokens: 0,
		cost: zeroCost,
	};

	return {
		role: "assistant" as const,
		content: [{ type: "text" as const, text }],
		api: model.api,
		provider: model.provider,
		model: model.id,
		usage: zeroUsage,
		stopReason: "error" as const,
		timestamp: Date.now(),
	};
}
141
+
142
/**
 * Report whether a model belongs to the Kimi Code provider,
 * i.e. its `provider` field is exactly "kimi-code".
 */
export function isKimiModel(model: Model<Api>): boolean {
	const { provider } = model;
	return provider === "kimi-code";
}
@@ -397,6 +397,12 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
397
397
  const messages = convertMessages(model, context, compat);
398
398
  maybeAddOpenRouterAnthropicCacheControl(model, messages);
399
399
 
400
+ // Kimi (including via OpenRouter) calculates TPM rate limits based on max_tokens, not actual output.
401
+ // Always send max_tokens to avoid their high default causing rate limit issues.
402
+ // Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
403
+ const isKimi = model.id.includes("moonshotai/kimi");
404
+ const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
405
+
400
406
  const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
401
407
  model: model.id,
402
408
  messages,
@@ -411,11 +417,11 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
411
417
  params.store = false;
412
418
  }
413
419
 
414
- if (options?.maxTokens) {
420
+ if (effectiveMaxTokens) {
415
421
  if (compat.maxTokensField === "max_tokens") {
416
- (params as any).max_tokens = options.maxTokens;
422
+ (params as any).max_tokens = effectiveMaxTokens;
417
423
  } else {
418
- params.max_completion_tokens = options.maxTokens;
424
+ params.max_completion_tokens = effectiveMaxTokens;
419
425
  }
420
426
  }
421
427
 
package/src/stream.ts CHANGED
@@ -13,6 +13,7 @@ import {
13
13
  streamGoogleGeminiCli,
14
14
  } from "./providers/google-gemini-cli";
15
15
  import { type GoogleVertexOptions, streamGoogleVertex } from "./providers/google-vertex";
16
+ import { isKimiModel, streamKimi } from "./providers/kimi";
16
17
  import { streamOpenAICodexResponses } from "./providers/openai-codex-responses";
17
18
  import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions";
18
19
  import { streamOpenAIResponses } from "./providers/openai-responses";
@@ -212,6 +213,16 @@ export function streamSimple<TApi extends Api>(
212
213
  throw new Error(`No API key for provider: ${model.provider}`);
213
214
  }
214
215
 
216
+ // Kimi Code - route to dedicated handler that wraps OpenAI or Anthropic API
217
+ if (isKimiModel(model)) {
218
+ // Pass raw SimpleStreamOptions - streamKimi handles mapping internally
219
+ return streamKimi(model as Model<"openai-completions">, context, {
220
+ ...options,
221
+ apiKey,
222
+ format: options?.kimiApiFormat ?? "anthropic",
223
+ });
224
+ }
225
+
215
226
  const providerOptions = mapOptionsForApi(model, options, apiKey);
216
227
  return stream(model, context, providerOptions);
217
228
  }
package/src/types.ts CHANGED
@@ -141,6 +141,8 @@ export interface SimpleStreamOptions extends StreamOptions {
141
141
  cursorOnToolResult?: CursorToolResultHandler;
142
142
  /** Optional tool choice override for compatible providers */
143
143
  toolChoice?: ToolChoice;
144
+ /** API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic") */
145
+ kimiApiFormat?: "openai" | "anthropic";
144
146
  }
145
147
 
146
148
  // Generic StreamFunction with typed options