@oh-my-pi/pi-ai 9.1.0 → 9.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/index.ts +1 -0
- package/src/providers/anthropic.ts +12 -6
- package/src/providers/kimi.ts +147 -0
- package/src/providers/openai-completions.ts +9 -3
- package/src/stream.ts +11 -0
- package/src/types.ts +2 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oh-my-pi/pi-ai",
|
|
3
|
-
"version": "9.1.0",
|
|
3
|
+
"version": "9.2.0",
|
|
4
4
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
"@connectrpc/connect-node": "^2.1.1",
|
|
64
64
|
"@google/genai": "^1.38.0",
|
|
65
65
|
"@mistralai/mistralai": "^1.13.0",
|
|
66
|
-
"@oh-my-pi/pi-utils": "9.1.0",
|
|
66
|
+
"@oh-my-pi/pi-utils": "9.2.0",
|
|
67
67
|
"@sinclair/typebox": "^0.34.48",
|
|
68
68
|
"@smithy/node-http-handler": "^4.4.8",
|
|
69
69
|
"ajv": "^8.17.1",
|
package/src/index.ts
CHANGED
|
@@ -8,6 +8,7 @@ export * from "./providers/google";
|
|
|
8
8
|
export * from "./providers/google-gemini-cli";
|
|
9
9
|
export * from "./providers/google-gemini-cli-usage";
|
|
10
10
|
export * from "./providers/google-vertex";
|
|
11
|
+
export * from "./providers/kimi";
|
|
11
12
|
export * from "./providers/openai-completions";
|
|
12
13
|
export * from "./providers/openai-responses";
|
|
13
14
|
export * from "./stream";
|
|
package/src/providers/anthropic.ts
CHANGED
|
@@ -424,9 +424,7 @@ export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<s
|
|
|
424
424
|
"X-App": "cli",
|
|
425
425
|
};
|
|
426
426
|
|
|
427
|
-
if (oauthToken) {
|
|
428
|
-
headers.Authorization = `Bearer ${options.apiKey}`;
|
|
429
|
-
} else {
|
|
427
|
+
if (!oauthToken) {
|
|
430
428
|
headers["X-Api-Key"] = options.apiKey;
|
|
431
429
|
}
|
|
432
430
|
|
|
@@ -466,11 +464,19 @@ function createClient(
|
|
|
466
464
|
defaultHeaders: defaultHeadersBase,
|
|
467
465
|
};
|
|
468
466
|
|
|
469
|
-
if (oauthToken) {
|
|
467
|
+
if (isAnthropicBaseUrl(model.baseUrl)) {
|
|
468
|
+
// For Anthropic API, let SDK handle auth
|
|
469
|
+
if (oauthToken) {
|
|
470
|
+
clientOptions.apiKey = null;
|
|
471
|
+
clientOptions.authToken = apiKey;
|
|
472
|
+
} else {
|
|
473
|
+
clientOptions.apiKey = apiKey;
|
|
474
|
+
}
|
|
475
|
+
} else {
|
|
476
|
+
// For non-Anthropic URLs (e.g., Kimi), use authToken
|
|
477
|
+
// The SDK will add Authorization: Bearer header, which is what we want
|
|
470
478
|
clientOptions.apiKey = null;
|
|
471
479
|
clientOptions.authToken = apiKey;
|
|
472
|
-
} else {
|
|
473
|
-
clientOptions.apiKey = apiKey;
|
|
474
480
|
}
|
|
475
481
|
|
|
476
482
|
const client = new Anthropic(clientOptions);
|
|
package/src/providers/kimi.ts
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Kimi Code provider - wraps OpenAI or Anthropic API based on format setting.
|
|
3
|
+
*
|
|
4
|
+
* Kimi offers both OpenAI-compatible and Anthropic-compatible APIs:
|
|
5
|
+
* - OpenAI: https://api.kimi.com/coding/v1/chat/completions
|
|
6
|
+
* - Anthropic: https://api.kimi.com/coding/v1/messages
|
|
7
|
+
*
|
|
8
|
+
* The Anthropic API is generally more stable and recommended.
|
|
9
|
+
* Note: Kimi calculates TPM rate limits based on max_tokens, not actual output.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { Api, Context, Model, SimpleStreamOptions } from "../types";
|
|
13
|
+
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
14
|
+
import { getKimiCommonHeaders } from "../utils/oauth/kimi";
|
|
15
|
+
import { streamAnthropic } from "./anthropic";
|
|
16
|
+
import { streamOpenAICompletions } from "./openai-completions";
|
|
17
|
+
|
|
18
|
+
export type KimiApiFormat = "openai" | "anthropic";
|
|
19
|
+
|
|
20
|
+
// Note: Anthropic SDK appends /v1/messages, so base URL should not include /v1
|
|
21
|
+
const KIMI_ANTHROPIC_BASE_URL = "https://api.kimi.com/coding";
|
|
22
|
+
|
|
23
|
+
// Default thinking budgets for Anthropic format (matches stream.ts)
|
|
24
|
+
const DEFAULT_THINKING_BUDGETS = {
|
|
25
|
+
minimal: 1024,
|
|
26
|
+
low: 4096,
|
|
27
|
+
medium: 8192,
|
|
28
|
+
high: 16384,
|
|
29
|
+
xhigh: 32768,
|
|
30
|
+
} as const;
|
|
31
|
+
|
|
32
|
+
export interface KimiOptions extends SimpleStreamOptions {
|
|
33
|
+
/** API format: "openai" or "anthropic". Default: "anthropic" */
|
|
34
|
+
format?: KimiApiFormat;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Stream from Kimi Code, routing to either OpenAI or Anthropic API based on format.
|
|
39
|
+
* Returns synchronously like other providers - async header fetching happens internally.
|
|
40
|
+
*/
|
|
41
|
+
export function streamKimi(
|
|
42
|
+
model: Model<"openai-completions">,
|
|
43
|
+
context: Context,
|
|
44
|
+
options?: KimiOptions,
|
|
45
|
+
): AssistantMessageEventStream {
|
|
46
|
+
const stream = new AssistantMessageEventStream();
|
|
47
|
+
const format = options?.format ?? "anthropic";
|
|
48
|
+
|
|
49
|
+
// Async IIFE to handle header fetching and stream piping
|
|
50
|
+
(async () => {
|
|
51
|
+
try {
|
|
52
|
+
const kimiHeaders = await getKimiCommonHeaders();
|
|
53
|
+
const mergedHeaders = { ...kimiHeaders, ...options?.headers };
|
|
54
|
+
|
|
55
|
+
if (format === "anthropic") {
|
|
56
|
+
// Create a synthetic Anthropic model pointing to Kimi's endpoint
|
|
57
|
+
const anthropicModel: Model<"anthropic-messages"> = {
|
|
58
|
+
id: model.id,
|
|
59
|
+
name: model.name,
|
|
60
|
+
api: "anthropic-messages",
|
|
61
|
+
provider: model.provider,
|
|
62
|
+
baseUrl: KIMI_ANTHROPIC_BASE_URL,
|
|
63
|
+
headers: mergedHeaders,
|
|
64
|
+
contextWindow: model.contextWindow,
|
|
65
|
+
maxTokens: model.maxTokens,
|
|
66
|
+
reasoning: model.reasoning,
|
|
67
|
+
input: model.input,
|
|
68
|
+
cost: model.cost,
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// Calculate thinking budget from reasoning level
|
|
72
|
+
const reasoning = options?.reasoning;
|
|
73
|
+
const thinkingEnabled = !!reasoning && model.reasoning;
|
|
74
|
+
const thinkingBudget = reasoning
|
|
75
|
+
? (options?.thinkingBudgets?.[reasoning] ?? DEFAULT_THINKING_BUDGETS[reasoning])
|
|
76
|
+
: undefined;
|
|
77
|
+
|
|
78
|
+
const innerStream = streamAnthropic(anthropicModel, context, {
|
|
79
|
+
apiKey: options?.apiKey,
|
|
80
|
+
temperature: options?.temperature,
|
|
81
|
+
maxTokens: options?.maxTokens ?? Math.min(model.maxTokens, 32000),
|
|
82
|
+
signal: options?.signal,
|
|
83
|
+
headers: mergedHeaders,
|
|
84
|
+
sessionId: options?.sessionId,
|
|
85
|
+
onPayload: options?.onPayload,
|
|
86
|
+
thinkingEnabled,
|
|
87
|
+
thinkingBudgetTokens: thinkingBudget,
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
for await (const event of innerStream) {
|
|
91
|
+
stream.push(event);
|
|
92
|
+
}
|
|
93
|
+
} else {
|
|
94
|
+
// OpenAI format - use original model with Kimi headers
|
|
95
|
+
const innerStream = streamOpenAICompletions(model, context, {
|
|
96
|
+
apiKey: options?.apiKey,
|
|
97
|
+
temperature: options?.temperature,
|
|
98
|
+
maxTokens: options?.maxTokens ?? model.maxTokens,
|
|
99
|
+
signal: options?.signal,
|
|
100
|
+
headers: mergedHeaders,
|
|
101
|
+
sessionId: options?.sessionId,
|
|
102
|
+
onPayload: options?.onPayload,
|
|
103
|
+
reasoningEffort: options?.reasoning,
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
for await (const event of innerStream) {
|
|
107
|
+
stream.push(event);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
} catch (err) {
|
|
111
|
+
stream.push({
|
|
112
|
+
type: "error",
|
|
113
|
+
reason: "error",
|
|
114
|
+
error: createErrorMessage(model, err),
|
|
115
|
+
});
|
|
116
|
+
}
|
|
117
|
+
})();
|
|
118
|
+
|
|
119
|
+
return stream;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
function createErrorMessage(model: Model<Api>, err: unknown) {
|
|
123
|
+
return {
|
|
124
|
+
role: "assistant" as const,
|
|
125
|
+
content: [{ type: "text" as const, text: err instanceof Error ? err.message : String(err) }],
|
|
126
|
+
api: model.api,
|
|
127
|
+
provider: model.provider,
|
|
128
|
+
model: model.id,
|
|
129
|
+
usage: {
|
|
130
|
+
input: 0,
|
|
131
|
+
output: 0,
|
|
132
|
+
cacheRead: 0,
|
|
133
|
+
cacheWrite: 0,
|
|
134
|
+
totalTokens: 0,
|
|
135
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
136
|
+
},
|
|
137
|
+
stopReason: "error" as const,
|
|
138
|
+
timestamp: Date.now(),
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Check if a model is a Kimi Code model.
|
|
144
|
+
*/
|
|
145
|
+
export function isKimiModel(model: Model<Api>): boolean {
|
|
146
|
+
return model.provider === "kimi-code";
|
|
147
|
+
}
|
|
package/src/providers/openai-completions.ts
CHANGED
|
@@ -397,6 +397,12 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|
|
397
397
|
const messages = convertMessages(model, context, compat);
|
|
398
398
|
maybeAddOpenRouterAnthropicCacheControl(model, messages);
|
|
399
399
|
|
|
400
|
+
// Kimi (including via OpenRouter) calculates TPM rate limits based on max_tokens, not actual output.
|
|
401
|
+
// Always send max_tokens to avoid their high default causing rate limit issues.
|
|
402
|
+
// Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
|
|
403
|
+
const isKimi = model.id.includes("moonshotai/kimi");
|
|
404
|
+
const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
|
|
405
|
+
|
|
400
406
|
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
|
|
401
407
|
model: model.id,
|
|
402
408
|
messages,
|
|
@@ -411,11 +417,11 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|
|
411
417
|
params.store = false;
|
|
412
418
|
}
|
|
413
419
|
|
|
414
|
-
if (options?.maxTokens) {
|
|
420
|
+
if (effectiveMaxTokens) {
|
|
415
421
|
if (compat.maxTokensField === "max_tokens") {
|
|
416
|
-
(params as any).max_tokens = options.maxTokens;
|
|
422
|
+
(params as any).max_tokens = effectiveMaxTokens;
|
|
417
423
|
} else {
|
|
418
|
-
params.max_completion_tokens = options.maxTokens;
|
|
424
|
+
params.max_completion_tokens = effectiveMaxTokens;
|
|
419
425
|
}
|
|
420
426
|
}
|
|
421
427
|
|
package/src/stream.ts
CHANGED
|
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
streamGoogleGeminiCli,
|
|
14
14
|
} from "./providers/google-gemini-cli";
|
|
15
15
|
import { type GoogleVertexOptions, streamGoogleVertex } from "./providers/google-vertex";
|
|
16
|
+
import { isKimiModel, streamKimi } from "./providers/kimi";
|
|
16
17
|
import { streamOpenAICodexResponses } from "./providers/openai-codex-responses";
|
|
17
18
|
import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions";
|
|
18
19
|
import { streamOpenAIResponses } from "./providers/openai-responses";
|
|
@@ -212,6 +213,16 @@ export function streamSimple<TApi extends Api>(
|
|
|
212
213
|
throw new Error(`No API key for provider: ${model.provider}`);
|
|
213
214
|
}
|
|
214
215
|
|
|
216
|
+
// Kimi Code - route to dedicated handler that wraps OpenAI or Anthropic API
|
|
217
|
+
if (isKimiModel(model)) {
|
|
218
|
+
// Pass raw SimpleStreamOptions - streamKimi handles mapping internally
|
|
219
|
+
return streamKimi(model as Model<"openai-completions">, context, {
|
|
220
|
+
...options,
|
|
221
|
+
apiKey,
|
|
222
|
+
format: options?.kimiApiFormat ?? "anthropic",
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
|
|
215
226
|
const providerOptions = mapOptionsForApi(model, options, apiKey);
|
|
216
227
|
return stream(model, context, providerOptions);
|
|
217
228
|
}
|
package/src/types.ts
CHANGED
|
@@ -141,6 +141,8 @@ export interface SimpleStreamOptions extends StreamOptions {
|
|
|
141
141
|
cursorOnToolResult?: CursorToolResultHandler;
|
|
142
142
|
/** Optional tool choice override for compatible providers */
|
|
143
143
|
toolChoice?: ToolChoice;
|
|
144
|
+
/** API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic") */
|
|
145
|
+
kimiApiFormat?: "openai" | "anthropic";
|
|
144
146
|
}
|
|
145
147
|
|
|
146
148
|
// Generic StreamFunction with typed options
|