@xiaozhiclaw/provider-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
- package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
- package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
- package/dist/adapters/gemini-file-upload-adapter.js +92 -0
- package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
- package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
- package/dist/adapters/index.d.ts +10 -0
- package/dist/adapters/index.js +10 -0
- package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
- package/dist/adapters/openai-file-upload-adapter.js +56 -0
- package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
- package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
- package/dist/builtin-providers.d.ts +8 -0
- package/dist/builtin-providers.js +2237 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.js +1 -0
- package/dist/credentials.d.ts +1 -0
- package/dist/credentials.js +8 -0
- package/dist/debug-transport.d.ts +12 -0
- package/dist/debug-transport.js +99 -0
- package/dist/errors.d.ts +11 -0
- package/dist/errors.js +12 -0
- package/dist/events.d.ts +48 -0
- package/dist/events.js +1 -0
- package/dist/file-upload-service.d.ts +68 -0
- package/dist/file-upload-service.js +110 -0
- package/dist/gemini-schema-utils.d.ts +17 -0
- package/dist/gemini-schema-utils.js +76 -0
- package/dist/index.d.ts +37 -0
- package/dist/index.js +33 -0
- package/dist/llm-client.d.ts +43 -0
- package/dist/llm-client.js +217 -0
- package/dist/media-client.d.ts +42 -0
- package/dist/media-client.js +174 -0
- package/dist/media-transport.d.ts +176 -0
- package/dist/media-transport.js +16 -0
- package/dist/media.d.ts +2 -0
- package/dist/media.js +1 -0
- package/dist/model-detection.d.ts +22 -0
- package/dist/model-detection.js +28 -0
- package/dist/paths.d.ts +2 -0
- package/dist/paths.js +11 -0
- package/dist/provider-def.d.ts +220 -0
- package/dist/provider-def.js +9 -0
- package/dist/provider-registry.d.ts +51 -0
- package/dist/provider-registry.js +130 -0
- package/dist/provider-tool-api.d.ts +44 -0
- package/dist/provider-tool-api.js +9 -0
- package/dist/provider-variant-resolver.d.ts +35 -0
- package/dist/provider-variant-resolver.js +174 -0
- package/dist/retry.d.ts +37 -0
- package/dist/retry.js +71 -0
- package/dist/transport.d.ts +281 -0
- package/dist/transport.js +27 -0
- package/dist/transports/anthropic-messages.d.ts +65 -0
- package/dist/transports/anthropic-messages.js +1004 -0
- package/dist/transports/gemini-cache-api.d.ts +86 -0
- package/dist/transports/gemini-cache-api.js +141 -0
- package/dist/transports/gemini-file-api.d.ts +90 -0
- package/dist/transports/gemini-file-api.js +164 -0
- package/dist/transports/gemini-generatecontent.d.ts +56 -0
- package/dist/transports/gemini-generatecontent.js +688 -0
- package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
- package/dist/transports/gemini-lyria-realtime.js +295 -0
- package/dist/transports/gemini-media.d.ts +53 -0
- package/dist/transports/gemini-media.js +383 -0
- package/dist/transports/media-resolve.d.ts +50 -0
- package/dist/transports/media-resolve.js +91 -0
- package/dist/transports/minimax-media.d.ts +56 -0
- package/dist/transports/minimax-media.js +433 -0
- package/dist/transports/openai-chat.d.ts +81 -0
- package/dist/transports/openai-chat.js +782 -0
- package/dist/transports/openai-media.d.ts +24 -0
- package/dist/transports/openai-media.js +118 -0
- package/dist/transports/openai-responses.d.ts +63 -0
- package/dist/transports/openai-responses.js +778 -0
- package/dist/transports/qwen-media.d.ts +59 -0
- package/dist/transports/qwen-media.js +411 -0
- package/dist/transports/realtime-transport.d.ts +183 -0
- package/dist/transports/realtime-transport.js +332 -0
- package/dist/transports/volcengine-grounding.d.ts +58 -0
- package/dist/transports/volcengine-grounding.js +69 -0
- package/dist/transports/volcengine-media.d.ts +94 -0
- package/dist/transports/volcengine-media.js +801 -0
- package/dist/transports/volcengine-responses.d.ts +64 -0
- package/dist/transports/volcengine-responses.js +797 -0
- package/dist/transports/zhipu-media.d.ts +82 -0
- package/dist/transports/zhipu-media.js +522 -0
- package/dist/transports/zhipu-tool-api.d.ts +35 -0
- package/dist/transports/zhipu-tool-api.js +126 -0
- package/dist/wire-types.d.ts +51 -0
- package/dist/wire-types.js +1 -0
- package/package.json +33 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
 * Maps a public model name onto the provider/model pairs in a registry that
 * can serve it, ranked from best to worst candidate.
 */
export class ProviderVariantResolver {
    registry;
    /**
     * @param registry Object exposing `listProviders()` and `listModels(providerId)`.
     */
    constructor(registry) {
        this.registry = registry;
    }
    /**
     * Collect every provider/model pair matching `input.publicModel`.
     *
     * A pair is kept only when the provider passes the optional
     * `userPreference.providerIds` allow-list, its transport equals the
     * normalized `requestedProtocol` (when one is given), the model id or one
     * of its aliases matches the public name, and every entry of
     * `input.capabilities` is among the inferred capabilities.
     *
     * @param input Resolution request; `publicModel` is trimmed before use.
     * @returns Matches ordered by descending score, then by group, then by
     *   provider id. Empty when the trimmed public model name is blank.
     */
    resolve(input) {
        const publicModel = input.publicModel.trim();
        if (publicModel.length === 0) {
            return [];
        }
        const preference = input.userPreference;
        const providerAllowList = preference?.providerIds
            ? new Set(preference.providerIds)
            : undefined;
        const requestedTransport = normalizeRequestedProtocol(input.requestedProtocol);
        const requiredCapabilities = input.capabilities ?? [];
        const found = [];
        for (const provider of this.registry.listProviders()) {
            const excludedByUser = providerAllowList && !providerAllowList.has(provider.id);
            if (excludedByUser) {
                continue;
            }
            const wrongTransport = requestedTransport && provider.transport !== requestedTransport;
            if (wrongTransport) {
                continue;
            }
            for (const model of this.registry.listModels(provider.id)) {
                const matchKind = modelMatchKind(model, publicModel);
                if (!matchKind) {
                    continue;
                }
                const capabilities = inferCapabilities(provider, model);
                const coversAll = requiredCapabilities.every(needed => capabilities.includes(needed));
                if (!coversAll) {
                    continue;
                }
                const variantKind = inferVariantKind(provider, model);
                // An explicit billing channel on the provider wins over the inferred one.
                const billingChannelKind = provider.billingChannelKind ?? inferBillingChannelKind(variantKind);
                // Reasons and score are derived from the same facts about this candidate.
                const facts = {
                    matchKind,
                    requestedTransport,
                    requiredCapabilities,
                    preferProviderId: preference?.preferProviderId,
                    preferVariantKind: preference?.preferVariantKind,
                    purpose: input.purpose,
                    provider,
                    variantKind,
                    capabilities,
                };
                const reasons = buildReasons(facts);
                const score = scoreResolution(facts);
                found.push({
                    provider: provider.id,
                    group: provider.group ?? provider.id,
                    publicModel,
                    nativeModelId: model.id,
                    displayName: model.name,
                    transport: provider.transport,
                    variantKind,
                    billingChannelKind,
                    capabilities,
                    score,
                    reasons,
                    providerDef: provider,
                    modelInfo: model,
                });
            }
        }
        found.sort((left, right) => {
            if (right.score !== left.score) {
                return right.score - left.score;
            }
            if (left.group !== right.group) {
                return left.group.localeCompare(right.group);
            }
            return left.provider.localeCompare(right.provider);
        });
        return found;
    }
    /**
     * Convenience wrapper around `resolve`.
     *
     * @returns The top-ranked match, or `undefined` when nothing matched.
     */
    resolveBest(input) {
        const [best] = this.resolve(input);
        return best;
    }
}
|
|
82
|
+
/**
 * Translate a caller-facing protocol name into a transport id.
 *
 * The shorthands "openai" and "anthropic" expand to "openai-chat" and
 * "anthropic-messages"; any other non-empty value is passed through untouched.
 *
 * @param protocol Requested protocol, possibly missing.
 * @returns The transport id, or `undefined` when no protocol was requested.
 */
function normalizeRequestedProtocol(protocol) {
    switch (protocol) {
        case "openai":
            return "openai-chat";
        case "anthropic":
            return "anthropic-messages";
        default:
            return protocol ? protocol : undefined;
    }
}
|
|
91
|
+
/**
 * Classify how a registry model relates to the requested public model name.
 *
 * @param model Registry model with an `id` and optional `aliases` list.
 * @param publicModel Public model name being resolved.
 * @returns "exact" when the model id matches, "alias" when one of the aliases
 *   matches, otherwise `undefined`.
 */
function modelMatchKind(model, publicModel) {
    if (sameId(model.id, publicModel)) {
        return "exact";
    }
    const aliases = model.aliases ?? [];
    const hitsAlias = aliases.some(candidate => sameId(candidate, publicModel));
    return hitsAlias ? "alias" : undefined;
}
|
|
96
|
+
/**
 * Compare two identifiers, ignoring surrounding whitespace and letter case.
 *
 * @returns `true` when both strings are equal after trimming and lower-casing.
 */
function sameId(a, b) {
    const canonical = (value) => value.trim().toLowerCase();
    return canonical(a) === canonical(b);
}
|
|
99
|
+
/**
 * Decide which variant kind a provider/model pair belongs to.
 *
 * Checked in order: an explicit `provider.variantKind`; the model's
 * `mediaType` ("realtime" for realtime audio/video, "media-plan" for any other
 * media type); coding markers in the provider or model id ("coding-plan");
 * and finally the provider transport.
 *
 * @returns One of the explicit variant kind, "realtime", "media-plan",
 *   "coding-plan", "openai-compatible", "anthropic-compatible" or "standard".
 */
function inferVariantKind(provider, model) {
    if (provider.variantKind) {
        return provider.variantKind;
    }
    const mediaType = model.mediaType;
    if (mediaType) {
        const realtime = mediaType === "realtime_audio" || mediaType === "realtime_video";
        return realtime ? "realtime" : "media-plan";
    }
    // The model id is only inspected when the provider id carries no coding marker.
    const codingOriented = provider.id.includes("coding")
        || ["coder", "codegeex"].some(marker => model.id.toLowerCase().includes(marker));
    if (codingOriented) {
        return "coding-plan";
    }
    switch (provider.transport) {
        case "openai-chat":
        case "openai-responses":
        case "volcengine-responses":
            return "openai-compatible";
        case "anthropic-messages":
            return "anthropic-compatible";
        default:
            return "standard";
    }
}
|
|
114
|
+
/**
 * Derive the billing channel from a variant kind.
 *
 * @returns "plan" for the "coding-plan" and "media-plan" variants, "paygo"
 *   for everything else.
 */
function inferBillingChannelKind(variantKind) {
    const planVariants = ["coding-plan", "media-plan"];
    if (planVariants.includes(variantKind)) {
        return "plan";
    }
    return "paygo";
}
|
|
117
|
+
/**
 * Build the capability list for a provider/model pair.
 *
 * Starts from the capabilities declared on the provider and adds tags implied
 * by the model flags (`reasoning`, `vision`, `mediaType`, `toolCall`), the
 * provider quirks, and coding markers in the provider or model id.
 *
 * @returns De-duplicated capability names in default sort order.
 */
function inferCapabilities(provider, model) {
    const quirks = provider.quirks;
    const tags = new Set(provider.capabilities ?? []);
    const realtimeMedia = model.mediaType === "realtime_audio" || model.mediaType === "realtime_video";
    const anyBuiltinTool = quirks?.builtinWebSearch || quirks?.builtinCodeInterpreter || quirks?.builtinUrlContext;
    // The model id is only inspected when the provider id carries no coding marker.
    const codingOriented = provider.id.includes("coding")
        || ["coder", "codegeex"].some(marker => model.id.toLowerCase().includes(marker));
    const derived = [
        ["thinking", model.reasoning],
        ["vision", model.vision],
        ["media", model.mediaType],
        ["realtime", realtimeMedia],
        ["tool_stream", model.toolCall && quirks?.supportsToolStream],
        ["reasoning_split", quirks?.supportsReasoningSplit],
        ["builtin_tools", anyBuiltinTool],
        ["coding", codingOriented],
    ];
    for (const [tag, applies] of derived) {
        if (applies) {
            tags.add(tag);
        }
    }
    return Array.from(tags).sort();
}
|
|
139
|
+
/**
 * Compute the ranking score of one candidate; higher scores rank first.
 *
 * Starts at 100 and adds a fixed bonus for each satisfied criterion: match
 * kind, requested transport, number of required capabilities, preferred
 * provider and variant, fit between purpose and media capability, streaming
 * and reasoning extras, and the coding-plan variant.
 *
 * @param input Facts about the candidate (match kind, provider, variant kind,
 *   capabilities) plus the caller's request and preferences.
 * @returns The total score.
 */
function scoreResolution(input) {
    const { capabilities, provider, purpose, variantKind } = input;
    const has = (capability) => capabilities.includes(capability);
    const mediaPurpose = purpose === "imageGeneration" || purpose === "videoGeneration";
    const bonuses = [
        input.matchKind === "exact" ? 25 : 15,
        input.requestedTransport && provider.transport === input.requestedTransport ? 20 : 0,
        input.requiredCapabilities.length * 15,
        input.preferProviderId && provider.id === input.preferProviderId ? 50 : 0,
        input.preferVariantKind && variantKind === input.preferVariantKind ? 35 : 0,
        purpose === "textGeneration" && !has("media") ? 5 : 0,
        mediaPurpose && has("media") ? 10 : 0,
        has("tool_stream") ? 8 : 0,
        has("reasoning_split") ? 8 : 0,
        // Coding plans served by a provider whose id contains "coding" get the larger bonus.
        variantKind === "coding-plan" ? (provider.id.includes("coding") ? 12 : 5) : 0,
    ];
    return bonuses.reduce((total, bonus) => total + bonus, 100);
}
|
|
163
|
+
/**
 * Produce the human-readable explanations attached to a match.
 *
 * The list opens with how the model was matched, followed by one entry for
 * the requested protocol (if any), one per required capability, and one each
 * for a satisfied provider or variant preference.
 *
 * @param input Same facts object that is passed to the scorer.
 * @returns Reason strings in the order described above.
 */
function buildReasons(input) {
    const matchedBy = input.matchKind === "exact" ? "model id matched" : "public alias matched";
    const protocolReason = input.requestedTransport
        ? [`protocol ${input.requestedTransport} matched`]
        : [];
    const capabilityReasons = Array.from(input.requiredCapabilities, capability => `capability ${capability} matched`);
    const providerPreferred = input.preferProviderId && input.provider.id === input.preferProviderId;
    const variantPreferred = input.preferVariantKind && input.variantKind === input.preferVariantKind;
    return [
        matchedBy,
        ...protocolReason,
        ...capabilityReasons,
        ...(providerPreferred ? ["preferred provider matched"] : []),
        ...(variantPreferred ? ["preferred variant matched"] : []),
    ];
}
|
package/dist/retry.d.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared retry/backoff utilities for LLM transport implementations.
|
|
3
|
+
*
|
|
4
|
+
* Provides common constants and helper functions used by all transports
|
|
5
|
+
* (anthropic-messages, openai-chat, volcengine-responses) to handle
|
|
6
|
+
* transient errors with exponential backoff.
|
|
7
|
+
*/
|
|
8
|
+
/** Default maximum number of retry attempts */
|
|
9
|
+
export declare const DEFAULT_MAX_RETRIES = 2;
|
|
10
|
+
/** Base delay for exponential backoff (doubles each attempt, capped at 30s) */
|
|
11
|
+
export declare const RETRY_BASE_DELAY_MS = 1000;
|
|
12
|
+
/** Maximum backoff delay */
|
|
13
|
+
export declare const RETRY_MAX_DELAY_MS = 30000;
|
|
14
|
+
/** HTTP status codes considered transient (worth retrying) */
|
|
15
|
+
export declare const TRANSIENT_STATUS_CODES: Set<number>;
|
|
16
|
+
/** Default timeout for idle stream detection (no data received) */
|
|
17
|
+
export declare const STREAM_IDLE_TIMEOUT_MS = 90000;
|
|
18
|
+
/**
|
|
19
|
+
* Calculate the delay for a given retry attempt using exponential backoff with jitter.
|
|
20
|
+
* @param attempt 1-based attempt number (1 = first retry)
|
|
21
|
+
* @returns delay in milliseconds
|
|
22
|
+
*/
|
|
23
|
+
export declare function retryDelay(attempt: number): number;
|
|
24
|
+
/**
|
|
25
|
+
* Check if an HTTP status code indicates a transient error worth retrying.
|
|
26
|
+
*/
|
|
27
|
+
export declare function isTransientStatus(status: number | null | undefined): boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Sleep with abort signal support. Resolves after `ms` milliseconds
|
|
30
|
+
* or rejects if the signal is aborted.
|
|
31
|
+
*/
|
|
32
|
+
export declare function retrySleep(ms: number, signal?: AbortSignal): Promise<void>;
|
|
33
|
+
/**
|
|
34
|
+
* Extract HTTP status from various error shapes.
|
|
35
|
+
* Works with fetch Response errors, Axios errors, and generic errors with status property.
|
|
36
|
+
*/
|
|
37
|
+
export declare function extractHttpStatus(error: unknown): number | null;
|
package/dist/retry.js
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared retry/backoff utilities for LLM transport implementations.
|
|
3
|
+
*
|
|
4
|
+
* Provides common constants and helper functions used by all transports
|
|
5
|
+
* (anthropic-messages, openai-chat, volcengine-responses) to handle
|
|
6
|
+
* transient errors with exponential backoff.
|
|
7
|
+
*/
|
|
8
|
+
// ── Retry Constants ──────────────────────────────────────────────────────────
|
|
9
|
+
/** Default upper bound on the number of retry attempts. */
export const DEFAULT_MAX_RETRIES = 2;
/** Starting delay in ms for exponential backoff; it doubles with every attempt until it hits the cap. */
export const RETRY_BASE_DELAY_MS = 1000;
/** Cap in ms applied to the exponential part of the backoff delay. */
export const RETRY_MAX_DELAY_MS = 30000;
/** HTTP status codes treated as transient, i.e. worth retrying. */
export const TRANSIENT_STATUS_CODES = new Set([
    429,
    529,
    502,
    503,
    504,
]);
/** Default idle-stream timeout in ms (no data received). */
export const STREAM_IDLE_TIMEOUT_MS = 90000;
|
|
19
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
20
|
+
/**
 * Compute how long to wait before a retry: exponential backoff plus random jitter.
 *
 * The exponential part is `RETRY_BASE_DELAY_MS * 2^(attempt - 1)`, clamped to
 * `RETRY_MAX_DELAY_MS`. Up to 25% of that clamped value is then added as
 * jitter, so the result can exceed the clamp by as much as a quarter.
 *
 * @param attempt 1-based attempt number (1 = first retry)
 * @returns delay in milliseconds
 */
export function retryDelay(attempt) {
    const exponential = RETRY_BASE_DELAY_MS * 2 ** (attempt - 1);
    const clamped = Math.min(exponential, RETRY_MAX_DELAY_MS);
    return clamped + Math.random() * 0.25 * clamped;
}
|
|
30
|
+
/**
 * Tell whether an HTTP status code marks a transient failure worth retrying.
 *
 * @param status HTTP status code; `null` and `undefined` are never transient.
 * @returns `true` when the status is listed in `TRANSIENT_STATUS_CODES`.
 */
export function isTransientStatus(status) {
    if (status == null) {
        return false;
    }
    return TRANSIENT_STATUS_CODES.has(status);
}
|
|
36
|
+
/**
|
|
37
|
+
* Sleep with abort signal support. Resolves after `ms` milliseconds
|
|
38
|
+
* or rejects if the signal is aborted.
|
|
39
|
+
*/
|
|
40
|
+
export function retrySleep(ms, signal) {
|
|
41
|
+
return new Promise((resolve, reject) => {
|
|
42
|
+
if (signal?.aborted) {
|
|
43
|
+
reject(new Error("Request aborted"));
|
|
44
|
+
return;
|
|
45
|
+
}
|
|
46
|
+
const timer = setTimeout(resolve, ms);
|
|
47
|
+
if (signal) {
|
|
48
|
+
signal.addEventListener("abort", () => {
|
|
49
|
+
clearTimeout(timer);
|
|
50
|
+
reject(new Error("Request aborted"));
|
|
51
|
+
}, { once: true });
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
/**
 * Pull an HTTP status code out of an error-like value.
 *
 * Looks, in order, at a numeric `status`, a numeric `statusCode`, and a
 * numeric `response.status` on the given object.
 *
 * @param error Any thrown value.
 * @returns The first numeric status found, or `null` when the value is not an
 *   object or carries none of those fields.
 */
export function extractHttpStatus(error) {
    if (!error || typeof error !== "object") {
        return null;
    }
    for (const field of ["status", "statusCode"]) {
        const direct = error[field];
        if (typeof direct === "number") {
            return direct;
        }
    }
    const response = error.response;
    if (response && typeof response.status === "number") {
        return response.status;
    }
    return null;
}
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLMTransport — abstract interface for LLM inference calls.
|
|
3
|
+
*
|
|
4
|
+
* Aligned with Hermes `ProviderTransport` ABC:
|
|
5
|
+
* stream(request, apiKey, signal) → AsyncGenerator<LLMChunk>
|
|
6
|
+
*
|
|
7
|
+
* Two concrete implementations:
|
|
8
|
+
* - OpenAI Chat Completions (covers 95% of providers)
|
|
9
|
+
* - Anthropic Messages API
|
|
10
|
+
*/
|
|
11
|
+
import type { ChatMessage, ToolDefinition } from "./wire-types.js";
|
|
12
|
+
export type StructuredOutputConfig = {
|
|
13
|
+
mode: "json_object";
|
|
14
|
+
} | {
|
|
15
|
+
mode: "json_schema";
|
|
16
|
+
name: string;
|
|
17
|
+
schema: Record<string, unknown>;
|
|
18
|
+
strict?: boolean;
|
|
19
|
+
};
|
|
20
|
+
export interface CachingConfig {
|
|
21
|
+
type: "enabled" | "disabled";
|
|
22
|
+
/** Enable prefix caching mode (鎼?0.3). Requires store=true and stream=false. */
|
|
23
|
+
prefix?: boolean;
|
|
24
|
+
}
|
|
25
|
+
export type ContextEdit = {
|
|
26
|
+
type: "clear_thinking";
|
|
27
|
+
keep?: "all" | {
|
|
28
|
+
type: "thinking_turns";
|
|
29
|
+
value: number;
|
|
30
|
+
};
|
|
31
|
+
} | {
|
|
32
|
+
type: "clear_tool_uses";
|
|
33
|
+
trigger?: {
|
|
34
|
+
type: "tool_uses";
|
|
35
|
+
value: number;
|
|
36
|
+
};
|
|
37
|
+
keep?: {
|
|
38
|
+
type: "tool_uses";
|
|
39
|
+
value: number;
|
|
40
|
+
};
|
|
41
|
+
excludeTools?: string[];
|
|
42
|
+
clearToolInput?: boolean;
|
|
43
|
+
};
|
|
44
|
+
export interface ContextManagementConfig {
|
|
45
|
+
edits: ContextEdit[];
|
|
46
|
+
}
|
|
47
|
+
export interface LLMRequest {
|
|
48
|
+
model: string;
|
|
49
|
+
messages: ChatMessage[];
|
|
50
|
+
tools?: ToolDefinition[];
|
|
51
|
+
toolChoice?: "auto" | "none" | "required" | {
|
|
52
|
+
type: "function";
|
|
53
|
+
name: string;
|
|
54
|
+
};
|
|
55
|
+
temperature?: number;
|
|
56
|
+
/** Nucleus sampling: controls diversity via cumulative probability cutoff. */
|
|
57
|
+
topP?: number;
|
|
58
|
+
maxTokens?: number;
|
|
59
|
+
reasoning?: {
|
|
60
|
+
effort: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
61
|
+
/** Request encrypted original reasoning content (Volcengine 鎼?7.7). */
|
|
62
|
+
includeEncryptedReasoning?: boolean;
|
|
63
|
+
};
|
|
64
|
+
/** Volcengine: max builtin tool calls per turn (鎼?9.15). */
|
|
65
|
+
maxToolCalls?: number;
|
|
66
|
+
/**
|
|
67
|
+
* DeepSeek prefix completion: force model to continue from this prefix.
|
|
68
|
+
* Requires `/beta` endpoint; adds a trailing assistant message with `prefix: true`.
|
|
69
|
+
*/
|
|
70
|
+
prefixMessage?: string;
|
|
71
|
+
/**
|
|
72
|
+
* Model requires streaming — disable non-streaming fallback in transports.
|
|
73
|
+
* When true, transports must NOT fall back to non-streaming requests on failure.
|
|
74
|
+
* Set for models like QwQ/Omni where the provider rejects non-streaming calls.
|
|
75
|
+
*/
|
|
76
|
+
streamRequired?: boolean;
|
|
77
|
+
/**
|
|
78
|
+
* Disable injection of provider-native builtin tools (web_search, code_interpreter)
|
|
79
|
+
* for this specific request. Allows session-level control over GLM/Kimi builtin tools.
|
|
80
|
+
*/
|
|
81
|
+
disableBuiltinTools?: boolean;
|
|
82
|
+
/**
|
|
83
|
+
* Volcengine builtin tools to inject (web_search, image_process, knowledge_search).
|
|
84
|
+
* Each entry specifies a tool type and optional config.
|
|
85
|
+
* These are platform-executed tools requiring beta headers.
|
|
86
|
+
*/
|
|
87
|
+
builtinTools?: Array<{
|
|
88
|
+
type: "builtin_web_search" | "builtin_image_process" | "builtin_knowledge_search" | "builtin_doubao_app";
|
|
89
|
+
config?: Record<string, unknown>;
|
|
90
|
+
}>;
|
|
91
|
+
/**
|
|
92
|
+
* Server-side context continuation via response chain (鎼?).
|
|
93
|
+
* When set, the server automatically includes previous context,
|
|
94
|
+
* so messages[] only needs to contain the NEW user message.
|
|
95
|
+
*/
|
|
96
|
+
previousResponseId?: string;
|
|
97
|
+
/**
|
|
98
|
+
* Control server-side storage of this request's input/output (鎼?.1).
|
|
99
|
+
* Default: true (server stores for 3 days).
|
|
100
|
+
*/
|
|
101
|
+
store?: boolean;
|
|
102
|
+
/** Expiration time for stored response (Unix seconds, max 7 days from now) */
|
|
103
|
+
storeExpireAt?: number;
|
|
104
|
+
/**
|
|
105
|
+
* Per-turn system instruction augmentation (鎼?).
|
|
106
|
+
* Temporarily overlays persona or adds constraints for this turn only.
|
|
107
|
+
* NOTE: Incompatible with caching — do not use both together.
|
|
108
|
+
*/
|
|
109
|
+
instructions?: string;
|
|
110
|
+
/**
|
|
111
|
+
* Structured output format (鎼?6).
|
|
112
|
+
* Forces model to produce JSON conforming to the specified schema.
|
|
113
|
+
*/
|
|
114
|
+
structuredOutput?: StructuredOutputConfig;
|
|
115
|
+
/**
|
|
116
|
+
* Caching configuration (鎼?0).
|
|
117
|
+
* Controls prefix/session caching behavior.
|
|
118
|
+
* NOTE: Incompatible with instructions, json_schema, and builtin tools.
|
|
119
|
+
*/
|
|
120
|
+
caching?: CachingConfig;
|
|
121
|
+
/**
|
|
122
|
+
* Context management edits (鎼?1, beta).
|
|
123
|
+
* Server-side trimming of historical thinking chains and tool call traces.
|
|
124
|
+
*/
|
|
125
|
+
contextManagement?: ContextManagementConfig;
|
|
126
|
+
/**
|
|
127
|
+
* Gemini explicit cache reference (gemini-ProviderMax 鎼?).
|
|
128
|
+
* Passes a pre-created cache name (e.g. "cachedContents/abc123") to
|
|
129
|
+
* generateContent so the server uses cached tokens instead of re-processing.
|
|
130
|
+
* Create caches via GeminiCacheAPI.createCache() first.
|
|
131
|
+
*/
|
|
132
|
+
cachedContent?: string;
|
|
133
|
+
/**
|
|
134
|
+
* Predicted output for speculative decoding (openai-ProviderMax 鎼?1).
|
|
135
|
+
* When editing code, pass the existing content so the model can diff efficiently.
|
|
136
|
+
* Reduces latency by 3-5x when prediction matches. Falls back when it doesn't.
|
|
137
|
+
* Works with OpenAI GPT-5.x models via Responses API and Chat Completions.
|
|
138
|
+
*/
|
|
139
|
+
prediction?: {
|
|
140
|
+
type: "content";
|
|
141
|
+
content: string;
|
|
142
|
+
};
|
|
143
|
+
/**
|
|
144
|
+
* Prompt cache bucketing key (openai-ProviderMax 鎼?1).
|
|
145
|
+
* Replaces the deprecated `user` field. Helps OpenAI group similar requests
|
|
146
|
+
* for higher cache hit rates.
|
|
147
|
+
*/
|
|
148
|
+
promptCacheKey?: string;
|
|
149
|
+
/**
|
|
150
|
+
* Prompt cache retention policy (openai-ProviderMax 鎼?1).
|
|
151
|
+
* "in_memory" = default 5-10 min, "24h" = extended up to 24 hours.
|
|
152
|
+
*/
|
|
153
|
+
promptCacheRetention?: "in_memory" | "24h";
|
|
154
|
+
/**
|
|
155
|
+
* Service tier for request scheduling (openai-ProviderMax 鎼?4).
|
|
156
|
+
* "auto" = project default, "flex" = 50% cheaper / higher latency,
|
|
157
|
+
* "priority" = guaranteed low latency.
|
|
158
|
+
*/
|
|
159
|
+
serviceTier?: "auto" | "default" | "flex" | "priority";
|
|
160
|
+
/**
|
|
161
|
+
* OpenAI Responses API built-in tools (openai-ProviderMax 鎼?).
|
|
162
|
+
* Platform-executed tools like web_search, file_search, code_interpreter, etc.
|
|
163
|
+
*/
|
|
164
|
+
openaiBuiltinTools?: Array<{
|
|
165
|
+
type: "web_search_preview" | "file_search" | "code_interpreter" | "computer_use_preview";
|
|
166
|
+
[key: string]: unknown;
|
|
167
|
+
}>;
|
|
168
|
+
/**
|
|
169
|
+
* OpenAI Responses API conversation ID (openai-ProviderMax 鎼?.1).
|
|
170
|
+
* Alternative to previous_response_id — persistent server-side conversation.
|
|
171
|
+
* Cannot be used together with previousResponseId.
|
|
172
|
+
*/
|
|
173
|
+
conversationId?: string;
|
|
174
|
+
/**
|
|
175
|
+
* Disable parallel tool calling for this request.
|
|
176
|
+
* When false, the model must call tools sequentially.
|
|
177
|
+
*/
|
|
178
|
+
parallelToolCalls?: boolean;
|
|
179
|
+
/**
|
|
180
|
+
* Text output verbosity hint (openai-ProviderMax 鎼?).
|
|
181
|
+
* Controls how detailed the model's textual output should be.
|
|
182
|
+
*/
|
|
183
|
+
textVerbosity?: "low" | "medium" | "high";
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* FIM completion request — DeepSeek Beta Completions API.
|
|
187
|
+
* POST /beta/v1/completions with prompt + suffix.
|
|
188
|
+
* Only works with non-thinking mode.
|
|
189
|
+
*/
|
|
190
|
+
export interface FIMRequest {
|
|
191
|
+
model: string;
|
|
192
|
+
/** Text before the cursor (prefix context) */
|
|
193
|
+
prompt: string;
|
|
194
|
+
/** Text after the cursor (suffix context) */
|
|
195
|
+
suffix?: string;
|
|
196
|
+
/** Max tokens to generate for the infill */
|
|
197
|
+
maxTokens?: number;
|
|
198
|
+
/** Sampling temperature */
|
|
199
|
+
temperature?: number;
|
|
200
|
+
/** Stop sequences */
|
|
201
|
+
stop?: string[];
|
|
202
|
+
}
|
|
203
|
+
export type FIMChunk = {
|
|
204
|
+
type: "delta";
|
|
205
|
+
text: string;
|
|
206
|
+
} | {
|
|
207
|
+
type: "done";
|
|
208
|
+
finishReason: string;
|
|
209
|
+
};
|
|
210
|
+
export type LLMChunk = {
|
|
211
|
+
type: "delta";
|
|
212
|
+
text: string;
|
|
213
|
+
} | {
|
|
214
|
+
type: "tool_call_delta";
|
|
215
|
+
index: number;
|
|
216
|
+
id?: string;
|
|
217
|
+
name?: string;
|
|
218
|
+
arguments: string;
|
|
219
|
+
} | {
|
|
220
|
+
type: "reasoning_delta";
|
|
221
|
+
text: string;
|
|
222
|
+
} | {
|
|
223
|
+
type: "reasoning_block_complete";
|
|
224
|
+
thinking: string;
|
|
225
|
+
signature: string;
|
|
226
|
+
} | {
|
|
227
|
+
type: "usage";
|
|
228
|
+
promptTokens: number;
|
|
229
|
+
completionTokens: number;
|
|
230
|
+
reasoningTokens?: number;
|
|
231
|
+
cacheReadTokens?: number;
|
|
232
|
+
cacheCreationTokens?: number;
|
|
233
|
+
} | {
|
|
234
|
+
type: "response_id";
|
|
235
|
+
id: string;
|
|
236
|
+
} | {
|
|
237
|
+
/** Informational status from platform-executed builtin tools (web_search, image_process). */
|
|
238
|
+
type: "builtin_tool_status";
|
|
239
|
+
toolType: string;
|
|
240
|
+
event: string;
|
|
241
|
+
data?: Record<string, unknown>;
|
|
242
|
+
} | {
|
|
243
|
+
/** Web search citation annotations from provider builtin tools (Volcengine web_search, Gemini grounding). */
|
|
244
|
+
type: "annotations";
|
|
245
|
+
annotations: Array<{
|
|
246
|
+
type: string;
|
|
247
|
+
url?: string;
|
|
248
|
+
title?: string;
|
|
249
|
+
[key: string]: unknown;
|
|
250
|
+
}>;
|
|
251
|
+
} | {
|
|
252
|
+
type: "error";
|
|
253
|
+
message: string;
|
|
254
|
+
} | {
|
|
255
|
+
type: "done";
|
|
256
|
+
finishReason: string;
|
|
257
|
+
};
|
|
258
|
+
export interface AccumulatedToolCall {
|
|
259
|
+
id: string;
|
|
260
|
+
name: string;
|
|
261
|
+
arguments: string;
|
|
262
|
+
}
|
|
263
|
+
export interface LLMTransport {
|
|
264
|
+
/**
|
|
265
|
+
* Stream an LLM inference request.
|
|
266
|
+
* apiKey is passed explicitly (from agent.turn.config, not env).
|
|
267
|
+
*/
|
|
268
|
+
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
269
|
+
/**
|
|
270
|
+
* FIM (Fill-In-Middle) completion — optional capability.
|
|
271
|
+
* Only implemented by providers that support it (DeepSeek /beta endpoint).
|
|
272
|
+
*/
|
|
273
|
+
complete?(request: FIMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<FIMChunk>;
|
|
274
|
+
}
|
|
275
|
+
/**
|
|
276
|
+
* Accumulate tool_call_delta chunks into complete ToolCall objects.
|
|
277
|
+
* Modeled after admin-infer-proxy-client's Map<index, toolCall> accumulator.
|
|
278
|
+
*/
|
|
279
|
+
export declare function accumulateToolCalls(accumulator: Map<number, AccumulatedToolCall>, chunk: LLMChunk & {
|
|
280
|
+
type: "tool_call_delta";
|
|
281
|
+
}): void;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLMTransport — abstract interface for LLM inference calls.
|
|
3
|
+
*
|
|
4
|
+
* Aligned with Hermes `ProviderTransport` ABC:
|
|
5
|
+
* stream(request, apiKey, signal) → AsyncGenerator<LLMChunk>
|
|
6
|
+
*
|
|
7
|
+
* Two concrete implementations:
|
|
8
|
+
* - OpenAI Chat Completions (covers 95% of providers)
|
|
9
|
+
* - Anthropic Messages API
|
|
10
|
+
*/
|
|
11
|
+
// ── Convenience: aggregate chunks to tool calls ──────────────────────────────
|
|
12
|
+
/**
|
|
13
|
+
* Accumulate tool_call_delta chunks into complete ToolCall objects.
|
|
14
|
+
* Modeled after admin-infer-proxy-client's Map<index, toolCall> accumulator.
|
|
15
|
+
*/
|
|
16
|
+
export function accumulateToolCalls(accumulator, chunk) {
|
|
17
|
+
let tc = accumulator.get(chunk.index);
|
|
18
|
+
if (!tc) {
|
|
19
|
+
tc = { id: "", name: "", arguments: "" };
|
|
20
|
+
accumulator.set(chunk.index, tc);
|
|
21
|
+
}
|
|
22
|
+
if (chunk.id)
|
|
23
|
+
tc.id = chunk.id;
|
|
24
|
+
if (chunk.name)
|
|
25
|
+
tc.name += chunk.name;
|
|
26
|
+
tc.arguments += chunk.arguments;
|
|
27
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Messages Transport — SSE streaming for Claude API.
|
|
3
|
+
*
|
|
4
|
+
* Aligned with CC (claude-code-haha) src/services/api/claude.ts:
|
|
5
|
+
* - cache_control ephemeral injection on system prompt blocks
|
|
6
|
+
* - ensureToolResultPairing() conversation repair before every request
|
|
7
|
+
* - Retry with exponential backoff on transient errors (429/529/overloaded)
|
|
8
|
+
* - Non-streaming fallback when stream errors out
|
|
9
|
+
* - 90s idle watchdog timeout for silently dropped connections
|
|
10
|
+
* - Adaptive/budget thinking with temperature omit
|
|
11
|
+
* - Cache token extraction with >0 guard (CC updateUsage parity)
|
|
12
|
+
* - signature_delta handling for thinking blocks
|
|
13
|
+
*/
|
|
14
|
+
import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
|
|
15
|
+
import type { ProviderQuirks } from "../provider-def.js";
|
|
16
|
+
import type { FileUploadAdapter } from "../file-upload-service.js";
|
|
17
|
+
export interface AnthropicTransportConfig {
|
|
18
|
+
baseUrl: string;
|
|
19
|
+
/** anthropic-version header (default "2023-06-01") */
|
|
20
|
+
apiVersion?: string;
|
|
21
|
+
/** Per-request timeout in ms (default 180_000) */
|
|
22
|
+
timeoutMs?: number;
|
|
23
|
+
/** Stream idle watchdog timeout in ms (default 90_000, CC parity) */
|
|
24
|
+
streamIdleTimeoutMs?: number;
|
|
25
|
+
/** Enable prompt caching via cache_control ephemeral (default true) */
|
|
26
|
+
enablePromptCaching?: boolean;
|
|
27
|
+
/** Max retry attempts on transient errors (default 3) */
|
|
28
|
+
maxRetries?: number;
|
|
29
|
+
/** Omit temperature when it equals 0 — MiniMax rejects temperature=0 */
|
|
30
|
+
omitZeroTemperature?: boolean;
|
|
31
|
+
/** Provider-specific quirks for conditional logic (CC/altcode parity) */
|
|
32
|
+
quirks?: ProviderQuirks;
|
|
33
|
+
/** File upload adapter (Hub OSS relay for Anthropic). */
|
|
34
|
+
fileUploadAdapter?: FileUploadAdapter;
|
|
35
|
+
}
|
|
36
|
+
export declare class AnthropicMessagesTransport implements LLMTransport {
|
|
37
|
+
private baseUrl;
|
|
38
|
+
private apiVersion;
|
|
39
|
+
private timeoutMs;
|
|
40
|
+
private streamIdleTimeoutMs;
|
|
41
|
+
private enablePromptCaching;
|
|
42
|
+
private maxRetries;
|
|
43
|
+
private omitZeroTemperature;
|
|
44
|
+
private quirks;
|
|
45
|
+
private fileUploadAdapter?;
|
|
46
|
+
constructor(config: AnthropicTransportConfig);
|
|
47
|
+
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
48
|
+
/**
|
|
49
|
+
* Stream with idle watchdog timer (CC parity: 90s default).
|
|
50
|
+
* Throws if no chunks received for streamIdleTimeoutMs.
|
|
51
|
+
*/
|
|
52
|
+
private streamWithWatchdog;
|
|
53
|
+
/**
|
|
54
|
+
* Non-streaming fallback (CC executeNonStreamingRequest parity).
|
|
55
|
+
* Used when streaming fails after all retries.
|
|
56
|
+
* Caps max_tokens at 64K and adjusts thinking budget accordingly.
|
|
57
|
+
*/
|
|
58
|
+
private nonStreamingFallback;
|
|
59
|
+
/**
|
|
60
|
+
* Convert a non-streaming API response to LLMChunk sequence.
|
|
61
|
+
*/
|
|
62
|
+
private mapNonStreamingResponse;
|
|
63
|
+
private parseSSEStream;
|
|
64
|
+
private mapEvent;
|
|
65
|
+
}
|