qlogicagent 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +7 -7
- package/dist/cli.js +204 -195
- package/dist/index.js +204 -195
- package/dist/orchestration.js +10 -10
- package/dist/types/agent/tool-loop.d.ts +2 -0
- package/dist/types/agent/types.d.ts +54 -1
- package/dist/types/cli/stdio-server.d.ts +10 -0
- package/dist/types/cli/tool-bootstrap.d.ts +13 -1
- package/dist/types/llm/index.d.ts +1 -1
- package/dist/types/llm/llm-client.d.ts +1 -1
- package/dist/types/llm/media-client.d.ts +3 -4
- package/dist/types/llm/media-transport.d.ts +75 -4
- package/dist/types/llm/provider-def.d.ts +124 -3
- package/dist/types/llm/provider-registry.d.ts +5 -0
- package/dist/types/llm/provider-tool-api.d.ts +44 -0
- package/dist/types/llm/retry.d.ts +37 -0
- package/dist/types/llm/transport.d.ts +161 -2
- package/dist/types/llm/transports/anthropic-messages.d.ts +7 -0
- package/dist/types/llm/transports/minimax-media.d.ts +5 -0
- package/dist/types/llm/transports/openai-chat.d.ts +44 -3
- package/dist/types/llm/transports/realtime-transport.d.ts +183 -0
- package/dist/types/llm/transports/volcengine-grounding.d.ts +58 -0
- package/dist/types/llm/transports/volcengine-media.d.ts +50 -0
- package/dist/types/llm/transports/volcengine-responses.d.ts +60 -0
- package/dist/types/llm/transports/zhipu-media.d.ts +60 -0
- package/dist/types/llm/transports/zhipu-tool-api.d.ts +35 -0
- package/dist/types/orchestration/error-handling/error-classification.d.ts +1 -10
- package/dist/types/orchestration/index.d.ts +1 -1
- package/dist/types/orchestration/tool-cascade.d.ts +40 -0
- package/dist/types/orchestration/tool-loop/tool-schema.d.ts +4 -1
- package/dist/types/protocol/methods.d.ts +19 -0
- package/dist/types/skills/memory/memory-extractor.d.ts +1 -1
- package/dist/types/skills/tools/file-management-tool.d.ts +90 -0
- package/dist/types/skills/tools/image-generate-tool.d.ts +13 -1
- package/dist/types/skills/tools/music-generate-tool.d.ts +25 -0
- package/dist/types/skills/tools/stt-tool.d.ts +33 -0
- package/dist/types/skills/tools/three-d-generate-tool.d.ts +45 -0
- package/dist/types/skills/tools/tts-tool.d.ts +12 -0
- package/dist/types/skills/tools/video-edit-tool.d.ts +5 -2
- package/dist/types/skills/tools/video-generate-tool.d.ts +102 -2
- package/dist/types/skills/tools/video-merge-tool.d.ts +1 -1
- package/dist/types/skills/tools/video-upscale-tool.d.ts +1 -1
- package/dist/types/skills/tools/voice-clone-tool.d.ts +40 -0
- package/package.json +1 -1
|
@@ -9,9 +9,116 @@
|
|
|
9
9
|
* Layer 2: model-catalog.ts remote (models.dev)
|
|
10
10
|
* Layer 3: user config (from agent.turn.config)
|
|
11
11
|
*/
|
|
12
|
-
export type TransportType = "openai-chat" | "anthropic-messages";
|
|
12
|
+
export type TransportType = "openai-chat" | "anthropic-messages" | "volcengine-responses";
|
|
13
13
|
export type AuthType = "bearer" | "x-api-key" | "none";
|
|
14
|
-
export type MediaCapability = "image" | "video" | "music" | "tts" | "3d";
|
|
14
|
+
export type MediaCapability = "image" | "video" | "music" | "tts" | "3d" | "stt" | "embedding" | "video_understanding" | "image_understanding" | "voice_clone" | "rerank" | "document_parsing";
|
|
15
|
+
export type VideoOperation = "text2video" | "img2video" | "video2video" | "edit" | "merge" | "upscale";
|
|
16
|
+
export type ImageOperation = "text2image" | "img2img" | "inpainting" | "outpainting";
|
|
17
|
+
export type MusicOperation = "text2music" | "cover";
|
|
18
|
+
export type TtsOperation = "text2speech" | "voice_clone";
|
|
19
|
+
export type ThreeDOperation = "text2_3d" | "img2_3d";
|
|
20
|
+
export interface VideoCapabilities {
|
|
21
|
+
type: "video";
|
|
22
|
+
operations: VideoOperation[];
|
|
23
|
+
maxDurationSeconds?: number;
|
|
24
|
+
resolutions?: string[];
|
|
25
|
+
aspectRatios?: string[];
|
|
26
|
+
fps?: number[];
|
|
27
|
+
}
|
|
28
|
+
export interface ImageCapabilities {
|
|
29
|
+
type: "image";
|
|
30
|
+
operations: ImageOperation[];
|
|
31
|
+
sizes?: string[];
|
|
32
|
+
transparentBackground?: boolean;
|
|
33
|
+
}
|
|
34
|
+
export interface MusicCapabilities {
|
|
35
|
+
type: "music";
|
|
36
|
+
operations: MusicOperation[];
|
|
37
|
+
maxDurationSeconds?: number;
|
|
38
|
+
formats?: string[];
|
|
39
|
+
}
|
|
40
|
+
export interface TtsCapabilities {
|
|
41
|
+
type: "tts";
|
|
42
|
+
operations?: TtsOperation[];
|
|
43
|
+
voices?: string[];
|
|
44
|
+
maxCharacters?: number;
|
|
45
|
+
formats?: string[];
|
|
46
|
+
}
|
|
47
|
+
export interface ThreeDCapabilities {
|
|
48
|
+
type: "3d";
|
|
49
|
+
operations: ThreeDOperation[];
|
|
50
|
+
outputFormats?: string[];
|
|
51
|
+
}
|
|
52
|
+
export interface SttCapabilities {
|
|
53
|
+
type: "stt";
|
|
54
|
+
languages?: string[];
|
|
55
|
+
maxDurationSeconds?: number;
|
|
56
|
+
formats?: string[];
|
|
57
|
+
}
|
|
58
|
+
export interface EmbeddingCapabilities {
|
|
59
|
+
type: "embedding";
|
|
60
|
+
dimensions?: number;
|
|
61
|
+
maxTokens?: number;
|
|
62
|
+
}
|
|
63
|
+
export interface VideoUnderstandingCapabilities {
|
|
64
|
+
type: "video_understanding";
|
|
65
|
+
maxDurationSeconds?: number;
|
|
66
|
+
formats?: string[];
|
|
67
|
+
}
|
|
68
|
+
export interface ImageUnderstandingCapabilities {
|
|
69
|
+
type: "image_understanding";
|
|
70
|
+
formats?: string[];
|
|
71
|
+
}
|
|
72
|
+
export interface VoiceCloneCapabilities {
|
|
73
|
+
type: "voice_clone";
|
|
74
|
+
maxSampleDurationSeconds?: number;
|
|
75
|
+
maxSampleSizeMB?: number;
|
|
76
|
+
formats?: string[];
|
|
77
|
+
}
|
|
78
|
+
export interface RerankCapabilities {
|
|
79
|
+
type: "rerank";
|
|
80
|
+
maxDocuments?: number;
|
|
81
|
+
maxQueryLength?: number;
|
|
82
|
+
maxDocumentLength?: number;
|
|
83
|
+
}
|
|
84
|
+
export interface DocumentParsingCapabilities {
|
|
85
|
+
type: "document_parsing";
|
|
86
|
+
supportedFormats?: string[];
|
|
87
|
+
maxPageCount?: number;
|
|
88
|
+
maxFileSizeMB?: number;
|
|
89
|
+
}
|
|
90
|
+
export type MediaCapabilities = VideoCapabilities | ImageCapabilities | MusicCapabilities | TtsCapabilities | ThreeDCapabilities | SttCapabilities | EmbeddingCapabilities | VideoUnderstandingCapabilities | ImageUnderstandingCapabilities | VoiceCloneCapabilities | RerankCapabilities | DocumentParsingCapabilities;
|
|
91
|
+
/**
|
|
92
|
+
* Provider-specific quirks — drives conditional logic in transports.
|
|
93
|
+
* CC parity: provider detection via quirks flags instead of hardcoded if/else.
|
|
94
|
+
* altcode parity: provider auto-detect + per-provider parameter translation.
|
|
95
|
+
*/
|
|
96
|
+
export interface ProviderQuirks {
|
|
97
|
+
/** Provider doesn't support thinking content blocks (Qwen) */
|
|
98
|
+
filterThinkingBlocks?: boolean;
|
|
99
|
+
/** Provider doesn't support image content blocks — strip imageUrls before sending (DeepSeek, MiniMax) */
|
|
100
|
+
filterImageBlocks?: boolean;
|
|
101
|
+
/** DeepSeek: budget_tokens ignored, use output_config.effort instead */
|
|
102
|
+
useEffortInsteadOfBudget?: boolean;
|
|
103
|
+
/** Provider supports reasoning_effort param (Kimi K2, OpenAI o-series) */
|
|
104
|
+
supportsReasoningEffort?: boolean;
|
|
105
|
+
/** Provider has built-in web search (Kimi: builtin_function.$web_search, GLM: web_search) */
|
|
106
|
+
builtinWebSearch?: boolean;
|
|
107
|
+
/** Provider has built-in code interpreter */
|
|
108
|
+
builtinCodeInterpreter?: boolean;
|
|
109
|
+
/** Supports thinking.type="enabled"/"disabled" body param (Kimi K2, GLM).
|
|
110
|
+
* Disambiguation: GLM also sets supportsToolStream; Kimi does not. */
|
|
111
|
+
supportsThinkingParam?: boolean;
|
|
112
|
+
/** GLM-only: supports tool_stream=true for incremental tool call streaming */
|
|
113
|
+
supportsToolStream?: boolean;
|
|
114
|
+
/** DeepSeek only accepts "high" | "max" effort levels; "low" and "medium" are mapped to "high" */
|
|
115
|
+
maxReasoningEffort?: "high" | "max";
|
|
116
|
+
/** Supports prefix completion via /beta endpoint (DeepSeek Beta) */
|
|
117
|
+
supportsPrefixCompletion?: boolean;
|
|
118
|
+
/** MiniMax OpenAI route: inject reasoning_split=true to split thinking into reasoning_details.
|
|
119
|
+
* Streaming uses cumulative string updates (not incremental deltas). */
|
|
120
|
+
supportsReasoningSplit?: boolean;
|
|
121
|
+
}
|
|
15
122
|
export interface ProviderDef {
|
|
16
123
|
/** Unique provider id, e.g. "deepseek", "openai", "anthropic" */
|
|
17
124
|
id: string;
|
|
@@ -21,6 +128,12 @@ export interface ProviderDef {
|
|
|
21
128
|
transport: TransportType;
|
|
22
129
|
/** API base URL, e.g. "https://api.deepseek.com" */
|
|
23
130
|
baseUrl: string;
|
|
131
|
+
/**
|
|
132
|
+
* Logical provider group — links protocol variants of the same vendor.
|
|
133
|
+
* e.g. both "zhipu" (anthropic) and "zhipu-openai" share group "zhipu".
|
|
134
|
+
* Defaults to provider id if unset.
|
|
135
|
+
*/
|
|
136
|
+
group?: string;
|
|
24
137
|
/** Env var names for API key (priority order) */
|
|
25
138
|
apiKeyEnvVars: string[];
|
|
26
139
|
/** Auth header style */
|
|
@@ -37,9 +150,11 @@ export interface ProviderDef {
|
|
|
37
150
|
supportsStreamOptions?: boolean;
|
|
38
151
|
/** Whether to omit temperature when it equals 0 (some providers reject 0) */
|
|
39
152
|
omitZeroTemperature?: boolean;
|
|
153
|
+
/** Provider-specific quirks for transport-level conditional logic */
|
|
154
|
+
quirks?: ProviderQuirks;
|
|
40
155
|
}
|
|
41
156
|
export interface ModelInfo {
|
|
42
|
-
/** Model id, e.g. "deepseek-
|
|
157
|
+
/** Model id, e.g. "deepseek-v4-flash" */
|
|
43
158
|
id: string;
|
|
44
159
|
/** Display name, e.g. "DeepSeek Chat V3" */
|
|
45
160
|
name: string;
|
|
@@ -51,6 +166,10 @@ export interface ModelInfo {
|
|
|
51
166
|
toolCall: boolean;
|
|
52
167
|
/** Has reasoning/thinking mode */
|
|
53
168
|
reasoning: boolean;
|
|
169
|
+
/** Thinking is forced on — cannot be toggled off (e.g. QwQ, DeepSeek-R1) */
|
|
170
|
+
reasoningRequired?: boolean;
|
|
171
|
+
/** Model only supports streaming (non-stream requests will fail) */
|
|
172
|
+
streamRequired?: boolean;
|
|
54
173
|
/** Supports vision (image input) */
|
|
55
174
|
vision: boolean;
|
|
56
175
|
/** Cost per 1M input tokens (USD) */
|
|
@@ -63,4 +182,6 @@ export interface ModelInfo {
|
|
|
63
182
|
costCacheWrite?: number;
|
|
64
183
|
/** Media generation capability — undefined means chat/reasoning model */
|
|
65
184
|
mediaType?: MediaCapability;
|
|
185
|
+
/** Fine-grained media capabilities — operations, formats, limits */
|
|
186
|
+
mediaCapabilities?: MediaCapabilities;
|
|
66
187
|
}
|
|
@@ -41,6 +41,11 @@ export declare class ProviderRegistry {
|
|
|
41
41
|
* Merges: Layer 3 override > Layer 1 builtin > Layer 2 catalog enrichment.
|
|
42
42
|
*/
|
|
43
43
|
listModels(providerId: string): ModelInfo[];
|
|
44
|
+
/**
|
|
45
|
+
* Look up a single model's info by provider + model id.
|
|
46
|
+
* Returns undefined if the model is not found.
|
|
47
|
+
*/
|
|
48
|
+
getModelInfo(providerId: string, modelId: string): ModelInfo | undefined;
|
|
44
49
|
/**
|
|
45
50
|
* Trigger background refresh of the remote model catalog.
|
|
46
51
|
*/
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ProviderToolAPI — interface for provider-specific utility endpoints
|
|
3
|
+
* that are neither LLM chat nor media generation.
|
|
4
|
+
*
|
|
5
|
+
* Examples: web search, content reader, tokenizer, moderation, realtime voice.
|
|
6
|
+
* Each provider can expose its own set of tool APIs; the agent's tool cascade
|
|
7
|
+
* mechanism (Q1) routes to these when the provider has a native capability.
|
|
8
|
+
*/
|
|
9
|
+
export interface WebSearchResult {
|
|
10
|
+
title: string;
|
|
11
|
+
url: string;
|
|
12
|
+
snippet: string;
|
|
13
|
+
/** Full page content if available */
|
|
14
|
+
content?: string;
|
|
15
|
+
}
|
|
16
|
+
export interface ReaderResult {
|
|
17
|
+
title: string;
|
|
18
|
+
content: string;
|
|
19
|
+
url: string;
|
|
20
|
+
}
|
|
21
|
+
export interface TokenizerResult {
|
|
22
|
+
tokenCount: number;
|
|
23
|
+
model: string;
|
|
24
|
+
}
|
|
25
|
+
export interface ModerationResult {
|
|
26
|
+
flagged: boolean;
|
|
27
|
+
categories: Record<string, boolean>;
|
|
28
|
+
scores?: Record<string, number>;
|
|
29
|
+
}
|
|
30
|
+
export interface ProviderToolAPI {
|
|
31
|
+
/** Which tool APIs this provider supports */
|
|
32
|
+
readonly capabilities: readonly ProviderToolCapability[];
|
|
33
|
+
/** Web search — returns search result list */
|
|
34
|
+
webSearch?(query: string, options?: {
|
|
35
|
+
maxResults?: number;
|
|
36
|
+
}): Promise<WebSearchResult[]>;
|
|
37
|
+
/** URL reader — extracts content from a web page */
|
|
38
|
+
reader?(url: string): Promise<ReaderResult>;
|
|
39
|
+
/** Tokenizer — count tokens for given text/model */
|
|
40
|
+
tokenize?(text: string, model: string): Promise<TokenizerResult>;
|
|
41
|
+
/** Content moderation — check text for policy violations */
|
|
42
|
+
moderate?(text: string): Promise<ModerationResult>;
|
|
43
|
+
}
|
|
44
|
+
export type ProviderToolCapability = "web_search" | "reader" | "tokenizer" | "moderations";
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared retry/backoff utilities for LLM transport implementations.
|
|
3
|
+
*
|
|
4
|
+
* Provides common constants and helper functions used by all transports
|
|
5
|
+
* (anthropic-messages, openai-chat, volcengine-responses) to handle
|
|
6
|
+
* transient errors with exponential backoff.
|
|
7
|
+
*/
|
|
8
|
+
/** Default maximum number of retry attempts */
|
|
9
|
+
export declare const DEFAULT_MAX_RETRIES = 3;
|
|
10
|
+
/** Base delay in milliseconds for exponential backoff (doubles each attempt, capped at RETRY_MAX_DELAY_MS, i.e. 30s) */
|
|
11
|
+
export declare const RETRY_BASE_DELAY_MS = 1000;
|
|
12
|
+
/** Maximum backoff delay */
|
|
13
|
+
export declare const RETRY_MAX_DELAY_MS = 30000;
|
|
14
|
+
/** HTTP status codes considered transient (worth retrying) */
|
|
15
|
+
export declare const TRANSIENT_STATUS_CODES: Set<number>;
|
|
16
|
+
/** Default timeout for idle stream detection (no data received) */
|
|
17
|
+
export declare const STREAM_IDLE_TIMEOUT_MS = 90000;
|
|
18
|
+
/**
|
|
19
|
+
* Calculate the delay for a given retry attempt using exponential backoff with jitter.
|
|
20
|
+
* @param attempt 1-based attempt number (1 = first retry)
|
|
21
|
+
* @returns delay in milliseconds
|
|
22
|
+
*/
|
|
23
|
+
export declare function retryDelay(attempt: number): number;
|
|
24
|
+
/**
|
|
25
|
+
* Check if an HTTP status code indicates a transient error worth retrying.
|
|
26
|
+
*/
|
|
27
|
+
export declare function isTransientStatus(status: number | null | undefined): boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Sleep with abort signal support. Resolves after `ms` milliseconds
|
|
30
|
+
* or rejects if the signal is aborted.
|
|
31
|
+
*/
|
|
32
|
+
export declare function retrySleep(ms: number, signal?: AbortSignal): Promise<void>;
|
|
33
|
+
/**
|
|
34
|
+
* Extract HTTP status from various error shapes.
|
|
35
|
+
* Works with fetch Response errors, Axios errors, and generic errors with status property.
|
|
36
|
+
*/
|
|
37
|
+
export declare function extractHttpStatus(error: unknown): number | null;
|
|
@@ -9,17 +9,146 @@
|
|
|
9
9
|
* - Anthropic Messages API
|
|
10
10
|
*/
|
|
11
11
|
import type { ChatMessage, ToolDefinition } from "../agent/types.js";
|
|
12
|
+
export type StructuredOutputConfig = {
|
|
13
|
+
mode: "json_object";
|
|
14
|
+
} | {
|
|
15
|
+
mode: "json_schema";
|
|
16
|
+
name: string;
|
|
17
|
+
schema: Record<string, unknown>;
|
|
18
|
+
strict?: boolean;
|
|
19
|
+
};
|
|
20
|
+
export interface CachingConfig {
|
|
21
|
+
type: "enabled" | "disabled";
|
|
22
|
+
/** Enable prefix caching mode (§20.3). Requires store=true and stream=false. */
|
|
23
|
+
prefix?: boolean;
|
|
24
|
+
}
|
|
25
|
+
export type ContextEdit = {
|
|
26
|
+
type: "clear_thinking";
|
|
27
|
+
keep?: "all" | {
|
|
28
|
+
type: "thinking_turns";
|
|
29
|
+
value: number;
|
|
30
|
+
};
|
|
31
|
+
} | {
|
|
32
|
+
type: "clear_tool_uses";
|
|
33
|
+
trigger?: {
|
|
34
|
+
type: "tool_uses";
|
|
35
|
+
value: number;
|
|
36
|
+
};
|
|
37
|
+
keep?: {
|
|
38
|
+
type: "tool_uses";
|
|
39
|
+
value: number;
|
|
40
|
+
};
|
|
41
|
+
excludeTools?: string[];
|
|
42
|
+
clearToolInput?: boolean;
|
|
43
|
+
};
|
|
44
|
+
export interface ContextManagementConfig {
|
|
45
|
+
edits: ContextEdit[];
|
|
46
|
+
}
|
|
12
47
|
export interface LLMRequest {
|
|
13
48
|
model: string;
|
|
14
49
|
messages: ChatMessage[];
|
|
15
50
|
tools?: ToolDefinition[];
|
|
16
|
-
toolChoice?: "auto" | "none" | "required"
|
|
51
|
+
toolChoice?: "auto" | "none" | "required" | {
|
|
52
|
+
type: "function";
|
|
53
|
+
name: string;
|
|
54
|
+
};
|
|
17
55
|
temperature?: number;
|
|
56
|
+
/** Nucleus sampling: controls diversity via cumulative probability cutoff. */
|
|
57
|
+
topP?: number;
|
|
18
58
|
maxTokens?: number;
|
|
19
59
|
reasoning?: {
|
|
20
|
-
effort: "low" | "medium" | "high";
|
|
60
|
+
effort: "minimal" | "low" | "medium" | "high";
|
|
61
|
+
/** Request encrypted original reasoning content (Volcengine §17.7). */
|
|
62
|
+
includeEncryptedReasoning?: boolean;
|
|
21
63
|
};
|
|
64
|
+
/** Volcengine: max builtin tool calls per turn (§19.15). */
|
|
65
|
+
maxToolCalls?: number;
|
|
66
|
+
/**
|
|
67
|
+
* DeepSeek prefix completion: force model to continue from this prefix.
|
|
68
|
+
* Requires `/beta` endpoint; adds a trailing assistant message with `prefix: true`.
|
|
69
|
+
*/
|
|
70
|
+
prefixMessage?: string;
|
|
71
|
+
/**
|
|
72
|
+
* Model requires streaming — disable non-streaming fallback in transports.
|
|
73
|
+
* When true, transports must NOT fall back to non-streaming requests on failure.
|
|
74
|
+
* Set for models like QwQ/Omni where the provider rejects non-streaming calls.
|
|
75
|
+
*/
|
|
76
|
+
streamRequired?: boolean;
|
|
77
|
+
/**
|
|
78
|
+
* Disable injection of provider-native builtin tools (web_search, code_interpreter)
|
|
79
|
+
* for this specific request. Allows session-level control over GLM/Kimi builtin tools.
|
|
80
|
+
*/
|
|
81
|
+
disableBuiltinTools?: boolean;
|
|
82
|
+
/**
|
|
83
|
+
* Volcengine builtin tools to inject (web_search, image_process, knowledge_search, doubao_app).
|
|
84
|
+
* Each entry specifies a tool type and optional config.
|
|
85
|
+
* These are platform-executed tools requiring beta headers.
|
|
86
|
+
*/
|
|
87
|
+
builtinTools?: Array<{
|
|
88
|
+
type: "builtin_web_search" | "builtin_image_process" | "builtin_knowledge_search" | "builtin_doubao_app";
|
|
89
|
+
config?: Record<string, unknown>;
|
|
90
|
+
}>;
|
|
91
|
+
/**
|
|
92
|
+
* Server-side context continuation via response chain (§5).
|
|
93
|
+
* When set, the server automatically includes previous context,
|
|
94
|
+
* so messages[] only needs to contain the NEW user message.
|
|
95
|
+
*/
|
|
96
|
+
previousResponseId?: string;
|
|
97
|
+
/**
|
|
98
|
+
* Control server-side storage of this request's input/output (§5.1).
|
|
99
|
+
* Default: true (server stores for 3 days).
|
|
100
|
+
*/
|
|
101
|
+
store?: boolean;
|
|
102
|
+
/** Expiration time for stored response (Unix seconds, max 7 days from now) */
|
|
103
|
+
storeExpireAt?: number;
|
|
104
|
+
/**
|
|
105
|
+
* Per-turn system instruction augmentation (§8).
|
|
106
|
+
* Temporarily overlays persona or adds constraints for this turn only.
|
|
107
|
+
* NOTE: Incompatible with caching — do not use both together.
|
|
108
|
+
*/
|
|
109
|
+
instructions?: string;
|
|
110
|
+
/**
|
|
111
|
+
* Structured output format (§16).
|
|
112
|
+
* Forces model to produce JSON conforming to the specified schema.
|
|
113
|
+
*/
|
|
114
|
+
structuredOutput?: StructuredOutputConfig;
|
|
115
|
+
/**
|
|
116
|
+
* Caching configuration (§20).
|
|
117
|
+
* Controls prefix/session caching behavior.
|
|
118
|
+
* NOTE: Incompatible with instructions, json_schema, and builtin tools.
|
|
119
|
+
*/
|
|
120
|
+
caching?: CachingConfig;
|
|
121
|
+
/**
|
|
122
|
+
* Context management edits (§21, beta).
|
|
123
|
+
* Server-side trimming of historical thinking chains and tool call traces.
|
|
124
|
+
*/
|
|
125
|
+
contextManagement?: ContextManagementConfig;
|
|
22
126
|
}
|
|
127
|
+
/**
|
|
128
|
+
* FIM completion request — DeepSeek Beta Completions API.
|
|
129
|
+
* POST /beta/v1/completions with prompt + suffix.
|
|
130
|
+
* Only works with non-thinking mode.
|
|
131
|
+
*/
|
|
132
|
+
export interface FIMRequest {
|
|
133
|
+
model: string;
|
|
134
|
+
/** Text before the cursor (prefix context) */
|
|
135
|
+
prompt: string;
|
|
136
|
+
/** Text after the cursor (suffix context) */
|
|
137
|
+
suffix?: string;
|
|
138
|
+
/** Max tokens to generate for the infill */
|
|
139
|
+
maxTokens?: number;
|
|
140
|
+
/** Sampling temperature */
|
|
141
|
+
temperature?: number;
|
|
142
|
+
/** Stop sequences */
|
|
143
|
+
stop?: string[];
|
|
144
|
+
}
|
|
145
|
+
export type FIMChunk = {
|
|
146
|
+
type: "delta";
|
|
147
|
+
text: string;
|
|
148
|
+
} | {
|
|
149
|
+
type: "done";
|
|
150
|
+
finishReason: string;
|
|
151
|
+
};
|
|
23
152
|
export type LLMChunk = {
|
|
24
153
|
type: "delta";
|
|
25
154
|
text: string;
|
|
@@ -32,6 +161,10 @@ export type LLMChunk = {
|
|
|
32
161
|
} | {
|
|
33
162
|
type: "reasoning_delta";
|
|
34
163
|
text: string;
|
|
164
|
+
} | {
|
|
165
|
+
type: "reasoning_block_complete";
|
|
166
|
+
thinking: string;
|
|
167
|
+
signature: string;
|
|
35
168
|
} | {
|
|
36
169
|
type: "usage";
|
|
37
170
|
promptTokens: number;
|
|
@@ -39,6 +172,27 @@ export type LLMChunk = {
|
|
|
39
172
|
reasoningTokens?: number;
|
|
40
173
|
cacheReadTokens?: number;
|
|
41
174
|
cacheCreationTokens?: number;
|
|
175
|
+
} | {
|
|
176
|
+
type: "response_id";
|
|
177
|
+
id: string;
|
|
178
|
+
} | {
|
|
179
|
+
/** Informational status from platform-executed builtin tools (web_search, image_process). */
|
|
180
|
+
type: "builtin_tool_status";
|
|
181
|
+
toolType: string;
|
|
182
|
+
event: string;
|
|
183
|
+
data?: Record<string, unknown>;
|
|
184
|
+
} | {
|
|
185
|
+
/** Web search citation annotations from Volcengine web_search results. */
|
|
186
|
+
type: "annotations";
|
|
187
|
+
annotations: Array<{
|
|
188
|
+
type: string;
|
|
189
|
+
url?: string;
|
|
190
|
+
title?: string;
|
|
191
|
+
[key: string]: unknown;
|
|
192
|
+
}>;
|
|
193
|
+
} | {
|
|
194
|
+
type: "error";
|
|
195
|
+
message: string;
|
|
42
196
|
} | {
|
|
43
197
|
type: "done";
|
|
44
198
|
finishReason: string;
|
|
@@ -54,6 +208,11 @@ export interface LLMTransport {
|
|
|
54
208
|
* apiKey is passed explicitly (from agent.turn.config, not env).
|
|
55
209
|
*/
|
|
56
210
|
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
211
|
+
/**
|
|
212
|
+
* FIM (Fill-In-Middle) completion — optional capability.
|
|
213
|
+
* Only implemented by providers that support it (DeepSeek /beta endpoint).
|
|
214
|
+
*/
|
|
215
|
+
complete?(request: FIMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<FIMChunk>;
|
|
57
216
|
}
|
|
58
217
|
/**
|
|
59
218
|
* Accumulate tool_call_delta chunks into complete ToolCall objects.
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
* - signature_delta handling for thinking blocks
|
|
13
13
|
*/
|
|
14
14
|
import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
|
|
15
|
+
import type { ProviderQuirks } from "../provider-def.js";
|
|
15
16
|
export interface AnthropicTransportConfig {
|
|
16
17
|
baseUrl: string;
|
|
17
18
|
/** anthropic-version header (default "2023-06-01") */
|
|
@@ -24,6 +25,10 @@ export interface AnthropicTransportConfig {
|
|
|
24
25
|
enablePromptCaching?: boolean;
|
|
25
26
|
/** Max retry attempts on transient errors (default 3) */
|
|
26
27
|
maxRetries?: number;
|
|
28
|
+
/** Omit temperature when it equals 0 — MiniMax rejects temperature=0 */
|
|
29
|
+
omitZeroTemperature?: boolean;
|
|
30
|
+
/** Provider-specific quirks for conditional logic (CC/altcode parity) */
|
|
31
|
+
quirks?: ProviderQuirks;
|
|
27
32
|
}
|
|
28
33
|
export declare class AnthropicMessagesTransport implements LLMTransport {
|
|
29
34
|
private baseUrl;
|
|
@@ -32,6 +37,8 @@ export declare class AnthropicMessagesTransport implements LLMTransport {
|
|
|
32
37
|
private streamIdleTimeoutMs;
|
|
33
38
|
private enablePromptCaching;
|
|
34
39
|
private maxRetries;
|
|
40
|
+
private omitZeroTemperature;
|
|
41
|
+
private quirks;
|
|
35
42
|
constructor(config: AnthropicTransportConfig);
|
|
36
43
|
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
37
44
|
/**
|
|
@@ -18,4 +18,9 @@ export declare class MiniMaxMediaTransport implements MediaTransport {
|
|
|
18
18
|
constructor(config: MiniMaxMediaConfig);
|
|
19
19
|
generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
|
|
20
20
|
private pollTask;
|
|
21
|
+
/**
|
|
22
|
+
* Generate lyrics from a text prompt via MiniMax Lyrics Generation API.
|
|
23
|
+
* POST /v1/lyrics_generation — returns structured lyrics with tags.
|
|
24
|
+
*/
|
|
25
|
+
generateLyrics(prompt: string, apiKey: string, signal?: AbortSignal): Promise<string>;
|
|
21
26
|
}
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Adapted from admin-infer-proxy-client.ts SSE logic + Hermes openai_chat.py transport.
|
|
13
13
|
*/
|
|
14
|
-
import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
|
|
14
|
+
import type { LLMChunk, LLMRequest, LLMTransport, FIMRequest, FIMChunk } from "../transport.js";
|
|
15
|
+
import type { ProviderQuirks } from "../provider-def.js";
|
|
15
16
|
export interface OpenAIChatTransportConfig {
|
|
16
17
|
baseUrl: string;
|
|
17
18
|
/** Additional headers (e.g. for specific providers) */
|
|
@@ -22,6 +23,8 @@ export interface OpenAIChatTransportConfig {
|
|
|
22
23
|
supportsStreamOptions?: boolean;
|
|
23
24
|
/** Whether to omit temperature when it equals 0 (e.g. Moonshot rejects 0) */
|
|
24
25
|
omitZeroTemperature?: boolean;
|
|
26
|
+
/** Provider-specific quirks (CC/altcode parity) */
|
|
27
|
+
quirks?: ProviderQuirks;
|
|
25
28
|
}
|
|
26
29
|
export declare class OpenAIChatTransport implements LLMTransport {
|
|
27
30
|
private baseUrl;
|
|
@@ -29,8 +32,46 @@ export declare class OpenAIChatTransport implements LLMTransport {
|
|
|
29
32
|
private timeoutMs;
|
|
30
33
|
private supportsStreamOptions;
|
|
31
34
|
private omitZeroTemperature;
|
|
35
|
+
private quirks;
|
|
36
|
+
private cumulativeReasoningLen;
|
|
37
|
+
private cumulativeContentLen;
|
|
32
38
|
constructor(config: OpenAIChatTransportConfig);
|
|
33
39
|
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
34
|
-
private
|
|
35
|
-
|
|
40
|
+
private fetchAndStream;
|
|
41
|
+
/**
|
|
42
|
+
* Handle non-streaming JSON response from providers that ignore stream:true.
|
|
43
|
+
* Synthesize the same LLMChunk events a streaming response would produce.
|
|
44
|
+
*/
|
|
45
|
+
private handleNonStreamingResponse;
|
|
46
|
+
/**
|
|
47
|
+
* Parse SSE stream with 90s idle watchdog (CC parity).
|
|
48
|
+
* If no data arrives within STREAM_IDLE_TIMEOUT_MS, throw to trigger retry.
|
|
49
|
+
*/
|
|
50
|
+
private parseSSEStreamWithWatchdog;
|
|
51
|
+
private processChunk;
|
|
52
|
+
/**
|
|
53
|
+
* FIM completion via /beta/v1/completions.
|
|
54
|
+
* Only works with DeepSeek (requires supportsPrefixCompletion quirk).
|
|
55
|
+
* Non-thinking mode only; max completion 4K tokens.
|
|
56
|
+
*/
|
|
57
|
+
complete(request: FIMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<FIMChunk>;
|
|
58
|
+
/**
|
|
59
|
+
* Upload a file for use in conversations (Kimi File API).
|
|
60
|
+
* Returns a file_id that can be referenced in user messages.
|
|
61
|
+
* POST /v1/files with multipart/form-data.
|
|
62
|
+
*/
|
|
63
|
+
uploadFile(fileBlob: Blob, filename: string, purpose: string, apiKey: string, signal?: AbortSignal): Promise<{
|
|
64
|
+
fileId: string;
|
|
65
|
+
filename: string;
|
|
66
|
+
bytes: number;
|
|
67
|
+
}>;
|
|
68
|
+
/**
|
|
69
|
+
* Get file metadata and processing status (id, filename, size in bytes, status) — GET /v1/files/{file_id}
|
|
70
|
+
*/
|
|
71
|
+
getFileInfo(fileId: string, apiKey: string, signal?: AbortSignal): Promise<{
|
|
72
|
+
id: string;
|
|
73
|
+
filename: string;
|
|
74
|
+
bytes: number;
|
|
75
|
+
status: string;
|
|
76
|
+
}>;
|
|
36
77
|
}
|