qlogicagent 2.10.12 → 2.10.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/agent.js +14 -14
  2. package/dist/cli.js +344 -363
  3. package/dist/index.js +343 -362
  4. package/dist/orchestration.js +6 -6
  5. package/dist/protocol.js +1 -1
  6. package/dist/types/agent/agent.d.ts +1 -1
  7. package/dist/types/agent/tool-loop.d.ts +1 -1
  8. package/dist/types/agent/types.d.ts +1 -1
  9. package/dist/types/cli/handlers/memory-handler.d.ts +1 -0
  10. package/dist/types/cli/handlers/turn-handler.d.ts +1 -1
  11. package/dist/types/cli/tool-bootstrap.d.ts +3 -3
  12. package/dist/types/index.d.ts +2 -2
  13. package/dist/types/llm/index.d.ts +1 -1
  14. package/dist/types/orchestration/tool-cascade.d.ts +2 -2
  15. package/dist/types/protocol/wire/gateway-rpc.d.ts +14 -0
  16. package/dist/types/runtime/execution/dream-agent.d.ts +1 -1
  17. package/dist/types/runtime/execution/forked-agent.d.ts +1 -1
  18. package/dist/types/runtime/hooks/context-compression.d.ts +1 -1
  19. package/dist/types/runtime/session/session-persistence.d.ts +1 -1
  20. package/package.json +3 -2
  21. package/dist/types/provider-core/adapters/aliyun-oss-file-upload-adapter.d.ts +0 -44
  22. package/dist/types/provider-core/adapters/gemini-file-upload-adapter.d.ts +0 -26
  23. package/dist/types/provider-core/adapters/hub-oss-file-upload-adapter.d.ts +0 -29
  24. package/dist/types/provider-core/adapters/index.d.ts +0 -10
  25. package/dist/types/provider-core/adapters/openai-file-upload-adapter.d.ts +0 -38
  26. package/dist/types/provider-core/adapters/volcengine-file-upload-adapter.d.ts +0 -24
  27. package/dist/types/provider-core/builtin-providers.d.ts +0 -8
  28. package/dist/types/provider-core/constants.d.ts +0 -1
  29. package/dist/types/provider-core/credentials.d.ts +0 -1
  30. package/dist/types/provider-core/debug-transport.d.ts +0 -12
  31. package/dist/types/provider-core/errors.d.ts +0 -11
  32. package/dist/types/provider-core/events.d.ts +0 -48
  33. package/dist/types/provider-core/file-upload-service.d.ts +0 -68
  34. package/dist/types/provider-core/gemini-schema-utils.d.ts +0 -17
  35. package/dist/types/provider-core/index.d.ts +0 -37
  36. package/dist/types/provider-core/llm-client.d.ts +0 -43
  37. package/dist/types/provider-core/media-client.d.ts +0 -42
  38. package/dist/types/provider-core/media-transport.d.ts +0 -176
  39. package/dist/types/provider-core/media.d.ts +0 -2
  40. package/dist/types/provider-core/model-detection.d.ts +0 -22
  41. package/dist/types/provider-core/paths.d.ts +0 -2
  42. package/dist/types/provider-core/provider-def.d.ts +0 -220
  43. package/dist/types/provider-core/provider-registry.d.ts +0 -50
  44. package/dist/types/provider-core/provider-tool-api.d.ts +0 -44
  45. package/dist/types/provider-core/provider-variant-resolver.d.ts +0 -35
  46. package/dist/types/provider-core/retry.d.ts +0 -37
  47. package/dist/types/provider-core/transport.d.ts +0 -281
  48. package/dist/types/provider-core/transports/anthropic-messages.d.ts +0 -65
  49. package/dist/types/provider-core/transports/gemini-cache-api.d.ts +0 -86
  50. package/dist/types/provider-core/transports/gemini-file-api.d.ts +0 -90
  51. package/dist/types/provider-core/transports/gemini-generatecontent.d.ts +0 -56
  52. package/dist/types/provider-core/transports/gemini-lyria-realtime.d.ts +0 -117
  53. package/dist/types/provider-core/transports/gemini-media.d.ts +0 -53
  54. package/dist/types/provider-core/transports/media-resolve.d.ts +0 -50
  55. package/dist/types/provider-core/transports/minimax-media.d.ts +0 -56
  56. package/dist/types/provider-core/transports/openai-chat.d.ts +0 -81
  57. package/dist/types/provider-core/transports/openai-media.d.ts +0 -24
  58. package/dist/types/provider-core/transports/openai-responses.d.ts +0 -63
  59. package/dist/types/provider-core/transports/qwen-media.d.ts +0 -59
  60. package/dist/types/provider-core/transports/realtime-transport.d.ts +0 -183
  61. package/dist/types/provider-core/transports/volcengine-grounding.d.ts +0 -58
  62. package/dist/types/provider-core/transports/volcengine-media.d.ts +0 -94
  63. package/dist/types/provider-core/transports/volcengine-responses.d.ts +0 -64
  64. package/dist/types/provider-core/transports/zhipu-media.d.ts +0 -82
  65. package/dist/types/provider-core/transports/zhipu-tool-api.d.ts +0 -35
  66. package/dist/types/provider-core/wire-types.d.ts +0 -51
@@ -1,220 +0,0 @@
1
- /**
2
- * ProviderDef — defines how to connect to an LLM provider.
3
- *
4
- * Aligned with Hermes `ProviderDef` dataclass pattern:
5
- * id + name + transport type + baseUrl + auth config + model list
6
- *
7
- * Single curated model catalog plus optional user provider overrides.
8
- */
9
- export type TransportType = "openai-chat" | "openai-responses" | "anthropic-messages" | "volcengine-responses" | "gemini-generatecontent";
10
- export type AuthType = "bearer" | "x-api-key" | "none";
11
- export type MediaCapability = "image" | "video" | "music" | "music_realtime" | "tts" | "3d" | "stt" | "embedding" | "video_understanding" | "image_understanding" | "voice_clone" | "rerank" | "document_parsing" | "realtime_audio" | "realtime_video";
12
- export type ProviderVariantKind = "standard" | "openai-compatible" | "anthropic-compatible" | "coding-plan" | "media-plan" | "realtime";
13
- export type ProviderBillingChannelKind = "paygo" | "plan" | "discount" | "official";
14
- export type ProviderVariantCapability = "thinking" | "reasoning_split" | "tool_stream" | "builtin_tools" | "vision" | "media" | "coding" | "realtime";
15
- export type VideoOperation = "text2video" | "img2video" | "video2video" | "edit" | "merge" | "upscale";
16
- export type ImageOperation = "text2image" | "img2img" | "inpainting" | "outpainting";
17
- export type MusicOperation = "text2music" | "cover" | "realtime";
18
- export type TtsOperation = "text2speech" | "voice_clone";
19
- export type ThreeDOperation = "text2_3d" | "img2_3d";
20
- export interface VideoCapabilities {
21
- type: "video";
22
- operations: VideoOperation[];
23
- maxDurationSeconds?: number;
24
- resolutions?: string[];
25
- aspectRatios?: string[];
26
- fps?: number[];
27
- }
28
- export interface ImageCapabilities {
29
- type: "image";
30
- operations: ImageOperation[];
31
- sizes?: string[];
32
- transparentBackground?: boolean;
33
- }
34
- export interface MusicCapabilities {
35
- type: "music";
36
- operations: MusicOperation[];
37
- maxDurationSeconds?: number;
38
- formats?: string[];
39
- }
40
- export interface TtsCapabilities {
41
- type: "tts";
42
- operations?: TtsOperation[];
43
- voices?: string[];
44
- maxCharacters?: number;
45
- formats?: string[];
46
- }
47
- export interface ThreeDCapabilities {
48
- type: "3d";
49
- operations: ThreeDOperation[];
50
- outputFormats?: string[];
51
- }
52
- export interface SttCapabilities {
53
- type: "stt";
54
- languages?: string[];
55
- maxDurationSeconds?: number;
56
- formats?: string[];
57
- }
58
- export interface EmbeddingCapabilities {
59
- type: "embedding";
60
- dimensions?: number;
61
- maxTokens?: number;
62
- }
63
- export interface VideoUnderstandingCapabilities {
64
- type: "video_understanding";
65
- maxDurationSeconds?: number;
66
- formats?: string[];
67
- }
68
- export interface ImageUnderstandingCapabilities {
69
- type: "image_understanding";
70
- formats?: string[];
71
- }
72
- export interface VoiceCloneCapabilities {
73
- type: "voice_clone";
74
- maxSampleDurationSeconds?: number;
75
- maxSampleSizeMB?: number;
76
- formats?: string[];
77
- }
78
- export interface RerankCapabilities {
79
- type: "rerank";
80
- maxDocuments?: number;
81
- maxQueryLength?: number;
82
- maxDocumentLength?: number;
83
- }
84
- export interface DocumentParsingCapabilities {
85
- type: "document_parsing";
86
- supportedFormats?: string[];
87
- maxPageCount?: number;
88
- maxFileSizeMB?: number;
89
- }
90
- export interface RealtimeAudioCapabilities {
91
- type: "realtime_audio";
92
- voices?: string[];
93
- modalities?: Array<"text" | "audio">;
94
- vad?: boolean;
95
- toolCalling?: boolean;
96
- }
97
- export interface RealtimeVideoCapabilities {
98
- type: "realtime_video";
99
- modalities?: Array<"text" | "audio" | "video">;
100
- vad?: boolean;
101
- toolCalling?: boolean;
102
- maxDurationSeconds?: number;
103
- }
104
- export type MediaCapabilities = VideoCapabilities | ImageCapabilities | MusicCapabilities | TtsCapabilities | ThreeDCapabilities | SttCapabilities | EmbeddingCapabilities | VideoUnderstandingCapabilities | ImageUnderstandingCapabilities | VoiceCloneCapabilities | RerankCapabilities | DocumentParsingCapabilities | RealtimeAudioCapabilities | RealtimeVideoCapabilities;
105
- /**
106
- * Provider-specific quirks — drives conditional logic in transports.
107
- * CC parity: provider detection via quirks flags instead of hardcoded if/else.
108
- * altcode parity: provider auto-detect + per-provider parameter translation.
109
- */
110
- export interface ProviderQuirks {
111
- /** Provider doesn't support thinking content blocks (Qwen) */
112
- filterThinkingBlocks?: boolean;
113
- /** Provider doesn't support image content blocks — strip imageUrls before sending (DeepSeek, MiniMax) */
114
- filterImageBlocks?: boolean;
115
- /** DeepSeek: budget_tokens ignored, use output_config.effort instead */
116
- useEffortInsteadOfBudget?: boolean;
117
- /** Provider natively supports PDF/document content blocks (Anthropic document, Gemini fileData).
118
- * When false, PDFs are annotated as text labels and the agent must use tools to extract content. */
119
- supportsDocumentVision?: boolean;
120
- /** Provider supports reasoning_effort param (Kimi K2, OpenAI o-series) */
121
- supportsReasoningEffort?: boolean;
122
- /** Provider has built-in web search (Kimi: builtin_function.$web_search, GLM: web_search) */
123
- builtinWebSearch?: boolean;
124
- /** Provider has built-in code interpreter */
125
- builtinCodeInterpreter?: boolean;
126
- /** Provider supports native URL context fetching (Gemini urlContext tool) */
127
- builtinUrlContext?: boolean;
128
- /** Provider supports Google Maps Grounding (Gemini googleMaps tool) */
129
- builtinMapsGrounding?: boolean;
130
- /** Provider supports native file search (Gemini fileSearch tool) */
131
- builtinFileSearch?: boolean;
132
- /** Supports thinking.type="enabled"/"disabled" body param (Kimi K2, GLM).
133
- * Disambiguation: GLM also sets supportsToolStream; Kimi does not. */
134
- supportsThinkingParam?: boolean;
135
- /** When true, send thinking.type="disabled" unless the caller explicitly requests reasoning. */
136
- disableThinkingByDefault?: boolean;
137
- /** GLM-only: supports tool_stream=true for incremental tool call streaming */
138
- supportsToolStream?: boolean;
139
- /** DeepSeek only maps to "high"|"max"; low/medium→high */
140
- maxReasoningEffort?: "high" | "max";
141
- /** Supports prefix completion via /beta endpoint (DeepSeek Beta) */
142
- supportsPrefixCompletion?: boolean;
143
- /** MiniMax OpenAI route: inject reasoning_split=true to split thinking into reasoning_details.
144
- * Streaming uses cumulative string updates (not incremental deltas). */
145
- supportsReasoningSplit?: boolean;
146
- }
147
- export interface ProviderDef {
148
- /** Unique provider id, e.g. "deepseek", "openai", "anthropic" */
149
- id: string;
150
- /** Display name, e.g. "DeepSeek" */
151
- name: string;
152
- /** Which transport to use for LLM calls */
153
- transport: TransportType;
154
- /** API base URL, e.g. "https://api.deepseek.com" */
155
- baseUrl: string;
156
- /**
157
- * Logical provider group — links protocol variants of the same vendor.
158
- * e.g. both "zhipu" (anthropic) and "zhipu-openai" share group "zhipu".
159
- * Defaults to provider id if unset.
160
- */
161
- group?: string;
162
- /** Technical protocol variant kind for resolver ranking. */
163
- variantKind?: ProviderVariantKind;
164
- /** Channel type hint only; commercial cost selection is owned by llmrouter. */
165
- billingChannelKind?: ProviderBillingChannelKind;
166
- /** Provider-level capability hints used by ProviderVariantResolver. */
167
- capabilities?: ProviderVariantCapability[];
168
- /** Env var names for API key (priority order) */
169
- apiKeyEnvVars: string[];
170
- /** Auth header style */
171
- authType: AuthType;
172
- /** Is an aggregator (OpenRouter, 硅基) — model ids may have prefix */
173
- isAggregator: boolean;
174
- /** Recommended default model */
175
- defaultModel?: string;
176
- /** Known models for this provider */
177
- models?: ModelInfo[];
178
- /** Extra headers to send with every request (e.g. aggregator-specific) */
179
- extraHeaders?: Record<string, string>;
180
- /** Whether this provider supports stream_options (default true for openai-chat) */
181
- supportsStreamOptions?: boolean;
182
- /** Whether to omit temperature when it equals 0 (some providers reject 0) */
183
- omitZeroTemperature?: boolean;
184
- /** Provider-specific quirks for transport-level conditional logic */
185
- quirks?: ProviderQuirks;
186
- }
187
- export interface ModelInfo {
188
- /** Model id, e.g. "deepseek-v4-flash" */
189
- id: string;
190
- /** Stable public aliases exposed by llmrouter or older qlogicagent configs */
191
- aliases?: string[];
192
- /** Display name, e.g. "DeepSeek Chat V3" */
193
- name: string;
194
- /** Context window in tokens */
195
- contextWindow: number;
196
- /** Max output tokens */
197
- maxOutput: number;
198
- /** Supports function/tool calling */
199
- toolCall: boolean;
200
- /** Has reasoning/thinking mode */
201
- reasoning: boolean;
202
- /** Thinking is forced on — cannot be toggled off (e.g. QwQ, DeepSeek-R1) */
203
- reasoningRequired?: boolean;
204
- /** Model only supports streaming (non-stream requests will fail) */
205
- streamRequired?: boolean;
206
- /** Supports vision (image input) */
207
- vision: boolean;
208
- /** Cost per 1M input tokens (USD) */
209
- costInput?: number;
210
- /** Cost per 1M output tokens (USD) */
211
- costOutput?: number;
212
- /** Cost per 1M cache read tokens (USD) */
213
- costCacheRead?: number;
214
- /** Cost per 1M cache write tokens (USD) */
215
- costCacheWrite?: number;
216
- /** Media generation capability — undefined means chat/reasoning model */
217
- mediaType?: MediaCapability;
218
- /** Fine-grained media capabilities — operations, formats, limits */
219
- mediaCapabilities?: MediaCapabilities;
220
- }
@@ -1,50 +0,0 @@
1
- /**
2
- * ProviderRegistry — single-source curated registry for LLM providers.
3
- *
4
- * Layer 1: builtin-providers.ts curated providers and models
5
- * Layer 2: user config override (from agent.turn.config)
6
- *
7
- * Model IDs are intentionally not enriched from external catalogs. llmrouter and
8
- * OpenClaw consume the same curated Provider Core catalog, and upstream request
9
- * IDs are resolved through model aliases/native IDs.
10
- *
11
- * Aligned with Hermes provider_registry.py.
12
- */
13
- import type { ModelInfo, ProviderDef } from "./provider-def.js";
14
- export declare class ProviderRegistry {
15
- /** Curated providers and models */
16
- private builtins;
17
- /** User overrides (from agent.turn.config) */
18
- private overrides;
19
- constructor();
20
- /**
21
- * Apply user config override for a provider.
22
- * Typically called when agent.turn.config has baseUrl/apiKey overrides.
23
- */
24
- applyOverride(providerId: string, override: Partial<ProviderDef>): void;
25
- /**
26
- * Get merged ProviderDef by id (Layer 2 user overrides take precedence over Layer 1 builtins).
27
- * Returns undefined if provider not found.
28
- * Supports common aliases (e.g., "claude" → "anthropic").
29
- */
30
- getProvider(id: string): ProviderDef | undefined;
31
- /**
32
- * List all known providers.
33
- */
34
- listProviders(): ProviderDef[];
35
- /**
36
- * List models for a specific provider.
37
- */
38
- listModels(providerId: string): ModelInfo[];
39
- /**
40
- * Look up a single model's info by provider + model id.
41
- * Returns undefined if the model is not found.
42
- */
43
- getModelInfo(providerId: string, modelId: string): ModelInfo | undefined;
44
- /**
45
- * Resolve API key for a provider:
46
- * 1. Explicit key (from agent.turn.config)
47
- * 2. Environment variables (ProviderDef.apiKeyEnvVars)
48
- */
49
- resolveApiKey(providerId: string, explicitKey?: string): string | undefined;
50
- }
@@ -1,44 +0,0 @@
1
- /**
2
- * ProviderToolAPI — interface for provider-specific utility endpoints
3
- * that are neither LLM chat nor media generation.
4
- *
5
- * Examples: web search, content reader, tokenizer, moderation, realtime voice.
6
- * Each provider can expose its own set of tool APIs; the agent's tool cascade
7
- * mechanism (Q1) routes to these when the provider has a native capability.
8
- */
9
- export interface WebSearchResult {
10
- title: string;
11
- url: string;
12
- snippet: string;
13
- /** Full page content if available */
14
- content?: string;
15
- }
16
- export interface ReaderResult {
17
- title: string;
18
- content: string;
19
- url: string;
20
- }
21
- export interface TokenizerResult {
22
- tokenCount: number;
23
- model: string;
24
- }
25
- export interface ModerationResult {
26
- flagged: boolean;
27
- categories: Record<string, boolean>;
28
- scores?: Record<string, number>;
29
- }
30
- export interface ProviderToolAPI {
31
- /** Which tool APIs this provider supports */
32
- readonly capabilities: readonly ProviderToolCapability[];
33
- /** Web search — returns search result list */
34
- webSearch?(query: string, options?: {
35
- maxResults?: number;
36
- }): Promise<WebSearchResult[]>;
37
- /** URL reader — extracts content from a web page */
38
- reader?(url: string): Promise<ReaderResult>;
39
- /** Tokenizer — count tokens for given text/model */
40
- tokenize?(text: string, model: string): Promise<TokenizerResult>;
41
- /** Content moderation — check text for policy violations */
42
- moderate?(text: string): Promise<ModerationResult>;
43
- }
44
- export type ProviderToolCapability = "web_search" | "reader" | "tokenizer" | "moderations";
@@ -1,35 +0,0 @@
1
- import type { ModelInfo, ProviderBillingChannelKind, ProviderDef, ProviderVariantCapability, ProviderVariantKind, TransportType } from "./provider-def.js";
2
- import type { ProviderRegistry } from "./provider-registry.js";
3
- export type RequestedProviderProtocol = TransportType | "openai" | "anthropic";
4
- export interface ProviderVariantResolverInput {
5
- publicModel: string;
6
- requestedProtocol?: RequestedProviderProtocol;
7
- capabilities?: ProviderVariantCapability[];
8
- purpose?: string;
9
- userPreference?: {
10
- providerIds?: string[];
11
- preferProviderId?: string;
12
- preferVariantKind?: ProviderVariantKind;
13
- };
14
- }
15
- export interface ProviderVariantResolution {
16
- provider: string;
17
- group: string;
18
- publicModel: string;
19
- nativeModelId: string;
20
- displayName: string;
21
- transport: TransportType;
22
- variantKind: ProviderVariantKind;
23
- billingChannelKind: ProviderBillingChannelKind;
24
- capabilities: ProviderVariantCapability[];
25
- score: number;
26
- reasons: string[];
27
- providerDef: ProviderDef;
28
- modelInfo: ModelInfo;
29
- }
30
- export declare class ProviderVariantResolver {
31
- private readonly registry;
32
- constructor(registry: Pick<ProviderRegistry, "listProviders" | "listModels">);
33
- resolve(input: ProviderVariantResolverInput): ProviderVariantResolution[];
34
- resolveBest(input: ProviderVariantResolverInput): ProviderVariantResolution | undefined;
35
- }
@@ -1,37 +0,0 @@
1
- /**
2
- * Shared retry/backoff utilities for LLM transport implementations.
3
- *
4
- * Provides common constants and helper functions used by all transports
5
- * (anthropic-messages, openai-chat, volcengine-responses) to handle
6
- * transient errors with exponential backoff.
7
- */
8
- /** Default maximum number of retry attempts */
9
- export declare const DEFAULT_MAX_RETRIES = 2;
10
- /** Base delay for exponential backoff (doubles each attempt, capped at 30s) */
11
- export declare const RETRY_BASE_DELAY_MS = 1000;
12
- /** Maximum backoff delay */
13
- export declare const RETRY_MAX_DELAY_MS = 30000;
14
- /** HTTP status codes considered transient (worth retrying) */
15
- export declare const TRANSIENT_STATUS_CODES: Set<number>;
16
- /** Default timeout for idle stream detection (no data received) */
17
- export declare const STREAM_IDLE_TIMEOUT_MS = 90000;
18
- /**
19
- * Calculate the delay for a given retry attempt using exponential backoff with jitter.
20
- * @param attempt 1-based attempt number (1 = first retry)
21
- * @returns delay in milliseconds
22
- */
23
- export declare function retryDelay(attempt: number): number;
24
- /**
25
- * Check if an HTTP status code indicates a transient error worth retrying.
26
- */
27
- export declare function isTransientStatus(status: number | null | undefined): boolean;
28
- /**
29
- * Sleep with abort signal support. Resolves after `ms` milliseconds
30
- * or rejects if the signal is aborted.
31
- */
32
- export declare function retrySleep(ms: number, signal?: AbortSignal): Promise<void>;
33
- /**
34
- * Extract HTTP status from various error shapes.
35
- * Works with fetch Response errors, Axios errors, and generic errors with status property.
36
- */
37
- export declare function extractHttpStatus(error: unknown): number | null;
@@ -1,281 +0,0 @@
1
- /**
2
- * LLMTransport — abstract interface for LLM inference calls.
3
- *
4
- * Aligned with Hermes `ProviderTransport` ABC:
5
- * stream(request, apiKey, signal) → AsyncGenerator<LLMChunk>
6
- *
7
- * Two concrete implementations:
8
- * - OpenAI Chat Completions (covers 95% of providers)
9
- * - Anthropic Messages API
10
- */
11
- import type { ChatMessage, ToolDefinition } from "./wire-types.js";
12
- export type StructuredOutputConfig = {
13
- mode: "json_object";
14
- } | {
15
- mode: "json_schema";
16
- name: string;
17
- schema: Record<string, unknown>;
18
- strict?: boolean;
19
- };
20
- export interface CachingConfig {
21
- type: "enabled" | "disabled";
22
- /** Enable prefix caching mode (鎼?0.3). Requires store=true and stream=false. */
23
- prefix?: boolean;
24
- }
25
- export type ContextEdit = {
26
- type: "clear_thinking";
27
- keep?: "all" | {
28
- type: "thinking_turns";
29
- value: number;
30
- };
31
- } | {
32
- type: "clear_tool_uses";
33
- trigger?: {
34
- type: "tool_uses";
35
- value: number;
36
- };
37
- keep?: {
38
- type: "tool_uses";
39
- value: number;
40
- };
41
- excludeTools?: string[];
42
- clearToolInput?: boolean;
43
- };
44
- export interface ContextManagementConfig {
45
- edits: ContextEdit[];
46
- }
47
- export interface LLMRequest {
48
- model: string;
49
- messages: ChatMessage[];
50
- tools?: ToolDefinition[];
51
- toolChoice?: "auto" | "none" | "required" | {
52
- type: "function";
53
- name: string;
54
- };
55
- temperature?: number;
56
- /** Nucleus sampling: controls diversity via cumulative probability cutoff. */
57
- topP?: number;
58
- maxTokens?: number;
59
- reasoning?: {
60
- effort: "minimal" | "low" | "medium" | "high" | "xhigh";
61
- /** Request encrypted original reasoning content (Volcengine 鎼?7.7). */
62
- includeEncryptedReasoning?: boolean;
63
- };
64
- /** Volcengine: max builtin tool calls per turn (鎼?9.15). */
65
- maxToolCalls?: number;
66
- /**
67
- * DeepSeek prefix completion: force model to continue from this prefix.
68
- * Requires `/beta` endpoint; adds a trailing assistant message with `prefix: true`.
69
- */
70
- prefixMessage?: string;
71
- /**
72
- * Model requires streaming — disable non-streaming fallback in transports.
73
- * When true, transports must NOT fall back to non-streaming requests on failure.
74
- * Set for models like QwQ/Omni where the provider rejects non-streaming calls.
75
- */
76
- streamRequired?: boolean;
77
- /**
78
- * Disable injection of provider-native builtin tools (web_search, code_interpreter)
79
- * for this specific request. Allows session-level control over GLM/Kimi builtin tools.
80
- */
81
- disableBuiltinTools?: boolean;
82
- /**
83
- * Volcengine builtin tools to inject (web_search, image_process, knowledge_search).
84
- * Each entry specifies a tool type and optional config.
85
- * These are platform-executed tools requiring beta headers.
86
- */
87
- builtinTools?: Array<{
88
- type: "builtin_web_search" | "builtin_image_process" | "builtin_knowledge_search" | "builtin_doubao_app";
89
- config?: Record<string, unknown>;
90
- }>;
91
- /**
92
- * Server-side context continuation via response chain (鎼?).
93
- * When set, the server automatically includes previous context,
94
- * so messages[] only needs to contain the NEW user message.
95
- */
96
- previousResponseId?: string;
97
- /**
98
- * Control server-side storage of this request's input/output (鎼?.1).
99
- * Default: true (server stores for 3 days).
100
- */
101
- store?: boolean;
102
- /** Expiration time for stored response (Unix seconds, max 7 days from now) */
103
- storeExpireAt?: number;
104
- /**
105
- * Per-turn system instruction augmentation (鎼?).
106
- * Temporarily overlays persona or adds constraints for this turn only.
107
- * NOTE: Incompatible with caching — do not use both together.
108
- */
109
- instructions?: string;
110
- /**
111
- * Structured output format (鎼?6).
112
- * Forces model to produce JSON conforming to the specified schema.
113
- */
114
- structuredOutput?: StructuredOutputConfig;
115
- /**
116
- * Caching configuration (鎼?0).
117
- * Controls prefix/session caching behavior.
118
- * NOTE: Incompatible with instructions, json_schema, and builtin tools.
119
- */
120
- caching?: CachingConfig;
121
- /**
122
- * Context management edits (鎼?1, beta).
123
- * Server-side trimming of historical thinking chains and tool call traces.
124
- */
125
- contextManagement?: ContextManagementConfig;
126
- /**
127
- * Gemini explicit cache reference (gemini-ProviderMax 鎼?).
128
- * Passes a pre-created cache name (e.g. "cachedContents/abc123") to
129
- * generateContent so the server uses cached tokens instead of re-processing.
130
- * Create caches via GeminiCacheAPI.createCache() first.
131
- */
132
- cachedContent?: string;
133
- /**
134
- * Predicted output for speculative decoding (openai-ProviderMax 鎼?1).
135
- * When editing code, pass the existing content so the model can diff efficiently.
136
- * Reduces latency by 3-5x when prediction matches. Falls back when it doesn't.
137
- * Works with OpenAI GPT-5.x models via Responses API and Chat Completions.
138
- */
139
- prediction?: {
140
- type: "content";
141
- content: string;
142
- };
143
- /**
144
- * Prompt cache bucketing key (openai-ProviderMax 鎼?1).
145
- * Replaces the deprecated `user` field. Helps OpenAI group similar requests
146
- * for higher cache hit rates.
147
- */
148
- promptCacheKey?: string;
149
- /**
150
- * Prompt cache retention policy (openai-ProviderMax 鎼?1).
151
- * "in_memory" = default 5-10 min, "24h" = extended up to 24 hours.
152
- */
153
- promptCacheRetention?: "in_memory" | "24h";
154
- /**
155
- * Service tier for request scheduling (openai-ProviderMax 鎼?4).
156
- * "auto" = project default, "flex" = 50% cheaper / higher latency,
157
- * "priority" = guaranteed low latency.
158
- */
159
- serviceTier?: "auto" | "default" | "flex" | "priority";
160
- /**
161
- * OpenAI Responses API built-in tools (openai-ProviderMax 鎼?).
162
- * Platform-executed tools like web_search, file_search, code_interpreter, etc.
163
- */
164
- openaiBuiltinTools?: Array<{
165
- type: "web_search_preview" | "file_search" | "code_interpreter" | "computer_use_preview";
166
- [key: string]: unknown;
167
- }>;
168
- /**
169
- * OpenAI Responses API conversation ID (openai-ProviderMax 鎼?.1).
170
- * Alternative to previous_response_id — persistent server-side conversation.
171
- * Cannot be used together with previousResponseId.
172
- */
173
- conversationId?: string;
174
- /**
175
- * Controls parallel tool calling for this request.
176
- * When false, the model must call tools sequentially.
177
- */
178
- parallelToolCalls?: boolean;
179
- /**
180
- * Text output verbosity hint (openai-ProviderMax 鎼?).
181
- * Controls how detailed the model's textual output should be.
182
- */
183
- textVerbosity?: "low" | "medium" | "high";
184
- }
185
- /**
186
- * FIM completion request — DeepSeek Beta Completions API.
187
- * POST /beta/v1/completions with prompt + suffix.
188
- * Only works with non-thinking mode.
189
- */
190
- export interface FIMRequest {
191
- model: string;
192
- /** Text before the cursor (prefix context) */
193
- prompt: string;
194
- /** Text after the cursor (suffix context) */
195
- suffix?: string;
196
- /** Max tokens to generate for the infill */
197
- maxTokens?: number;
198
- /** Sampling temperature */
199
- temperature?: number;
200
- /** Stop sequences */
201
- stop?: string[];
202
- }
203
- export type FIMChunk = {
204
- type: "delta";
205
- text: string;
206
- } | {
207
- type: "done";
208
- finishReason: string;
209
- };
210
- export type LLMChunk = {
211
- type: "delta";
212
- text: string;
213
- } | {
214
- type: "tool_call_delta";
215
- index: number;
216
- id?: string;
217
- name?: string;
218
- arguments: string;
219
- } | {
220
- type: "reasoning_delta";
221
- text: string;
222
- } | {
223
- type: "reasoning_block_complete";
224
- thinking: string;
225
- signature: string;
226
- } | {
227
- type: "usage";
228
- promptTokens: number;
229
- completionTokens: number;
230
- reasoningTokens?: number;
231
- cacheReadTokens?: number;
232
- cacheCreationTokens?: number;
233
- } | {
234
- type: "response_id";
235
- id: string;
236
- } | {
237
- /** Informational status from platform-executed builtin tools (web_search, image_process). */
238
- type: "builtin_tool_status";
239
- toolType: string;
240
- event: string;
241
- data?: Record<string, unknown>;
242
- } | {
243
- /** Web search citation annotations from provider builtin tools (Volcengine web_search, Gemini grounding). */
244
- type: "annotations";
245
- annotations: Array<{
246
- type: string;
247
- url?: string;
248
- title?: string;
249
- [key: string]: unknown;
250
- }>;
251
- } | {
252
- type: "error";
253
- message: string;
254
- } | {
255
- type: "done";
256
- finishReason: string;
257
- };
258
- export interface AccumulatedToolCall {
259
- id: string;
260
- name: string;
261
- arguments: string;
262
- }
263
- export interface LLMTransport {
264
- /**
265
- * Stream an LLM inference request.
266
- * apiKey is passed explicitly (from agent.turn.config, not env).
267
- */
268
- stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
269
- /**
270
- * FIM (Fill-In-Middle) completion — optional capability.
271
- * Only implemented by providers that support it (DeepSeek /beta endpoint).
272
- */
273
- complete?(request: FIMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<FIMChunk>;
274
- }
275
- /**
276
- * Accumulate tool_call_delta chunks into complete ToolCall objects.
277
- * Modeled after admin-infer-proxy-client's Map<index, toolCall> accumulator.
278
- */
279
- export declare function accumulateToolCalls(accumulator: Map<number, AccumulatedToolCall>, chunk: LLMChunk & {
280
- type: "tool_call_delta";
281
- }): void;