qlogicagent 2.10.1 → 2.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +2 -2
- package/dist/index.js +2 -2
- package/dist/types/agent/agent.d.ts +1 -1
- package/dist/types/agent/tool-loop.d.ts +1 -1
- package/dist/types/agent/types.d.ts +1 -1
- package/dist/types/cli/handlers/turn-handler.d.ts +1 -1
- package/dist/types/cli/tool-bootstrap.d.ts +3 -3
- package/dist/types/index.d.ts +2 -2
- package/dist/types/llm/index.d.ts +1 -1
- package/dist/types/orchestration/tool-cascade.d.ts +2 -2
- package/dist/types/provider-core/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
- package/dist/types/provider-core/adapters/gemini-file-upload-adapter.d.ts +26 -0
- package/dist/types/provider-core/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
- package/dist/types/provider-core/adapters/index.d.ts +10 -0
- package/dist/types/provider-core/adapters/openai-file-upload-adapter.d.ts +38 -0
- package/dist/types/provider-core/adapters/volcengine-file-upload-adapter.d.ts +24 -0
- package/dist/types/provider-core/builtin-providers.d.ts +10 -0
- package/dist/types/provider-core/constants.d.ts +1 -0
- package/dist/types/provider-core/credentials.d.ts +1 -0
- package/dist/types/provider-core/debug-transport.d.ts +12 -0
- package/dist/types/provider-core/errors.d.ts +11 -0
- package/dist/types/provider-core/events.d.ts +48 -0
- package/dist/types/provider-core/file-upload-service.d.ts +68 -0
- package/dist/types/provider-core/gemini-schema-utils.d.ts +17 -0
- package/dist/types/provider-core/index.d.ts +37 -0
- package/dist/types/provider-core/llm-client.d.ts +43 -0
- package/dist/types/provider-core/media-client.d.ts +42 -0
- package/dist/types/provider-core/media-transport.d.ts +176 -0
- package/dist/types/provider-core/media.d.ts +2 -0
- package/dist/types/provider-core/model-catalog.d.ts +82 -0
- package/dist/types/provider-core/model-detection.d.ts +22 -0
- package/dist/types/provider-core/paths.d.ts +2 -0
- package/dist/types/provider-core/provider-def.d.ts +214 -0
- package/dist/types/provider-core/provider-registry.d.ts +59 -0
- package/dist/types/provider-core/provider-tool-api.d.ts +44 -0
- package/dist/types/provider-core/retry.d.ts +37 -0
- package/dist/types/provider-core/transport.d.ts +281 -0
- package/dist/types/provider-core/transports/anthropic-messages.d.ts +65 -0
- package/dist/types/provider-core/transports/gemini-cache-api.d.ts +86 -0
- package/dist/types/provider-core/transports/gemini-file-api.d.ts +90 -0
- package/dist/types/provider-core/transports/gemini-generatecontent.d.ts +56 -0
- package/dist/types/provider-core/transports/gemini-lyria-realtime.d.ts +117 -0
- package/dist/types/provider-core/transports/gemini-media.d.ts +53 -0
- package/dist/types/provider-core/transports/media-resolve.d.ts +50 -0
- package/dist/types/provider-core/transports/minimax-media.d.ts +55 -0
- package/dist/types/provider-core/transports/openai-chat.d.ts +81 -0
- package/dist/types/provider-core/transports/openai-media.d.ts +24 -0
- package/dist/types/provider-core/transports/openai-responses.d.ts +63 -0
- package/dist/types/provider-core/transports/qwen-media.d.ts +50 -0
- package/dist/types/provider-core/transports/realtime-transport.d.ts +183 -0
- package/dist/types/provider-core/transports/volcengine-grounding.d.ts +58 -0
- package/dist/types/provider-core/transports/volcengine-media.d.ts +93 -0
- package/dist/types/provider-core/transports/volcengine-responses.d.ts +64 -0
- package/dist/types/provider-core/transports/zhipu-media.d.ts +82 -0
- package/dist/types/provider-core/transports/zhipu-tool-api.d.ts +35 -0
- package/dist/types/provider-core/wire-types.d.ts +51 -0
- package/dist/types/runtime/execution/dream-agent.d.ts +1 -1
- package/dist/types/runtime/execution/forked-agent.d.ts +1 -1
- package/dist/types/runtime/hooks/context-compression.d.ts +1 -1
- package/dist/types/runtime/session/session-persistence.d.ts +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MediaTransport — transport interface for generation APIs (image, video, music, 3D).
|
|
3
|
+
*
|
|
4
|
+
* Parallel to LLMTransport (chat/reasoning), MediaTransport handles
|
|
5
|
+
* non-chat generation endpoints that each vendor exposes differently:
|
|
6
|
+
* - Sync (OpenAI images, Volcengine Seedream, Gemini generateContent)
|
|
7
|
+
* - Async job (Volcengine Seedance/3D, MiniMax music) — submit → poll → result
|
|
8
|
+
*
|
|
9
|
+
* Each provider adapter implements this interface and hides vendor-specific
|
|
10
|
+
* auth, endpoint paths, request shapes, and polling logic.
|
|
11
|
+
*/
|
|
12
|
+
export type MediaType = "image" | "video" | "music" | "music_realtime" | "tts" | "3d" | "stt" | "embedding" | "video_understanding" | "image_understanding" | "voice_clone" | "rerank" | "document_parsing" | "realtime_audio" | "realtime_video";
|
|
13
|
+
export interface MediaRequest {
|
|
14
|
+
/** Generation model id, e.g. "doubao-seedream-5-0-260128", "gpt-image-2" */
|
|
15
|
+
model: string;
|
|
16
|
+
/** What kind of media to generate */
|
|
17
|
+
mediaType: MediaType;
|
|
18
|
+
/** Text prompt for generation */
|
|
19
|
+
prompt: string;
|
|
20
|
+
/** Optional reference image URL (img2img, i2v, img-to-3d) */
|
|
21
|
+
imageUrl?: string;
|
|
22
|
+
/** Desired dimensions, e.g. "1024x1024" */
|
|
23
|
+
size?: string;
|
|
24
|
+
/** Aspect ratio for video, e.g. "16:9" */
|
|
25
|
+
aspectRatio?: string;
|
|
26
|
+
/** Duration in seconds (video, music) */
|
|
27
|
+
duration?: number;
|
|
28
|
+
/** Number of outputs (image) */
|
|
29
|
+
n?: number;
|
|
30
|
+
/** Visual/musical style */
|
|
31
|
+
style?: string;
|
|
32
|
+
/** Intended use / purpose */
|
|
33
|
+
purpose?: string;
|
|
34
|
+
/** Lyrics for music generation */
|
|
35
|
+
lyrics?: string;
|
|
36
|
+
/** Text for TTS */
|
|
37
|
+
text?: string;
|
|
38
|
+
/** TTS channel hint */
|
|
39
|
+
channel?: string;
|
|
40
|
+
/** Source video URLs for edit/merge operations */
|
|
41
|
+
sourceVideos?: string[];
|
|
42
|
+
/** Reference images (no role / first_frame / last_frame) */
|
|
43
|
+
referenceImages?: string[];
|
|
44
|
+
/** Reference image roles — parallel array with referenceImages. */
|
|
45
|
+
imageRoles?: Array<"first_frame" | "last_frame" | "reference_image">;
|
|
46
|
+
/** Reference video URLs for multimodal reference generation (Seedance 2.0) */
|
|
47
|
+
referenceVideos?: string[];
|
|
48
|
+
/** Reference audio URLs for multimodal reference generation (Seedance 2.0) */
|
|
49
|
+
referenceAudios?: string[];
|
|
50
|
+
/** Generate synchronized audio track (Seedance 2.0 / 1.5 pro) */
|
|
51
|
+
generateAudio?: boolean;
|
|
52
|
+
/** Output resolution for upscale, e.g. "1080p" */
|
|
53
|
+
resolution?: string;
|
|
54
|
+
/** Operation variant: generate (default), edit, merge, upscale, multimodal_reference, extend */
|
|
55
|
+
operation?: "generate" | "edit" | "merge" | "upscale" | "multimodal_reference" | "extend";
|
|
56
|
+
/** Image/video quality, e.g. "auto", "high", "low", "hd" */
|
|
57
|
+
quality?: string;
|
|
58
|
+
/** Seed for reproducible generation */
|
|
59
|
+
seed?: number;
|
|
60
|
+
/** TTS voice, e.g. "alloy", "nova", "shimmer" */
|
|
61
|
+
voice?: string;
|
|
62
|
+
/** TTS speech speed multiplier */
|
|
63
|
+
speed?: number;
|
|
64
|
+
/** Pure instrumental mode (MiniMax music) */
|
|
65
|
+
isInstrumental?: boolean;
|
|
66
|
+
/** Source audio URL for cover/remix (MiniMax music) */
|
|
67
|
+
audioUrl?: string;
|
|
68
|
+
/** Output audio format, e.g. "mp3", "wav", "flac" */
|
|
69
|
+
audioFormat?: string;
|
|
70
|
+
/** Video frames per second */
|
|
71
|
+
fps?: number;
|
|
72
|
+
/** Whether to add AI watermark */
|
|
73
|
+
watermark?: boolean;
|
|
74
|
+
/** img2img guidance/control strength (0-1), e.g. Volcengine */
|
|
75
|
+
guidanceScale?: number;
|
|
76
|
+
/** Auto-generate lyrics when none provided (MiniMax) */
|
|
77
|
+
lyricsOptimizer?: boolean;
|
|
78
|
+
/** 3D output format, e.g. "glb", "obj", "usd", "usdz" */
|
|
79
|
+
outputFormat?: string;
|
|
80
|
+
/** Image background mode (OpenAI gpt-image-2): "transparent", "opaque", "auto" */
|
|
81
|
+
background?: string;
|
|
82
|
+
/** Provider-specific request metadata (e.g. rerank documents, parsing options) */
|
|
83
|
+
metadata?: Record<string, unknown>;
|
|
84
|
+
/** Ask the provider to optimize/enhance the prompt before generation */
|
|
85
|
+
enhancePrompt?: boolean;
|
|
86
|
+
/**
|
|
87
|
+
* Progress callback for async polling operations (video gen, 3D gen, etc.).
|
|
88
|
+
* Called periodically with estimated progress percentage, status text, and
|
|
89
|
+
* the provider-specific task ID (available once the task is submitted).
|
|
90
|
+
*/
|
|
91
|
+
onProgress?: (percent: number, status: string, taskId?: string) => void;
|
|
92
|
+
/** Enable progressive/streaming image generation (Volcengine Seedream stream:true) */
|
|
93
|
+
streamImage?: boolean;
|
|
94
|
+
/** Lock camera position (Seedance 1.0/1.5, not Seedance 2.0) */
|
|
95
|
+
cameraFixed?: boolean;
|
|
96
|
+
/** Return last frame URL for chaining continuous video segments */
|
|
97
|
+
returnLastFrame?: boolean;
|
|
98
|
+
/** Service tier: 'default' (online) or 'flex' (offline, ~50% cost) — not all models support flex */
|
|
99
|
+
serviceTier?: "default" | "flex";
|
|
100
|
+
/** Task expiration in seconds (for flex/offline scheduling) */
|
|
101
|
+
executionExpiresAfterSeconds?: number;
|
|
102
|
+
/** Draft mode — low-cost preview (Seedance 1.5 pro only) */
|
|
103
|
+
draft?: boolean;
|
|
104
|
+
/** Draft task ID to promote to final video */
|
|
105
|
+
draftTaskId?: string;
|
|
106
|
+
/** Video-level builtin tools, e.g. ["web_search"] (Seedance 2.0) */
|
|
107
|
+
videoTools?: string[];
|
|
108
|
+
/** End-user safety identifier for content moderation */
|
|
109
|
+
safetyIdentifier?: string;
|
|
110
|
+
/** Callback URL for async task status push notifications */
|
|
111
|
+
callbackUrl?: string;
|
|
112
|
+
/** Image detail level control: 'auto' | 'low' | 'high' */
|
|
113
|
+
detail?: "auto" | "low" | "high";
|
|
114
|
+
/** Max image pixels budget (Volcengine image_pixel_limit) */
|
|
115
|
+
imagePixelLimit?: number;
|
|
116
|
+
}
|
|
117
|
+
export interface MediaResult {
|
|
118
|
+
/** URLs of generated media files */
|
|
119
|
+
mediaUrls: string[];
|
|
120
|
+
/** Model actually used */
|
|
121
|
+
model?: string;
|
|
122
|
+
/** Output dimensions / format info */
|
|
123
|
+
size?: string;
|
|
124
|
+
/** Total generation time in ms */
|
|
125
|
+
durationMs?: number;
|
|
126
|
+
/** Billing unit type for non-token models */
|
|
127
|
+
billingUnit?: "per_call" | "per_second" | "per_character" | "per_pixel" | "per_token";
|
|
128
|
+
/** Quantity consumed (seconds, characters, pixels, etc.) */
|
|
129
|
+
billingQuantity?: number;
|
|
130
|
+
/** Provider-specific metadata */
|
|
131
|
+
metadata?: Record<string, unknown>;
|
|
132
|
+
/** Last frame image URL for chaining continuous video generation */
|
|
133
|
+
lastFrameUrl?: string;
|
|
134
|
+
/** Task ID (for continuing draft→final or querying) */
|
|
135
|
+
taskId?: string;
|
|
136
|
+
}
|
|
137
|
+
export interface MediaTransport {
|
|
138
|
+
/**
|
|
139
|
+
* Generate media content.
|
|
140
|
+
* Handles sync APIs directly and async job APIs (submit + poll) internally.
|
|
141
|
+
*
|
|
142
|
+
* @param request - Generation parameters
|
|
143
|
+
* @param apiKey - User API key (passed explicitly, not from env)
|
|
144
|
+
* @param signal - Optional abort signal
|
|
145
|
+
*/
|
|
146
|
+
generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
|
|
147
|
+
/**
|
|
148
|
+
* Which media types this transport supports.
|
|
149
|
+
* Used by the media client factory to route requests.
|
|
150
|
+
*/
|
|
151
|
+
readonly supportedTypes: readonly MediaType[];
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* Extended transport for providers that support async task management
|
|
155
|
+
* (submit → poll → cancel/query). Volcengine Seedance, MiniMax async, etc.
|
|
156
|
+
*/
|
|
157
|
+
export interface AsyncMediaTransport extends MediaTransport {
|
|
158
|
+
/** Cancel / delete an async task. */
|
|
159
|
+
deleteVideoTask(taskId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
|
|
160
|
+
/** List recent tasks for status queries. */
|
|
161
|
+
listVideoTasks(apiKey: string, options?: {
|
|
162
|
+
after?: string;
|
|
163
|
+
limit?: number;
|
|
164
|
+
status?: string;
|
|
165
|
+
}, signal?: AbortSignal): Promise<Record<string, unknown>>;
|
|
166
|
+
/**
|
|
167
|
+
* Query a single task by ID — preferred over listVideoTasks for direct lookups.
|
|
168
|
+
* Not all providers support listing; all async providers support single-task queries.
|
|
169
|
+
*/
|
|
170
|
+
getTaskStatus?(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
|
|
171
|
+
status: string;
|
|
172
|
+
task: Record<string, unknown>;
|
|
173
|
+
}>;
|
|
174
|
+
}
|
|
175
|
+
/** Type guard for transports that support async task management. */
|
|
176
|
+
export declare function isAsyncMediaTransport(t: MediaTransport): t is AsyncMediaTransport;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ModelCatalog — remote model directory with disk cache + fallback.
|
|
3
|
+
*
|
|
4
|
+
* Fetches model metadata from models.dev/api.json (community-maintained,
|
|
5
|
+
* comprehensive single-source catalog with 100+ providers).
|
|
6
|
+
*
|
|
7
|
+
* Lookup order with fallback:
|
|
8
|
+
* 1. In-memory cache (process-level, TTL check against disk mtime)
|
|
9
|
+
* 2. Disk cache (~/.qlogicagent/cache/model_catalog.json)
|
|
10
|
+
* 3. Remote fetch (models.dev single endpoint)
|
|
11
|
+
* 4. Stale disk cache (if remote fails)
|
|
12
|
+
* 5. Empty (caller falls back to builtin-providers.ts hardcoded)
|
|
13
|
+
*
|
|
14
|
+
* Non-blocking: first startup without cache returns empty immediately,
|
|
15
|
+
* triggers background async fetch. Agent uses builtin providers until
|
|
16
|
+
* catalog arrives.
|
|
17
|
+
*/
|
|
18
|
+
import type { ModelInfo } from "./provider-def.js";
|
|
19
|
+
/** Embedding model metadata from models.dev catalog. */
|
|
20
|
+
export interface EmbeddingModelInfo {
|
|
21
|
+
id: string;
|
|
22
|
+
name: string;
|
|
23
|
+
/** Provider-internal ID (e.g. "openai", "qwen") */
|
|
24
|
+
providerId: string;
|
|
25
|
+
/** Output vector dimensions (from limit.output) */
|
|
26
|
+
dimensions: number;
|
|
27
|
+
/** Max input context tokens */
|
|
28
|
+
maxInputTokens: number;
|
|
29
|
+
/** Cost per million input tokens (USD) */
|
|
30
|
+
costInput?: number;
|
|
31
|
+
/** Model family string from models.dev */
|
|
32
|
+
family: string;
|
|
33
|
+
}
|
|
34
|
+
export declare class ModelCatalog {
|
|
35
|
+
private cache;
|
|
36
|
+
private cacheDir;
|
|
37
|
+
private cacheFile;
|
|
38
|
+
private ttlMs;
|
|
39
|
+
private fetching;
|
|
40
|
+
private lastFetchAttempt;
|
|
41
|
+
constructor(opts?: {
|
|
42
|
+
cacheDir?: string;
|
|
43
|
+
ttlMs?: number;
|
|
44
|
+
});
|
|
45
|
+
/**
|
|
46
|
+
* Get models for a provider. Non-blocking: returns whatever is cached.
|
|
47
|
+
* Triggers background refresh if stale.
|
|
48
|
+
*/
|
|
49
|
+
getModels(providerId: string): ModelInfo[];
|
|
50
|
+
/**
|
|
51
|
+
* Get a single model by provider + model id.
|
|
52
|
+
*/
|
|
53
|
+
getModel(providerId: string, modelId: string): ModelInfo | undefined;
|
|
54
|
+
/**
|
|
55
|
+
* List all known provider ids from the catalog.
|
|
56
|
+
*/
|
|
57
|
+
listProviderIds(): string[];
|
|
58
|
+
/**
|
|
59
|
+
* Get embedding models for a provider.
|
|
60
|
+
* Returns models sorted by cost (cheapest first, then largest dims).
|
|
61
|
+
*/
|
|
62
|
+
getEmbeddingModels(providerId: string): EmbeddingModelInfo[];
|
|
63
|
+
/**
|
|
64
|
+
* Get the default embedding model for a provider.
|
|
65
|
+
* Picks the cheapest model with >=512 dimensions.
|
|
66
|
+
*/
|
|
67
|
+
getDefaultEmbeddingModel(providerId: string): EmbeddingModelInfo | undefined;
|
|
68
|
+
/**
|
|
69
|
+
* Get all embedding models across all providers.
|
|
70
|
+
*/
|
|
71
|
+
getAllEmbeddingModels(): Map<string, EmbeddingModelInfo[]>;
|
|
72
|
+
/**
|
|
73
|
+
* Force refresh from remote. Returns true if successful.
|
|
74
|
+
*/
|
|
75
|
+
refreshCatalog(): Promise<boolean>;
|
|
76
|
+
private ensureLoaded;
|
|
77
|
+
private isStale;
|
|
78
|
+
private loadFromDisk;
|
|
79
|
+
private saveToDisk;
|
|
80
|
+
private backgroundFetch;
|
|
81
|
+
private fetchRemote;
|
|
82
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model detection helpers shared between OpenAI transport implementations.
|
|
3
|
+
*
|
|
4
|
+
* These are used by both openai-chat.ts and openai-responses.ts to detect
|
|
5
|
+
* model families and apply family-specific constraints:
|
|
6
|
+
* - GPT-5.x: unified reasoning, temperature allowed, reasoning object format
|
|
7
|
+
* - GPT-5.4-nano: reasoning effort capped at medium
|
|
8
|
+
* - o-series (legacy): reasoning_effort flat string, temperature suppressed
|
|
9
|
+
*/
|
|
10
|
+
/** GPT-5.x models (new generation with unified reasoning). */
|
|
11
|
+
export declare function isGPT5xModel(model: string): boolean;
|
|
12
|
+
/**
|
|
13
|
+
* GPT-5.4-nano models — reasoning effort capped at medium.
|
|
14
|
+
* openai-ProviderMax §3: gpt-5.4-nano only supports none/low/medium effort.
|
|
15
|
+
*/
|
|
16
|
+
export declare function isGPT5NanoModel(model: string): boolean;
|
|
17
|
+
/**
|
|
18
|
+
* OpenAI o-series reasoning models (legacy, kept for 3rd-party provider compat).
|
|
19
|
+
* These suppress temperature/top_p and use reasoning_effort as flat string.
|
|
20
|
+
* Matches: o1, o1-mini, o1-pro, o3, o3-mini, o3-pro, o4-mini, etc.
|
|
21
|
+
*/
|
|
22
|
+
export declare function isOpenAIReasoningModel(model: string): boolean;
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ProviderDef — defines how to connect to an LLM provider.
|
|
3
|
+
*
|
|
4
|
+
* Aligned with Hermes `ProviderDef` dataclass pattern:
|
|
5
|
+
* id + name + transport type + baseUrl + auth config + model list
|
|
6
|
+
*
|
|
7
|
+
* Three-layer merge strategy (Layer 3 > Layer 2 > Layer 1):
|
|
8
|
+
* Layer 1: builtin-providers.ts hardcoded (fallback)
|
|
9
|
+
* Layer 2: model-catalog.ts remote (models.dev)
|
|
10
|
+
* Layer 3: user config (from agent.turn.config)
|
|
11
|
+
*/
|
|
12
|
+
export type TransportType = "openai-chat" | "openai-responses" | "anthropic-messages" | "volcengine-responses" | "gemini-generatecontent";
|
|
13
|
+
export type AuthType = "bearer" | "x-api-key" | "none";
|
|
14
|
+
export type MediaCapability = "image" | "video" | "music" | "music_realtime" | "tts" | "3d" | "stt" | "embedding" | "video_understanding" | "image_understanding" | "voice_clone" | "rerank" | "document_parsing" | "realtime_audio" | "realtime_video";
|
|
15
|
+
export type VideoOperation = "text2video" | "img2video" | "video2video" | "edit" | "merge" | "upscale";
|
|
16
|
+
export type ImageOperation = "text2image" | "img2img" | "inpainting" | "outpainting";
|
|
17
|
+
export type MusicOperation = "text2music" | "cover" | "realtime";
|
|
18
|
+
export type TtsOperation = "text2speech" | "voice_clone";
|
|
19
|
+
export type ThreeDOperation = "text2_3d" | "img2_3d";
|
|
20
|
+
export interface VideoCapabilities {
|
|
21
|
+
type: "video";
|
|
22
|
+
operations: VideoOperation[];
|
|
23
|
+
maxDurationSeconds?: number;
|
|
24
|
+
resolutions?: string[];
|
|
25
|
+
aspectRatios?: string[];
|
|
26
|
+
fps?: number[];
|
|
27
|
+
}
|
|
28
|
+
export interface ImageCapabilities {
|
|
29
|
+
type: "image";
|
|
30
|
+
operations: ImageOperation[];
|
|
31
|
+
sizes?: string[];
|
|
32
|
+
transparentBackground?: boolean;
|
|
33
|
+
}
|
|
34
|
+
export interface MusicCapabilities {
|
|
35
|
+
type: "music";
|
|
36
|
+
operations: MusicOperation[];
|
|
37
|
+
maxDurationSeconds?: number;
|
|
38
|
+
formats?: string[];
|
|
39
|
+
}
|
|
40
|
+
export interface TtsCapabilities {
|
|
41
|
+
type: "tts";
|
|
42
|
+
operations?: TtsOperation[];
|
|
43
|
+
voices?: string[];
|
|
44
|
+
maxCharacters?: number;
|
|
45
|
+
formats?: string[];
|
|
46
|
+
}
|
|
47
|
+
export interface ThreeDCapabilities {
|
|
48
|
+
type: "3d";
|
|
49
|
+
operations: ThreeDOperation[];
|
|
50
|
+
outputFormats?: string[];
|
|
51
|
+
}
|
|
52
|
+
export interface SttCapabilities {
|
|
53
|
+
type: "stt";
|
|
54
|
+
languages?: string[];
|
|
55
|
+
maxDurationSeconds?: number;
|
|
56
|
+
formats?: string[];
|
|
57
|
+
}
|
|
58
|
+
export interface EmbeddingCapabilities {
|
|
59
|
+
type: "embedding";
|
|
60
|
+
dimensions?: number;
|
|
61
|
+
maxTokens?: number;
|
|
62
|
+
}
|
|
63
|
+
export interface VideoUnderstandingCapabilities {
|
|
64
|
+
type: "video_understanding";
|
|
65
|
+
maxDurationSeconds?: number;
|
|
66
|
+
formats?: string[];
|
|
67
|
+
}
|
|
68
|
+
export interface ImageUnderstandingCapabilities {
|
|
69
|
+
type: "image_understanding";
|
|
70
|
+
formats?: string[];
|
|
71
|
+
}
|
|
72
|
+
export interface VoiceCloneCapabilities {
|
|
73
|
+
type: "voice_clone";
|
|
74
|
+
maxSampleDurationSeconds?: number;
|
|
75
|
+
maxSampleSizeMB?: number;
|
|
76
|
+
formats?: string[];
|
|
77
|
+
}
|
|
78
|
+
export interface RerankCapabilities {
|
|
79
|
+
type: "rerank";
|
|
80
|
+
maxDocuments?: number;
|
|
81
|
+
maxQueryLength?: number;
|
|
82
|
+
maxDocumentLength?: number;
|
|
83
|
+
}
|
|
84
|
+
export interface DocumentParsingCapabilities {
|
|
85
|
+
type: "document_parsing";
|
|
86
|
+
supportedFormats?: string[];
|
|
87
|
+
maxPageCount?: number;
|
|
88
|
+
maxFileSizeMB?: number;
|
|
89
|
+
}
|
|
90
|
+
export interface RealtimeAudioCapabilities {
|
|
91
|
+
type: "realtime_audio";
|
|
92
|
+
voices?: string[];
|
|
93
|
+
modalities?: Array<"text" | "audio">;
|
|
94
|
+
vad?: boolean;
|
|
95
|
+
toolCalling?: boolean;
|
|
96
|
+
}
|
|
97
|
+
export interface RealtimeVideoCapabilities {
|
|
98
|
+
type: "realtime_video";
|
|
99
|
+
modalities?: Array<"text" | "audio" | "video">;
|
|
100
|
+
vad?: boolean;
|
|
101
|
+
toolCalling?: boolean;
|
|
102
|
+
maxDurationSeconds?: number;
|
|
103
|
+
}
|
|
104
|
+
export type MediaCapabilities = VideoCapabilities | ImageCapabilities | MusicCapabilities | TtsCapabilities | ThreeDCapabilities | SttCapabilities | EmbeddingCapabilities | VideoUnderstandingCapabilities | ImageUnderstandingCapabilities | VoiceCloneCapabilities | RerankCapabilities | DocumentParsingCapabilities | RealtimeAudioCapabilities | RealtimeVideoCapabilities;
|
|
105
|
+
/**
|
|
106
|
+
* Provider-specific quirks — drives conditional logic in transports.
|
|
107
|
+
* CC parity: provider detection via quirks flags instead of hardcoded if/else.
|
|
108
|
+
* altcode parity: provider auto-detect + per-provider parameter translation.
|
|
109
|
+
*/
|
|
110
|
+
export interface ProviderQuirks {
|
|
111
|
+
/** Provider doesn't support thinking content blocks (Qwen) */
|
|
112
|
+
filterThinkingBlocks?: boolean;
|
|
113
|
+
/** Provider doesn't support image content blocks — strip imageUrls before sending (DeepSeek, MiniMax) */
|
|
114
|
+
filterImageBlocks?: boolean;
|
|
115
|
+
/** DeepSeek: budget_tokens ignored, use output_config.effort instead */
|
|
116
|
+
useEffortInsteadOfBudget?: boolean;
|
|
117
|
+
/** Provider natively supports PDF/document content blocks (Anthropic document, Gemini fileData).
|
|
118
|
+
* When false, PDFs are annotated as text labels and the agent must use tools to extract content. */
|
|
119
|
+
supportsDocumentVision?: boolean;
|
|
120
|
+
/** Provider supports reasoning_effort param (Kimi K2, OpenAI o-series) */
|
|
121
|
+
supportsReasoningEffort?: boolean;
|
|
122
|
+
/** Provider has built-in web search (Kimi: builtin_function.$web_search, GLM: web_search) */
|
|
123
|
+
builtinWebSearch?: boolean;
|
|
124
|
+
/** Provider has built-in code interpreter */
|
|
125
|
+
builtinCodeInterpreter?: boolean;
|
|
126
|
+
/** Provider supports native URL context fetching (Gemini urlContext tool) */
|
|
127
|
+
builtinUrlContext?: boolean;
|
|
128
|
+
/** Provider supports Google Maps Grounding (Gemini googleMaps tool) */
|
|
129
|
+
builtinMapsGrounding?: boolean;
|
|
130
|
+
/** Provider supports native file search (Gemini fileSearch tool) */
|
|
131
|
+
builtinFileSearch?: boolean;
|
|
132
|
+
/** Supports thinking.type="enabled"/"disabled" body param (Kimi K2, GLM).
|
|
133
|
+
* Disambiguation: GLM also sets supportsToolStream; Kimi does not. */
|
|
134
|
+
supportsThinkingParam?: boolean;
|
|
135
|
+
/** When true, send thinking.type="disabled" unless the caller explicitly requests reasoning. */
|
|
136
|
+
disableThinkingByDefault?: boolean;
|
|
137
|
+
/** GLM-only: supports tool_stream=true for incremental tool call streaming */
|
|
138
|
+
supportsToolStream?: boolean;
|
|
139
|
+
/** DeepSeek only maps to "high"|"max"; low/medium→high */
|
|
140
|
+
maxReasoningEffort?: "high" | "max";
|
|
141
|
+
/** Supports prefix completion via /beta endpoint (DeepSeek Beta) */
|
|
142
|
+
supportsPrefixCompletion?: boolean;
|
|
143
|
+
/** MiniMax OpenAI route: inject reasoning_split=true to split thinking into reasoning_details.
|
|
144
|
+
* Streaming uses cumulative string updates (not incremental deltas). */
|
|
145
|
+
supportsReasoningSplit?: boolean;
|
|
146
|
+
}
|
|
147
|
+
export interface ProviderDef {
|
|
148
|
+
/** Unique provider id, e.g. "deepseek", "openai", "anthropic" */
|
|
149
|
+
id: string;
|
|
150
|
+
/** Display name, e.g. "DeepSeek" */
|
|
151
|
+
name: string;
|
|
152
|
+
/** Which transport to use for LLM calls */
|
|
153
|
+
transport: TransportType;
|
|
154
|
+
/** API base URL, e.g. "https://api.deepseek.com" */
|
|
155
|
+
baseUrl: string;
|
|
156
|
+
/**
|
|
157
|
+
* Logical provider group — links protocol variants of the same vendor.
|
|
158
|
+
* e.g. both "zhipu" (anthropic) and "zhipu-openai" share group "zhipu".
|
|
159
|
+
* Defaults to provider id if unset.
|
|
160
|
+
*/
|
|
161
|
+
group?: string;
|
|
162
|
+
/** Env var names for API key (priority order) */
|
|
163
|
+
apiKeyEnvVars: string[];
|
|
164
|
+
/** Auth header style */
|
|
165
|
+
authType: AuthType;
|
|
166
|
+
/** Is an aggregator (OpenRouter, 硅基) — model ids may have prefix */
|
|
167
|
+
isAggregator: boolean;
|
|
168
|
+
/** Recommended default model */
|
|
169
|
+
defaultModel?: string;
|
|
170
|
+
/** Known models for this provider */
|
|
171
|
+
models?: ModelInfo[];
|
|
172
|
+
/** Extra headers to send with every request (e.g. aggregator-specific) */
|
|
173
|
+
extraHeaders?: Record<string, string>;
|
|
174
|
+
/** Whether this provider supports stream_options (default true for openai-chat) */
|
|
175
|
+
supportsStreamOptions?: boolean;
|
|
176
|
+
/** Whether to omit temperature when it equals 0 (some providers reject 0) */
|
|
177
|
+
omitZeroTemperature?: boolean;
|
|
178
|
+
/** Provider-specific quirks for transport-level conditional logic */
|
|
179
|
+
quirks?: ProviderQuirks;
|
|
180
|
+
}
|
|
181
|
+
export interface ModelInfo {
|
|
182
|
+
/** Model id, e.g. "deepseek-v4-flash" */
|
|
183
|
+
id: string;
|
|
184
|
+
/** Stable public aliases exposed by llmrouter or older qlogicagent configs */
|
|
185
|
+
aliases?: string[];
|
|
186
|
+
/** Display name, e.g. "DeepSeek Chat V3" */
|
|
187
|
+
name: string;
|
|
188
|
+
/** Context window in tokens */
|
|
189
|
+
contextWindow: number;
|
|
190
|
+
/** Max output tokens */
|
|
191
|
+
maxOutput: number;
|
|
192
|
+
/** Supports function/tool calling */
|
|
193
|
+
toolCall: boolean;
|
|
194
|
+
/** Has reasoning/thinking mode */
|
|
195
|
+
reasoning: boolean;
|
|
196
|
+
/** Thinking is forced on — cannot be toggled off (e.g. QwQ, DeepSeek-R1) */
|
|
197
|
+
reasoningRequired?: boolean;
|
|
198
|
+
/** Model only supports streaming (non-stream requests will fail) */
|
|
199
|
+
streamRequired?: boolean;
|
|
200
|
+
/** Supports vision (image input) */
|
|
201
|
+
vision: boolean;
|
|
202
|
+
/** Cost per 1M input tokens (USD) */
|
|
203
|
+
costInput?: number;
|
|
204
|
+
/** Cost per 1M output tokens (USD) */
|
|
205
|
+
costOutput?: number;
|
|
206
|
+
/** Cost per 1M cache read tokens (USD) */
|
|
207
|
+
costCacheRead?: number;
|
|
208
|
+
/** Cost per 1M cache write tokens (USD) */
|
|
209
|
+
costCacheWrite?: number;
|
|
210
|
+
/** Media generation capability — undefined means chat/reasoning model */
|
|
211
|
+
mediaType?: MediaCapability;
|
|
212
|
+
/** Fine-grained media capabilities — operations, formats, limits */
|
|
213
|
+
mediaCapabilities?: MediaCapabilities;
|
|
214
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ProviderRegistry — three-layer merge registry for LLM providers.
|
|
3
|
+
*
|
|
4
|
+
* Layer 1: builtin-providers.ts hardcoded (lowest priority, ~20 providers)
|
|
5
|
+
* Layer 2: model-catalog.ts remote (models.dev — enriches model metadata)
|
|
6
|
+
* Layer 3: user config override (from agent.turn.config — highest priority)
|
|
7
|
+
*
|
|
8
|
+
* Merge strategy: Layer 3 > Layer 2 > Layer 1 (later layers override same-id fields)
|
|
9
|
+
*
|
|
10
|
+
* Aligned with Hermes provider_registry.py.
|
|
11
|
+
*/
|
|
12
|
+
import type { ModelInfo, ProviderDef } from "./provider-def.js";
|
|
13
|
+
import { ModelCatalog } from "./model-catalog.js";
|
|
14
|
+
export declare class ProviderRegistry {
|
|
15
|
+
/** Layer 1: builtin hardcoded providers */
|
|
16
|
+
private builtins;
|
|
17
|
+
/** Layer 2: remote model catalog (models.dev) */
|
|
18
|
+
private catalog;
|
|
19
|
+
/** Layer 3: user overrides (from agent.turn.config) */
|
|
20
|
+
private overrides;
|
|
21
|
+
constructor(opts?: {
|
|
22
|
+
catalog?: ModelCatalog;
|
|
23
|
+
});
|
|
24
|
+
/**
|
|
25
|
+
* Apply user config override for a provider.
|
|
26
|
+
* Typically called when agent.turn.config has baseUrl/apiKey overrides.
|
|
27
|
+
*/
|
|
28
|
+
applyOverride(providerId: string, override: Partial<ProviderDef>): void;
|
|
29
|
+
/**
|
|
30
|
+
* Get merged ProviderDef by id (Layer 3 > Layer 1).
|
|
31
|
+
* Returns undefined if provider not found.
|
|
32
|
+
* Supports common aliases (e.g., "claude" → "anthropic").
|
|
33
|
+
*/
|
|
34
|
+
getProvider(id: string): ProviderDef | undefined;
|
|
35
|
+
/**
|
|
36
|
+
* List all known provider definitions.
|
|
37
|
+
*/
|
|
38
|
+
listProviders(): ProviderDef[];
|
|
39
|
+
/**
|
|
40
|
+
* List models for a specific provider.
|
|
41
|
+
* Merges: Layer 3 override > Layer 1 builtin > Layer 2 catalog enrichment.
|
|
42
|
+
*/
|
|
43
|
+
listModels(providerId: string): ModelInfo[];
|
|
44
|
+
/**
|
|
45
|
+
* Look up a single model's info by provider + model id.
|
|
46
|
+
* Returns undefined if the model is not found.
|
|
47
|
+
*/
|
|
48
|
+
getModelInfo(providerId: string, modelId: string): ModelInfo | undefined;
|
|
49
|
+
/**
|
|
50
|
+
* Trigger background refresh of the remote model catalog.
|
|
51
|
+
*/
|
|
52
|
+
refreshCatalog(): Promise<boolean>;
|
|
53
|
+
/**
|
|
54
|
+
* Resolve API key for a provider:
|
|
55
|
+
* 1. Explicit key (from agent.turn.config)
|
|
56
|
+
* 2. Environment variables (ProviderDef.apiKeyEnvVars)
|
|
57
|
+
*/
|
|
58
|
+
resolveApiKey(providerId: string, explicitKey?: string): string | undefined;
|
|
59
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ProviderToolAPI — interface for provider-specific utility endpoints
|
|
3
|
+
* that are neither LLM chat nor media generation.
|
|
4
|
+
*
|
|
5
|
+
* Examples: web search, content reader, tokenizer, moderation, realtime voice.
|
|
6
|
+
* Each provider can expose its own set of tool APIs; the agent's tool cascade
|
|
7
|
+
* mechanism (Q1) routes to these when the provider has a native capability.
|
|
8
|
+
*/
|
|
9
|
+
export interface WebSearchResult {
|
|
10
|
+
title: string;
|
|
11
|
+
url: string;
|
|
12
|
+
snippet: string;
|
|
13
|
+
/** Full page content if available */
|
|
14
|
+
content?: string;
|
|
15
|
+
}
|
|
16
|
+
export interface ReaderResult {
|
|
17
|
+
title: string;
|
|
18
|
+
content: string;
|
|
19
|
+
url: string;
|
|
20
|
+
}
|
|
21
|
+
export interface TokenizerResult {
|
|
22
|
+
tokenCount: number;
|
|
23
|
+
model: string;
|
|
24
|
+
}
|
|
25
|
+
export interface ModerationResult {
|
|
26
|
+
flagged: boolean;
|
|
27
|
+
categories: Record<string, boolean>;
|
|
28
|
+
scores?: Record<string, number>;
|
|
29
|
+
}
|
|
30
|
+
export interface ProviderToolAPI {
|
|
31
|
+
/** Which tool APIs this provider supports */
|
|
32
|
+
readonly capabilities: readonly ProviderToolCapability[];
|
|
33
|
+
/** Web search — returns search result list */
|
|
34
|
+
webSearch?(query: string, options?: {
|
|
35
|
+
maxResults?: number;
|
|
36
|
+
}): Promise<WebSearchResult[]>;
|
|
37
|
+
/** URL reader — extracts content from a web page */
|
|
38
|
+
reader?(url: string): Promise<ReaderResult>;
|
|
39
|
+
/** Tokenizer — count tokens for given text/model */
|
|
40
|
+
tokenize?(text: string, model: string): Promise<TokenizerResult>;
|
|
41
|
+
/** Content moderation — check text for policy violations */
|
|
42
|
+
moderate?(text: string): Promise<ModerationResult>;
|
|
43
|
+
}
|
|
44
|
+
export type ProviderToolCapability = "web_search" | "reader" | "tokenizer" | "moderations";
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared retry/backoff utilities for LLM transport implementations.
|
|
3
|
+
*
|
|
4
|
+
* Provides common constants and helper functions used by all transports
|
|
5
|
+
* (anthropic-messages, openai-chat, volcengine-responses) to handle
|
|
6
|
+
* transient errors with exponential backoff.
|
|
7
|
+
*/
|
|
8
|
+
/** Default maximum number of retry attempts */
|
|
9
|
+
export declare const DEFAULT_MAX_RETRIES = 2;
|
|
10
|
+
/** Base delay for exponential backoff (doubles each attempt, capped at 30s) */
|
|
11
|
+
export declare const RETRY_BASE_DELAY_MS = 1000;
|
|
12
|
+
/** Maximum backoff delay */
|
|
13
|
+
export declare const RETRY_MAX_DELAY_MS = 30000;
|
|
14
|
+
/** HTTP status codes considered transient (worth retrying) */
|
|
15
|
+
export declare const TRANSIENT_STATUS_CODES: Set<number>;
|
|
16
|
+
/** Default timeout for idle stream detection (no data received) */
|
|
17
|
+
export declare const STREAM_IDLE_TIMEOUT_MS = 90000;
|
|
18
|
+
/**
|
|
19
|
+
* Calculate the delay for a given retry attempt using exponential backoff with jitter.
|
|
20
|
+
* @param attempt 1-based attempt number (1 = first retry)
|
|
21
|
+
* @returns delay in milliseconds
|
|
22
|
+
*/
|
|
23
|
+
export declare function retryDelay(attempt: number): number;
|
|
24
|
+
/**
|
|
25
|
+
* Check if an HTTP status code indicates a transient error worth retrying.
|
|
26
|
+
*/
|
|
27
|
+
export declare function isTransientStatus(status: number | null | undefined): boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Sleep with abort signal support. Resolves after `ms` milliseconds
|
|
30
|
+
* or rejects if the signal is aborted.
|
|
31
|
+
*/
|
|
32
|
+
export declare function retrySleep(ms: number, signal?: AbortSignal): Promise<void>;
|
|
33
|
+
/**
|
|
34
|
+
* Extract HTTP status from various error shapes.
|
|
35
|
+
* Works with fetch Response errors, Axios errors, and generic errors with status property.
|
|
36
|
+
*/
|
|
37
|
+
export declare function extractHttpStatus(error: unknown): number | null;
|