qlogicagent 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +8 -6
- package/dist/cli.js +258 -214
- package/dist/index.js +258 -214
- package/dist/orchestration.js +12 -9
- package/dist/types/agent/tool-loop.d.ts +22 -0
- package/dist/types/agent/types.d.ts +32 -0
- package/dist/types/cli/stdio-server.d.ts +96 -1
- package/dist/types/cli/tool-bootstrap.d.ts +8 -1
- package/dist/types/llm/gemini-schema-utils.d.ts +17 -0
- package/dist/types/llm/index.d.ts +11 -2
- package/dist/types/llm/media-transport.d.ts +28 -3
- package/dist/types/llm/model-detection.d.ts +22 -0
- package/dist/types/llm/provider-def.d.ts +17 -4
- package/dist/types/llm/transport.d.ts +60 -2
- package/dist/types/llm/transports/gemini-cache-api.d.ts +86 -0
- package/dist/types/llm/transports/gemini-file-api.d.ts +90 -0
- package/dist/types/llm/transports/gemini-generatecontent.d.ts +52 -0
- package/dist/types/llm/transports/gemini-lyria-realtime.d.ts +117 -0
- package/dist/types/llm/transports/gemini-media.d.ts +40 -8
- package/dist/types/llm/transports/minimax-media.d.ts +34 -5
- package/dist/types/llm/transports/openai-responses.d.ts +60 -0
- package/dist/types/llm/transports/qwen-media.d.ts +32 -7
- package/dist/types/llm/transports/realtime-transport.d.ts +1 -1
- package/dist/types/llm/transports/volcengine-media.d.ts +10 -2
- package/dist/types/llm/transports/zhipu-media.d.ts +24 -2
- package/dist/types/orchestration/agent-instance.d.ts +58 -0
- package/dist/types/orchestration/dag-scheduler.d.ts +72 -0
- package/dist/types/orchestration/product-budget.d.ts +56 -0
- package/dist/types/orchestration/product-checkpoint.d.ts +46 -0
- package/dist/types/orchestration/product-persistence.d.ts +40 -0
- package/dist/types/orchestration/product-worktree.d.ts +13 -0
- package/dist/types/orchestration/solo-evaluator.d.ts +59 -0
- package/dist/types/orchestration/subagent/fork-subagent.d.ts +2 -0
- package/dist/types/orchestration/subagent/task-types.d.ts +4 -0
- package/dist/types/orchestration/tool-cascade.d.ts +2 -2
- package/dist/types/protocol/methods.d.ts +92 -0
- package/dist/types/protocol/notifications.d.ts +162 -0
- package/dist/types/runtime/infra/acp-detector.d.ts +36 -0
- package/dist/types/runtime/infra/acp-detector.test.d.ts +1 -0
- package/dist/types/runtime/infra/acp-protocol-adapter.d.ts +73 -0
- package/dist/types/runtime/infra/acp-protocol-adapter.test.d.ts +1 -0
- package/dist/types/runtime/infra/acp-types.d.ts +397 -0
- package/dist/types/runtime/infra/acp-usage-tracker.d.ts +46 -0
- package/dist/types/runtime/infra/acp-usage-tracker.test.d.ts +1 -0
- package/dist/types/runtime/infra/agent-config-store.d.ts +30 -0
- package/dist/types/runtime/infra/agent-config-store.test.d.ts +1 -0
- package/dist/types/runtime/infra/agent-paths.d.ts +8 -0
- package/dist/types/runtime/infra/agent-process.d.ts +280 -0
- package/dist/types/runtime/infra/agent-process.test.d.ts +1 -0
- package/dist/types/runtime/infra/index.d.ts +10 -0
- package/dist/types/runtime/infra/mcp-bridge.d.ts +166 -0
- package/dist/types/runtime/infra/mcp-bridge.test.d.ts +1 -0
- package/dist/types/runtime/infra/model-id-translator.d.ts +22 -0
- package/dist/types/runtime/infra/model-id-translator.test.d.ts +1 -0
- package/dist/types/runtime/infra/skill-injector.d.ts +51 -0
- package/dist/types/runtime/infra/skill-injector.test.d.ts +1 -0
- package/dist/types/runtime/infra/worktree-backend.d.ts +1 -0
- package/dist/types/runtime/prompt/environment-context.d.ts +6 -0
- package/dist/types/runtime/session/session-persistence.d.ts +9 -8
- package/dist/types/runtime/session/session-state.d.ts +3 -31
- package/dist/types/skills/index.d.ts +2 -10
- package/dist/types/skills/tools/skill-tool.d.ts +101 -0
- package/dist/types/skills/tools/team-tool.d.ts +23 -1
- package/package.json +1 -1
- package/dist/types/runtime/session/session-memory.d.ts +0 -90
- package/dist/types/skills/memory/memory-extractor.d.ts +0 -64
|
@@ -9,12 +9,12 @@
|
|
|
9
9
|
* Layer 2: model-catalog.ts remote (models.dev)
|
|
10
10
|
* Layer 3: user config (from agent.turn.config)
|
|
11
11
|
*/
|
|
12
|
-
export type TransportType = "openai-chat" | "anthropic-messages" | "volcengine-responses";
|
|
12
|
+
export type TransportType = "openai-chat" | "openai-responses" | "anthropic-messages" | "volcengine-responses" | "gemini-generatecontent";
|
|
13
13
|
export type AuthType = "bearer" | "x-api-key" | "none";
|
|
14
|
-
export type MediaCapability = "image" | "video" | "music" | "tts" | "3d" | "stt" | "embedding" | "video_understanding" | "image_understanding" | "voice_clone" | "rerank" | "document_parsing";
|
|
14
|
+
export type MediaCapability = "image" | "video" | "music" | "music_realtime" | "tts" | "3d" | "stt" | "embedding" | "video_understanding" | "image_understanding" | "voice_clone" | "rerank" | "document_parsing" | "realtime_audio";
|
|
15
15
|
export type VideoOperation = "text2video" | "img2video" | "video2video" | "edit" | "merge" | "upscale";
|
|
16
16
|
export type ImageOperation = "text2image" | "img2img" | "inpainting" | "outpainting";
|
|
17
|
-
export type MusicOperation = "text2music" | "cover";
|
|
17
|
+
export type MusicOperation = "text2music" | "cover" | "realtime";
|
|
18
18
|
export type TtsOperation = "text2speech" | "voice_clone";
|
|
19
19
|
export type ThreeDOperation = "text2_3d" | "img2_3d";
|
|
20
20
|
export interface VideoCapabilities {
|
|
@@ -87,7 +87,14 @@ export interface DocumentParsingCapabilities {
|
|
|
87
87
|
maxPageCount?: number;
|
|
88
88
|
maxFileSizeMB?: number;
|
|
89
89
|
}
|
|
90
|
-
export
|
|
90
|
+
export interface RealtimeAudioCapabilities {
|
|
91
|
+
type: "realtime_audio";
|
|
92
|
+
voices?: string[];
|
|
93
|
+
modalities?: Array<"text" | "audio">;
|
|
94
|
+
vad?: boolean;
|
|
95
|
+
toolCalling?: boolean;
|
|
96
|
+
}
|
|
97
|
+
export type MediaCapabilities = VideoCapabilities | ImageCapabilities | MusicCapabilities | TtsCapabilities | ThreeDCapabilities | SttCapabilities | EmbeddingCapabilities | VideoUnderstandingCapabilities | ImageUnderstandingCapabilities | VoiceCloneCapabilities | RerankCapabilities | DocumentParsingCapabilities | RealtimeAudioCapabilities;
|
|
91
98
|
/**
|
|
92
99
|
* Provider-specific quirks — drives conditional logic in transports.
|
|
93
100
|
* CC parity: provider detection via quirks flags instead of hardcoded if/else.
|
|
@@ -106,6 +113,12 @@ export interface ProviderQuirks {
|
|
|
106
113
|
builtinWebSearch?: boolean;
|
|
107
114
|
/** Provider has built-in code interpreter */
|
|
108
115
|
builtinCodeInterpreter?: boolean;
|
|
116
|
+
/** Provider supports native URL context fetching (Gemini urlContext tool) */
|
|
117
|
+
builtinUrlContext?: boolean;
|
|
118
|
+
/** Provider supports Google Maps Grounding (Gemini googleMaps tool) */
|
|
119
|
+
builtinMapsGrounding?: boolean;
|
|
120
|
+
/** Provider supports native file search (Gemini fileSearch tool) */
|
|
121
|
+
builtinFileSearch?: boolean;
|
|
109
122
|
/** Supports thinking.type="enabled"/"disabled" body param (Kimi K2, GLM).
|
|
110
123
|
* Disambiguation: GLM also sets supportsToolStream; Kimi does not. */
|
|
111
124
|
supportsThinkingParam?: boolean;
|
|
@@ -57,7 +57,7 @@ export interface LLMRequest {
|
|
|
57
57
|
topP?: number;
|
|
58
58
|
maxTokens?: number;
|
|
59
59
|
reasoning?: {
|
|
60
|
-
effort: "minimal" | "low" | "medium" | "high";
|
|
60
|
+
effort: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
61
61
|
/** Request encrypted original reasoning content (Volcengine §17.7). */
|
|
62
62
|
includeEncryptedReasoning?: boolean;
|
|
63
63
|
};
|
|
@@ -123,6 +123,64 @@ export interface LLMRequest {
|
|
|
123
123
|
* Server-side trimming of historical thinking chains and tool call traces.
|
|
124
124
|
*/
|
|
125
125
|
contextManagement?: ContextManagementConfig;
|
|
126
|
+
/**
|
|
127
|
+
* Gemini explicit cache reference (gemini-ProviderMax §8).
|
|
128
|
+
* Passes a pre-created cache name (e.g. "cachedContents/abc123") to
|
|
129
|
+
* generateContent so the server uses cached tokens instead of re-processing.
|
|
130
|
+
* Create caches via GeminiCacheAPI.createCache() first.
|
|
131
|
+
*/
|
|
132
|
+
cachedContent?: string;
|
|
133
|
+
/**
|
|
134
|
+
* Predicted output for speculative decoding (openai-ProviderMax §11).
|
|
135
|
+
* When editing code, pass the existing content so the model can diff efficiently.
|
|
136
|
+
* Reduces latency by 3-5x when prediction matches. Falls back when it doesn't.
|
|
137
|
+
* Works with OpenAI GPT-5.x models via Responses API and Chat Completions.
|
|
138
|
+
*/
|
|
139
|
+
prediction?: {
|
|
140
|
+
type: "content";
|
|
141
|
+
content: string;
|
|
142
|
+
};
|
|
143
|
+
/**
|
|
144
|
+
* Prompt cache bucketing key (openai-ProviderMax §11).
|
|
145
|
+
* Replaces the deprecated `user` field. Helps OpenAI group similar requests
|
|
146
|
+
* for higher cache hit rates.
|
|
147
|
+
*/
|
|
148
|
+
promptCacheKey?: string;
|
|
149
|
+
/**
|
|
150
|
+
* Prompt cache retention policy (openai-ProviderMax §11).
|
|
151
|
+
* "in_memory" = default 5-10 min, "24h" = extended up to 24 hours.
|
|
152
|
+
*/
|
|
153
|
+
promptCacheRetention?: "in_memory" | "24h";
|
|
154
|
+
/**
|
|
155
|
+
* Service tier for request scheduling (openai-ProviderMax §14).
|
|
156
|
+
* "auto" = project default, "default" = standard tier, "flex" = 50% cheaper / higher latency,
|
|
157
|
+
* "priority" = guaranteed low latency.
|
|
158
|
+
*/
|
|
159
|
+
serviceTier?: "auto" | "default" | "flex" | "priority";
|
|
160
|
+
/**
|
|
161
|
+
* OpenAI Responses API built-in tools (openai-ProviderMax §7).
|
|
162
|
+
* Platform-executed tools like web_search, file_search, code_interpreter, etc.
|
|
163
|
+
*/
|
|
164
|
+
openaiBuiltinTools?: Array<{
|
|
165
|
+
type: "web_search_preview" | "file_search" | "code_interpreter" | "computer_use_preview";
|
|
166
|
+
[key: string]: unknown;
|
|
167
|
+
}>;
|
|
168
|
+
/**
|
|
169
|
+
* OpenAI Responses API conversation ID (openai-ProviderMax §2.1).
|
|
170
|
+
* Alternative to previous_response_id — persistent server-side conversation.
|
|
171
|
+
* Cannot be used together with previousResponseId.
|
|
172
|
+
*/
|
|
173
|
+
conversationId?: string;
|
|
174
|
+
/**
|
|
175
|
+
* Whether the model may issue tool calls in parallel for this request.
|
|
176
|
+
* When false, the model must call tools sequentially.
|
|
177
|
+
*/
|
|
178
|
+
parallelToolCalls?: boolean;
|
|
179
|
+
/**
|
|
180
|
+
* Text output verbosity hint (openai-ProviderMax §5).
|
|
181
|
+
* Controls how detailed the model's textual output should be.
|
|
182
|
+
*/
|
|
183
|
+
textVerbosity?: "low" | "medium" | "high";
|
|
126
184
|
}
|
|
127
185
|
/**
|
|
128
186
|
* FIM completion request — DeepSeek Beta Completions API.
|
|
@@ -182,7 +240,7 @@ export type LLMChunk = {
|
|
|
182
240
|
event: string;
|
|
183
241
|
data?: Record<string, unknown>;
|
|
184
242
|
} | {
|
|
185
|
-
/** Web search citation annotations from Volcengine web_search
|
|
243
|
+
/** Web search citation annotations from provider builtin tools (Volcengine web_search, Gemini grounding). */
|
|
186
244
|
type: "annotations";
|
|
187
245
|
annotations: Array<{
|
|
188
246
|
type: string;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GeminiCacheAPI — Explicit Context Caching for Gemini generateContent.
|
|
3
|
+
*
|
|
4
|
+
* Manages named cached content resources that can be referenced in
|
|
5
|
+
* generateContent requests via the `cachedContent` field.
|
|
6
|
+
*
|
|
7
|
+
* REST endpoints:
|
|
8
|
+
* POST /v1beta/cachedContents — create cache
|
|
9
|
+
* GET /v1beta/cachedContents — list caches
|
|
10
|
+
* GET /v1beta/cachedContents/{name} — get cache metadata
|
|
11
|
+
* PATCH /v1beta/cachedContents/{name} — update TTL
|
|
12
|
+
* DELETE /v1beta/cachedContents/{name} — delete cache
|
|
13
|
+
*
|
|
14
|
+
* Minimum cacheable content: 1024 tokens (Flash) / 4096 tokens (Pro).
|
|
15
|
+
* TTL default: 1 hour. Storage: ~$1.00/hour/MTok (Flash series).
|
|
16
|
+
*
|
|
17
|
+
* Docs: https://ai.google.dev/gemini-api/docs/caching
|
|
18
|
+
*/
|
|
19
|
+
export interface GeminiCacheCreateOptions {
|
|
20
|
+
/** Model to use, e.g. "models/gemini-3-flash-preview" */
|
|
21
|
+
model: string;
|
|
22
|
+
/** Contents to cache (same format as generateContent contents) */
|
|
23
|
+
contents: Array<Record<string, unknown>>;
|
|
24
|
+
/** Optional system instruction to include in cache */
|
|
25
|
+
systemInstruction?: Record<string, unknown>;
|
|
26
|
+
/** Time-to-live, e.g. "300s" for 5 minutes. Default: "3600s" (1 hour) */
|
|
27
|
+
ttl?: string;
|
|
28
|
+
/** Human-readable name for identifying the cache */
|
|
29
|
+
displayName?: string;
|
|
30
|
+
}
|
|
31
|
+
export interface GeminiCachedContent {
|
|
32
|
+
/** Resource name, e.g. "cachedContents/abc123" */
|
|
33
|
+
name: string;
|
|
34
|
+
/** Model this cache is bound to */
|
|
35
|
+
model: string;
|
|
36
|
+
/** Display name (if set) */
|
|
37
|
+
displayName?: string;
|
|
38
|
+
/** Token usage metadata */
|
|
39
|
+
usageMetadata?: {
|
|
40
|
+
totalTokenCount?: number;
|
|
41
|
+
};
|
|
42
|
+
/** Creation time (ISO 8601) */
|
|
43
|
+
createTime?: string;
|
|
44
|
+
/** Last update time (ISO 8601) */
|
|
45
|
+
updateTime?: string;
|
|
46
|
+
/** Expiration time (ISO 8601) */
|
|
47
|
+
expireTime?: string;
|
|
48
|
+
}
|
|
49
|
+
export declare class GeminiCacheAPI {
|
|
50
|
+
private baseUrl;
|
|
51
|
+
private timeoutMs;
|
|
52
|
+
constructor(config: {
|
|
53
|
+
baseUrl: string;
|
|
54
|
+
timeoutMs?: number;
|
|
55
|
+
});
|
|
56
|
+
/**
|
|
57
|
+
* Create a new cached content resource.
|
|
58
|
+
* The cache name returned can be passed as `cachedContent` in generateContent.
|
|
59
|
+
*/
|
|
60
|
+
createCache(options: GeminiCacheCreateOptions, apiKey: string, signal?: AbortSignal): Promise<GeminiCachedContent>;
|
|
61
|
+
/**
|
|
62
|
+
* Get metadata for a cached content resource.
|
|
63
|
+
*/
|
|
64
|
+
getCache(name: string, apiKey: string, signal?: AbortSignal): Promise<GeminiCachedContent>;
|
|
65
|
+
/**
|
|
66
|
+
* List all cached content resources.
|
|
67
|
+
*/
|
|
68
|
+
listCaches(apiKey: string, options?: {
|
|
69
|
+
pageSize?: number;
|
|
70
|
+
pageToken?: string;
|
|
71
|
+
}, signal?: AbortSignal): Promise<{
|
|
72
|
+
cachedContents: GeminiCachedContent[];
|
|
73
|
+
nextPageToken?: string;
|
|
74
|
+
}>;
|
|
75
|
+
/**
|
|
76
|
+
* Update a cache's TTL or expiration time.
|
|
77
|
+
*/
|
|
78
|
+
updateCache(name: string, update: {
|
|
79
|
+
ttl?: string;
|
|
80
|
+
expireTime?: string;
|
|
81
|
+
}, apiKey: string, signal?: AbortSignal): Promise<GeminiCachedContent>;
|
|
82
|
+
/**
|
|
83
|
+
* Delete a cached content resource.
|
|
84
|
+
*/
|
|
85
|
+
deleteCache(name: string, apiKey: string, signal?: AbortSignal): Promise<void>;
|
|
86
|
+
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GeminiFileAPI — Gemini File API for uploading and managing files.
|
|
3
|
+
*
|
|
4
|
+
* Files uploaded via this API can be referenced in generateContent requests
|
|
5
|
+
* using `file_data: { file_uri, mime_type }` parts.
|
|
6
|
+
*
|
|
7
|
+
* Upload uses the resumable upload protocol (2-step):
|
|
8
|
+
* 1. POST /upload/v1beta/files → get upload URL (response header)
|
|
9
|
+
* 2. PUT {upload_url} with file bytes → get file info
|
|
10
|
+
*
|
|
11
|
+
* Other operations:
|
|
12
|
+
* GET /v1beta/files — list files
|
|
13
|
+
* GET /v1beta/files/{name} — get file metadata
|
|
14
|
+
* DELETE /v1beta/files/{name} — delete file
|
|
15
|
+
*
|
|
16
|
+
* Files expire after 48 hours. Max 2GB per file, 20GB per project.
|
|
17
|
+
*
|
|
18
|
+
* Docs: https://ai.google.dev/gemini-api/docs/files
|
|
19
|
+
*/
|
|
20
|
+
export interface GeminiFileInfo {
|
|
21
|
+
/** Resource name, e.g. "files/abc123" */
|
|
22
|
+
name: string;
|
|
23
|
+
/** Display name (set during upload) */
|
|
24
|
+
displayName?: string;
|
|
25
|
+
/** MIME type */
|
|
26
|
+
mimeType: string;
|
|
27
|
+
/** File size in bytes, encoded as a decimal string (int64 in the REST API) */
|
|
28
|
+
sizeBytes?: string;
|
|
29
|
+
/** File URI for use in generateContent, e.g. "https://generativelanguage.googleapis.com/v1beta/files/abc123" */
|
|
30
|
+
uri: string;
|
|
31
|
+
/** Processing state: PROCESSING | ACTIVE | FAILED */
|
|
32
|
+
state: string;
|
|
33
|
+
/** Creation time (ISO 8601) */
|
|
34
|
+
createTime?: string;
|
|
35
|
+
/** Last update time (ISO 8601) */
|
|
36
|
+
updateTime?: string;
|
|
37
|
+
/** Expiration time (ISO 8601) */
|
|
38
|
+
expirationTime?: string;
|
|
39
|
+
/** Error info if state is FAILED */
|
|
40
|
+
error?: {
|
|
41
|
+
code: number;
|
|
42
|
+
message: string;
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
export declare class GeminiFileAPI {
|
|
46
|
+
private baseUrl;
|
|
47
|
+
private timeoutMs;
|
|
48
|
+
constructor(config: {
|
|
49
|
+
baseUrl: string;
|
|
50
|
+
timeoutMs?: number;
|
|
51
|
+
});
|
|
52
|
+
/**
|
|
53
|
+
* Upload a file using the resumable upload protocol.
|
|
54
|
+
*
|
|
55
|
+
* Step 1: Initiate upload → get upload URL from response header
|
|
56
|
+
* Step 2: Upload bytes to that URL → get file metadata
|
|
57
|
+
*
|
|
58
|
+
* @returns GeminiFileInfo with .uri for use in generateContent
|
|
59
|
+
*/
|
|
60
|
+
uploadFile(file: Blob | Buffer, apiKey: string, options?: {
|
|
61
|
+
mimeType?: string;
|
|
62
|
+
displayName?: string;
|
|
63
|
+
}, signal?: AbortSignal): Promise<GeminiFileInfo>;
|
|
64
|
+
/**
|
|
65
|
+
* Wait for a file to finish processing (state → ACTIVE).
|
|
66
|
+
* Some file types (video, audio) require server-side processing.
|
|
67
|
+
*/
|
|
68
|
+
waitForProcessing(name: string, apiKey: string, options?: {
|
|
69
|
+
pollIntervalMs?: number;
|
|
70
|
+
maxWaitMs?: number;
|
|
71
|
+
}): Promise<GeminiFileInfo>;
|
|
72
|
+
/**
|
|
73
|
+
* Get metadata for an uploaded file.
|
|
74
|
+
*/
|
|
75
|
+
getFile(name: string, apiKey: string, signal?: AbortSignal): Promise<GeminiFileInfo>;
|
|
76
|
+
/**
|
|
77
|
+
* List uploaded files.
|
|
78
|
+
*/
|
|
79
|
+
listFiles(apiKey: string, options?: {
|
|
80
|
+
pageSize?: number;
|
|
81
|
+
pageToken?: string;
|
|
82
|
+
}, signal?: AbortSignal): Promise<{
|
|
83
|
+
files: GeminiFileInfo[];
|
|
84
|
+
nextPageToken?: string;
|
|
85
|
+
}>;
|
|
86
|
+
/**
|
|
87
|
+
* Delete an uploaded file.
|
|
88
|
+
*/
|
|
89
|
+
deleteFile(name: string, apiKey: string, signal?: AbortSignal): Promise<void>;
|
|
90
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini generateContent Transport — Native Gemini API streaming implementation.
|
|
3
|
+
*
|
|
4
|
+
* Targets Gemini 3 series exclusively (3.1 Pro, 3 Flash, 3.1 Flash-Lite).
|
|
5
|
+
* Uses the native Gemini REST API instead of the OpenAI compatibility layer,
|
|
6
|
+
* unlocking Gemini-exclusive features unavailable via the compat endpoint:
|
|
7
|
+
* - thinkingConfig (thinkingLevel — G3 native control)
|
|
8
|
+
* - Google Search / Maps Grounding
|
|
9
|
+
* - Code Execution
|
|
10
|
+
* - Safety Settings fine-grained control
|
|
11
|
+
* - Thought Signatures for multi-turn reasoning continuity
|
|
12
|
+
* - URL Context / File Search
|
|
13
|
+
* - systemInstruction top-level field
|
|
14
|
+
*
|
|
15
|
+
* Streaming endpoint: POST .../models/{model}:streamGenerateContent?alt=sse
|
|
16
|
+
* Non-streaming: POST .../models/{model}:generateContent
|
|
17
|
+
* Auth: x-goog-api-key header
|
|
18
|
+
*
|
|
19
|
+
* Protocol reference: https://ai.google.dev/gemini-api/docs
|
|
20
|
+
* Aligned with gemini-ProviderMax.md native protocol strategy.
|
|
21
|
+
*/
|
|
22
|
+
import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
|
|
23
|
+
import type { ProviderQuirks } from "../provider-def.js";
|
|
24
|
+
export interface GeminiGenerateContentTransportConfig {
|
|
25
|
+
/** Base URL, e.g. "https://generativelanguage.googleapis.com/v1beta" */
|
|
26
|
+
baseUrl: string;
|
|
27
|
+
/** Per-request timeout in ms (default 180_000) */
|
|
28
|
+
timeoutMs?: number;
|
|
29
|
+
/** Provider-specific quirks */
|
|
30
|
+
quirks?: ProviderQuirks;
|
|
31
|
+
}
|
|
32
|
+
export declare class GeminiGenerateContentTransport implements LLMTransport {
|
|
33
|
+
private baseUrl;
|
|
34
|
+
private timeoutMs;
|
|
35
|
+
private quirks;
|
|
36
|
+
constructor(config: GeminiGenerateContentTransportConfig);
|
|
37
|
+
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
38
|
+
private buildRequestBody;
|
|
39
|
+
private buildTools;
|
|
40
|
+
private buildToolConfig;
|
|
41
|
+
private buildGenerationConfig;
|
|
42
|
+
private fetchAndStream;
|
|
43
|
+
/**
|
|
44
|
+
* Parse SSE stream with 90s idle watchdog (CC parity).
|
|
45
|
+
*/
|
|
46
|
+
private parseSSEStreamWithWatchdog;
|
|
47
|
+
/**
|
|
48
|
+
* Process a single Gemini response chunk, yielding LLMChunk events.
|
|
49
|
+
*/
|
|
50
|
+
private processResponse;
|
|
51
|
+
private nonStreamingFallback;
|
|
52
|
+
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GeminiLyriaRealtimeSession — WebSocket-based real-time streaming music generation.
|
|
3
|
+
*
|
|
4
|
+
* Uses Lyria RealTime (`lyria-realtime-exp`) via the Gemini Live API WebSocket.
|
|
5
|
+
* Provides both a full interactive session API and a one-shot convenience method.
|
|
6
|
+
*
|
|
7
|
+
* Protocol:
|
|
8
|
+
* - WebSocket URL: wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent
|
|
9
|
+
* - Auth: API key as query parameter
|
|
10
|
+
* - Client → Server: setup, musicInput (weightedPrompts, musicGenerationConfig, playbackControl)
|
|
11
|
+
* - Server → Client: serverContent.audioChunks (base64 PCM s16le, 48kHz, stereo)
|
|
12
|
+
*
|
|
13
|
+
* Docs: https://ai.google.dev/gemini-api/docs/realtime-music-generation
|
|
14
|
+
*/
|
|
15
|
+
export interface WeightedPrompt {
|
|
16
|
+
text: string;
|
|
17
|
+
weight: number;
|
|
18
|
+
}
|
|
19
|
+
export type MusicScale = "C_MAJOR_A_MINOR" | "D_FLAT_MAJOR_B_FLAT_MINOR" | "D_MAJOR_B_MINOR" | "E_FLAT_MAJOR_C_MINOR" | "E_MAJOR_D_FLAT_MINOR" | "F_MAJOR_D_MINOR" | "G_FLAT_MAJOR_E_FLAT_MINOR" | "G_MAJOR_E_MINOR" | "A_FLAT_MAJOR_F_MINOR" | "A_MAJOR_G_FLAT_MINOR" | "B_FLAT_MAJOR_G_MINOR" | "B_MAJOR_A_FLAT_MINOR" | "SCALE_UNSPECIFIED";
|
|
20
|
+
export type MusicGenerationMode = "QUALITY" | "DIVERSITY" | "VOCALIZATION";
|
|
21
|
+
export interface MusicGenerationConfig {
|
|
22
|
+
bpm?: number;
|
|
23
|
+
density?: number;
|
|
24
|
+
brightness?: number;
|
|
25
|
+
guidance?: number;
|
|
26
|
+
scale?: MusicScale;
|
|
27
|
+
temperature?: number;
|
|
28
|
+
topK?: number;
|
|
29
|
+
seed?: number;
|
|
30
|
+
muteBass?: boolean;
|
|
31
|
+
muteDrums?: boolean;
|
|
32
|
+
onlyBassAndDrums?: boolean;
|
|
33
|
+
musicGenerationMode?: MusicGenerationMode;
|
|
34
|
+
audioFormat?: string;
|
|
35
|
+
sampleRateHz?: number;
|
|
36
|
+
}
|
|
37
|
+
export interface LyriaRealtimeConfig {
|
|
38
|
+
/** Base URL (REST), e.g. "https://generativelanguage.googleapis.com/v1beta" */
|
|
39
|
+
baseUrl: string;
|
|
40
|
+
}
|
|
41
|
+
export interface LyriaRealtimeSessionOptions {
|
|
42
|
+
model?: string;
|
|
43
|
+
prompts: WeightedPrompt[];
|
|
44
|
+
config?: MusicGenerationConfig;
|
|
45
|
+
}
|
|
46
|
+
/** Audio chunk received from the server. */
|
|
47
|
+
export interface AudioChunk {
|
|
48
|
+
/** Raw PCM s16le data (48kHz, stereo) */
|
|
49
|
+
data: Buffer;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Interactive Lyria RealTime session over WebSocket.
|
|
53
|
+
*
|
|
54
|
+
* Usage:
|
|
55
|
+
* const session = new GeminiLyriaRealtimeSession({ baseUrl: "..." });
|
|
56
|
+
* await session.connect(apiKey, { prompts: [{ text: "jazz", weight: 1 }] });
|
|
57
|
+
* session.onAudioChunk = (chunk) => { ... };
|
|
58
|
+
* await session.play();
|
|
59
|
+
* // ... later
|
|
60
|
+
* await session.stop();
|
|
61
|
+
* session.close();
|
|
62
|
+
*/
|
|
63
|
+
export declare class GeminiLyriaRealtimeSession {
|
|
64
|
+
private ws;
|
|
65
|
+
private wsUrl;
|
|
66
|
+
private model;
|
|
67
|
+
/** Called for each audio chunk received from the server. */
|
|
68
|
+
onAudioChunk: ((chunk: AudioChunk) => void) | null;
|
|
69
|
+
/** Called when the server reports a filtered prompt. */
|
|
70
|
+
onFilteredPrompt: ((reason: string) => void) | null;
|
|
71
|
+
/** Called on WebSocket error. */
|
|
72
|
+
onError: ((error: Error) => void) | null;
|
|
73
|
+
/** Called when the WebSocket connection closes. */
|
|
74
|
+
onClose: (() => void) | null;
|
|
75
|
+
constructor(config: LyriaRealtimeConfig);
|
|
76
|
+
/**
|
|
77
|
+
* Connect to the Lyria RealTime WebSocket and send setup + initial config.
|
|
78
|
+
*/
|
|
79
|
+
connect(apiKey: string, options: LyriaRealtimeSessionOptions): Promise<void>;
|
|
80
|
+
/** Set or update weighted prompts (smooth transition). */
|
|
81
|
+
setWeightedPrompts(prompts: WeightedPrompt[]): Promise<void>;
|
|
82
|
+
/** Set or update music generation config. */
|
|
83
|
+
setMusicGenerationConfig(config: MusicGenerationConfig): Promise<void>;
|
|
84
|
+
/** Start streaming music. */
|
|
85
|
+
play(): Promise<void>;
|
|
86
|
+
/** Pause music streaming (can resume with play). */
|
|
87
|
+
pause(): Promise<void>;
|
|
88
|
+
/** Stop music streaming (terminates the current piece). */
|
|
89
|
+
stop(): Promise<void>;
|
|
90
|
+
/** Reset model context (for BPM/scale changes). */
|
|
91
|
+
resetContext(): Promise<void>;
|
|
92
|
+
/** Close the WebSocket connection. */
|
|
93
|
+
close(): void;
|
|
94
|
+
/** Whether the session is connected. */
|
|
95
|
+
get connected(): boolean;
|
|
96
|
+
private send;
|
|
97
|
+
private handleMessage;
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Generate a fixed-duration music clip using Lyria RealTime.
|
|
101
|
+
*
|
|
102
|
+
* Connects, plays for the specified duration (default 30s), collects all
|
|
103
|
+
* audio chunks, assembles into a WAV file, and returns the file path.
|
|
104
|
+
*
|
|
105
|
+
* Output: 48kHz, stereo, 16-bit PCM wrapped in WAV.
|
|
106
|
+
*/
|
|
107
|
+
export declare function generateRealtimeMusic(apiKey: string, config: LyriaRealtimeConfig, options: {
|
|
108
|
+
prompts: WeightedPrompt[];
|
|
109
|
+
durationSeconds?: number;
|
|
110
|
+
musicConfig?: MusicGenerationConfig;
|
|
111
|
+
model?: string;
|
|
112
|
+
signal?: AbortSignal;
|
|
113
|
+
onProgress?: (percent: number, status: string) => void;
|
|
114
|
+
}): Promise<{
|
|
115
|
+
filePath: string;
|
|
116
|
+
durationMs: number;
|
|
117
|
+
}>;
|
|
@@ -1,21 +1,53 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Gemini Media Transport —
|
|
2
|
+
* Gemini Media Transport — unified media generation for all Gemini media APIs.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* POST /
|
|
6
|
-
*
|
|
7
|
-
*
|
|
4
|
+
* Supported media types and endpoints:
|
|
5
|
+
* image — POST /models/{model}:generateContent (responseModalities: ["TEXT","IMAGE"])
|
|
6
|
+
* video — POST /models/{model}:predictLongRunning → poll operations → download URI
|
|
7
|
+
* music — POST /models/{model}:generateContent (Lyria 3 — inlineData audio)
|
|
8
|
+
* music_realtime — WebSocket session (Lyria RealTime — streaming PCM → WAV)
|
|
9
|
+
* tts — POST /models/{model}:generateContent (speechConfig — inlineData PCM)
|
|
10
|
+
* embedding — POST /models/{model}:embedContent (float vector)
|
|
11
|
+
*
|
|
12
|
+
* Auth: x-goog-api-key header for all endpoints.
|
|
8
13
|
*/
|
|
9
|
-
import type {
|
|
14
|
+
import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
|
|
10
15
|
export interface GeminiMediaConfig {
|
|
11
|
-
/** Base URL, e.g. "https://generativelanguage.googleapis.com/v1beta
|
|
16
|
+
/** Base URL, e.g. "https://generativelanguage.googleapis.com/v1beta" */
|
|
12
17
|
baseUrl: string;
|
|
13
18
|
timeoutMs?: number;
|
|
14
19
|
}
|
|
15
|
-
export declare class GeminiMediaTransport implements
|
|
20
|
+
export declare class GeminiMediaTransport implements AsyncMediaTransport {
|
|
16
21
|
readonly supportedTypes: readonly MediaType[];
|
|
17
22
|
private apiBase;
|
|
18
23
|
private timeoutMs;
|
|
19
24
|
constructor(config: GeminiMediaConfig);
|
|
20
25
|
generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
|
|
26
|
+
deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
|
|
27
|
+
listVideoTasks(_apiKey: string, _options?: {
|
|
28
|
+
after?: string;
|
|
29
|
+
limit?: number;
|
|
30
|
+
status?: string;
|
|
31
|
+
}, _signal?: AbortSignal): Promise<Record<string, unknown>>;
|
|
32
|
+
getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
|
|
33
|
+
status: string;
|
|
34
|
+
task: Record<string, unknown>;
|
|
35
|
+
}>;
|
|
36
|
+
private generateImage;
|
|
37
|
+
private generateVideo;
|
|
38
|
+
private generateMusic;
|
|
39
|
+
private generateMusicRealtime;
|
|
40
|
+
private generateTTS;
|
|
41
|
+
private generateEmbedding;
|
|
42
|
+
private postJson;
|
|
43
|
+
private pollOperation;
|
|
44
|
+
/**
|
|
45
|
+
* Resolve an image URL to inline data for the Veo API.
|
|
46
|
+
* Supports file:// paths and https:// URLs.
|
|
47
|
+
*/
|
|
48
|
+
private resolveImageData;
|
|
49
|
+
/** Extract base64 image data from generateContent response → persist to cache files. */
|
|
50
|
+
private extractInlineImages;
|
|
51
|
+
/** Extract base64 audio data from generateContent response → persist to cache files. */
|
|
52
|
+
private extractInlineAudio;
|
|
21
53
|
}
|
|
@@ -1,26 +1,55 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* MiniMax Media Transport — Music
|
|
2
|
+
* MiniMax Media Transport — Music + Video Generation.
|
|
3
|
+
*
|
|
4
|
+
* Music: POST /v1/music_generation (sync or async poll)
|
|
5
|
+
* Video: POST /v1/video_generation (4 modes: text, image, first-last-frame, subject-ref)
|
|
6
|
+
* Video Query: GET /v1/query/video_generation?task_id=XXX
|
|
7
|
+
* File Retrieve: GET /v1/files/retrieve?file_id=XXX (get download_url)
|
|
3
8
|
*
|
|
4
|
-
* POST /v1/music_generation (async job: submit → poll → result)
|
|
5
9
|
* Auth: Authorization: Bearer $MINIMAX_API_KEY
|
|
6
|
-
* Docs:
|
|
10
|
+
* Docs: minimax-ProviderMax.md §13-18 (video), §21 (music), §24-28 (files)
|
|
7
11
|
*/
|
|
8
|
-
import type {
|
|
12
|
+
import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
|
|
9
13
|
export interface MiniMaxMediaConfig {
|
|
10
14
|
/** Base URL, e.g. "https://api.minimaxi.com" */
|
|
11
15
|
baseUrl: string;
|
|
12
16
|
timeoutMs?: number;
|
|
13
17
|
}
|
|
14
|
-
export declare class MiniMaxMediaTransport implements
|
|
18
|
+
export declare class MiniMaxMediaTransport implements AsyncMediaTransport {
|
|
15
19
|
readonly supportedTypes: readonly MediaType[];
|
|
16
20
|
private baseUrl;
|
|
17
21
|
private timeoutMs;
|
|
18
22
|
constructor(config: MiniMaxMediaConfig);
|
|
19
23
|
generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
|
|
24
|
+
private generateMusic;
|
|
20
25
|
private pollTask;
|
|
21
26
|
/**
|
|
22
27
|
* Generate lyrics from a text prompt via MiniMax Lyrics Generation API.
|
|
23
28
|
* POST /v1/lyrics_generation — returns structured lyrics with tags.
|
|
24
29
|
*/
|
|
25
30
|
generateLyrics(prompt: string, apiKey: string, signal?: AbortSignal): Promise<string>;
|
|
31
|
+
private generateVideo;
|
|
32
|
+
private pollVideoTask;
|
|
33
|
+
private getFileDownloadUrl;
|
|
34
|
+
/**
|
|
35
|
+
* Query a single video task by ID.
|
|
36
|
+
* GET /v1/query/video_generation?task_id=XXX
|
|
37
|
+
*/
|
|
38
|
+
getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
|
|
39
|
+
status: string;
|
|
40
|
+
task: Record<string, unknown>;
|
|
41
|
+
}>;
|
|
42
|
+
/**
|
|
43
|
+
* List tasks — MiniMax does not have a bulk list endpoint.
|
|
44
|
+
* Each task must be queried individually with getTaskStatus().
|
|
45
|
+
*/
|
|
46
|
+
listVideoTasks(_apiKey: string, _options?: {
|
|
47
|
+
after?: string;
|
|
48
|
+
limit?: number;
|
|
49
|
+
status?: string;
|
|
50
|
+
}, _signal?: AbortSignal): Promise<Record<string, unknown>>;
|
|
51
|
+
/**
|
|
52
|
+
* Cancel/delete is not natively supported by MiniMax video API.
|
|
53
|
+
*/
|
|
54
|
+
deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
|
|
26
55
|
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Responses API Transport — SSE streaming implementation.
|
|
3
|
+
*
|
|
4
|
+
* Implements the OpenAI Responses API (`POST /v1/responses`),
|
|
5
|
+
* the officially recommended path for GPT-5.x text generation.
|
|
6
|
+
*
|
|
7
|
+
* Key differences from OpenAI Chat Completions:
|
|
8
|
+
* - Endpoint: POST {baseUrl}/v1/responses
|
|
9
|
+
* - Request body uses `input` (not `messages`), `instructions`, `reasoning`
|
|
10
|
+
* - SSE events: response.output_text.delta, response.function_call_arguments.delta,
|
|
11
|
+
* response.completed, etc.
|
|
12
|
+
* - Tool defs: { type: "function", name, parameters } (not nested under `function:`)
|
|
13
|
+
* - Tool results: { type: "function_call_output", call_id, output }
|
|
14
|
+
* - Context persistence: previous_response_id for server-side session continuation
|
|
15
|
+
* - Structured output: `text: { format: { type: "json_schema", ... } }`
|
|
16
|
+
* - Reasoning: `reasoning: { effort, summary }` for GPT-5.x models
|
|
17
|
+
*
|
|
18
|
+
* Wire format reference:
|
|
19
|
+
* https://developers.openai.com/api/docs/api-reference/responses/create
|
|
20
|
+
* https://developers.openai.com/api/docs/api-reference/responses/streaming-events
|
|
21
|
+
*
|
|
22
|
+
* Design: Closely mirrors volcengine-responses.ts patterns while adapting to
|
|
23
|
+
* OpenAI-specific wire format. Shared LLMChunk output makes upper layers
|
|
24
|
+
* transport-agnostic.
|
|
25
|
+
*/
|
|
26
|
+
import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
|
|
27
|
+
import type { ProviderQuirks } from "../provider-def.js";
|
|
28
|
+
export interface OpenAIResponsesTransportConfig {
|
|
29
|
+
baseUrl: string;
|
|
30
|
+
extraHeaders?: Record<string, string>;
|
|
31
|
+
timeoutMs?: number;
|
|
32
|
+
quirks?: ProviderQuirks;
|
|
33
|
+
}
|
|
34
|
+
export declare class OpenAIResponsesTransport implements LLMTransport {
|
|
35
|
+
private baseUrl;
|
|
36
|
+
private extraHeaders;
|
|
37
|
+
private timeoutMs;
|
|
38
|
+
private quirks;
|
|
39
|
+
constructor(config: OpenAIResponsesTransportConfig);
|
|
40
|
+
stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
|
|
41
|
+
private buildRequestBody;
|
|
42
|
+
private fetchAndStream;
|
|
43
|
+
private handleNonStreamingResponse;
|
|
44
|
+
/**
|
|
45
|
+
* Parse OpenAI Responses API SSE stream.
|
|
46
|
+
*
|
|
47
|
+
* Event format: "event: <type>\ndata: <json>\n\n"
|
|
48
|
+
* Key events:
|
|
49
|
+
* - response.output_text.delta → text content delta
|
|
50
|
+
* - response.reasoning_summary_text.delta → reasoning summary text
|
|
51
|
+
* - response.function_call_arguments.delta → tool call arguments streaming
|
|
52
|
+
* - response.output_item.added → new output item started
|
|
53
|
+
* - response.output_item.done → output item completed
|
|
54
|
+
* - response.content_part.done → content part completed (annotations)
|
|
55
|
+
* - response.completed → full response complete with usage
|
|
56
|
+
* - response.failed → error
|
|
57
|
+
*/
|
|
58
|
+
private parseSSEStream;
|
|
59
|
+
private processEvent;
|
|
60
|
+
}
|