qlogicagent 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/agent.js +6 -6
  2. package/dist/cli.js +181 -172
  3. package/dist/index.js +181 -172
  4. package/dist/orchestration.js +1 -1
  5. package/dist/types/agent/tool-loop.d.ts +2 -0
  6. package/dist/types/agent/types.d.ts +46 -1
  7. package/dist/types/cli/stdio-server.d.ts +10 -0
  8. package/dist/types/cli/tool-bootstrap.d.ts +13 -1
  9. package/dist/types/llm/index.d.ts +1 -1
  10. package/dist/types/llm/llm-client.d.ts +1 -1
  11. package/dist/types/llm/media-client.d.ts +3 -4
  12. package/dist/types/llm/media-transport.d.ts +75 -4
  13. package/dist/types/llm/provider-def.d.ts +124 -3
  14. package/dist/types/llm/provider-registry.d.ts +5 -0
  15. package/dist/types/llm/provider-tool-api.d.ts +44 -0
  16. package/dist/types/llm/retry.d.ts +37 -0
  17. package/dist/types/llm/transport.d.ts +157 -2
  18. package/dist/types/llm/transports/anthropic-messages.d.ts +7 -0
  19. package/dist/types/llm/transports/minimax-media.d.ts +5 -0
  20. package/dist/types/llm/transports/openai-chat.d.ts +44 -3
  21. package/dist/types/llm/transports/realtime-transport.d.ts +183 -0
  22. package/dist/types/llm/transports/volcengine-grounding.d.ts +58 -0
  23. package/dist/types/llm/transports/volcengine-media.d.ts +50 -0
  24. package/dist/types/llm/transports/volcengine-responses.d.ts +60 -0
  25. package/dist/types/llm/transports/zhipu-media.d.ts +60 -0
  26. package/dist/types/llm/transports/zhipu-tool-api.d.ts +35 -0
  27. package/dist/types/orchestration/tool-cascade.d.ts +40 -0
  28. package/dist/types/orchestration/tool-loop/tool-schema.d.ts +1 -1
  29. package/dist/types/protocol/methods.d.ts +19 -0
  30. package/dist/types/skills/memory/memory-extractor.d.ts +1 -1
  31. package/dist/types/skills/tools/file-management-tool.d.ts +90 -0
  32. package/dist/types/skills/tools/image-generate-tool.d.ts +13 -1
  33. package/dist/types/skills/tools/music-generate-tool.d.ts +25 -0
  34. package/dist/types/skills/tools/stt-tool.d.ts +33 -0
  35. package/dist/types/skills/tools/three-d-generate-tool.d.ts +45 -0
  36. package/dist/types/skills/tools/tts-tool.d.ts +12 -0
  37. package/dist/types/skills/tools/video-edit-tool.d.ts +5 -2
  38. package/dist/types/skills/tools/video-generate-tool.d.ts +102 -2
  39. package/dist/types/skills/tools/video-merge-tool.d.ts +1 -1
  40. package/dist/types/skills/tools/video-upscale-tool.d.ts +1 -1
  41. package/dist/types/skills/tools/voice-clone-tool.d.ts +40 -0
  42. package/package.json +1 -1
@@ -9,17 +9,146 @@
9
9
  * - Anthropic Messages API
10
10
  */
11
11
  import type { ChatMessage, ToolDefinition } from "../agent/types.js";
12
+ export type StructuredOutputConfig = {
13
+ mode: "json_object";
14
+ } | {
15
+ mode: "json_schema";
16
+ name: string;
17
+ schema: Record<string, unknown>;
18
+ strict?: boolean;
19
+ };
20
+ export interface CachingConfig {
21
+ type: "enabled" | "disabled";
22
+ /** Enable prefix caching mode (§20.3). Requires store=true and stream=false. */
23
+ prefix?: boolean;
24
+ }
25
+ export type ContextEdit = {
26
+ type: "clear_thinking";
27
+ keep?: "all" | {
28
+ type: "thinking_turns";
29
+ value: number;
30
+ };
31
+ } | {
32
+ type: "clear_tool_uses";
33
+ trigger?: {
34
+ type: "tool_uses";
35
+ value: number;
36
+ };
37
+ keep?: {
38
+ type: "tool_uses";
39
+ value: number;
40
+ };
41
+ excludeTools?: string[];
42
+ clearToolInput?: boolean;
43
+ };
44
+ export interface ContextManagementConfig {
45
+ edits: ContextEdit[];
46
+ }
12
47
  export interface LLMRequest {
13
48
  model: string;
14
49
  messages: ChatMessage[];
15
50
  tools?: ToolDefinition[];
16
- toolChoice?: "auto" | "none" | "required";
51
+ toolChoice?: "auto" | "none" | "required" | {
52
+ type: "function";
53
+ name: string;
54
+ };
17
55
  temperature?: number;
56
+ /** Nucleus sampling: controls diversity via cumulative probability cutoff. */
57
+ topP?: number;
18
58
  maxTokens?: number;
19
59
  reasoning?: {
20
- effort: "low" | "medium" | "high";
60
+ effort: "minimal" | "low" | "medium" | "high";
61
+ /** Request encrypted original reasoning content (Volcengine §17.7). */
62
+ includeEncryptedReasoning?: boolean;
21
63
  };
64
+ /** Volcengine: max builtin tool calls per turn (§19.15). */
65
+ maxToolCalls?: number;
66
+ /**
67
+ * DeepSeek prefix completion: force model to continue from this prefix.
68
+ * Requires `/beta` endpoint; adds a trailing assistant message with `prefix: true`.
69
+ */
70
+ prefixMessage?: string;
71
+ /**
72
+ * Model requires streaming — disable non-streaming fallback in transports.
73
+ * When true, transports must NOT fall back to non-streaming requests on failure.
74
+ * Set for models like QwQ/Omni where the provider rejects non-streaming calls.
75
+ */
76
+ streamRequired?: boolean;
77
+ /**
78
+ * Disable injection of provider-native builtin tools (web_search, code_interpreter)
79
+ * for this specific request. Allows session-level control over GLM/Kimi builtin tools.
80
+ */
81
+ disableBuiltinTools?: boolean;
82
+ /**
83
+ * Volcengine builtin tools to inject (web_search, image_process, knowledge_search).
84
+ * Each entry specifies a tool type and optional config.
85
+ * These are platform-executed tools requiring beta headers.
86
+ */
87
+ builtinTools?: Array<{
88
+ type: "builtin_web_search" | "builtin_image_process" | "builtin_knowledge_search" | "builtin_doubao_app";
89
+ config?: Record<string, unknown>;
90
+ }>;
91
+ /**
92
+ * Server-side context continuation via response chain (§5).
93
+ * When set, the server automatically includes previous context,
94
+ * so messages[] only needs to contain the NEW user message.
95
+ */
96
+ previousResponseId?: string;
97
+ /**
98
+ * Control server-side storage of this request's input/output (§5.1).
99
+ * Default: true (server stores for 3 days).
100
+ */
101
+ store?: boolean;
102
+ /** Expiration time for stored response (Unix seconds, max 7 days from now) */
103
+ storeExpireAt?: number;
104
+ /**
105
+ * Per-turn system instruction augmentation (§8).
106
+ * Temporarily overlays persona or adds constraints for this turn only.
107
+ * NOTE: Incompatible with caching — do not use both together.
108
+ */
109
+ instructions?: string;
110
+ /**
111
+ * Structured output format (§16).
112
+ * Forces model to produce JSON conforming to the specified schema.
113
+ */
114
+ structuredOutput?: StructuredOutputConfig;
115
+ /**
116
+ * Caching configuration (§20).
117
+ * Controls prefix/session caching behavior.
118
+ * NOTE: Incompatible with instructions, json_schema, and builtin tools.
119
+ */
120
+ caching?: CachingConfig;
121
+ /**
122
+ * Context management edits (§21, beta).
123
+ * Server-side trimming of historical thinking chains and tool call traces.
124
+ */
125
+ contextManagement?: ContextManagementConfig;
22
126
  }
127
+ /**
128
+ * FIM completion request — DeepSeek Beta Completions API.
129
+ * POST /beta/v1/completions with prompt + suffix.
130
+ * Only works with non-thinking mode.
131
+ */
132
+ export interface FIMRequest {
133
+ model: string;
134
+ /** Text before the cursor (prefix context) */
135
+ prompt: string;
136
+ /** Text after the cursor (suffix context) */
137
+ suffix?: string;
138
+ /** Max tokens to generate for the infill */
139
+ maxTokens?: number;
140
+ /** Sampling temperature */
141
+ temperature?: number;
142
+ /** Stop sequences */
143
+ stop?: string[];
144
+ }
145
+ export type FIMChunk = {
146
+ type: "delta";
147
+ text: string;
148
+ } | {
149
+ type: "done";
150
+ finishReason: string;
151
+ };
23
152
  export type LLMChunk = {
24
153
  type: "delta";
25
154
  text: string;
@@ -43,6 +172,27 @@ export type LLMChunk = {
43
172
  reasoningTokens?: number;
44
173
  cacheReadTokens?: number;
45
174
  cacheCreationTokens?: number;
175
+ } | {
176
+ type: "response_id";
177
+ id: string;
178
+ } | {
179
+ /** Informational status from platform-executed builtin tools (web_search, image_process). */
180
+ type: "builtin_tool_status";
181
+ toolType: string;
182
+ event: string;
183
+ data?: Record<string, unknown>;
184
+ } | {
185
+ /** Web search citation annotations from Volcengine web_search results. */
186
+ type: "annotations";
187
+ annotations: Array<{
188
+ type: string;
189
+ url?: string;
190
+ title?: string;
191
+ [key: string]: unknown;
192
+ }>;
193
+ } | {
194
+ type: "error";
195
+ message: string;
46
196
  } | {
47
197
  type: "done";
48
198
  finishReason: string;
@@ -58,6 +208,11 @@ export interface LLMTransport {
58
208
  * apiKey is passed explicitly (from agent.turn.config, not env).
59
209
  */
60
210
  stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
211
+ /**
212
+ * FIM (Fill-In-Middle) completion — optional capability.
213
+ * Only implemented by providers that support it (DeepSeek /beta endpoint).
214
+ */
215
+ complete?(request: FIMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<FIMChunk>;
61
216
  }
62
217
  /**
63
218
  * Accumulate tool_call_delta chunks into complete ToolCall objects.
@@ -12,6 +12,7 @@
12
12
  * - signature_delta handling for thinking blocks
13
13
  */
14
14
  import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
15
+ import type { ProviderQuirks } from "../provider-def.js";
15
16
  export interface AnthropicTransportConfig {
16
17
  baseUrl: string;
17
18
  /** anthropic-version header (default "2023-06-01") */
@@ -24,6 +25,10 @@ export interface AnthropicTransportConfig {
24
25
  enablePromptCaching?: boolean;
25
26
  /** Max retry attempts on transient errors (default 3) */
26
27
  maxRetries?: number;
28
+ /** Omit temperature when it equals 0 — MiniMax rejects temperature=0 */
29
+ omitZeroTemperature?: boolean;
30
+ /** Provider-specific quirks for conditional logic (CC/altcode parity) */
31
+ quirks?: ProviderQuirks;
27
32
  }
28
33
  export declare class AnthropicMessagesTransport implements LLMTransport {
29
34
  private baseUrl;
@@ -32,6 +37,8 @@ export declare class AnthropicMessagesTransport implements LLMTransport {
32
37
  private streamIdleTimeoutMs;
33
38
  private enablePromptCaching;
34
39
  private maxRetries;
40
+ private omitZeroTemperature;
41
+ private quirks;
35
42
  constructor(config: AnthropicTransportConfig);
36
43
  stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
37
44
  /**
@@ -18,4 +18,9 @@ export declare class MiniMaxMediaTransport implements MediaTransport {
18
18
  constructor(config: MiniMaxMediaConfig);
19
19
  generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
20
20
  private pollTask;
21
+ /**
22
+ * Generate lyrics from a text prompt via MiniMax Lyrics Generation API.
23
+ * POST /v1/lyrics_generation — returns structured lyrics with tags.
24
+ */
25
+ generateLyrics(prompt: string, apiKey: string, signal?: AbortSignal): Promise<string>;
21
26
  }
@@ -11,7 +11,8 @@
11
11
  *
12
12
  * Adapted from admin-infer-proxy-client.ts SSE logic + Hermes openai_chat.py transport.
13
13
  */
14
- import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
14
+ import type { LLMChunk, LLMRequest, LLMTransport, FIMRequest, FIMChunk } from "../transport.js";
15
+ import type { ProviderQuirks } from "../provider-def.js";
15
16
  export interface OpenAIChatTransportConfig {
16
17
  baseUrl: string;
17
18
  /** Additional headers (e.g. for specific providers) */
@@ -22,6 +23,8 @@ export interface OpenAIChatTransportConfig {
22
23
  supportsStreamOptions?: boolean;
23
24
  /** Whether to omit temperature when it equals 0 (e.g. Moonshot rejects 0) */
24
25
  omitZeroTemperature?: boolean;
26
+ /** Provider-specific quirks (CC/altcode parity) */
27
+ quirks?: ProviderQuirks;
25
28
  }
26
29
  export declare class OpenAIChatTransport implements LLMTransport {
27
30
  private baseUrl;
@@ -29,8 +32,46 @@ export declare class OpenAIChatTransport implements LLMTransport {
29
32
  private timeoutMs;
30
33
  private supportsStreamOptions;
31
34
  private omitZeroTemperature;
35
+ private quirks;
36
+ private cumulativeReasoningLen;
37
+ private cumulativeContentLen;
32
38
  constructor(config: OpenAIChatTransportConfig);
33
39
  stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
34
- private parseSSEStream;
35
- private mapChunk;
40
+ private fetchAndStream;
41
+ /**
42
+ * Handle non-streaming JSON response from providers that ignore stream:true.
43
+ * Synthesize the same LLMChunk events a streaming response would produce.
44
+ */
45
+ private handleNonStreamingResponse;
46
+ /**
47
+ * Parse SSE stream with 90s idle watchdog (CC parity).
48
+ * If no data arrives within STREAM_IDLE_TIMEOUT_MS, throw to trigger retry.
49
+ */
50
+ private parseSSEStreamWithWatchdog;
51
+ private processChunk;
52
+ /**
53
+ * FIM completion via /beta/v1/completions.
54
+ * Only works with DeepSeek (requires supportsPrefixCompletion quirk).
55
+ * Non-thinking mode only; max completion 4K tokens.
56
+ */
57
+ complete(request: FIMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<FIMChunk>;
58
+ /**
59
+ * Upload a file for use in conversations (Kimi File API).
60
+ * Returns a file_id that can be referenced in user messages.
61
+ * POST /v1/files with multipart/form-data.
62
+ */
63
+ uploadFile(fileBlob: Blob, filename: string, purpose: string, apiKey: string, signal?: AbortSignal): Promise<{
64
+ fileId: string;
65
+ filename: string;
66
+ bytes: number;
67
+ }>;
68
+ /**
69
+ * Get file content/status — GET /v1/files/{file_id}
70
+ */
71
+ getFileInfo(fileId: string, apiKey: string, signal?: AbortSignal): Promise<{
72
+ id: string;
73
+ filename: string;
74
+ bytes: number;
75
+ status: string;
76
+ }>;
36
77
  }
@@ -0,0 +1,183 @@
1
+ /**
2
+ * Realtime WebSocket Transport — bidirectional audio/voice streaming
3
+ * via the OpenAI Realtime API protocol (also compatible with GLM Realtime).
4
+ *
5
+ * ## Protocol: WebSocket JSON events
6
+ *
7
+ * Client → Server:
8
+ * - session.update: configure session (model, voice, tools, etc.)
9
+ * - input_audio_buffer.append: send audio chunks (base64 PCM16)
10
+ * - input_audio_buffer.commit: signal end of audio input
11
+ * - conversation.item.create: inject text/function_result items
12
+ * - response.create: request a model response
13
+ * - response.cancel: abort in-progress response
14
+ *
15
+ * Server → Client:
16
+ * - session.created: session initialized
17
+ * - session.updated: config acknowledged
18
+ * - input_audio_buffer.speech_started: VAD detected speech
19
+ * - input_audio_buffer.speech_stopped: VAD detected silence
20
+ * - response.created: response generation started
21
+ * - response.output_item.added: new output item (text/audio/function_call)
22
+ * - response.audio.delta: audio chunk (base64 PCM16)
23
+ * - response.audio_transcript.delta: transcript of generated speech
24
+ * - response.text.delta: text generation delta
25
+ * - response.function_call_arguments.delta: tool call args delta
26
+ * - response.function_call_arguments.done: tool call complete
27
+ * - response.output_item.done: output item finished
28
+ * - response.done: full response complete
29
+ * - error: server error
30
+ *
31
+ * ## Architecture
32
+ *
33
+ * RealtimeTransport manages a single persistent WebSocket connection per session.
34
+ * It exposes an event-driven API (AsyncGenerator) that the agent tool-loop
35
+ * can consume for voice-enabled interactions.
36
+ *
37
+ * Docs:
38
+ * - OpenAI: https://platform.openai.com/docs/api-reference/realtime
39
+ * - GLM: https://docs.bigmodel.cn/cn/guide/develop/realtime-api
40
+ */
41
+ export interface RealtimeConfig {
42
+ /** WebSocket endpoint (e.g. "wss://api.openai.com/v1/realtime") */
43
+ baseUrl: string;
44
+ /** Model to use (e.g. "gpt-4o-realtime-preview", "glm-realtime") */
45
+ model: string;
46
+ /** API key */
47
+ apiKey: string;
48
+ /** Voice for TTS output */
49
+ voice?: string;
50
+ /** Input modalities: "text", "audio", or both */
51
+ inputModalities?: Array<"text" | "audio">;
52
+ /** Output modalities: "text", "audio", or both */
53
+ outputModalities?: Array<"text" | "audio">;
54
+ /** Temperature for generation */
55
+ temperature?: number;
56
+ /** Tool definitions for function calling */
57
+ tools?: RealtimeTool[];
58
+ /** Voice Activity Detection mode */
59
+ vadMode?: "server_vad" | "none";
60
+ /** VAD threshold (0.0-1.0) */
61
+ vadThreshold?: number;
62
+ /** Auth type: "header" (OpenAI) or "query" (GLM) */
63
+ authMode?: "header" | "query";
64
+ }
65
+ export interface RealtimeTool {
66
+ type: "function";
67
+ name: string;
68
+ description: string;
69
+ parameters: Record<string, unknown>;
70
+ }
71
+ export type RealtimeEvent = {
72
+ type: "session_created";
73
+ sessionId: string;
74
+ } | {
75
+ type: "speech_started";
76
+ } | {
77
+ type: "speech_stopped";
78
+ audioEndMs: number;
79
+ } | {
80
+ type: "audio_delta";
81
+ delta: string;
82
+ } | {
83
+ type: "audio_transcript_delta";
84
+ delta: string;
85
+ } | {
86
+ type: "text_delta";
87
+ delta: string;
88
+ } | {
89
+ type: "function_call_start";
90
+ callId: string;
91
+ name: string;
92
+ } | {
93
+ type: "function_call_delta";
94
+ callId: string;
95
+ delta: string;
96
+ } | {
97
+ type: "function_call_done";
98
+ callId: string;
99
+ name: string;
100
+ arguments: string;
101
+ } | {
102
+ type: "response_done";
103
+ usage?: RealtimeUsage;
104
+ } | {
105
+ type: "error";
106
+ code: string;
107
+ message: string;
108
+ } | {
109
+ type: "closed";
110
+ code: number;
111
+ reason: string;
112
+ };
113
+ export interface RealtimeUsage {
114
+ inputTokens: number;
115
+ outputTokens: number;
116
+ inputAudioTokens?: number;
117
+ outputAudioTokens?: number;
118
+ }
119
+ /**
120
+ * Manages a persistent WebSocket connection for real-time audio/voice
121
+ * interactions with an LLM provider.
122
+ *
123
+ * Usage:
124
+ * ```ts
125
+ * const rt = new RealtimeTransport(config);
126
+ * rt.connect();
127
+ *
128
+ * // Send audio
129
+ * rt.appendAudio(base64Chunk);
130
+ * rt.commitAudio();
131
+ *
132
+ * // Or send text
133
+ * rt.sendText("Hello!");
134
+ *
135
+ * // Submit function results
136
+ * rt.sendFunctionResult(callId, result);
137
+ *
138
+ * // Consume events
139
+ * for await (const event of rt.events()) {
140
+ * switch (event.type) {
141
+ * case "audio_delta": playAudio(event.delta); break;
142
+ * case "function_call_done": handleToolCall(event); break;
143
+ * }
144
+ * }
145
+ *
146
+ * rt.close();
147
+ * ```
148
+ */
149
+ export declare class RealtimeTransport {
150
+ private ws;
151
+ private config;
152
+ private eventQueue;
153
+ private waiters;
154
+ private closed;
155
+ constructor(config: RealtimeConfig);
156
+ /** Open WebSocket connection and configure session. */
157
+ connect(): Promise<void>;
158
+ /** Send audio data (base64 PCM16). */
159
+ appendAudio(base64Chunk: string): void;
160
+ /** Mark end of audio input and trigger response. */
161
+ commitAudio(): void;
162
+ /** Send a text message. */
163
+ sendText(text: string): void;
164
+ /** Submit a function call result back to the model. */
165
+ sendFunctionResult(callId: string, output: string): void;
166
+ /** Trigger a model response (e.g. after sending text). */
167
+ requestResponse(): void;
168
+ /** Cancel an in-progress response. */
169
+ cancelResponse(): void;
170
+ /** Async iterator of server events. */
171
+ events(): AsyncGenerator<RealtimeEvent>;
172
+ /** Close the WebSocket connection. */
173
+ close(): void;
174
+ private buildUrl;
175
+ private sendSessionUpdate;
176
+ private send;
177
+ private push;
178
+ private drainWaiters;
179
+ /**
180
+ * Parse a server-sent JSON event into our typed event(s).
181
+ */
182
+ private parseServerEvent;
183
+ }
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Volcengine Grounding — spatial coordinate parser (volcengine-ProviderMax §14).
3
+ *
4
+ * Parses model-emitted spatial reference tags from text output:
5
+ * - <bbox>x_min y_min x_max y_max</bbox> → bounding box
6
+ * - <point>x y</point> → single point
7
+ * - <polygon>x1 y1 x2 y2 ...</polygon> → polygon vertices
8
+ *
9
+ * All coordinates are in normalized 1000×1000 space, range [0, 999].
10
+ * Use `toPixelCoords()` to convert to actual image pixel coordinates.
11
+ */
12
+ export type SpatialReference = {
13
+ type: "bbox";
14
+ x1: number;
15
+ y1: number;
16
+ x2: number;
17
+ y2: number;
18
+ space: "normalized_1000";
19
+ } | {
20
+ type: "point";
21
+ x: number;
22
+ y: number;
23
+ space: "normalized_1000";
24
+ } | {
25
+ type: "polygon";
26
+ points: Array<{
27
+ x: number;
28
+ y: number;
29
+ }>;
30
+ space: "normalized_1000";
31
+ };
32
+ export interface PixelBbox {
33
+ x1: number;
34
+ y1: number;
35
+ x2: number;
36
+ y2: number;
37
+ }
38
+ export interface PixelPoint {
39
+ x: number;
40
+ y: number;
41
+ }
42
+ /**
43
+ * Extract all spatial references from model output text.
44
+ * Returns an empty array if no grounding tags are found.
45
+ */
46
+ export declare function parseGroundingTags(text: string): SpatialReference[];
47
+ /**
48
+ * Convert a normalized 1000×1000 bounding box to pixel coordinates.
49
+ */
50
+ export declare function bboxToPixels(ref: Extract<SpatialReference, {
51
+ type: "bbox";
52
+ }>, width: number, height: number): PixelBbox;
53
+ /**
54
+ * Convert a normalized 1000×1000 point to pixel coordinates.
55
+ */
56
+ export declare function pointToPixels(ref: Extract<SpatialReference, {
57
+ type: "point";
58
+ }>, width: number, height: number): PixelPoint;
@@ -27,8 +27,58 @@ export declare class VolcengineMediaTransport implements MediaTransport {
27
27
  */
28
28
  canHandle(request: MediaRequest): boolean;
29
29
  private generateImage;
30
+ /**
31
+ * Parse streaming image SSE — yields progressive image quality upgrades.
32
+ * Final event contains the full-quality image URL.
33
+ */
34
+ private parseStreamingImage;
30
35
  private generateVideo;
31
36
  private generate3D;
37
+ /**
38
+ * List video generation tasks with optional filters.
39
+ * GET /v3/contents/generations/tasks
40
+ */
41
+ listVideoTasks(apiKey: string, options?: {
42
+ after?: string;
43
+ limit?: number;
44
+ status?: string;
45
+ }, signal?: AbortSignal): Promise<Record<string, unknown>>;
46
+ /**
47
+ * Cancel or delete a video generation task.
48
+ * DELETE /v3/contents/generations/tasks/{taskId}
49
+ */
50
+ deleteVideoTask(taskId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
51
+ /**
52
+ * Upload a file to Volcengine Files API for reuse in multimodal requests.
53
+ * POST /v3/files
54
+ */
55
+ uploadFile(file: Blob | Buffer, apiKey: string, options?: {
56
+ purpose?: string;
57
+ filename?: string;
58
+ }, signal?: AbortSignal): Promise<{
59
+ id: string;
60
+ status: string;
61
+ }>;
62
+ /**
63
+ * Get file info by ID.
64
+ * GET /v3/files/{fileId}
65
+ */
66
+ getFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<Record<string, unknown>>;
67
+ /**
68
+ * List uploaded files.
69
+ * GET /v3/files
70
+ */
71
+ listFiles(apiKey: string, options?: {
72
+ after?: string;
73
+ limit?: number;
74
+ purpose?: string;
75
+ order?: "asc" | "desc";
76
+ }, signal?: AbortSignal): Promise<Record<string, unknown>>;
77
+ /**
78
+ * Delete a file.
79
+ * DELETE /v3/files/{fileId}
80
+ */
81
+ deleteFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
32
82
  private submitTask;
33
83
  private pollTask;
34
84
  }
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Volcengine Responses API Transport — SSE streaming implementation.
3
+ *
4
+ * Implements the Volcengine Ark Responses API (`/api/v3/responses`),
5
+ * which is the officially recommended primary path for Doubao LLM text generation
6
+ * (250615+ models: doubao-seed-2.0 series).
7
+ *
8
+ * Key differences from OpenAI Chat Completions:
9
+ * - Endpoint: POST {baseUrl}/v3/responses
10
+ * - Request body uses `input` (not `messages`), `instructions`, `thinking`, `reasoning`
11
+ * - SSE events: response.output_text.delta, response.reasoning_summary_text.delta,
12
+ * response.function_call_arguments.delta, response.completed, etc.
13
+ * - Tool calling: function_call / function_call_output with call_id
14
+ * - Context persistence: previous_response_id for server-side session continuation
15
+ * - Deep thinking: thinking.type (enabled/disabled/auto) + reasoning.effort
16
+ *
17
+ * Docs: https://www.volcengine.com/docs/82379/1399008
18
+ */
19
+ import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
20
+ import type { ProviderQuirks } from "../provider-def.js";
21
+ export interface VolcengineResponsesTransportConfig {
22
+ baseUrl: string;
23
+ extraHeaders?: Record<string, string>;
24
+ timeoutMs?: number;
25
+ quirks?: ProviderQuirks;
26
+ }
27
+ export declare class VolcengineResponsesTransport implements LLMTransport {
28
+ private baseUrl;
29
+ private extraHeaders;
30
+ private timeoutMs;
31
+ private quirks;
32
+ constructor(config: VolcengineResponsesTransportConfig);
33
+ stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
34
+ /**
35
+ * Resolve known Volcengine Responses API incompatibilities:
36
+ * - instructions + caching → drop caching (§20.7)
37
+ * - caching + json_schema → downgrade to json_object (§20.10)
38
+ * - caching + builtin_web_search/image_process → drop those builtin tools
39
+ * Returns a shallow copy with fields adjusted; never mutates the original.
40
+ */
41
+ private resolveConstraints;
42
+ private buildRequestBody;
43
+ private fetchAndStream;
44
+ private handleNonStreamingResponse;
45
+ /**
46
+ * Parse Volcengine Responses API SSE stream.
47
+ *
48
+ * Event format: "event: <type>\ndata: <json>\n\n"
49
+ * Key events:
50
+ * - response.output_text.delta → text content delta
51
+ * - response.reasoning_summary_text.delta → thinking/reasoning text
52
+ * - response.function_call_arguments.delta → tool call arguments streaming
53
+ * - response.output_item.added → new output item started
54
+ * - response.output_item.done → output item completed
55
+ * - response.completed → full response complete with usage
56
+ * - response.failed → error
57
+ */
58
+ private parseSSEStream;
59
+ private processEvent;
60
+ }