@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,332 @@
1
+ /**
2
+ * Realtime WebSocket Transport — bidirectional audio/voice streaming
3
+ * via the OpenAI Realtime API protocol (also compatible with GLM Realtime).
4
+ *
5
+ * ## Protocol: WebSocket JSON events
6
+ *
7
+ * Client → Server:
8
+ * - session.update: configure session (model, voice, tools, etc.)
9
+ * - input_audio_buffer.append: send audio chunks (base64 PCM16)
10
+ * - input_audio_buffer.commit: signal end of audio input
11
+ * - conversation.item.create: inject text/function_result items
12
+ * - response.create: request a model response
13
+ * - response.cancel: abort in-progress response
14
+ *
15
+ * Server → Client:
16
+ * - session.created: session initialized
17
+ * - session.updated: config acknowledged
18
+ * - input_audio_buffer.speech_started: VAD detected speech
19
+ * - input_audio_buffer.speech_stopped: VAD detected silence
20
+ * - response.created: response generation started
21
+ * - response.output_item.added: new output item (text/audio/function_call)
22
+ * - response.audio.delta: audio chunk (base64 PCM16)
23
+ * - response.audio_transcript.delta: transcript of generated speech
24
+ * - response.text.delta: text generation delta
25
+ * - response.function_call_arguments.delta: tool call args delta
26
+ * - response.function_call_arguments.done: tool call complete
27
+ * - response.output_item.done: output item finished
28
+ * - response.done: full response complete
29
+ * - error: server error
30
+ *
31
+ * ## Architecture
32
+ *
33
+ * RealtimeTransport manages a single persistent WebSocket connection per session.
34
+ * It exposes an event-driven API (AsyncGenerator) that the agent tool-loop
35
+ * can consume for voice-enabled interactions.
36
+ *
37
+ * Docs:
38
+ * - OpenAI: https://platform.openai.com/docs/api-reference/realtime
39
+ * - GLM: https://docs.bigmodel.cn/cn/guide/develop/realtime-api
40
+ */
41
+ // ── Transport ────────────────────────────────────────────────────
42
+ /**
43
+ * Manages a persistent WebSocket connection for real-time audio/voice
44
+ * interactions with an LLM provider.
45
+ *
46
+ * Usage:
47
+ * ```ts
48
+ * const rt = new RealtimeTransport(config);
49
+ * rt.connect();
50
+ *
51
+ * // Send audio
52
+ * rt.appendAudio(base64Chunk);
53
+ * rt.commitAudio();
54
+ *
55
+ * // Or send text
56
+ * rt.sendText("Hello!");
57
+ *
58
+ * // Submit function results
59
+ * rt.sendFunctionResult(callId, result);
60
+ *
61
+ * // Consume events
62
+ * for await (const event of rt.events()) {
63
+ * switch (event.type) {
64
+ * case "audio_delta": playAudio(event.delta); break;
65
+ * case "function_call_done": handleToolCall(event); break;
66
+ * }
67
+ * }
68
+ *
69
+ * rt.close();
70
+ * ```
71
+ */
72
+ export class RealtimeTransport {
73
+ ws = null;
74
+ config;
75
+ eventQueue = [];
76
+ waiters = [];
77
+ closed = false;
78
+ constructor(config) {
79
+ this.config = config;
80
+ }
81
+ /** Open WebSocket connection and configure session. */
82
+ async connect() {
83
+ const url = this.buildUrl();
84
+ // Use native WebSocket (Node 22+ has global WebSocket)
85
+ // For older Node, set globalThis.WebSocket from 'ws' package before calling
86
+ const WS = globalThis.WebSocket;
87
+ if (!WS)
88
+ throw new Error("WebSocket not available. Node 22+ required or polyfill globalThis.WebSocket.");
89
+ // Pass auth headers via protocol sub-protocol trick (OpenAI) or URL query (GLM)
90
+ const protocols = this.config.authMode === "header"
91
+ ? ["realtime", `openai-insecure-api-key.${this.config.apiKey}`, "openai-beta.realtime-v1"]
92
+ : undefined;
93
+ this.ws = new WS(url, protocols);
94
+ this.ws.onmessage = (event) => {
95
+ try {
96
+ const data = typeof event.data === "string" ? event.data : String(event.data);
97
+ const msg = JSON.parse(data);
98
+ const events = this.parseServerEvent(msg);
99
+ for (const ev of events) {
100
+ this.push(ev);
101
+ }
102
+ }
103
+ catch {
104
+ this.push({ type: "error", code: "parse_error", message: "Failed to parse server event" });
105
+ }
106
+ };
107
+ this.ws.onclose = (event) => {
108
+ this.push({ type: "closed", code: event.code, reason: event.reason });
109
+ this.closed = true;
110
+ this.drainWaiters();
111
+ };
112
+ this.ws.onerror = () => {
113
+ this.push({ type: "error", code: "ws_error", message: "WebSocket connection error" });
114
+ };
115
+ // Wait for connection to be established
116
+ await new Promise((resolve, reject) => {
117
+ this.ws.onopen = () => {
118
+ this.sendSessionUpdate();
119
+ resolve();
120
+ };
121
+ this.ws.onerror = () => reject(new Error("WebSocket connection failed"));
122
+ });
123
+ }
124
+ /** Send audio data (base64 PCM16). */
125
+ appendAudio(base64Chunk) {
126
+ this.send({
127
+ type: "input_audio_buffer.append",
128
+ audio: base64Chunk,
129
+ });
130
+ }
131
+ /** Mark end of audio input and trigger response. */
132
+ commitAudio() {
133
+ this.send({ type: "input_audio_buffer.commit" });
134
+ }
135
+ /** Send a text message. */
136
+ sendText(text) {
137
+ this.send({
138
+ type: "conversation.item.create",
139
+ item: {
140
+ type: "message",
141
+ role: "user",
142
+ content: [{ type: "input_text", text }],
143
+ },
144
+ });
145
+ }
146
+ /** Submit a function call result back to the model. */
147
+ sendFunctionResult(callId, output) {
148
+ this.send({
149
+ type: "conversation.item.create",
150
+ item: {
151
+ type: "function_call_output",
152
+ call_id: callId,
153
+ output,
154
+ },
155
+ });
156
+ // Request a new response after submitting the result
157
+ this.send({ type: "response.create" });
158
+ }
159
+ /** Trigger a model response (e.g. after sending text). */
160
+ requestResponse() {
161
+ this.send({ type: "response.create" });
162
+ }
163
+ /** Cancel an in-progress response. */
164
+ cancelResponse() {
165
+ this.send({ type: "response.cancel" });
166
+ }
167
+ /** Async iterator of server events. */
168
+ async *events() {
169
+ while (!this.closed || this.eventQueue.length > 0) {
170
+ if (this.eventQueue.length > 0) {
171
+ yield this.eventQueue.shift();
172
+ }
173
+ else {
174
+ const event = await new Promise((resolve) => {
175
+ this.waiters.push(resolve);
176
+ });
177
+ if (event.done)
178
+ return;
179
+ yield event.value;
180
+ }
181
+ }
182
+ }
183
+ /** Close the WebSocket connection. */
184
+ close() {
185
+ this.closed = true;
186
+ if (this.ws && this.ws.readyState !== WebSocket.CLOSED) {
187
+ this.ws.close(1000, "client_close");
188
+ }
189
+ this.drainWaiters();
190
+ }
191
+ // 鈹€鈹€ Private 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€
192
+ buildUrl() {
193
+ let url = this.config.baseUrl;
194
+ if (this.config.authMode === "query") {
195
+ // GLM style: pass model + API key as query params
196
+ const sep = url.includes("?") ? "&" : "?";
197
+ url += `${sep}model=${encodeURIComponent(this.config.model)}&token=${encodeURIComponent(this.config.apiKey)}`;
198
+ }
199
+ else {
200
+ // OpenAI style: model in query, auth via sub-protocol
201
+ const sep = url.includes("?") ? "&" : "?";
202
+ url += `${sep}model=${encodeURIComponent(this.config.model)}`;
203
+ }
204
+ return url;
205
+ }
206
+ sendSessionUpdate() {
207
+ this.send({
208
+ type: "session.update",
209
+ session: {
210
+ model: this.config.model,
211
+ voice: this.config.voice ?? "alloy",
212
+ modalities: this.config.outputModalities ?? ["text", "audio"],
213
+ instructions: "You are a helpful assistant.",
214
+ temperature: this.config.temperature ?? 0.8,
215
+ tools: this.config.tools?.map(t => ({
216
+ type: t.type,
217
+ name: t.name,
218
+ description: t.description,
219
+ parameters: t.parameters,
220
+ })) ?? [],
221
+ turn_detection: this.config.vadMode === "none"
222
+ ? null
223
+ : {
224
+ type: "server_vad",
225
+ threshold: this.config.vadThreshold ?? 0.5,
226
+ prefix_padding_ms: 300,
227
+ silence_duration_ms: 500,
228
+ },
229
+ },
230
+ });
231
+ }
232
+ send(msg) {
233
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
234
+ throw new Error("WebSocket not connected");
235
+ }
236
+ this.ws.send(JSON.stringify(msg));
237
+ }
238
+ push(event) {
239
+ if (this.waiters.length > 0) {
240
+ const waiter = this.waiters.shift();
241
+ waiter({ value: event, done: false });
242
+ }
243
+ else {
244
+ this.eventQueue.push(event);
245
+ }
246
+ }
247
+ drainWaiters() {
248
+ for (const waiter of this.waiters) {
249
+ waiter({ value: undefined, done: true });
250
+ }
251
+ this.waiters.length = 0;
252
+ }
253
+ /**
254
+ * Parse a server-sent JSON event into our typed event(s).
255
+ */
256
+ parseServerEvent(msg) {
257
+ const type = msg.type;
258
+ switch (type) {
259
+ case "session.created":
260
+ return [{ type: "session_created", sessionId: String(msg.session?.id ?? "") }];
261
+ case "input_audio_buffer.speech_started":
262
+ return [{ type: "speech_started" }];
263
+ case "input_audio_buffer.speech_stopped":
264
+ return [{ type: "speech_stopped", audioEndMs: Number(msg.audio_end_ms ?? 0) }];
265
+ case "response.audio.delta":
266
+ return [{ type: "audio_delta", delta: String(msg.delta ?? "") }];
267
+ case "response.audio_transcript.delta":
268
+ return [{ type: "audio_transcript_delta", delta: String(msg.delta ?? "") }];
269
+ case "response.text.delta":
270
+ return [{ type: "text_delta", delta: String(msg.delta ?? "") }];
271
+ case "response.function_call_arguments.delta": {
272
+ return [{
273
+ type: "function_call_delta",
274
+ callId: String(msg.call_id ?? ""),
275
+ delta: String(msg.delta ?? ""),
276
+ }];
277
+ }
278
+ case "response.function_call_arguments.done": {
279
+ return [{
280
+ type: "function_call_done",
281
+ callId: String(msg.call_id ?? ""),
282
+ name: String(msg.name ?? ""),
283
+ arguments: String(msg.arguments ?? ""),
284
+ }];
285
+ }
286
+ case "response.output_item.added": {
287
+ const item = msg.item;
288
+ if (item?.type === "function_call") {
289
+ return [{
290
+ type: "function_call_start",
291
+ callId: String(item.call_id ?? ""),
292
+ name: String(item.name ?? ""),
293
+ }];
294
+ }
295
+ return [];
296
+ }
297
+ case "response.done": {
298
+ const response = msg.response;
299
+ const usage = response?.usage;
300
+ return [{
301
+ type: "response_done",
302
+ usage: usage ? {
303
+ inputTokens: usage.input_tokens ?? 0,
304
+ outputTokens: usage.output_tokens ?? 0,
305
+ inputAudioTokens: response?.input_token_details?.audio_tokens,
306
+ outputAudioTokens: response?.output_token_details?.audio_tokens,
307
+ } : undefined,
308
+ }];
309
+ }
310
+ case "error": {
311
+ const error = msg.error;
312
+ return [{
313
+ type: "error",
314
+ code: String(error?.code ?? "unknown"),
315
+ message: String(error?.message ?? "Unknown error"),
316
+ }];
317
+ }
318
+ // Ignored events (acknowledgements, intermediate states)
319
+ case "session.updated":
320
+ case "response.created":
321
+ case "response.output_item.done":
322
+ case "conversation.item.created":
323
+ case "input_audio_buffer.committed":
324
+ case "input_audio_buffer.cleared":
325
+ case "rate_limits.updated":
326
+ return [];
327
+ default:
328
+ // Unknown event 鈥?silently ignore for forward compatibility
329
+ return [];
330
+ }
331
+ }
332
+ }
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Volcengine Grounding — spatial coordinate parser (volcengine-ProviderMax §14).
3
+ *
4
+ * Parses model-emitted spatial reference tags from text output:
5
+ * - <bbox>x_min y_min x_max y_max</bbox> → bounding box
6
+ * - <point>x y</point> → single point
7
+ * - <polygon>x1 y1 x2 y2 ...</polygon> → polygon vertices
8
+ *
9
+ * All coordinates are in normalized 1000×1000 space, range [0, 999].
10
+ * Use `bboxToPixels()` / `pointToPixels()` to convert to actual image pixel coordinates.
11
+ */
12
+ export type SpatialReference = { // One parsed grounding tag; a union discriminated on `type`.
13
+ type: "bbox"; // from a <bbox> tag
14
+ x1: number; // first value in the tag (x_min)
15
+ y1: number; // second value (y_min)
16
+ x2: number; // third value (x_max)
17
+ y2: number; // fourth value (y_max)
18
+ space: "normalized_1000"; // coordinates are in the normalized 1000x1000 space
19
+ } | {
20
+ type: "point"; // from a <point> tag
21
+ x: number;
22
+ y: number;
23
+ space: "normalized_1000";
24
+ } | {
25
+ type: "polygon"; // from a <polygon> tag
26
+ points: Array<{ // vertices in the order they appear in the tag
27
+ x: number;
28
+ y: number;
29
+ }>;
30
+ space: "normalized_1000";
31
+ };
32
+ export interface PixelBbox { // Bounding box in image pixel coordinates (return type of bboxToPixels).
33
+ x1: number;
34
+ y1: number;
35
+ x2: number;
36
+ y2: number;
37
+ }
38
+ export interface PixelPoint { // Point in image pixel coordinates (return type of pointToPixels).
39
+ x: number;
40
+ y: number;
41
+ }
42
+ /**
43
+ * Extract all spatial references from model output text.
44
+ * Returns an empty array if no grounding tags are found.
+ *
+ * Results are grouped by tag kind (all bboxes, then all points, then all
+ * polygons) rather than by position in the text. Tags containing a
+ * coordinate outside [0, 999] are skipped.
+ *
+ * @param text Model output text to scan for grounding tags.
+ * @returns The parsed references, all in the normalized 1000x1000 space.
45
+ */
46
+ export declare function parseGroundingTags(text: string): SpatialReference[];
47
+ /**
48
+ * Convert a normalized 1000×1000 bounding box to pixel coordinates.
+ *
+ * @param ref Bounding-box reference in the normalized space.
+ * @param width Scale applied to the x coordinates (target image width).
+ * @param height Scale applied to the y coordinates (target image height).
+ * @returns The box with each coordinate scaled and rounded to an integer.
49
+ */
50
+ export declare function bboxToPixels(ref: Extract<SpatialReference, {
51
+ type: "bbox";
52
+ }>, width: number, height: number): PixelBbox;
53
+ /**
54
+ * Convert a normalized 1000×1000 point to pixel coordinates.
+ *
+ * @param ref Point reference in the normalized space.
+ * @param width Scale applied to the x coordinate (target image width).
+ * @param height Scale applied to the y coordinate (target image height).
+ * @returns The point with each coordinate scaled and rounded to an integer.
55
+ */
56
+ export declare function pointToPixels(ref: Extract<SpatialReference, {
57
+ type: "point";
58
+ }>, width: number, height: number): PixelPoint;
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Volcengine Grounding — spatial coordinate parser (volcengine-ProviderMax §14).
3
+ *
4
+ * Parses model-emitted spatial reference tags from text output:
5
+ * - <bbox>x_min y_min x_max y_max</bbox> → bounding box
6
+ * - <point>x y</point> → single point
7
+ * - <polygon>x1 y1 x2 y2 ...</polygon> → polygon vertices
8
+ *
9
+ * All coordinates are in normalized 1000×1000 space, range [0, 999].
10
+ * Use `bboxToPixels()` / `pointToPixels()` to convert to actual image pixel coordinates.
11
+ */
12
+ // ── Parsing ──────────────────────────────────────────────────────
13
+ const BBOX_RE = /<bbox>\s*(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s*<\/bbox>/g; // <bbox>: exactly four whitespace-separated digit runs, one capture group each
14
+ const POINT_RE = /<point>\s*(\d+)\s+(\d+)\s*<\/point>/g; // <point>: exactly two whitespace-separated digit runs
15
+ const POLYGON_RE = /<polygon>\s*([\d\s]+?)\s*<\/polygon>/g; // <polygon>: one group of digits/whitespace; count and parity are checked in parseGroundingTags
16
+ /**
17
+ * Extract all spatial references from model output text.
18
+ * Returns an empty array if no grounding tags are found.
19
+ */
20
+ export function parseGroundingTags(text) {
21
+ const results = [];
22
+ for (const m of text.matchAll(BBOX_RE)) {
23
+ const [x1, y1, x2, y2] = [+m[1], +m[2], +m[3], +m[4]];
24
+ if (isValidCoord(x1) && isValidCoord(y1) && isValidCoord(x2) && isValidCoord(y2)) {
25
+ results.push({ type: "bbox", x1, y1, x2, y2, space: "normalized_1000" });
26
+ }
27
+ }
28
+ for (const m of text.matchAll(POINT_RE)) {
29
+ const [x, y] = [+m[1], +m[2]];
30
+ if (isValidCoord(x) && isValidCoord(y)) {
31
+ results.push({ type: "point", x, y, space: "normalized_1000" });
32
+ }
33
+ }
34
+ for (const m of text.matchAll(POLYGON_RE)) {
35
+ const nums = m[1].trim().split(/\s+/).map(Number);
36
+ if (nums.length >= 4 && nums.length % 2 === 0 && nums.every(isValidCoord)) {
37
+ const points = [];
38
+ for (let i = 0; i < nums.length; i += 2) {
39
+ points.push({ x: nums[i], y: nums[i + 1] });
40
+ }
41
+ results.push({ type: "polygon", points, space: "normalized_1000" });
42
+ }
43
+ }
44
+ return results;
45
+ }
46
+ // ── Coordinate Conversion ────────────────────────────────────────
47
+ /**
48
+ * Convert a normalized 1000脳1000 bounding box to pixel coordinates.
49
+ */
50
+ export function bboxToPixels(ref, width, height) {
51
+ return {
52
+ x1: Math.round(ref.x1 / 1000 * width),
53
+ y1: Math.round(ref.y1 / 1000 * height),
54
+ x2: Math.round(ref.x2 / 1000 * width),
55
+ y2: Math.round(ref.y2 / 1000 * height),
56
+ };
57
+ }
58
+ /**
59
+ * Convert a normalized 1000脳1000 point to pixel coordinates.
60
+ */
61
+ export function pointToPixels(ref, width, height) {
62
+ return {
63
+ x: Math.round(ref.x / 1000 * width),
64
+ y: Math.round(ref.y / 1000 * height),
65
+ };
66
+ }
67
+ function isValidCoord(n) {
68
+ return Number.isInteger(n) && n >= 0 && n <= 999;
69
+ }
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Volcengine Media Transport — Doubao Seedream (image), Seedance (video), 3D generation.
3
+ *
4
+ * API reference:
5
+ * Image: POST /v3/images/generations (sync)
6
+ * Video: POST /v3/contents/generations/tasks (async job)
7
+ * 3D: POST /v3/contents/generations/tasks (async job, same endpoint as video)
8
+ *
9
+ * Auth: Authorization: Bearer $ARK_API_KEY
10
+ * Docs: https://www.volcengine.com/docs/82379/1330310
11
+ * https://www.volcengine.com/docs/82379/1874993 (3D)
12
+ */
13
+ import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
14
+ export interface VolcengineMediaConfig { // Constructor options for VolcengineMediaTransport.
15
+ /** Base URL, e.g. "https://ark.cn-beijing.volces.com/api" */
16
+ baseUrl: string;
17
+ timeoutMs?: number; // Optional timeout in milliseconds. NOTE(review): scope (per request vs. whole job) and default are not visible here; confirm in the implementation.
18
+ }
19
+ export declare class VolcengineMediaTransport implements AsyncMediaTransport { // Media transport for the Volcengine API; also exposes task and file management calls.
20
+ readonly supportedTypes: readonly MediaType[]; // Media types this transport declares support for.
21
+ private baseUrl;
22
+ private timeoutMs;
23
+ constructor(config: VolcengineMediaConfig);
24
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>; // Main entry point. NOTE(review): presumably dispatches to the private generate* methods by request type; confirm in the implementation.
25
+ private generateEmbedding;
26
+ /**
27
+ * Check if this transport can handle a given operation.
28
+ * Video edit/merge/upscale are routed through the same video endpoint.
29
+ */
30
+ canHandle(request: MediaRequest): boolean;
31
+ private generateImage;
32
+ /**
33
+ * Parse streaming image SSE — yields progressive image quality upgrades.
34
+ * Final event contains the full-quality image URL.
35
+ */
36
+ private parseStreamingImage;
37
+ private generateVideo;
38
+ private generate3D;
39
+ /**
40
+ * Query a single video generation task by ID.
41
+ * GET /v3/contents/generations/tasks/{taskId}
42
+ */
43
+ getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
44
+ status: string;
45
+ task: Record<string, unknown>;
46
+ }>;
47
+ /**
48
+ * List video generation tasks with optional filters.
49
+ * GET /v3/contents/generations/tasks
50
+ */
51
+ listVideoTasks(apiKey: string, options?: {
52
+ after?: string;
53
+ limit?: number;
54
+ status?: string;
55
+ }, signal?: AbortSignal): Promise<Record<string, unknown>>;
56
+ /**
57
+ * Cancel or delete a video generation task.
58
+ * DELETE /v3/contents/generations/tasks/{taskId}
59
+ */
60
+ deleteVideoTask(taskId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
61
+ /**
62
+ * Upload a file to Volcengine Files API for reuse in multimodal requests.
63
+ * POST /v3/files
64
+ */
65
+ uploadFile(file: Blob | Buffer, apiKey: string, options?: {
66
+ purpose?: string;
67
+ filename?: string;
68
+ }, signal?: AbortSignal): Promise<{
69
+ id: string;
70
+ status: string;
71
+ }>;
72
+ /**
73
+ * Get file info by ID.
74
+ * GET /v3/files/{fileId}
75
+ */
76
+ getFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<Record<string, unknown>>;
77
+ /**
78
+ * List uploaded files.
79
+ * GET /v3/files
80
+ */
81
+ listFiles(apiKey: string, options?: {
82
+ after?: string;
83
+ limit?: number;
84
+ purpose?: string;
85
+ order?: "asc" | "desc";
86
+ }, signal?: AbortSignal): Promise<Record<string, unknown>>;
87
+ /**
88
+ * Delete a file.
89
+ * DELETE /v3/files/{fileId}
90
+ */
91
+ deleteFile(fileId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
92
+ private submitTask; // NOTE(review): presumably creates an async generation task (POST .../tasks); confirm in the implementation.
93
+ private pollTask; // NOTE(review): presumably polls a submitted task until it finishes; confirm in the implementation.
94
+ }