@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,801 @@
1
+ /**
2
+ * Volcengine Media Transport — Doubao Seedream (image), Seedance (video), 3D generation.
3
+ *
4
+ * API reference:
5
+ * Image: POST /v3/images/generations (sync)
6
+ * Video: POST /v3/contents/generations/tasks (async job)
7
+ * 3D: POST /v3/contents/generations/tasks (async job, same endpoint as video)
8
+ *
9
+ * Auth: Authorization: Bearer $ARK_API_KEY
10
+ * Docs: https://www.volcengine.com/docs/82379/1330310
11
+ * https://www.volcengine.com/docs/82379/1874993 (3D)
12
+ */
13
// Per-request timeout applied when the transport config does not provide one (3 minutes).
const DEFAULT_TIMEOUT_MS = 3 * 60 * 1000;
// Pause between two consecutive task-status polls (3 seconds).
const POLL_INTERVAL_MS = 3 * 1000;
// Upper bound on the total time spent polling a video/3D task (10 minutes).
const MAX_POLL_MS = 10 * 60 * 1000;
16
/**
 * Media transport for the Volcengine (Doubao) generation endpoints:
 * Seedream images (synchronous), Seedance video and 3D assets (asynchronous
 * tasks), multimodal embeddings, plus task-management and Files API helpers.
 *
 * Every request authenticates with `Authorization: Bearer <apiKey>`.
 */
export class VolcengineMediaTransport {
    supportedTypes = ["image", "video", "3d", "embedding"];
    baseUrl;
    timeoutMs;
    pollIntervalMs;
    maxPollMs;

    /**
     * @param {object} config
     * @param {string} config.baseUrl - API root; trailing slashes are stripped.
     * @param {number} [config.timeoutMs] - Per-request timeout used when the caller passes no signal.
     * @param {number} [config.pollIntervalMs] - Delay between task polls (defaults to POLL_INTERVAL_MS).
     * @param {number} [config.maxPollMs] - Total polling budget per task (defaults to MAX_POLL_MS).
     */
    constructor(config) {
        this.baseUrl = config.baseUrl.replace(/\/+$/, "");
        this.timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
        this.pollIntervalMs = config.pollIntervalMs ?? POLL_INTERVAL_MS;
        this.maxPollMs = config.maxPollMs ?? MAX_POLL_MS;
    }

    /**
     * Dispatch a generation request to the handler for `request.mediaType`.
     * @throws {Error} When the media type is not one of `supportedTypes`.
     */
    async generate(request, apiKey, signal) {
        switch (request.mediaType) {
            case "image":
                return this.generateImage(request, apiKey, signal);
            case "video":
                return this.generateVideo(request, apiKey, signal);
            case "3d":
                return this.generate3D(request, apiKey, signal);
            case "embedding":
                return this.generateEmbedding(request, apiKey, signal);
            default:
                throw new Error(`VolcengineMediaTransport: unsupported mediaType "${request.mediaType}"`);
        }
    }

    /**
     * Create a text embedding through the multimodal embeddings endpoint.
     * Vectors are returned in `metadata.embeddings`; `mediaUrls` is always empty.
     * @throws {Error} When neither `request.text` nor `request.prompt` is set, or on a non-2xx response.
     */
    async generateEmbedding(request, apiKey, signal) {
        const start = Date.now();
        const text = request.text || request.prompt;
        if (!text)
            throw new Error("VolcengineMediaTransport: text or prompt is required for embedding");
        // NOTE(review): unlike every other endpoint in this class, this path has no
        // "/v3" prefix — confirm against the configured baseUrl.
        const url = `${this.baseUrl}/embeddings/multimodal`;
        const payload = {
            model: request.model || "doubao-embedding-vision-251215",
            input: [{ type: "text", text }],
        };
        const res = await this.#send("embedding", url, this.#jsonPost(payload, apiKey), signal);
        const data = await res.json();
        // `data.data` may be a single object or an array of objects.
        const dataItems = Array.isArray(data.data)
            ? data.data
            : data.data
                ? [data.data]
                : [];
        const embeddings = dataItems
            .map((item) => item.embedding)
            .filter((vector) => Array.isArray(vector));
        const totalTokens = data.usage?.total_tokens ?? data.usage?.prompt_tokens;
        return {
            mediaUrls: [],
            model: data.model ?? request.model ?? "doubao-embedding-vision-251215",
            durationMs: Date.now() - start,
            billingUnit: totalTokens !== undefined ? "per_token" : undefined,
            billingQuantity: totalTokens,
            metadata: {
                embeddings,
                dimensions: embeddings[0]?.length ?? 0,
                usage: data.usage,
            },
        };
    }

    /**
     * Report whether this transport supports the request's media type.
     * @returns {boolean}
     */
    canHandle(request) {
        return this.supportedTypes.includes(request.mediaType);
    }

    // ── Image (Seedream) — sync ──────────────────────────────────────────

    /**
     * Generate images with POST /v3/images/generations.
     * When `request.streamImage` is set and the response has a body, the SSE
     * stream is consumed by `parseStreamingImage` instead of parsing JSON.
     */
    async generateImage(request, apiKey, signal) {
        const start = Date.now();
        const url = `${this.baseUrl}/v3/images/generations`;
        const body = {
            model: request.model,
            prompt: request.prompt,
            size: request.size ?? "2K",
            n: request.n ?? 1,
            response_format: "url",
            sequential_image_generation: "disabled",
            watermark: true,
        };
        if (request.imageUrl)
            body.image = request.imageUrl;
        // Multi-reference images for consistency generation.
        if (request.referenceImages && request.referenceImages.length > 0) {
            body.image_urls = request.referenceImages;
        }
        if (request.style)
            body.style = request.style;
        if (request.quality)
            body.quality = request.quality;
        if (request.seed !== undefined)
            body.seed = request.seed;
        // Carried over from the original note: guidance_scale is only supported
        // by Seedream 3.0, not 4.5/5.0.
        if (request.guidanceScale !== undefined)
            body.guidance_scale = request.guidanceScale;
        if (request.enhancePrompt !== undefined)
            body.enhance_prompt = request.enhancePrompt;
        // Progressive mode: previews arrive first, the final image last.
        if (request.streamImage)
            body.stream = true;
        const res = await this.#send("image API", url, this.#jsonPost(body, apiKey), signal);
        if (request.streamImage && res.body) {
            return this.parseStreamingImage(res.body, request, start);
        }
        const data = await res.json();
        const mediaUrls = (data.data ?? [])
            .map((entry) => entry.url)
            .filter((candidate) => !!candidate);
        const imageCount = request.n ?? Math.max(mediaUrls.length, 1);
        // NOTE(review): the request defaults `size` to "2K" while the reported
        // size/pricingSpec default to "1024x1024". Left as-is because pricing
        // lookups may depend on the reported value — confirm which is intended.
        const reportedSize = request.size ?? "1024x1024";
        return {
            mediaUrls,
            model: request.model,
            size: reportedSize,
            durationMs: Date.now() - start,
            billingUnit: "per_call",
            billingQuantity: imageCount,
            metadata: {
                billableUnits: { images: imageCount },
                pricingSpec: reportedSize,
            },
        };
    }

    /**
     * Consume a streaming-image SSE body and return the last image URL seen.
     *
     * Reading stops as soon as a `[DONE]` event arrives. A final `data:` line
     * that is not newline-terminated is still processed once the stream ends.
     *
     * @param {ReadableStream<Uint8Array>} body - Response body stream.
     * @param {object} request - Original request; `onProgress` is invoked if present.
     * @param {number} startTime - Epoch ms at which the request started.
     */
    async parseStreamingImage(body, request, startTime) {
        const decoder = new TextDecoder();
        const reader = body.getReader();
        let pending = "";
        let lastUrl = "";
        let finished = false;
        const handleLine = (rawLine) => {
            const line = rawLine.trim();
            // Blank lines, SSE comments (":") and non-data fields are skipped.
            if (!line.startsWith("data:"))
                return;
            const payload = line.slice(5).trim();
            if (payload === "[DONE]") {
                finished = true;
                return;
            }
            try {
                const parsed = JSON.parse(payload);
                const url = parsed.data?.[0]?.url;
                if (url) {
                    lastUrl = url;
                    request.onProgress?.(50, "streaming");
                }
            }
            catch {
                // Best effort, as before: malformed events must not abort the stream.
            }
        };
        try {
            while (!finished) {
                const { done, value } = await reader.read();
                if (done) {
                    // Flush bytes the decoder may still be holding.
                    pending += decoder.decode();
                    break;
                }
                pending += decoder.decode(value, { stream: true });
                let newlineIdx;
                while (!finished && (newlineIdx = pending.indexOf("\n")) !== -1) {
                    handleLine(pending.slice(0, newlineIdx));
                    pending = pending.slice(newlineIdx + 1);
                }
            }
            if (finished) {
                // Nothing after [DONE] is needed; release the connection early.
                await reader.cancel().catch(() => undefined);
            }
            else if (pending) {
                handleLine(pending);
            }
        }
        finally {
            reader.releaseLock();
        }
        request.onProgress?.(100, "completed");
        return {
            mediaUrls: lastUrl ? [lastUrl] : [],
            model: request.model,
            size: request.size ?? "1024x1024",
            durationMs: Date.now() - startTime,
            billingUnit: "per_call",
            billingQuantity: 1,
            metadata: {
                billableUnits: { images: 1 },
                pricingSpec: request.size ?? "1024x1024",
            },
        };
    }

    // ── Video (Seedance) — async job ─────────────────────────────────────

    /**
     * Submit a video generation task, poll it to completion and return the
     * resulting video URL (plus `lastFrameUrl` when the API returns one).
     * @throws {Error} On invalid reference counts, submit/poll failures or timeout.
     */
    async generateVideo(request, apiKey, signal) {
        const start = Date.now();
        const url = `${this.baseUrl}/v3/contents/generations/tasks`;
        this.#validateVideoReferences(request);
        const body = {
            model: request.model,
            content: this.#buildVideoContent(request),
        };
        if (request.duration)
            body.duration = request.duration;
        if (request.aspectRatio)
            body.ratio = request.aspectRatio;
        if (request.resolution)
            body.resolution = request.resolution;
        if (request.seed !== undefined)
            body.seed = request.seed;
        if (request.fps)
            body.fps = request.fps;
        if (request.generateAudio !== undefined)
            body.generate_audio = request.generateAudio;
        if (request.watermark !== undefined)
            body.watermark = request.watermark;
        if (request.enhancePrompt !== undefined)
            body.enhance_prompt = request.enhancePrompt;
        // Advanced parameters.
        if (request.cameraFixed !== undefined)
            body.camera_fixed = request.cameraFixed;
        if (request.returnLastFrame !== undefined)
            body.return_last_frame = request.returnLastFrame;
        if (request.serviceTier)
            body.service_tier = request.serviceTier;
        if (request.executionExpiresAfterSeconds !== undefined)
            body.execution_expires_after = request.executionExpiresAfterSeconds;
        if (request.draft !== undefined)
            body.draft = request.draft;
        if (request.safetyIdentifier)
            body.safety_identifier = request.safetyIdentifier;
        if (request.callbackUrl)
            body.callback_url = request.callbackUrl;
        if (request.videoTools?.length) {
            body.tools = request.videoTools.map((name) => ({ type: name }));
        }
        const taskId = await this.submitTask(url, body, apiKey, signal);
        const result = await this.pollTask(this.#taskUrl(taskId), apiKey, signal, request.onProgress, taskId);
        // The finished task carries `content` as an object, not an array.
        const resultContent = result.content;
        const mediaUrls = [];
        if (typeof resultContent?.video_url === "string") {
            mediaUrls.push(resultContent.video_url);
        }
        // The last frame lets callers chain continuous segments.
        const lastFrameUrl = typeof resultContent?.last_frame_url === "string"
            ? resultContent.last_frame_url
            : undefined;
        const tokenUsage = extractGeneratedTokenUsage(result);
        return {
            mediaUrls,
            model: request.model,
            durationMs: Date.now() - start,
            lastFrameUrl,
            taskId,
            billingUnit: "per_token",
            billingQuantity: tokenUsage.completionTokens,
            metadata: {
                billableUnits: {
                    completion_tokens: tokenUsage.completionTokens,
                    total_tokens: tokenUsage.totalTokens,
                },
                usage: tokenUsage.rawUsage,
                usageMissing: !tokenUsage.hasUsage,
                pricingSpec: request.resolution ?? "720p",
            },
        };
    }

    // ── 3D — async job ───────────────────────────────────────────────────

    /**
     * Submit a 3D generation task (same task endpoint as video), poll it and
     * collect every model URL found in the finished task.
     */
    async generate3D(request, apiKey, signal) {
        const start = Date.now();
        const url = `${this.baseUrl}/v3/contents/generations/tasks`;
        // The text item carries the `--fileformat` flag and the optional prompt;
        // the image item is the reference for image-to-3D.
        const content = [];
        const textParts = [];
        if (request.outputFormat)
            textParts.push(`--fileformat ${request.outputFormat}`);
        if (request.prompt)
            textParts.push(request.prompt);
        if (textParts.length > 0) {
            content.push({ type: "text", text: textParts.join(" ") });
        }
        if (request.imageUrl) {
            content.push({ type: "image_url", image_url: { url: request.imageUrl } });
        }
        const body = { model: request.model, content };
        const taskId = await this.submitTask(url, body, apiKey, signal);
        const result = await this.pollTask(this.#taskUrl(taskId), apiKey, signal, request.onProgress, taskId);
        const resultContent = result.content;
        const output = result.output;
        const mediaUrls = [];
        if (typeof resultContent?.video_url === "string") {
            mediaUrls.push(resultContent.video_url);
        }
        // Fallback 1: any http(s) string value inside `content`.
        if (mediaUrls.length === 0 && resultContent) {
            mediaUrls.push(...this.#httpUrlValues(resultContent));
        }
        // Alternative response shape: `output.model_urls`.
        if (output?.model_urls) {
            for (const candidate of Object.values(output.model_urls)) {
                // Only strings belong in mediaUrls; skip nested objects/numbers.
                if (typeof candidate === "string" && candidate && !mediaUrls.includes(candidate))
                    mediaUrls.push(candidate);
            }
        }
        // Fallback 2: a nested `data.content` object.
        if (mediaUrls.length === 0 && result.data?.content) {
            mediaUrls.push(...this.#httpUrlValues(result.data.content));
        }
        // Log the response shape only once every fallback has come up empty.
        if (mediaUrls.length === 0) {
            const keys = Object.keys(result);
            const contentKeys = resultContent ? Object.keys(resultContent) : [];
            const outputKeys = output ? Object.keys(output) : [];
            console.error(`[volcengine-3d] No media URLs found. Response keys: [${keys}], content keys: [${contentKeys}], output keys: [${outputKeys}]`);
        }
        const tokenUsage = extractGeneratedTokenUsage(result);
        const metadata = {
            billableUnits: {
                completion_tokens: tokenUsage.completionTokens,
                total_tokens: tokenUsage.totalTokens,
            },
            usage: tokenUsage.rawUsage,
            usageMissing: !tokenUsage.hasUsage,
        };
        if (mediaUrls.length === 0) {
            // Attach truncated samples so an unexpected response shape can be diagnosed.
            metadata.debugResponseKeys = Object.keys(result);
            metadata.debugContentKeys = resultContent ? Object.keys(resultContent) : null;
            metadata.debugContentSample = resultContent ? JSON.stringify(resultContent).slice(0, 500) : null;
            metadata.debugOutputKeys = output ? Object.keys(output) : null;
            metadata.debugResultSample = JSON.stringify(result).slice(0, 800);
        }
        else {
            metadata.pricingSpec = request.outputFormat ?? "3d";
        }
        return {
            mediaUrls,
            model: request.model,
            durationMs: Date.now() - start,
            billingUnit: "per_token",
            billingQuantity: tokenUsage.completionTokens,
            metadata,
        };
    }

    // ── Video task management ────────────────────────────────────────────

    /**
     * Query a single generation task.
     * GET /v3/contents/generations/tasks/{taskId}
     * @returns {Promise<{status: string, task: object}>} `status` is "unknown" when absent.
     */
    async getTaskStatus(taskId, apiKey, signal) {
        const res = await this.#send("task status", this.#taskUrl(taskId), this.#authorized("GET", apiKey), signal);
        const data = await res.json();
        return { status: data.status ?? "unknown", task: data };
    }

    /**
     * List generation tasks, optionally filtered by `after`, `limit` and `status`.
     * GET /v3/contents/generations/tasks
     */
    async listVideoTasks(apiKey, options, signal) {
        const url = this.#withQuery(`${this.baseUrl}/v3/contents/generations/tasks`, {
            after: options?.after,
            limit: options?.limit,
            status: options?.status,
        });
        const res = await this.#send("list tasks", url, this.#authorized("GET", apiKey), signal);
        return await res.json();
    }

    /**
     * Cancel or delete a generation task.
     * DELETE /v3/contents/generations/tasks/{taskId}
     */
    async deleteVideoTask(taskId, apiKey, signal) {
        await this.#send("delete task", this.#taskUrl(taskId), this.#authorized("DELETE", apiKey), signal);
    }

    // ── Files API ────────────────────────────────────────────────────────

    /**
     * Upload a file for reuse in multimodal requests.
     * POST /v3/files
     * @param {Blob|BufferSource|string} file - Non-Blob values are wrapped in a Blob.
     * @returns {Promise<{id: string, status: string}>}
     */
    async uploadFile(file, apiKey, options, signal) {
        const form = new FormData();
        form.append("file", file instanceof Blob ? file : new Blob([file]), options?.filename ?? "upload");
        form.append("purpose", options?.purpose ?? "user_data");
        const init = { ...this.#authorized("POST", apiKey), body: form };
        const res = await this.#send("file upload", `${this.baseUrl}/v3/files`, init, signal);
        const data = await res.json();
        return { id: String(data.id ?? ""), status: String(data.status ?? "") };
    }

    /**
     * Fetch file metadata by ID.
     * GET /v3/files/{fileId}
     */
    async getFile(fileId, apiKey, signal) {
        const res = await this.#send("get file", this.#fileUrl(fileId), this.#authorized("GET", apiKey), signal);
        return await res.json();
    }

    /**
     * List uploaded files, optionally filtered by `after`, `limit`, `purpose` and `order`.
     * GET /v3/files
     */
    async listFiles(apiKey, options, signal) {
        const url = this.#withQuery(`${this.baseUrl}/v3/files`, {
            after: options?.after,
            limit: options?.limit,
            purpose: options?.purpose,
            order: options?.order,
        });
        const res = await this.#send("list files", url, this.#authorized("GET", apiKey), signal);
        return await res.json();
    }

    /**
     * Delete an uploaded file.
     * DELETE /v3/files/{fileId}
     */
    async deleteFile(fileId, apiKey, signal) {
        await this.#send("delete file", this.#fileUrl(fileId), this.#authorized("DELETE", apiKey), signal);
    }

    // ── Shared helpers ───────────────────────────────────────────────────

    /**
     * POST a task definition and return the task identifier from the response
     * (`id`, falling back to `task_id`).
     * @throws {Error} When the response carries no identifier.
     */
    async submitTask(url, body, apiKey, signal) {
        const res = await this.#send("task submit", url, this.#jsonPost(body, apiKey), signal);
        const data = await res.json();
        const taskId = data.id ?? data.task_id;
        if (!taskId) {
            throw new Error("Volcengine task submit: no task_id in response");
        }
        return taskId;
    }

    /**
     * Poll a task URL until it succeeds, fails, is cancelled, the caller aborts
     * or the polling budget (`maxPollMs`) is exhausted.
     *
     * Progress is estimated from elapsed time and capped at 95% until the task
     * reports success.
     *
     * @returns {Promise<object>} The final task payload.
     * @throws {Error} On failure/cancellation, poll HTTP errors or timeout.
     */
    async pollTask(url, apiKey, signal, onProgress, taskId) {
        const startedAt = Date.now();
        const deadline = startedAt + this.maxPollMs;
        while (Date.now() < deadline) {
            signal?.throwIfAborted();
            const res = await this.#send("task poll", url, this.#authorized("GET", apiKey), signal);
            const data = await res.json();
            const status = data.status;
            if (status === "succeeded" || status === "complete") {
                onProgress?.(100, "completed", taskId);
                return data;
            }
            if (status === "failed" || status === "cancelled") {
                const errMsg = data.error?.message ?? "Task failed";
                throw new Error(`Volcengine task failed: ${errMsg}`);
            }
            const elapsed = Date.now() - startedAt;
            const estimatedPercent = Math.min(95, Math.round((elapsed / this.maxPollMs) * 100));
            onProgress?.(estimatedPercent, status ?? "running", taskId);
            // Still running: wait, but wake immediately if the caller aborts.
            await this.#sleep(this.pollIntervalMs, signal);
        }
        throw new Error("Volcengine task timed out after polling");
    }

    /** Perform a fetch and turn any non-2xx response into a labelled Error. */
    async #send(label, url, init, signal) {
        const res = await fetch(url, {
            ...init,
            // Without a caller signal, fall back to the configured timeout.
            signal: signal ?? AbortSignal.timeout(this.timeoutMs),
        });
        if (!res.ok) {
            const text = await res.text().catch(() => "");
            throw new Error(`Volcengine ${label} error ${res.status}: ${text}`);
        }
        return res;
    }

    /** Build fetch options for a bearer-authenticated request without a body. */
    #authorized(method, apiKey) {
        return { method, headers: { Authorization: `Bearer ${apiKey}` } };
    }

    /** Build fetch options for a bearer-authenticated JSON POST. */
    #jsonPost(payload, apiKey) {
        return {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${apiKey}`,
            },
            body: JSON.stringify(payload),
        };
    }

    /** URL of a single generation task; the ID is escaped as one path segment. */
    #taskUrl(taskId) {
        return `${this.baseUrl}/v3/contents/generations/tasks/${encodeURIComponent(taskId)}`;
    }

    /** URL of a single uploaded file; the ID is escaped as one path segment. */
    #fileUrl(fileId) {
        return `${this.baseUrl}/v3/files/${encodeURIComponent(fileId)}`;
    }

    /** Append the truthy entries of `fields` to `url` as a query string. */
    #withQuery(url, fields) {
        const params = new URLSearchParams();
        for (const [key, value] of Object.entries(fields)) {
            if (value)
                params.set(key, String(value));
        }
        const qs = params.toString();
        return qs ? `${url}?${qs}` : url;
    }

    /** Collect every string value of `obj` that looks like an http(s) URL. */
    #httpUrlValues(obj) {
        return Object.values(obj).filter((value) => typeof value === "string" && /^https?:\/\//.test(value));
    }

    /** Resolve after `ms`, or reject with the abort reason as soon as `signal` aborts. */
    #sleep(ms, signal) {
        return new Promise((resolve, reject) => {
            if (signal?.aborted) {
                reject(signal.reason);
                return;
            }
            const onAbort = () => {
                clearTimeout(timer);
                reject(signal.reason);
            };
            const timer = setTimeout(() => {
                signal?.removeEventListener("abort", onAbort);
                resolve();
            }, ms);
            signal?.addEventListener("abort", onAbort, { once: true });
        });
    }

    /** Enforce the reference-count rules before a video task is submitted. */
    #validateVideoReferences(request) {
        const refVideoCount = request.referenceVideos?.length ?? 0;
        const refAudioCount = request.referenceAudios?.length ?? 0;
        const refImageCount = request.referenceImages?.length ?? 0;
        const hasImage = !!(request.imageUrl || refImageCount > 0);
        // Only the counts are checked here; the messages also state a duration limit.
        if (refVideoCount > 3) {
            throw new Error(`Seedance 2.0: max 3 reference videos (got ${refVideoCount}). Total duration must be ≤15s.`);
        }
        if (refAudioCount > 3) {
            throw new Error(`Seedance 2.0: max 3 reference audios (got ${refAudioCount}). Total duration must be ≤15s.`);
        }
        // Audio must accompany at least one image or video.
        if (refAudioCount > 0 && !hasImage && refVideoCount === 0) {
            throw new Error(`Seedance 2.0: reference audio cannot be used alone. Provide at least one reference image or video.`);
        }
        if (request.operation === "edit" && refImageCount > 9) {
            throw new Error(`Seedance 2.0 video edit: max 9 reference images (got ${refImageCount}).`);
        }
    }

    /**
     * Build the `content` array of a video task for the requested operation.
     *
     *   multimodal_reference: text, role-tagged images, reference videos, reference audios
     *   extend:               source videos (role reference_video), text
     *   merge:                source videos (no role), text
     *   upscale:              first source video, text (defaults to "upscale")
     *   edit:                 source videos, reference images, text
     *   (none):               auto-detected from the inputs, see the default branch
     */
    #buildVideoContent(request) {
        const content = [];
        const mediaItem = (kind, url, role) => {
            const item = { type: `${kind}_url`, [`${kind}_url`]: { url } };
            if (role !== undefined)
                item.role = role;
            return item;
        };
        const addAll = (kind, urls, role) => {
            for (const url of urls ?? [])
                content.push(mediaItem(kind, url, role));
        };
        const addPrompt = () => {
            if (request.prompt)
                content.push({ type: "text", text: request.prompt });
        };
        const addMultimodalReference = () => {
            addPrompt();
            for (const [index, url] of (request.referenceImages ?? []).entries()) {
                content.push(mediaItem("image", url, request.imageRoles?.[index] ?? "reference_image"));
            }
            addAll("video", request.referenceVideos, "reference_video");
            addAll("audio", request.referenceAudios, "reference_audio");
        };
        // Draft promotion: an earlier draft task is passed as the first input item.
        if (request.draftTaskId) {
            content.push({ type: "draft_task", id: request.draftTaskId });
        }
        switch (request.operation) {
            case "multimodal_reference":
                addMultimodalReference();
                break;
            case "extend":
                addAll("video", request.sourceVideos, "reference_video");
                addPrompt();
                break;
            case "merge":
                addAll("video", request.sourceVideos);
                addPrompt();
                break;
            case "upscale":
                if (request.sourceVideos?.[0]) {
                    content.push(mediaItem("video", request.sourceVideos[0]));
                }
                content.push({ type: "text", text: request.prompt || "upscale" });
                break;
            case "edit":
                addAll("video", request.sourceVideos, "reference_video");
                addAll("image", request.referenceImages, "reference_image");
                addPrompt();
                break;
            default: {
                // Auto-detect:
                //   reference videos/audios or image roles → multimodal reference
                //   reference images without roles         → image-to-video
                //   imageUrl only                          → image-to-video
                //   otherwise                              → text-to-video
                const wantsMultimodal = (request.referenceVideos?.length ?? 0) > 0
                    || (request.referenceAudios?.length ?? 0) > 0
                    || (request.imageRoles?.length ?? 0) > 0;
                if (wantsMultimodal) {
                    addMultimodalReference();
                }
                else if (request.referenceImages?.length) {
                    addAll("image", request.referenceImages);
                    addPrompt();
                }
                else if (request.imageUrl) {
                    content.push(mediaItem("image", request.imageUrl));
                    addPrompt();
                }
                else {
                    addPrompt();
                }
                break;
            }
        }
        return content;
    }
}
773
/**
 * Normalise the token usage reported by a finished generation task.
 *
 * Usage is read from `result.usage`, falling back to `result.data.usage`.
 * Several spellings of each counter are tried in order; the first one that
 * `numericUsage` accepts wins.
 *
 * @param {object} result - Final task payload.
 * @returns {{completionTokens: number, totalTokens: number, hasUsage: boolean, rawUsage: *}}
 *   `completionTokens` falls back to the total-token counters and then to 0;
 *   `totalTokens` falls back to `completionTokens`.
 */
function extractGeneratedTokenUsage(result) {
    const rawUsage = result.usage ?? result.data?.usage;
    // Return the first counter in `keys` that holds a usable number.
    const firstNumeric = (keys) => {
        for (const key of keys) {
            const candidate = numericUsage(rawUsage?.[key]);
            if (candidate !== undefined && candidate !== null)
                return candidate;
        }
        return undefined;
    };
    const totalKeys = ["total_tokens", "totalTokens"];
    const completionKeys = ["completion_tokens", "completionTokens", "output_tokens", "outputTokens", ...totalKeys];
    const completionTokens = firstNumeric(completionKeys) ?? 0;
    const totalTokens = firstNumeric(totalKeys) ?? completionTokens;
    return {
        completionTokens,
        totalTokens,
        hasUsage: Boolean(rawUsage),
        rawUsage,
    };
}
792
/**
 * Coerce a usage counter to a finite number.
 *
 * @param {*} value - Raw counter, expected to be a number or a numeric string.
 * @returns {number|undefined} The finite number, or `undefined` for blank
 *   strings, non-numeric strings, non-finite numbers and every other type.
 */
function numericUsage(value) {
    switch (typeof value) {
        case "number":
            return Number.isFinite(value) ? value : undefined;
        case "string": {
            // Blank strings would otherwise convert to 0.
            if (!value.trim())
                return undefined;
            const converted = Number(value);
            return Number.isFinite(converted) ? converted : undefined;
        }
        default:
            return undefined;
    }
}