vidspotai-shared 1.0.79 → 1.0.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/globals/aiModels/enums.d.ts +5 -0
- package/lib/globals/aiModels/enums.d.ts.map +1 -1
- package/lib/globals/aiModels/enums.js +12 -1
- package/lib/globals/aiModels/providers/alibaba.d.ts.map +1 -1
- package/lib/globals/aiModels/providers/alibaba.js +159 -39
- package/lib/services/aiGen/aiGenFactory.service.d.ts +4 -1
- package/lib/services/aiGen/aiGenFactory.service.d.ts.map +1 -1
- package/lib/services/aiGen/aiGenFactory.service.js +13 -1
- package/lib/services/aiGen/index.d.ts +1 -0
- package/lib/services/aiGen/index.d.ts.map +1 -1
- package/lib/services/aiGen/index.js +1 -0
- package/lib/services/aiGen/providers/alibaba/alibaba.d.ts +34 -7
- package/lib/services/aiGen/providers/alibaba/alibaba.d.ts.map +1 -1
- package/lib/services/aiGen/providers/alibaba/alibaba.js +193 -75
- package/lib/services/aiGen/providers/google/google.service.d.ts +1 -0
- package/lib/services/aiGen/providers/google/google.service.d.ts.map +1 -1
- package/lib/services/aiGen/providers/google/google.service.js +55 -7
- package/lib/services/aiGen/providers/openai/openai.service.d.ts.map +1 -1
- package/lib/services/aiGen/providers/openai/openai.service.js +22 -10
- package/lib/services/aiGen/providers/pixverse/pixverse.service.d.ts.map +1 -1
- package/lib/services/aiGen/providers/pixverse/pixverse.service.js +71 -40
- package/lib/services/aiGen/transientRetry.d.ts +35 -0
- package/lib/services/aiGen/transientRetry.d.ts.map +1 -0
- package/lib/services/aiGen/transientRetry.js +106 -0
- package/package.json +6 -6
- package/lib/services/aiGen/providers/azure/azure.service.d.ts +0 -14
- package/lib/services/aiGen/providers/azure/azure.service.d.ts.map +0 -1
- package/lib/services/aiGen/providers/azure/azure.service.js +0 -108
- package/lib/services/aiGen/providers/azure/index.d.ts +0 -2
- package/lib/services/aiGen/providers/azure/index.d.ts.map +0 -1
- package/lib/services/aiGen/providers/azure/index.js +0 -17
|
@@ -13,32 +13,83 @@ const helpers_1 = require("./helpers");
|
|
|
13
13
|
const helpers_2 = require("../../helpers");
|
|
14
14
|
const utils_1 = require("../../../../utils");
|
|
15
15
|
const logger_1 = require("../../../../utils/logger");
|
|
16
|
-
|
|
17
|
-
//
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
//
|
|
23
|
-
|
|
16
|
+
const errors_1 = require("../../../../utils/errors");
|
|
17
|
+
// Endpoint topology (verified 2026-06-06 against intl docs + live probes):
|
|
18
|
+
//
|
|
19
|
+
// T2V + I2V → /services/aigc/video-generation/video-synthesis (async; X-DashScope-Async: enable)
|
|
20
|
+
// Wan/Qwen async → /services/aigc/text2image/image-synthesis (async; legacy qwen-image, qwen-image-plus)
|
|
21
|
+
// Wan async image → /services/aigc/image-generation/generation (async; wan2.7-image*, wan2.6-image)
|
|
22
|
+
// Qwen/Wan SYNC → /services/aigc/multimodal-generation/generation (sync; new qwen-image-2.0*, qwen-image-edit*, wan2.7-image* + edit)
|
|
23
|
+
//
|
|
24
|
+
// Both T2V and I2V use the SAME video endpoint — the model + presence of
|
|
25
|
+
// img_url determines mode. The legacy split into /image2video/... and
|
|
26
|
+
// /sound2video/... endpoints was wrong (Model not exist errors).
|
|
27
|
+
//
|
|
28
|
+
// S2V (wan2.2-s2v) is China-region only and cannot be reached with an intl
|
|
29
|
+
// DashScope key; the model key is intentionally kept but rejected at submit
|
|
30
|
+
// with a UserFacingError. Same for kolors-v2 (Kuaishou model, not Alibaba).
|
|
31
|
+
// Root of the international DashScope REST API.
const ALIBABA_BASE_URL = "https://dashscope-intl.aliyuncs.com/api/v1";
// Common prefix of every AIGC generation endpoint below.
const ALIBABA_BASE = `${ALIBABA_BASE_URL}/services/aigc`;
// Async video submit endpoint, shared by text-to-video and image-to-video.
const ENDPOINT_VIDEO_ASYNC = `${ALIBABA_BASE}/video-generation/video-synthesis`;
// Async image submit endpoint for the legacy qwen-image / qwen-image-plus line.
const ENDPOINT_IMAGE_T2I_ASYNC = `${ALIBABA_BASE}/text2image/image-synthesis`;
// Async image submit endpoint for the Wan image line (wan2.7-image*, wan2.6-image).
const ENDPOINT_IMAGE_GEN_ASYNC = `${ALIBABA_BASE}/image-generation/generation`;
// Sync image endpoint; the response carries the image URLs inline.
const ENDPOINT_MULTIMODAL_SYNC = `${ALIBABA_BASE}/multimodal-generation/generation`;
// Task polling endpoint; callers append `/${taskId}`.
const ENDPOINT_TASK_STATUS = `${ALIBABA_BASE_URL}/tasks`;
// Model IDs that don't run on the intl `dashscope-intl.aliyuncs.com` endpoint.
// We surface a clean PROVIDER_AUTH_ERROR rather than letting them fall through
// and hit a 404 "Model not exist."
const INTL_UNAVAILABLE_MODEL_IDS = new Set([
    "wan2.2-s2v", // China region only (dashscope.aliyuncs.com)
    "kolors-v2", // Kuaishou model; not hosted on DashScope intl
]);
|
|
45
|
+
// Image model classification — drives sync vs async endpoint dispatch.
|
|
46
|
+
// Sync (multimodal-generation/generation, returns inline image URL):
|
|
47
|
+
// - Qwen 2.x sync line: qwen-image-2.0, qwen-image-2.0-pro, qwen-image-max,
|
|
48
|
+
// qwen-image-edit*, plus wan2.7-image / wan2.7-image-pro when used inline.
|
|
49
|
+
// Async-via-image-generation (returns task_id, poll /tasks):
|
|
50
|
+
// - Wan image-gen line: wan2.7-image*, wan2.6-image.
|
|
51
|
+
// Async-via-text2image (returns task_id, poll /tasks):
|
|
52
|
+
// - Legacy qwen-image, qwen-image-plus.
|
|
53
|
+
/**
 * Reports whether an image model ID belongs to the group that is dispatched to
 * the sync multimodal-generation endpoint.
 *
 * Matching is by prefix, so variants such as "qwen-image-2.0-pro" or
 * "wan2.7-image-pro" are covered by their family prefix.
 *
 * @param {string} modelId - DashScope model identifier.
 * @returns {boolean} true when the ID starts with one of the sync prefixes.
 */
function isSyncMultimodalImageModel(modelId) {
    const syncPrefixes = [
        "qwen-image-2",
        "qwen-image-max",
        "qwen-image-edit",
        "wan2.7-image",
    ];
    return syncPrefixes.some((prefix) => modelId.startsWith(prefix));
}
|
|
59
|
+
/**
 * Reports whether an image model ID belongs to the Wan image family
 * (wan2.7-image*, wan2.6-image*), matched by prefix.
 *
 * Note that "wan2.7-image*" also satisfies isSyncMultimodalImageModel, so a
 * caller that tests the sync predicate first will never reach this one for
 * those IDs.
 *
 * @param {string} modelId - DashScope model identifier.
 * @returns {boolean} true when the ID starts with a Wan image prefix.
 */
function isAsyncWanImageModel(modelId) {
    const wanImagePrefixes = ["wan2.7-image", "wan2.6-image"];
    return wanImagePrefixes.some((prefix) => modelId.startsWith(prefix));
}
|
|
24
62
|
class AlibabaService extends baseAiGenProvider_service_1.BaseAiGenProviderService {
|
|
25
63
|
constructor() {
|
|
26
64
|
super();
|
|
27
|
-
|
|
65
|
+
// Used by getCreditUsed only. Real request paths are chosen per-call.
|
|
66
|
+
this.baseUrl = ENDPOINT_VIDEO_ASYNC;
|
|
28
67
|
this.timeout = 60000; // 60 seconds
|
|
29
68
|
if (!process.env.ALIBABA_API_KEY) {
|
|
30
|
-
|
|
69
|
+
// Classify as PROVIDER_AUTH_ERROR (UserFacingError → warn log, no Slack
|
|
70
|
+
// page per job). The DashScope key is single-Bearer and distinct from
|
|
71
|
+
// ALIBABA_CLOUD_ACCESS_KEY/SECRET (those are general Alibaba Cloud
|
|
72
|
+
// creds, not DashScope) — surface a hint so an operator knows what to
|
|
73
|
+
// provision instead of chasing a generic 500.
|
|
74
|
+
throw new errors_1.UserFacingError("Alibaba (DashScope) API key is not configured. Set ALIBABA_API_KEY (Bearer sk-* token from dashscope.aliyuncs.com) in the runtime env.", errors_1.USER_FACING_ERROR_CODES.PROVIDER_AUTH_ERROR);
|
|
31
75
|
}
|
|
32
76
|
}
|
|
33
|
-
|
|
77
|
+
/**
|
|
78
|
+
* DashScope API call. `async` toggles the `X-DashScope-Async: enable` header
|
|
79
|
+
* — required for async endpoints (video-generation, image-generation,
|
|
80
|
+
* text2image), MUST be omitted for sync multimodal-generation.
|
|
81
|
+
*/
|
|
82
|
+
async request(body, method = "POST", url = this.baseUrl, asyncMode = true) {
|
|
83
|
+
const headers = {
|
|
84
|
+
Authorization: `Bearer ${process.env.ALIBABA_API_KEY}`,
|
|
85
|
+
"Content-Type": "application/json",
|
|
86
|
+
};
|
|
87
|
+
if (asyncMode)
|
|
88
|
+
headers["X-DashScope-Async"] = "enable";
|
|
34
89
|
const config = {
|
|
35
90
|
method,
|
|
36
91
|
url,
|
|
37
|
-
headers
|
|
38
|
-
Authorization: `Bearer ${process.env.ALIBABA_API_KEY}`,
|
|
39
|
-
"Content-Type": "application/json",
|
|
40
|
-
"X-DashScope-Async": "enable",
|
|
41
|
-
},
|
|
92
|
+
headers,
|
|
42
93
|
timeout: this.timeout,
|
|
43
94
|
data: method === "POST" ? body : undefined,
|
|
44
95
|
};
|
|
@@ -51,10 +102,12 @@ class AlibabaService extends baseAiGenProvider_service_1.BaseAiGenProviderServic
|
|
|
51
102
|
const modelId = modelConfig?.modelId;
|
|
52
103
|
if (!modelId)
|
|
53
104
|
throw new Error(`Unknown modelKey: ${params.modelKey}`);
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
105
|
+
if (INTL_UNAVAILABLE_MODEL_IDS.has(modelId)) {
|
|
106
|
+
throw new errors_1.UserFacingError(`Model "${modelId}" is not available on the intl DashScope endpoint (dashscope-intl.aliyuncs.com). It is hosted only in the China region and requires a separate Beijing-region API key.`, errors_1.USER_FACING_ERROR_CODES.PROVIDER_AUTH_ERROR);
|
|
107
|
+
}
|
|
108
|
+
// T2V and I2V share a single endpoint. The model + presence of img_url
|
|
109
|
+
// determines mode. S2V is unavailable on intl (gated above).
|
|
110
|
+
const isI2V = !!params.inputImageUrl;
|
|
58
111
|
const input = {};
|
|
59
112
|
if (params.prompt)
|
|
60
113
|
input.prompt = params.prompt;
|
|
@@ -62,21 +115,16 @@ class AlibabaService extends baseAiGenProvider_service_1.BaseAiGenProviderServic
|
|
|
62
115
|
input.negative_prompt = params.negativePrompt;
|
|
63
116
|
if (isI2V) {
|
|
64
117
|
input.img_url = params.inputImageUrl;
|
|
65
|
-
//
|
|
118
|
+
// wan2.7-i2v supports first-last-frame interpolation
|
|
66
119
|
if (params.lastFrameImageUrl)
|
|
67
120
|
input.last_frame_url = params.lastFrameImageUrl;
|
|
68
121
|
}
|
|
69
|
-
if (isS2V) {
|
|
70
|
-
input.audio_url = params.inputAudioUrl;
|
|
71
|
-
if (params.inputImageUrl)
|
|
72
|
-
input.image_url = params.inputImageUrl;
|
|
73
|
-
}
|
|
74
122
|
const parameters = {
|
|
75
123
|
duration: params.duration || 5,
|
|
76
124
|
prompt_extend: params.promptOptimizer ?? true,
|
|
77
125
|
};
|
|
78
|
-
// T2V requires explicit size; I2V
|
|
79
|
-
if (!isI2V
|
|
126
|
+
// T2V requires explicit size; I2V derives dimensions from the input image.
|
|
127
|
+
if (!isI2V) {
|
|
80
128
|
const size = (0, helpers_1.getAlibabaDimensions)(params.resolution, params.aspectRatio);
|
|
81
129
|
if (!size) {
|
|
82
130
|
throw new Error(`Invalid resolution/aspect ratio combination: ${params.resolution} ${params.aspectRatio}`);
|
|
@@ -88,7 +136,7 @@ class AlibabaService extends baseAiGenProvider_service_1.BaseAiGenProviderServic
|
|
|
88
136
|
if (params.watermark !== undefined)
|
|
89
137
|
parameters.watermark = params.watermark;
|
|
90
138
|
const body = { model: modelId, input, parameters };
|
|
91
|
-
const result = await this.request(body, "POST",
|
|
139
|
+
const result = await this.request(body, "POST", ENDPOINT_VIDEO_ASYNC, true);
|
|
92
140
|
// DashScope returns { output: { task_id, ... } } on async submit.
|
|
93
141
|
const taskId = result?.output?.task_id || result?.request_id;
|
|
94
142
|
if (!result || !taskId) {
|
|
@@ -161,38 +209,127 @@ class AlibabaService extends baseAiGenProvider_service_1.BaseAiGenProviderServic
|
|
|
161
209
|
return { status: types_1.EVideoSceneStatus.PENDING };
|
|
162
210
|
}
|
|
163
211
|
/**
|
|
164
|
-
* DashScope image generation.
|
|
165
|
-
*
|
|
166
|
-
*
|
|
167
|
-
*
|
|
212
|
+
* DashScope image generation. Three dispatch paths (verified live 2026-06-06):
|
|
213
|
+
*
|
|
214
|
+
* 1. SYNC multimodal-generation/generation
|
|
215
|
+
* Models: qwen-image-2.0, qwen-image-2.0-pro, qwen-image-max,
|
|
216
|
+
* qwen-image-edit*, wan2.7-image, wan2.7-image-pro
|
|
217
|
+
* Response: output.choices[0].message.content[].image (inline URLs)
|
|
218
|
+
* Note: NO X-DashScope-Async header. Edit mode is triggered by including
|
|
219
|
+
* {image: refUrl} entries in the user message content array.
|
|
220
|
+
*
|
|
221
|
+
* 2. ASYNC image-generation/generation (Wan async image)
|
|
222
|
+
* Models: wan2.7-image*, wan2.6-image (when caller prefers async; we
|
|
223
|
+
* currently route these through path 1 since they're available
|
|
224
|
+
* there too).
|
|
168
225
|
*
|
|
169
|
-
*
|
|
170
|
-
*
|
|
171
|
-
*
|
|
226
|
+
* 3. ASYNC text2image/image-synthesis (legacy)
|
|
227
|
+
* Models: qwen-image, qwen-image-plus
|
|
228
|
+
* Response: output.results[].url
|
|
229
|
+
*
|
|
230
|
+
* Image jobs typically finish in 5–15s, well inside our HTTP timeout, so
|
|
231
|
+
* we poll inline rather than going through BullMQ.
|
|
172
232
|
*/
|
|
173
233
|
async generateImage(params) {
|
|
174
234
|
const modelConfig = aiModels_1.aiModelConfigs[params.modelKey];
|
|
175
235
|
const modelId = modelConfig?.modelId;
|
|
176
236
|
if (!modelId)
|
|
177
237
|
throw new Error(`Unknown image modelKey: ${params.modelKey}`);
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
238
|
+
if (INTL_UNAVAILABLE_MODEL_IDS.has(modelId)) {
|
|
239
|
+
throw new errors_1.UserFacingError(`Model "${modelId}" is not available on the intl DashScope endpoint. Use a different image provider.`, errors_1.USER_FACING_ERROR_CODES.PROVIDER_AUTH_ERROR);
|
|
240
|
+
}
|
|
241
|
+
const refs = [
|
|
242
|
+
...(params.inputImageUrl ? [params.inputImageUrl] : []),
|
|
243
|
+
...(params.inputImageUrls ?? []),
|
|
244
|
+
];
|
|
245
|
+
const isEdit = modelId.startsWith("qwen-image-edit") ||
|
|
246
|
+
(refs.length > 0 && (modelId.startsWith("wan2.7-image") || modelId.startsWith("qwen-image-2")));
|
|
247
|
+
if (isEdit && !refs.length) {
|
|
248
|
+
throw new Error(`${modelId}: edit/reference mode requires at least one input image`);
|
|
249
|
+
}
|
|
250
|
+
let remoteUrls;
|
|
251
|
+
let providerRequestId;
|
|
252
|
+
if (isSyncMultimodalImageModel(modelId)) {
|
|
253
|
+
const result = await this.generateImageSync(modelId, params, refs);
|
|
254
|
+
remoteUrls = result.urls;
|
|
255
|
+
providerRequestId = result.requestId;
|
|
256
|
+
}
|
|
257
|
+
else if (isAsyncWanImageModel(modelId)) {
|
|
258
|
+
const result = await this.generateImageAsync(modelId, params, ENDPOINT_IMAGE_GEN_ASYNC);
|
|
259
|
+
remoteUrls = result.urls;
|
|
260
|
+
providerRequestId = result.taskId;
|
|
261
|
+
}
|
|
262
|
+
else {
|
|
263
|
+
// Legacy async path: qwen-image, qwen-image-plus.
|
|
264
|
+
const result = await this.generateImageAsync(modelId, params, ENDPOINT_IMAGE_T2I_ASYNC);
|
|
265
|
+
remoteUrls = result.urls;
|
|
266
|
+
providerRequestId = result.taskId;
|
|
267
|
+
}
|
|
268
|
+
if (!remoteUrls.length) {
|
|
269
|
+
throw new Error("DashScope image returned no URLs");
|
|
270
|
+
}
|
|
271
|
+
const bucket = (0, firebase_1.getBucket)();
|
|
272
|
+
const ts = Date.now();
|
|
273
|
+
const urls = [];
|
|
274
|
+
for (let i = 0; i < remoteUrls.length; i++) {
|
|
275
|
+
const remoteUrl = remoteUrls[i];
|
|
276
|
+
const bytes = Buffer.from(await (await axios_1.default.get(remoteUrl, { responseType: "arraybuffer", timeout: this.timeout })).data);
|
|
277
|
+
const path = `images/dashscope/${ts}-${Math.random().toString(36).slice(2, 8)}-${i}.png`;
|
|
278
|
+
const file = bucket.file(path);
|
|
279
|
+
await file.save(bytes, { contentType: "image/png" });
|
|
280
|
+
const [signed] = await file.getSignedUrl({ action: "read", expires: "03-09-2491" });
|
|
281
|
+
urls.push(signed);
|
|
282
|
+
}
|
|
283
|
+
return {
|
|
284
|
+
imageUrl: urls[0],
|
|
285
|
+
imageUrls: urls.length > 1 ? urls : undefined,
|
|
286
|
+
providerRequestId,
|
|
287
|
+
};
|
|
288
|
+
}
|
|
289
|
+
/**
|
|
290
|
+
* Sync multimodal-generation. Returns inline image URLs in
|
|
291
|
+
* `output.choices[0].message.content[].image`. No polling needed.
|
|
292
|
+
*/
|
|
293
|
+
async generateImageSync(modelId, params, refs) {
|
|
294
|
+
const userContent = [];
|
|
295
|
+
// Image refs come first (DashScope convention); each as its own content entry.
|
|
296
|
+
for (const ref of refs.slice(0, 3)) {
|
|
297
|
+
userContent.push({ image: ref });
|
|
298
|
+
}
|
|
299
|
+
if (params.prompt)
|
|
300
|
+
userContent.push({ text: params.prompt });
|
|
301
|
+
const input = {
|
|
302
|
+
messages: [{ role: "user", content: userContent }],
|
|
303
|
+
};
|
|
304
|
+
const parameters = {};
|
|
305
|
+
if (params.negativePrompt)
|
|
306
|
+
parameters.negative_prompt = params.negativePrompt;
|
|
307
|
+
if (params.watermark !== undefined)
|
|
308
|
+
parameters.watermark = params.watermark;
|
|
309
|
+
if (params.promptOptimizer !== undefined)
|
|
310
|
+
parameters.prompt_extend = params.promptOptimizer;
|
|
311
|
+
if (params.seed !== undefined)
|
|
312
|
+
parameters.seed = params.seed;
|
|
313
|
+
if (params.aspectRatio)
|
|
314
|
+
parameters.size = params.aspectRatio;
|
|
315
|
+
if (params.imageSize)
|
|
316
|
+
parameters.size = params.imageSize;
|
|
317
|
+
const result = await this.request({ model: modelId, input, parameters }, "POST", ENDPOINT_MULTIMODAL_SYNC, false);
|
|
318
|
+
const choices = result?.output?.choices ?? [];
|
|
319
|
+
const content = choices[0]?.message?.content ?? [];
|
|
320
|
+
const urls = content
|
|
321
|
+
.map((c) => c?.image)
|
|
322
|
+
.filter((u) => typeof u === "string" && !!u);
|
|
323
|
+
return { urls, requestId: result?.request_id ?? "" };
|
|
324
|
+
}
|
|
325
|
+
/**
|
|
326
|
+
* Async submit + poll. Used by both image-generation/generation (Wan async)
|
|
327
|
+
* and text2image/image-synthesis (legacy Qwen).
|
|
328
|
+
*/
|
|
329
|
+
async generateImageAsync(modelId, params, submitUrl) {
|
|
182
330
|
const input = { prompt: params.prompt };
|
|
183
331
|
if (params.negativePrompt)
|
|
184
332
|
input.negative_prompt = params.negativePrompt;
|
|
185
|
-
if (isEdit) {
|
|
186
|
-
const refs = [
|
|
187
|
-
...(params.inputImageUrl ? [params.inputImageUrl] : []),
|
|
188
|
-
...(params.inputImageUrls ?? []),
|
|
189
|
-
];
|
|
190
|
-
if (!refs.length) {
|
|
191
|
-
throw new Error("Qwen-Image edit requires at least one input image");
|
|
192
|
-
}
|
|
193
|
-
// Qwen edit accepts 1–3 refs as base_image_url.
|
|
194
|
-
input.base_image_url = refs.length === 1 ? refs[0] : refs.slice(0, 3);
|
|
195
|
-
}
|
|
196
333
|
const parameters = {
|
|
197
334
|
n: params.numImages ?? 1,
|
|
198
335
|
...(params.aspectRatio ? { size: params.aspectRatio } : {}),
|
|
@@ -203,18 +340,18 @@ class AlibabaService extends baseAiGenProvider_service_1.BaseAiGenProviderServic
|
|
|
203
340
|
? { prompt_extend: params.promptOptimizer }
|
|
204
341
|
: { prompt_extend: true }),
|
|
205
342
|
};
|
|
206
|
-
const submit = await this.request({ model: modelId, input, parameters }, "POST", submitUrl);
|
|
343
|
+
const submit = await this.request({ model: modelId, input, parameters }, "POST", submitUrl, true);
|
|
207
344
|
const taskId = submit?.output?.task_id;
|
|
208
345
|
if (!taskId) {
|
|
209
346
|
throw new Error("DashScope image submit returned no task_id");
|
|
210
347
|
}
|
|
211
|
-
// Poll up to 90s, every 2s.
|
|
348
|
+
// Poll up to 90s, every 2s.
|
|
212
349
|
const maxAttempts = 45;
|
|
213
350
|
const intervalMs = 2000;
|
|
214
351
|
let lastResult = null;
|
|
215
352
|
for (let i = 0; i < maxAttempts; i++) {
|
|
216
353
|
await new Promise((r) => setTimeout(r, intervalMs));
|
|
217
|
-
const poll = await this.request(null, "GET", `${ENDPOINT_TASK_STATUS}/${taskId}
|
|
354
|
+
const poll = await this.request(null, "GET", `${ENDPOINT_TASK_STATUS}/${taskId}`, false);
|
|
218
355
|
const status = poll?.output?.task_status ?? poll?.status;
|
|
219
356
|
if (status === "SUCCEEDED" || status === "succeeded") {
|
|
220
357
|
lastResult = poll;
|
|
@@ -228,27 +365,8 @@ class AlibabaService extends baseAiGenProvider_service_1.BaseAiGenProviderServic
|
|
|
228
365
|
throw new Error("DashScope image task timed out after 90s");
|
|
229
366
|
}
|
|
230
367
|
const results = lastResult.output?.results ?? [];
|
|
231
|
-
const
|
|
232
|
-
|
|
233
|
-
throw new Error("DashScope image task returned no URLs");
|
|
234
|
-
}
|
|
235
|
-
const bucket = (0, firebase_1.getBucket)();
|
|
236
|
-
const ts = Date.now();
|
|
237
|
-
const urls = [];
|
|
238
|
-
for (let i = 0; i < remoteUrls.length; i++) {
|
|
239
|
-
const remoteUrl = remoteUrls[i];
|
|
240
|
-
const bytes = Buffer.from(await (await axios_1.default.get(remoteUrl, { responseType: "arraybuffer", timeout: this.timeout })).data);
|
|
241
|
-
const path = `images/dashscope/${ts}-${Math.random().toString(36).slice(2, 8)}-${i}.png`;
|
|
242
|
-
const file = bucket.file(path);
|
|
243
|
-
await file.save(bytes, { contentType: "image/png" });
|
|
244
|
-
const [signed] = await file.getSignedUrl({ action: "read", expires: "03-09-2491" });
|
|
245
|
-
urls.push(signed);
|
|
246
|
-
}
|
|
247
|
-
return {
|
|
248
|
-
imageUrl: urls[0],
|
|
249
|
-
imageUrls: urls.length > 1 ? urls : undefined,
|
|
250
|
-
providerRequestId: taskId,
|
|
251
|
-
};
|
|
368
|
+
const urls = results.map((r) => r.url).filter((u) => !!u);
|
|
369
|
+
return { urls, taskId };
|
|
252
370
|
}
|
|
253
371
|
getCreditUsed({ modelKey, resolution, aspectRatio, duration, multiClip = false, numImages = 1, }) {
|
|
254
372
|
const modelConfig = aiModels_1.aiModelConfigs[modelKey];
|
|
@@ -13,6 +13,7 @@ export declare class GoogleService extends BaseAiGenProviderService {
|
|
|
13
13
|
generateVideo(params: VideoGenerationParams): Promise<VideoGenerationResult>;
|
|
14
14
|
checkVideoStatus({ task, outputFilename, outputFilePath, }: VideoStatusParams): Promise<VideoStatusResult>;
|
|
15
15
|
generateImage(params: ImageGenerationParams): Promise<ImageGenerationResult>;
|
|
16
|
+
private _generateImage;
|
|
16
17
|
/**
|
|
17
18
|
* Lyria 2 (Vertex AI). Sync — POST {region}-aiplatform.googleapis.com/.../lyria-002:predict
|
|
18
19
|
* returns base64-encoded WAV audio inline. Auth via ADC on the function service
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAgBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAgBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAmKlB,qBAAa,aAAc,SAAQ,wBAAwB;IACzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAO/C;;;;OAIG;YACW,kBAAkB;IA+B1B,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAyG3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IA0F3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAqEjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BvI"}
|
|
@@ -127,15 +127,30 @@ function classifyGoogleApiError(err) {
|
|
|
127
127
|
if (httpCode === 14 || /high demand/i.test(msg)) {
|
|
128
128
|
return new errors_1.UserFacingError(msg, errors_1.USER_FACING_ERROR_CODES.VIDEO_PROVIDER_HIGH_DEMAND);
|
|
129
129
|
}
|
|
130
|
-
// INVALID_ARGUMENT 400 —
|
|
131
|
-
// supported"
|
|
132
|
-
// doesn't
|
|
133
|
-
//
|
|
134
|
-
//
|
|
135
|
-
//
|
|
136
|
-
|
|
130
|
+
// INVALID_ARGUMENT 400 — narrow match: only the specific "use case is
|
|
131
|
+
// currently not supported" string, which Veo returns when our request
|
|
132
|
+
// structure doesn't match the chosen model variant's capabilities.
|
|
133
|
+
// The pre-call guards above (duration=8 for lastFrame/refs) should
|
|
134
|
+
// prevent the known cases; if we still hit this it's a NEW combo we
|
|
135
|
+
// haven't profiled — surface as CAPABILITY_MISMATCH so the user gets a
|
|
136
|
+
// useful message, AND keep the raw provider text in the error so the
|
|
137
|
+
// next entry in PROD_FIX_LOG can identify which combo broke. Generic
|
|
138
|
+
// 400s (other INVALID_ARGUMENT variants) still surface as `error` so
|
|
139
|
+
// a real platform bug isn't muted.
|
|
140
|
+
if ((status === "INVALID_ARGUMENT" || httpCode === 400) &&
|
|
141
|
+
/use case is currently not supported/i.test(msg)) {
|
|
137
142
|
return new errors_1.UserFacingError(msg, errors_1.USER_FACING_ERROR_CODES.CAPABILITY_MISMATCH);
|
|
138
143
|
}
|
|
144
|
+
// Imagen + Nano-Banana surface Responsible-AI filter rejections as
|
|
145
|
+
// INVALID_ARGUMENT 400 with the literal text "filtered out because they
|
|
146
|
+
// violated Google's Responsible AI practices" (and a recommendation to
|
|
147
|
+
// rephrase). This is user content moderation, not a system bug — show the
|
|
148
|
+
// user the rephrase hint and skip the Slack page. Also matches the Veo
|
|
149
|
+
// RAI message ("violated Google's content policies") for the same reason.
|
|
150
|
+
if ((status === "INVALID_ARGUMENT" || httpCode === 400) &&
|
|
151
|
+
/(filtered out because they violated|violated Google's (?:Responsible AI|content) (?:practices|policies))/i.test(msg)) {
|
|
152
|
+
return new errors_1.UserFacingError("Your prompt was flagged by Google's safety filters. Please rephrase and try again.", errors_1.USER_FACING_ERROR_CODES.CONTENT_POLICY_VIOLATION);
|
|
153
|
+
}
|
|
139
154
|
}
|
|
140
155
|
catch {
|
|
141
156
|
// Not JSON — fall through to non-JSON checks.
|
|
@@ -192,6 +207,21 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
|
|
|
192
207
|
const modelConfig = aiModels_1.aiModelConfigs[params.modelKey];
|
|
193
208
|
const modelId = modelConfig.modelId;
|
|
194
209
|
const isVeo3_1 = VEO_3_1_MODELS.has(params.modelKey);
|
|
210
|
+
// Gemini Veo cross-constraint: first+last-frame interpolation AND
|
|
211
|
+
// reference images BOTH require durationSeconds=8. Sending any other
|
|
212
|
+
// duration returns INVALID_ARGUMENT 400 "Your use case is currently not
|
|
213
|
+
// supported." with no hint about which param caused it. Surface a
|
|
214
|
+
// typed UserFacingError so the user/frontend can correct the input
|
|
215
|
+
// instead of burning a provider call + opaque rejection.
|
|
216
|
+
const needsDuration8 = !!params.lastFrameImageUrl ||
|
|
217
|
+
(isVeo3_1 && (params.referenceImageUrls?.length ?? 0) > 0);
|
|
218
|
+
if (needsDuration8 && params.duration !== undefined && params.duration !== 8) {
|
|
219
|
+
const constraint = params.lastFrameImageUrl
|
|
220
|
+
? "first-frame + last-frame interpolation"
|
|
221
|
+
: "reference images";
|
|
222
|
+
throw new errors_1.UserFacingError(`Google Veo requires an 8-second duration when using ${constraint}. ` +
|
|
223
|
+
`Please select 8s or remove the ${params.lastFrameImageUrl ? "last-frame image" : "reference images"}.`, errors_1.USER_FACING_ERROR_CODES.CAPABILITY_MISMATCH);
|
|
224
|
+
}
|
|
195
225
|
const request = {
|
|
196
226
|
model: modelId,
|
|
197
227
|
prompt: params.prompt,
|
|
@@ -318,6 +348,24 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
|
|
|
318
348
|
return { status: types_1.EVideoSceneStatus.PENDING };
|
|
319
349
|
}
|
|
320
350
|
async generateImage(params) {
|
|
351
|
+
try {
|
|
352
|
+
return await this._generateImage(params);
|
|
353
|
+
}
|
|
354
|
+
catch (err) {
|
|
355
|
+
// The SDK's generateImages / generateContent throws ApiError instances
|
|
356
|
+
// whose .message is a JSON string. Route through classifyGoogleApiError
|
|
357
|
+
// so Imagen RAI safety filter rejections (the most common failure mode
|
|
358
|
+
// for image gen — "filtered out because they violated Google's
|
|
359
|
+
// Responsible AI practices") become UserFacingError(CONTENT_POLICY_VIOLATION)
|
|
360
|
+
// instead of leaking as raw provider JSON into the worker's Slack
|
|
361
|
+
// error channel.
|
|
362
|
+
const userFacing = classifyGoogleApiError(err);
|
|
363
|
+
if (userFacing)
|
|
364
|
+
throw userFacing;
|
|
365
|
+
throw err;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
async _generateImage(params) {
|
|
321
369
|
const modelConfig = aiModels_1.aiModelConfigs[params.modelKey];
|
|
322
370
|
const modelId = modelConfig?.modelId;
|
|
323
371
|
if (!modelId)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"openai.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/openai/openai.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAElB,qBAAa,aAAc,SAAQ,wBAAwB;IACzD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAa;IACxC,OAAO,CAAC,MAAM,CAAS;;IAQjB,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"openai.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/openai/openai.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAElB,qBAAa,aAAc,SAAQ,wBAAwB;IACzD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAa;IACxC,OAAO,CAAC,MAAM,CAAS;;IAQjB,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IA0E3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAsC3C,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IA2ChC;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IA6HjC,aAAa,CAAC,EACZ,QAAQ,EACR,UAAuB,EACvB,QAAY,EACZ,SAAiB,EACjB,SAAa,EACb,OAAO,GACR,EAAE,iBAAiB,GAAG,MAAM;CAoB9B"}
|
|
@@ -61,17 +61,29 @@ class OpenaiService extends baseAiGenProvider_service_1.BaseAiGenProviderService
|
|
|
61
61
|
};
|
|
62
62
|
// First-frame image-to-video. The Sora API requires the reference image
|
|
63
63
|
// dimensions to match `size`; the caller is responsible for that.
|
|
64
|
-
//
|
|
65
|
-
//
|
|
64
|
+
//
|
|
65
|
+
// We use the JSON variant `input_reference: { image_url }` (OpenAI
|
|
66
|
+
// fetches the image themselves) rather than the multipart `Uploadable`
|
|
67
|
+
// shape. Reason: as of late 2026 sora-2-pro started returning
|
|
68
|
+
// 400 Invalid type for 'input_reference': expected an object, but
|
|
69
|
+
// got a file instead.
|
|
70
|
+
// for multipart uploads that worked on sora-2. The JSON form is
|
|
71
|
+
// documented for both models and avoids the size mismatch entirely.
|
|
72
|
+
//
|
|
73
|
+
// openai SDK 6.3.0's typed shape for input_reference is `Uploadable`
|
|
74
|
+
// (no JSON-object overload yet), so we bypass the type with a cast.
|
|
75
|
+
// The HTTP layer serializes plain objects as JSON automatically when
|
|
76
|
+
// no Uploadable is present in the body, which is exactly what we want.
|
|
77
|
+
//
|
|
78
|
+
// Note: openai SDK v6.3.0 does NOT yet expose extensions / characters
|
|
79
|
+
// / edits endpoints — those exist in the REST API but are not surfaced.
|
|
66
80
|
if (params.inputImageUrl) {
|
|
67
|
-
|
|
68
|
-
//
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
}
|
|
73
|
-
const filename = (params.inputImageUrl.split("?")[0] ?? "reference").split("/").pop() || "reference.png";
|
|
74
|
-
request.input_reference = await (0, openai_1.toFile)(resp, filename);
|
|
81
|
+
// SDK 6.3.0 types input_reference as Uploadable only; the JSON
|
|
82
|
+
// object form is a runtime-supported overload that the types
|
|
83
|
+
// haven't caught up to yet. Bypass via unknown cast.
|
|
84
|
+
request.input_reference = {
|
|
85
|
+
image_url: params.inputImageUrl,
|
|
86
|
+
};
|
|
75
87
|
}
|
|
76
88
|
let job;
|
|
77
89
|
try {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pixverse.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/pixverse/pixverse.service.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"pixverse.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/pixverse/pixverse.service.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AA0FlB,qBAAa,eAAgB,SAAQ,wBAAwB;IAC3D,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkD;IAKpE,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAmK3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IA0FjD,aAAa,CAAC,MAAM,EAAE,iBAAiB,GAAG,MAAM;CAkDjD"}
|