@umituz/react-native-ai-gemini-provider 1.14.22 → 1.14.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/infrastructure/services/feature-input-builder.ts +85 -0
- package/src/infrastructure/services/feature-model-selector.ts +31 -0
- package/src/infrastructure/services/gemini-provider.ts +6 -59
- package/src/infrastructure/services/gemini-structured-text.service.ts +113 -0
- package/src/infrastructure/services/gemini-text-generation.service.ts +5 -94
- package/src/infrastructure/services/gemini-video-generation.service.ts +25 -129
- package/src/infrastructure/services/veo-http-client.service.ts +70 -0
- package/src/infrastructure/services/veo-polling.service.ts +119 -0
- package/src/infrastructure/utils/base-input-builders.util.ts +49 -0
- package/src/infrastructure/utils/image-feature-builders.util.ts +123 -0
- package/src/infrastructure/utils/input-builder.types.ts +44 -0
- package/src/infrastructure/utils/input-builders.util.ts +40 -263
- package/src/infrastructure/utils/video-feature-builders.util.ts +57 -0
package/package.json
CHANGED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feature Input Builder
|
|
3
|
+
* Builds Gemini API inputs for image and video features
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type {
|
|
7
|
+
ImageFeatureType,
|
|
8
|
+
VideoFeatureType,
|
|
9
|
+
ImageFeatureInputData,
|
|
10
|
+
VideoFeatureInputData,
|
|
11
|
+
} from "@umituz/react-native-ai-generation-content";
|
|
12
|
+
import {
|
|
13
|
+
buildUpscaleInput,
|
|
14
|
+
buildPhotoRestoreInput,
|
|
15
|
+
buildFaceSwapInput,
|
|
16
|
+
buildAnimeSelfieInput,
|
|
17
|
+
buildRemoveBackgroundInput,
|
|
18
|
+
buildRemoveObjectInput,
|
|
19
|
+
buildReplaceBackgroundInput,
|
|
20
|
+
buildHDTouchUpInput,
|
|
21
|
+
buildVideoFromDualImagesInput,
|
|
22
|
+
} from "../utils/input-builders.util";
|
|
23
|
+
|
|
24
|
+
/**
 * Maps a feature identifier plus caller-supplied data onto the matching
 * input-builder utility and returns whatever that utility produces.
 */
class FeatureInputBuilder {
  /**
   * Build input for an IMAGE feature.
   *
   * @param feature - Image feature whose payload should be built.
   * @param data - Source image, optional target image, prompt and options.
   * @returns The payload returned by the builder selected for `feature`.
   * @throws Error when "face-swap" is requested without a target image,
   *   when "replace-background" is requested without a prompt, or when
   *   `feature` matches none of the handled features.
   */
  buildImageFeatureInput(
    feature: ImageFeatureType,
    data: ImageFeatureInputData,
  ): Record<string, unknown> {
    const {
      imageBase64: source,
      targetImageBase64: target,
      prompt: text,
      options: extras,
    } = data;

    if (feature === "upscale") {
      return buildUpscaleInput(source, extras);
    }

    if (feature === "photo-restore") {
      return buildPhotoRestoreInput(source, extras);
    }

    if (feature === "face-swap") {
      // A face swap needs a second image to take the face from.
      if (target) {
        return buildFaceSwapInput(source, target, extras);
      }
      throw new Error("Face swap requires target image");
    }

    if (feature === "anime-selfie") {
      return buildAnimeSelfieInput(source, extras);
    }

    if (feature === "remove-background") {
      return buildRemoveBackgroundInput(source, extras);
    }

    if (feature === "remove-object") {
      // Options are spread after the prompt, so a `prompt` key inside
      // the options wins over the top-level prompt.
      return buildRemoveObjectInput(source, { prompt: text, ...extras });
    }

    if (feature === "hd-touch-up") {
      return buildHDTouchUpInput(source, extras);
    }

    if (feature === "replace-background") {
      if (text) {
        // Only the prompt is forwarded here; options are not passed on.
        return buildReplaceBackgroundInput(source, { prompt: text });
      }
      throw new Error("Replace background requires prompt");
    }

    throw new Error(`Unknown image feature: ${String(feature)}`);
  }

  /**
   * Build input for a VIDEO feature.
   *
   * @param feature - Video feature whose payload should be built.
   * @param data - Source image, target image, prompt and options.
   * @returns The payload returned by `buildVideoFromDualImagesInput`.
   * @throws Error when `feature` is neither "ai-hug" nor "ai-kiss".
   */
  buildVideoFeatureInput(
    feature: VideoFeatureType,
    data: VideoFeatureInputData,
  ): Record<string, unknown> {
    const {
      sourceImageBase64: source,
      targetImageBase64: target,
      prompt: motion,
      options: extras,
    } = data;

    // Both dual-image features share one payload shape.
    const isDualImageFeature = feature === "ai-hug" || feature === "ai-kiss";
    if (!isDualImageFeature) {
      throw new Error(`Unknown video feature: ${String(feature)}`);
    }

    return buildVideoFromDualImagesInput(source, {
      target_image: target,
      motion_prompt: motion,
      ...extras,
    });
  }
}

export const featureInputBuilder = new FeatureInputBuilder();
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feature Model Selector
|
|
3
|
+
* Returns the appropriate model ID for a given feature
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type {
|
|
7
|
+
ImageFeatureType,
|
|
8
|
+
VideoFeatureType,
|
|
9
|
+
} from "@umituz/react-native-ai-generation-content";
|
|
10
|
+
import {
|
|
11
|
+
GEMINI_IMAGE_FEATURE_MODELS,
|
|
12
|
+
GEMINI_VIDEO_FEATURE_MODELS,
|
|
13
|
+
} from "../../domain/constants/feature-models.constants";
|
|
14
|
+
|
|
15
|
+
/**
 * Looks up model IDs in the per-feature model constant tables.
 */
class FeatureModelSelector {
  /**
   * Get model ID for an IMAGE feature.
   *
   * @param feature - Image feature used as the lookup key.
   * @returns The entry of `GEMINI_IMAGE_FEATURE_MODELS` stored under `feature`.
   */
  getImageFeatureModel(feature: ImageFeatureType): string {
    const imageModelId = GEMINI_IMAGE_FEATURE_MODELS[feature];
    return imageModelId;
  }

  /**
   * Get model ID for a VIDEO feature.
   *
   * @param feature - Video feature used as the lookup key.
   * @returns The entry of `GEMINI_VIDEO_FEATURE_MODELS` stored under `feature`.
   */
  getVideoFeatureModel(feature: VideoFeatureType): string {
    const videoModelId = GEMINI_VIDEO_FEATURE_MODELS[feature];
    return videoModelId;
  }
}

export const featureModelSelector = new FeatureModelSelector();
|
|
@@ -26,21 +26,8 @@ import { geminiImageEditService } from "./gemini-image-edit.service";
|
|
|
26
26
|
import { providerInitializer, type GeminiProviderConfig } from "./provider-initializer";
|
|
27
27
|
import { jobProcessor } from "./job-processor";
|
|
28
28
|
import { generationExecutor } from "./generation-executor";
|
|
29
|
-
import {
|
|
30
|
-
|
|
31
|
-
GEMINI_VIDEO_FEATURE_MODELS,
|
|
32
|
-
} from "../../domain/constants/feature-models.constants";
|
|
33
|
-
import {
|
|
34
|
-
buildUpscaleInput,
|
|
35
|
-
buildPhotoRestoreInput,
|
|
36
|
-
buildFaceSwapInput,
|
|
37
|
-
buildAnimeSelfieInput,
|
|
38
|
-
buildRemoveBackgroundInput,
|
|
39
|
-
buildRemoveObjectInput,
|
|
40
|
-
buildReplaceBackgroundInput,
|
|
41
|
-
buildHDTouchUpInput,
|
|
42
|
-
buildVideoFromDualImagesInput,
|
|
43
|
-
} from "../utils/input-builders.util";
|
|
29
|
+
import { featureInputBuilder } from "./feature-input-builder";
|
|
30
|
+
import { featureModelSelector } from "./feature-model-selector";
|
|
44
31
|
|
|
45
32
|
export type { GeminiProviderConfig };
|
|
46
33
|
|
|
@@ -63,7 +50,6 @@ const GEMINI_CAPABILITIES: ProviderCapabilities = {
|
|
|
63
50
|
textToVideo: true,
|
|
64
51
|
imageToVideo: true,
|
|
65
52
|
textToVoice: false,
|
|
66
|
-
textToText: true,
|
|
67
53
|
};
|
|
68
54
|
|
|
69
55
|
export class GeminiProvider implements IAIProvider {
|
|
@@ -162,7 +148,7 @@ export class GeminiProvider implements IAIProvider {
|
|
|
162
148
|
* Get model ID for an IMAGE feature
|
|
163
149
|
*/
|
|
164
150
|
getImageFeatureModel(feature: ImageFeatureType): string {
|
|
165
|
-
return
|
|
151
|
+
return featureModelSelector.getImageFeatureModel(feature);
|
|
166
152
|
}
|
|
167
153
|
|
|
168
154
|
/**
|
|
@@ -172,41 +158,14 @@ export class GeminiProvider implements IAIProvider {
|
|
|
172
158
|
feature: ImageFeatureType,
|
|
173
159
|
data: ImageFeatureInputData,
|
|
174
160
|
): Record<string, unknown> {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
switch (feature) {
|
|
178
|
-
case "upscale":
|
|
179
|
-
return buildUpscaleInput(imageBase64, options);
|
|
180
|
-
case "photo-restore":
|
|
181
|
-
return buildPhotoRestoreInput(imageBase64, options);
|
|
182
|
-
case "face-swap":
|
|
183
|
-
if (!targetImageBase64) {
|
|
184
|
-
throw new Error("Face swap requires target image");
|
|
185
|
-
}
|
|
186
|
-
return buildFaceSwapInput(imageBase64, targetImageBase64, options);
|
|
187
|
-
case "anime-selfie":
|
|
188
|
-
return buildAnimeSelfieInput(imageBase64, options);
|
|
189
|
-
case "remove-background":
|
|
190
|
-
return buildRemoveBackgroundInput(imageBase64, options);
|
|
191
|
-
case "remove-object":
|
|
192
|
-
return buildRemoveObjectInput(imageBase64, { prompt, ...options });
|
|
193
|
-
case "hd-touch-up":
|
|
194
|
-
return buildHDTouchUpInput(imageBase64, options);
|
|
195
|
-
case "replace-background":
|
|
196
|
-
if (!prompt) {
|
|
197
|
-
throw new Error("Replace background requires prompt");
|
|
198
|
-
}
|
|
199
|
-
return buildReplaceBackgroundInput(imageBase64, { prompt });
|
|
200
|
-
default:
|
|
201
|
-
throw new Error(`Unknown image feature: ${String(feature)}`);
|
|
202
|
-
}
|
|
161
|
+
return featureInputBuilder.buildImageFeatureInput(feature, data);
|
|
203
162
|
}
|
|
204
163
|
|
|
205
164
|
/**
|
|
206
165
|
* Get model ID for a VIDEO feature
|
|
207
166
|
*/
|
|
208
167
|
getVideoFeatureModel(feature: VideoFeatureType): string {
|
|
209
|
-
return
|
|
168
|
+
return featureModelSelector.getVideoFeatureModel(feature);
|
|
210
169
|
}
|
|
211
170
|
|
|
212
171
|
/**
|
|
@@ -216,19 +175,7 @@ export class GeminiProvider implements IAIProvider {
|
|
|
216
175
|
feature: VideoFeatureType,
|
|
217
176
|
data: VideoFeatureInputData,
|
|
218
177
|
): Record<string, unknown> {
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
switch (feature) {
|
|
222
|
-
case "ai-hug":
|
|
223
|
-
case "ai-kiss":
|
|
224
|
-
return buildVideoFromDualImagesInput(sourceImageBase64, {
|
|
225
|
-
target_image: targetImageBase64,
|
|
226
|
-
motion_prompt: prompt,
|
|
227
|
-
...options,
|
|
228
|
-
});
|
|
229
|
-
default:
|
|
230
|
-
throw new Error(`Unknown video feature: ${String(feature)}`);
|
|
231
|
-
}
|
|
178
|
+
return featureInputBuilder.buildVideoFeatureInput(feature, data);
|
|
232
179
|
}
|
|
233
180
|
}
|
|
234
181
|
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini Structured Text Service
|
|
3
|
+
* Handles structured JSON response generation with schema validation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { geminiTextGenerationService } from "./gemini-text-generation.service";
|
|
7
|
+
import type {
|
|
8
|
+
GeminiContent,
|
|
9
|
+
GeminiGenerationConfig,
|
|
10
|
+
} from "../../domain/entities";
|
|
11
|
+
|
|
12
|
+
declare const __DEV__: boolean;
|
|
13
|
+
|
|
14
|
+
/**
 * Requests JSON output from Gemini constrained by a response schema and
 * parses the returned text into a typed value.
 */
class GeminiStructuredTextService {
  /**
   * Generate structured JSON response with schema.
   *
   * @param model - Model ID forwarded to `generateContent`.
   * @param prompt - User prompt sent as the single text part.
   * @param schema - Response schema placed into the generation config.
   * @param config - Extra generation settings; the response MIME type and
   *   schema are always set by this method.
   * @returns The parsed JSON payload, cast to `T` without runtime validation.
   * @throws Error when the response text is empty or is not valid JSON.
   */
  async generateStructuredText<T>(
    model: string,
    prompt: string,
    schema: Record<string, unknown>,
    config?: Omit<GeminiGenerationConfig, "responseMimeType" | "responseSchema">,
  ): Promise<T> {
    const contents: GeminiContent[] = [
      { parts: [{ text: prompt }], role: "user" },
    ];

    const response = await geminiTextGenerationService.generateContent(
      model,
      contents,
      this.buildGenerationConfig(schema, config),
    );

    return this.parseJSONResponse<T>(response);
  }

  /**
   * Generate structured JSON response with images and schema.
   *
   * @param model - Model ID forwarded to `generateContent`.
   * @param prompt - User prompt sent as the first part.
   * @param images - Images appended as inline-data parts after the prompt.
   *   Each `base64` value may be raw base64 or a `data:` URI.
   * @param schema - Response schema placed into the generation config.
   * @param config - Extra generation settings; the response MIME type and
   *   schema are always set by this method.
   * @returns The parsed JSON payload, cast to `T` without runtime validation.
   * @throws Error when the response text is empty or is not valid JSON.
   */
  async generateStructuredTextWithImages<T>(
    model: string,
    prompt: string,
    images: Array<{ base64: string; mimeType: string }>,
    schema: Record<string, unknown>,
    config?: Omit<GeminiGenerationConfig, "responseMimeType" | "responseSchema">,
  ): Promise<T> {
    const contents: GeminiContent[] = [
      { parts: this.buildUserParts(prompt, images), role: "user" },
    ];

    const response = await geminiTextGenerationService.generateContent(
      model,
      contents,
      this.buildGenerationConfig(schema, config),
    );

    return this.parseJSONResponse<T>(response);
  }

  /** Merge caller settings with the JSON MIME type and response schema. */
  private buildGenerationConfig(
    schema: Record<string, unknown>,
    config?: Omit<GeminiGenerationConfig, "responseMimeType" | "responseSchema">,
  ): GeminiGenerationConfig {
    return {
      ...config,
      responseMimeType: "application/json",
      // NOTE(review): the double cast suggests `responseSchema` is declared
      // too narrowly on GeminiGenerationConfig; confirm and widen the type.
      responseSchema: schema as unknown as undefined,
    };
  }

  /** Build the user parts: the prompt text followed by one part per image. */
  private buildUserParts(
    prompt: string,
    images: Array<{ base64: string; mimeType: string }>,
  ): GeminiContent["parts"] {
    const parts: GeminiContent["parts"] = [{ text: prompt }];

    for (const image of images) {
      parts.push({
        inlineData: {
          mimeType: image.mimeType,
          data: this.stripDataUriPrefix(image.base64),
        },
      });
    }

    return parts;
  }

  /**
   * Remove a leading `data:<mime>;base64,` header so only the payload is
   * sent. Raw base64 never contains ":" or ",", so it passes through as is.
   */
  private stripDataUriPrefix(base64: string): string {
    return base64.replace(/^data:[^,]*;base64,/i, "");
  }

  /**
   * Remove one Markdown code fence wrapping the whole text. Only the
   * outermost fence is stripped, so "```" sequences inside JSON string
   * values are left intact.
   */
  private stripCodeFence(text: string): string {
    return text
      .trim()
      .replace(/^```(?:json)?[ \t]*\r?\n?/i, "")
      .replace(/\r?\n?[ \t]*```$/, "")
      .trim();
  }

  /**
   * Parse JSON response from Gemini.
   *
   * Joins the text parts of the first candidate, strips a wrapping code
   * fence if present, and parses the remainder as JSON.
   *
   * @throws Error prefixed with "Failed to parse structured response:" when
   *   no text was returned or the text is not valid JSON.
   */
  private parseJSONResponse<T>(response: unknown): T {
    const candidates = (
      response as
        | { candidates?: Array<{ content: { parts: Array<{ text?: string }> } }> }
        | null
        | undefined
    )?.candidates;

    const parts = candidates?.[0]?.content?.parts;
    const text = parts ? parts.map((part) => part.text ?? "").join("") : "";
    const cleanedText = this.stripCodeFence(text);

    try {
      if (cleanedText === "") {
        // Report an explicit reason instead of JSON.parse's opaque
        // "Unexpected end of JSON input".
        throw new Error("response contained no text");
      }
      return JSON.parse(cleanedText) as T;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      if (typeof __DEV__ !== "undefined" && __DEV__) {
        // eslint-disable-next-line no-console
        console.error("[Gemini] Failed to parse structured response:", {
          text: cleanedText.substring(0, 200),
          error: reason,
        });
      }
      throw new Error(`Failed to parse structured response: ${reason}`);
    }
  }
}

export const geminiStructuredTextService = new GeminiStructuredTextService();
|
|
@@ -11,7 +11,6 @@ import type {
|
|
|
11
11
|
GeminiGenerationConfig,
|
|
12
12
|
GeminiResponse,
|
|
13
13
|
GeminiPart,
|
|
14
|
-
GeminiFinishReason,
|
|
15
14
|
} from "../../domain/entities";
|
|
16
15
|
|
|
17
16
|
declare const __DEV__: boolean;
|
|
@@ -69,10 +68,10 @@ class GeminiTextGenerationService {
|
|
|
69
68
|
}
|
|
70
69
|
|
|
71
70
|
return {
|
|
72
|
-
candidates: response.candidates?.map((candidate
|
|
71
|
+
candidates: response.candidates?.map((candidate) => ({
|
|
73
72
|
content: {
|
|
74
73
|
parts: candidate.content.parts
|
|
75
|
-
.map((part
|
|
74
|
+
.map((part): GeminiPart | null => {
|
|
76
75
|
if ("text" in part && part.text !== undefined) {
|
|
77
76
|
return { text: part.text };
|
|
78
77
|
}
|
|
@@ -86,10 +85,10 @@ class GeminiTextGenerationService {
|
|
|
86
85
|
}
|
|
87
86
|
return null;
|
|
88
87
|
})
|
|
89
|
-
.filter((p
|
|
90
|
-
role: (candidate.content.role || "model")
|
|
88
|
+
.filter((p): p is GeminiPart => p !== null),
|
|
89
|
+
role: (candidate.content.role || "model"),
|
|
91
90
|
},
|
|
92
|
-
finishReason: candidate.finishReason
|
|
91
|
+
finishReason: candidate.finishReason,
|
|
93
92
|
})),
|
|
94
93
|
};
|
|
95
94
|
} catch (error) {
|
|
@@ -159,94 +158,6 @@ class GeminiTextGenerationService {
|
|
|
159
158
|
|
|
160
159
|
return this.generateContent(model, contents, config);
|
|
161
160
|
}
|
|
162
|
-
|
|
163
|
-
/**
|
|
164
|
-
* Generate structured JSON response with schema
|
|
165
|
-
*/
|
|
166
|
-
async generateStructuredText<T>(
|
|
167
|
-
model: string,
|
|
168
|
-
prompt: string,
|
|
169
|
-
schema: Record<string, unknown>,
|
|
170
|
-
config?: Omit<GeminiGenerationConfig, "responseMimeType" | "responseSchema">,
|
|
171
|
-
): Promise<T> {
|
|
172
|
-
const generationConfig: GeminiGenerationConfig = {
|
|
173
|
-
...config,
|
|
174
|
-
responseMimeType: "application/json",
|
|
175
|
-
responseSchema: schema as unknown as undefined,
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
const contents: GeminiContent[] = [
|
|
179
|
-
{ parts: [{ text: prompt }], role: "user" },
|
|
180
|
-
];
|
|
181
|
-
|
|
182
|
-
const response = await this.generateContent(model, contents, generationConfig);
|
|
183
|
-
const text = extractTextFromResponse(response);
|
|
184
|
-
|
|
185
|
-
// Clean and parse JSON (remove markdown code blocks if present)
|
|
186
|
-
const cleanedText = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
|
|
187
|
-
|
|
188
|
-
try {
|
|
189
|
-
return JSON.parse(cleanedText) as T;
|
|
190
|
-
} catch (error) {
|
|
191
|
-
if (typeof __DEV__ !== "undefined" && __DEV__) {
|
|
192
|
-
// eslint-disable-next-line no-console
|
|
193
|
-
console.error("[Gemini] Failed to parse structured response:", {
|
|
194
|
-
text: cleanedText.substring(0, 200),
|
|
195
|
-
error: error instanceof Error ? error.message : String(error),
|
|
196
|
-
});
|
|
197
|
-
}
|
|
198
|
-
throw new Error(`Failed to parse structured response: ${error instanceof Error ? error.message : String(error)}`);
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
/**
|
|
203
|
-
* Generate structured JSON response with images and schema
|
|
204
|
-
*/
|
|
205
|
-
async generateStructuredTextWithImages<T>(
|
|
206
|
-
model: string,
|
|
207
|
-
prompt: string,
|
|
208
|
-
images: Array<{ base64: string; mimeType: string }>,
|
|
209
|
-
schema: Record<string, unknown>,
|
|
210
|
-
config?: Omit<GeminiGenerationConfig, "responseMimeType" | "responseSchema">,
|
|
211
|
-
): Promise<T> {
|
|
212
|
-
const generationConfig: GeminiGenerationConfig = {
|
|
213
|
-
...config,
|
|
214
|
-
responseMimeType: "application/json",
|
|
215
|
-
responseSchema: schema as unknown as undefined,
|
|
216
|
-
};
|
|
217
|
-
|
|
218
|
-
const parts: GeminiContent["parts"] = [{ text: prompt }];
|
|
219
|
-
|
|
220
|
-
for (const image of images) {
|
|
221
|
-
parts.push({
|
|
222
|
-
inlineData: {
|
|
223
|
-
mimeType: image.mimeType,
|
|
224
|
-
data: extractBase64Data(image.base64),
|
|
225
|
-
},
|
|
226
|
-
});
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
const contents: GeminiContent[] = [{ parts, role: "user" }];
|
|
230
|
-
|
|
231
|
-
const response = await this.generateContent(model, contents, generationConfig);
|
|
232
|
-
const text = extractTextFromResponse(response);
|
|
233
|
-
|
|
234
|
-
// Clean and parse JSON
|
|
235
|
-
const cleanedText = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
|
|
236
|
-
|
|
237
|
-
try {
|
|
238
|
-
return JSON.parse(cleanedText) as T;
|
|
239
|
-
} catch (error) {
|
|
240
|
-
if (typeof __DEV__ !== "undefined" && __DEV__) {
|
|
241
|
-
// eslint-disable-next-line no-console
|
|
242
|
-
console.error("[Gemini] Failed to parse structured response:", {
|
|
243
|
-
text: cleanedText.substring(0, 200),
|
|
244
|
-
error: error instanceof Error ? error.message : String(error),
|
|
245
|
-
});
|
|
246
|
-
}
|
|
247
|
-
throw new Error(`Failed to parse structured response: ${error instanceof Error ? error.message : String(error)}`);
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
161
|
}
|
|
251
162
|
|
|
252
163
|
export const geminiTextGenerationService = new GeminiTextGenerationService();
|
|
@@ -6,30 +6,19 @@
|
|
|
6
6
|
|
|
7
7
|
import { geminiClientCoreService } from "./gemini-client-core.service";
|
|
8
8
|
import { geminiRetryService } from "./gemini-retry.service";
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
9
|
+
import { veoHttpClient } from "./veo-http-client.service";
|
|
10
|
+
import { veoPollingService } from "./veo-polling.service";
|
|
11
11
|
import { createVideoError } from "./gemini-video-error";
|
|
12
12
|
import { DEFAULT_MODELS } from "../../domain/entities";
|
|
13
13
|
import type {
|
|
14
14
|
VideoGenerationInput,
|
|
15
15
|
VideoGenerationResult,
|
|
16
16
|
VideoGenerationProgress,
|
|
17
|
-
VeoOperation,
|
|
18
17
|
TextToVideoInput,
|
|
19
18
|
} from "../../domain/entities";
|
|
20
19
|
|
|
21
20
|
declare const __DEV__: boolean;
|
|
22
21
|
|
|
23
|
-
const POLL_INTERVAL = 10000;
|
|
24
|
-
const MAX_POLL_DURATION = 300000;
|
|
25
|
-
const MAX_POLL_ATTEMPTS = Math.floor(MAX_POLL_DURATION / POLL_INTERVAL);
|
|
26
|
-
const VEO_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
|
|
27
|
-
|
|
28
|
-
/** Calculate polling progress (10-95% range) */
|
|
29
|
-
function calculateProgress(attempt: number, maxAttempts: number): number {
|
|
30
|
-
return Math.round(10 + (attempt / maxAttempts) * 85);
|
|
31
|
-
}
|
|
32
|
-
|
|
33
22
|
class GeminiVideoGenerationService {
|
|
34
23
|
async generateTextToVideo(
|
|
35
24
|
input: TextToVideoInput,
|
|
@@ -53,14 +42,24 @@ class GeminiVideoGenerationService {
|
|
|
53
42
|
console.log("[GeminiVideoGeneration] Starting operation with model:", model);
|
|
54
43
|
}
|
|
55
44
|
|
|
56
|
-
const
|
|
45
|
+
const instances = [{ prompt: input.prompt }];
|
|
46
|
+
const parameters = {
|
|
47
|
+
aspectRatio: input.options?.aspectRatio || "16:9",
|
|
48
|
+
...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
onProgress?.({ status: "queued", progress: 5 });
|
|
52
|
+
|
|
53
|
+
const operation = await geminiRetryService.executeWithRetry(() =>
|
|
54
|
+
veoHttpClient.startOperation(model, apiKey, instances, parameters),
|
|
55
|
+
);
|
|
57
56
|
|
|
58
57
|
if (typeof __DEV__ !== "undefined" && __DEV__) {
|
|
59
58
|
// eslint-disable-next-line no-console
|
|
60
59
|
console.log("[GeminiVideoGeneration] Operation started:", operation.name);
|
|
61
60
|
}
|
|
62
61
|
|
|
63
|
-
return
|
|
62
|
+
return veoPollingService.pollOperation(operation.name, apiKey, model, onProgress);
|
|
64
63
|
}
|
|
65
64
|
|
|
66
65
|
async generateVideo(
|
|
@@ -77,131 +76,28 @@ class GeminiVideoGenerationService {
|
|
|
77
76
|
const apiKey = config?.apiKey;
|
|
78
77
|
if (!apiKey) throw createVideoError("INVALID_INPUT", "API key is required");
|
|
79
78
|
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
apiKey: string,
|
|
88
|
-
onProgress?: (progress: VideoGenerationProgress) => void,
|
|
89
|
-
): Promise<VeoOperation> {
|
|
90
|
-
const url = `${VEO_API_BASE}/models/${model}:predictLongRunning`;
|
|
91
|
-
const body = {
|
|
92
|
-
instances: [{ prompt: input.prompt }],
|
|
93
|
-
parameters: {
|
|
94
|
-
aspectRatio: input.options?.aspectRatio || "16:9",
|
|
95
|
-
...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
|
|
96
|
-
},
|
|
79
|
+
const instances = [{
|
|
80
|
+
prompt: input.prompt,
|
|
81
|
+
image: { bytesBase64Encoded: input.image },
|
|
82
|
+
}];
|
|
83
|
+
const parameters = {
|
|
84
|
+
aspectRatio: input.options?.aspectRatio || "16:9",
|
|
85
|
+
...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
|
|
97
86
|
};
|
|
98
|
-
onProgress?.({ status: "queued", progress: 5 });
|
|
99
|
-
return geminiRetryService.executeWithRetry(() => this.postRequest(url, body, apiKey));
|
|
100
|
-
}
|
|
101
87
|
|
|
102
|
-
private async startImageToVideoOperation(
|
|
103
|
-
input: VideoGenerationInput,
|
|
104
|
-
model: string,
|
|
105
|
-
apiKey: string,
|
|
106
|
-
onProgress?: (progress: VideoGenerationProgress) => void,
|
|
107
|
-
): Promise<VeoOperation> {
|
|
108
|
-
const url = `${VEO_API_BASE}/models/${model}:predictLongRunning`;
|
|
109
|
-
const body = {
|
|
110
|
-
instances: [{ prompt: input.prompt, image: { bytesBase64Encoded: input.image } }],
|
|
111
|
-
parameters: {
|
|
112
|
-
aspectRatio: input.options?.aspectRatio || "16:9",
|
|
113
|
-
...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
|
|
114
|
-
},
|
|
115
|
-
};
|
|
116
88
|
onProgress?.({ status: "queued", progress: 5 });
|
|
117
|
-
return geminiRetryService.executeWithRetry(() => this.postRequest(url, body, apiKey));
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
private async postRequest(url: string, body: Record<string, unknown>, apiKey: string): Promise<VeoOperation> {
|
|
121
|
-
const res = await fetch(url, {
|
|
122
|
-
method: "POST",
|
|
123
|
-
headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
|
|
124
|
-
body: JSON.stringify(body),
|
|
125
|
-
});
|
|
126
|
-
if (!res.ok) {
|
|
127
|
-
throw createVideoError("OPERATION_FAILED", `Veo API error: ${await res.text()}`, res.status);
|
|
128
|
-
}
|
|
129
|
-
return res.json() as Promise<VeoOperation>;
|
|
130
|
-
}
|
|
131
89
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
model: string,
|
|
136
|
-
onProgress?: (progress: VideoGenerationProgress) => void,
|
|
137
|
-
): Promise<VideoGenerationResult> {
|
|
138
|
-
const url = `${VEO_API_BASE}/${operationName}`;
|
|
139
|
-
let attempts = 0;
|
|
140
|
-
onProgress?.({ status: "processing", progress: 10 });
|
|
90
|
+
const operation = await geminiRetryService.executeWithRetry(() =>
|
|
91
|
+
veoHttpClient.startOperation(model, apiKey, instances, parameters),
|
|
92
|
+
);
|
|
141
93
|
|
|
142
|
-
|
|
143
|
-
// eslint-disable-next-line no-console
|
|
144
|
-
console.log("[GeminiVideoGeneration] Starting polling...", { operationName, maxAttempts: MAX_POLL_ATTEMPTS });
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
while (attempts < MAX_POLL_ATTEMPTS) {
|
|
148
|
-
await this.delay(POLL_INTERVAL);
|
|
149
|
-
attempts++;
|
|
150
|
-
const progress = calculateProgress(attempts, MAX_POLL_ATTEMPTS);
|
|
151
|
-
onProgress?.({ status: "processing", progress });
|
|
152
|
-
|
|
153
|
-
if (typeof __DEV__ !== "undefined" && __DEV__) {
|
|
154
|
-
// eslint-disable-next-line no-console
|
|
155
|
-
console.log("[GeminiVideoGeneration] Poll attempt:", { attempts, progress });
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
const operation = await this.fetchOperationStatus(url, apiKey);
|
|
159
|
-
if (operation.error) {
|
|
160
|
-
if (typeof __DEV__ !== "undefined" && __DEV__) {
|
|
161
|
-
// eslint-disable-next-line no-console
|
|
162
|
-
console.error("[GeminiVideoGeneration] Operation error:", operation.error);
|
|
163
|
-
}
|
|
164
|
-
throw createVideoError("OPERATION_FAILED", operation.error.message, operation.error.code);
|
|
165
|
-
}
|
|
166
|
-
if (operation.done) {
|
|
167
|
-
if (typeof __DEV__ !== "undefined" && __DEV__) {
|
|
168
|
-
// eslint-disable-next-line no-console
|
|
169
|
-
console.log("[GeminiVideoGeneration] Operation completed!");
|
|
170
|
-
}
|
|
171
|
-
const rawVideoUrl = extractVideoUrl(operation);
|
|
172
|
-
if (rawVideoUrl) {
|
|
173
|
-
if (typeof __DEV__ !== "undefined" && __DEV__) {
|
|
174
|
-
// eslint-disable-next-line no-console
|
|
175
|
-
console.log("[GeminiVideoGeneration] Downloading video...");
|
|
176
|
-
}
|
|
177
|
-
const result = await downloadVideoFromVeo(rawVideoUrl, apiKey);
|
|
178
|
-
onProgress?.({ status: "completed", progress: 100 });
|
|
179
|
-
return {
|
|
180
|
-
videoUrl: result.base64DataUri,
|
|
181
|
-
metadata: { duration: 8, resolution: "720p", aspectRatio: "16:9", model, operationName },
|
|
182
|
-
};
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
throw createVideoError("TIMEOUT", `Operation timed out after ${MAX_POLL_DURATION / 1000}s`);
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
private async fetchOperationStatus(url: string, apiKey: string): Promise<VeoOperation> {
|
|
190
|
-
return geminiRetryService.executeWithRetry(async () => {
|
|
191
|
-
const res = await fetch(url, { method: "GET", headers: { "x-goog-api-key": apiKey } });
|
|
192
|
-
if (!res.ok) throw createVideoError("NETWORK", `Polling error: ${await res.text()}`, res.status);
|
|
193
|
-
return res.json() as Promise<VeoOperation>;
|
|
194
|
-
});
|
|
94
|
+
return veoPollingService.pollOperation(operation.name, apiKey, model, onProgress);
|
|
195
95
|
}
|
|
196
96
|
|
|
197
97
|
private validatePrompt(prompt: string): void {
|
|
198
98
|
if (!prompt?.trim()) throw createVideoError("INVALID_INPUT", "Prompt is required");
|
|
199
99
|
if (prompt.length > 2000) throw createVideoError("INVALID_INPUT", "Prompt exceeds 2000 characters");
|
|
200
100
|
}
|
|
201
|
-
|
|
202
|
-
private delay(ms: number): Promise<void> {
|
|
203
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
204
|
-
}
|
|
205
101
|
}
|
|
206
102
|
|
|
207
103
|
export const geminiVideoGenerationService = new GeminiVideoGenerationService();
|