@umituz/react-native-ai-gemini-provider 1.14.12 → 1.14.13

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@umituz/react-native-ai-gemini-provider",
3
- "version": "1.14.12",
3
+ "version": "1.14.13",
4
4
  "description": "Google Gemini AI provider for React Native applications",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -10,12 +10,12 @@ import type {
10
10
  JobSubmission,
11
11
  JobStatus,
12
12
  SubscribeOptions,
13
- RunOptions,
14
13
  ImageFeatureType,
15
14
  VideoFeatureType,
16
15
  ImageFeatureInputData,
17
16
  VideoFeatureInputData,
18
17
  } from "@umituz/react-native-ai-generation-content";
18
+ import type { ExecutionOptions } from "./generation-executor";
19
19
  import type {
20
20
  GeminiImageInput,
21
21
  GeminiImageGenerationResult,
@@ -88,11 +88,9 @@ export class GeminiProvider implements IAIProvider {
88
88
  async run<T = unknown>(
89
89
  model: string,
90
90
  input: Record<string, unknown>,
91
- options?: RunOptions,
91
+ options?: ExecutionOptions,
92
92
  ): Promise<T> {
93
- return generationExecutor.executeGeneration<T>(model, input, {
94
- onProgress: options?.onProgress,
95
- });
93
+ return generationExecutor.executeGeneration<T>(model, input, options);
96
94
  }
97
95
 
98
96
  async generateImage(prompt: string): Promise<GeminiImageGenerationResult> {
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Gemini Video Downloader
3
+ * Single Responsibility: Download video from Veo URL and convert to base64
4
+ * Required because Veo URLs need x-goog-api-key authentication
5
+ */
6
+
7
+ declare const __DEV__: boolean;
8
+
9
+ export interface VideoDownloadResult {
10
+ base64DataUri: string;
11
+ sizeInMB: number;
12
+ mimeType: string;
13
+ }
14
+
15
+ /**
16
+ * Download video from authenticated Veo URL
17
+ */
18
+ export async function downloadVideoFromVeo(
19
+ videoUrl: string,
20
+ apiKey: string,
21
+ ): Promise<VideoDownloadResult> {
22
+ if (typeof __DEV__ !== "undefined" && __DEV__) {
23
+ // eslint-disable-next-line no-console
24
+ console.log("[GeminiVideoDownloader] Downloading video from Veo...");
25
+ }
26
+
27
+ const response = await fetch(videoUrl, {
28
+ method: "GET",
29
+ headers: {
30
+ "x-goog-api-key": apiKey,
31
+ },
32
+ });
33
+
34
+ if (!response.ok) {
35
+ throw new Error(`Failed to download video: ${response.status}`);
36
+ }
37
+
38
+ const blob = await response.blob();
39
+ const sizeInMB = blob.size / 1024 / 1024;
40
+
41
+ if (typeof __DEV__ !== "undefined" && __DEV__) {
42
+ // eslint-disable-next-line no-console
43
+ console.log("[GeminiVideoDownloader] Video downloaded", {
44
+ size: `${sizeInMB.toFixed(2)} MB`,
45
+ type: blob.type,
46
+ });
47
+ }
48
+
49
+ const base64DataUri = await convertBlobToBase64(blob);
50
+
51
+ return {
52
+ base64DataUri,
53
+ sizeInMB,
54
+ mimeType: blob.type || "video/mp4",
55
+ };
56
+ }
57
+
58
+ /**
59
+ * Convert blob to base64 data URI
60
+ */
61
+ function convertBlobToBase64(blob: Blob): Promise<string> {
62
+ return new Promise((resolve, reject) => {
63
+ const reader = new FileReader();
64
+ reader.onloadend = () => {
65
+ const base64 = reader.result as string;
66
+ if (typeof __DEV__ !== "undefined" && __DEV__) {
67
+ // eslint-disable-next-line no-console
68
+ console.log("[GeminiVideoDownloader] Video converted to base64", {
69
+ length: base64.length,
70
+ });
71
+ }
72
+ resolve(base64);
73
+ };
74
+ reader.onerror = () => reject(new Error("Failed to convert video to base64"));
75
+ reader.readAsDataURL(blob);
76
+ });
77
+ }
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Gemini Video Error Factory
3
+ * Single Responsibility: Create typed video generation errors
4
+ */
5
+
6
+ import type { VideoGenerationError } from "../../domain/entities";
7
+
8
+ /**
9
+ * Create a typed video generation error
10
+ */
11
+ export function createVideoError(
12
+ type: VideoGenerationError["type"],
13
+ message: string,
14
+ statusCode?: number,
15
+ ): VideoGenerationError {
16
+ const error = new Error(message) as VideoGenerationError;
17
+ error.type = type;
18
+ error.statusCode = statusCode;
19
+ error.retryable = type === "NETWORK" || type === "TIMEOUT";
20
+ return error;
21
+ }
@@ -1,190 +1,123 @@
1
1
  /**
2
2
  * Gemini Video Generation Service
3
- * Handles video generation using Google Veo REST API (predictLongRunning)
3
+ * Orchestrates video generation using Google Veo REST API
4
4
  * @see https://ai.google.dev/gemini-api/docs/video
5
5
  */
6
6
 
7
- import { calculatePollingProgress } from "@umituz/react-native-ai-generation-content";
8
7
  import { geminiClientCoreService } from "./gemini-client-core.service";
9
8
  import { geminiRetryService } from "./gemini-retry.service";
9
+ import { downloadVideoFromVeo } from "./gemini-video-downloader";
10
+ import { extractVideoUrl } from "./gemini-video-url-extractor";
11
+ import { createVideoError } from "./gemini-video-error";
10
12
  import { DEFAULT_MODELS } from "../../domain/entities";
11
13
  import type {
12
14
  VideoGenerationInput,
13
15
  VideoGenerationResult,
14
16
  VideoGenerationProgress,
15
17
  VeoOperation,
16
- VideoGenerationError,
17
18
  TextToVideoInput,
18
19
  } from "../../domain/entities";
19
20
 
20
21
  declare const __DEV__: boolean;
21
22
 
22
- const DEFAULT_POLL_INTERVAL = 10000; // 10 seconds
23
- const MAX_POLL_DURATION = 300000; // 5 minutes
24
- const MAX_POLL_ATTEMPTS = Math.floor(MAX_POLL_DURATION / DEFAULT_POLL_INTERVAL);
23
+ const POLL_INTERVAL = 10000;
24
+ const MAX_POLL_DURATION = 300000;
25
+ const MAX_POLL_ATTEMPTS = Math.floor(MAX_POLL_DURATION / POLL_INTERVAL);
25
26
  const VEO_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
26
27
 
28
+ /** Calculate polling progress (10-95% range) */
29
+ function calculateProgress(attempt: number, maxAttempts: number): number {
30
+ return Math.round(10 + (attempt / maxAttempts) * 85);
31
+ }
32
+
27
33
  class GeminiVideoGenerationService {
28
- /**
29
- * Generate video from text prompt using Veo REST API (text-to-video)
30
- * Uses predictLongRunning endpoint with instances/parameters format
31
- */
32
34
  async generateTextToVideo(
33
35
  input: TextToVideoInput,
34
36
  onProgress?: (progress: VideoGenerationProgress) => void,
35
37
  ): Promise<VideoGenerationResult> {
36
38
  geminiClientCoreService.validateInitialization();
37
- this.validateTextInput(input);
39
+ this.validatePrompt(input.prompt);
38
40
 
39
41
  const config = geminiClientCoreService.getConfig();
40
- const videoModel = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
42
+ const model = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
41
43
  const apiKey = config?.apiKey;
44
+ if (!apiKey) throw createVideoError("INVALID_INPUT", "API key is required");
42
45
 
43
46
  if (typeof __DEV__ !== "undefined" && __DEV__) {
44
47
  // eslint-disable-next-line no-console
45
- console.log("[GeminiVideoGeneration] generateTextToVideo() called", {
46
- model: videoModel,
47
- promptLength: input.prompt.length,
48
- });
49
- }
50
-
51
- // REST API uses predictLongRunning endpoint
52
- const url = `${VEO_API_BASE}/models/${videoModel}:predictLongRunning`;
53
-
54
- // REST API format: instances array with parameters object
55
- const requestBody = {
56
- instances: [{ prompt: input.prompt }],
57
- parameters: {
58
- aspectRatio: input.options?.aspectRatio || "16:9",
59
- ...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
60
- },
61
- };
62
-
63
- onProgress?.({ status: "queued", progress: 5 });
64
-
65
- if (typeof __DEV__ !== "undefined" && __DEV__) {
66
- // eslint-disable-next-line no-console
67
- console.log("[GeminiVideoGeneration] Request URL:", url);
68
- // eslint-disable-next-line no-console
69
- console.log("[GeminiVideoGeneration] Request body:", JSON.stringify(requestBody, null, 2));
70
- }
71
-
72
- const operation = await geminiRetryService.executeWithRetry(async () => {
73
- const res = await fetch(url, {
74
- method: "POST",
75
- headers: {
76
- "Content-Type": "application/json",
77
- "x-goog-api-key": apiKey!,
78
- },
79
- body: JSON.stringify(requestBody),
80
- });
81
-
82
- if (!res.ok) {
83
- const errorText = await res.text();
84
- throw this.createError("OPERATION_FAILED", `Veo API error (${res.status}): ${errorText}`, res.status);
85
- }
86
-
87
- return res.json() as Promise<VeoOperation>;
88
- });
89
-
90
- if (typeof __DEV__ !== "undefined" && __DEV__) {
91
- // eslint-disable-next-line no-console
92
- console.log("[GeminiVideoGeneration] Operation started", {
93
- operationName: operation.name,
94
- });
48
+ console.log("[GeminiVideoGeneration] generateTextToVideo()", { model });
95
49
  }
96
50
 
97
- onProgress?.({ status: "processing", progress: 10 });
98
-
99
- return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
51
+ const operation = await this.startOperation(input, model, apiKey, onProgress);
52
+ return this.pollOperation(operation.name, apiKey, model, onProgress);
100
53
  }
101
54
 
102
- /**
103
- * Generate video from image and prompt using Veo REST API (image-to-video)
104
- * Uses predictLongRunning endpoint with image in instances
105
- */
106
55
  async generateVideo(
107
56
  input: VideoGenerationInput,
108
57
  onProgress?: (progress: VideoGenerationProgress) => void,
109
58
  ): Promise<VideoGenerationResult> {
110
- // If no image provided, use text-to-video
111
- if (!input.image) {
112
- return this.generateTextToVideo(input, onProgress);
113
- }
59
+ if (!input.image) return this.generateTextToVideo(input, onProgress);
114
60
 
115
61
  geminiClientCoreService.validateInitialization();
116
- this.validateInput(input);
62
+ this.validatePrompt(input.prompt);
117
63
 
118
64
  const config = geminiClientCoreService.getConfig();
119
- const videoModel = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
65
+ const model = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
120
66
  const apiKey = config?.apiKey;
67
+ if (!apiKey) throw createVideoError("INVALID_INPUT", "API key is required");
121
68
 
122
- if (typeof __DEV__ !== "undefined" && __DEV__) {
123
- // eslint-disable-next-line no-console
124
- console.log("[GeminiVideoGeneration] generateVideo() called", {
125
- model: videoModel,
126
- promptLength: input.prompt.length,
127
- hasImage: !!input.image,
128
- });
129
- }
130
-
131
- // REST API uses predictLongRunning endpoint
132
- const url = `${VEO_API_BASE}/models/${videoModel}:predictLongRunning`;
69
+ const operation = await this.startImageToVideoOperation(input, model, apiKey, onProgress);
70
+ return this.pollOperation(operation.name, apiKey, model, onProgress);
71
+ }
133
72
 
134
- // REST API format with image for image-to-video
135
- const requestBody = {
136
- instances: [{
137
- prompt: input.prompt,
138
- image: {
139
- bytesBase64Encoded: input.image,
140
- },
141
- }],
73
+ private async startOperation(
74
+ input: TextToVideoInput,
75
+ model: string,
76
+ apiKey: string,
77
+ onProgress?: (progress: VideoGenerationProgress) => void,
78
+ ): Promise<VeoOperation> {
79
+ const url = `${VEO_API_BASE}/models/${model}:predictLongRunning`;
80
+ const body = {
81
+ instances: [{ prompt: input.prompt }],
142
82
  parameters: {
143
83
  aspectRatio: input.options?.aspectRatio || "16:9",
144
84
  ...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
145
85
  },
146
86
  };
147
-
148
87
  onProgress?.({ status: "queued", progress: 5 });
88
+ return geminiRetryService.executeWithRetry(() => this.postRequest(url, body, apiKey));
89
+ }
149
90
 
150
- if (typeof __DEV__ !== "undefined" && __DEV__) {
151
- // eslint-disable-next-line no-console
152
- console.log("[GeminiVideoGeneration] Request URL:", url);
153
- }
154
-
155
- const operation = await geminiRetryService.executeWithRetry(async () => {
156
- const res = await fetch(url, {
157
- method: "POST",
158
- headers: {
159
- "Content-Type": "application/json",
160
- "x-goog-api-key": apiKey!,
161
- },
162
- body: JSON.stringify(requestBody),
163
- });
164
-
165
- if (!res.ok) {
166
- const errorText = await res.text();
167
- throw this.createError("OPERATION_FAILED", `Veo API error (${res.status}): ${errorText}`, res.status);
168
- }
91
+ private async startImageToVideoOperation(
92
+ input: VideoGenerationInput,
93
+ model: string,
94
+ apiKey: string,
95
+ onProgress?: (progress: VideoGenerationProgress) => void,
96
+ ): Promise<VeoOperation> {
97
+ const url = `${VEO_API_BASE}/models/${model}:predictLongRunning`;
98
+ const body = {
99
+ instances: [{ prompt: input.prompt, image: { bytesBase64Encoded: input.image } }],
100
+ parameters: {
101
+ aspectRatio: input.options?.aspectRatio || "16:9",
102
+ ...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
103
+ },
104
+ };
105
+ onProgress?.({ status: "queued", progress: 5 });
106
+ return geminiRetryService.executeWithRetry(() => this.postRequest(url, body, apiKey));
107
+ }
169
108
 
170
- return res.json() as Promise<VeoOperation>;
109
+ private async postRequest(url: string, body: Record<string, unknown>, apiKey: string): Promise<VeoOperation> {
110
+ const res = await fetch(url, {
111
+ method: "POST",
112
+ headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
113
+ body: JSON.stringify(body),
171
114
  });
172
-
173
- if (typeof __DEV__ !== "undefined" && __DEV__) {
174
- // eslint-disable-next-line no-console
175
- console.log("[GeminiVideoGeneration] Operation started", {
176
- operationName: operation.name,
177
- });
115
+ if (!res.ok) {
116
+ throw createVideoError("OPERATION_FAILED", `Veo API error: ${await res.text()}`, res.status);
178
117
  }
179
-
180
- onProgress?.({ status: "processing", progress: 10 });
181
-
182
- return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
118
+ return res.json() as Promise<VeoOperation>;
183
119
  }
184
120
 
185
- /**
186
- * Poll operation status until completion
187
- */
188
121
  private async pollOperation(
189
122
  operationName: string,
190
123
  apiKey: string,
@@ -193,142 +126,45 @@ class GeminiVideoGenerationService {
193
126
  ): Promise<VideoGenerationResult> {
194
127
  const url = `${VEO_API_BASE}/${operationName}`;
195
128
  let attempts = 0;
129
+ onProgress?.({ status: "processing", progress: 10 });
196
130
 
197
131
  while (attempts < MAX_POLL_ATTEMPTS) {
198
- await this.delay(DEFAULT_POLL_INTERVAL);
132
+ await this.delay(POLL_INTERVAL);
199
133
  attempts++;
134
+ onProgress?.({ status: "processing", progress: calculateProgress(attempts, MAX_POLL_ATTEMPTS) });
200
135
 
201
- const progress = calculatePollingProgress(attempts, MAX_POLL_ATTEMPTS);
202
-
203
- onProgress?.({
204
- status: "processing",
205
- progress,
206
- estimatedTimeRemaining: (MAX_POLL_ATTEMPTS - attempts) * (DEFAULT_POLL_INTERVAL / 1000),
207
- });
208
-
209
- if (typeof __DEV__ !== "undefined" && __DEV__) {
210
- // eslint-disable-next-line no-console
211
- console.log("[GeminiVideoGeneration] Polling operation", {
212
- attempt: attempts,
213
- progress: `${progress.toFixed(0)}%`,
214
- });
215
- }
216
-
217
- const operation = await geminiRetryService.executeWithRetry(async () => {
218
- const res = await fetch(url, {
219
- method: "GET",
220
- headers: {
221
- "x-goog-api-key": apiKey,
222
- },
223
- });
224
-
225
- if (!res.ok) {
226
- const errorText = await res.text();
227
- throw this.createError("NETWORK", `Polling error (${res.status}): ${errorText}`, res.status);
228
- }
229
-
230
- return res.json() as Promise<VeoOperation>;
231
- });
232
-
136
+ const operation = await this.fetchOperationStatus(url, apiKey);
233
137
  if (operation.error) {
234
- throw this.createError("OPERATION_FAILED", operation.error.message, operation.error.code);
138
+ throw createVideoError("OPERATION_FAILED", operation.error.message, operation.error.code);
235
139
  }
236
-
237
140
  if (operation.done) {
238
- const videoUrl = this.extractVideoUrl(operation);
239
-
240
- if (videoUrl) {
141
+ const rawVideoUrl = extractVideoUrl(operation);
142
+ if (rawVideoUrl) {
143
+ const result = await downloadVideoFromVeo(rawVideoUrl, apiKey);
241
144
  onProgress?.({ status: "completed", progress: 100 });
242
-
243
145
  return {
244
- videoUrl,
245
- metadata: {
246
- duration: 8,
247
- resolution: "720p",
248
- aspectRatio: "16:9",
249
- model,
250
- operationName,
251
- },
146
+ videoUrl: result.base64DataUri,
147
+ metadata: { duration: 8, resolution: "720p", aspectRatio: "16:9", model, operationName },
252
148
  };
253
149
  }
254
150
  }
255
151
  }
256
-
257
- throw this.createError("TIMEOUT", `Operation timed out after ${MAX_POLL_DURATION / 1000} seconds`);
258
- }
259
-
260
- /**
261
- * Extract video URL from operation response (handles multiple response formats)
262
- */
263
- private extractVideoUrl(operation: VeoOperation): string | null {
264
- const response = operation.response;
265
- if (!response) return null;
266
-
267
- // Format 1: generatedVideos[].video.uri (new SDK format)
268
- if (response.generatedVideos?.[0]?.video?.uri) {
269
- return response.generatedVideos[0].video.uri;
270
- }
271
-
272
- // Format 2: generatedVideos[].video.url
273
- if (response.generatedVideos?.[0]?.video?.url) {
274
- return response.generatedVideos[0].video.url;
275
- }
276
-
277
- // Format 3: candidates[].uri (legacy format)
278
- if (response.candidates?.[0]?.uri) {
279
- return response.candidates[0].uri;
280
- }
281
-
282
- // Format 4: generateVideoResponse.generatedSamples[].video.uri (REST API format)
283
- if (response.generateVideoResponse?.generatedSamples?.[0]?.video?.uri) {
284
- return response.generateVideoResponse.generatedSamples[0].video.uri;
285
- }
286
-
287
- return null;
288
- }
289
-
290
- /**
291
- * Validate text-to-video input parameters
292
- */
293
- private validateTextInput(input: TextToVideoInput): void {
294
- if (!input.prompt || input.prompt.trim().length === 0) {
295
- throw this.createError("INVALID_INPUT", "Prompt is required");
296
- }
297
-
298
- if (input.prompt.length > 2000) {
299
- throw this.createError("INVALID_INPUT", "Prompt exceeds 2000 characters");
300
- }
152
+ throw createVideoError("TIMEOUT", `Operation timed out after ${MAX_POLL_DURATION / 1000}s`);
301
153
  }
302
154
 
303
- /**
304
- * Validate image-to-video input parameters
305
- */
306
- private validateInput(input: VideoGenerationInput): void {
307
- this.validateTextInput(input);
308
-
309
- if (!input.image || input.image.length === 0) {
310
- throw this.createError("INVALID_INPUT", "Image is required for image-to-video");
311
- }
155
+ private async fetchOperationStatus(url: string, apiKey: string): Promise<VeoOperation> {
156
+ return geminiRetryService.executeWithRetry(async () => {
157
+ const res = await fetch(url, { method: "GET", headers: { "x-goog-api-key": apiKey } });
158
+ if (!res.ok) throw createVideoError("NETWORK", `Polling error: ${await res.text()}`, res.status);
159
+ return res.json() as Promise<VeoOperation>;
160
+ });
312
161
  }
313
162
 
314
- /**
315
- * Create typed error
316
- */
317
- private createError(
318
- type: VideoGenerationError["type"],
319
- message: string,
320
- statusCode?: number,
321
- ): VideoGenerationError {
322
- const error = new Error(message) as VideoGenerationError;
323
- error.type = type;
324
- error.statusCode = statusCode;
325
- error.retryable = type === "NETWORK" || type === "TIMEOUT";
326
- return error;
163
+ private validatePrompt(prompt: string): void {
164
+ if (!prompt?.trim()) throw createVideoError("INVALID_INPUT", "Prompt is required");
165
+ if (prompt.length > 2000) throw createVideoError("INVALID_INPUT", "Prompt exceeds 2000 characters");
327
166
  }
328
167
 
329
- /**
330
- * Delay helper
331
- */
332
168
  private delay(ms: number): Promise<void> {
333
169
  return new Promise((resolve) => setTimeout(resolve, ms));
334
170
  }
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Gemini Video URL Extractor
3
+ * Single Responsibility: Extract video URL from Veo operation response
4
+ * Handles multiple response formats from Veo API
5
+ */
6
+
7
+ import type { VeoOperation } from "../../domain/entities";
8
+
9
+ declare const __DEV__: boolean;
10
+
11
+ /**
12
+ * Extract video URL from Veo operation response
13
+ * Handles multiple response formats from different Veo API versions
14
+ */
15
+ export function extractVideoUrl(operation: VeoOperation): string | null {
16
+ const response = operation.response;
17
+ if (!response) return null;
18
+
19
+ // Format 1: generatedVideos[].video.uri (new SDK format)
20
+ if (response.generatedVideos?.[0]?.video?.uri) {
21
+ return response.generatedVideos[0].video.uri;
22
+ }
23
+
24
+ // Format 2: generatedVideos[].video.url
25
+ if (response.generatedVideos?.[0]?.video?.url) {
26
+ return response.generatedVideos[0].video.url;
27
+ }
28
+
29
+ // Format 3: candidates[].uri (legacy format)
30
+ if (response.candidates?.[0]?.uri) {
31
+ return response.candidates[0].uri;
32
+ }
33
+
34
+ // Format 4: generateVideoResponse.generatedSamples[].video.uri (REST API format)
35
+ if (response.generateVideoResponse?.generatedSamples?.[0]?.video?.uri) {
36
+ return response.generateVideoResponse.generatedSamples[0].video.uri;
37
+ }
38
+
39
+ if (typeof __DEV__ !== "undefined" && __DEV__) {
40
+ // eslint-disable-next-line no-console
41
+ console.warn("[GeminiVideoUrlExtractor] No video URL found in response");
42
+ }
43
+
44
+ return null;
45
+ }