@umituz/react-native-ai-gemini-provider 1.14.11 → 1.14.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@umituz/react-native-ai-gemini-provider",
3
- "version": "1.14.11",
3
+ "version": "1.14.13",
4
4
  "description": "Google Gemini AI provider for React Native applications",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -10,12 +10,12 @@ import type {
10
10
  JobSubmission,
11
11
  JobStatus,
12
12
  SubscribeOptions,
13
- RunOptions,
14
13
  ImageFeatureType,
15
14
  VideoFeatureType,
16
15
  ImageFeatureInputData,
17
16
  VideoFeatureInputData,
18
17
  } from "@umituz/react-native-ai-generation-content";
18
+ import type { ExecutionOptions } from "./generation-executor";
19
19
  import type {
20
20
  GeminiImageInput,
21
21
  GeminiImageGenerationResult,
@@ -88,11 +88,9 @@ export class GeminiProvider implements IAIProvider {
88
88
  async run<T = unknown>(
89
89
  model: string,
90
90
  input: Record<string, unknown>,
91
- options?: RunOptions,
91
+ options?: ExecutionOptions,
92
92
  ): Promise<T> {
93
- return generationExecutor.executeGeneration<T>(model, input, {
94
- onProgress: options?.onProgress,
95
- });
93
+ return generationExecutor.executeGeneration<T>(model, input, options);
96
94
  }
97
95
 
98
96
  async generateImage(prompt: string): Promise<GeminiImageGenerationResult> {
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Gemini Video Downloader
3
+ * Single Responsibility: Download video from Veo URL and convert to base64
4
+ * Required because Veo URLs need x-goog-api-key authentication
5
+ */
6
+
7
+ declare const __DEV__: boolean;
8
+
9
/**
 * Result of downloading a Veo video and encoding it as a data URI.
 */
export interface VideoDownloadResult {
  /** The downloaded video encoded as a base64 data URI string. */
  base64DataUri: string;
  /** Size of the downloaded blob, computed as bytes / 1024 / 1024. */
  sizeInMB: number;
  /** MIME type reported by the blob; "video/mp4" when the blob reports none. */
  mimeType: string;
}
14
+
15
/**
 * Fetch a generated video from its Veo URL and return it as a base64 data URI.
 *
 * The GET request carries the API key in the `x-goog-api-key` header.
 *
 * @param videoUrl - URL of the video to download.
 * @param apiKey - Key sent in the `x-goog-api-key` request header.
 * @returns The encoded video, its size in MB, and its MIME type
 *          ("video/mp4" when the blob reports no type).
 * @throws Error when the response status is not OK; the message carries the status code.
 */
export async function downloadVideoFromVeo(
  videoUrl: string,
  apiKey: string,
): Promise<VideoDownloadResult> {
  // Logging is limited to development builds; the typeof guard keeps this
  // safe in environments where the __DEV__ global does not exist.
  const isDev = typeof __DEV__ !== "undefined" && __DEV__;

  if (isDev) {
    // eslint-disable-next-line no-console
    console.log("[GeminiVideoDownloader] Downloading video from Veo...");
  }

  const headers = { "x-goog-api-key": apiKey };
  const veoResponse = await fetch(videoUrl, { method: "GET", headers });
  if (!veoResponse.ok) {
    throw new Error(`Failed to download video: ${veoResponse.status}`);
  }

  const videoBlob = await veoResponse.blob();
  const sizeInMB = videoBlob.size / 1024 / 1024;

  if (isDev) {
    // eslint-disable-next-line no-console
    console.log("[GeminiVideoDownloader] Video downloaded", {
      size: `${sizeInMB.toFixed(2)} MB`,
      type: videoBlob.type,
    });
  }

  const base64DataUri = await convertBlobToBase64(videoBlob);
  const mimeType = videoBlob.type || "video/mp4";

  return { base64DataUri, sizeInMB, mimeType };
}
57
+
58
/**
 * Convert a blob to a base64 data URI using `FileReader.readAsDataURL`.
 *
 * The promise settles from the `load` event rather than `loadend`: `loadend`
 * also fires after a failed or aborted read, when `reader.result` is not a
 * usable string, so resolving from it could hand callers `null` typed as
 * `string` (or crash the dev-mode log on `null.length`).
 *
 * @param blob - Binary content to encode.
 * @returns The data URI produced by the reader.
 * @throws Error("Failed to convert video to base64") when the read errors,
 *         is aborted, or completes without a string result.
 */
function convertBlobToBase64(blob: Blob): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    const fail = (): void => reject(new Error("Failed to convert video to base64"));

    reader.onload = () => {
      const { result } = reader;
      // readAsDataURL should yield a string; anything else is a failed read.
      if (typeof result !== "string") {
        fail();
        return;
      }
      if (typeof __DEV__ !== "undefined" && __DEV__) {
        // eslint-disable-next-line no-console
        console.log("[GeminiVideoDownloader] Video converted to base64", {
          length: result.length,
        });
      }
      resolve(result);
    };
    reader.onerror = fail;
    // An aborted read fires no `error` event, so it needs its own handler
    // or the promise would never reject.
    reader.onabort = fail;

    reader.readAsDataURL(blob);
  });
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Gemini Video Error Factory
3
+ * Single Responsibility: Create typed video generation errors
4
+ */
5
+
6
+ import type { VideoGenerationError } from "../../domain/entities";
7
+
8
/**
 * Build an `Error` decorated with the extra fields of a `VideoGenerationError`.
 *
 * @param type - Error category; "NETWORK" and "TIMEOUT" are flagged as retryable.
 * @param message - Message passed to the `Error` constructor.
 * @param statusCode - Optional status code, stored on the error unchanged.
 * @returns The error carrying `type`, `statusCode` and `retryable`.
 */
export function createVideoError(
  type: VideoGenerationError["type"],
  message: string,
  statusCode?: number,
): VideoGenerationError {
  const retryable = type === "NETWORK" || type === "TIMEOUT";
  const details = { type, statusCode, retryable };
  return Object.assign(new Error(message), details) as VideoGenerationError;
}
@@ -1,189 +1,123 @@
1
1
  /**
2
2
  * Gemini Video Generation Service
3
- * Handles video generation using Google Veo REST API (predictLongRunning)
3
+ * Orchestrates video generation using Google Veo REST API
4
4
  * @see https://ai.google.dev/gemini-api/docs/video
5
5
  */
6
6
 
7
7
  import { geminiClientCoreService } from "./gemini-client-core.service";
8
8
  import { geminiRetryService } from "./gemini-retry.service";
9
+ import { downloadVideoFromVeo } from "./gemini-video-downloader";
10
+ import { extractVideoUrl } from "./gemini-video-url-extractor";
11
+ import { createVideoError } from "./gemini-video-error";
9
12
  import { DEFAULT_MODELS } from "../../domain/entities";
10
13
  import type {
11
14
  VideoGenerationInput,
12
15
  VideoGenerationResult,
13
16
  VideoGenerationProgress,
14
17
  VeoOperation,
15
- VideoGenerationError,
16
18
  TextToVideoInput,
17
19
  } from "../../domain/entities";
18
20
 
19
21
  declare const __DEV__: boolean;
20
22
 
21
- const DEFAULT_POLL_INTERVAL = 10000; // 10 seconds
22
- const MAX_POLL_DURATION = 300000; // 5 minutes
23
- const MAX_POLL_ATTEMPTS = Math.floor(MAX_POLL_DURATION / DEFAULT_POLL_INTERVAL);
23
+ const POLL_INTERVAL = 10000;
24
+ const MAX_POLL_DURATION = 300000;
25
+ const MAX_POLL_ATTEMPTS = Math.floor(MAX_POLL_DURATION / POLL_INTERVAL);
24
26
  const VEO_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
25
27
 
28
/**
 * Calculate polling progress as a whole percentage in the 10-95 range.
 *
 * The attempt ratio is clamped to [0, 1] so the result can never leave the
 * documented range, even if `attempt` overshoots `maxAttempts` or is negative.
 *
 * @param attempt - Number of polling attempts made so far.
 * @param maxAttempts - Total number of attempts allowed. When it is not a
 *        positive number there is no remaining budget, so the top of the
 *        range (95) is returned instead of dividing by zero.
 * @returns An integer between 10 and 95 inclusive.
 */
function calculateProgress(attempt: number, maxAttempts: number): number {
  const rawRatio = maxAttempts > 0 ? attempt / maxAttempts : 1;
  // NaN (e.g. a NaN attempt) falls back to the start of the range.
  const ratio = Number.isNaN(rawRatio) ? 0 : Math.min(1, Math.max(0, rawRatio));
  return Math.round(10 + ratio * 85);
}
32
+
26
33
  class GeminiVideoGenerationService {
27
- /**
28
- * Generate video from text prompt using Veo REST API (text-to-video)
29
- * Uses predictLongRunning endpoint with instances/parameters format
30
- */
31
34
  async generateTextToVideo(
32
35
  input: TextToVideoInput,
33
36
  onProgress?: (progress: VideoGenerationProgress) => void,
34
37
  ): Promise<VideoGenerationResult> {
35
38
  geminiClientCoreService.validateInitialization();
36
- this.validateTextInput(input);
39
+ this.validatePrompt(input.prompt);
37
40
 
38
41
  const config = geminiClientCoreService.getConfig();
39
- const videoModel = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
42
+ const model = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
40
43
  const apiKey = config?.apiKey;
44
+ if (!apiKey) throw createVideoError("INVALID_INPUT", "API key is required");
41
45
 
42
46
  if (typeof __DEV__ !== "undefined" && __DEV__) {
43
47
  // eslint-disable-next-line no-console
44
- console.log("[GeminiVideoGeneration] generateTextToVideo() called", {
45
- model: videoModel,
46
- promptLength: input.prompt.length,
47
- });
48
- }
49
-
50
- // REST API uses predictLongRunning endpoint
51
- const url = `${VEO_API_BASE}/models/${videoModel}:predictLongRunning`;
52
-
53
- // REST API format: instances array with parameters object
54
- const requestBody = {
55
- instances: [{ prompt: input.prompt }],
56
- parameters: {
57
- aspectRatio: input.options?.aspectRatio || "16:9",
58
- ...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
59
- },
60
- };
61
-
62
- onProgress?.({ status: "queued", progress: 5 });
63
-
64
- if (typeof __DEV__ !== "undefined" && __DEV__) {
65
- // eslint-disable-next-line no-console
66
- console.log("[GeminiVideoGeneration] Request URL:", url);
67
- // eslint-disable-next-line no-console
68
- console.log("[GeminiVideoGeneration] Request body:", JSON.stringify(requestBody, null, 2));
69
- }
70
-
71
- const operation = await geminiRetryService.executeWithRetry(async () => {
72
- const res = await fetch(url, {
73
- method: "POST",
74
- headers: {
75
- "Content-Type": "application/json",
76
- "x-goog-api-key": apiKey!,
77
- },
78
- body: JSON.stringify(requestBody),
79
- });
80
-
81
- if (!res.ok) {
82
- const errorText = await res.text();
83
- throw this.createError("OPERATION_FAILED", `Veo API error (${res.status}): ${errorText}`, res.status);
84
- }
85
-
86
- return res.json() as Promise<VeoOperation>;
87
- });
88
-
89
- if (typeof __DEV__ !== "undefined" && __DEV__) {
90
- // eslint-disable-next-line no-console
91
- console.log("[GeminiVideoGeneration] Operation started", {
92
- operationName: operation.name,
93
- });
48
+ console.log("[GeminiVideoGeneration] generateTextToVideo()", { model });
94
49
  }
95
50
 
96
- onProgress?.({ status: "processing", progress: 10 });
97
-
98
- return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
51
+ const operation = await this.startOperation(input, model, apiKey, onProgress);
52
+ return this.pollOperation(operation.name, apiKey, model, onProgress);
99
53
  }
100
54
 
101
- /**
102
- * Generate video from image and prompt using Veo REST API (image-to-video)
103
- * Uses predictLongRunning endpoint with image in instances
104
- */
105
55
  async generateVideo(
106
56
  input: VideoGenerationInput,
107
57
  onProgress?: (progress: VideoGenerationProgress) => void,
108
58
  ): Promise<VideoGenerationResult> {
109
- // If no image provided, use text-to-video
110
- if (!input.image) {
111
- return this.generateTextToVideo(input, onProgress);
112
- }
59
+ if (!input.image) return this.generateTextToVideo(input, onProgress);
113
60
 
114
61
  geminiClientCoreService.validateInitialization();
115
- this.validateInput(input);
62
+ this.validatePrompt(input.prompt);
116
63
 
117
64
  const config = geminiClientCoreService.getConfig();
118
- const videoModel = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
65
+ const model = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
119
66
  const apiKey = config?.apiKey;
67
+ if (!apiKey) throw createVideoError("INVALID_INPUT", "API key is required");
120
68
 
121
- if (typeof __DEV__ !== "undefined" && __DEV__) {
122
- // eslint-disable-next-line no-console
123
- console.log("[GeminiVideoGeneration] generateVideo() called", {
124
- model: videoModel,
125
- promptLength: input.prompt.length,
126
- hasImage: !!input.image,
127
- });
128
- }
129
-
130
- // REST API uses predictLongRunning endpoint
131
- const url = `${VEO_API_BASE}/models/${videoModel}:predictLongRunning`;
69
+ const operation = await this.startImageToVideoOperation(input, model, apiKey, onProgress);
70
+ return this.pollOperation(operation.name, apiKey, model, onProgress);
71
+ }
132
72
 
133
- // REST API format with image for image-to-video
134
- const requestBody = {
135
- instances: [{
136
- prompt: input.prompt,
137
- image: {
138
- bytesBase64Encoded: input.image,
139
- },
140
- }],
73
/**
 * Submit a text-to-video request to the model's `predictLongRunning` endpoint.
 *
 * Emits a "queued" progress event (5%) and then posts the request via
 * `geminiRetryService.executeWithRetry`.
 *
 * @param input - Prompt plus optional negative prompt and aspect ratio (defaults to "16:9").
 * @param model - Model name interpolated into the endpoint URL.
 * @param apiKey - Key forwarded to `postRequest`.
 * @param onProgress - Optional progress listener.
 * @returns The operation returned by the POST request.
 */
private async startOperation(
  input: TextToVideoInput,
  model: string,
  apiKey: string,
  onProgress?: (progress: VideoGenerationProgress) => void,
): Promise<VeoOperation> {
  const endpoint = `${VEO_API_BASE}/models/${model}:predictLongRunning`;

  const instances = [{ prompt: input.prompt }];
  const parameters = {
    aspectRatio: input.options?.aspectRatio || "16:9",
    // Only include negativePrompt in the payload when one was supplied.
    ...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
  };
  const payload = { instances, parameters };

  onProgress?.({ status: "queued", progress: 5 });

  const submit = () => this.postRequest(endpoint, payload, apiKey);
  return geminiRetryService.executeWithRetry(submit);
}
148
90
 
149
- if (typeof __DEV__ !== "undefined" && __DEV__) {
150
- // eslint-disable-next-line no-console
151
- console.log("[GeminiVideoGeneration] Request URL:", url);
152
- }
153
-
154
- const operation = await geminiRetryService.executeWithRetry(async () => {
155
- const res = await fetch(url, {
156
- method: "POST",
157
- headers: {
158
- "Content-Type": "application/json",
159
- "x-goog-api-key": apiKey!,
160
- },
161
- body: JSON.stringify(requestBody),
162
- });
163
-
164
- if (!res.ok) {
165
- const errorText = await res.text();
166
- throw this.createError("OPERATION_FAILED", `Veo API error (${res.status}): ${errorText}`, res.status);
167
- }
91
/**
 * Submit an image-to-video request to the model's `predictLongRunning` endpoint.
 *
 * The image is sent as `bytesBase64Encoded` alongside the prompt. Emits a
 * "queued" progress event (5%) and then posts the request via
 * `geminiRetryService.executeWithRetry`.
 *
 * @param input - Prompt, image, and optional negative prompt / aspect ratio (defaults to "16:9").
 * @param model - Model name interpolated into the endpoint URL.
 * @param apiKey - Key forwarded to `postRequest`.
 * @param onProgress - Optional progress listener.
 * @returns The operation returned by the POST request.
 */
private async startImageToVideoOperation(
  input: VideoGenerationInput,
  model: string,
  apiKey: string,
  onProgress?: (progress: VideoGenerationProgress) => void,
): Promise<VeoOperation> {
  const endpoint = `${VEO_API_BASE}/models/${model}:predictLongRunning`;

  const instances = [
    { prompt: input.prompt, image: { bytesBase64Encoded: input.image } },
  ];
  const parameters = {
    aspectRatio: input.options?.aspectRatio || "16:9",
    // Only include negativePrompt in the payload when one was supplied.
    ...(input.negativePrompt && { negativePrompt: input.negativePrompt }),
  };
  const payload = { instances, parameters };

  onProgress?.({ status: "queued", progress: 5 });

  const submit = () => this.postRequest(endpoint, payload, apiKey);
  return geminiRetryService.executeWithRetry(submit);
}
168
108
 
169
- return res.json() as Promise<VeoOperation>;
109
+ private async postRequest(url: string, body: Record<string, unknown>, apiKey: string): Promise<VeoOperation> {
110
+ const res = await fetch(url, {
111
+ method: "POST",
112
+ headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
113
+ body: JSON.stringify(body),
170
114
  });
171
-
172
- if (typeof __DEV__ !== "undefined" && __DEV__) {
173
- // eslint-disable-next-line no-console
174
- console.log("[GeminiVideoGeneration] Operation started", {
175
- operationName: operation.name,
176
- });
115
+ if (!res.ok) {
116
+ throw createVideoError("OPERATION_FAILED", `Veo API error: ${await res.text()}`, res.status);
177
117
  }
178
-
179
- onProgress?.({ status: "processing", progress: 10 });
180
-
181
- return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
118
+ return res.json() as Promise<VeoOperation>;
182
119
  }
183
120
 
184
- /**
185
- * Poll operation status until completion
186
- */
187
121
  private async pollOperation(
188
122
  operationName: string,
189
123
  apiKey: string,
@@ -192,142 +126,45 @@ class GeminiVideoGenerationService {
192
126
  ): Promise<VideoGenerationResult> {
193
127
  const url = `${VEO_API_BASE}/${operationName}`;
194
128
  let attempts = 0;
129
+ onProgress?.({ status: "processing", progress: 10 });
195
130
 
196
131
  while (attempts < MAX_POLL_ATTEMPTS) {
197
- await this.delay(DEFAULT_POLL_INTERVAL);
132
+ await this.delay(POLL_INTERVAL);
198
133
  attempts++;
134
+ onProgress?.({ status: "processing", progress: calculateProgress(attempts, MAX_POLL_ATTEMPTS) });
199
135
 
200
- const progress = Math.round(Math.min(95, 10 + (attempts / MAX_POLL_ATTEMPTS) * 85));
201
-
202
- onProgress?.({
203
- status: "processing",
204
- progress,
205
- estimatedTimeRemaining: (MAX_POLL_ATTEMPTS - attempts) * (DEFAULT_POLL_INTERVAL / 1000),
206
- });
207
-
208
- if (typeof __DEV__ !== "undefined" && __DEV__) {
209
- // eslint-disable-next-line no-console
210
- console.log("[GeminiVideoGeneration] Polling operation", {
211
- attempt: attempts,
212
- progress: `${progress.toFixed(0)}%`,
213
- });
214
- }
215
-
216
- const operation = await geminiRetryService.executeWithRetry(async () => {
217
- const res = await fetch(url, {
218
- method: "GET",
219
- headers: {
220
- "x-goog-api-key": apiKey,
221
- },
222
- });
223
-
224
- if (!res.ok) {
225
- const errorText = await res.text();
226
- throw this.createError("NETWORK", `Polling error (${res.status}): ${errorText}`, res.status);
227
- }
228
-
229
- return res.json() as Promise<VeoOperation>;
230
- });
231
-
136
+ const operation = await this.fetchOperationStatus(url, apiKey);
232
137
  if (operation.error) {
233
- throw this.createError("OPERATION_FAILED", operation.error.message, operation.error.code);
138
+ throw createVideoError("OPERATION_FAILED", operation.error.message, operation.error.code);
234
139
  }
235
-
236
140
  if (operation.done) {
237
- const videoUrl = this.extractVideoUrl(operation);
238
-
239
- if (videoUrl) {
141
+ const rawVideoUrl = extractVideoUrl(operation);
142
+ if (rawVideoUrl) {
143
+ const result = await downloadVideoFromVeo(rawVideoUrl, apiKey);
240
144
  onProgress?.({ status: "completed", progress: 100 });
241
-
242
145
  return {
243
- videoUrl,
244
- metadata: {
245
- duration: 8,
246
- resolution: "720p",
247
- aspectRatio: "16:9",
248
- model,
249
- operationName,
250
- },
146
+ videoUrl: result.base64DataUri,
147
+ metadata: { duration: 8, resolution: "720p", aspectRatio: "16:9", model, operationName },
251
148
  };
252
149
  }
253
150
  }
254
151
  }
255
-
256
- throw this.createError("TIMEOUT", `Operation timed out after ${MAX_POLL_DURATION / 1000} seconds`);
257
- }
258
-
259
- /**
260
- * Extract video URL from operation response (handles multiple response formats)
261
- */
262
- private extractVideoUrl(operation: VeoOperation): string | null {
263
- const response = operation.response;
264
- if (!response) return null;
265
-
266
- // Format 1: generatedVideos[].video.uri (new SDK format)
267
- if (response.generatedVideos?.[0]?.video?.uri) {
268
- return response.generatedVideos[0].video.uri;
269
- }
270
-
271
- // Format 2: generatedVideos[].video.url
272
- if (response.generatedVideos?.[0]?.video?.url) {
273
- return response.generatedVideos[0].video.url;
274
- }
275
-
276
- // Format 3: candidates[].uri (legacy format)
277
- if (response.candidates?.[0]?.uri) {
278
- return response.candidates[0].uri;
279
- }
280
-
281
- // Format 4: generateVideoResponse.generatedSamples[].video.uri (REST API format)
282
- if (response.generateVideoResponse?.generatedSamples?.[0]?.video?.uri) {
283
- return response.generateVideoResponse.generatedSamples[0].video.uri;
284
- }
285
-
286
- return null;
287
- }
288
-
289
- /**
290
- * Validate text-to-video input parameters
291
- */
292
- private validateTextInput(input: TextToVideoInput): void {
293
- if (!input.prompt || input.prompt.trim().length === 0) {
294
- throw this.createError("INVALID_INPUT", "Prompt is required");
295
- }
296
-
297
- if (input.prompt.length > 2000) {
298
- throw this.createError("INVALID_INPUT", "Prompt exceeds 2000 characters");
299
- }
152
+ throw createVideoError("TIMEOUT", `Operation timed out after ${MAX_POLL_DURATION / 1000}s`);
300
153
  }
301
154
 
302
- /**
303
- * Validate image-to-video input parameters
304
- */
305
- private validateInput(input: VideoGenerationInput): void {
306
- this.validateTextInput(input);
307
-
308
- if (!input.image || input.image.length === 0) {
309
- throw this.createError("INVALID_INPUT", "Image is required for image-to-video");
310
- }
155
/**
 * Fetch the current state of a long-running operation.
 *
 * Issues a GET with the `x-goog-api-key` header through
 * `geminiRetryService.executeWithRetry`.
 *
 * @param url - Full URL of the operation resource.
 * @param apiKey - Key sent in the `x-goog-api-key` header.
 * @returns The parsed JSON body, treated as a `VeoOperation`.
 * @throws A "NETWORK" video error (with the response text and status) when the response is not OK.
 */
private async fetchOperationStatus(url: string, apiKey: string): Promise<VeoOperation> {
  const requestStatus = async (): Promise<VeoOperation> => {
    const headers = { "x-goog-api-key": apiKey };
    const res = await fetch(url, { method: "GET", headers });

    if (res.ok) {
      return res.json() as Promise<VeoOperation>;
    }

    const detail = await res.text();
    throw createVideoError("NETWORK", `Polling error: ${detail}`, res.status);
  };

  return geminiRetryService.executeWithRetry(requestStatus);
}
312
162
 
313
- /**
314
- * Create typed error
315
- */
316
- private createError(
317
- type: VideoGenerationError["type"],
318
- message: string,
319
- statusCode?: number,
320
- ): VideoGenerationError {
321
- const error = new Error(message) as VideoGenerationError;
322
- error.type = type;
323
- error.statusCode = statusCode;
324
- error.retryable = type === "NETWORK" || type === "TIMEOUT";
325
- return error;
163
/**
 * Reject prompts that are missing, whitespace-only, or longer than 2000 characters.
 *
 * @param prompt - Prompt text to check.
 * @throws An "INVALID_INPUT" video error naming the rule that failed.
 */
private validatePrompt(prompt: string): void {
  const hasText = Boolean(prompt?.trim());
  if (!hasText) {
    throw createVideoError("INVALID_INPUT", "Prompt is required");
  }

  const maxLength = 2000;
  if (prompt.length > maxLength) {
    throw createVideoError("INVALID_INPUT", "Prompt exceeds 2000 characters");
  }
}
327
167
 
328
- /**
329
- * Delay helper
330
- */
331
168
  private delay(ms: number): Promise<void> {
332
169
  return new Promise((resolve) => setTimeout(resolve, ms));
333
170
  }
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Gemini Video URL Extractor
3
+ * Single Responsibility: Extract video URL from Veo operation response
4
+ * Handles multiple response formats from Veo API
5
+ */
6
+
7
+ import type { VeoOperation } from "../../domain/entities";
8
+
9
+ declare const __DEV__: boolean;
10
+
11
/**
 * Find the video URL inside a Veo operation response.
 *
 * The lookup tries these locations in order and returns the first non-empty value:
 *   1. `generatedVideos[0].video.uri` (new SDK format)
 *   2. `generatedVideos[0].video.url`
 *   3. `candidates[0].uri` (legacy format)
 *   4. `generateVideoResponse.generatedSamples[0].video.uri` (REST API format)
 *
 * @param operation - Operation whose `response` is inspected.
 * @returns The URL, or `null` when there is no response or no location holds one
 *          (a warning is logged in development builds in the latter case).
 */
export function extractVideoUrl(operation: VeoOperation): string | null {
  const payload = operation.response;
  if (!payload) return null;

  const firstGenerated = payload.generatedVideos?.[0]?.video;
  const resolved =
    firstGenerated?.uri ||
    firstGenerated?.url ||
    payload.candidates?.[0]?.uri ||
    payload.generateVideoResponse?.generatedSamples?.[0]?.video?.uri;

  if (resolved) {
    return resolved;
  }

  if (typeof __DEV__ !== "undefined" && __DEV__) {
    // eslint-disable-next-line no-console
    console.warn("[GeminiVideoUrlExtractor] No video URL found in response");
  }

  return null;
}