@umituz/react-native-ai-gemini-provider 1.14.6 → 1.14.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@umituz/react-native-ai-gemini-provider",
3
- "version": "1.14.6",
3
+ "version": "1.14.7",
4
4
  "description": "Google Gemini AI provider for React Native applications",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -33,15 +33,34 @@ export interface VideoGenerationOptions {
33
33
  }
34
34
 
35
35
  /**
36
- * Input for video generation
36
+ * Input for text-to-video generation
37
37
  */
38
- export interface VideoGenerationInput {
38
+ export interface TextToVideoInput {
39
+ prompt: string;
40
+ negativePrompt?: string;
41
+ options?: VideoGenerationOptions;
42
+ }
43
+
44
+ /**
45
+ * Input for image-to-video generation
46
+ */
47
+ export interface ImageToVideoInput {
39
48
  prompt: string;
40
49
  image: string;
41
50
  negativePrompt?: string;
42
51
  options?: VideoGenerationOptions;
43
52
  }
44
53
 
54
+ /**
55
+ * Input for video generation (supports both text-to-video and image-to-video)
56
+ */
57
+ export interface VideoGenerationInput {
58
+ prompt: string;
59
+ image?: string;
60
+ negativePrompt?: string;
61
+ options?: VideoGenerationOptions;
62
+ }
63
+
45
64
  /**
46
65
  * Progress information during video generation
47
66
  */
@@ -91,6 +110,16 @@ export interface VideoGenerationError extends Error {
91
110
  retryable: boolean;
92
111
  }
93
112
 
113
+ /**
114
+ * Generated video from Veo API
115
+ */
116
+ export interface VeoGeneratedVideo {
117
+ video: {
118
+ uri?: string;
119
+ url?: string;
120
+ };
121
+ }
122
+
94
123
  /**
95
124
  * Operation response from Veo API
96
125
  */
@@ -99,10 +128,18 @@ export interface VeoOperation {
99
128
  done: boolean;
100
129
  metadata?: Record<string, unknown>;
101
130
  response?: {
131
+ generatedVideos?: VeoGeneratedVideo[];
102
132
  candidates?: Array<{
103
133
  uri?: string;
104
134
  [key: string]: unknown;
105
135
  }>;
136
+ generateVideoResponse?: {
137
+ generatedSamples?: Array<{
138
+ video?: {
139
+ uri?: string;
140
+ };
141
+ }>;
142
+ };
106
143
  };
107
144
  error?: {
108
145
  code: number;
@@ -5,13 +5,14 @@
5
5
 
6
6
  import { geminiClientCoreService } from "./gemini-client-core.service";
7
7
  import { geminiRetryService } from "./gemini-retry.service";
8
- import { DEFAULT_MODELS, RESPONSE_MODALITIES } from "../../domain/entities";
8
+ import { DEFAULT_MODELS } from "../../domain/entities";
9
9
  import type {
10
10
  VideoGenerationInput,
11
11
  VideoGenerationResult,
12
12
  VideoGenerationProgress,
13
13
  VeoOperation,
14
14
  VideoGenerationError,
15
+ TextToVideoInput,
15
16
  } from "../../domain/entities";
16
17
 
17
18
  declare const __DEV__: boolean;
@@ -19,15 +20,92 @@ declare const __DEV__: boolean;
19
20
  const DEFAULT_POLL_INTERVAL = 10000; // 10 seconds
20
21
  const MAX_POLL_DURATION = 300000; // 5 minutes
21
22
  const MAX_POLL_ATTEMPTS = Math.floor(MAX_POLL_DURATION / DEFAULT_POLL_INTERVAL);
23
+ const VEO_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
22
24
 
23
25
  class GeminiVideoGenerationService {
24
26
  /**
25
- * Generate video from image and prompt using Veo API
27
+ * Generate video from text prompt using Veo API (text-to-video)
28
+ */
29
+ async generateTextToVideo(
30
+ input: TextToVideoInput,
31
+ onProgress?: (progress: VideoGenerationProgress) => void,
32
+ ): Promise<VideoGenerationResult> {
33
+ geminiClientCoreService.validateInitialization();
34
+ this.validateTextInput(input);
35
+
36
+ const config = geminiClientCoreService.getConfig();
37
+ const videoModel = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
38
+ const apiKey = config?.apiKey;
39
+
40
+ if (typeof __DEV__ !== "undefined" && __DEV__) {
41
+ // eslint-disable-next-line no-console
42
+ console.log("[GeminiVideoGeneration] generateTextToVideo() called", {
43
+ model: videoModel,
44
+ promptLength: input.prompt.length,
45
+ });
46
+ }
47
+
48
+ const url = `${VEO_API_BASE}/models/${videoModel}:generateVideos`;
49
+
50
+ const requestBody: Record<string, unknown> = {
51
+ prompt: input.prompt,
52
+ };
53
+
54
+ if (input.negativePrompt) {
55
+ requestBody.config = {
56
+ negativePrompt: input.negativePrompt,
57
+ aspectRatio: input.options?.aspectRatio || "16:9",
58
+ };
59
+ } else if (input.options?.aspectRatio) {
60
+ requestBody.config = {
61
+ aspectRatio: input.options.aspectRatio,
62
+ };
63
+ }
64
+
65
+ onProgress?.({ status: "queued", progress: 5 });
66
+
67
+ const operation = await geminiRetryService.executeWithRetry(async () => {
68
+ const res = await fetch(url, {
69
+ method: "POST",
70
+ headers: {
71
+ "Content-Type": "application/json",
72
+ "x-goog-api-key": apiKey!,
73
+ },
74
+ body: JSON.stringify(requestBody),
75
+ });
76
+
77
+ if (!res.ok) {
78
+ const errorText = await res.text();
79
+ throw this.createError("OPERATION_FAILED", `Veo API error (${res.status}): ${errorText}`, res.status);
80
+ }
81
+
82
+ return res.json() as Promise<VeoOperation>;
83
+ });
84
+
85
+ if (typeof __DEV__ !== "undefined" && __DEV__) {
86
+ // eslint-disable-next-line no-console
87
+ console.log("[GeminiVideoGeneration] Operation started", {
88
+ operationName: operation.name,
89
+ });
90
+ }
91
+
92
+ onProgress?.({ status: "processing", progress: 10 });
93
+
94
+ return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
95
+ }
96
+
97
+ /**
98
+ * Generate video using Veo API (image-to-video; delegates to text-to-video when no image is provided)
26
99
  */
27
100
  async generateVideo(
28
101
  input: VideoGenerationInput,
29
102
  onProgress?: (progress: VideoGenerationProgress) => void,
30
103
  ): Promise<VideoGenerationResult> {
104
+ // If no image provided, use text-to-video
105
+ if (!input.image) {
106
+ return this.generateTextToVideo(input, onProgress);
107
+ }
108
+
31
109
  geminiClientCoreService.validateInitialization();
32
110
  this.validateInput(input);
33
111
 
@@ -40,36 +118,33 @@ class GeminiVideoGenerationService {
40
118
  console.log("[GeminiVideoGeneration] generateVideo() called", {
41
119
  model: videoModel,
42
120
  promptLength: input.prompt.length,
121
+ hasImage: !!input.image,
43
122
  });
44
123
  }
45
124
 
46
- const url = `https://generativelanguage.googleapis.com/v1/models/${videoModel}:generate`;
47
-
48
- const requestBody = {
49
- model: videoModel,
50
- contents: [
51
- {
52
- parts: [
53
- { text: input.prompt },
54
- {
55
- inline_data: {
56
- mime_type: "image/jpeg",
57
- data: input.image,
58
- },
125
+ const url = `${VEO_API_BASE}/models/${videoModel}:generateVideos`;
126
+
127
+ const requestBody: Record<string, unknown> = {
128
+ prompt: input.prompt,
129
+ config: {
130
+ aspectRatio: input.options?.aspectRatio || "16:9",
131
+ referenceImages: [
132
+ {
133
+ inlineData: {
134
+ mimeType: "image/jpeg",
135
+ data: input.image,
59
136
  },
60
- ],
61
- },
62
- ],
63
- generationConfig: {
64
- responseModalities: RESPONSE_MODALITIES.VIDEO_ONLY,
65
- videoGenerationConfig: {
66
- numberOfVideos: input.options?.numberOfVideos || 1,
67
- aspectRatio: input.options?.aspectRatio || "9:16",
68
- resolution: input.options?.resolution || "720p",
69
- },
137
+ },
138
+ ],
70
139
  },
71
140
  };
72
141
 
142
+ if (input.negativePrompt) {
143
+ (requestBody.config as Record<string, unknown>).negativePrompt = input.negativePrompt;
144
+ }
145
+
146
+ onProgress?.({ status: "queued", progress: 5 });
147
+
73
148
  const operation = await geminiRetryService.executeWithRetry(async () => {
74
149
  const res = await fetch(url, {
75
150
  method: "POST",
@@ -95,9 +170,9 @@ class GeminiVideoGenerationService {
95
170
  });
96
171
  }
97
172
 
98
- const result = await this.pollOperation(operation.name, apiKey!, onProgress);
173
+ onProgress?.({ status: "processing", progress: 10 });
99
174
 
100
- return result;
175
+ return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
101
176
  }
102
177
 
103
178
  /**
@@ -106,24 +181,23 @@ class GeminiVideoGenerationService {
106
181
  private async pollOperation(
107
182
  operationName: string,
108
183
  apiKey: string,
184
+ model: string,
109
185
  onProgress?: (progress: VideoGenerationProgress) => void,
110
186
  ): Promise<VideoGenerationResult> {
111
- const url = `https://generativelanguage.googleapis.com/v1/${operationName}`;
187
+ const url = `${VEO_API_BASE}/${operationName}`;
112
188
  let attempts = 0;
113
189
 
114
190
  while (attempts < MAX_POLL_ATTEMPTS) {
115
191
  await this.delay(DEFAULT_POLL_INTERVAL);
116
192
  attempts++;
117
193
 
118
- const progress = Math.min(95, (attempts / MAX_POLL_ATTEMPTS) * 100);
194
+ const progress = Math.min(95, 10 + (attempts / MAX_POLL_ATTEMPTS) * 85);
119
195
 
120
- if (onProgress) {
121
- onProgress({
122
- status: "processing",
123
- progress,
124
- estimatedTimeRemaining: (MAX_POLL_ATTEMPTS - attempts) * (DEFAULT_POLL_INTERVAL / 1000),
125
- });
126
- }
196
+ onProgress?.({
197
+ status: "processing",
198
+ progress,
199
+ estimatedTimeRemaining: (MAX_POLL_ATTEMPTS - attempts) * (DEFAULT_POLL_INTERVAL / 1000),
200
+ });
127
201
 
128
202
  if (typeof __DEV__ !== "undefined" && __DEV__) {
129
203
  // eslint-disable-next-line no-console
@@ -150,33 +224,26 @@ class GeminiVideoGenerationService {
150
224
  });
151
225
 
152
226
  if (operation.error) {
153
- throw this.createError(
154
- "OPERATION_FAILED",
155
- operation.error.message,
156
- operation.error.code,
157
- );
227
+ throw this.createError("OPERATION_FAILED", operation.error.message, operation.error.code);
158
228
  }
159
229
 
160
- if (operation.done && operation.response?.candidates?.[0]?.uri) {
161
- const videoUrl = operation.response.candidates[0].uri;
230
+ if (operation.done) {
231
+ const videoUrl = this.extractVideoUrl(operation);
162
232
 
163
- if (onProgress) {
164
- onProgress({
165
- status: "completed",
166
- progress: 100,
167
- });
168
- }
233
+ if (videoUrl) {
234
+ onProgress?.({ status: "completed", progress: 100 });
169
235
 
170
- return {
171
- videoUrl,
172
- metadata: {
173
- duration: 10,
174
- resolution: "720p",
175
- aspectRatio: "9:16",
176
- model: DEFAULT_MODELS.VIDEO_GENERATION,
177
- operationName,
178
- },
179
- };
236
+ return {
237
+ videoUrl,
238
+ metadata: {
239
+ duration: 8,
240
+ resolution: "720p",
241
+ aspectRatio: "16:9",
242
+ model,
243
+ operationName,
244
+ },
245
+ };
246
+ }
180
247
  }
181
248
  }
182
249
 
@@ -184,9 +251,39 @@ class GeminiVideoGenerationService {
184
251
  }
185
252
 
186
253
  /**
187
- * Validate input parameters
254
+ * Extract video URL from operation response (handles multiple response formats)
188
255
  */
189
- private validateInput(input: VideoGenerationInput): void {
256
+ private extractVideoUrl(operation: VeoOperation): string | null {
257
+ const response = operation.response;
258
+ if (!response) return null;
259
+
260
+ // Format 1: generatedVideos[].video.uri (new SDK format)
261
+ if (response.generatedVideos?.[0]?.video?.uri) {
262
+ return response.generatedVideos[0].video.uri;
263
+ }
264
+
265
+ // Format 2: generatedVideos[].video.url
266
+ if (response.generatedVideos?.[0]?.video?.url) {
267
+ return response.generatedVideos[0].video.url;
268
+ }
269
+
270
+ // Format 3: candidates[].uri (legacy format)
271
+ if (response.candidates?.[0]?.uri) {
272
+ return response.candidates[0].uri;
273
+ }
274
+
275
+ // Format 4: generateVideoResponse.generatedSamples[].video.uri (REST API format)
276
+ if (response.generateVideoResponse?.generatedSamples?.[0]?.video?.uri) {
277
+ return response.generateVideoResponse.generatedSamples[0].video.uri;
278
+ }
279
+
280
+ return null;
281
+ }
282
+
283
+ /**
284
+ * Validate text-to-video input parameters
285
+ */
286
+ private validateTextInput(input: TextToVideoInput): void {
190
287
  if (!input.prompt || input.prompt.trim().length === 0) {
191
288
  throw this.createError("INVALID_INPUT", "Prompt is required");
192
289
  }
@@ -194,9 +291,16 @@ class GeminiVideoGenerationService {
194
291
  if (input.prompt.length > 2000) {
195
292
  throw this.createError("INVALID_INPUT", "Prompt exceeds 2000 characters");
196
293
  }
294
+ }
295
+
296
+ /**
297
+ * Validate image-to-video input parameters
298
+ */
299
+ private validateInput(input: VideoGenerationInput): void {
300
+ this.validateTextInput(input);
197
301
 
198
302
  if (!input.image || input.image.length === 0) {
199
- throw this.createError("INVALID_INPUT", "Image is required");
303
+ throw this.createError("INVALID_INPUT", "Image is required for image-to-video");
200
304
  }
201
305
  }
202
306
 
@@ -5,9 +5,11 @@
5
5
 
6
6
  import type {
7
7
  GeminiImageInput,
8
+ VideoGenerationInput,
8
9
  } from "../../domain/entities";
9
10
  import { geminiTextGenerationService } from "./gemini-text-generation.service";
10
11
  import { geminiImageGenerationService } from "./gemini-image-generation.service";
12
+ import { geminiVideoGenerationService } from "./gemini-video-generation.service";
11
13
  import { ContentBuilder } from "../content/ContentBuilder";
12
14
  import { ResponseFormatter } from "../response/ResponseFormatter";
13
15
 
@@ -20,6 +22,11 @@ export class GenerationExecutor {
20
22
  input: Record<string, unknown>,
21
23
  ): Promise<T> {
22
24
  const isImageGeneration = input.generateImage === true || input.type === "image";
25
+ const isVideoGeneration = this.isVideoModel(model) || input.type === "video";
26
+
27
+ if (isVideoGeneration) {
28
+ return this.executeVideoGeneration<T>(input);
29
+ }
23
30
 
24
31
  if (isImageGeneration) {
25
32
  const prompt = String(input.prompt || "");
@@ -38,6 +45,43 @@ export class GenerationExecutor {
38
45
  return this.responseFormatter.formatResponse<T>(response, input);
39
46
  }
40
47
 
48
+ /**
49
+ * Check if model is a video generation model (case-insensitive match on "veo" in the model name)
50
+ */
51
+ private isVideoModel(model: string): boolean {
52
+ return model.toLowerCase().includes("veo");
53
+ }
54
+
55
+ /**
56
+ * Execute video generation using Veo API
57
+ */
58
+ private async executeVideoGeneration<T>(input: Record<string, unknown>): Promise<T> {
59
+ const videoInput: VideoGenerationInput = {
60
+ prompt: String(input.prompt || ""),
61
+ image: input.image as string | undefined,
62
+ negativePrompt: input.negativePrompt as string | undefined,
63
+ options: {
64
+ aspectRatio: this.normalizeAspectRatio(input.aspect_ratio as string),
65
+ },
66
+ };
67
+
68
+ const result = await geminiVideoGenerationService.generateVideo(videoInput);
69
+
70
+ return {
71
+ video: { url: result.videoUrl },
72
+ videoUrl: result.videoUrl,
73
+ metadata: result.metadata,
74
+ } as T;
75
+ }
76
+
77
+ /**
78
+ * Normalize aspect ratio: "9:16" and "1:1" pass through; any other value (including undefined) becomes "16:9"
79
+ */
80
+ private normalizeAspectRatio(ratio: string | undefined): "16:9" | "9:16" | "1:1" {
81
+ if (ratio === "9:16" || ratio === "1:1") return ratio;
82
+ return "16:9";
83
+ }
84
+
41
85
  async generateWithImages(
42
86
  model: string,
43
87
  prompt: string,