npm - @umituz/react-native-ai-gemini-provider - Versions diffs - 1.14.6 → 1.14.7 - Mend

@umituz/react-native-ai-gemini-provider 1.14.6 → 1.14.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json +1 -1
package/src/domain/entities/video.types.ts +39 -2
package/src/infrastructure/services/gemini-video-generation.service.ts +166 -62
package/src/infrastructure/services/generation-executor.ts +44 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@umituz/react-native-ai-gemini-provider",
-  "version": "1.14.6",
+  "version": "1.14.7",
   "description": "Google Gemini AI provider for React Native applications",
   "main": "./src/index.ts",
   "types": "./src/index.ts",

package/src/domain/entities/video.types.ts CHANGED Viewed

@@ -33,15 +33,34 @@ export interface VideoGenerationOptions {
 }
 /**
- * Input for video generation
+ * Input for text-to-video generation
  */
-export interface VideoGenerationInput {
+export interface TextToVideoInput {
+  prompt: string;
+  negativePrompt?: string;
+  options?: VideoGenerationOptions;
+}
+/**
+ * Input for image-to-video generation
+ */
+export interface ImageToVideoInput {
   prompt: string;
   image: string;
   negativePrompt?: string;
   options?: VideoGenerationOptions;
 }
+/**
+ * Input for video generation (supports both text-to-video and image-to-video)
+ */
+export interface VideoGenerationInput {
+  prompt: string;
+  image?: string;
+  negativePrompt?: string;
+  options?: VideoGenerationOptions;
+}
 /**
  * Progress information during video generation
  */
@@ -91,6 +110,16 @@ export interface VideoGenerationError extends Error {
   retryable: boolean;
 }
+/**
+ * Generated video from Veo API
+ */
+export interface VeoGeneratedVideo {
+  video: {
+    uri?: string;
+    url?: string;
+  };
+}
 /**
  * Operation response from Veo API
  */
@@ -99,10 +128,18 @@ export interface VeoOperation {
   done: boolean;
   metadata?: Record<string, unknown>;
   response?: {
+    generatedVideos?: VeoGeneratedVideo[];
     candidates?: Array<{
       uri?: string;
       [key: string]: unknown;
     }>;
+    generateVideoResponse?: {
+      generatedSamples?: Array<{
+        video?: {
+          uri?: string;
+        };
+      }>;
+    };
   };
   error?: {
     code: number;

package/src/infrastructure/services/gemini-video-generation.service.ts CHANGED Viewed

@@ -5,13 +5,14 @@
 import { geminiClientCoreService } from "./gemini-client-core.service";
 import { geminiRetryService } from "./gemini-retry.service";
-import { DEFAULT_MODELS, RESPONSE_MODALITIES } from "../../domain/entities";
+import { DEFAULT_MODELS } from "../../domain/entities";
 import type {
   VideoGenerationInput,
   VideoGenerationResult,
   VideoGenerationProgress,
   VeoOperation,
   VideoGenerationError,
+  TextToVideoInput,
 } from "../../domain/entities";
 declare const __DEV__: boolean;
@@ -19,15 +20,92 @@ declare const __DEV__: boolean;
 const DEFAULT_POLL_INTERVAL = 10000; // 10 seconds
 const MAX_POLL_DURATION = 300000; // 5 minutes
 const MAX_POLL_ATTEMPTS = Math.floor(MAX_POLL_DURATION / DEFAULT_POLL_INTERVAL);
+const VEO_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
 class GeminiVideoGenerationService {
   /**
-   * Generate video from image and prompt using Veo API
+   * Generate video from text prompt using Veo API (text-to-video)
+   */
+  async generateTextToVideo(
+    input: TextToVideoInput,
+    onProgress?: (progress: VideoGenerationProgress) => void,
+  ): Promise<VideoGenerationResult> {
+    geminiClientCoreService.validateInitialization();
+    this.validateTextInput(input);
+    const config = geminiClientCoreService.getConfig();
+    const videoModel = config?.videoGenerationModel || DEFAULT_MODELS.VIDEO_GENERATION;
+    const apiKey = config?.apiKey;
+    if (typeof __DEV__ !== "undefined" && __DEV__) {
+      // eslint-disable-next-line no-console
+      console.log("[GeminiVideoGeneration] generateTextToVideo() called", {
+        model: videoModel,
+        promptLength: input.prompt.length,
+      });
+    }
+    const url = `${VEO_API_BASE}/models/${videoModel}:generateVideos`;
+    const requestBody: Record<string, unknown> = {
+      prompt: input.prompt,
+    };
+    if (input.negativePrompt) {
+      requestBody.config = {
+        negativePrompt: input.negativePrompt,
+        aspectRatio: input.options?.aspectRatio || "16:9",
+      };
+    } else if (input.options?.aspectRatio) {
+      requestBody.config = {
+        aspectRatio: input.options.aspectRatio,
+      };
+    }
+    onProgress?.({ status: "queued", progress: 5 });
+    const operation = await geminiRetryService.executeWithRetry(async () => {
+      const res = await fetch(url, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "x-goog-api-key": apiKey!,
+        },
+        body: JSON.stringify(requestBody),
+      });
+      if (!res.ok) {
+        const errorText = await res.text();
+        throw this.createError("OPERATION_FAILED", `Veo API error (${res.status}): ${errorText}`, res.status);
+      }
+      return res.json() as Promise<VeoOperation>;
+    });
+    if (typeof __DEV__ !== "undefined" && __DEV__) {
+      // eslint-disable-next-line no-console
+      console.log("[GeminiVideoGeneration] Operation started", {
+        operationName: operation.name,
+      });
+    }
+    onProgress?.({ status: "processing", progress: 10 });
+    return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
+  }
+  /**
+   * Generate video using Veo API: image-to-video when an image is supplied, otherwise delegates to text-to-video
    */
   async generateVideo(
     input: VideoGenerationInput,
     onProgress?: (progress: VideoGenerationProgress) => void,
   ): Promise<VideoGenerationResult> {
+    // If no image provided, use text-to-video
+    if (!input.image) {
+      return this.generateTextToVideo(input, onProgress);
+    }
     geminiClientCoreService.validateInitialization();
     this.validateInput(input);
@@ -40,36 +118,33 @@ class GeminiVideoGenerationService {
       console.log("[GeminiVideoGeneration] generateVideo() called", {
         model: videoModel,
         promptLength: input.prompt.length,
+        hasImage: !!input.image,
       });
     }
-    const url = `https://generativelanguage.googleapis.com/v1/models/${videoModel}:generate`;
-    const requestBody = {
-      model: videoModel,
-      contents: [
-        {
-          parts: [
-            { text: input.prompt },
-            {
-              inline_data: {
-                mime_type: "image/jpeg",
-                data: input.image,
-              },
+    const url = `${VEO_API_BASE}/models/${videoModel}:generateVideos`;
+    const requestBody: Record<string, unknown> = {
+      prompt: input.prompt,
+      config: {
+        aspectRatio: input.options?.aspectRatio || "16:9",
+        referenceImages: [
+          {
+            inlineData: {
+              mimeType: "image/jpeg",
+              data: input.image,
             },
-          ],
-        },
-      ],
-      generationConfig: {
-        responseModalities: RESPONSE_MODALITIES.VIDEO_ONLY,
-        videoGenerationConfig: {
-          numberOfVideos: input.options?.numberOfVideos || 1,
-          aspectRatio: input.options?.aspectRatio || "9:16",
-          resolution: input.options?.resolution || "720p",
-        },
+          },
+        ],
       },
     };
+    if (input.negativePrompt) {
+      (requestBody.config as Record<string, unknown>).negativePrompt = input.negativePrompt;
+    }
+    onProgress?.({ status: "queued", progress: 5 });
     const operation = await geminiRetryService.executeWithRetry(async () => {
       const res = await fetch(url, {
         method: "POST",
@@ -95,9 +170,9 @@ class GeminiVideoGenerationService {
       });
     }
-    const result = await this.pollOperation(operation.name, apiKey!, onProgress);
+    onProgress?.({ status: "processing", progress: 10 });
-    return result;
+    return this.pollOperation(operation.name, apiKey!, videoModel, onProgress);
   }
   /**
@@ -106,24 +181,23 @@ class GeminiVideoGenerationService {
   private async pollOperation(
     operationName: string,
     apiKey: string,
+    model: string,
     onProgress?: (progress: VideoGenerationProgress) => void,
   ): Promise<VideoGenerationResult> {
-    const url = `https://generativelanguage.googleapis.com/v1/${operationName}`;
+    const url = `${VEO_API_BASE}/${operationName}`;
     let attempts = 0;
     while (attempts < MAX_POLL_ATTEMPTS) {
       await this.delay(DEFAULT_POLL_INTERVAL);
       attempts++;
-      const progress = Math.min(95, (attempts / MAX_POLL_ATTEMPTS) * 100);
+      const progress = Math.min(95, 10 + (attempts / MAX_POLL_ATTEMPTS) * 85);
-      if (onProgress) {
-        onProgress({
-          status: "processing",
-          progress,
-          estimatedTimeRemaining: (MAX_POLL_ATTEMPTS - attempts) * (DEFAULT_POLL_INTERVAL / 1000),
-        });
-      }
+      onProgress?.({
+        status: "processing",
+        progress,
+        estimatedTimeRemaining: (MAX_POLL_ATTEMPTS - attempts) * (DEFAULT_POLL_INTERVAL / 1000),
+      });
       if (typeof __DEV__ !== "undefined" && __DEV__) {
         // eslint-disable-next-line no-console
@@ -150,33 +224,26 @@ class GeminiVideoGenerationService {
       });
       if (operation.error) {
-        throw this.createError(
-          "OPERATION_FAILED",
-          operation.error.message,
-          operation.error.code,
-        );
+        throw this.createError("OPERATION_FAILED", operation.error.message, operation.error.code);
       }
-      if (operation.done && operation.response?.candidates?.[0]?.uri) {
-        const videoUrl = operation.response.candidates[0].uri;
+      if (operation.done) {
+        const videoUrl = this.extractVideoUrl(operation);
-        if (onProgress) {
-          onProgress({
-            status: "completed",
-            progress: 100,
-          });
-        }
+        if (videoUrl) {
+          onProgress?.({ status: "completed", progress: 100 });
-        return {
-          videoUrl,
-          metadata: {
-            duration: 10,
-            resolution: "720p",
-            aspectRatio: "9:16",
-            model: DEFAULT_MODELS.VIDEO_GENERATION,
-            operationName,
-          },
-        };
+          return {
+            videoUrl,
+            metadata: {
+              duration: 8,
+              resolution: "720p",
+              aspectRatio: "16:9",
+              model,
+              operationName,
+            },
+          };
+        }
       }
     }
@@ -184,9 +251,39 @@ class GeminiVideoGenerationService {
   }
   /**
-   * Validate input parameters
+   * Extract video URL from operation response (handles multiple response formats)
    */
-  private validateInput(input: VideoGenerationInput): void {
+  private extractVideoUrl(operation: VeoOperation): string | null {
+    const response = operation.response;
+    if (!response) return null;
+    // Format 1: generatedVideos[].video.uri (new SDK format)
+    if (response.generatedVideos?.[0]?.video?.uri) {
+      return response.generatedVideos[0].video.uri;
+    }
+    // Format 2: generatedVideos[].video.url
+    if (response.generatedVideos?.[0]?.video?.url) {
+      return response.generatedVideos[0].video.url;
+    }
+    // Format 3: candidates[].uri (legacy format)
+    if (response.candidates?.[0]?.uri) {
+      return response.candidates[0].uri;
+    }
+    // Format 4: generateVideoResponse.generatedSamples[].video.uri (REST API format)
+    if (response.generateVideoResponse?.generatedSamples?.[0]?.video?.uri) {
+      return response.generateVideoResponse.generatedSamples[0].video.uri;
+    }
+    return null;
+  }
+  /**
+   * Validate text-to-video input parameters
+   */
+  private validateTextInput(input: TextToVideoInput): void {
     if (!input.prompt || input.prompt.trim().length === 0) {
       throw this.createError("INVALID_INPUT", "Prompt is required");
     }
@@ -194,9 +291,16 @@ class GeminiVideoGenerationService {
     if (input.prompt.length > 2000) {
       throw this.createError("INVALID_INPUT", "Prompt exceeds 2000 characters");
     }
+  }
+  /**
+   * Validate image-to-video input parameters
+   */
+  private validateInput(input: VideoGenerationInput): void {
+    this.validateTextInput(input);
     if (!input.image || input.image.length === 0) {
-      throw this.createError("INVALID_INPUT", "Image is required");
+      throw this.createError("INVALID_INPUT", "Image is required for image-to-video");
     }
   }

package/src/infrastructure/services/generation-executor.ts CHANGED Viewed

@@ -5,9 +5,11 @@
 import type {
     GeminiImageInput,
+    VideoGenerationInput,
 } from "../../domain/entities";
 import { geminiTextGenerationService } from "./gemini-text-generation.service";
 import { geminiImageGenerationService } from "./gemini-image-generation.service";
+import { geminiVideoGenerationService } from "./gemini-video-generation.service";
 import { ContentBuilder } from "../content/ContentBuilder";
 import { ResponseFormatter } from "../response/ResponseFormatter";
@@ -20,6 +22,11 @@ export class GenerationExecutor {
         input: Record<string, unknown>,
     ): Promise<T> {
         const isImageGeneration = input.generateImage === true || input.type === "image";
+        const isVideoGeneration = this.isVideoModel(model) || input.type === "video";
+        if (isVideoGeneration) {
+            return this.executeVideoGeneration<T>(input);
+        }
         if (isImageGeneration) {
             const prompt = String(input.prompt || "");
@@ -38,6 +45,43 @@ export class GenerationExecutor {
         return this.responseFormatter.formatResponse<T>(response, input);
     }
+    /**
+     * Check if model is a video generation model (Veo)
+     */
+    private isVideoModel(model: string): boolean {
+        return model.toLowerCase().includes("veo");
+    }
+    /**
+     * Execute video generation using Veo API
+     */
+    private async executeVideoGeneration<T>(input: Record<string, unknown>): Promise<T> {
+        const videoInput: VideoGenerationInput = {
+            prompt: String(input.prompt || ""),
+            image: input.image as string | undefined,
+            negativePrompt: input.negativePrompt as string | undefined,
+            options: {
+                aspectRatio: this.normalizeAspectRatio(input.aspect_ratio as string),
+            },
+        };
+        const result = await geminiVideoGenerationService.generateVideo(videoInput);
+        return {
+            video: { url: result.videoUrl },
+            videoUrl: result.videoUrl,
+            metadata: result.metadata,
+        } as T;
+    }
+    /**
+     * Normalize aspect ratio: "9:16" and "1:1" pass through unchanged; any other value (including undefined) becomes "16:9"
+     */
+    private normalizeAspectRatio(ratio: string | undefined): "16:9" | "9:16" | "1:1" {
+        if (ratio === "9:16" || ratio === "1:1") return ratio;
+        return "16:9";
+    }
     async generateWithImages(
         model: string,
         prompt: string,