npm - @loonylabs/tti-middleware - Versions diffs - 1.5.1 → 1.7.0 - Mend

@loonylabs/tti-middleware 1.5.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -45,6 +45,7 @@
   - **IONOS**: German cloud provider with OpenAI-compatible API (experimental)
 - **Character Consistency**: Generate consistent characters across multiple images (perfect for children's book illustrations)
 - **GDPR/DSGVO Compliance**: Built-in EU region support with automatic fallback
+- **Region Rotation**: Opt-in region rotation on quota errors (429) for Google Cloud — rotate through regions instead of retrying the same exhausted region
 - **Retry Logic**: Exponential backoff with jitter for transient errors (429, 408, 5xx, timeouts)
 - **TypeScript First**: Full type safety with comprehensive interfaces
 - **Logging Control**: Configurable log levels via environment or API
@@ -400,6 +401,31 @@ interface TTIResponse {
 ## Advanced Features
+<details>
+<summary><strong>Region Rotation (Google Cloud)</strong></summary>
+When Vertex AI returns 429 (Resource Exhausted) due to Dynamic Shared Quota, the middleware can rotate through a list of regions instead of retrying the same exhausted region:
+```typescript
+const provider = new GoogleCloudTTIProvider({
+  projectId: 'my-project',
+  region: 'europe-west4',
+  regionRotation: {
+    regions: ['europe-west4', 'europe-west1', 'europe-north1', 'europe-central2'],
+    fallback: 'global',
+    alwaysTryFallback: true, // Default: one bonus attempt on fallback after budget exhausted
+  },
+});
+```
+**Key behavior:**
+- `maxRetries` is the **total budget** across all regions (no multiplier)
+- Only **quota errors** (429, Resource Exhausted) trigger rotation — server errors (500, 503) retry the same region
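+- `alwaysTryFallback: true` (default): if the request still fails with a quota error and the fallback region has not been reached yet, one bonus attempt is made on the fallback even though the retry budget is exhausted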
+- Without `regionRotation`: existing behavior unchanged
+</details>
 <details>
 <summary><strong>Retry Configuration</strong></summary>

package/dist/middleware/services/tti/providers/base-tti-provider.d.ts CHANGED Viewed

@@ -112,6 +112,12 @@ export declare abstract class BaseTTIProvider implements ITTIProvider {
      * Check if an error is a timeout error (from our withTimeout wrapper).
      */
     private isTimeoutError;
+    /**
+     * Check if an error is a quota/rate-limit error (429 / Resource Exhausted).
+     * Used by providers to distinguish quota errors from other retryable errors
+     * (e.g., for region rotation on quota errors only).
+     */
+    protected isQuotaError(error: Error): boolean;
     /**
      * Execute a generation function with retry logic for transient errors.
      * Retries on: 429, 408, 5xx, network timeouts, TCP disconnects.
@@ -121,8 +127,15 @@ export declare abstract class BaseTTIProvider implements ITTIProvider {
      * retry.timeoutMs, default 45s). Timeout errors have their own retry
      * counter (timeoutRetries, default 2) independent from the general
      * maxRetries used for quota/server errors.
-     */
-    protected executeWithRetry<T>(request: TTIRequest, operation: () => Promise<T>, operationName: string): Promise<T>;
+     *
+     * @param options.onRetry - Optional callback invoked before each retry.
+     *   Receives the error that triggered the retry and the current general
+     *   retry count. Providers can use this to adjust state between retries
+     *   (e.g., rotate regions on quota errors).
+     */
+    protected executeWithRetry<T>(request: TTIRequest, operation: () => Promise<T>, operationName: string, options?: {
+        onRetry?: (error: Error, generalRetryCount: number) => void;
+    }): Promise<T>;
     /**
      * Check if an error is retryable (transient).
      * Retryable: 429, 408, 500, 502, 503, 504, network errors, timeouts.

package/dist/middleware/services/tti/providers/base-tti-provider.js CHANGED Viewed

@@ -301,6 +301,19 @@ class BaseTTIProvider {
     isTimeoutError(error) {
         return error.message.toLowerCase().startsWith('timeout:');
     }
+    /**
+     * Check if an error is a quota/rate-limit error (429 / Resource Exhausted).
+     * Used by providers to distinguish quota errors from other retryable errors
+     * (e.g., for region rotation on quota errors only).
+     */
+    isQuotaError(error) {
+        const message = error.message.toLowerCase();
+        return (message.includes('429') ||
+            message.includes('resource exhausted') ||
+            message.includes('quota exceeded') ||
+            message.includes('rate limit') ||
+            message.includes('too many requests'));
+    }
     /**
      * Execute a generation function with retry logic for transient errors.
      * Retries on: 429, 408, 5xx, network timeouts, TCP disconnects.
@@ -310,8 +323,13 @@ class BaseTTIProvider {
      * retry.timeoutMs, default 45s). Timeout errors have their own retry
      * counter (timeoutRetries, default 2) independent from the general
      * maxRetries used for quota/server errors.
+     *
+     * @param options.onRetry - Optional callback invoked before each retry.
+     *   Receives the error that triggered the retry and the current general
+     *   retry count. Providers can use this to adjust state between retries
+     *   (e.g., rotate regions on quota errors).
      */
-    async executeWithRetry(request, operation, operationName) {
+    async executeWithRetry(request, operation, operationName, options) {
         const retryConfig = this.resolveRetryConfig(request);
         // No retry configured
         if (!retryConfig) {
@@ -371,6 +389,10 @@ class BaseTTIProvider {
                         this.log('error', `${operationName} general retry budget exhausted (${maxGeneralRetries} retries): ${error.message}`, { attempt, generalRetryCount, durationMs: duration });
                         throw error;
                     }
+                    // Notify provider before retry (e.g., for region rotation)
+                    if (options?.onRetry) {
+                        options.onRetry(error, generalRetryCount);
+                    }
                     const delay = this.calculateRetryDelay(generalRetryCount, retryConfig);
                     this.log('warn', `Transient error during ${operationName} after ${duration}ms. Retry ${generalRetryCount}/${maxGeneralRetries} in ${delay}ms: ${error.message}`, { attempt, generalRetryCount, maxGeneralRetries, delayMs: delay, durationMs: duration });
                     await this.sleep(delay);

package/dist/middleware/services/tti/providers/google-cloud-provider.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@
  * - Imagen 4 Ultra (imagen-4.0-ultra-generate-001) - Highest quality variant
  * - Gemini 2.5 Flash Image - Text-to-image with character consistency
  * - Gemini 3 Pro Image (gemini-3-pro-image-preview) - 4K, text rendering
+ * - Gemini 3.1 Flash Image (gemini-3.1-flash-image-preview) - 4K, improved text rendering (global endpoint)
  *
  * All requests go through Google Cloud (Vertex AI) with proper DPA.
  * EU-compliant when using EU regions.
@@ -15,12 +16,12 @@
  * @see https://cloud.google.com/vertex-ai/generative-ai/pricing
  * @see https://cloud.google.com/terms/data-processing-addendum
  */
-import { TTIRequest, TTIResponse, ModelInfo, GoogleCloudRegion } from '../../../types';
+import { TTIRequest, TTIResponse, ModelInfo, GoogleCloudRegion, RegionRotationConfig } from '../../../types';
 import { BaseTTIProvider } from './base-tti-provider';
 interface GoogleCloudConfig {
     /** Google Cloud Project ID */
     projectId: string;
-    /** Default region for requests */
+    /** Default region for requests (used when regionRotation is not configured) */
     region: GoogleCloudRegion;
     /** Path to service account JSON file */
     keyFilename?: string;
@@ -30,11 +31,17 @@ interface GoogleCloudConfig {
         private_key: string;
         project_id?: string;
     };
+    /**
+     * Opt-in region rotation for quota errors (429 / Resource Exhausted).
+     * When configured, the middleware rotates through the listed regions
+     * on quota errors instead of retrying the same region.
+     */
+    regionRotation?: RegionRotationConfig;
 }
 export declare class GoogleCloudTTIProvider extends BaseTTIProvider {
     private config;
     private lastUsedRegion;
-    private aiplatformClient;
+    private aiplatformClients;
     private genaiClients;
     constructor(config?: Partial<GoogleCloudConfig>);
     getDisplayName(): string;

package/dist/middleware/services/tti/providers/google-cloud-provider.js CHANGED Viewed

@@ -9,6 +9,7 @@
  * - Imagen 4 Ultra (imagen-4.0-ultra-generate-001) - Highest quality variant
  * - Gemini 2.5 Flash Image - Text-to-image with character consistency
  * - Gemini 3 Pro Image (gemini-3-pro-image-preview) - 4K, text rendering
+ * - Gemini 3.1 Flash Image (gemini-3.1-flash-image-preview) - 4K, improved text rendering (global endpoint)
  *
  * All requests go through Google Cloud (Vertex AI) with proper DPA.
  * EU-compliant when using EU regions.
@@ -163,6 +164,20 @@ const GOOGLE_CLOUD_MODELS = [
         availableRegions: ['global'],
         pricingUrl: 'https://cloud.google.com/vertex-ai/generative-ai/pricing',
     },
+    {
+        id: 'gemini-flash-image-2',
+        displayName: 'Gemini 3.1 Flash Image',
+        capabilities: {
+            textToImage: true,
+            characterConsistency: true, // Up to 5 characters + 14 objects
+            imageEditing: false,
+            maxImagesPerRequest: 1,
+        },
+        // Preview model — requires global endpoint (same as gemini-pro-image).
+        // Will likely get regional endpoints once GA.
+        availableRegions: ['global'],
+        pricingUrl: 'https://cloud.google.com/vertex-ai/generative-ai/pricing',
+    },
 ];
 // Internal model IDs used in Vertex AI API calls
 const MODEL_ID_MAP = {
@@ -172,9 +187,10 @@ const MODEL_ID_MAP = {
     'imagen-4-ultra': 'imagen-4.0-ultra-generate-001',
     'gemini-flash-image': 'gemini-2.5-flash-image',
     'gemini-pro-image': 'gemini-3-pro-image-preview',
+    'gemini-flash-image-2': 'gemini-3.1-flash-image-preview',
 };
 // Models that use the Gemini generateContent API (vs Imagen predict API)
-const GEMINI_API_MODELS = new Set(['gemini-flash-image', 'gemini-pro-image']);
+const GEMINI_API_MODELS = new Set(['gemini-flash-image', 'gemini-pro-image', 'gemini-flash-image-2']);
 // ============================================================
 // PROVIDER IMPLEMENTATION
 // ============================================================
@@ -182,8 +198,8 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
     constructor(config) {
         super(types_1.TTIProvider.GOOGLE_CLOUD);
         this.lastUsedRegion = null;
-        // Lazy-loaded SDK clients
-        this.aiplatformClient = null;
+        // Lazy-loaded SDK clients (one per region, since region is baked into the client)
+        this.aiplatformClients = new Map();
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
         this.genaiClients = new Map();
         const projectId = config?.projectId ||
@@ -202,10 +218,27 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
             region,
             keyFilename: config?.keyFilename || process.env.GOOGLE_APPLICATION_CREDENTIALS,
             credentials: config?.credentials,
+            regionRotation: config?.regionRotation,
         };
+        // Validate regionRotation config
+        if (this.config.regionRotation) {
+            if (!this.config.regionRotation.regions || this.config.regionRotation.regions.length === 0) {
+                throw new base_tti_provider_1.InvalidConfigError(types_1.TTIProvider.GOOGLE_CLOUD, 'regionRotation.regions must contain at least one region');
+            }
+            if (!this.config.regionRotation.fallback) {
+                throw new base_tti_provider_1.InvalidConfigError(types_1.TTIProvider.GOOGLE_CLOUD, 'regionRotation.fallback is required');
+            }
+        }
         this.log('info', 'Google Cloud TTI Provider initialized', {
             projectId: this.config.projectId,
             region: this.config.region,
+            regionRotation: this.config.regionRotation
+                ? {
+                    regions: this.config.regionRotation.regions,
+                    fallback: this.config.regionRotation.fallback,
+                    alwaysTryFallback: this.config.regionRotation.alwaysTryFallback ?? true,
+                }
+                : undefined,
             isEURegion: (0, base_tti_provider_1.isEURegion)(this.config.region),
             models: this.listModels().map((m) => m.id),
         });
@@ -232,29 +265,66 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
                 .map((m) => m.id)
                 .join(', ')}`);
         }
-        // Validate region availability
-        const effectiveRegion = this.getEffectiveRegion(modelId);
+        // Determine base region (handles global-only models, region availability)
+        const baseRegion = this.getEffectiveRegion(modelId);
+        // Region rotation: only for non-global models with rotation configured
+        const rotation = this.config.regionRotation;
+        const useRotation = !!(rotation && baseRegion !== 'global');
+        // Mutable region — the onRetry callback advances this on quota errors
+        let currentRegion = useRotation ? rotation.regions[0] : baseRegion;
+        // Build region sequence: [...regions, fallback]
+        let regionIndex = 0;
+        let regionSequence = [];
+        if (useRotation) {
+            regionSequence = [...rotation.regions, rotation.fallback];
+            this.log('info', 'Region rotation enabled', {
+                sequence: regionSequence,
+                fallback: rotation.fallback,
+                alwaysTryFallback: rotation.alwaysTryFallback ?? true,
+            });
+        }
         // Create debug info for logging
         let debugInfo = null;
         if (debug_tti_utils_1.TTIDebugger.isEnabled) {
-            debugInfo = debug_tti_utils_1.TTIDebugger.createDebugInfo(request, this.providerName, modelId, { region: effectiveRegion });
+            debugInfo = debug_tti_utils_1.TTIDebugger.createDebugInfo(request, this.providerName, modelId, { region: currentRegion });
             await debug_tti_utils_1.TTIDebugger.logRequest(debugInfo);
         }
         this.log('debug', 'Generating image', {
             model: modelId,
-            region: effectiveRegion,
+            region: currentRegion,
+            regionRotation: useRotation,
             hasReferenceImages: (0, base_tti_provider_1.hasReferenceImages)(request),
         });
-        try {
-            // Route to appropriate API based on model type
-            let response;
-            if (GEMINI_API_MODELS.has(modelId)) {
-                response = await this.executeWithRetry(request, () => this.generateWithGemini(request, modelId, effectiveRegion), 'Gemini API call');
+        const isGeminiModel = GEMINI_API_MODELS.has(modelId);
+        const operationName = isGeminiModel ? 'Gemini API call' : 'Imagen API call';
+        // Operation lambda reads currentRegion from closure
+        const operation = () => {
+            if (isGeminiModel) {
+                return this.generateWithGemini(request, modelId, currentRegion);
             }
             else {
-                response = await this.executeWithRetry(request, () => this.generateWithImagen(request, modelId, effectiveRegion), 'Imagen API call');
+                return this.generateWithImagen(request, modelId, currentRegion);
             }
-            // Log successful response
+        };
+        // onRetry: advance region on quota errors, stay on same region otherwise
+        const onRetry = useRotation
+            ? (error) => {
+                if (this.isQuotaError(error) && regionIndex < regionSequence.length - 1) {
+                    regionIndex++;
+                    currentRegion = regionSequence[regionIndex];
+                    this.log('info', `Quota error — rotating to region ${currentRegion}`, {
+                        regionIndex,
+                        totalRegions: regionSequence.length,
+                        region: currentRegion,
+                    });
+                }
+                // Non-quota retryable errors: stay on same region
+            }
+            : undefined;
+        try {
+            const response = await this.executeWithRetry(request, operation, operationName, {
+                onRetry,
+            });
             if (debugInfo) {
                 debugInfo = debug_tti_utils_1.TTIDebugger.updateWithResponse(debugInfo, response);
                 await debug_tti_utils_1.TTIDebugger.logResponse(debugInfo);
@@ -262,7 +332,32 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
             return response;
         }
         catch (error) {
-            // Log error
+            // alwaysTryFallback: one bonus attempt on the fallback region, only when the final error is a quota error and the fallback was not reached yet
+            if (useRotation &&
+                this.isQuotaError(error) &&
+                (rotation.alwaysTryFallback !== false) &&
+                currentRegion !== rotation.fallback) {
+                this.log('info', `Retry budget exhausted — bonus attempt on fallback region ${rotation.fallback}`, {
+                    exhaustedRegion: currentRegion,
+                    fallback: rotation.fallback,
+                });
+                currentRegion = rotation.fallback;
+                try {
+                    const response = await operation();
+                    if (debugInfo) {
+                        debugInfo = debug_tti_utils_1.TTIDebugger.updateWithResponse(debugInfo, response);
+                        await debug_tti_utils_1.TTIDebugger.logResponse(debugInfo);
+                    }
+                    return response;
+                }
+                catch (fallbackError) {
+                    if (debugInfo) {
+                        debugInfo = debug_tti_utils_1.TTIDebugger.updateWithError(debugInfo, fallbackError);
+                        await debug_tti_utils_1.TTIDebugger.logError(debugInfo);
+                    }
+                    throw fallbackError;
+                }
+            }
             if (debugInfo) {
                 debugInfo = debug_tti_utils_1.TTIDebugger.updateWithError(debugInfo, error);
                 await debug_tti_utils_1.TTIDebugger.logError(debugInfo);
@@ -325,7 +420,7 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
         const internalModelId = MODEL_ID_MAP[modelId];
         this.lastUsedRegion = region;
         try {
-            const { client, helpers } = await this.getAiplatformClient();
+            const { client, helpers } = await this.getAiplatformClient(region);
             const endpoint = `projects/${this.config.projectId}/locations/${region}/publishers/google/models/${internalModelId}`;
             // Build instance
             const instanceValue = { prompt: request.prompt };
@@ -370,12 +465,12 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
             throw this.handleError(error, 'during Imagen API call');
         }
     }
-    async getAiplatformClient() {
-        if (!this.aiplatformClient) {
+    async getAiplatformClient(region) {
+        if (!this.aiplatformClients.has(region)) {
             try {
                 const { v1, helpers } = await Promise.resolve().then(() => __importStar(require('@google-cloud/aiplatform')));
                 const clientOptions = {
-                    apiEndpoint: `${this.config.region}-aiplatform.googleapis.com`,
+                    apiEndpoint: `${region}-aiplatform.googleapis.com`,
                 };
                 if (this.config.keyFilename) {
                     clientOptions.keyFilename = this.config.keyFilename;
@@ -384,9 +479,13 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
                     clientOptions.credentials = this.config.credentials;
                 }
                 // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                this.aiplatformClient = new v1.PredictionServiceClient(clientOptions);
+                this.aiplatformClients.set(region, new v1.PredictionServiceClient(clientOptions));
+                this.log('debug', 'Initialized @google-cloud/aiplatform client', {
+                    region,
+                    apiEndpoint: clientOptions.apiEndpoint,
+                });
                 return {
-                    client: this.aiplatformClient,
+                    client: this.aiplatformClients.get(region),
                     helpers: helpers,
                 };
             }
@@ -396,7 +495,7 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
         }
         const { helpers } = await Promise.resolve().then(() => __importStar(require('@google-cloud/aiplatform')));
         return {
-            client: this.aiplatformClient,
+            client: this.aiplatformClients.get(region),
             helpers: helpers,
         };
     }
@@ -470,11 +569,17 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
             const config = {
                 responseModalities: ['TEXT', 'IMAGE'],
             };
-            // Add imageConfig with aspectRatio if provided
-            if (request.aspectRatio) {
-                config.imageConfig = {
-                    aspectRatio: request.aspectRatio,
-                };
+            // Add imageConfig with aspectRatio and/or imageSize if provided
+            if (request.aspectRatio || request.providerOptions?.imageSize) {
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                const imageConfig = {};
+                if (request.aspectRatio) {
+                    imageConfig.aspectRatio = request.aspectRatio;
+                }
+                if (request.providerOptions?.imageSize) {
+                    imageConfig.imageSize = request.providerOptions.imageSize;
+                }
+                config.imageConfig = imageConfig;
             }
             // Add temperature if provided
             if (request.providerOptions?.temperature !== undefined) {

package/dist/middleware/types/index.d.ts CHANGED Viewed

@@ -50,7 +50,29 @@ export interface ModelInfo {
  * Google Cloud regions
  * EU regions are GDPR-compliant
  */
-export type GoogleCloudRegion = 'global' | 'europe-west1' | 'europe-west2' | 'europe-west3' | 'europe-west4' | 'europe-west9' | 'us-central1' | 'us-east4';
+export type GoogleCloudRegion = 'global' | 'europe-west1' | 'europe-west2' | 'europe-west3' | 'europe-west4' | 'europe-west6' | 'europe-west8' | 'europe-west9' | 'europe-north1' | 'europe-central2' | 'europe-southwest1' | 'us-central1' | 'us-east1' | 'us-east4' | 'us-east5' | 'us-south1' | 'us-west1' | 'us-west4' | 'asia-east1' | 'asia-east2' | 'asia-northeast1' | 'asia-northeast3' | 'asia-south1' | 'asia-southeast1' | 'australia-southeast1' | 'me-central1' | 'me-central2' | 'me-west1';
+/**
+ * Configuration for region rotation on quota errors (429 / Resource Exhausted).
+ *
+ * When Vertex AI returns a quota error, the middleware rotates through the
+ * configured regions instead of retrying the same region. This is useful
+ * when Dynamic Shared Quota is temporarily exhausted in a single region.
+ *
+ * The total retry budget (from RetryOptions.maxRetries) is shared across
+ * all regions — region rotation does NOT multiply the retry count.
+ */
+export interface RegionRotationConfig {
+    /** Ordered list of regions to try. First entry = primary region. */
+    regions: GoogleCloudRegion[];
+    /** Last-resort region after all regions exhausted (typically 'global'). */
+    fallback: GoogleCloudRegion;
+    /**
+     * If true: when maxRetries is exhausted by quota errors before reaching the
+     * fallback, one final bonus attempt on the fallback region is made.
+     * @default true
+     */
+    alwaysTryFallback?: boolean;
+}
 /**
  * Reference image for character consistency
  */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@loonylabs/tti-middleware",
-  "version": "1.5.1",
+  "version": "1.7.0",
   "description": "Provider-agnostic Text-to-Image middleware with GDPR compliance. Supports Google Cloud (Imagen, Gemini), Eden AI, and IONOS.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",