@loonylabs/tti-middleware 1.5.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -45,6 +45,7 @@
45
45
  - **IONOS**: German cloud provider with OpenAI-compatible API (experimental)
46
46
  - **Character Consistency**: Generate consistent characters across multiple images (perfect for children's book illustrations)
47
47
  - **GDPR/DSGVO Compliance**: Built-in EU region support with automatic fallback
48
+ - **Region Rotation**: Opt-in region rotation on quota errors (429) for Google Cloud — rotate through regions instead of retrying the same exhausted region
48
49
  - **Retry Logic**: Exponential backoff with jitter for transient errors (429, 408, 5xx, timeouts)
49
50
  - **TypeScript First**: Full type safety with comprehensive interfaces
50
51
  - **Logging Control**: Configurable log levels via environment or API
@@ -400,6 +401,31 @@ interface TTIResponse {
400
401
 
401
402
  ## Advanced Features
402
403
 
404
+ <details>
405
+ <summary><strong>Region Rotation (Google Cloud)</strong></summary>
406
+
407
+ When Vertex AI returns 429 (Resource Exhausted) due to Dynamic Shared Quota, the middleware can rotate through a list of regions instead of retrying the same exhausted region:
408
+
409
+ ```typescript
410
+ const provider = new GoogleCloudTTIProvider({
411
+ projectId: 'my-project',
412
+ region: 'europe-west4',
413
+ regionRotation: {
414
+ regions: ['europe-west4', 'europe-west1', 'europe-north1', 'europe-central2'],
415
+ fallback: 'global',
416
+ alwaysTryFallback: true, // Default: one bonus attempt on fallback after budget exhausted
417
+ },
418
+ });
419
+ ```
420
+
421
+ **Key behavior:**
422
+ - `maxRetries` is the **total budget** across all regions (no multiplier)
423
+ - Only **quota errors** (429, Resource Exhausted) trigger rotation — server errors (500, 503) retry the same region
424
+ - `alwaysTryFallback: true` (default): one bonus attempt on fallback even if retry budget is exhausted
425
+ - Without `regionRotation`: existing behavior unchanged
426
+
427
+ </details>
428
+
403
429
  <details>
404
430
  <summary><strong>Retry Configuration</strong></summary>
405
431
 
@@ -112,6 +112,12 @@ export declare abstract class BaseTTIProvider implements ITTIProvider {
112
112
  * Check if an error is a timeout error (from our withTimeout wrapper).
113
113
  */
114
114
  private isTimeoutError;
115
+ /**
116
+ * Check if an error is a quota/rate-limit error (429 / Resource Exhausted).
117
+ * Used by providers to distinguish quota errors from other retryable errors
118
+ * (e.g., for region rotation on quota errors only).
119
+ */
120
+ protected isQuotaError(error: Error): boolean;
115
121
  /**
116
122
  * Execute a generation function with retry logic for transient errors.
117
123
  * Retries on: 429, 408, 5xx, network timeouts, TCP disconnects.
@@ -121,8 +127,15 @@ export declare abstract class BaseTTIProvider implements ITTIProvider {
121
127
  * retry.timeoutMs, default 45s). Timeout errors have their own retry
122
128
  * counter (timeoutRetries, default 2) independent from the general
123
129
  * maxRetries used for quota/server errors.
124
- */
125
- protected executeWithRetry<T>(request: TTIRequest, operation: () => Promise<T>, operationName: string): Promise<T>;
130
+ *
131
+ * @param options.onRetry - Optional callback invoked before each retry.
132
+ * Receives the error that triggered the retry and the current general
133
+ * retry count. Providers can use this to adjust state between retries
134
+ * (e.g., rotate regions on quota errors).
135
+ */
136
+ protected executeWithRetry<T>(request: TTIRequest, operation: () => Promise<T>, operationName: string, options?: {
137
+ onRetry?: (error: Error, generalRetryCount: number) => void;
138
+ }): Promise<T>;
126
139
  /**
127
140
  * Check if an error is retryable (transient).
128
141
  * Retryable: 429, 408, 500, 502, 503, 504, network errors, timeouts.
@@ -301,6 +301,19 @@ class BaseTTIProvider {
301
301
  isTimeoutError(error) {
302
302
  return error.message.toLowerCase().startsWith('timeout:');
303
303
  }
304
+ /**
305
+ * Check if an error is a quota/rate-limit error (429 / Resource Exhausted).
306
+ * Used by providers to distinguish quota errors from other retryable errors
307
+ * (e.g., for region rotation on quota errors only).
308
+ */
309
+ isQuotaError(error) {
310
+ const message = error.message.toLowerCase();
311
+ return (message.includes('429') ||
312
+ message.includes('resource exhausted') ||
313
+ message.includes('quota exceeded') ||
314
+ message.includes('rate limit') ||
315
+ message.includes('too many requests'));
316
+ }
304
317
  /**
305
318
  * Execute a generation function with retry logic for transient errors.
306
319
  * Retries on: 429, 408, 5xx, network timeouts, TCP disconnects.
@@ -310,8 +323,13 @@ class BaseTTIProvider {
310
323
  * retry.timeoutMs, default 45s). Timeout errors have their own retry
311
324
  * counter (timeoutRetries, default 2) independent from the general
312
325
  * maxRetries used for quota/server errors.
326
+ *
327
+ * @param options.onRetry - Optional callback invoked before each retry.
328
+ * Receives the error that triggered the retry and the current general
329
+ * retry count. Providers can use this to adjust state between retries
330
+ * (e.g., rotate regions on quota errors).
313
331
  */
314
- async executeWithRetry(request, operation, operationName) {
332
+ async executeWithRetry(request, operation, operationName, options) {
315
333
  const retryConfig = this.resolveRetryConfig(request);
316
334
  // No retry configured
317
335
  if (!retryConfig) {
@@ -371,6 +389,10 @@ class BaseTTIProvider {
371
389
  this.log('error', `${operationName} general retry budget exhausted (${maxGeneralRetries} retries): ${error.message}`, { attempt, generalRetryCount, durationMs: duration });
372
390
  throw error;
373
391
  }
392
+ // Notify provider before retry (e.g., for region rotation)
393
+ if (options?.onRetry) {
394
+ options.onRetry(error, generalRetryCount);
395
+ }
374
396
  const delay = this.calculateRetryDelay(generalRetryCount, retryConfig);
375
397
  this.log('warn', `Transient error during ${operationName} after ${duration}ms. Retry ${generalRetryCount}/${maxGeneralRetries} in ${delay}ms: ${error.message}`, { attempt, generalRetryCount, maxGeneralRetries, delayMs: delay, durationMs: duration });
376
398
  await this.sleep(delay);
@@ -15,12 +15,12 @@
15
15
  * @see https://cloud.google.com/vertex-ai/generative-ai/pricing
16
16
  * @see https://cloud.google.com/terms/data-processing-addendum
17
17
  */
18
- import { TTIRequest, TTIResponse, ModelInfo, GoogleCloudRegion } from '../../../types';
18
+ import { TTIRequest, TTIResponse, ModelInfo, GoogleCloudRegion, RegionRotationConfig } from '../../../types';
19
19
  import { BaseTTIProvider } from './base-tti-provider';
20
20
  interface GoogleCloudConfig {
21
21
  /** Google Cloud Project ID */
22
22
  projectId: string;
23
- /** Default region for requests */
23
+ /** Default region for requests (used when regionRotation is not configured) */
24
24
  region: GoogleCloudRegion;
25
25
  /** Path to service account JSON file */
26
26
  keyFilename?: string;
@@ -30,11 +30,17 @@ interface GoogleCloudConfig {
30
30
  private_key: string;
31
31
  project_id?: string;
32
32
  };
33
+ /**
34
+ * Opt-in region rotation for quota errors (429 / Resource Exhausted).
35
+ * When configured, the middleware rotates through the listed regions
36
+ * on quota errors instead of retrying the same region.
37
+ */
38
+ regionRotation?: RegionRotationConfig;
33
39
  }
34
40
  export declare class GoogleCloudTTIProvider extends BaseTTIProvider {
35
41
  private config;
36
42
  private lastUsedRegion;
37
- private aiplatformClient;
43
+ private aiplatformClients;
38
44
  private genaiClients;
39
45
  constructor(config?: Partial<GoogleCloudConfig>);
40
46
  getDisplayName(): string;
@@ -182,8 +182,8 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
182
182
  constructor(config) {
183
183
  super(types_1.TTIProvider.GOOGLE_CLOUD);
184
184
  this.lastUsedRegion = null;
185
- // Lazy-loaded SDK clients
186
- this.aiplatformClient = null;
185
+ // Lazy-loaded SDK clients (one per region, since region is baked into the client)
186
+ this.aiplatformClients = new Map();
187
187
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
188
188
  this.genaiClients = new Map();
189
189
  const projectId = config?.projectId ||
@@ -202,10 +202,27 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
202
202
  region,
203
203
  keyFilename: config?.keyFilename || process.env.GOOGLE_APPLICATION_CREDENTIALS,
204
204
  credentials: config?.credentials,
205
+ regionRotation: config?.regionRotation,
205
206
  };
207
+ // Validate regionRotation config
208
+ if (this.config.regionRotation) {
209
+ if (!this.config.regionRotation.regions || this.config.regionRotation.regions.length === 0) {
210
+ throw new base_tti_provider_1.InvalidConfigError(types_1.TTIProvider.GOOGLE_CLOUD, 'regionRotation.regions must contain at least one region');
211
+ }
212
+ if (!this.config.regionRotation.fallback) {
213
+ throw new base_tti_provider_1.InvalidConfigError(types_1.TTIProvider.GOOGLE_CLOUD, 'regionRotation.fallback is required');
214
+ }
215
+ }
206
216
  this.log('info', 'Google Cloud TTI Provider initialized', {
207
217
  projectId: this.config.projectId,
208
218
  region: this.config.region,
219
+ regionRotation: this.config.regionRotation
220
+ ? {
221
+ regions: this.config.regionRotation.regions,
222
+ fallback: this.config.regionRotation.fallback,
223
+ alwaysTryFallback: this.config.regionRotation.alwaysTryFallback ?? true,
224
+ }
225
+ : undefined,
209
226
  isEURegion: (0, base_tti_provider_1.isEURegion)(this.config.region),
210
227
  models: this.listModels().map((m) => m.id),
211
228
  });
@@ -232,29 +249,66 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
232
249
  .map((m) => m.id)
233
250
  .join(', ')}`);
234
251
  }
235
- // Validate region availability
236
- const effectiveRegion = this.getEffectiveRegion(modelId);
252
+ // Determine base region (handles global-only models, region availability)
253
+ const baseRegion = this.getEffectiveRegion(modelId);
254
+ // Region rotation: only for non-global models with rotation configured
255
+ const rotation = this.config.regionRotation;
256
+ const useRotation = !!(rotation && baseRegion !== 'global');
257
+ // Mutable region — the onRetry callback advances this on quota errors
258
+ let currentRegion = useRotation ? rotation.regions[0] : baseRegion;
259
+ // Build region sequence: [...regions, fallback]
260
+ let regionIndex = 0;
261
+ let regionSequence = [];
262
+ if (useRotation) {
263
+ regionSequence = [...rotation.regions, rotation.fallback];
264
+ this.log('info', 'Region rotation enabled', {
265
+ sequence: regionSequence,
266
+ fallback: rotation.fallback,
267
+ alwaysTryFallback: rotation.alwaysTryFallback ?? true,
268
+ });
269
+ }
237
270
  // Create debug info for logging
238
271
  let debugInfo = null;
239
272
  if (debug_tti_utils_1.TTIDebugger.isEnabled) {
240
- debugInfo = debug_tti_utils_1.TTIDebugger.createDebugInfo(request, this.providerName, modelId, { region: effectiveRegion });
273
+ debugInfo = debug_tti_utils_1.TTIDebugger.createDebugInfo(request, this.providerName, modelId, { region: currentRegion });
241
274
  await debug_tti_utils_1.TTIDebugger.logRequest(debugInfo);
242
275
  }
243
276
  this.log('debug', 'Generating image', {
244
277
  model: modelId,
245
- region: effectiveRegion,
278
+ region: currentRegion,
279
+ regionRotation: useRotation,
246
280
  hasReferenceImages: (0, base_tti_provider_1.hasReferenceImages)(request),
247
281
  });
248
- try {
249
- // Route to appropriate API based on model type
250
- let response;
251
- if (GEMINI_API_MODELS.has(modelId)) {
252
- response = await this.executeWithRetry(request, () => this.generateWithGemini(request, modelId, effectiveRegion), 'Gemini API call');
282
+ const isGeminiModel = GEMINI_API_MODELS.has(modelId);
283
+ const operationName = isGeminiModel ? 'Gemini API call' : 'Imagen API call';
284
+ // Operation lambda reads currentRegion from closure
285
+ const operation = () => {
286
+ if (isGeminiModel) {
287
+ return this.generateWithGemini(request, modelId, currentRegion);
253
288
  }
254
289
  else {
255
- response = await this.executeWithRetry(request, () => this.generateWithImagen(request, modelId, effectiveRegion), 'Imagen API call');
290
+ return this.generateWithImagen(request, modelId, currentRegion);
291
+ }
292
+ };
293
+ // onRetry: advance region on quota errors, stay on same region otherwise
294
+ const onRetry = useRotation
295
+ ? (error) => {
296
+ if (this.isQuotaError(error) && regionIndex < regionSequence.length - 1) {
297
+ regionIndex++;
298
+ currentRegion = regionSequence[regionIndex];
299
+ this.log('info', `Quota error — rotating to region ${currentRegion}`, {
300
+ regionIndex,
301
+ totalRegions: regionSequence.length,
302
+ region: currentRegion,
303
+ });
304
+ }
305
+ // Non-quota retryable errors: stay on same region
256
306
  }
257
- // Log successful response
307
+ : undefined;
308
+ try {
309
+ const response = await this.executeWithRetry(request, operation, operationName, {
310
+ onRetry,
311
+ });
258
312
  if (debugInfo) {
259
313
  debugInfo = debug_tti_utils_1.TTIDebugger.updateWithResponse(debugInfo, response);
260
314
  await debug_tti_utils_1.TTIDebugger.logResponse(debugInfo);
@@ -262,7 +316,32 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
262
316
  return response;
263
317
  }
264
318
  catch (error) {
265
- // Log error
319
+ // alwaysTryFallback: one bonus attempt on fallback after budget exhausted
320
+ if (useRotation &&
321
+ this.isQuotaError(error) &&
322
+ (rotation.alwaysTryFallback !== false) &&
323
+ currentRegion !== rotation.fallback) {
324
+ this.log('info', `Retry budget exhausted — bonus attempt on fallback region ${rotation.fallback}`, {
325
+ exhaustedRegion: currentRegion,
326
+ fallback: rotation.fallback,
327
+ });
328
+ currentRegion = rotation.fallback;
329
+ try {
330
+ const response = await operation();
331
+ if (debugInfo) {
332
+ debugInfo = debug_tti_utils_1.TTIDebugger.updateWithResponse(debugInfo, response);
333
+ await debug_tti_utils_1.TTIDebugger.logResponse(debugInfo);
334
+ }
335
+ return response;
336
+ }
337
+ catch (fallbackError) {
338
+ if (debugInfo) {
339
+ debugInfo = debug_tti_utils_1.TTIDebugger.updateWithError(debugInfo, fallbackError);
340
+ await debug_tti_utils_1.TTIDebugger.logError(debugInfo);
341
+ }
342
+ throw fallbackError;
343
+ }
344
+ }
266
345
  if (debugInfo) {
267
346
  debugInfo = debug_tti_utils_1.TTIDebugger.updateWithError(debugInfo, error);
268
347
  await debug_tti_utils_1.TTIDebugger.logError(debugInfo);
@@ -325,7 +404,7 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
325
404
  const internalModelId = MODEL_ID_MAP[modelId];
326
405
  this.lastUsedRegion = region;
327
406
  try {
328
- const { client, helpers } = await this.getAiplatformClient();
407
+ const { client, helpers } = await this.getAiplatformClient(region);
329
408
  const endpoint = `projects/${this.config.projectId}/locations/${region}/publishers/google/models/${internalModelId}`;
330
409
  // Build instance
331
410
  const instanceValue = { prompt: request.prompt };
@@ -370,12 +449,12 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
370
449
  throw this.handleError(error, 'during Imagen API call');
371
450
  }
372
451
  }
373
- async getAiplatformClient() {
374
- if (!this.aiplatformClient) {
452
+ async getAiplatformClient(region) {
453
+ if (!this.aiplatformClients.has(region)) {
375
454
  try {
376
455
  const { v1, helpers } = await Promise.resolve().then(() => __importStar(require('@google-cloud/aiplatform')));
377
456
  const clientOptions = {
378
- apiEndpoint: `${this.config.region}-aiplatform.googleapis.com`,
457
+ apiEndpoint: `${region}-aiplatform.googleapis.com`,
379
458
  };
380
459
  if (this.config.keyFilename) {
381
460
  clientOptions.keyFilename = this.config.keyFilename;
@@ -384,9 +463,13 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
384
463
  clientOptions.credentials = this.config.credentials;
385
464
  }
386
465
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
387
- this.aiplatformClient = new v1.PredictionServiceClient(clientOptions);
466
+ this.aiplatformClients.set(region, new v1.PredictionServiceClient(clientOptions));
467
+ this.log('debug', 'Initialized @google-cloud/aiplatform client', {
468
+ region,
469
+ apiEndpoint: clientOptions.apiEndpoint,
470
+ });
388
471
  return {
389
- client: this.aiplatformClient,
472
+ client: this.aiplatformClients.get(region),
390
473
  helpers: helpers,
391
474
  };
392
475
  }
@@ -396,7 +479,7 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
396
479
  }
397
480
  const { helpers } = await Promise.resolve().then(() => __importStar(require('@google-cloud/aiplatform')));
398
481
  return {
399
- client: this.aiplatformClient,
482
+ client: this.aiplatformClients.get(region),
400
483
  helpers: helpers,
401
484
  };
402
485
  }
@@ -50,7 +50,29 @@ export interface ModelInfo {
50
50
  * Google Cloud regions
51
51
  * EU regions are GDPR-compliant
52
52
  */
53
- export type GoogleCloudRegion = 'global' | 'europe-west1' | 'europe-west2' | 'europe-west3' | 'europe-west4' | 'europe-west9' | 'us-central1' | 'us-east4';
53
+ export type GoogleCloudRegion = 'global' | 'europe-west1' | 'europe-west2' | 'europe-west3' | 'europe-west4' | 'europe-west6' | 'europe-west8' | 'europe-west9' | 'europe-north1' | 'europe-central2' | 'europe-southwest1' | 'us-central1' | 'us-east1' | 'us-east4' | 'us-east5' | 'us-south1' | 'us-west1' | 'us-west4' | 'asia-east1' | 'asia-east2' | 'asia-northeast1' | 'asia-northeast3' | 'asia-south1' | 'asia-southeast1' | 'australia-southeast1' | 'me-central1' | 'me-central2' | 'me-west1';
54
+ /**
55
+ * Configuration for region rotation on quota errors (429 / Resource Exhausted).
56
+ *
57
+ * When Vertex AI returns a quota error, the middleware rotates through the
58
+ * configured regions instead of retrying the same region. This is useful
59
+ * when Dynamic Shared Quota is temporarily exhausted in a single region.
60
+ *
61
+ * The total retry budget (from RetryOptions.maxRetries) is shared across
62
+ * all regions — region rotation does NOT multiply the retry count.
63
+ */
64
+ export interface RegionRotationConfig {
65
+ /** Ordered list of regions to try. First entry = primary region. */
66
+ regions: GoogleCloudRegion[];
67
+ /** Last-resort region after all regions exhausted (typically 'global'). */
68
+ fallback: GoogleCloudRegion;
69
+ /**
70
+ * If true: when maxRetries is exhausted before reaching the fallback,
71
+ * one final bonus attempt on the fallback region is made.
72
+ * @default true
73
+ */
74
+ alwaysTryFallback?: boolean;
75
+ }
54
76
  /**
55
77
  * Reference image for character consistency
56
78
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loonylabs/tti-middleware",
3
- "version": "1.5.1",
3
+ "version": "1.6.0",
4
4
  "description": "Provider-agnostic Text-to-Image middleware with GDPR compliance. Supports Google Cloud (Imagen, Gemini), Eden AI, and IONOS.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",