@loonylabs/tti-middleware 1.9.0 → 1.10.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -44,6 +44,7 @@
44
44
  - **Eden AI**: Aggregator with access to OpenAI, Stability AI, Replicate (experimental)
45
45
  - **IONOS**: German cloud provider with OpenAI-compatible API (experimental)
46
46
  - **Character Consistency**: Generate consistent characters across multiple images (perfect for children's book illustrations)
47
+ - **Inpainting**: Fix specific areas of a generated image without regenerating the entire scene — via the Vertex AI `imagen-capability` model
47
48
  - **GDPR/DSGVO Compliance**: Built-in EU region support with automatic fallback
48
49
  - **Region Rotation**: Opt-in region rotation on quota errors (429) for Google Cloud — rotate through regions instead of retrying the same exhausted region
49
50
  - **Retry Logic**: Exponential backoff with jitter for transient errors (429, 408, 5xx, timeouts)
@@ -196,6 +196,27 @@ class BaseTTIProvider {
196
196
  if (!request.prompt || request.prompt.trim().length === 0) {
197
197
  throw new InvalidConfigError(this.providerName, 'Prompt cannot be empty');
198
198
  }
199
+ // If baseImage is provided, validate inpainting requirements
200
+ if (request.baseImage) {
201
+ if (!request.baseImage.base64 || request.baseImage.base64.trim().length === 0) {
202
+ throw new InvalidConfigError(this.providerName, 'baseImage has empty base64 data');
203
+ }
204
+ const modelId = request.model || this.getDefaultModel();
205
+ if (!this.modelSupportsCapability(modelId, 'imageEditing')) {
206
+ throw new CapabilityNotSupportedError(this.providerName, 'imageEditing', modelId);
207
+ }
208
+ if (!request.maskImage) {
209
+ throw new InvalidConfigError(this.providerName, 'maskImage is required when baseImage is set');
210
+ }
211
+ if (!request.maskImage.base64 || request.maskImage.base64.trim().length === 0) {
212
+ throw new InvalidConfigError(this.providerName, 'maskImage has empty base64 data');
213
+ }
214
+ if (request.maskDilation !== undefined) {
215
+ if (request.maskDilation < 0 || request.maskDilation > 1) {
216
+ throw new InvalidConfigError(this.providerName, 'maskDilation must be between 0.0 and 1.0');
217
+ }
218
+ }
219
+ }
199
220
  // If reference images are provided, validate them
200
221
  if (request.referenceImages && request.referenceImages.length > 0) {
201
222
  const modelId = request.model || this.getDefaultModel();
@@ -63,6 +63,9 @@ export declare class GoogleCloudTTIProvider extends BaseTTIProvider {
63
63
  private generateWithImagen;
64
64
  private getAiplatformClient;
65
65
  private processImagenResponse;
66
+ /** Maps our editMode values to the Vertex AI API constants */
67
+ private static readonly EDIT_MODE_MAP;
68
+ private editWithImagen;
66
69
  private generateWithGemini;
67
70
  private getGenaiClient;
68
71
  private buildCharacterConsistencyPrompt;
@@ -131,6 +131,27 @@ const GOOGLE_CLOUD_MODELS = [
131
131
  availableRegions: IMAGEN_4_REGIONS,
132
132
  pricingUrl: 'https://cloud.google.com/vertex-ai/generative-ai/pricing',
133
133
  },
134
+ // ── Imagen Capability model (Vertex AI editing / inpainting) ──
135
+ {
136
+ id: 'imagen-capability',
137
+ displayName: 'Imagen 3 Capability (Editing)',
138
+ capabilities: {
139
+ textToImage: false,
140
+ characterConsistency: false,
141
+ imageEditing: true,
142
+ maxImagesPerRequest: 4,
143
+ },
144
+ availableRegions: [
145
+ 'europe-west1',
146
+ 'europe-west2',
147
+ 'europe-west3',
148
+ 'europe-west4',
149
+ 'europe-west9',
150
+ 'us-central1',
151
+ 'us-east4',
152
+ ],
153
+ pricingUrl: 'https://cloud.google.com/vertex-ai/generative-ai/pricing',
154
+ },
134
155
  // ── Gemini models (Vertex AI generateContent API) ──────────
135
156
  {
136
157
  id: 'gemini-flash-image',
@@ -185,6 +206,7 @@ const MODEL_ID_MAP = {
185
206
  'imagen-4': 'imagen-4.0-generate-001',
186
207
  'imagen-4-fast': 'imagen-4.0-fast-generate-001',
187
208
  'imagen-4-ultra': 'imagen-4.0-ultra-generate-001',
209
+ 'imagen-capability': 'imagen-3.0-capability-001',
188
210
  'gemini-flash-image': 'gemini-2.5-flash-image',
189
211
  'gemini-pro-image': 'gemini-3-pro-image-preview',
190
212
  'gemini-flash-image-2': 'gemini-3.1-flash-image-preview',
@@ -296,10 +318,18 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
296
318
  hasReferenceImages: (0, base_tti_provider_1.hasReferenceImages)(request),
297
319
  });
298
320
  const isGeminiModel = GEMINI_API_MODELS.has(modelId);
299
- const operationName = isGeminiModel ? 'Gemini API call' : 'Imagen API call';
321
+ const isEditRequest = !!request.baseImage;
322
+ const operationName = isEditRequest
323
+ ? 'Imagen edit API call'
324
+ : isGeminiModel
325
+ ? 'Gemini API call'
326
+ : 'Imagen API call';
300
327
  // Operation lambda reads currentRegion from closure
301
328
  const operation = () => {
302
- if (isGeminiModel) {
329
+ if (isEditRequest) {
330
+ return this.editWithImagen(request, modelId, currentRegion);
331
+ }
332
+ else if (isGeminiModel) {
303
333
  return this.generateWithGemini(request, modelId, currentRegion);
304
334
  }
305
335
  else {
@@ -528,6 +558,77 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
528
558
  usage,
529
559
  };
530
560
  }
561
+ async editWithImagen(request, modelId, region) {
562
+ const startTime = Date.now();
563
+ const internalModelId = MODEL_ID_MAP[modelId];
564
+ this.lastUsedRegion = region;
565
+ try {
566
+ const { client, helpers } = await this.getAiplatformClient(region);
567
+ const endpoint = `projects/${this.config.projectId}/locations/${region}/publishers/google/models/${internalModelId}`;
568
+ // Build referenceImages array: [RAW base image, MASK image]
569
+ const referenceImages = [
570
+ {
571
+ referenceType: 'REFERENCE_TYPE_RAW',
572
+ referenceId: 1,
573
+ referenceImage: {
574
+ bytesBase64Encoded: request.baseImage.base64,
575
+ },
576
+ },
577
+ {
578
+ referenceType: 'REFERENCE_TYPE_MASK',
579
+ referenceId: 2,
580
+ referenceImage: {
581
+ bytesBase64Encoded: request.maskImage.base64,
582
+ },
583
+ maskImageConfig: {
584
+ maskMode: 'MASK_MODE_USER_PROVIDED',
585
+ dilation: request.maskDilation ?? 0.01,
586
+ },
587
+ },
588
+ ];
589
+ const instanceValue = {
590
+ prompt: request.prompt,
591
+ referenceImages,
592
+ };
593
+ const instance = helpers.toValue(instanceValue);
594
+ // Map editMode to Vertex AI constant, default to inpainting-insert
595
+ const editModeKey = request.editMode ?? 'inpainting-insert';
596
+ const vertexEditMode = GoogleCloudTTIProvider.EDIT_MODE_MAP[editModeKey] ?? 'EDIT_MODE_INPAINT_INSERTION';
597
+ const parameterValue = {
598
+ editMode: vertexEditMode,
599
+ sampleCount: request.n || 1,
600
+ editConfig: {
601
+ baseSteps: request.providerOptions?.baseSteps ?? 35,
602
+ },
603
+ };
604
+ const parameters = helpers.toValue(parameterValue);
605
+ this.log('info', 'Sending Imagen edit request to Vertex AI', {
606
+ endpoint,
607
+ editMode: vertexEditMode,
608
+ dilation: request.maskDilation ?? 0.01,
609
+ });
610
+ const [response] = await client.predict({
611
+ endpoint,
612
+ instances: [instance],
613
+ parameters,
614
+ });
615
+ const duration = Date.now() - startTime;
616
+ this.log('info', `Imagen edit response received in ${duration}ms`, {
617
+ duration,
618
+ hasPredictions: !!response.predictions?.length,
619
+ });
620
+ if (!response.predictions || response.predictions.length === 0) {
621
+ throw new base_tti_provider_1.GenerationFailedError(this.providerName, 'No images returned from Imagen edit API');
622
+ }
623
+ return this.processImagenResponse(response.predictions, helpers, modelId, duration);
624
+ }
625
+ catch (error) {
626
+ if (error instanceof base_tti_provider_1.InvalidConfigError || error instanceof base_tti_provider_1.GenerationFailedError) {
627
+ throw error;
628
+ }
629
+ throw this.handleError(error, 'during Imagen edit API call');
630
+ }
631
+ }
531
632
  // ============================================================
532
633
  // PRIVATE: GEMINI IMAGE IMPLEMENTATION
533
634
  // ============================================================
@@ -702,3 +803,13 @@ IMPORTANT: Maintain exact visual consistency with the subject in the reference -
702
803
  }
703
804
  }
704
805
  exports.GoogleCloudTTIProvider = GoogleCloudTTIProvider;
806
+ // ============================================================
807
+ // PRIVATE: IMAGEN EDITING / INPAINTING IMPLEMENTATION
808
+ // ============================================================
809
+ /** Maps our editMode values to the Vertex AI API constants */
810
+ GoogleCloudTTIProvider.EDIT_MODE_MAP = {
811
+ 'inpainting-insert': 'EDIT_MODE_INPAINT_INSERTION',
812
+ 'inpainting-remove': 'EDIT_MODE_INPAINT_REMOVAL',
813
+ 'background-swap': 'EDIT_MODE_BGSWAP',
814
+ 'outpainting': 'EDIT_MODE_OUTPAINT',
815
+ };
@@ -105,6 +105,30 @@ export interface TTIRequest {
105
105
  * Required when using referenceImages (e.g., "cute cartoon bear with red hat")
106
106
  */
107
107
  subjectDescription?: string;
108
+ /**
109
+ * Base image to edit. When present, activates "edit mode" instead of text-to-image generation.
110
+ * Requires maskImage and a model that supports imageEditing capability.
111
+ */
112
+ baseImage?: TTIReferenceImage;
113
+ /**
114
+ * Mask image for inpainting. White pixels = regenerate, black pixels = preserve.
115
+ * Must have identical dimensions to baseImage.
116
+ * Required when baseImage is set.
117
+ */
118
+ maskImage?: TTIReferenceImage;
119
+ /**
120
+ * Mask dilation: expands the mask boundary to smooth hard edges (0.0–1.0, default 0.01).
121
+ * Useful when hand-drawn masks have jagged edges.
122
+ */
123
+ maskDilation?: number;
124
+ /**
125
+ * Edit operation to perform on the masked region.
126
+ * - 'inpainting-insert': add or replace content in the masked area (default)
127
+ * - 'inpainting-remove': remove content and fill with matching background
128
+ * - 'background-swap': replace background while preserving foreground
129
+ * - 'outpainting': extend image beyond its boundaries into the masked area
130
+ */
131
+ editMode?: 'inpainting-insert' | 'inpainting-remove' | 'background-swap' | 'outpainting';
108
132
  /** Additional provider-specific options */
109
133
  providerOptions?: Record<string, unknown>;
110
134
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loonylabs/tti-middleware",
3
- "version": "1.9.0",
3
+ "version": "1.10.0",
4
4
  "description": "Provider-agnostic Text-to-Image middleware with GDPR compliance. Supports Google Cloud (Imagen, Gemini), Eden AI, and IONOS.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",