@loonylabs/tti-middleware 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,6 +24,7 @@
24
24
  - [Configuration](#%EF%B8%8F-configuration)
25
25
  - [Providers & Models](#-providers--models)
26
26
  - [Character Consistency](#-character-consistency)
27
+ - [Inpainting / Image Editing](#inpainting--image-editing)
27
28
  - [GDPR / Compliance](#-gdpr--compliance)
28
29
  - [API Reference](#-api-reference)
29
30
  - [Advanced Features](#-advanced-features)
@@ -44,7 +45,7 @@
44
45
  - **Eden AI**: Aggregator with access to OpenAI, Stability AI, Replicate (experimental)
45
46
  - **IONOS**: German cloud provider with OpenAI-compatible API (experimental)
46
47
  - **Character Consistency**: Generate consistent characters across multiple images (perfect for children's book illustrations)
47
- - **Inpainting**: Fix specific areas of a generated image without regenerating the entire scene — via Vertex AI `imagen-capability` model
48
+ - **Inpainting**: Fix specific areas of a generated image without regenerating the entire scene — via Vertex AI `imagen-capability` model. Supports optional subject reference images (`maskReferenceImages`) to guide *what* gets inserted into the masked area
48
49
  - **GDPR/DSGVO Compliance**: Built-in EU region support with automatic fallback
49
50
  - **Region Rotation**: Opt-in region rotation on quota errors (429) for Google Cloud — rotate through regions instead of retrying the same exhausted region
50
51
  - **Retry Logic**: Exponential backoff with jitter for transient errors (429, 408, 5xx, timeouts)
@@ -308,6 +309,72 @@ const duelScene = await service.generate({
308
309
 
309
310
  - Model must be `gemini-flash-image` (only model supporting character consistency)
310
311
 
312
+ ## Inpainting / Image Editing
313
+
314
+ The `imagen-capability` model supports mask-based inpainting via Vertex AI. This is the **only** model that supports pixel-precise editing with a mask image.
315
+
316
+ ### Basic Inpainting
317
+
318
+ ```typescript
319
+ const result = await service.generate({
320
+ model: 'imagen-capability',
321
+ prompt: 'Remove the extra arm and fill with matching forest background',
322
+ baseImage: { base64: originalImageBase64, mimeType: 'image/png' },
323
+ maskImage: { base64: maskBase64, mimeType: 'image/png' },
324
+ editMode: 'inpainting-remove', // default: 'inpainting-insert'
325
+ maskDilation: 0.02, // optional, 0.0–1.0, default 0.01
326
+ });
327
+ ```
328
+
329
+ **How the mask works:**
330
+ - White pixels = area the model will regenerate
331
+ - Black pixels = area preserved exactly as-is
332
+ - Mask must have identical dimensions to `baseImage`
333
+
334
+ ### Guided Inpainting with Subject References
335
+
336
+ Use `maskReferenceImages` to provide a reference photo of the subject to insert — e.g. "place **this** character into the masked region":
337
+
338
+ ```typescript
339
+ const result = await service.generate({
340
+ model: 'imagen-capability',
341
+ prompt: 'The character standing in a bright forest clearing, photorealistic',
342
+ baseImage: { base64: sceneBase64, mimeType: 'image/png' },
343
+ maskImage: { base64: maskBase64, mimeType: 'image/png' },
344
+ editMode: 'inpainting-insert',
345
+ maskReferenceImages: [
346
+ {
347
+ base64: characterRefBase64,
348
+ mimeType: 'image/png',
349
+ subjectType: 'person', // 'person' | 'animal' | 'product' | 'default'
350
+ },
351
+ ],
352
+ });
353
+ ```
354
+
355
+ **Subject types:**
356
+
357
+ | `subjectType` | Use case |
358
+ |---------------|----------|
359
+ | `'person'` | Human character |
360
+ | `'animal'` | Animal or creature |
361
+ | `'product'` | Object, item, product |
362
+ | `'default'` | Let the model decide (safe fallback) |
363
+
364
+ **Notes:**
365
+ - `maskReferenceImages` only works with `editMode: 'inpainting-insert'` (the default when `editMode` is omitted)
366
+ - Gemini models do **not** support mask-based inpainting or `maskReferenceImages`
367
+ - Passing `maskReferenceImages` without `baseImage` throws a validation error (`InvalidConfigError`)
368
+
369
+ ### Supported `editMode` Values
370
+
371
+ | Value | Description |
372
+ |-------|-------------|
373
+ | `'inpainting-insert'` | Add or replace content in masked area (default) |
374
+ | `'inpainting-remove'` | Remove content and fill with matching background |
375
+ | `'background-swap'` | Replace background, preserve foreground |
376
+ | `'outpainting'` | Extend image beyond its boundaries into the masked area |
377
+
311
378
  ## GDPR / Compliance
312
379
 
313
380
  ### Provider Compliance Overview
@@ -366,15 +433,30 @@ interface TTIRequest {
366
433
  n?: number; // Number of images (default: 1)
367
434
  aspectRatio?: string; // '1:1', '16:9', '4:3', etc.
368
435
 
369
- // Character consistency
436
+ // Character consistency (Gemini models only)
370
437
  referenceImages?: TTIReferenceImage[];
371
438
  subjectDescription?: string;
372
439
 
440
+ // Inpainting / image editing (imagen-capability only)
441
+ baseImage?: TTIReferenceImage; // Activates edit mode when set
442
+ maskImage?: TTIReferenceImage; // Required when baseImage is set
443
+ maskDilation?: number; // 0.0–1.0, default 0.01
444
+ editMode?: 'inpainting-insert' | 'inpainting-remove' | 'background-swap' | 'outpainting';
445
+ maskReferenceImages?: TTIMaskReferenceImage[]; // Subject refs for guided inpainting
446
+
373
447
  // Retry configuration
374
448
  retry?: boolean | RetryOptions; // true (default), false, or custom config
375
449
 
376
450
  providerOptions?: Record<string, unknown>;
377
451
  }
452
+
453
+ type TTISubjectType = 'person' | 'animal' | 'product' | 'default';
454
+
455
+ interface TTIMaskReferenceImage {
456
+ base64: string;
457
+ mimeType?: string;
458
+ subjectType?: TTISubjectType; // defaults to 'default'
459
+ }
378
460
  ```
379
461
 
380
462
  ### TTIResponse
@@ -216,6 +216,17 @@ class BaseTTIProvider {
216
216
  throw new InvalidConfigError(this.providerName, 'maskDilation must be between 0.0 and 1.0');
217
217
  }
218
218
  }
219
+ if (request.maskReferenceImages && request.maskReferenceImages.length > 0) {
220
+ for (let i = 0; i < request.maskReferenceImages.length; i++) {
221
+ const ref = request.maskReferenceImages[i];
222
+ if (!ref.base64 || ref.base64.trim().length === 0) {
223
+ throw new InvalidConfigError(this.providerName, `maskReferenceImages[${i}] has empty base64 data`);
224
+ }
225
+ }
226
+ }
227
+ }
228
+ if (request.maskReferenceImages && request.maskReferenceImages.length > 0 && !request.baseImage) {
229
+ throw new InvalidConfigError(this.providerName, 'maskReferenceImages requires baseImage to be set');
219
230
  }
220
231
  // If reference images are provided, validate them
221
232
  if (request.referenceImages && request.referenceImages.length > 0) {
@@ -65,6 +65,7 @@ export declare class GoogleCloudTTIProvider extends BaseTTIProvider {
65
65
  private processImagenResponse;
66
66
  /** Maps our editMode values to the Vertex AI API constants */
67
67
  private static readonly EDIT_MODE_MAP;
68
+ private static readonly SUBJECT_TYPE_MAP;
68
69
  private editWithImagen;
69
70
  private generateWithGemini;
70
71
  private getGenaiClient;
@@ -586,6 +586,23 @@ class GoogleCloudTTIProvider extends base_tti_provider_1.BaseTTIProvider {
586
586
  },
587
587
  },
588
588
  ];
589
+ // Append optional subject reference images for guided inpainting
590
+ if (request.maskReferenceImages && request.maskReferenceImages.length > 0) {
591
+ for (const [i, ref] of request.maskReferenceImages.entries()) {
592
+ const subjectType = GoogleCloudTTIProvider.SUBJECT_TYPE_MAP[ref.subjectType ?? 'default'] ??
593
+ 'SUBJECT_TYPE_DEFAULT';
594
+ referenceImages.push({
595
+ referenceType: 'REFERENCE_TYPE_SUBJECT',
596
+ referenceId: 3 + i,
597
+ referenceImage: {
598
+ bytesBase64Encoded: ref.base64,
599
+ },
600
+ subjectImageConfig: {
601
+ subjectType,
602
+ },
603
+ });
604
+ }
605
+ }
589
606
  const instanceValue = {
590
607
  prompt: request.prompt,
591
608
  referenceImages,
@@ -813,3 +830,9 @@ GoogleCloudTTIProvider.EDIT_MODE_MAP = {
813
830
  'background-swap': 'EDIT_MODE_BGSWAP',
814
831
  'outpainting': 'EDIT_MODE_OUTPAINT',
815
832
  };
833
+ GoogleCloudTTIProvider.SUBJECT_TYPE_MAP = {
834
+ person: 'SUBJECT_TYPE_PERSON',
835
+ animal: 'SUBJECT_TYPE_ANIMAL',
836
+ product: 'SUBJECT_TYPE_PRODUCT',
837
+ default: 'SUBJECT_TYPE_DEFAULT',
838
+ };
@@ -82,6 +82,30 @@ export interface TTIReferenceImage {
82
82
  /** MIME type of the image (e.g., 'image/png', 'image/jpeg') */
83
83
  mimeType?: string;
84
84
  }
85
+ /**
86
+ * Subject type hint for mask reference images.
87
+ * Helps the model understand what kind of subject is shown in the reference image.
88
+ * - 'person' — a human character
89
+ * - 'animal' — an animal or creature
90
+ * - 'product' — an object, product, or item
91
+ * - 'default' — let the model decide (fallback)
92
+ */
93
+ export type TTISubjectType = 'person' | 'animal' | 'product' | 'default';
94
+ /**
95
+ * Reference image for mask-based inpainting (subject reference).
96
+ * Used with maskReferenceImages to guide what the model inserts into the masked area.
97
+ */
98
+ export interface TTIMaskReferenceImage {
99
+ /** Base64-encoded image data of the subject to insert */
100
+ base64: string;
101
+ /** MIME type of the image (e.g., 'image/png', 'image/jpeg') */
102
+ mimeType?: string;
103
+ /**
104
+ * Subject type hint for the model.
105
+ * Defaults to 'default' if omitted.
106
+ */
107
+ subjectType?: TTISubjectType;
108
+ }
85
109
  /**
86
110
  * Unified TTI generation request
87
111
  * Works for both simple text-to-image and character consistency
@@ -129,6 +153,14 @@ export interface TTIRequest {
129
153
  * - 'outpainting': extend image beyond its boundaries into the masked area
130
154
  */
131
155
  editMode?: 'inpainting-insert' | 'inpainting-remove' | 'background-swap' | 'outpainting';
156
+ /**
157
+ * Optional subject reference images for mask-based inpainting.
158
+ * Only valid when baseImage and maskImage are set.
159
+ * Each entry guides the model to insert a specific subject into the masked area
160
+ * (e.g., "place the character from this reference image into the mask").
161
+ * Only supported by the 'imagen-capability' model.
162
+ */
163
+ maskReferenceImages?: TTIMaskReferenceImage[];
132
164
  /** Additional provider-specific options */
133
165
  providerOptions?: Record<string, unknown>;
134
166
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loonylabs/tti-middleware",
3
- "version": "1.10.0",
3
+ "version": "1.11.0",
4
4
  "description": "Provider-agnostic Text-to-Image middleware with GDPR compliance. Supports Google Cloud (Imagen, Gemini), Eden AI, and IONOS.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",