@dataclouder/nest-vertex 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +31 -0
- package/src/controllers/gemini-chat.controller.ts +29 -0
- package/src/controllers/nest-tts.controller.ts +120 -0
- package/src/controllers/nest-vertex.controller.ts +53 -0
- package/src/dto/generate-image.dto.ts +53 -0
- package/src/dto/generate-video.dto.ts +148 -0
- package/src/index.ts +9 -0
- package/src/nest-vertex.module.ts +14 -0
- package/src/services/gemini-chat.service.ts +190 -0
- package/src/services/nest-tts.service.ts +119 -0
- package/src/services/nest-vertex.service.ts +184 -0
package/package.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@dataclouder/nest-vertex",
|
|
3
|
+
"version": "0.0.2",
|
|
4
|
+
"description": "NestJS Vertex AI library for Dataclouder",
|
|
5
|
+
"author": "dataclouder",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"main": "dist/index.js",
|
|
8
|
+
"types": "dist/index.d.ts",
|
|
9
|
+
"files": [
|
|
10
|
+
"dist",
|
|
11
|
+
"src"
|
|
12
|
+
],
|
|
13
|
+
"scripts": {
|
|
14
|
+
"build": "tsc -p tsconfig.lib.json",
|
|
15
|
+
"publish:npm": "npm run build && npm version patch && npm publish"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [
|
|
18
|
+
"nestjs",
|
|
19
|
+
"library",
|
|
20
|
+
"vertex",
|
|
21
|
+
"ai"
|
|
22
|
+
],
|
|
23
|
+
"peerDependencies": {
|
|
24
|
+
"@nestjs/common": ">=10.0.0",
|
|
25
|
+
"@nestjs/core": ">=10.0.0",
|
|
26
|
+
"rxjs": ">=7.0.0"
|
|
27
|
+
},
|
|
28
|
+
"publishConfig": {
|
|
29
|
+
"access": "public"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { Controller, Post, Body, Logger } from '@nestjs/common';
|
|
2
|
+
import { GeminiChatService } from '../services/gemini-chat.service';
|
|
3
|
+
import { ApiTags } from '@nestjs/swagger';
|
|
4
|
+
|
|
5
|
+
// Basic interface for input messages, similar to ChatCompletionMessageParam
|
|
6
|
+
interface ChatMessageInput {
|
|
7
|
+
role: 'user' | 'assistant' | 'system';
|
|
8
|
+
content: string;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
@ApiTags('Vertex Gemini') // Keeping tag as 'tts' for now
|
|
12
|
+
@Controller('api/vertex/gemini')
|
|
13
|
+
export class GeminiChatController {
|
|
14
|
+
private readonly logger = new Logger(GeminiChatController.name);
|
|
15
|
+
|
|
16
|
+
constructor(private readonly geminiChatService: GeminiChatService) {}
|
|
17
|
+
|
|
18
|
+
@Post('generate-text')
|
|
19
|
+
async generateText(@Body() messages: ChatMessageInput[]) {
|
|
20
|
+
this.logger.log('Received request for text generation');
|
|
21
|
+
try {
|
|
22
|
+
const result = await this.geminiChatService.chat(messages);
|
|
23
|
+
return result;
|
|
24
|
+
} catch (error) {
|
|
25
|
+
this.logger.error(`Error generating text: ${error.message}`, error.stack);
|
|
26
|
+
throw error; // Re-throw the error for NestJS to handle
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Controller,
|
|
3
|
+
Get,
|
|
4
|
+
Post,
|
|
5
|
+
Body,
|
|
6
|
+
Res,
|
|
7
|
+
HttpCode,
|
|
8
|
+
HttpStatus,
|
|
9
|
+
Header,
|
|
10
|
+
Logger,
|
|
11
|
+
InternalServerErrorException,
|
|
12
|
+
NotFoundException,
|
|
13
|
+
Query, // Import Query decorator
|
|
14
|
+
} from '@nestjs/common';
|
|
15
|
+
import { google } from '@google-cloud/text-to-speech/build/protos/protos'; // Import google types
|
|
16
|
+
import { ApiTags, ApiOperation, ApiBody, ApiResponse, ApiQuery } from '@nestjs/swagger'; // Import ApiQuery
|
|
17
|
+
import { FastifyReply } from 'fastify'; // Import FastifyReply type
|
|
18
|
+
import { NestTtsService, SynthesizeSpeechInput } from '../services/nest-tts.service';
|
|
19
|
+
@ApiTags('Vertex TTS') // Keeping tag as 'tts' for now
|
|
20
|
+
@Controller('api/vertex/tts') // Updated route prefix to 'vertex/tts'
|
|
21
|
+
export class NestTtsController {
|
|
22
|
+
// Keeping class name for now
|
|
23
|
+
private readonly logger = new Logger('VertexTtsController'); // Updated logger context
|
|
24
|
+
|
|
25
|
+
constructor(private readonly nestTtsService: NestTtsService) {}
|
|
26
|
+
|
|
27
|
+
@Get('ping') // Renamed from 'generate'/'hello'
|
|
28
|
+
@ApiOperation({ summary: 'Ping the TTS service' })
|
|
29
|
+
@ApiResponse({ status: 200, description: 'Service is available' })
|
|
30
|
+
ping(): string {
|
|
31
|
+
this.logger.log('Received ping request');
|
|
32
|
+
return this.nestTtsService.getHello(); // Keep using the simple service method for ping
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
@Post('synthesize')
|
|
36
|
+
@HttpCode(HttpStatus.OK)
|
|
37
|
+
@ApiOperation({ summary: 'Synthesize speech from text' })
|
|
38
|
+
@ApiBody({
|
|
39
|
+
description: 'Text and configuration for speech synthesis',
|
|
40
|
+
schema: {
|
|
41
|
+
type: 'object',
|
|
42
|
+
properties: {
|
|
43
|
+
text: { type: 'string', example: 'Hello world' },
|
|
44
|
+
languageCode: { type: 'string', example: 'en-US', description: 'Optional language code (default: en-US)' },
|
|
45
|
+
ssmlGender: { type: 'string', example: 'NEUTRAL', description: 'Optional SSML gender (default: NEUTRAL)' },
|
|
46
|
+
voiceName: { type: 'string', example: 'en-US-Neural2-F', description: 'Optional specific voice name' },
|
|
47
|
+
audioEncoding: { type: 'string', example: 'MP3', description: 'Optional audio encoding (default: MP3)' },
|
|
48
|
+
},
|
|
49
|
+
required: ['text'], // Only 'text' is required
|
|
50
|
+
},
|
|
51
|
+
})
|
|
52
|
+
@ApiResponse({ status: 200, description: 'Returns the synthesized audio file (e.g., MP3)' })
|
|
53
|
+
@ApiResponse({ status: 500, description: 'Internal server error during synthesis' })
|
|
54
|
+
@Header('Content-Type', 'audio/mpeg') // Set default, assuming MP3
|
|
55
|
+
async synthesizeSpeech(
|
|
56
|
+
@Body() body: SynthesizeSpeechInput,
|
|
57
|
+
@Res({ passthrough: true }) res: FastifyReply, // Use FastifyReply type
|
|
58
|
+
) {
|
|
59
|
+
console.log('Received synthesize request', body);
|
|
60
|
+
|
|
61
|
+
// Removed Promise<Buffer> return type annotation
|
|
62
|
+
this.logger.log(`Received synthesize request for text: "${body.text.substring(0, 50)}..."`);
|
|
63
|
+
try {
|
|
64
|
+
const audioBuffer = await this.nestTtsService.synthesizeSpeech(body);
|
|
65
|
+
|
|
66
|
+
if (!audioBuffer) {
|
|
67
|
+
this.logger.error('Synthesis resulted in null buffer');
|
|
68
|
+
// Use NotFoundException or a more specific error if appropriate
|
|
69
|
+
throw new NotFoundException('Could not generate audio for the given input.');
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// For testing porpuse
|
|
73
|
+
|
|
74
|
+
// // --- Save audio locally as mp3 ---
|
|
75
|
+
// const audioDir = path.join(process.cwd(), 'public', 'tts-audio'); // Save in project_root/public/tts-audio
|
|
76
|
+
// const filename = `tts_${Date.now()}.mp3`;
|
|
77
|
+
// const filePath = path.join(audioDir, filename);
|
|
78
|
+
|
|
79
|
+
// try {
|
|
80
|
+
// await fs.mkdir(audioDir, { recursive: true }); // Ensure directory exists
|
|
81
|
+
// await fs.writeFile(filePath, audioBuffer);
|
|
82
|
+
// this.logger.log(`Audio saved successfully to ${filePath}`);
|
|
83
|
+
// } catch (saveError) {
|
|
84
|
+
// // Log the error but don't stop the response from being sent
|
|
85
|
+
// this.logger.error(`Failed to save audio file to ${filePath}: ${saveError.message}`, saveError.stack);
|
|
86
|
+
// // Optionally, you could throw an error here if saving is critical
|
|
87
|
+
// // throw new InternalServerErrorException(`Failed to save audio file: ${saveError.message}`);
|
|
88
|
+
// }
|
|
89
|
+
// // --- End save audio ---
|
|
90
|
+
|
|
91
|
+
// Set content type based on actual encoding if needed, otherwise default is fine
|
|
92
|
+
// Example: if (body.audioEncoding === 'OGG_OPUS') res.header('Content-Type', 'audio/ogg');
|
|
93
|
+
res.header('Content-Length', audioBuffer.length.toString()); // Use res.header for Fastify
|
|
94
|
+
|
|
95
|
+
this.logger.log(`Successfully synthesized audio (${audioBuffer.length} bytes)`);
|
|
96
|
+
return audioBuffer; // NestJS handles sending the buffer
|
|
97
|
+
} catch (error) {
|
|
98
|
+
this.logger.error(`Synthesis failed: ${error.message}`, error.stack);
|
|
99
|
+
// Re-throw as a NestJS exception for proper handling
|
|
100
|
+
throw new InternalServerErrorException(`Speech synthesis failed: ${error.message}`);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
@Get('voices')
|
|
105
|
+
@ApiOperation({ summary: 'List available TTS voices, optionally filtered by language' })
|
|
106
|
+
@ApiQuery({ name: 'languageCode', required: false, type: String, description: 'Optional language code (e.g., en-US) to filter voices' })
|
|
107
|
+
@ApiResponse({ status: 200, description: 'Returns a list of available voices', type: [Object] }) // Using Object as a placeholder for Swagger type
|
|
108
|
+
@ApiResponse({ status: 500, description: 'Internal server error while fetching voices' })
|
|
109
|
+
async listVoices(@Query('languageCode') languageCode?: string): Promise<google.cloud.texttospeech.v1.IVoice[]> {
|
|
110
|
+
this.logger.log(`Received request to list voices${languageCode ? ` for language ${languageCode}` : ''}`);
|
|
111
|
+
try {
|
|
112
|
+
const voices = await this.nestTtsService.listVoices(languageCode);
|
|
113
|
+
this.logger.log(`Returning ${voices.length} voice${voices.length === 1 ? '' : 's'}.`);
|
|
114
|
+
return voices;
|
|
115
|
+
} catch (error) {
|
|
116
|
+
this.logger.error(`Failed to list voices: ${error.message}`, error.stack);
|
|
117
|
+
throw new InternalServerErrorException(`Failed to list voices: ${error.message}`);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { Controller, Post, Body, ValidationPipe, Logger, Param, Get } from '@nestjs/common';
|
|
2
|
+
import { NestVertexService } from '../services/nest-vertex.service';
|
|
3
|
+
import { GenerateVideoDto } from '../dto/generate-video.dto'; // Updated import path
|
|
4
|
+
import { GenerateImageDto } from '../dto/generate-image.dto'; // Updated import path
|
|
5
|
+
import { GenerateVideosOperation, GenerateImagesResponse } from '@google/genai';
|
|
6
|
+
import { ApiTags } from '@nestjs/swagger';
|
|
7
|
+
|
|
8
|
+
@ApiTags('Vertex Image/video') // Keeping tag as 'tts' for now
|
|
9
|
+
@Controller('api/vertex')
|
|
10
|
+
export class NestVertexController {
|
|
11
|
+
private readonly logger = new Logger(NestVertexController.name);
|
|
12
|
+
|
|
13
|
+
constructor(private readonly nestVertexService: NestVertexService) {}
|
|
14
|
+
|
|
15
|
+
@Post('generate-video')
|
|
16
|
+
async generateVideo(
|
|
17
|
+
@Body(new ValidationPipe({ transform: true, whitelist: true })) generateVideoDto: GenerateVideoDto,
|
|
18
|
+
): Promise<{ operationName: string }> {
|
|
19
|
+
this.logger.log(`Received request to generate video: ${JSON.stringify(generateVideoDto)}`);
|
|
20
|
+
try {
|
|
21
|
+
const operation: GenerateVideosOperation = await this.nestVertexService.startVideoGeneration(generateVideoDto);
|
|
22
|
+
return { operationName: operation.name };
|
|
23
|
+
} catch (error) {
|
|
24
|
+
this.logger.error('Error in generateVideo endpoint:', error);
|
|
25
|
+
throw error;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
@Get('video-status/:operationName')
|
|
30
|
+
async getVideoStatus(@Param('operationName') operationName: string): Promise<GenerateVideosOperation> {
|
|
31
|
+
this.logger.log(`Received request to check status for operation: ${operationName}`);
|
|
32
|
+
try {
|
|
33
|
+
return await this.nestVertexService.checkVideoOperationStatus(operationName);
|
|
34
|
+
} catch (error) {
|
|
35
|
+
this.logger.error(`Error checking status for operation ${operationName}:`, error);
|
|
36
|
+
throw error;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
@Post('generate-image')
|
|
41
|
+
async generateImage(
|
|
42
|
+
@Body(new ValidationPipe({ transform: true, whitelist: true })) generateImageDto: GenerateImageDto,
|
|
43
|
+
): Promise<GenerateImagesResponse> {
|
|
44
|
+
this.logger.log(`Received request to generate image: ${JSON.stringify(generateImageDto)}`);
|
|
45
|
+
try {
|
|
46
|
+
const response: GenerateImagesResponse = await this.nestVertexService.startImageGeneration(generateImageDto);
|
|
47
|
+
return response;
|
|
48
|
+
} catch (error) {
|
|
49
|
+
this.logger.error('Error in generateImage endpoint:', error);
|
|
50
|
+
throw error;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// src/vertex/dto/generate-image.dto.ts
|
|
2
|
+
import { IsString, IsNotEmpty, IsOptional, IsInt, Min, Max, IsEnum } from 'class-validator';
|
|
3
|
+
|
|
4
|
+
export class GenerateImageDto {
|
|
5
|
+
/**
|
|
6
|
+
* The text prompt describing the image(s) to generate.
|
|
7
|
+
* Example: "A futuristic cityscape at sunset"
|
|
8
|
+
*/
|
|
9
|
+
@IsString()
|
|
10
|
+
@IsNotEmpty()
|
|
11
|
+
prompt: string;
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* The number of images to generate.
|
|
15
|
+
* @default 1
|
|
16
|
+
*/
|
|
17
|
+
@IsOptional()
|
|
18
|
+
@IsInt()
|
|
19
|
+
@Min(1)
|
|
20
|
+
@Max(8) // Assuming a reasonable max, adjust if needed based on API limits
|
|
21
|
+
numberOfImages?: number = 1;
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Defines the aspect ratio of the generated image(s).
|
|
25
|
+
* Common values: '1:1', '16:9', '9:16'. Check API docs for exact supported values.
|
|
26
|
+
* @default '1:1'
|
|
27
|
+
*/
|
|
28
|
+
@IsOptional()
|
|
29
|
+
@IsEnum(['1:1', '16:9', '9:16', '3:4', '4:3'], { message: 'aspectRatio must be one of "1:1", "16:9", "9:16", "3:4", "4:3"' })
|
|
30
|
+
aspectRatio?: string = '1:1';
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* A text string describing elements to avoid in the generated image(s).
|
|
34
|
+
* Example: "text, watermark, blurry"
|
|
35
|
+
*/
|
|
36
|
+
@IsOptional()
|
|
37
|
+
@IsString()
|
|
38
|
+
negativePrompt?: string;
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* A seed value for deterministic image generation.
|
|
42
|
+
* Using the same seed and prompt should yield the same result.
|
|
43
|
+
* Range: 0 - 4,294,967,295
|
|
44
|
+
*/
|
|
45
|
+
@IsOptional()
|
|
46
|
+
@IsInt()
|
|
47
|
+
@Min(0)
|
|
48
|
+
@Max(4294967295)
|
|
49
|
+
seed?: number;
|
|
50
|
+
|
|
51
|
+
// Add other relevant parameters if known from SDK/API docs, e.g.,
|
|
52
|
+
// style_preset, output_format, quality, etc.
|
|
53
|
+
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import {
|
|
2
|
+
IsString,
|
|
3
|
+
IsOptional,
|
|
4
|
+
IsInt,
|
|
5
|
+
Min,
|
|
6
|
+
Max,
|
|
7
|
+
IsBoolean,
|
|
8
|
+
IsEnum,
|
|
9
|
+
ValidateNested,
|
|
10
|
+
IsBase64,
|
|
11
|
+
Matches,
|
|
12
|
+
IsNotEmpty,
|
|
13
|
+
ValidateIf,
|
|
14
|
+
} from 'class-validator';
|
|
15
|
+
import { Type } from 'class-transformer';
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Represents the image input for guiding video generation.
|
|
19
|
+
* Can be either bytesBase64Encoded or gcsUri.
|
|
20
|
+
*/
|
|
21
|
+
class ImageInputDto {
|
|
22
|
+
/**
|
|
23
|
+
* Base64-encoded image byte string.
|
|
24
|
+
* Mutually exclusive with gcsUri.
|
|
25
|
+
*/
|
|
26
|
+
@IsOptional()
|
|
27
|
+
@IsBase64()
|
|
28
|
+
@ValidateIf((o) => !o.gcsUri) // Validate only if gcsUri is not present
|
|
29
|
+
@IsNotEmpty({ message: 'Either bytesBase64Encoded or gcsUri must be provided in image object' })
|
|
30
|
+
bytesBase64Encoded?: string;
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Cloud Storage bucket URI (e.g., gs://bucket-name/image.png).
|
|
34
|
+
* Mutually exclusive with bytesBase64Encoded.
|
|
35
|
+
*/
|
|
36
|
+
@IsOptional()
|
|
37
|
+
@IsString()
|
|
38
|
+
@Matches(/^gs:\/\/.+/, { message: 'gcsUri must be a valid Cloud Storage URI (gs://...)' })
|
|
39
|
+
@ValidateIf((o) => !o.bytesBase64Encoded) // Validate only if bytesBase64Encoded is not present
|
|
40
|
+
@IsNotEmpty({ message: 'Either bytesBase64Encoded or gcsUri must be provided in image object' })
|
|
41
|
+
gcsUri?: string;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* The mime type of the image. Required if image object is provided.
|
|
45
|
+
* Example: "image/png", "image/jpeg"
|
|
46
|
+
*/
|
|
47
|
+
@IsString()
|
|
48
|
+
@IsNotEmpty()
|
|
49
|
+
mimeType: string;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export class GenerateVideoDto {
|
|
53
|
+
/**
|
|
54
|
+
* A text string to guide the first eight seconds in the video.
|
|
55
|
+
* Mandatory if 'image' is not provided.
|
|
56
|
+
* Example: "A fast-tracking shot through a bustling dystopian sprawl..."
|
|
57
|
+
*/
|
|
58
|
+
@IsOptional()
|
|
59
|
+
@IsString()
|
|
60
|
+
@ValidateIf((o) => !o.image) // Validate only if image is not present
|
|
61
|
+
@IsNotEmpty({ message: 'prompt is required if image is not provided' })
|
|
62
|
+
prompt?: string;
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Image input for guiding video generation.
|
|
66
|
+
* Optional if 'prompt' is provided.
|
|
67
|
+
* Recommended: 1280x720 or 720x1280 pixels.
|
|
68
|
+
*/
|
|
69
|
+
@IsOptional()
|
|
70
|
+
@ValidateNested()
|
|
71
|
+
@Type(() => ImageInputDto)
|
|
72
|
+
image?: ImageInputDto;
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* The length of video files that you want to generate.
|
|
76
|
+
* @default 8
|
|
77
|
+
*/
|
|
78
|
+
@IsInt()
|
|
79
|
+
@Min(5)
|
|
80
|
+
@Max(8)
|
|
81
|
+
durationSeconds: number = 8; // Default value set
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Defines the aspect ratio of the generated video.
|
|
85
|
+
* @default '16:9'
|
|
86
|
+
*/
|
|
87
|
+
@IsOptional()
|
|
88
|
+
@IsEnum(['16:9', '9:16'], { message: 'aspectRatio must be either "16:9" or "9:16"' })
|
|
89
|
+
aspectRatio?: string = '16:9'; // Default value set
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* A text string that describes anything you want to discourage the model from generating.
|
|
93
|
+
* Example: "overhead lighting, bright colors"
|
|
94
|
+
*/
|
|
95
|
+
@IsOptional()
|
|
96
|
+
@IsString()
|
|
97
|
+
negativePrompt?: string;
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* The safety setting that controls whether people or face generation is allowed.
|
|
101
|
+
* 'allow_adult': allow generation of adults only.
|
|
102
|
+
* 'disallow': disallows inclusion of people/faces.
|
|
103
|
+
* @default 'allow_adult'
|
|
104
|
+
*/
|
|
105
|
+
@IsOptional()
|
|
106
|
+
@IsEnum(['allow_adult', 'disallow'], { message: 'personGeneration must be either "allow_adult" or "disallow"' })
|
|
107
|
+
personGeneration?: string = 'allow_adult'; // Default value set
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* The number of output videos requested.
|
|
111
|
+
* @default 1
|
|
112
|
+
*/
|
|
113
|
+
@IsOptional()
|
|
114
|
+
@IsInt()
|
|
115
|
+
@Min(1)
|
|
116
|
+
@Max(4)
|
|
117
|
+
sampleCount?: number = 1; // Default value set
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* A number to request to make generated videos deterministic.
|
|
121
|
+
* Adding a seed number with your request without changing other parameters
|
|
122
|
+
* will cause the model to produce the same videos.
|
|
123
|
+
* Range: 0 - 4,294,967,295
|
|
124
|
+
*/
|
|
125
|
+
@IsOptional()
|
|
126
|
+
@IsInt()
|
|
127
|
+
@Min(0)
|
|
128
|
+
@Max(4294967295)
|
|
129
|
+
seed?: number;
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* A Cloud Storage bucket URI to store the output video(s).
|
|
133
|
+
* If not provided, base64-encoded video bytes are returned in the response.
|
|
134
|
+
* Pattern: gs://BUCKET_NAME/SUBDIRECTORY
|
|
135
|
+
*/
|
|
136
|
+
@IsOptional()
|
|
137
|
+
@IsString()
|
|
138
|
+
@Matches(/^gs:\/\/.+\/.+/, { message: 'storageUri must be a valid Cloud Storage URI (gs://BUCKET_NAME/SUBDIRECTORY)' })
|
|
139
|
+
storageUri?: string;
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* Use Gemini to enhance your prompts.
|
|
143
|
+
* @default true
|
|
144
|
+
*/
|
|
145
|
+
@IsOptional()
|
|
146
|
+
@IsBoolean()
|
|
147
|
+
enhancePrompt?: boolean = true; // Default value set
|
|
148
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export * from './nest-vertex.module';
|
|
2
|
+
// Controller
|
|
3
|
+
export * from './controllers/nest-tts.controller'; // Export TTS controller
|
|
4
|
+
// Services
|
|
5
|
+
export * from './services/nest-vertex.service';
|
|
6
|
+
export * from './services/nest-tts.service'; // Export TTS service and interface
|
|
7
|
+
export * from './services/gemini-chat.service'; // Export GeminiChatService
|
|
8
|
+
// Models
|
|
9
|
+
export * from './dto/generate-image.dto'; // Export GenerateImageDto
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { Module } from '@nestjs/common';
|
|
2
|
+
import { NestVertexService } from './services/nest-vertex.service';
|
|
3
|
+
import { NestVertexController } from './controllers/nest-vertex.controller';
|
|
4
|
+
import { NestTtsService } from './services/nest-tts.service'; // Import the TTS service
|
|
5
|
+
import { NestTtsController } from './controllers/nest-tts.controller'; // Import the TTS controller
|
|
6
|
+
import { GeminiChatService } from './services/gemini-chat.service';
|
|
7
|
+
import { GeminiChatController } from './controllers/gemini-chat.controller';
|
|
8
|
+
|
|
9
|
+
@Module({
|
|
10
|
+
providers: [NestVertexService, NestTtsService, GeminiChatService], // Add NestTtsService and GeminiChatService to providers
|
|
11
|
+
exports: [NestVertexService, NestTtsService, GeminiChatService], // Export NestTtsService and GeminiChatService
|
|
12
|
+
controllers: [NestVertexController, NestTtsController, GeminiChatController], // Add NestTtsController and GeminiChatController to controllers
|
|
13
|
+
})
|
|
14
|
+
export class NestVertexModule {}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { Injectable, Logger } from '@nestjs/common';
|
|
2
|
+
import { GoogleGenAI, Content, Model } from '@google/genai'; // Use @google/genai - Removed ListModelsResponse
|
|
3
|
+
import { ChatCompletionMessageParam } from 'groq-sdk/resources/chat/completions'; // Keep for input consistency for now
|
|
4
|
+
|
|
5
|
+
import { ChatMessageDict, ChatRole } from '@dataclouder/nest-agent-cards/models/agents.models'; // Use existing output format
|
|
6
|
+
|
|
7
|
+
@Injectable()
|
|
8
|
+
export class GeminiChatService {
|
|
9
|
+
private readonly logger = new Logger(GeminiChatService.name);
|
|
10
|
+
private readonly clientGenAi: GoogleGenAI;
|
|
11
|
+
private readonly modelName = 'gemini-1.5-flash'; // Updated model name
|
|
12
|
+
|
|
13
|
+
constructor() {
|
|
14
|
+
const apiKey = process.env.GEMINI_API_KEY; // Using GEMINI_API_KEY as per example
|
|
15
|
+
if (!apiKey) {
|
|
16
|
+
throw new Error('GEMINI_API_KEY environment variable not set.');
|
|
17
|
+
}
|
|
18
|
+
// Pass apiKey within an options object
|
|
19
|
+
this.clientGenAi = new GoogleGenAI({ apiKey });
|
|
20
|
+
this.logger.log(`GeminiChatService initialized with model: ${this.modelName}`);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Maps incoming roles (Groq/OpenAI style) to Gemini roles for history
|
|
24
|
+
private mapToGeminiRole(role: ChatCompletionMessageParam['role']): 'user' | 'model' {
|
|
25
|
+
// Return string literal type
|
|
26
|
+
switch (role) {
|
|
27
|
+
case 'assistant':
|
|
28
|
+
return 'model';
|
|
29
|
+
case 'system':
|
|
30
|
+
// System messages aren't directly part of Gemini chat history in the same way.
|
|
31
|
+
// We extract it separately. Treat as 'user' if forced into history.
|
|
32
|
+
this.logger.warn('System role encountered, will be extracted, not directly added to Gemini history.');
|
|
33
|
+
return 'user'; // Fallback if needed, but ideally handled separately
|
|
34
|
+
case 'user':
|
|
35
|
+
default:
|
|
36
|
+
return 'user';
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// Formats messages for the @google/genai Content structure (used by generateContent)
|
|
41
|
+
private formatMessagesToContent(messages: ChatCompletionMessageParam[]): Content[] {
|
|
42
|
+
return messages
|
|
43
|
+
.filter((msg) => msg.role !== 'system' && typeof msg.content === 'string') // Exclude system messages and ensure content is string
|
|
44
|
+
.map((msg) => ({
|
|
45
|
+
role: this.mapToGeminiRole(msg.role),
|
|
46
|
+
parts: [{ text: msg.content as string }],
|
|
47
|
+
}));
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
async chat(messages: ChatCompletionMessageParam[]): Promise<ChatMessageDict> {
|
|
51
|
+
if (!messages || messages.length === 0) {
|
|
52
|
+
// this.logger.warn('Gemini chat called with empty messages.');
|
|
53
|
+
return { content: '', role: ChatRole.Assistant, metadata: { finishReason: 'NO_INPUT' } };
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// 1. Extract System Prompt
|
|
57
|
+
const systemMessage = messages.find((msg) => msg.role === 'system')?.content as string | undefined;
|
|
58
|
+
if (systemMessage) {
|
|
59
|
+
// this.logger.log(`System prompt extracted: "${systemMessage}"`);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// 2. Ensure last message is from user (or add one)
|
|
63
|
+
let lastMessage = messages[messages.length - 1];
|
|
64
|
+
if (lastMessage.role !== 'user' || typeof lastMessage.content !== 'string') {
|
|
65
|
+
// this.logger.warn('Last message was not from user or content invalid, adding default message.');
|
|
66
|
+
messages.push({ role: 'user', content: 'please say something to start/continue conversation' });
|
|
67
|
+
lastMessage = messages[messages.length - 1];
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// 3. Format all non-system messages for the 'contents' parameter
|
|
71
|
+
const formattedContents = this.formatMessagesToContent(messages); // Pass all valid messages
|
|
72
|
+
|
|
73
|
+
// 4. Call generateContent (stateless)
|
|
74
|
+
try {
|
|
75
|
+
this.logger.debug(
|
|
76
|
+
`Sending request to Gemini model ${this.modelName} with ${
|
|
77
|
+
formattedContents.length
|
|
78
|
+
} content parts and system instruction: ${!!systemMessage}`,
|
|
79
|
+
);
|
|
80
|
+
// Reverted: Call generateContent directly on this.ai.models
|
|
81
|
+
const response = await this.clientGenAi.models.generateContent({
|
|
82
|
+
model: this.modelName,
|
|
83
|
+
contents: formattedContents,
|
|
84
|
+
config: { systemInstruction: systemMessage },
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
this.logger.debug(`Received Gemini response text.`);
|
|
88
|
+
|
|
89
|
+
// Assuming response structure provides text directly based on example
|
|
90
|
+
// Check the actual type definition of GenerateContentResponse if needed
|
|
91
|
+
const responseText = response?.text ?? ''; // Safely access text, default to empty string
|
|
92
|
+
|
|
93
|
+
return {
|
|
94
|
+
content: responseText,
|
|
95
|
+
role: ChatRole.Assistant,
|
|
96
|
+
metadata: {
|
|
97
|
+
// Try to access finishReason if available in this response structure
|
|
98
|
+
// finishReason: response?.candidates?.[0]?.finishReason,
|
|
99
|
+
},
|
|
100
|
+
};
|
|
101
|
+
} catch (error) {
|
|
102
|
+
this.logger.error(`Gemini chat.sendMessage failed: ${error.message}`, error.stack);
|
|
103
|
+
// Handle specific errors if possible (e.g., API key, rate limits, safety blocks)
|
|
104
|
+
throw new Error(`Failed to get Gemini chat completion: ${error.message}`);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
async chatStream(messages: ChatCompletionMessageParam[]): Promise<AsyncIterable<ChatMessageDict>> {
|
|
109
|
+
if (!messages || messages.length === 0) {
|
|
110
|
+
this.logger.warn('Gemini chatStream called with empty messages.');
|
|
111
|
+
return (async function* () {})(); // Return an empty async iterable
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// 1. Extract System Prompt
|
|
115
|
+
const systemMessage = messages.find((msg) => msg.role === 'system')?.content as string | undefined;
|
|
116
|
+
if (systemMessage) {
|
|
117
|
+
this.logger.log(`System prompt extracted for stream: "${systemMessage}"`);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// 2. Ensure last message is from user
|
|
121
|
+
const lastMessage = messages[messages.length - 1];
|
|
122
|
+
if (lastMessage.role !== 'user' || typeof lastMessage.content !== 'string') {
|
|
123
|
+
// Unlike non-streaming, we probably shouldn't add a message here.
|
|
124
|
+
// Streaming usually implies a direct user interaction initiated the stream.
|
|
125
|
+
this.logger.error('The last message for streaming must be from the user and contain string content.');
|
|
126
|
+
throw new Error('The last message for streaming must be from the user and contain string content.');
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// 3. Format all non-system messages for the 'contents' parameter
|
|
130
|
+
const formattedContents = this.formatMessagesToContent(messages); // Pass all valid messages
|
|
131
|
+
|
|
132
|
+
// 4. Call generateContentStream (stateless)
|
|
133
|
+
|
|
134
|
+
// Define the async generator using an arrow function to capture 'this' context
|
|
135
|
+
const processStream = async function* (): AsyncIterable<ChatMessageDict> {
|
|
136
|
+
try {
|
|
137
|
+
this.logger.debug(
|
|
138
|
+
`Sending stream request to Gemini model ${this.modelName} with ${
|
|
139
|
+
formattedContents.length
|
|
140
|
+
} content parts and system instruction: ${!!systemMessage}`,
|
|
141
|
+
);
|
|
142
|
+
// Reverted: Call generateContentStream directly on this.ai.models
|
|
143
|
+
const stream = await this.ai.models.generateContentStream({
|
|
144
|
+
model: this.modelName,
|
|
145
|
+
contents: formattedContents,
|
|
146
|
+
config: { systemInstruction: systemMessage },
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
for await (const chunk of stream) {
|
|
150
|
+
// Assuming chunk structure provides text directly based on generateContent response
|
|
151
|
+
// Check the actual type definition of GenerateContentResponseStream if needed
|
|
152
|
+
const chunkText = chunk?.text ?? ''; // Safely access text
|
|
153
|
+
if (chunkText) {
|
|
154
|
+
// Only yield if there's text content
|
|
155
|
+
this.logger.debug(`Received Gemini stream chunk text.`);
|
|
156
|
+
yield {
|
|
157
|
+
content: chunkText,
|
|
158
|
+
role: ChatRole.Assistant,
|
|
159
|
+
metadata: {
|
|
160
|
+
isChunk: true,
|
|
161
|
+
// Try to access finishReason if available in this chunk structure
|
|
162
|
+
// finishReason: chunk?.candidates?.[0]?.finishReason,
|
|
163
|
+
},
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
this.logger.debug(`Gemini stream finished.`);
|
|
168
|
+
} catch (error) {
|
|
169
|
+
this.logger.error(`Gemini generateContentStream failed: ${error.message}`, error.stack);
|
|
170
|
+
// Re-throw or yield an error message chunk if preferred
|
|
171
|
+
throw new Error(`Failed to get Gemini chat stream completion: ${error.message}`);
|
|
172
|
+
}
|
|
173
|
+
}.bind(this); // Bind 'this' explicitly for safety, although arrow functions usually handle it
|
|
174
|
+
|
|
175
|
+
// Return the invoked generator
|
|
176
|
+
return processStream();
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// New method to list available models
|
|
180
|
+
async listModels(): Promise<Record<string, string>[]> {
|
|
181
|
+
// Last reserach there is no method to return availible models, so i had to hard coded
|
|
182
|
+
return [
|
|
183
|
+
{ id: 'gemini-2.5-flash-preview-04-17' },
|
|
184
|
+
{ id: 'gemini-2.5-pro-preview-03-25' },
|
|
185
|
+
{ id: 'gemini-2.0-flash' },
|
|
186
|
+
{ id: 'gemini-2.0-flash-lite' },
|
|
187
|
+
{ id: 'gemini-2.0-flash-live-001' },
|
|
188
|
+
];
|
|
189
|
+
}
|
|
190
|
+
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { Injectable, Logger } from '@nestjs/common';
|
|
2
|
+
import { TextToSpeechClient } from '@google-cloud/text-to-speech';
|
|
3
|
+
import { google } from '@google-cloud/text-to-speech/build/protos/protos'; // For types
|
|
4
|
+
|
|
5
|
+
// Define interfaces for better type safety
|
|
6
|
+
// Input contract for NestTtsService.synthesizeSpeech.
export interface SynthesizeSpeechInput {
  text: string; // plain text to synthesize (required)
  languageCode?: string; // e.g. 'en-US'; if omitted, derived from the first 5 chars of the voice name
  ssmlGender?: google.cloud.texttospeech.v1.SsmlVoiceGender; // defaults to NEUTRAL downstream
  voiceName?: string; // optional specific voice name
  voice?: string; // alias of voiceName; synthesizeSpeech uses voiceName || voice
  audioEncoding?: google.cloud.texttospeech.v1.AudioEncoding; // defaults to MP3 downstream
  generateTranscription?: boolean; // NOTE(review): not consumed in the visible synthesizeSpeech code — confirm usage
  speed?: string; // NOTE(review): not consumed in the visible synthesizeSpeech code — confirm usage
  speedRate?: number; // destructured by synthesizeSpeech; presumably speaking rate — confirm against audioConfig
}
|
|
17
|
+
|
|
18
|
+
@Injectable()
|
|
19
|
+
export class NestTtsService {
|
|
20
|
+
// Keeping class name for now
|
|
21
|
+
private readonly logger = new Logger('VertexTtsService'); // Updated logger context
|
|
22
|
+
private client: TextToSpeechClient;
|
|
23
|
+
|
|
24
|
+
constructor() {
|
|
25
|
+
// Instantiates a client. Needs GOOGLE_APPLICATION_CREDENTIALS env var set.
|
|
26
|
+
// See: https://cloud.google.com/docs/authentication/provide-credentials-adc#local-dev
|
|
27
|
+
this.client = new TextToSpeechClient();
|
|
28
|
+
this.logger.log('Google TextToSpeechClient initialized.');
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
getHello(): string {
|
|
32
|
+
return 'Hello from VertexTtsService!'; // Updated message
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Synthesizes speech from text using Google Cloud TTS.
|
|
37
|
+
* @param input - The text and configuration for speech synthesis.
|
|
38
|
+
* @returns A Buffer containing the audio data (e.g., MP3).
|
|
39
|
+
* @throws Error if synthesis fails.
|
|
40
|
+
*/
|
|
41
|
+
async synthesizeSpeech(input: SynthesizeSpeechInput): Promise<Buffer | null> {
|
|
42
|
+
const {
|
|
43
|
+
text,
|
|
44
|
+
languageCode, // Default language
|
|
45
|
+
ssmlGender = 'NEUTRAL', // Default gender
|
|
46
|
+
voiceName, // Optional specific voice name
|
|
47
|
+
voice,
|
|
48
|
+
audioEncoding = 'MP3', // Default audio encoding
|
|
49
|
+
speedRate,
|
|
50
|
+
} = input;
|
|
51
|
+
const voiceId = voiceName || voice;
|
|
52
|
+
console.log('Selected voice:', voiceId);
|
|
53
|
+
|
|
54
|
+
// Seems langcode is required take from voice name
|
|
55
|
+
const langCode = !languageCode ? voiceId.slice(0, 5) : languageCode;
|
|
56
|
+
|
|
57
|
+
const request: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {
|
|
58
|
+
input: { text: text },
|
|
59
|
+
// Select the language and SSML voice gender (optional)
|
|
60
|
+
voice: voiceId ? { name: voiceId, languageCode: langCode } : { languageCode: langCode, ssmlGender: ssmlGender },
|
|
61
|
+
// Select the type of audio encoding and speaking rate
|
|
62
|
+
audioConfig: {
|
|
63
|
+
audioEncoding: audioEncoding,
|
|
64
|
+
speakingRate: speedRate, // Add the speaking rate here
|
|
65
|
+
},
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
this.logger.log(`Synthesizing speech for text: "${text.substring(0, 50)}..."`);
|
|
69
|
+
|
|
70
|
+
try {
|
|
71
|
+
console.log('request', request);
|
|
72
|
+
// Performs the text-to-speech request
|
|
73
|
+
const [response] = await this.client.synthesizeSpeech(request);
|
|
74
|
+
|
|
75
|
+
if (response.audioContent instanceof Uint8Array) {
|
|
76
|
+
this.logger.log('Speech synthesis successful.');
|
|
77
|
+
return Buffer.from(response.audioContent);
|
|
78
|
+
} else {
|
|
79
|
+
this.logger.warn('No audio content received from TTS API.');
|
|
80
|
+
return null;
|
|
81
|
+
}
|
|
82
|
+
} catch (error) {
|
|
83
|
+
this.logger.error('Error synthesizing speech:', error);
|
|
84
|
+
throw new Error(`Failed to synthesize speech: ${error.message}`);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Lists available voices from Google Cloud TTS.
|
|
90
|
+
* Lists available voices from Google Cloud TTS, optionally filtered by language code.
|
|
91
|
+
* @param languageCode - Optional language code (e.g., 'en-US') to filter voices.
|
|
92
|
+
* @returns A list of available voices.
|
|
93
|
+
* @throws Error if listing voices fails.
|
|
94
|
+
*/
|
|
95
|
+
async listVoices(languageCode?: string): Promise<google.cloud.texttospeech.v1.IVoice[]> {
|
|
96
|
+
const request: google.cloud.texttospeech.v1.IListVoicesRequest = {};
|
|
97
|
+
if (languageCode) {
|
|
98
|
+
request.languageCode = languageCode;
|
|
99
|
+
this.logger.log(`Fetching list of available voices for language: ${languageCode}...`);
|
|
100
|
+
} else {
|
|
101
|
+
this.logger.log('Fetching list of all available voices...');
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
try {
|
|
105
|
+
const [response] = await this.client.listVoices(request);
|
|
106
|
+
if (response.voices) {
|
|
107
|
+
const count = response.voices.length;
|
|
108
|
+
this.logger.log(`Successfully fetched ${count} voice${count === 1 ? '' : 's'}${languageCode ? ` for language ${languageCode}` : ''}.`);
|
|
109
|
+
return response.voices;
|
|
110
|
+
} else {
|
|
111
|
+
this.logger.warn('No voices received from TTS API.');
|
|
112
|
+
return [];
|
|
113
|
+
}
|
|
114
|
+
} catch (error) {
|
|
115
|
+
this.logger.error('Error listing voices:', error);
|
|
116
|
+
throw new Error(`Failed to list voices: ${error.message}`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
|
|
2
|
+
// Assuming GenerateImagesResponse might be the type, add if available, otherwise use any/unknown
|
|
3
|
+
import { GoogleGenAI, GenerateVideosOperation, GenerateImagesResponse } from '@google/genai';
|
|
4
|
+
import { GenerateVideoDto } from '../dto/generate-video.dto'; // Updated import path
|
|
5
|
+
import { GenerateImageDto } from '../dto/generate-image.dto'; // Updated import path
|
|
6
|
+
|
|
7
|
+
@Injectable()
|
|
8
|
+
export class NestVertexService implements OnModuleInit {
|
|
9
|
+
// Renamed class
|
|
10
|
+
private genAi: GoogleGenAI;
|
|
11
|
+
private readonly logger = new Logger(NestVertexService.name); // Updated logger name
|
|
12
|
+
private readonly videoModelName = 'veo-2.0-generate-001';
|
|
13
|
+
private readonly imageModelName = 'imagen-3.0-generate-002'; // Added image model name
|
|
14
|
+
|
|
15
|
+
onModuleInit() {
|
|
16
|
+
const apiKey = process.env.GEMINI_API_KEY;
|
|
17
|
+
if (!apiKey) {
|
|
18
|
+
this.logger.error('GEMINI_API_KEY environment variable not set. NestVertexService will not function.'); // Updated log message
|
|
19
|
+
// Optionally throw an error to prevent the application from starting
|
|
20
|
+
// throw new Error('GEMINI_API_KEY environment variable not set.');
|
|
21
|
+
} else {
|
|
22
|
+
console.log('GEMINI_API_KEY environment variable set: ', apiKey);
|
|
23
|
+
this.genAi = new GoogleGenAI({ apiKey });
|
|
24
|
+
this.logger.log('GoogleGenAI client initialized successfully.');
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Starts the video generation process using Google Gen AI.
|
|
30
|
+
* Starts the video generation process using Google Gen AI based on the provided DTO.
|
|
31
|
+
* @param generateVideoDto The DTO containing all parameters for video generation.
|
|
32
|
+
* @returns A Promise resolving to the video generation operation object.
|
|
33
|
+
*/
|
|
34
|
+
async startVideoGeneration(generateVideoDto: GenerateVideoDto): Promise<GenerateVideosOperation> {
|
|
35
|
+
if (!this.genAi) {
|
|
36
|
+
throw new Error('GoogleGenAI client not initialized. Check GEMINI_API_KEY.');
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
this.logger.log(`Starting video generation with DTO: ${JSON.stringify(generateVideoDto)}`);
|
|
40
|
+
|
|
41
|
+
// Construct the request object based on DTO, likely structure for the SDK
|
|
42
|
+
// Note: The exact structure might need verification against SDK docs/examples if this fails
|
|
43
|
+
const videoRequest = {
|
|
44
|
+
model: this.videoModelName,
|
|
45
|
+
prompt: generateVideoDto.prompt,
|
|
46
|
+
// Image handling - assuming it's a top-level param or within config
|
|
47
|
+
...(generateVideoDto.image && {
|
|
48
|
+
image: {
|
|
49
|
+
// Assuming image structure is nested like this
|
|
50
|
+
bytesBase64Encoded: generateVideoDto.image.bytesBase64Encoded,
|
|
51
|
+
gcsUri: generateVideoDto.image.gcsUri,
|
|
52
|
+
mimeType: generateVideoDto.image.mimeType,
|
|
53
|
+
},
|
|
54
|
+
}),
|
|
55
|
+
// Configuration parameters - place them where the SDK expects (e.g., top-level or config object)
|
|
56
|
+
// Trying top-level first based on common patterns
|
|
57
|
+
durationSeconds: generateVideoDto.durationSeconds,
|
|
58
|
+
aspectRatio: generateVideoDto.aspectRatio,
|
|
59
|
+
negativePrompt: generateVideoDto.negativePrompt,
|
|
60
|
+
personGeneration: generateVideoDto.personGeneration,
|
|
61
|
+
sampleCount: generateVideoDto.sampleCount,
|
|
62
|
+
seed: generateVideoDto.seed,
|
|
63
|
+
storageUri: generateVideoDto.storageUri,
|
|
64
|
+
enhancePrompt: generateVideoDto.enhancePrompt,
|
|
65
|
+
// config: { // Alternative: place parameters inside a 'config' object if needed
|
|
66
|
+
// numberOfVideos: generateVideoDto.sampleCount, // Example if sampleCount maps here
|
|
67
|
+
// ... other config params
|
|
68
|
+
// }
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// Remove undefined optional parameters to avoid sending empty values
|
|
72
|
+
Object.keys(videoRequest).forEach((key) => videoRequest[key] === undefined && delete videoRequest[key]);
|
|
73
|
+
if (videoRequest.image) {
|
|
74
|
+
if (videoRequest.image.bytesBase64Encoded === undefined) delete videoRequest.image.bytesBase64Encoded;
|
|
75
|
+
if (videoRequest.image.gcsUri === undefined) delete videoRequest.image.gcsUri;
|
|
76
|
+
// If both are undefined after cleanup, remove the image object itself
|
|
77
|
+
if (Object.keys(videoRequest.image).length === 1 && videoRequest.image.mimeType) {
|
|
78
|
+
// If only mimeType remains, it's likely an invalid state unless mimeType alone is useful
|
|
79
|
+
// For safety, let's remove image if both content fields are missing
|
|
80
|
+
if (!videoRequest.image.bytesBase64Encoded && !videoRequest.image.gcsUri) {
|
|
81
|
+
delete videoRequest.image;
|
|
82
|
+
}
|
|
83
|
+
} else if (Object.keys(videoRequest.image).length === 0) {
|
|
84
|
+
delete videoRequest.image; // Remove empty image object
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
try {
|
|
89
|
+
// Pass the constructed request object
|
|
90
|
+
const operation = await this.genAi.models.generateVideos(videoRequest);
|
|
91
|
+
this.logger.log(`Video generation operation started: ${operation.name}`);
|
|
92
|
+
return operation;
|
|
93
|
+
} catch (error) {
|
|
94
|
+
this.logger.error('Error starting video generation:', error.message || error);
|
|
95
|
+
// Re-throw or handle the error appropriately
|
|
96
|
+
throw error;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Checks the status of a video generation operation.
|
|
102
|
+
* IMPORTANT SDK Limitation: The `@google/genai` SDK's `getVideosOperation` method
|
|
103
|
+
* requires the full operation object, not just the name string, to be passed
|
|
104
|
+
* in its `operation` parameter (based on TypeScript errors and official examples).
|
|
105
|
+
* Therefore, this method cannot be directly called from a simple GET endpoint
|
|
106
|
+
* that only provides the operation name string.
|
|
107
|
+
*
|
|
108
|
+
* To implement polling, the client would typically need to store the initial
|
|
109
|
+
* operation object returned by `startVideoGeneration` and pass it back, or
|
|
110
|
+
* the server would need to manage operation state differently (e.g., store
|
|
111
|
+
* operations in a database and poll internally).
|
|
112
|
+
*
|
|
113
|
+
* This method is left here as a placeholder demonstrating the intended SDK call,
|
|
114
|
+
* but it will likely fail if called with just the name string due to the SDK design.
|
|
115
|
+
*
|
|
116
|
+
* @param operationName The name of the operation (e.g., "operations/...")
|
|
117
|
+
* @returns A Promise resolving to the updated operation object.
|
|
118
|
+
*/
|
|
119
|
+
async checkVideoOperationStatus(operationName: string): Promise<GenerateVideosOperation> {
|
|
120
|
+
if (!this.genAi) {
|
|
121
|
+
throw new Error('GoogleGenAI client not initialized. Check GEMINI_API_KEY.');
|
|
122
|
+
}
|
|
123
|
+
this.logger.warn(
|
|
124
|
+
`Attempting to check status for operation: ${operationName}. ` +
|
|
125
|
+
`Note: This may fail due to SDK requiring the full operation object for getVideosOperation.`,
|
|
126
|
+
);
|
|
127
|
+
try {
|
|
128
|
+
// This call expects the full operation object, not just the name.
|
|
129
|
+
// Passing only the name string will likely cause a runtime or type error.
|
|
130
|
+
const oper: GenerateVideosOperation = { name: operationName };
|
|
131
|
+
const operation = await this.genAi.operations.getVideosOperation({
|
|
132
|
+
operation: oper, // This line is problematic based on SDK behavior
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
this.logger.log(`Operation ${operationName} status: ${operation.done ? 'Done' : 'Processing'}`);
|
|
136
|
+
return operation;
|
|
137
|
+
} catch (error) {
|
|
138
|
+
this.logger.error(`Error checking status for operation ${operationName}:`, error);
|
|
139
|
+
// The error might be a TypeError due to the incorrect parameter type.
|
|
140
|
+
throw error;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Starts the image generation process using Google Gen AI.
|
|
146
|
+
* @param generateImageDto The DTO containing parameters for image generation.
|
|
147
|
+
* @returns A Promise resolving to the image generation response.
|
|
148
|
+
*/
|
|
149
|
+
async startImageGeneration(generateImageDto: GenerateImageDto): Promise<GenerateImagesResponse> {
|
|
150
|
+
// Use GenerateImagesResponse or adjust type as needed
|
|
151
|
+
if (!this.genAi) {
|
|
152
|
+
throw new Error('GoogleGenAI client not initialized. Check GEMINI_API_KEY/Vertex AI setup.');
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
this.logger.log(`Starting image generation with DTO: ${JSON.stringify(generateImageDto)}`);
|
|
156
|
+
|
|
157
|
+
// Construct the request object for the SDK based on the example
|
|
158
|
+
const imageRequest = {
|
|
159
|
+
model: this.imageModelName,
|
|
160
|
+
prompt: generateImageDto.prompt,
|
|
161
|
+
config: {
|
|
162
|
+
numberOfImages: generateImageDto.numberOfImages,
|
|
163
|
+
aspectRatio: generateImageDto.aspectRatio,
|
|
164
|
+
negativePrompt: generateImageDto.negativePrompt,
|
|
165
|
+
// includeRaiReason: true, // Optional, based on example
|
|
166
|
+
// Add other config parameters from DTO if applicable
|
|
167
|
+
},
|
|
168
|
+
};
|
|
169
|
+
|
|
170
|
+
// Remove undefined optional parameters from config
|
|
171
|
+
Object.keys(imageRequest.config).forEach((key) => imageRequest.config[key] === undefined && delete imageRequest.config[key]);
|
|
172
|
+
|
|
173
|
+
try {
|
|
174
|
+
// Pass the constructed request object
|
|
175
|
+
const response = await this.genAi.models.generateImages(imageRequest);
|
|
176
|
+
this.logger.log(`Image generation successful for prompt: "${generateImageDto.prompt}"`);
|
|
177
|
+
// Consider what part of the response to return, e.g., response.generatedImages
|
|
178
|
+
return response;
|
|
179
|
+
} catch (error) {
|
|
180
|
+
this.logger.error('Error starting image generation:', error.message || error);
|
|
181
|
+
throw error;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|