@dataclouder/nest-vertex 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "@dataclouder/nest-vertex",
3
+ "version": "0.0.2",
4
+ "description": "NestJS Vertex AI library for Dataclouder",
5
+ "author": "dataclouder",
6
+ "license": "MIT",
7
+ "main": "dist/index.js",
8
+ "types": "dist/index.d.ts",
9
+ "files": [
10
+ "dist",
11
+ "src"
12
+ ],
13
+ "scripts": {
14
+ "build": "tsc -p tsconfig.lib.json",
15
+ "publish:npm": "npm run build && npm version patch && npm publish"
16
+ },
17
+ "keywords": [
18
+ "nestjs",
19
+ "library",
20
+ "vertex",
21
+ "ai"
22
+ ],
23
+ "peerDependencies": {
24
+ "@nestjs/common": ">=10.0.0",
25
+ "@nestjs/core": ">=10.0.0",
26
+ "rxjs": ">=7.0.0"
27
+ },
28
+ "publishConfig": {
29
+ "access": "public"
30
+ }
31
+ }
@@ -0,0 +1,29 @@
1
+ import { Controller, Post, Body, Logger } from '@nestjs/common';
2
+ import { GeminiChatService } from '../services/gemini-chat.service';
3
+ import { ApiTags } from '@nestjs/swagger';
4
+
5
+ // Basic interface for input messages, similar to ChatCompletionMessageParam
6
+ interface ChatMessageInput {
7
+ role: 'user' | 'assistant' | 'system';
8
+ content: string;
9
+ }
10
+
11
+ @ApiTags('Vertex Gemini') // Keeping tag as 'tts' for now
12
+ @Controller('api/vertex/gemini')
13
+ export class GeminiChatController {
14
+ private readonly logger = new Logger(GeminiChatController.name);
15
+
16
+ constructor(private readonly geminiChatService: GeminiChatService) {}
17
+
18
+ @Post('generate-text')
19
+ async generateText(@Body() messages: ChatMessageInput[]) {
20
+ this.logger.log('Received request for text generation');
21
+ try {
22
+ const result = await this.geminiChatService.chat(messages);
23
+ return result;
24
+ } catch (error) {
25
+ this.logger.error(`Error generating text: ${error.message}`, error.stack);
26
+ throw error; // Re-throw the error for NestJS to handle
27
+ }
28
+ }
29
+ }
@@ -0,0 +1,120 @@
1
+ import {
2
+ Controller,
3
+ Get,
4
+ Post,
5
+ Body,
6
+ Res,
7
+ HttpCode,
8
+ HttpStatus,
9
+ Header,
10
+ Logger,
11
+ InternalServerErrorException,
12
+ NotFoundException,
13
+ Query, // Import Query decorator
14
+ } from '@nestjs/common';
15
+ import { google } from '@google-cloud/text-to-speech/build/protos/protos'; // Import google types
16
+ import { ApiTags, ApiOperation, ApiBody, ApiResponse, ApiQuery } from '@nestjs/swagger'; // Import ApiQuery
17
+ import { FastifyReply } from 'fastify'; // Import FastifyReply type
18
+ import { NestTtsService, SynthesizeSpeechInput } from '../services/nest-tts.service';
19
+ @ApiTags('Vertex TTS') // Keeping tag as 'tts' for now
20
+ @Controller('api/vertex/tts') // Updated route prefix to 'vertex/tts'
21
+ export class NestTtsController {
22
+ // Keeping class name for now
23
+ private readonly logger = new Logger('VertexTtsController'); // Updated logger context
24
+
25
+ constructor(private readonly nestTtsService: NestTtsService) {}
26
+
27
+ @Get('ping') // Renamed from 'generate'/'hello'
28
+ @ApiOperation({ summary: 'Ping the TTS service' })
29
+ @ApiResponse({ status: 200, description: 'Service is available' })
30
+ ping(): string {
31
+ this.logger.log('Received ping request');
32
+ return this.nestTtsService.getHello(); // Keep using the simple service method for ping
33
+ }
34
+
35
+ @Post('synthesize')
36
+ @HttpCode(HttpStatus.OK)
37
+ @ApiOperation({ summary: 'Synthesize speech from text' })
38
+ @ApiBody({
39
+ description: 'Text and configuration for speech synthesis',
40
+ schema: {
41
+ type: 'object',
42
+ properties: {
43
+ text: { type: 'string', example: 'Hello world' },
44
+ languageCode: { type: 'string', example: 'en-US', description: 'Optional language code (default: en-US)' },
45
+ ssmlGender: { type: 'string', example: 'NEUTRAL', description: 'Optional SSML gender (default: NEUTRAL)' },
46
+ voiceName: { type: 'string', example: 'en-US-Neural2-F', description: 'Optional specific voice name' },
47
+ audioEncoding: { type: 'string', example: 'MP3', description: 'Optional audio encoding (default: MP3)' },
48
+ },
49
+ required: ['text'], // Only 'text' is required
50
+ },
51
+ })
52
+ @ApiResponse({ status: 200, description: 'Returns the synthesized audio file (e.g., MP3)' })
53
+ @ApiResponse({ status: 500, description: 'Internal server error during synthesis' })
54
+ @Header('Content-Type', 'audio/mpeg') // Set default, assuming MP3
55
+ async synthesizeSpeech(
56
+ @Body() body: SynthesizeSpeechInput,
57
+ @Res({ passthrough: true }) res: FastifyReply, // Use FastifyReply type
58
+ ) {
59
+ console.log('Received synthesize request', body);
60
+
61
+ // Removed Promise<Buffer> return type annotation
62
+ this.logger.log(`Received synthesize request for text: "${body.text.substring(0, 50)}..."`);
63
+ try {
64
+ const audioBuffer = await this.nestTtsService.synthesizeSpeech(body);
65
+
66
+ if (!audioBuffer) {
67
+ this.logger.error('Synthesis resulted in null buffer');
68
+ // Use NotFoundException or a more specific error if appropriate
69
+ throw new NotFoundException('Could not generate audio for the given input.');
70
+ }
71
+
72
+ // For testing porpuse
73
+
74
+ // // --- Save audio locally as mp3 ---
75
+ // const audioDir = path.join(process.cwd(), 'public', 'tts-audio'); // Save in project_root/public/tts-audio
76
+ // const filename = `tts_${Date.now()}.mp3`;
77
+ // const filePath = path.join(audioDir, filename);
78
+
79
+ // try {
80
+ // await fs.mkdir(audioDir, { recursive: true }); // Ensure directory exists
81
+ // await fs.writeFile(filePath, audioBuffer);
82
+ // this.logger.log(`Audio saved successfully to ${filePath}`);
83
+ // } catch (saveError) {
84
+ // // Log the error but don't stop the response from being sent
85
+ // this.logger.error(`Failed to save audio file to ${filePath}: ${saveError.message}`, saveError.stack);
86
+ // // Optionally, you could throw an error here if saving is critical
87
+ // // throw new InternalServerErrorException(`Failed to save audio file: ${saveError.message}`);
88
+ // }
89
+ // // --- End save audio ---
90
+
91
+ // Set content type based on actual encoding if needed, otherwise default is fine
92
+ // Example: if (body.audioEncoding === 'OGG_OPUS') res.header('Content-Type', 'audio/ogg');
93
+ res.header('Content-Length', audioBuffer.length.toString()); // Use res.header for Fastify
94
+
95
+ this.logger.log(`Successfully synthesized audio (${audioBuffer.length} bytes)`);
96
+ return audioBuffer; // NestJS handles sending the buffer
97
+ } catch (error) {
98
+ this.logger.error(`Synthesis failed: ${error.message}`, error.stack);
99
+ // Re-throw as a NestJS exception for proper handling
100
+ throw new InternalServerErrorException(`Speech synthesis failed: ${error.message}`);
101
+ }
102
+ }
103
+
104
+ @Get('voices')
105
+ @ApiOperation({ summary: 'List available TTS voices, optionally filtered by language' })
106
+ @ApiQuery({ name: 'languageCode', required: false, type: String, description: 'Optional language code (e.g., en-US) to filter voices' })
107
+ @ApiResponse({ status: 200, description: 'Returns a list of available voices', type: [Object] }) // Using Object as a placeholder for Swagger type
108
+ @ApiResponse({ status: 500, description: 'Internal server error while fetching voices' })
109
+ async listVoices(@Query('languageCode') languageCode?: string): Promise<google.cloud.texttospeech.v1.IVoice[]> {
110
+ this.logger.log(`Received request to list voices${languageCode ? ` for language ${languageCode}` : ''}`);
111
+ try {
112
+ const voices = await this.nestTtsService.listVoices(languageCode);
113
+ this.logger.log(`Returning ${voices.length} voice${voices.length === 1 ? '' : 's'}.`);
114
+ return voices;
115
+ } catch (error) {
116
+ this.logger.error(`Failed to list voices: ${error.message}`, error.stack);
117
+ throw new InternalServerErrorException(`Failed to list voices: ${error.message}`);
118
+ }
119
+ }
120
+ }
@@ -0,0 +1,53 @@
1
+ import { Controller, Post, Body, ValidationPipe, Logger, Param, Get } from '@nestjs/common';
2
+ import { NestVertexService } from '../services/nest-vertex.service';
3
+ import { GenerateVideoDto } from '../dto/generate-video.dto'; // Updated import path
4
+ import { GenerateImageDto } from '../dto/generate-image.dto'; // Updated import path
5
+ import { GenerateVideosOperation, GenerateImagesResponse } from '@google/genai';
6
+ import { ApiTags } from '@nestjs/swagger';
7
+
8
+ @ApiTags('Vertex Image/video') // Keeping tag as 'tts' for now
9
+ @Controller('api/vertex')
10
+ export class NestVertexController {
11
+ private readonly logger = new Logger(NestVertexController.name);
12
+
13
+ constructor(private readonly nestVertexService: NestVertexService) {}
14
+
15
+ @Post('generate-video')
16
+ async generateVideo(
17
+ @Body(new ValidationPipe({ transform: true, whitelist: true })) generateVideoDto: GenerateVideoDto,
18
+ ): Promise<{ operationName: string }> {
19
+ this.logger.log(`Received request to generate video: ${JSON.stringify(generateVideoDto)}`);
20
+ try {
21
+ const operation: GenerateVideosOperation = await this.nestVertexService.startVideoGeneration(generateVideoDto);
22
+ return { operationName: operation.name };
23
+ } catch (error) {
24
+ this.logger.error('Error in generateVideo endpoint:', error);
25
+ throw error;
26
+ }
27
+ }
28
+
29
+ @Get('video-status/:operationName')
30
+ async getVideoStatus(@Param('operationName') operationName: string): Promise<GenerateVideosOperation> {
31
+ this.logger.log(`Received request to check status for operation: ${operationName}`);
32
+ try {
33
+ return await this.nestVertexService.checkVideoOperationStatus(operationName);
34
+ } catch (error) {
35
+ this.logger.error(`Error checking status for operation ${operationName}:`, error);
36
+ throw error;
37
+ }
38
+ }
39
+
40
+ @Post('generate-image')
41
+ async generateImage(
42
+ @Body(new ValidationPipe({ transform: true, whitelist: true })) generateImageDto: GenerateImageDto,
43
+ ): Promise<GenerateImagesResponse> {
44
+ this.logger.log(`Received request to generate image: ${JSON.stringify(generateImageDto)}`);
45
+ try {
46
+ const response: GenerateImagesResponse = await this.nestVertexService.startImageGeneration(generateImageDto);
47
+ return response;
48
+ } catch (error) {
49
+ this.logger.error('Error in generateImage endpoint:', error);
50
+ throw error;
51
+ }
52
+ }
53
+ }
@@ -0,0 +1,53 @@
1
+ // src/vertex/dto/generate-image.dto.ts
2
+ import { IsString, IsNotEmpty, IsOptional, IsInt, Min, Max, IsEnum } from 'class-validator';
3
+
4
export class GenerateImageDto {
  /**
   * The text prompt describing the image(s) to generate.
   * Example: "A futuristic cityscape at sunset"
   */
  @IsString()
  @IsNotEmpty()
  prompt: string;

  /**
   * The number of images to generate (1-8).
   * @default 1
   */
  @IsOptional()
  @IsInt()
  @Min(1)
  @Max(8) // Assumed reasonable upper bound — TODO confirm against current Imagen API limits
  numberOfImages?: number = 1;

  /**
   * Defines the aspect ratio of the generated image(s).
   * Allowed values: '1:1', '16:9', '9:16', '3:4', '4:3'.
   * @default '1:1'
   */
  @IsOptional()
  @IsEnum(['1:1', '16:9', '9:16', '3:4', '4:3'], { message: 'aspectRatio must be one of "1:1", "16:9", "9:16", "3:4", "4:3"' })
  aspectRatio?: string = '1:1';

  /**
   * A text string describing elements to avoid in the generated image(s).
   * Example: "text, watermark, blurry"
   */
  @IsOptional()
  @IsString()
  negativePrompt?: string;

  /**
   * A seed value for deterministic image generation.
   * Using the same seed and prompt should yield the same result.
   * Range: 0 - 4,294,967,295 (unsigned 32-bit)
   */
  @IsOptional()
  @IsInt()
  @Min(0)
  @Max(4294967295)
  seed?: number;

  // Other API parameters (style preset, output format, quality, ...) can be
  // added here once confirmed against the SDK/API documentation.
}
@@ -0,0 +1,148 @@
1
+ import {
2
+ IsString,
3
+ IsOptional,
4
+ IsInt,
5
+ Min,
6
+ Max,
7
+ IsBoolean,
8
+ IsEnum,
9
+ ValidateNested,
10
+ IsBase64,
11
+ Matches,
12
+ IsNotEmpty,
13
+ ValidateIf,
14
+ } from 'class-validator';
15
+ import { Type } from 'class-transformer';
16
+
17
/**
 * Represents the image input for guiding video generation.
 * Exactly one of bytesBase64Encoded or gcsUri should be supplied.
 *
 * NOTE(review): @IsOptional() short-circuits all other validators when the
 * property is undefined, so the ValidateIf/IsNotEmpty "either/or" rule below
 * may never fire when BOTH fields are omitted — confirm with a validation test.
 */
class ImageInputDto {
  /**
   * Base64-encoded image byte string.
   * Mutually exclusive with gcsUri.
   */
  @IsOptional()
  @IsBase64()
  @ValidateIf((o) => !o.gcsUri) // Only enforce the not-empty rule when gcsUri is absent
  @IsNotEmpty({ message: 'Either bytesBase64Encoded or gcsUri must be provided in image object' })
  bytesBase64Encoded?: string;

  /**
   * Cloud Storage bucket URI (e.g., gs://bucket-name/image.png).
   * Mutually exclusive with bytesBase64Encoded.
   */
  @IsOptional()
  @IsString()
  @Matches(/^gs:\/\/.+/, { message: 'gcsUri must be a valid Cloud Storage URI (gs://...)' })
  @ValidateIf((o) => !o.bytesBase64Encoded) // Only enforce the not-empty rule when bytesBase64Encoded is absent
  @IsNotEmpty({ message: 'Either bytesBase64Encoded or gcsUri must be provided in image object' })
  gcsUri?: string;

  /**
   * The mime type of the image. Required whenever an image object is provided.
   * Example: "image/png", "image/jpeg"
   */
  @IsString()
  @IsNotEmpty()
  mimeType: string;
}
51
+
52
export class GenerateVideoDto {
  /**
   * A text string to guide the first eight seconds of the video.
   * Mandatory if 'image' is not provided.
   * Example: "A fast-tracking shot through a bustling dystopian sprawl..."
   *
   * NOTE(review): @IsOptional() skips the ValidateIf/IsNotEmpty pair when the
   * property is undefined, so "prompt required when image absent" may not be
   * enforced if both are omitted — confirm with a validation test.
   */
  @IsOptional()
  @IsString()
  @ValidateIf((o) => !o.image) // Only enforce the not-empty rule when image is absent
  @IsNotEmpty({ message: 'prompt is required if image is not provided' })
  prompt?: string;

  /**
   * Image input for guiding video generation.
   * Optional if 'prompt' is provided.
   * Recommended: 1280x720 or 720x1280 pixels.
   */
  @IsOptional()
  @ValidateNested()
  @Type(() => ImageInputDto) // class-transformer needs the concrete nested class
  image?: ImageInputDto;

  /**
   * The length (in seconds) of the video files to generate. Range 5-8.
   * Defaults to 8 when omitted (class default applies under transform: true).
   * @default 8
   */
  @IsInt()
  @Min(5)
  @Max(8)
  durationSeconds: number = 8;

  /**
   * Defines the aspect ratio of the generated video.
   * @default '16:9'
   */
  @IsOptional()
  @IsEnum(['16:9', '9:16'], { message: 'aspectRatio must be either "16:9" or "9:16"' })
  aspectRatio?: string = '16:9';

  /**
   * A text string that describes anything to discourage the model from generating.
   * Example: "overhead lighting, bright colors"
   */
  @IsOptional()
  @IsString()
  negativePrompt?: string;

  /**
   * Safety setting controlling whether people/face generation is allowed.
   * 'allow_adult': allow generation of adults only.
   * 'disallow': disallow inclusion of people/faces.
   * @default 'allow_adult'
   */
  @IsOptional()
  @IsEnum(['allow_adult', 'disallow'], { message: 'personGeneration must be either "allow_adult" or "disallow"' })
  personGeneration?: string = 'allow_adult';

  /**
   * The number of output videos requested (1-4).
   * @default 1
   */
  @IsOptional()
  @IsInt()
  @Min(1)
  @Max(4)
  sampleCount?: number = 1;

  /**
   * A seed to make generated videos deterministic: the same seed with
   * otherwise unchanged parameters produces the same videos.
   * Range: 0 - 4,294,967,295 (unsigned 32-bit)
   */
  @IsOptional()
  @IsInt()
  @Min(0)
  @Max(4294967295)
  seed?: number;

  /**
   * A Cloud Storage bucket URI to store the output video(s).
   * If not provided, base64-encoded video bytes are returned in the response.
   * Pattern: gs://BUCKET_NAME/SUBDIRECTORY
   */
  @IsOptional()
  @IsString()
  @Matches(/^gs:\/\/.+\/.+/, { message: 'storageUri must be a valid Cloud Storage URI (gs://BUCKET_NAME/SUBDIRECTORY)' })
  storageUri?: string;

  /**
   * Use Gemini to enhance the prompt before generation.
   * @default true
   */
  @IsOptional()
  @IsBoolean()
  enhancePrompt?: boolean = true;
}
package/src/index.ts ADDED
@@ -0,0 +1,9 @@
1
+ export * from './nest-vertex.module';
2
+ // Controller
3
+ export * from './controllers/nest-tts.controller'; // Export TTS controller
4
+ // Services
5
+ export * from './services/nest-vertex.service';
6
+ export * from './services/nest-tts.service'; // Export TTS service and interface
7
+ export * from './services/gemini-chat.service'; // Export GeminiChatService
8
+ // Models
9
+ export * from './dto/generate-image.dto'; // Export GenerateImageDto
@@ -0,0 +1,14 @@
1
+ import { Module } from '@nestjs/common';
2
+ import { NestVertexService } from './services/nest-vertex.service';
3
+ import { NestVertexController } from './controllers/nest-vertex.controller';
4
+ import { NestTtsService } from './services/nest-tts.service'; // Import the TTS service
5
+ import { NestTtsController } from './controllers/nest-tts.controller'; // Import the TTS controller
6
+ import { GeminiChatService } from './services/gemini-chat.service';
7
+ import { GeminiChatController } from './controllers/gemini-chat.controller';
8
+
9
+ @Module({
10
+ providers: [NestVertexService, NestTtsService, GeminiChatService], // Add NestTtsService and GeminiChatService to providers
11
+ exports: [NestVertexService, NestTtsService, GeminiChatService], // Export NestTtsService and GeminiChatService
12
+ controllers: [NestVertexController, NestTtsController, GeminiChatController], // Add NestTtsController and GeminiChatController to controllers
13
+ })
14
+ export class NestVertexModule {}
@@ -0,0 +1,190 @@
1
+ import { Injectable, Logger } from '@nestjs/common';
2
+ import { GoogleGenAI, Content, Model } from '@google/genai'; // Use @google/genai - Removed ListModelsResponse
3
+ import { ChatCompletionMessageParam } from 'groq-sdk/resources/chat/completions'; // Keep for input consistency for now
4
+
5
+ import { ChatMessageDict, ChatRole } from '@dataclouder/nest-agent-cards/models/agents.models'; // Use existing output format
6
+
7
+ @Injectable()
8
+ export class GeminiChatService {
9
+ private readonly logger = new Logger(GeminiChatService.name);
10
+ private readonly clientGenAi: GoogleGenAI;
11
+ private readonly modelName = 'gemini-1.5-flash'; // Updated model name
12
+
13
+ constructor() {
14
+ const apiKey = process.env.GEMINI_API_KEY; // Using GEMINI_API_KEY as per example
15
+ if (!apiKey) {
16
+ throw new Error('GEMINI_API_KEY environment variable not set.');
17
+ }
18
+ // Pass apiKey within an options object
19
+ this.clientGenAi = new GoogleGenAI({ apiKey });
20
+ this.logger.log(`GeminiChatService initialized with model: ${this.modelName}`);
21
+ }
22
+
23
+ // Maps incoming roles (Groq/OpenAI style) to Gemini roles for history
24
+ private mapToGeminiRole(role: ChatCompletionMessageParam['role']): 'user' | 'model' {
25
+ // Return string literal type
26
+ switch (role) {
27
+ case 'assistant':
28
+ return 'model';
29
+ case 'system':
30
+ // System messages aren't directly part of Gemini chat history in the same way.
31
+ // We extract it separately. Treat as 'user' if forced into history.
32
+ this.logger.warn('System role encountered, will be extracted, not directly added to Gemini history.');
33
+ return 'user'; // Fallback if needed, but ideally handled separately
34
+ case 'user':
35
+ default:
36
+ return 'user';
37
+ }
38
+ }
39
+
40
+ // Formats messages for the @google/genai Content structure (used by generateContent)
41
+ private formatMessagesToContent(messages: ChatCompletionMessageParam[]): Content[] {
42
+ return messages
43
+ .filter((msg) => msg.role !== 'system' && typeof msg.content === 'string') // Exclude system messages and ensure content is string
44
+ .map((msg) => ({
45
+ role: this.mapToGeminiRole(msg.role),
46
+ parts: [{ text: msg.content as string }],
47
+ }));
48
+ }
49
+
50
+ async chat(messages: ChatCompletionMessageParam[]): Promise<ChatMessageDict> {
51
+ if (!messages || messages.length === 0) {
52
+ // this.logger.warn('Gemini chat called with empty messages.');
53
+ return { content: '', role: ChatRole.Assistant, metadata: { finishReason: 'NO_INPUT' } };
54
+ }
55
+
56
+ // 1. Extract System Prompt
57
+ const systemMessage = messages.find((msg) => msg.role === 'system')?.content as string | undefined;
58
+ if (systemMessage) {
59
+ // this.logger.log(`System prompt extracted: "${systemMessage}"`);
60
+ }
61
+
62
+ // 2. Ensure last message is from user (or add one)
63
+ let lastMessage = messages[messages.length - 1];
64
+ if (lastMessage.role !== 'user' || typeof lastMessage.content !== 'string') {
65
+ // this.logger.warn('Last message was not from user or content invalid, adding default message.');
66
+ messages.push({ role: 'user', content: 'please say something to start/continue conversation' });
67
+ lastMessage = messages[messages.length - 1];
68
+ }
69
+
70
+ // 3. Format all non-system messages for the 'contents' parameter
71
+ const formattedContents = this.formatMessagesToContent(messages); // Pass all valid messages
72
+
73
+ // 4. Call generateContent (stateless)
74
+ try {
75
+ this.logger.debug(
76
+ `Sending request to Gemini model ${this.modelName} with ${
77
+ formattedContents.length
78
+ } content parts and system instruction: ${!!systemMessage}`,
79
+ );
80
+ // Reverted: Call generateContent directly on this.ai.models
81
+ const response = await this.clientGenAi.models.generateContent({
82
+ model: this.modelName,
83
+ contents: formattedContents,
84
+ config: { systemInstruction: systemMessage },
85
+ });
86
+
87
+ this.logger.debug(`Received Gemini response text.`);
88
+
89
+ // Assuming response structure provides text directly based on example
90
+ // Check the actual type definition of GenerateContentResponse if needed
91
+ const responseText = response?.text ?? ''; // Safely access text, default to empty string
92
+
93
+ return {
94
+ content: responseText,
95
+ role: ChatRole.Assistant,
96
+ metadata: {
97
+ // Try to access finishReason if available in this response structure
98
+ // finishReason: response?.candidates?.[0]?.finishReason,
99
+ },
100
+ };
101
+ } catch (error) {
102
+ this.logger.error(`Gemini chat.sendMessage failed: ${error.message}`, error.stack);
103
+ // Handle specific errors if possible (e.g., API key, rate limits, safety blocks)
104
+ throw new Error(`Failed to get Gemini chat completion: ${error.message}`);
105
+ }
106
+ }
107
+
108
+ async chatStream(messages: ChatCompletionMessageParam[]): Promise<AsyncIterable<ChatMessageDict>> {
109
+ if (!messages || messages.length === 0) {
110
+ this.logger.warn('Gemini chatStream called with empty messages.');
111
+ return (async function* () {})(); // Return an empty async iterable
112
+ }
113
+
114
+ // 1. Extract System Prompt
115
+ const systemMessage = messages.find((msg) => msg.role === 'system')?.content as string | undefined;
116
+ if (systemMessage) {
117
+ this.logger.log(`System prompt extracted for stream: "${systemMessage}"`);
118
+ }
119
+
120
+ // 2. Ensure last message is from user
121
+ const lastMessage = messages[messages.length - 1];
122
+ if (lastMessage.role !== 'user' || typeof lastMessage.content !== 'string') {
123
+ // Unlike non-streaming, we probably shouldn't add a message here.
124
+ // Streaming usually implies a direct user interaction initiated the stream.
125
+ this.logger.error('The last message for streaming must be from the user and contain string content.');
126
+ throw new Error('The last message for streaming must be from the user and contain string content.');
127
+ }
128
+
129
+ // 3. Format all non-system messages for the 'contents' parameter
130
+ const formattedContents = this.formatMessagesToContent(messages); // Pass all valid messages
131
+
132
+ // 4. Call generateContentStream (stateless)
133
+
134
+ // Define the async generator using an arrow function to capture 'this' context
135
+ const processStream = async function* (): AsyncIterable<ChatMessageDict> {
136
+ try {
137
+ this.logger.debug(
138
+ `Sending stream request to Gemini model ${this.modelName} with ${
139
+ formattedContents.length
140
+ } content parts and system instruction: ${!!systemMessage}`,
141
+ );
142
+ // Reverted: Call generateContentStream directly on this.ai.models
143
+ const stream = await this.ai.models.generateContentStream({
144
+ model: this.modelName,
145
+ contents: formattedContents,
146
+ config: { systemInstruction: systemMessage },
147
+ });
148
+
149
+ for await (const chunk of stream) {
150
+ // Assuming chunk structure provides text directly based on generateContent response
151
+ // Check the actual type definition of GenerateContentResponseStream if needed
152
+ const chunkText = chunk?.text ?? ''; // Safely access text
153
+ if (chunkText) {
154
+ // Only yield if there's text content
155
+ this.logger.debug(`Received Gemini stream chunk text.`);
156
+ yield {
157
+ content: chunkText,
158
+ role: ChatRole.Assistant,
159
+ metadata: {
160
+ isChunk: true,
161
+ // Try to access finishReason if available in this chunk structure
162
+ // finishReason: chunk?.candidates?.[0]?.finishReason,
163
+ },
164
+ };
165
+ }
166
+ }
167
+ this.logger.debug(`Gemini stream finished.`);
168
+ } catch (error) {
169
+ this.logger.error(`Gemini generateContentStream failed: ${error.message}`, error.stack);
170
+ // Re-throw or yield an error message chunk if preferred
171
+ throw new Error(`Failed to get Gemini chat stream completion: ${error.message}`);
172
+ }
173
+ }.bind(this); // Bind 'this' explicitly for safety, although arrow functions usually handle it
174
+
175
+ // Return the invoked generator
176
+ return processStream();
177
+ }
178
+
179
+ // New method to list available models
180
+ async listModels(): Promise<Record<string, string>[]> {
181
+ // Last reserach there is no method to return availible models, so i had to hard coded
182
+ return [
183
+ { id: 'gemini-2.5-flash-preview-04-17' },
184
+ { id: 'gemini-2.5-pro-preview-03-25' },
185
+ { id: 'gemini-2.0-flash' },
186
+ { id: 'gemini-2.0-flash-lite' },
187
+ { id: 'gemini-2.0-flash-live-001' },
188
+ ];
189
+ }
190
+ }
@@ -0,0 +1,119 @@
1
+ import { Injectable, Logger } from '@nestjs/common';
2
+ import { TextToSpeechClient } from '@google-cloud/text-to-speech';
3
+ import { google } from '@google-cloud/text-to-speech/build/protos/protos'; // For types
4
+
5
+ // Define interfaces for better type safety
6
+ export interface SynthesizeSpeechInput {
7
+ text: string;
8
+ languageCode?: string;
9
+ ssmlGender?: google.cloud.texttospeech.v1.SsmlVoiceGender;
10
+ voiceName?: string; // temporary select onlt one voice or voice name
11
+ voice?: string; // optional voice name
12
+ audioEncoding?: google.cloud.texttospeech.v1.AudioEncoding;
13
+ generateTranscription?: boolean;
14
+ speed?: string;
15
+ speedRate?: number;
16
+ }
17
+
18
+ @Injectable()
19
+ export class NestTtsService {
20
+ // Keeping class name for now
21
+ private readonly logger = new Logger('VertexTtsService'); // Updated logger context
22
+ private client: TextToSpeechClient;
23
+
24
+ constructor() {
25
+ // Instantiates a client. Needs GOOGLE_APPLICATION_CREDENTIALS env var set.
26
+ // See: https://cloud.google.com/docs/authentication/provide-credentials-adc#local-dev
27
+ this.client = new TextToSpeechClient();
28
+ this.logger.log('Google TextToSpeechClient initialized.');
29
+ }
30
+
31
+ getHello(): string {
32
+ return 'Hello from VertexTtsService!'; // Updated message
33
+ }
34
+
35
+ /**
36
+ * Synthesizes speech from text using Google Cloud TTS.
37
+ * @param input - The text and configuration for speech synthesis.
38
+ * @returns A Buffer containing the audio data (e.g., MP3).
39
+ * @throws Error if synthesis fails.
40
+ */
41
+ async synthesizeSpeech(input: SynthesizeSpeechInput): Promise<Buffer | null> {
42
+ const {
43
+ text,
44
+ languageCode, // Default language
45
+ ssmlGender = 'NEUTRAL', // Default gender
46
+ voiceName, // Optional specific voice name
47
+ voice,
48
+ audioEncoding = 'MP3', // Default audio encoding
49
+ speedRate,
50
+ } = input;
51
+ const voiceId = voiceName || voice;
52
+ console.log('Selected voice:', voiceId);
53
+
54
+ // Seems langcode is required take from voice name
55
+ const langCode = !languageCode ? voiceId.slice(0, 5) : languageCode;
56
+
57
+ const request: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {
58
+ input: { text: text },
59
+ // Select the language and SSML voice gender (optional)
60
+ voice: voiceId ? { name: voiceId, languageCode: langCode } : { languageCode: langCode, ssmlGender: ssmlGender },
61
+ // Select the type of audio encoding and speaking rate
62
+ audioConfig: {
63
+ audioEncoding: audioEncoding,
64
+ speakingRate: speedRate, // Add the speaking rate here
65
+ },
66
+ };
67
+
68
+ this.logger.log(`Synthesizing speech for text: "${text.substring(0, 50)}..."`);
69
+
70
+ try {
71
+ console.log('request', request);
72
+ // Performs the text-to-speech request
73
+ const [response] = await this.client.synthesizeSpeech(request);
74
+
75
+ if (response.audioContent instanceof Uint8Array) {
76
+ this.logger.log('Speech synthesis successful.');
77
+ return Buffer.from(response.audioContent);
78
+ } else {
79
+ this.logger.warn('No audio content received from TTS API.');
80
+ return null;
81
+ }
82
+ } catch (error) {
83
+ this.logger.error('Error synthesizing speech:', error);
84
+ throw new Error(`Failed to synthesize speech: ${error.message}`);
85
+ }
86
+ }
87
+
88
+ /**
89
+ * Lists available voices from Google Cloud TTS.
90
+ * Lists available voices from Google Cloud TTS, optionally filtered by language code.
91
+ * @param languageCode - Optional language code (e.g., 'en-US') to filter voices.
92
+ * @returns A list of available voices.
93
+ * @throws Error if listing voices fails.
94
+ */
95
+ async listVoices(languageCode?: string): Promise<google.cloud.texttospeech.v1.IVoice[]> {
96
+ const request: google.cloud.texttospeech.v1.IListVoicesRequest = {};
97
+ if (languageCode) {
98
+ request.languageCode = languageCode;
99
+ this.logger.log(`Fetching list of available voices for language: ${languageCode}...`);
100
+ } else {
101
+ this.logger.log('Fetching list of all available voices...');
102
+ }
103
+
104
+ try {
105
+ const [response] = await this.client.listVoices(request);
106
+ if (response.voices) {
107
+ const count = response.voices.length;
108
+ this.logger.log(`Successfully fetched ${count} voice${count === 1 ? '' : 's'}${languageCode ? ` for language ${languageCode}` : ''}.`);
109
+ return response.voices;
110
+ } else {
111
+ this.logger.warn('No voices received from TTS API.');
112
+ return [];
113
+ }
114
+ } catch (error) {
115
+ this.logger.error('Error listing voices:', error);
116
+ throw new Error(`Failed to list voices: ${error.message}`);
117
+ }
118
+ }
119
+ }
@@ -0,0 +1,184 @@
1
+ import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
2
+ // Assuming GenerateImagesResponse might be the type, add if available, otherwise use any/unknown
3
+ import { GoogleGenAI, GenerateVideosOperation, GenerateImagesResponse } from '@google/genai';
4
+ import { GenerateVideoDto } from '../dto/generate-video.dto'; // Updated import path
5
+ import { GenerateImageDto } from '../dto/generate-image.dto'; // Updated import path
6
+
7
+ @Injectable()
8
+ export class NestVertexService implements OnModuleInit {
9
+ // Renamed class
10
+ private genAi: GoogleGenAI;
11
+ private readonly logger = new Logger(NestVertexService.name); // Updated logger name
12
+ private readonly videoModelName = 'veo-2.0-generate-001';
13
+ private readonly imageModelName = 'imagen-3.0-generate-002'; // Added image model name
14
+
15
+ onModuleInit() {
16
+ const apiKey = process.env.GEMINI_API_KEY;
17
+ if (!apiKey) {
18
+ this.logger.error('GEMINI_API_KEY environment variable not set. NestVertexService will not function.'); // Updated log message
19
+ // Optionally throw an error to prevent the application from starting
20
+ // throw new Error('GEMINI_API_KEY environment variable not set.');
21
+ } else {
22
+ console.log('GEMINI_API_KEY environment variable set: ', apiKey);
23
+ this.genAi = new GoogleGenAI({ apiKey });
24
+ this.logger.log('GoogleGenAI client initialized successfully.');
25
+ }
26
+ }
27
+
28
+ /**
29
+ * Starts the video generation process using Google Gen AI.
30
+ * Starts the video generation process using Google Gen AI based on the provided DTO.
31
+ * @param generateVideoDto The DTO containing all parameters for video generation.
32
+ * @returns A Promise resolving to the video generation operation object.
33
+ */
34
+ async startVideoGeneration(generateVideoDto: GenerateVideoDto): Promise<GenerateVideosOperation> {
35
+ if (!this.genAi) {
36
+ throw new Error('GoogleGenAI client not initialized. Check GEMINI_API_KEY.');
37
+ }
38
+
39
+ this.logger.log(`Starting video generation with DTO: ${JSON.stringify(generateVideoDto)}`);
40
+
41
+ // Construct the request object based on DTO, likely structure for the SDK
42
+ // Note: The exact structure might need verification against SDK docs/examples if this fails
43
+ const videoRequest = {
44
+ model: this.videoModelName,
45
+ prompt: generateVideoDto.prompt,
46
+ // Image handling - assuming it's a top-level param or within config
47
+ ...(generateVideoDto.image && {
48
+ image: {
49
+ // Assuming image structure is nested like this
50
+ bytesBase64Encoded: generateVideoDto.image.bytesBase64Encoded,
51
+ gcsUri: generateVideoDto.image.gcsUri,
52
+ mimeType: generateVideoDto.image.mimeType,
53
+ },
54
+ }),
55
+ // Configuration parameters - place them where the SDK expects (e.g., top-level or config object)
56
+ // Trying top-level first based on common patterns
57
+ durationSeconds: generateVideoDto.durationSeconds,
58
+ aspectRatio: generateVideoDto.aspectRatio,
59
+ negativePrompt: generateVideoDto.negativePrompt,
60
+ personGeneration: generateVideoDto.personGeneration,
61
+ sampleCount: generateVideoDto.sampleCount,
62
+ seed: generateVideoDto.seed,
63
+ storageUri: generateVideoDto.storageUri,
64
+ enhancePrompt: generateVideoDto.enhancePrompt,
65
+ // config: { // Alternative: place parameters inside a 'config' object if needed
66
+ // numberOfVideos: generateVideoDto.sampleCount, // Example if sampleCount maps here
67
+ // ... other config params
68
+ // }
69
+ };
70
+
71
+ // Remove undefined optional parameters to avoid sending empty values
72
+ Object.keys(videoRequest).forEach((key) => videoRequest[key] === undefined && delete videoRequest[key]);
73
+ if (videoRequest.image) {
74
+ if (videoRequest.image.bytesBase64Encoded === undefined) delete videoRequest.image.bytesBase64Encoded;
75
+ if (videoRequest.image.gcsUri === undefined) delete videoRequest.image.gcsUri;
76
+ // If both are undefined after cleanup, remove the image object itself
77
+ if (Object.keys(videoRequest.image).length === 1 && videoRequest.image.mimeType) {
78
+ // If only mimeType remains, it's likely an invalid state unless mimeType alone is useful
79
+ // For safety, let's remove image if both content fields are missing
80
+ if (!videoRequest.image.bytesBase64Encoded && !videoRequest.image.gcsUri) {
81
+ delete videoRequest.image;
82
+ }
83
+ } else if (Object.keys(videoRequest.image).length === 0) {
84
+ delete videoRequest.image; // Remove empty image object
85
+ }
86
+ }
87
+
88
+ try {
89
+ // Pass the constructed request object
90
+ const operation = await this.genAi.models.generateVideos(videoRequest);
91
+ this.logger.log(`Video generation operation started: ${operation.name}`);
92
+ return operation;
93
+ } catch (error) {
94
+ this.logger.error('Error starting video generation:', error.message || error);
95
+ // Re-throw or handle the error appropriately
96
+ throw error;
97
+ }
98
+ }
99
+
100
+ /**
101
+ * Checks the status of a video generation operation.
102
+ * IMPORTANT SDK Limitation: The `@google/genai` SDK's `getVideosOperation` method
103
+ * requires the full operation object, not just the name string, to be passed
104
+ * in its `operation` parameter (based on TypeScript errors and official examples).
105
+ * Therefore, this method cannot be directly called from a simple GET endpoint
106
+ * that only provides the operation name string.
107
+ *
108
+ * To implement polling, the client would typically need to store the initial
109
+ * operation object returned by `startVideoGeneration` and pass it back, or
110
+ * the server would need to manage operation state differently (e.g., store
111
+ * operations in a database and poll internally).
112
+ *
113
+ * This method is left here as a placeholder demonstrating the intended SDK call,
114
+ * but it will likely fail if called with just the name string due to the SDK design.
115
+ *
116
+ * @param operationName The name of the operation (e.g., "operations/...")
117
+ * @returns A Promise resolving to the updated operation object.
118
+ */
119
+ async checkVideoOperationStatus(operationName: string): Promise<GenerateVideosOperation> {
120
+ if (!this.genAi) {
121
+ throw new Error('GoogleGenAI client not initialized. Check GEMINI_API_KEY.');
122
+ }
123
+ this.logger.warn(
124
+ `Attempting to check status for operation: ${operationName}. ` +
125
+ `Note: This may fail due to SDK requiring the full operation object for getVideosOperation.`,
126
+ );
127
+ try {
128
+ // This call expects the full operation object, not just the name.
129
+ // Passing only the name string will likely cause a runtime or type error.
130
+ const oper: GenerateVideosOperation = { name: operationName };
131
+ const operation = await this.genAi.operations.getVideosOperation({
132
+ operation: oper, // This line is problematic based on SDK behavior
133
+ });
134
+
135
+ this.logger.log(`Operation ${operationName} status: ${operation.done ? 'Done' : 'Processing'}`);
136
+ return operation;
137
+ } catch (error) {
138
+ this.logger.error(`Error checking status for operation ${operationName}:`, error);
139
+ // The error might be a TypeError due to the incorrect parameter type.
140
+ throw error;
141
+ }
142
+ }
143
+
144
+ /**
145
+ * Starts the image generation process using Google Gen AI.
146
+ * @param generateImageDto The DTO containing parameters for image generation.
147
+ * @returns A Promise resolving to the image generation response.
148
+ */
149
+ async startImageGeneration(generateImageDto: GenerateImageDto): Promise<GenerateImagesResponse> {
150
+ // Use GenerateImagesResponse or adjust type as needed
151
+ if (!this.genAi) {
152
+ throw new Error('GoogleGenAI client not initialized. Check GEMINI_API_KEY/Vertex AI setup.');
153
+ }
154
+
155
+ this.logger.log(`Starting image generation with DTO: ${JSON.stringify(generateImageDto)}`);
156
+
157
+ // Construct the request object for the SDK based on the example
158
+ const imageRequest = {
159
+ model: this.imageModelName,
160
+ prompt: generateImageDto.prompt,
161
+ config: {
162
+ numberOfImages: generateImageDto.numberOfImages,
163
+ aspectRatio: generateImageDto.aspectRatio,
164
+ negativePrompt: generateImageDto.negativePrompt,
165
+ // includeRaiReason: true, // Optional, based on example
166
+ // Add other config parameters from DTO if applicable
167
+ },
168
+ };
169
+
170
+ // Remove undefined optional parameters from config
171
+ Object.keys(imageRequest.config).forEach((key) => imageRequest.config[key] === undefined && delete imageRequest.config[key]);
172
+
173
+ try {
174
+ // Pass the constructed request object
175
+ const response = await this.genAi.models.generateImages(imageRequest);
176
+ this.logger.log(`Image generation successful for prompt: "${generateImageDto.prompt}"`);
177
+ // Consider what part of the response to return, e.g., response.generatedImages
178
+ return response;
179
+ } catch (error) {
180
+ this.logger.error('Error starting image generation:', error.message || error);
181
+ throw error;
182
+ }
183
+ }
184
+ }