@dataclouder/nest-vertex 0.0.57 → 0.0.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/controllers/stt/adapter-stt.controller.d.ts +10 -0
- package/controllers/stt/adapter-stt.controller.js +79 -0
- package/controllers/stt/groq-stt.controller.js +1 -1
- package/controllers/stt/local-stt.controller.js +1 -1
- package/controllers/tts/vertex-gemini-tts.controller.js +1 -1
- package/controllers/tts/vertex-tts-adapter.controller.js +1 -1
- package/models/adapter.models.d.ts +1 -1
- package/nest-vertex.module.js +2 -0
- package/package.json +1 -1
- package/services/vertex-gemini-chat.service.d.ts +1 -0
- package/services/vertex-gemini-chat.service.js +21 -14
- package/services/whisper/local-stt.service.js +13 -2
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { type MulterFile } from '@webundsoehne/nest-fastify-file-upload';
|
|
2
|
+
import { GroqService } from '../../services/whisper/groq.service';
|
|
3
|
+
import { LocalSttService } from '../../services/whisper/local-stt.service';
|
|
4
|
+
/**
 * Ambient declaration of the speech-to-text adapter controller.
 * It is constructed with both a Groq-backed and a local STT service and
 * exposes a single upload handler that chooses between them per request.
 */
export declare class AdapterSttController {
|
|
5
|
+
private readonly groqService;
|
|
6
|
+
private readonly localSttService;
|
|
7
|
+
private readonly logger;
|
|
8
|
+
/** Receives the two transcription backends the handler can dispatch to. */
constructor(groqService: GroqService, localSttService: LocalSttService);
|
|
9
|
+
/**
 * Transcribes an uploaded audio file.
 * @param file - Uploaded file; the implementation reads its buffer, original name and mimetype.
 * @param provider - Backend selector; the implementation defaults it to 'local' when omitted.
 * @returns The selected service's transcription result, or an `{ error, ... }` object
 *          when no buffer was uploaded or the transcription call fails.
 */
processAudio(file: MulterFile, provider?: 'local' | 'groq'): Promise<any>;
|
|
10
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted helper that applies a list of decorators to a class
// (called with 2 arguments) or to a class member (called with 4 arguments).
// An already-defined this.__decorate is reused when present.
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
3
|
+
// c = number of arguments actually passed. With fewer than 3 the running
// result r starts as the target itself; otherwise it starts as the property
// descriptor, looked up from the target when desc is null.
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
4
|
+
// Delegate to Reflect.decorate when the runtime provides it.
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
5
|
+
// Otherwise walk the decorators from last to first; a decorator's return
// value replaces r only when it is truthy.
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
6
|
+
// With more than 3 arguments and a truthy r, r is installed on the target
// via Object.defineProperty; r is returned in every case.
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
7
|
+
};
|
|
8
|
+
// Compiler-emitted helper that returns Reflect.metadata(k, v) when
// Reflect.metadata exists; otherwise the function returns undefined.
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
9
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
10
|
+
};
|
|
11
|
+
// Compiler-emitted helper that adapts a parameter decorator: the returned
// function forwards (target, key) plus the fixed paramIndex to it.
var __param = (this && this.__param) || function (paramIndex, decorator) {
|
|
12
|
+
return function (target, key) { decorator(target, key, paramIndex); }
|
|
13
|
+
};
|
|
14
|
+
var AdapterSttController_1;
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.AdapterSttController = void 0;
|
|
17
|
+
const common_1 = require("@nestjs/common");
|
|
18
|
+
const nest_fastify_file_upload_1 = require("@webundsoehne/nest-fastify-file-upload");
|
|
19
|
+
const swagger_1 = require("@nestjs/swagger");
|
|
20
|
+
const nest_core_1 = require("@dataclouder/nest-core");
|
|
21
|
+
const groq_service_1 = require("../../services/whisper/groq.service");
|
|
22
|
+
const local_stt_service_1 = require("../../services/whisper/local-stt.service");
|
|
23
|
+
let AdapterSttController = AdapterSttController_1 = class AdapterSttController {
|
|
24
|
+
groqService;
|
|
25
|
+
localSttService;
|
|
26
|
+
logger = new common_1.Logger(AdapterSttController_1.name);
|
|
27
|
+
constructor(groqService, localSttService) {
|
|
28
|
+
this.groqService = groqService;
|
|
29
|
+
this.localSttService = localSttService;
|
|
30
|
+
}
|
|
31
|
+
async processAudio(file, provider = 'local') {
|
|
32
|
+
this.logger.log(`Receive request with provider: ${provider}`);
|
|
33
|
+
if (!file || !file.buffer) {
|
|
34
|
+
this.logger.error('No file buffer received.');
|
|
35
|
+
return { error: 'No file uploaded or file buffer is missing.' };
|
|
36
|
+
}
|
|
37
|
+
this.logger.log(`Received file: ${file.originalname}, mimetype: ${file.mimetype}, size: ${file.size}`);
|
|
38
|
+
try {
|
|
39
|
+
let result;
|
|
40
|
+
if (provider === 'groq') {
|
|
41
|
+
result = await this.groqService.transcribeAudio(file.buffer, file.originalname, file.mimetype);
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
result = await this.localSttService.transcribeAudio(file.buffer, file.originalname, file.mimetype);
|
|
45
|
+
}
|
|
46
|
+
return result;
|
|
47
|
+
}
|
|
48
|
+
catch (error) {
|
|
49
|
+
this.logger.error('Error during transcription process:', error);
|
|
50
|
+
return { error: 'Failed to transcribe audio.', details: error.message };
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
};
|
|
54
|
+
exports.AdapterSttController = AdapterSttController;
|
|
55
|
+
__decorate([
|
|
56
|
+
(0, common_1.Post)('transcribe-bytes'),
|
|
57
|
+
(0, common_1.UseInterceptors)((0, nest_fastify_file_upload_1.FileInterceptor)('file')),
|
|
58
|
+
(0, swagger_1.ApiConsumes)('multipart/form-data'),
|
|
59
|
+
(0, nest_fastify_file_upload_1.ApiFileBody)('file'),
|
|
60
|
+
(0, swagger_1.ApiQuery)({
|
|
61
|
+
name: 'provider',
|
|
62
|
+
enum: ['local', 'groq'],
|
|
63
|
+
required: false,
|
|
64
|
+
description: 'The provider to use for transcription. Defaults to "local".',
|
|
65
|
+
}),
|
|
66
|
+
__param(0, (0, common_1.UploadedFile)('file')),
|
|
67
|
+
__param(1, (0, common_1.Query)('provider')),
|
|
68
|
+
__metadata("design:type", Function),
|
|
69
|
+
__metadata("design:paramtypes", [Object, String]),
|
|
70
|
+
__metadata("design:returntype", Promise)
|
|
71
|
+
], AdapterSttController.prototype, "processAudio", null);
|
|
72
|
+
exports.AdapterSttController = AdapterSttController = AdapterSttController_1 = __decorate([
|
|
73
|
+
(0, swagger_1.ApiTags)('Speech-to-Text'),
|
|
74
|
+
(0, common_1.Controller)('api/ai-services/adapter/stt'),
|
|
75
|
+
(0, common_1.UseFilters)(nest_core_1.AllExceptionsHandler),
|
|
76
|
+
__metadata("design:paramtypes", [groq_service_1.GroqService,
|
|
77
|
+
local_stt_service_1.LocalSttService])
|
|
78
|
+
], AdapterSttController);
|
|
79
|
+
//# sourceMappingURL=adapter-stt.controller.js.map
|
|
@@ -55,7 +55,7 @@ __decorate([
|
|
|
55
55
|
], GroqSttController.prototype, "processAudio", null);
|
|
56
56
|
exports.GroqSttController = GroqSttController = GroqSttController_1 = __decorate([
|
|
57
57
|
(0, swagger_1.ApiTags)('Speech-to-Text'),
|
|
58
|
-
(0, common_1.Controller)('api/ai-services/groq
|
|
58
|
+
(0, common_1.Controller)('api/ai-services/groq/stt'),
|
|
59
59
|
(0, common_1.UseFilters)(nest_core_1.AllExceptionsHandler),
|
|
60
60
|
__metadata("design:paramtypes", [groq_service_1.GroqService])
|
|
61
61
|
], GroqSttController);
|
|
@@ -55,7 +55,7 @@ __decorate([
|
|
|
55
55
|
], LocalSttController.prototype, "processAudio", null);
|
|
56
56
|
exports.LocalSttController = LocalSttController = LocalSttController_1 = __decorate([
|
|
57
57
|
(0, swagger_1.ApiTags)('Speech-to-Text'),
|
|
58
|
-
(0, common_1.Controller)('api/ai-services/local
|
|
58
|
+
(0, common_1.Controller)('api/ai-services/local/stt'),
|
|
59
59
|
(0, common_1.UseFilters)(nest_core_1.AllExceptionsHandler),
|
|
60
60
|
__metadata("design:paramtypes", [local_stt_service_1.LocalSttService])
|
|
61
61
|
], LocalSttController);
|
|
@@ -79,7 +79,7 @@ __decorate([
|
|
|
79
79
|
__metadata("design:returntype", Promise)
|
|
80
80
|
], VertexGeminiTtsController.prototype, "synthesizeSpeech", null);
|
|
81
81
|
exports.VertexGeminiTtsController = VertexGeminiTtsController = __decorate([
|
|
82
|
-
(0, swagger_1.ApiTags)('
|
|
82
|
+
(0, swagger_1.ApiTags)('Text To Speech Gemini'),
|
|
83
83
|
(0, common_1.Controller)('api/vertex-gemini/tts'),
|
|
84
84
|
(0, common_1.UseFilters)(nest_core_1.AllExceptionsHandler),
|
|
85
85
|
__metadata("design:paramtypes", [vertex_gemini_tts_service_1.VertexGeminiTtsService])
|
|
@@ -104,7 +104,7 @@ __decorate([
|
|
|
104
104
|
__metadata("design:returntype", Promise)
|
|
105
105
|
], VertexTtsAdapterController.prototype, "listVoices", null);
|
|
106
106
|
exports.VertexTtsAdapterController = VertexTtsAdapterController = __decorate([
|
|
107
|
-
(0, swagger_1.ApiTags)('
|
|
107
|
+
(0, swagger_1.ApiTags)('Text To Speech Adapter'),
|
|
108
108
|
(0, common_1.Controller)('api/ai-services/adapter/tts'),
|
|
109
109
|
(0, common_2.UseFilters)(nest_core_1.AllExceptionsHandler),
|
|
110
110
|
__metadata("design:paramtypes", [vertex_tts_service_1.VertextTtsService])
|
package/nest-vertex.module.js
CHANGED
|
@@ -45,6 +45,7 @@ const nest_auth_1 = require("@dataclouder/nest-auth");
|
|
|
45
45
|
const groq_llm_service_1 = require("./services/llm/groq-llm.service");
|
|
46
46
|
const groq_llm_controller_1 = require("./controllers/llm/groq-llm.controller");
|
|
47
47
|
const video_gen_adapter_controller_1 = require("./controllers/video/video-gen-adapter.controller");
|
|
48
|
+
const adapter_stt_controller_1 = require("./controllers/stt/adapter-stt.controller");
|
|
48
49
|
let NestVertexModule = class NestVertexModule {
|
|
49
50
|
};
|
|
50
51
|
exports.NestVertexModule = NestVertexModule;
|
|
@@ -112,6 +113,7 @@ exports.NestVertexModule = NestVertexModule = __decorate([
|
|
|
112
113
|
groq_stt_controller_1.GroqSttController,
|
|
113
114
|
local_stt_controller_1.LocalSttController,
|
|
114
115
|
vertex_gemini_tts_controller_1.VertexGeminiTtsController,
|
|
116
|
+
adapter_stt_controller_1.AdapterSttController,
|
|
115
117
|
],
|
|
116
118
|
})
|
|
117
119
|
], NestVertexModule);
|
package/package.json
CHANGED
|
@@ -15,6 +15,7 @@ export declare class GeminiChatService {
|
|
|
15
15
|
chatStream(messages: MessageLLM[], model?: string, keyType?: TierType): Promise<AsyncIterable<ChatMessageDict>>;
|
|
16
16
|
listModels(): Promise<Record<string, string>[]>;
|
|
17
17
|
getDefaultQualityModel(quality: EModelQuality): string;
|
|
18
|
+
private getBestModel;
|
|
18
19
|
private _extractJsonWithRecovery;
|
|
19
20
|
chatAndExtractJson(messages: MessageLLM[], model?: string, keyType?: TierType): Promise<ChatJsonResponse>;
|
|
20
21
|
describeImageByUrl(dto: DescribeImageRequestAdapter): Promise<any>;
|
|
@@ -93,8 +93,8 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
93
93
|
this.logger.debug(`Received Gemini response text. ${response?.text.slice(0, 50).replace(/\n/g, '')} ...`);
|
|
94
94
|
const responseText = response?.text ?? '';
|
|
95
95
|
const tokens = {
|
|
96
|
-
input: response?.usageMetadata?.
|
|
97
|
-
output: response?.usageMetadata?.
|
|
96
|
+
input: response?.usageMetadata?.promptTokenCount,
|
|
97
|
+
output: response?.usageMetadata?.candidatesTokenCount,
|
|
98
98
|
total: response?.usageMetadata?.totalTokenCount,
|
|
99
99
|
};
|
|
100
100
|
return {
|
|
@@ -201,6 +201,19 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
201
201
|
return gemini_models_1.GeminiModels.Gemini2_5Pro;
|
|
202
202
|
}
|
|
203
203
|
}
|
|
204
|
+
getBestModel(conversation) {
|
|
205
|
+
if (!conversation.model) {
|
|
206
|
+
return { provider: 'google', modelName: gemini_models_1.GeminiModels.Gemini2_5Lite };
|
|
207
|
+
}
|
|
208
|
+
const { quality, modelName, provider } = conversation.model;
|
|
209
|
+
if (quality) {
|
|
210
|
+
return { provider: 'google', modelName: this.getDefaultQualityModel(quality) };
|
|
211
|
+
}
|
|
212
|
+
if (modelName && provider) {
|
|
213
|
+
return { provider, modelName };
|
|
214
|
+
}
|
|
215
|
+
return { provider: 'google', modelName: gemini_models_1.GeminiModels.Gemini2_5Lite };
|
|
216
|
+
}
|
|
204
217
|
async _extractJsonWithRecovery(responseText, model, keyType) {
|
|
205
218
|
try {
|
|
206
219
|
const json = (0, llm_models_1.extractJsonFromResponse)(responseText);
|
|
@@ -286,36 +299,30 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
286
299
|
}
|
|
287
300
|
async chatWithConversation(conversation) {
|
|
288
301
|
const startTime = Date.now();
|
|
289
|
-
|
|
290
|
-
conversation.model = { provider: 'google', modelName: gemini_models_1.GeminiModels.Gemini2_5Lite, id: 'no-id' };
|
|
291
|
-
}
|
|
302
|
+
const { provider, modelName } = this.getBestModel(conversation);
|
|
292
303
|
const tierType = conversation.tierType || key_balancer_models_1.TierType.TIER_1;
|
|
293
|
-
if (conversation?.model?.quality) {
|
|
294
|
-
conversation.model.provider = 'google';
|
|
295
|
-
conversation.model.modelName = this.getDefaultQualityModel(conversation.model.quality);
|
|
296
|
-
}
|
|
297
304
|
const returnJson = conversation.returnJson;
|
|
298
305
|
if (returnJson) {
|
|
299
|
-
const obj = await this.chatAndExtractJson(conversation.messages,
|
|
306
|
+
const obj = await this.chatAndExtractJson(conversation.messages, modelName, tierType);
|
|
300
307
|
const endTime = Date.now();
|
|
301
308
|
const processTime = (endTime - startTime) / 1000;
|
|
302
309
|
const metadata = {
|
|
303
310
|
type: 'json',
|
|
304
|
-
provider
|
|
305
|
-
model:
|
|
311
|
+
provider,
|
|
312
|
+
model: modelName,
|
|
306
313
|
processTime,
|
|
307
314
|
...obj?.metadata,
|
|
308
315
|
};
|
|
309
316
|
return { content: obj.json, role: adapter_models_1.ChatRole.Assistant, metadata };
|
|
310
317
|
}
|
|
311
318
|
else {
|
|
312
|
-
const response = await this.chat(conversation.messages,
|
|
319
|
+
const response = await this.chat(conversation.messages, modelName, tierType);
|
|
313
320
|
const endTime = Date.now();
|
|
314
321
|
const processTime = (endTime - startTime) / 1000;
|
|
315
322
|
return {
|
|
316
323
|
content: response.content,
|
|
317
324
|
role: response.role,
|
|
318
|
-
metadata: { provider
|
|
325
|
+
metadata: { provider, model: modelName, processTime, tokens: response.metadata.tokens },
|
|
319
326
|
};
|
|
320
327
|
}
|
|
321
328
|
}
|
|
@@ -51,8 +51,10 @@ let LocalSttService = LocalSttService_1 = class LocalSttService {
|
|
|
51
51
|
logger = new common_1.Logger(LocalSttService_1.name);
|
|
52
52
|
openai;
|
|
53
53
|
constructor() {
|
|
54
|
+
const aiServerHost = process.env.AI_LAB_HOST;
|
|
55
|
+
console.log('Connecting to host AI_LAB_HOST', aiServerHost);
|
|
54
56
|
this.openai = new openai_1.default({
|
|
55
|
-
baseURL:
|
|
57
|
+
baseURL: `${aiServerHost}:3171/v1`,
|
|
56
58
|
apiKey: '',
|
|
57
59
|
});
|
|
58
60
|
}
|
|
@@ -103,13 +105,22 @@ let LocalSttService = LocalSttService_1 = class LocalSttService {
|
|
|
103
105
|
this.logger.warn(`Could not determine valid extension for mime type ${mimeType}. Using original filename: ${originalFileName}. Transcription may fail if the filename lacks a supported extension.`);
|
|
104
106
|
}
|
|
105
107
|
try {
|
|
108
|
+
console.log(' -> Request to...', this.openai.baseURL);
|
|
106
109
|
const result = await this.openai.audio.transcriptions.create({
|
|
107
110
|
model: 'rtlingo/mobiuslabsgmbh-faster-whisper-large-v3-turbo',
|
|
108
111
|
file: await (0, openai_1.toFile)(fileBuffer, effectiveFileName, { type: mimeType }),
|
|
109
112
|
response_format: 'verbose_json',
|
|
110
113
|
timestamp_granularities: ['word'],
|
|
111
114
|
});
|
|
112
|
-
|
|
115
|
+
if (result?.segments) {
|
|
116
|
+
delete result.segments;
|
|
117
|
+
}
|
|
118
|
+
if (result?.words?.length) {
|
|
119
|
+
result.words.forEach((word) => {
|
|
120
|
+
delete word?.probability;
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
this.logger.log(`Transcription successful for file: ${result.text}, lang: ${result.language}, duration: ${result.duration}`);
|
|
113
124
|
return result;
|
|
114
125
|
}
|
|
115
126
|
catch (error) {
|