@dataclouder/nest-vertex 0.0.66 → 0.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/services/google-genai.service.js +2 -8
- package/services/key-balancer-api.service.d.ts +9 -2
- package/services/key-balancer-api.service.js +5 -5
- package/services/vertex-gemini-chat.service.d.ts +1 -0
- package/services/vertex-gemini-chat.service.js +117 -68
- package/services/whisper/groq.service.js +1 -1
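package/package.json
CHANGED
(hunk not captured in this extract; presumably the one-line version bump 0.0.66 → 0.0.68)
package/services/google-genai.service.js
CHANGED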
|
@@ -37,20 +37,14 @@ let GoogleGenaiService = GoogleGenaiService_1 = class GoogleGenaiService {
|
|
|
37
37
|
}, null);
|
|
38
38
|
if (balancedKey?.key) {
|
|
39
39
|
this.logger.debug(`⚖️ Using balanced key: ${balancedKey.id}) ${balancedKey.name} - ${balancedKey.key} `);
|
|
40
|
-
return {
|
|
41
|
-
client: new genai_1.GoogleGenAI({ apiKey: balancedKey.key }),
|
|
42
|
-
balancedKey: balancedKey,
|
|
43
|
-
};
|
|
40
|
+
return { client: new genai_1.GoogleGenAI({ apiKey: balancedKey.key }), balancedKey: balancedKey };
|
|
44
41
|
}
|
|
45
42
|
}
|
|
46
43
|
catch (error) {
|
|
47
44
|
this.logger.error(`Failed to get balanced key: ${error.message}`);
|
|
48
45
|
const randomKey = process.env.GEMINI_API_KEY;
|
|
49
46
|
console.log(`⚖️ Using random key ::::::::: ${randomKey} 🚨🚨🚨🚨🚨🚨`);
|
|
50
|
-
return {
|
|
51
|
-
client: new genai_1.GoogleGenAI({ apiKey: randomKey }),
|
|
52
|
-
balancedKey: null,
|
|
53
|
-
};
|
|
47
|
+
return { client: new genai_1.GoogleGenAI({ apiKey: randomKey }), balancedKey: null };
|
|
54
48
|
}
|
|
55
49
|
if (!this.defaultApiKey) {
|
|
56
50
|
throw new nest_core_1.AppException({
|
|
package/services/key-balancer-api.service.d.ts
CHANGED
|
@@ -2,10 +2,17 @@ import { HttpService } from '@nestjs/axios';
|
|
|
2
2
|
import { AvailableKeyResult, ModelType, TierType } from '../models/key-balancer.models';
|
|
3
3
|
export interface PostQueryKeyRequest {
|
|
4
4
|
provider: string;
|
|
5
|
+
service?: string;
|
|
5
6
|
tierType?: TierType;
|
|
6
7
|
aiType?: ModelType;
|
|
7
8
|
keyType?: string;
|
|
8
|
-
|
|
9
|
+
}
|
|
10
|
+
export interface KeyFailureDTO {
|
|
11
|
+
provider: string;
|
|
12
|
+
key: string;
|
|
13
|
+
error: string;
|
|
14
|
+
service: string;
|
|
15
|
+
ttlSeconds: number;
|
|
9
16
|
}
|
|
10
17
|
export declare class KeyBalancerClientService {
|
|
11
18
|
private readonly httpService;
|
|
@@ -15,6 +22,6 @@ export declare class KeyBalancerClientService {
|
|
|
15
22
|
getBestKey(keyRequest: PostQueryKeyRequest, token: any): Promise<AvailableKeyResult>;
|
|
16
23
|
getKeys(): Promise<AvailableKeyResult[]>;
|
|
17
24
|
private getUnavailableServiceResult;
|
|
18
|
-
recordFailedRequest(
|
|
25
|
+
recordFailedRequest(failure: KeyFailureDTO): Promise<void>;
|
|
19
26
|
updateUsage(keyUsageId: string, usageCount: number): Promise<any>;
|
|
20
27
|
}
|
|
package/services/key-balancer-api.service.js
CHANGED
|
@@ -43,7 +43,7 @@ let KeyBalancerClientService = KeyBalancerClientService_1 = class KeyBalancerCli
|
|
|
43
43
|
keyRequest,
|
|
44
44
|
};
|
|
45
45
|
const keyBalancerHost = process.env.KEY_BALANCER_HOST || 'http://localhost:8085';
|
|
46
|
-
const url = `${keyBalancerHost}/api/key-usage/best-key-balanced`;
|
|
46
|
+
const url = `${keyBalancerHost}/api/key-usage/get-best-key-balanced`;
|
|
47
47
|
try {
|
|
48
48
|
const { data } = await (0, rxjs_1.firstValueFrom)(this.httpService.post(url, finalRquest));
|
|
49
49
|
return data;
|
|
@@ -73,11 +73,11 @@ let KeyBalancerClientService = KeyBalancerClientService_1 = class KeyBalancerCli
|
|
|
73
73
|
errorDescription: 'Failed to fetch key from key-balancer API.',
|
|
74
74
|
};
|
|
75
75
|
}
|
|
76
|
-
async recordFailedRequest(
|
|
76
|
+
async recordFailedRequest(failure) {
|
|
77
77
|
const keyBalancerHost = process.env.KEY_BALANCER_HOST;
|
|
78
|
-
const url = `${keyBalancerHost}/api/key-
|
|
79
|
-
const { data } = await (0, rxjs_1.firstValueFrom)(this.httpService.post(url,
|
|
80
|
-
this.logger.warn(`Failed to get LLM key for model ${
|
|
78
|
+
const url = `${keyBalancerHost}/api/key-usage/update-key-failure`;
|
|
79
|
+
const { data } = await (0, rxjs_1.firstValueFrom)(this.httpService.post(url, failure));
|
|
80
|
+
this.logger.warn(`Failed to get LLM key for model ${failure.service} from the external service.`, data);
|
|
81
81
|
return;
|
|
82
82
|
}
|
|
83
83
|
async updateUsage(keyUsageId, usageCount) {
|
|
package/services/vertex-gemini-chat.service.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export declare class GeminiChatService {
|
|
|
11
11
|
private mapToGeminiRole;
|
|
12
12
|
private formatMessagesToContent;
|
|
13
13
|
chat(messages: MessageLLM[], model?: string, keyType?: TierType): Promise<ChatMessageDict>;
|
|
14
|
+
private _handleChatError;
|
|
14
15
|
chatStream(messages: MessageLLM[], model?: string, keyType?: TierType): Promise<AsyncIterable<ChatMessageDict>>;
|
|
15
16
|
listModels(): Promise<Record<string, string>[]>;
|
|
16
17
|
getDefaultQualityModel(quality: EModelQuality): string;
|
|
package/services/vertex-gemini-chat.service.js
CHANGED
|
@@ -57,81 +57,103 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
57
57
|
if (!messages || messages.length === 0) {
|
|
58
58
|
return { content: '', role: adapter_models_1.ChatRole.Assistant, metadata: { finishReason: 'NO_INPUT' } };
|
|
59
59
|
}
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
keyTierType: keyType || key_balancer_models_1.TierType.FREE_TIER
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
.
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
lastMessage = messages[messages.length - 1];
|
|
73
|
-
}
|
|
74
|
-
const formattedContents = this.formatMessagesToContent(messages);
|
|
75
|
-
this.logger.debug(`Sending request to Gemini model 🦾 ${model} ♊️ with ${formattedContents.length} content parts and system instruction: ${!!systemMessage}`);
|
|
76
|
-
const response = await client.models.generateContent({
|
|
77
|
-
model: model,
|
|
78
|
-
contents: formattedContents,
|
|
79
|
-
config: { systemInstruction: systemMessage },
|
|
80
|
-
});
|
|
81
|
-
if (!response.text) {
|
|
82
|
-
if (response?.promptFeedback?.blockReason) {
|
|
83
|
-
throw new nest_core_1.AppException({
|
|
84
|
-
error_message: 'Gemini Bloqueo la solicitud: ' + response.promptFeedback.blockReason,
|
|
85
|
-
explanation: 'Lo sentimos a gemini no le gusta esta conversación, y simplemente no responde, intenta reiniciar la conversación y ser muy respetuoso.',
|
|
86
|
-
});
|
|
60
|
+
const maxRetries = 2;
|
|
61
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
62
|
+
const { client, balancedKey } = await this.googleGenaiService.getGoogleGenAIClient({ model: model, keyTierType: keyType || key_balancer_models_1.TierType.FREE_TIER });
|
|
63
|
+
try {
|
|
64
|
+
const systemMessage = messages
|
|
65
|
+
.filter(msg => msg.role === 'system')
|
|
66
|
+
.map(msg => msg.content)
|
|
67
|
+
.join('\n');
|
|
68
|
+
let lastMessage = messages[messages.length - 1];
|
|
69
|
+
if (lastMessage.role !== 'user' || typeof lastMessage.content !== 'string') {
|
|
70
|
+
messages.push({ role: adapter_models_1.ChatRole.User, content: 'please say something to start/continue conversation' });
|
|
71
|
+
lastMessage = messages[messages.length - 1];
|
|
87
72
|
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
73
|
+
const formattedContents = this.formatMessagesToContent(messages);
|
|
74
|
+
this.logger.debug(`Sending request to Gemini model 🦾 ${model} ♊️ (Attempt ${attempt}/${maxRetries})`);
|
|
75
|
+
const response = await client.models.generateContent({
|
|
76
|
+
model: model,
|
|
77
|
+
contents: formattedContents,
|
|
78
|
+
config: { systemInstruction: systemMessage },
|
|
91
79
|
});
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
};
|
|
100
|
-
return {
|
|
101
|
-
content: responseText,
|
|
102
|
-
role: adapter_models_1.ChatRole.Assistant,
|
|
103
|
-
metadata: { tokens },
|
|
104
|
-
};
|
|
105
|
-
}
|
|
106
|
-
catch (error) {
|
|
107
|
-
this.logger.error(`First Error Gemini Gemini {${balancedKey?.key} chat.sendMessage failed: ${error.message}`, client['apiClient']);
|
|
108
|
-
if (error instanceof nest_core_1.AppException) {
|
|
109
|
-
throw error;
|
|
110
|
-
}
|
|
111
|
-
if (balancedKey?.id) {
|
|
112
|
-
if (error.status === 429) {
|
|
113
|
-
this.logger.error(`Gemini chat.sendMessage failed: Rate limited ${model} - con clave ${balancedKey?.name} ${balancedKey?.key}`);
|
|
114
|
-
await this.keyBalancer.recordFailedRequest(balancedKey.id, error, key_balancer_models_1.ModelType.LLM, model, 15);
|
|
80
|
+
if (!response.text) {
|
|
81
|
+
if (response?.promptFeedback?.blockReason) {
|
|
82
|
+
throw new nest_core_1.AppException({
|
|
83
|
+
error_message: 'Gemini Bloqueo la solicitud: ' + response.promptFeedback.blockReason,
|
|
84
|
+
explanation: 'Lo sentimos a gemini no le gusta esta conversación, y simplemente no responde, intenta reiniciar la conversación y ser muy respetuoso.',
|
|
85
|
+
});
|
|
86
|
+
}
|
|
115
87
|
throw new nest_core_1.AppException({
|
|
116
|
-
error_message:
|
|
117
|
-
explanation:
|
|
88
|
+
error_message: 'Gemini response is null or undefined',
|
|
89
|
+
explanation: 'Probablemente Google bloqueo esta solicitud. intenta reiniciar la conversación y ser muy respetuoso.',
|
|
118
90
|
});
|
|
119
91
|
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
92
|
+
this.logger.debug(`Received Gemini response text. ${response?.text.slice(0, 50).replace(/\n/g, '')} ...`);
|
|
93
|
+
const responseText = response?.text ?? '';
|
|
94
|
+
const tokens = {
|
|
95
|
+
input: response?.usageMetadata?.promptTokenCount,
|
|
96
|
+
output: response?.usageMetadata?.candidatesTokenCount,
|
|
97
|
+
total: response?.usageMetadata?.totalTokenCount,
|
|
98
|
+
};
|
|
99
|
+
return {
|
|
100
|
+
content: responseText,
|
|
101
|
+
role: adapter_models_1.ChatRole.Assistant,
|
|
102
|
+
metadata: { tokens },
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
catch (error) {
|
|
106
|
+
this.logger.warn(`Attempt ${attempt} failed for model ${model}. Error: ${error.message}`);
|
|
107
|
+
await this._handleChatError(error, balancedKey, model, attempt < maxRetries);
|
|
108
|
+
if (attempt >= maxRetries) {
|
|
109
|
+
this.logger.error(`All ${maxRetries} attempts failed for model ${model}. Rethrowing final error.`);
|
|
110
|
+
throw error;
|
|
127
111
|
}
|
|
128
|
-
await this.keyBalancer.recordFailedRequest(balancedKey.id, error, key_balancer_models_1.ModelType.LLM, model);
|
|
129
112
|
}
|
|
130
|
-
|
|
113
|
+
}
|
|
114
|
+
throw new Error('Chat failed after all retry attempts.');
|
|
115
|
+
}
|
|
116
|
+
async _handleChatError(error, balancedKey, model, isRetryable) {
|
|
117
|
+
this.logger.error(`Error during Gemini chat for model ${model} with key ${balancedKey?.key}: ${error.message}`, isRetryable ? undefined : error.stack);
|
|
118
|
+
if (error instanceof nest_core_1.AppException) {
|
|
119
|
+
if (balancedKey?.key) {
|
|
120
|
+
await this.keyBalancer.recordFailedRequest({
|
|
121
|
+
key: balancedKey.key,
|
|
122
|
+
provider: 'google',
|
|
123
|
+
service: model,
|
|
124
|
+
error: error.message,
|
|
125
|
+
ttlSeconds: 0,
|
|
126
|
+
});
|
|
127
|
+
}
|
|
128
|
+
if (!isRetryable)
|
|
131
129
|
throw error;
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
let ttlSeconds = 0;
|
|
133
|
+
let errorMessage = `An unexpected error occurred with model ${model}.`;
|
|
134
|
+
let explanation = error.message;
|
|
135
|
+
if (balancedKey?.id) {
|
|
136
|
+
switch (error.status) {
|
|
137
|
+
case 429:
|
|
138
|
+
ttlSeconds = 15;
|
|
139
|
+
errorMessage = `Rate limited for model ${model} with key ${balancedKey?.key}.`;
|
|
140
|
+
this.logger.warn(`Gemini chat failed: Rate limited ${model} - key ${balancedKey?.name}`);
|
|
141
|
+
break;
|
|
142
|
+
case 503:
|
|
143
|
+
ttlSeconds = 40;
|
|
144
|
+
errorMessage = `Service unavailable for model ${model}. Key ${balancedKey?.name} will be invalidated for a few seconds.`;
|
|
145
|
+
this.logger.warn(`Gemini chat failed: Service Unavailable ${model} - key ${balancedKey?.name}`);
|
|
146
|
+
break;
|
|
147
|
+
default:
|
|
148
|
+
break;
|
|
132
149
|
}
|
|
133
|
-
this.
|
|
134
|
-
|
|
150
|
+
await this.keyBalancer.recordFailedRequest({ key: balancedKey.key, provider: 'google', service: model, error: error.message, ttlSeconds });
|
|
151
|
+
}
|
|
152
|
+
if (!isRetryable) {
|
|
153
|
+
throw new nest_core_1.AppException({
|
|
154
|
+
error_message: errorMessage,
|
|
155
|
+
explanation: explanation,
|
|
156
|
+
});
|
|
135
157
|
}
|
|
136
158
|
}
|
|
137
159
|
async chatStream(messages, model = this.defaultModel, keyType) {
|
|
@@ -174,7 +196,13 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
174
196
|
}
|
|
175
197
|
catch (error) {
|
|
176
198
|
if (balancedKey?.id) {
|
|
177
|
-
await this.keyBalancer.recordFailedRequest(
|
|
199
|
+
await this.keyBalancer.recordFailedRequest({
|
|
200
|
+
key: balancedKey.key,
|
|
201
|
+
provider: 'google',
|
|
202
|
+
service: model,
|
|
203
|
+
error: error.message,
|
|
204
|
+
ttlSeconds: 0,
|
|
205
|
+
});
|
|
178
206
|
}
|
|
179
207
|
this.logger.error(`Gemini generateContentStream failed: ${error.message}`, error.stack);
|
|
180
208
|
throw new Error(`Failed to get Gemini chat stream completion: ${error.message}`);
|
|
@@ -227,6 +255,9 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
227
255
|
try {
|
|
228
256
|
const fixPrompt = `I cant extract the json from the following text, something is wrong, please analyze the following text, correct any JSON formatting errors invalid characters or whatever thing is wrong, change or complete the text if needed, and return ONLY the valid JSON object or array. Do not include any explanatory text before or after the JSON itself. JSON to fix: ${responseText}`;
|
|
229
257
|
const fixResponse = await this.chat([{ role: adapter_models_1.ChatRole.User, content: fixPrompt }], model, keyType);
|
|
258
|
+
if (!fixResponse) {
|
|
259
|
+
throw new Error('Recovery chat call returned undefined.');
|
|
260
|
+
}
|
|
230
261
|
this.logger.debug(`Recovery response content: ${fixResponse.content}`);
|
|
231
262
|
const fixedJson = (0, llm_models_1.extractJsonFromResponse)(fixResponse.content);
|
|
232
263
|
if (fixedJson === null) {
|
|
@@ -246,6 +277,12 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
246
277
|
}
|
|
247
278
|
async chatAndExtractJson(messages, model = gemini_models_1.GeminiModels.Gemini2_5Lite, keyType) {
|
|
248
279
|
const response = await this.chat(messages, model, keyType);
|
|
280
|
+
if (!response) {
|
|
281
|
+
throw new nest_core_1.AppException({
|
|
282
|
+
error_message: 'chatAndExtractJson received no response from chat()',
|
|
283
|
+
explanation: 'The chat service did not return a response, so JSON could not be extracted.',
|
|
284
|
+
});
|
|
285
|
+
}
|
|
249
286
|
const json = await this._extractJsonWithRecovery(response.content, model, keyType);
|
|
250
287
|
response.json = json;
|
|
251
288
|
return response;
|
|
@@ -290,7 +327,13 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
290
327
|
}
|
|
291
328
|
catch (error) {
|
|
292
329
|
if (balancedKey?.id) {
|
|
293
|
-
await this.keyBalancer.recordFailedRequest(
|
|
330
|
+
await this.keyBalancer.recordFailedRequest({
|
|
331
|
+
key: balancedKey.key,
|
|
332
|
+
provider: 'google',
|
|
333
|
+
service: visionModelLLM,
|
|
334
|
+
error: error.message,
|
|
335
|
+
ttlSeconds: 0,
|
|
336
|
+
});
|
|
294
337
|
}
|
|
295
338
|
this.logger.error(`Error in describeImageByUrl vision call: ${error.message}`, error.stack);
|
|
296
339
|
throw new Error(`Failed to get Gemini vision completion: ${error.message}`);
|
|
@@ -317,6 +360,12 @@ let GeminiChatService = GeminiChatService_1 = class GeminiChatService {
|
|
|
317
360
|
}
|
|
318
361
|
else {
|
|
319
362
|
const response = await this.chat(conversation.messages, modelName, tierType);
|
|
363
|
+
if (!response) {
|
|
364
|
+
throw new nest_core_1.AppException({
|
|
365
|
+
error_message: 'chatWithConversation received no response from chat()',
|
|
366
|
+
explanation: 'The chat service did not return a response.',
|
|
367
|
+
});
|
|
368
|
+
}
|
|
320
369
|
const endTime = Date.now();
|
|
321
370
|
const processTime = (endTime - startTime) / 1000;
|
|
322
371
|
return {
|
|
package/services/whisper/groq.service.js
CHANGED
|
@@ -107,11 +107,11 @@ let GroqService = GroqService_1 = class GroqService {
|
|
|
107
107
|
}
|
|
108
108
|
try {
|
|
109
109
|
const file = await (0, groq_sdk_1.toFile)(fileBuffer, effectiveFileName, { type: mimeType });
|
|
110
|
+
this.logger.verbose(`Tryng to get key for model whisper-large-v3-turbo groq....`);
|
|
110
111
|
const keyResult = await this.keyBalancer.getBestKey({
|
|
111
112
|
provider: 'groq',
|
|
112
113
|
service: 'whisper-large-v3-turbo',
|
|
113
114
|
tierType: key_balancer_models_1.TierType.FREE_TIER,
|
|
114
|
-
aiType: key_balancer_models_1.ModelType.AUDIO,
|
|
115
115
|
}, null);
|
|
116
116
|
this.logger.verbose(`Using key: ${keyResult.name} {${keyResult?.service}}`);
|
|
117
117
|
const transcription = await this.getClientWithKey(keyResult.key).audio.transcriptions.create({
|