garriga 1.0.9 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +36 -5
- package/package.json +1 -1
package/index.js
CHANGED
@@ -214,20 +214,51 @@ class localLLM {
       );

       if (userToken === "modeloLocalOllama") {
-
+        try {
+          // --- DYNAMIC CONTEXT CALCULATION ---
+          // 1. Estimate the prompt's tokens:
+          // In Spanish and in JSON, one token is roughly 2.7 - 3 characters.
+          // We divide by 2.7 to be conservative (estimate on the high side) and avoid cut-offs.
+          const estimatedPromptTokens = Math.ceil(prompt.length / 2.7);
+
+          // 2. Tokens reserved for the response (output):
+          const numPredict = +(process.env.MAX_TOKENS_FRAUD || 2500);
+
+          // 3. Safety buffer (just in case):
+          const buffer = 1000;
+
+          // 4. Calculate the total needed:
+          // 4. Calculate the total needed:
+          let neededCtx = estimatedPromptTokens + numPredict + buffer;
+
+          // --- IMPROVEMENT: BUCKETING (ROUNDING) ---
+          // Round up to the next multiple of 1024 or 2048.
+          // This avoids reloads when the prompt size changes only slightly.
+          const blockSize = 2048;
+          neededCtx = Math.ceil(neededCtx / blockSize) * blockSize;
+
+          // 5. Apply limits (clamping):
+          const maxServerLimit = +(process.env.MAX_CONTEXT_FRAUD || 65536);
+          const minLimit = 4096;
+
+          const finalCtx = Math.min(Math.max(neededCtx, minLimit), maxServerLimit);
+
+          // (Optional) Log for debugging, to see how much it is actually requesting
+          console.log(`[Ollama] Dynamic Context -> Prompt Chars: ${prompt.length}, Calculated: ${neededCtx}, Used: ${finalCtx}`);
+
         const ollamaPayload = {
           prompt,
           model: source,
           stream: false,
           format: "json",
-
           keep_alive: -1,
           options: {
-            num_ctx:
-            temperature: process.env.TEMPERATURE_FRAUD || 0.1,
-            num_predict:
+            num_ctx: finalCtx, // Dynamically calculated value
+            temperature: +(process.env.TEMPERATURE_FRAUD || 0.1),
+            num_predict: numPredict,
           },
         };
+
         const response = await axios.post(this.apiUrlOllama, ollamaPayload, {
           headers: {
             "Content-Type": "application/json",
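To make the sizing arithmetic above easier to follow, here is a standalone sketch that mirrors the same steps: a character-based token estimate, 2048-token bucketing, then clamping between 4096 and the server limit. The estimateContext helper, its default values, and the sample prompt length are illustrative only; they are not part of the garriga package.

// Illustrative sketch only: it mirrors the sizing math added in 1.0.10.
// estimateContext() is a hypothetical helper, not an export of garriga.
function estimateContext(promptLength, {
  numPredict = 2500,      // default of MAX_TOKENS_FRAUD in the diff
  buffer = 1000,          // safety buffer
  blockSize = 2048,       // bucketing granularity
  minLimit = 4096,        // lower clamp
  maxServerLimit = 65536, // default of MAX_CONTEXT_FRAUD in the diff
} = {}) {
  // ~2.7 characters per token for Spanish/JSON text (conservative estimate)
  const estimatedPromptTokens = Math.ceil(promptLength / 2.7);
  let neededCtx = estimatedPromptTokens + numPredict + buffer;
  // Round up to the next multiple of blockSize (bucketing)
  neededCtx = Math.ceil(neededCtx / blockSize) * blockSize;
  // Clamp between the minimum and the server limit
  return Math.min(Math.max(neededCtx, minLimit), maxServerLimit);
}

// Worked example: a 27,000-character prompt.
// ceil(27000 / 2.7) = 10000 tokens; 10000 + 2500 + 1000 = 13500;
// rounded up to the next 2048 multiple = 14336; already within [4096, 65536].
console.log(estimateContext(27000)); // 14336

Because the payload keeps keep_alive: -1, the bucketing step is what makes the dynamic num_ctx practical: prompts whose sizes differ only slightly land in the same bucket, so the loaded model does not have to be reloaded on every request, as the diff's own comments note.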