garriga 1.0.10 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +8 -37
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -214,51 +214,22 @@ class localLLM {
|
|
|
214
214
|
);
|
|
215
215
|
|
|
216
216
|
if (userToken === "modeloLocalOllama") {
|
|
217
|
-
|
|
218
|
-
// --- CÁLCULO DINÁMICO DE CONTEXTO ---
|
|
219
|
-
// 1. Estimar tokens del prompt:
|
|
220
|
-
// En español y JSON, un token suelen ser aprox 2.7 - 3 caracteres.
|
|
221
|
-
// Dividimos por 2.7 para ser conservadores (calcular por lo alto) y evitar cortes.
|
|
222
|
-
const estimatedPromptTokens = Math.ceil(prompt.length / 2.7);
|
|
223
|
-
|
|
224
|
-
// 2. Tokens reservados para la respuesta (output):
|
|
225
|
-
const numPredict = +(process.env.MAX_TOKENS_FRAUD || 2500);
|
|
226
|
-
|
|
227
|
-
// 3. Buffer de seguridad (por si acaso):
|
|
228
|
-
const buffer = 1000;
|
|
229
|
-
|
|
230
|
-
// 4. Calcular el total necesario:
|
|
231
|
-
// 4. Calcular el total necesario:
|
|
232
|
-
let neededCtx = estimatedPromptTokens + numPredict + buffer;
|
|
233
|
-
|
|
234
|
-
// --- MEJORA: BUCKETING (REDONDEO) ---
|
|
235
|
-
// Redondeamos hacia arriba al siguiente múltiplo de 1024 o 2048.
|
|
236
|
-
// Esto evita recargas si el prompt cambia poco de tamaño.
|
|
237
|
-
const blockSize = 2048;
|
|
238
|
-
neededCtx = Math.ceil(neededCtx / blockSize) * blockSize;
|
|
239
|
-
|
|
240
|
-
// 5. Aplicar límites (Clamping):
|
|
241
|
-
const maxServerLimit = +(process.env.MAX_CONTEXT_FRAUD || 65536);
|
|
242
|
-
const minLimit = 4096;
|
|
243
|
-
|
|
244
|
-
const finalCtx = Math.min(Math.max(neededCtx, minLimit), maxServerLimit);
|
|
245
|
-
|
|
246
|
-
// (Opcional) Log para depurar y ver cuánto está pidiendo realmente
|
|
247
|
-
console.log(`[Ollama] Dynamic Context -> Prompt Chars: ${prompt.length}, Calculated: ${neededCtx}, Used: ${finalCtx}`);
|
|
248
|
-
|
|
217
|
+
try {
|
|
249
218
|
const ollamaPayload = {
|
|
250
219
|
prompt,
|
|
251
220
|
model: source,
|
|
252
221
|
stream: false,
|
|
253
|
-
format: "json",
|
|
222
|
+
//format: "json",
|
|
223
|
+
|
|
254
224
|
keep_alive: -1,
|
|
255
225
|
options: {
|
|
256
|
-
num_ctx: finalCtx,
|
|
257
|
-
temperature: +(process.env.TEMPERATURE_FRAUD || 0.1),
|
|
258
|
-
num_predict: numPredict,
|
|
226
|
+
num_ctx: +process.env.MAX_CONTEXT_FRAUD || 65536,
|
|
227
|
+
temperature: +process.env.TEMPERATURE_FRAUD || 0.1,
|
|
228
|
+
num_predict: +process.env.MAX_TOKENS_FRAUD || 2500,
|
|
229
|
+
top_k: 40,
|
|
230
|
+
top_p: 0.9,
|
|
259
231
|
},
|
|
260
232
|
};
|
|
261
|
-
|
|
262
233
|
const response = await axios.post(this.apiUrlOllama, ollamaPayload, {
|
|
263
234
|
headers: {
|
|
264
235
|
"Content-Type": "application/json",
|