@beltoinc/slyos-sdk 1.5.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/create-chatbot.sh +160 -90
- package/package.json +1 -1
package/create-chatbot.sh
CHANGED
@@ -292,128 +292,198 @@ function printWelcome() {
 }
 
 /**
- *
+ * Clean up model output — stop hallucinated Q&A chains, strip artifacts
+ */
+function cleanResponse(text) {
+  return text
+    // CRITICAL: Cut at first hallucinated Q&A follow-up
+    .split(/\n\s*Q\s*:/)[0]
+    // Cut at hallucinated role prefixes
+    .split(/\n\s*(User|Human|System|Question|A:|Answer):/i)[0]
+    // Strip repeated garbage chars
+    .replace(/(.)\1{5,}/g, '')
+    // Strip leading role prefixes
+    .replace(/^(assistant|system|answer|response|AI)\s*[:]\s*/i, '')
+    // Strip any remaining mid-response role prefix
+    .replace(/^\s*(assistant|AI)\s*[:]\s*/im, '')
+    .trim();
+}
+
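For reference, a quick sketch of what the new cleanResponse() helper does to a typical small-model continuation (the sample string is hypothetical, not from the package):

  const raw = 'Paris is the capital of France.\nQ: What about Spain?\nA: Madrid.';
  console.log(cleanResponse(raw)); // => "Paris is the capital of France."
  // The first .split(/\n\s*Q\s*:/)[0] discards everything from the hallucinated
  // "Q:" follow-up onward; the remaining passes are no-ops on this input.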
+/**
+ * Send message to AI and get response — with timing metrics and streaming
  */
 async function sendMessage(userMessage) {
   try {
-
-
+    const totalStart = Date.now();
     let assistantMessage = '';
     let sourceInfo = '';
+    let retrievalMs = 0;
+    let firstTokenMs = 0;
+    let generationMs = 0;
+    let tokensGenerated = 0;
 
     if (config.kbId) {
-      // RAG
+      // ── RAG MODE ──
       console.log(`${colors.dim}Searching knowledge base...${colors.reset}`);
+      const retrievalStart = Date.now();
+
       try {
         const token = await getAuthToken();
-        if (!token) throw new Error('Could not authenticate
-
+        if (!token) throw new Error('Could not authenticate');
+
         const modelCtx = sdk.getModelContextWindow?.() || 2048;
-        const
+        const memoryMB = 8192; // default; could read from sdk.getDeviceProfile()
+        const cpuCores = 8;
+
+        // Dynamic config based on device + model
+        const deviceTier = (memoryMB >= 8192 && cpuCores >= 8) ? 'high' : (memoryMB >= 4096) ? 'mid' : 'low';
+        const topK = deviceTier === 'high' ? 3 : deviceTier === 'mid' ? 2 : 1;
+        const maxContextChars = modelCtx <= 2048
+          ? (deviceTier === 'high' ? 600 : deviceTier === 'mid' ? 400 : 300)
+          : modelCtx <= 4096
+            ? (deviceTier === 'high' ? 1500 : 1000)
+            : 2000;
+        const maxGenTokens = modelCtx <= 2048
+          ? (deviceTier === 'high' ? 200 : 150)
+          : Math.min(400, Math.floor(modelCtx / 4));
+
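The device-tier heuristic above collapses to three thresholds; here it is as a standalone sketch (pickTier is a hypothetical helper, and note the diff hardcodes memoryMB = 8192 and cpuCores = 8, with sdk.getDeviceProfile() only floated in a comment):

  function pickTier(memoryMB, cpuCores) {
    return (memoryMB >= 8192 && cpuCores >= 8) ? 'high'
         : (memoryMB >= 4096) ? 'mid'
         : 'low';
  }
  console.log(pickTier(8192, 8)); // 'high' -> topK 3, 600 context chars on a 2048-token window
  console.log(pickTier(4096, 4)); // 'mid'  -> topK 2, 400 context chars
  console.log(pickTier(2048, 2)); // 'low'  -> topK 1, 300 context chars

With the hardcoded defaults every device lands in the 'high' tier, so the mid/low branches are currently dead code until real device values are wired in.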
         const ragRes = await fetch(`${config.server}/api/rag/knowledge-bases/${config.kbId}/query`, {
           method: 'POST',
-          headers: {
-            'Content-Type': 'application/json',
-            'Authorization': `Bearer ${token}`
-          },
+          headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${token}` },
           body: JSON.stringify({ query: userMessage, top_k: topK, model_id: config.model })
         });
-        if (!ragRes.ok) {
-          const errText = await ragRes.text();
-          throw new Error(`RAG query failed: ${ragRes.status} - ${errText}`);
-        }
+        if (!ragRes.ok) throw new Error(`RAG ${ragRes.status}`);
         const ragData = await ragRes.json();
-        const chunks = ragData.retrieved_chunks || [];
-
-        // Check if chunks are relevant enough (similarity > 0.3)
-        const goodChunks = chunks.filter(c => (c.similarity_score || 0) > 0.3);
-
-        if (goodChunks.length > 0) {
-          const ctxWindow = sdk.getModelContextWindow?.() || 2048;
-
-          // AGGRESSIVE context limits — small models choke on long prompts
-          // ~4 chars per token on average, reserve 60% of window for generation
-          const maxContextTokens = Math.floor(ctxWindow * 0.3);
-          const maxContextChars = ctxWindow <= 2048 ? 400 : ctxWindow <= 4096 ? 1000 : 2000;
-          const maxGenTokens = ctxWindow <= 2048 ? 150 : Math.min(300, Math.floor(ctxWindow / 4));
+        const chunks = (ragData.retrieved_chunks || []).filter(c => (c.similarity_score || 0) > 0.3);
+        retrievalMs = Date.now() - retrievalStart;
 
-
-        const bestChunk =
+        if (chunks.length > 0) {
+          const bestChunk = chunks[0];
           let context = bestChunk.content
-            .replace(/[^\x20-\x7E\n]/g, ' ')
-            .replace(
-            .replace(
-            .replace(/https?:\/\/\S+/g, '') // Strip URLs
-            .replace(/[{}()\[\]]/g, '') // Strip brackets/braces
-            .trim();
+            .replace(/[^\x20-\x7E\n]/g, ' ').replace(/\s{2,}/g, ' ')
+            .replace(/<[^>]+>/g, ' ').replace(/https?:\/\/\S+/g, '')
+            .replace(/[{}()\[\]]/g, '').trim();
           if (context.length > maxContextChars) context = context.substring(0, maxContextChars);
 
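As a quick illustration of the consolidated cleanup chain (the helper name and sample input are hypothetical):

  function sanitizeChunk(s) {
    return s
      .replace(/[^\x20-\x7E\n]/g, ' ')   // non-printable/non-ASCII -> space
      .replace(/\s{2,}/g, ' ')           // collapse whitespace runs
      .replace(/<[^>]+>/g, ' ')          // drop HTML tags
      .replace(/https?:\/\/\S+/g, '')    // drop URLs
      .replace(/[{}()\[\]]/g, '')        // drop brackets/braces
      .trim();
  }
  console.log(sanitizeChunk('<li>See https://example.com (v2)</li>'));
  // => "See  v2" (the double space survives because whitespace is
  //    collapsed before the tag and URL passes leave spaces behind)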
-          console.log(`${colors.dim}Context: ${context.length} chars from "${bestChunk.document_name}"${colors.reset}`);
-
-          //
-          const prompt =
-
-
-
-
-
-
-
-
-
-
+          console.log(`${colors.dim}Context: ${context.length} chars from "${bestChunk.document_name}" [retrieval: ${retrievalMs}ms, tier: ${deviceTier}]${colors.reset}`);
+
+          // Instruction prompt — avoids Q&A chain hallucination
+          const prompt = `Based on this information:\n${context}\n\nAnswer briefly: ${userMessage}\n\n`;
+
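For illustration, with a hypothetical context and question the new prompt renders as:

  Based on this information:
  Acme widgets ship in 3-5 business days.

  Answer briefly: How long does shipping take?

Per the comment above, the instruction framing is meant to keep small models from continuing a Q&A chain; cleanResponse() remains the backstop when they do anyway.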
+          // Stream tokens
+          const genStart = Date.now();
+          let firstToken = false;
+          process.stdout.write(`\n${colors.bright}${colors.magenta}AI:${colors.reset} `);
+
+          if (sdk.generateStream) {
+            const result = await sdk.generateStream(config.model, prompt, {
+              temperature: 0.6,
+              maxTokens: maxGenTokens,
+              onToken: (token, partial) => {
+                if (!firstToken) {
+                  firstTokenMs = Date.now() - genStart;
+                  firstToken = true;
+                }
+                process.stdout.write(token);
+              }
+            });
+            assistantMessage = result.text || '';
+            if (!firstToken) firstTokenMs = result.firstTokenMs || 0;
+            tokensGenerated = result.tokensGenerated || assistantMessage.split(/\s+/).length;
+          } else {
+            // Fallback: no streaming
+            const response = await sdk.generate(config.model, prompt, {
+              temperature: 0.6,
+              maxTokens: maxGenTokens
+            });
+            assistantMessage = (typeof response === 'string' ? response : response?.text || '') || '';
+            firstTokenMs = Date.now() - genStart;
+            tokensGenerated = assistantMessage.split(/\s+/).length;
+            process.stdout.write(assistantMessage);
           }
+          generationMs = Date.now() - genStart;
+
+          // Source info
+          const sources = [...new Set(chunks.map(c => c.document_name || c.source).filter(Boolean))];
+          if (sources.length > 0) sourceInfo = `\n${colors.dim}[Sources: ${sources.join(', ')}]${colors.reset}`;
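The first-token/throughput bookkeeping above can be exercised in isolation. A minimal sketch with a stubbed streamer standing in for the SDK (fakeStream and its 50 ms delay are invented for the demo; the real call, per the diff, is sdk.generateStream(model, prompt, { temperature, maxTokens, onToken })):

  async function fakeStream(prompt, { onToken }) {
    for (const tok of ['Hello', ' world']) {
      await new Promise(r => setTimeout(r, 50)); // pretend decode latency
      onToken(tok);
    }
    return { text: 'Hello world' };
  }

  (async () => {
    const genStart = Date.now();
    let firstToken = false, firstTokenMs = 0;
    const result = await fakeStream('Answer briefly: hi\n\n', {
      onToken: (token) => {
        if (!firstToken) { firstTokenMs = Date.now() - genStart; firstToken = true; }
        process.stdout.write(token);
      }
    });
    const generationMs = Date.now() - genStart;
    console.log(`\nfirst token: ~${firstTokenMs}ms, generation: ~${generationMs}ms`);
  })();

Expected output is roughly "first token: ~50ms, generation: ~100ms".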
         } else {
-
-
-
-
-
-
-
-
+          console.log(`${colors.dim}No relevant context found [retrieval: ${retrievalMs}ms]${colors.reset}`);
+          const genStart = Date.now();
+          process.stdout.write(`\n${colors.bright}${colors.magenta}AI:${colors.reset} `);
+
+          if (sdk.generateStream) {
+            const result = await sdk.generateStream(config.model, `Answer briefly: ${userMessage}\n\n`, {
+              temperature: 0.7, maxTokens: 100,
+              onToken: (token) => {
+                if (!firstTokenMs) firstTokenMs = Date.now() - genStart;
+                process.stdout.write(token);
+              }
+            });
+            assistantMessage = result.text || '';
+            tokensGenerated = result.tokensGenerated || assistantMessage.split(/\s+/).length;
+          } else {
+            const response = await sdk.generate(config.model, `Answer briefly: ${userMessage}\n\n`, {
+              temperature: 0.7, maxTokens: 100
+            });
+            assistantMessage = (typeof response === 'string' ? response : response?.text || '') || '';
+            firstTokenMs = Date.now() - genStart;
+            tokensGenerated = assistantMessage.split(/\s+/).length;
+            process.stdout.write(assistantMessage);
+          }
+          generationMs = Date.now() - genStart;
         }
       } catch (ragErr) {
-        console.log(`${colors.yellow}RAG
-        const
-        const response = await sdk.generate(config.model,
-          temperature: 0.7,
-          maxTokens: 100
+        console.log(`${colors.yellow}RAG failed: ${ragErr.message}${colors.reset}`);
+        const genStart = Date.now();
+        const response = await sdk.generate(config.model, `Answer briefly: ${userMessage}\n\n`, {
+          temperature: 0.7, maxTokens: 100
         });
-        assistantMessage = (typeof response === 'string' ? response : response?.text ||
+        assistantMessage = (typeof response === 'string' ? response : response?.text || '') || '';
+        firstTokenMs = Date.now() - genStart;
+        generationMs = firstTokenMs;
+        tokensGenerated = assistantMessage.split(/\s+/).length;
+        process.stdout.write(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}`);
       }
     } else {
-      //
-      const
-
-
-
-
-
+      // ── PLAIN MODE ──
+      const genStart = Date.now();
+      process.stdout.write(`\n${colors.bright}${colors.magenta}AI:${colors.reset} `);
+
+      if (sdk.generateStream) {
+        const result = await sdk.generateStream(config.model, `Answer briefly: ${userMessage}\n\n`, {
+          temperature: 0.7, maxTokens: 150,
+          onToken: (token) => {
+            if (!firstTokenMs) firstTokenMs = Date.now() - genStart;
+            process.stdout.write(token);
+          }
+        });
+        assistantMessage = result.text || '';
+        tokensGenerated = result.tokensGenerated || assistantMessage.split(/\s+/).length;
+      } else {
+        const response = await sdk.generate(config.model, `Answer briefly: ${userMessage}\n\n`, {
+          temperature: 0.7, maxTokens: 150
+        });
+        assistantMessage = (typeof response === 'string' ? response : response?.text || '') || '';
+        firstTokenMs = Date.now() - genStart;
+        tokensGenerated = assistantMessage.split(/\s+/).length;
+        process.stdout.write(assistantMessage);
+      }
+      generationMs = Date.now() - genStart;
     }
 
-    // Clean up
-    assistantMessage = assistantMessage
-
-
-
-
-
-
-
-        return match.length < 50 && !match.includes(' is ') ? '' : match;
-      })
-      // Stop at any hallucinated role prefixes mid-response
-      .split(/\n\s*(User|Human|System|Question):/i)[0]
-      // Strip any remaining leading role prefix after newline
-      .replace(/^\s*(assistant|AI)\s*[:]\s*/im, '')
-      .trim();
+    // Clean up hallucinated Q&A chains
+    assistantMessage = cleanResponse(assistantMessage);
+
+    const totalMs = Date.now() - totalStart;
+    const tokPerSec = generationMs > 0 ? (tokensGenerated / (generationMs / 1000)).toFixed(1) : '0';
+
+    // Print timing summary
+    console.log(sourceInfo);
+    console.log(`${colors.dim}⏱ retrieval: ${retrievalMs}ms | first token: ${firstTokenMs}ms | generation: ${generationMs}ms | total: ${totalMs}ms | ${tokensGenerated} tokens @ ${tokPerSec} tok/s${colors.reset}\n`);
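A worked example of the throughput figure in that summary line: if 42 tokens are generated over a 3500 ms generation window,

  const tokPerSec = (42 / (3500 / 1000)).toFixed(1); // 42 / 3.5 -> "12.0"

Note that tokensGenerated falls back to a whitespace split of the reply when the SDK does not report a count, so in that case the tok/s figure is really words per second, not true tokens.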
 
     if (!assistantMessage || assistantMessage.length < 3) {
-
+      console.log(`${colors.yellow}(No response generated — try rephrasing your question)${colors.reset}\n`);
     }
-
-    console.log(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}${sourceInfo}\n`);
   } catch (error) {
     console.error(`\n${colors.red}Error:${colors.reset} ${error.message}\n`);
   }