@beltoinc/slyos-sdk 1.5.1 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +8 -2
- package/dist/index.js +97 -46
- package/package.json +1 -1
- package/slyos-chatbot/.env +4 -0
- package/slyos-chatbot/.env.example +4 -0
- package/slyos-chatbot/README.md +89 -0
- package/slyos-chatbot/app.mjs +370 -0
- package/slyos-chatbot/package-lock.json +1408 -0
- package/slyos-chatbot/package.json +23 -0
- package/create-chatbot.sh +0 -640
- package/src/index.ts +0 -2073
- package/tsconfig.json +0 -15
package/dist/index.d.ts
CHANGED
|
@@ -241,12 +241,18 @@ declare class SlyOS {
|
|
|
241
241
|
loadModel(modelId: string, options?: {
|
|
242
242
|
quant?: QuantizationLevel;
|
|
243
243
|
}): Promise<void>;
|
|
244
|
-
generate(modelId: string, prompt: string
|
|
244
|
+
generate(modelId: string, prompt: string | Array<{
|
|
245
|
+
role: string;
|
|
246
|
+
content: string;
|
|
247
|
+
}>, options?: GenerateOptions): Promise<string>;
|
|
245
248
|
/**
|
|
246
249
|
* Stream text generation token-by-token.
|
|
247
250
|
* Calls onToken callback for each generated token.
|
|
248
251
|
*/
|
|
249
|
-
generateStream(modelId: string, prompt: string
|
|
252
|
+
generateStream(modelId: string, prompt: string | Array<{
|
|
253
|
+
role: string;
|
|
254
|
+
content: string;
|
|
255
|
+
}>, options?: GenerateOptions & {
|
|
250
256
|
onToken?: (token: string, partial: string) => void;
|
|
251
257
|
}): Promise<{
|
|
252
258
|
text: string;
|
package/dist/index.js
CHANGED
|
@@ -801,6 +801,7 @@ class SlyOS {
|
|
|
801
801
|
throw new Error(`Model "${modelId}" is not an LLM. Use transcribe() for STT models.`);
|
|
802
802
|
}
|
|
803
803
|
const maxTokens = Math.min(options.maxTokens || 100, contextWindow || 2048);
|
|
804
|
+
const isMessages = Array.isArray(prompt);
|
|
804
805
|
this.emitProgress('generating', 0, `Generating response (max ${maxTokens} tokens)...`);
|
|
805
806
|
this.emitEvent('inference_start', { modelId, maxTokens });
|
|
806
807
|
const startTime = Date.now();
|
|
@@ -810,13 +811,30 @@ class SlyOS {
|
|
|
810
811
|
temperature: options.temperature || 0.7,
|
|
811
812
|
top_p: options.topP || 0.9,
|
|
812
813
|
do_sample: true,
|
|
814
|
+
repetition_penalty: 1.1,
|
|
813
815
|
});
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
816
|
+
let response;
|
|
817
|
+
if (isMessages) {
|
|
818
|
+
// When using messages format, the pipeline returns the assistant's reply
|
|
819
|
+
// in the last message of the generated conversation
|
|
820
|
+
const generated = result[0].generated_text;
|
|
821
|
+
if (Array.isArray(generated)) {
|
|
822
|
+
// Transformers.js returns messages array — extract assistant reply
|
|
823
|
+
const assistantMsg = generated.filter((m) => m.role === 'assistant').pop();
|
|
824
|
+
response = assistantMsg?.content?.trim() || '';
|
|
825
|
+
}
|
|
826
|
+
else {
|
|
827
|
+
response = typeof generated === 'string' ? generated.trim() : '';
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
else {
|
|
831
|
+
const rawOutput = result[0].generated_text;
|
|
832
|
+
// HuggingFace transformers returns the prompt + generated text concatenated.
|
|
833
|
+
// Strip the original prompt so we only return the NEW tokens.
|
|
834
|
+
response = (typeof rawOutput === 'string' && rawOutput.startsWith(prompt))
|
|
835
|
+
? rawOutput.slice(prompt.length).trim()
|
|
836
|
+
: (typeof rawOutput === 'string' ? rawOutput.trim() : '');
|
|
837
|
+
}
|
|
820
838
|
const latency = Date.now() - startTime;
|
|
821
839
|
const tokensGenerated = response.split(/\s+/).length;
|
|
822
840
|
const tokensPerSec = (tokensGenerated / (latency / 1000)).toFixed(1);
|
|
@@ -885,9 +903,12 @@ class SlyOS {
|
|
|
885
903
|
if (info.category !== 'llm')
|
|
886
904
|
throw new Error(`Not an LLM`);
|
|
887
905
|
const maxTokens = Math.min(options.maxTokens || 100, contextWindow || 2048);
|
|
906
|
+
const isMessages = Array.isArray(prompt);
|
|
888
907
|
const startTime = Date.now();
|
|
889
908
|
let firstTokenTime = 0;
|
|
890
909
|
let accumulated = '';
|
|
910
|
+
let prevText = '';
|
|
911
|
+
let callbackCount = 0;
|
|
891
912
|
this.emitProgress('generating', 0, `Streaming (max ${maxTokens} tokens)...`);
|
|
892
913
|
try {
|
|
893
914
|
const result = await pipe(prompt, {
|
|
@@ -895,28 +916,57 @@ class SlyOS {
|
|
|
895
916
|
temperature: options.temperature || 0.7,
|
|
896
917
|
top_p: options.topP || 0.9,
|
|
897
918
|
do_sample: true,
|
|
898
|
-
|
|
919
|
+
repetition_penalty: 1.1,
|
|
920
|
+
// Transformers.js v3 streamer callback — receives decoded output tokens
|
|
899
921
|
callback_function: (output) => {
|
|
922
|
+
callbackCount++;
|
|
900
923
|
if (!firstTokenTime)
|
|
901
924
|
firstTokenTime = Date.now() - startTime;
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
925
|
+
// Transformers.js v3 callback_function may receive:
|
|
926
|
+
// 1. A string (decoded text so far) in some pipeline configurations
|
|
927
|
+
// 2. Token IDs array/tensor in others
|
|
928
|
+
// We handle both cases
|
|
929
|
+
let tokenText = '';
|
|
930
|
+
if (typeof output === 'string') {
|
|
931
|
+
tokenText = output;
|
|
932
|
+
}
|
|
933
|
+
else if (output && typeof output === 'object') {
|
|
934
|
+
// For newer Transformers.js: try to extract text if available
|
|
935
|
+
if (output.text)
|
|
936
|
+
tokenText = output.text;
|
|
937
|
+
}
|
|
938
|
+
if (tokenText && tokenText !== prevText) {
|
|
939
|
+
const newPart = tokenText.startsWith(prevText) ? tokenText.slice(prevText.length) : tokenText;
|
|
940
|
+
prevText = tokenText;
|
|
941
|
+
if (newPart) {
|
|
942
|
+
accumulated += newPart;
|
|
943
|
+
options.onToken?.(newPart, accumulated);
|
|
944
|
+
this.emitEvent('token', { token: newPart, partial: accumulated });
|
|
910
945
|
}
|
|
911
946
|
}
|
|
912
947
|
}
|
|
913
948
|
});
|
|
914
|
-
|
|
915
|
-
|
|
949
|
+
let response;
|
|
950
|
+
if (isMessages) {
|
|
951
|
+
const generated = result[0].generated_text;
|
|
952
|
+
if (Array.isArray(generated)) {
|
|
953
|
+
const assistantMsg = generated.filter((m) => m.role === 'assistant').pop();
|
|
954
|
+
response = assistantMsg?.content?.trim() || '';
|
|
955
|
+
}
|
|
956
|
+
else {
|
|
957
|
+
response = typeof generated === 'string' ? generated.trim() : '';
|
|
958
|
+
}
|
|
959
|
+
}
|
|
960
|
+
else {
|
|
961
|
+
const rawOutput = result[0].generated_text;
|
|
962
|
+
response = (typeof rawOutput === 'string' && rawOutput.startsWith(prompt))
|
|
963
|
+
? rawOutput.slice(prompt.length).trim()
|
|
964
|
+
: (typeof rawOutput === 'string' ? rawOutput.trim() : '');
|
|
965
|
+
}
|
|
916
966
|
if (!firstTokenTime)
|
|
917
967
|
firstTokenTime = Date.now() - startTime;
|
|
918
968
|
const totalMs = Date.now() - startTime;
|
|
919
|
-
const tokensGenerated = response.split(/\s+/).length;
|
|
969
|
+
const tokensGenerated = response.split(/\s+/).filter(Boolean).length;
|
|
920
970
|
this.emitProgress('ready', 100, `Streamed ${tokensGenerated} tokens in ${(totalMs / 1000).toFixed(1)}s`);
|
|
921
971
|
return { text: response, firstTokenMs: firstTokenTime, totalMs, tokensGenerated };
|
|
922
972
|
}
|
|
@@ -983,21 +1033,13 @@ class SlyOS {
|
|
|
983
1033
|
// ── OpenAI Compatibility ────────────────────────────────────────────
|
|
984
1034
|
async chatCompletion(modelId, request) {
|
|
985
1035
|
try {
|
|
986
|
-
//
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
return `User: ${msg.content}`;
|
|
994
|
-
}
|
|
995
|
-
else {
|
|
996
|
-
return `Assistant: ${msg.content}`;
|
|
997
|
-
}
|
|
998
|
-
})
|
|
999
|
-
.join('\n\n');
|
|
1000
|
-
const response = await this.generate(modelId, prompt, {
|
|
1036
|
+
// Pass messages directly to generate() — Transformers.js v3 applies the model's
|
|
1037
|
+
// chat template automatically, which produces much better results than raw text
|
|
1038
|
+
const messages = request.messages.map(msg => ({
|
|
1039
|
+
role: msg.role,
|
|
1040
|
+
content: msg.content,
|
|
1041
|
+
}));
|
|
1042
|
+
const response = await this.generate(modelId, messages, {
|
|
1001
1043
|
temperature: request.temperature,
|
|
1002
1044
|
maxTokens: request.max_tokens,
|
|
1003
1045
|
topP: request.top_p,
|
|
@@ -1291,23 +1333,26 @@ class SlyOS {
|
|
|
1291
1333
|
model_id: options.modelId
|
|
1292
1334
|
}, { headers: { Authorization: `Bearer ${this.token}` } });
|
|
1293
1335
|
const retrievalMs = Date.now() - retrievalStart;
|
|
1294
|
-
let { retrieved_chunks,
|
|
1336
|
+
let { retrieved_chunks, context } = searchResponse.data;
|
|
1295
1337
|
// Step 2: Build context with dynamic limits
|
|
1296
1338
|
const contextBuildStart = Date.now();
|
|
1297
1339
|
if (context && context.length > ragConfig.maxContextChars) {
|
|
1298
1340
|
context = context.substring(0, ragConfig.maxContextChars);
|
|
1299
1341
|
}
|
|
1300
|
-
//
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1342
|
+
// Build messages array for proper chat template handling
|
|
1343
|
+
// This uses the model's built-in chat template (e.g. <|im_start|> for SmolLM/Qwen)
|
|
1344
|
+
// which produces dramatically better results than raw text prompts
|
|
1345
|
+
const messages = [
|
|
1346
|
+
{ role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
|
|
1347
|
+
{ role: 'user', content: options.query },
|
|
1348
|
+
];
|
|
1304
1349
|
const contextBuildMs = Date.now() - contextBuildStart;
|
|
1305
1350
|
// Step 3: Generate response — stream if callback provided
|
|
1306
1351
|
const genStart = Date.now();
|
|
1307
1352
|
let response;
|
|
1308
1353
|
let firstTokenMs = 0;
|
|
1309
1354
|
if (options.onToken) {
|
|
1310
|
-
const streamResult = await this.generateStream(options.modelId,
|
|
1355
|
+
const streamResult = await this.generateStream(options.modelId, messages, {
|
|
1311
1356
|
temperature: options.temperature,
|
|
1312
1357
|
maxTokens: options.maxTokens || ragConfig.maxGenTokens,
|
|
1313
1358
|
onToken: options.onToken,
|
|
@@ -1316,7 +1361,7 @@ class SlyOS {
|
|
|
1316
1361
|
firstTokenMs = streamResult.firstTokenMs;
|
|
1317
1362
|
}
|
|
1318
1363
|
else {
|
|
1319
|
-
response = await this.generate(options.modelId,
|
|
1364
|
+
response = await this.generate(options.modelId, messages, {
|
|
1320
1365
|
temperature: options.temperature,
|
|
1321
1366
|
maxTokens: options.maxTokens || ragConfig.maxGenTokens,
|
|
1322
1367
|
});
|
|
@@ -1405,14 +1450,17 @@ class SlyOS {
|
|
|
1405
1450
|
.trim();
|
|
1406
1451
|
if (context.length > ragConfig.maxContextChars)
|
|
1407
1452
|
context = context.substring(0, ragConfig.maxContextChars);
|
|
1408
|
-
const
|
|
1453
|
+
const messages = [
|
|
1454
|
+
{ role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
|
|
1455
|
+
{ role: 'user', content: options.query },
|
|
1456
|
+
];
|
|
1409
1457
|
const contextBuildMs = Date.now() - contextBuildStart;
|
|
1410
1458
|
// Step 5: Generate — stream if callback provided
|
|
1411
1459
|
const genStart = Date.now();
|
|
1412
1460
|
let response;
|
|
1413
1461
|
let firstTokenMs = 0;
|
|
1414
1462
|
if (options.onToken) {
|
|
1415
|
-
const streamResult = await this.generateStream(options.modelId,
|
|
1463
|
+
const streamResult = await this.generateStream(options.modelId, messages, {
|
|
1416
1464
|
temperature: options.temperature || 0.6,
|
|
1417
1465
|
maxTokens: options.maxTokens || ragConfig.maxGenTokens,
|
|
1418
1466
|
onToken: options.onToken,
|
|
@@ -1421,7 +1469,7 @@ class SlyOS {
|
|
|
1421
1469
|
firstTokenMs = streamResult.firstTokenMs;
|
|
1422
1470
|
}
|
|
1423
1471
|
else {
|
|
1424
|
-
response = await this.generate(options.modelId,
|
|
1472
|
+
response = await this.generate(options.modelId, messages, {
|
|
1425
1473
|
temperature: options.temperature || 0.6,
|
|
1426
1474
|
maxTokens: options.maxTokens || ragConfig.maxGenTokens,
|
|
1427
1475
|
});
|
|
@@ -1505,14 +1553,17 @@ class SlyOS {
|
|
|
1505
1553
|
.trim();
|
|
1506
1554
|
if (context.length > ragConfig.maxContextChars)
|
|
1507
1555
|
context = context.substring(0, ragConfig.maxContextChars);
|
|
1508
|
-
const
|
|
1556
|
+
const messages = [
|
|
1557
|
+
{ role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
|
|
1558
|
+
{ role: 'user', content: options.query },
|
|
1559
|
+
];
|
|
1509
1560
|
const contextBuildMs = Date.now() - contextBuildStart;
|
|
1510
1561
|
// Generate
|
|
1511
1562
|
const genStart = Date.now();
|
|
1512
1563
|
let response;
|
|
1513
1564
|
let firstTokenMs = 0;
|
|
1514
1565
|
if (options.onToken) {
|
|
1515
|
-
const streamResult = await this.generateStream(options.modelId,
|
|
1566
|
+
const streamResult = await this.generateStream(options.modelId, messages, {
|
|
1516
1567
|
temperature: options.temperature || 0.6,
|
|
1517
1568
|
maxTokens: options.maxTokens || ragConfig.maxGenTokens,
|
|
1518
1569
|
onToken: options.onToken,
|
|
@@ -1521,7 +1572,7 @@ class SlyOS {
|
|
|
1521
1572
|
firstTokenMs = streamResult.firstTokenMs;
|
|
1522
1573
|
}
|
|
1523
1574
|
else {
|
|
1524
|
-
response = await this.generate(options.modelId,
|
|
1575
|
+
response = await this.generate(options.modelId, messages, {
|
|
1525
1576
|
temperature: options.temperature || 0.6,
|
|
1526
1577
|
maxTokens: options.maxTokens || ragConfig.maxGenTokens,
|
|
1527
1578
|
});
|
package/package.json
CHANGED
package/slyos-chatbot/README.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# SlyOS Interactive Chatbot
|
|
2
|
+
|
|
3
|
+
A simple yet powerful interactive chatbot powered by the SlyOS SDK.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Interactive command-line interface with colored output
|
|
8
|
+
- Conversation history management
|
|
9
|
+
- Easy API configuration
|
|
10
|
+
- Cross-platform support (Mac, Windows, Linux)
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
1. Clone or download this project
|
|
15
|
+
2. Install dependencies: `npm install`
|
|
16
|
+
3. Configure your API key (see Configuration)
|
|
17
|
+
|
|
18
|
+
## Configuration
|
|
19
|
+
|
|
20
|
+
### Environment Variables
|
|
21
|
+
|
|
22
|
+
Set these environment variables before running:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
export SLYOS_API_KEY=your_api_key_here
|
|
26
|
+
export SLYOS_MODEL=quantum-1.7b
|
|
27
|
+
export SLYOS_SERVER=https://api.slyos.world
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Or create a `.env` file based on `.env.example`.
|
|
31
|
+
|
|
32
|
+
## Running the Chatbot
|
|
33
|
+
|
|
34
|
+
### Direct Method
|
|
35
|
+
```bash
|
|
36
|
+
npm start
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### With Environment Variables
|
|
40
|
+
```bash
|
|
41
|
+
SLYOS_API_KEY=your_key npm start
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Manual
|
|
45
|
+
```bash
|
|
46
|
+
node app.mjs
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
Once the chatbot starts:
|
|
52
|
+
|
|
53
|
+
- **Chat**: Type your message and press Enter
|
|
54
|
+
- **Clear History**: Type `clear` to reset conversation
|
|
55
|
+
- **Exit**: Type `exit` or `quit` to end session
|
|
56
|
+
- **Interrupt**: Press Ctrl+C to exit anytime
|
|
57
|
+
|
|
58
|
+
## API Response Format
|
|
59
|
+
|
|
60
|
+
The chatbot supports multiple response formats from the SDK:
|
|
61
|
+
|
|
62
|
+
- `response.content` - Primary response text
|
|
63
|
+
- `response.text` - Alternative response field
|
|
64
|
+
- Direct string response - Fallback format
|
|
65
|
+
|
|
66
|
+
## Troubleshooting
|
|
67
|
+
|
|
68
|
+
### "Error initializing SDK"
|
|
69
|
+
- Check that your API key is valid
|
|
70
|
+
- Verify the SlyOS server is accessible
|
|
71
|
+
- Ensure internet connection is active
|
|
72
|
+
|
|
73
|
+
### "Cannot find module '@beltoinc/slyos-sdk'"
|
|
74
|
+
- Run `npm install` to install dependencies
|
|
75
|
+
- Verify the package is installed: `npm list @beltoinc/slyos-sdk`
|
|
76
|
+
|
|
77
|
+
### Placeholder API Key Warning
|
|
78
|
+
- Set the `SLYOS_API_KEY` environment variable with your actual key
|
|
79
|
+
- Or update `config.apiKey` in `app.mjs`
|
|
80
|
+
|
|
81
|
+
## System Requirements
|
|
82
|
+
|
|
83
|
+
- Node.js 14+ (14.17.0 or higher recommended)
|
|
84
|
+
- npm 6+
|
|
85
|
+
- Internet connection for API access
|
|
86
|
+
|
|
87
|
+
## License
|
|
88
|
+
|
|
89
|
+
MIT
|