@beltoinc/slyos-sdk 1.5.2 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -241,12 +241,18 @@ declare class SlyOS {
241
241
  loadModel(modelId: string, options?: {
242
242
  quant?: QuantizationLevel;
243
243
  }): Promise<void>;
244
- generate(modelId: string, prompt: string, options?: GenerateOptions): Promise<string>;
244
+ generate(modelId: string, prompt: string | Array<{
245
+ role: string;
246
+ content: string;
247
+ }>, options?: GenerateOptions): Promise<string>;
245
248
  /**
246
249
  * Stream text generation token-by-token.
247
250
  * Calls onToken callback for each generated token.
248
251
  */
249
- generateStream(modelId: string, prompt: string, options?: GenerateOptions & {
252
+ generateStream(modelId: string, prompt: string | Array<{
253
+ role: string;
254
+ content: string;
255
+ }>, options?: GenerateOptions & {
250
256
  onToken?: (token: string, partial: string) => void;
251
257
  }): Promise<{
252
258
  text: string;
package/dist/index.js CHANGED
@@ -801,6 +801,7 @@ class SlyOS {
801
801
  throw new Error(`Model "${modelId}" is not an LLM. Use transcribe() for STT models.`);
802
802
  }
803
803
  const maxTokens = Math.min(options.maxTokens || 100, contextWindow || 2048);
804
+ const isMessages = Array.isArray(prompt);
804
805
  this.emitProgress('generating', 0, `Generating response (max ${maxTokens} tokens)...`);
805
806
  this.emitEvent('inference_start', { modelId, maxTokens });
806
807
  const startTime = Date.now();
@@ -810,13 +811,30 @@ class SlyOS {
810
811
  temperature: options.temperature || 0.7,
811
812
  top_p: options.topP || 0.9,
812
813
  do_sample: true,
814
+ repetition_penalty: 1.1,
813
815
  });
814
- const rawOutput = result[0].generated_text;
815
- // HuggingFace transformers returns the prompt + generated text concatenated.
816
- // Strip the original prompt so we only return the NEW tokens.
817
- const response = rawOutput.startsWith(prompt)
818
- ? rawOutput.slice(prompt.length).trim()
819
- : rawOutput.trim();
816
+ let response;
817
+ if (isMessages) {
818
+ // When using messages format, the pipeline returns the assistant's reply
819
+ // in the last message of the generated conversation
820
+ const generated = result[0].generated_text;
821
+ if (Array.isArray(generated)) {
822
+ // Transformers.js returns messages array — extract assistant reply
823
+ const assistantMsg = generated.filter((m) => m.role === 'assistant').pop();
824
+ response = assistantMsg?.content?.trim() || '';
825
+ }
826
+ else {
827
+ response = typeof generated === 'string' ? generated.trim() : '';
828
+ }
829
+ }
830
+ else {
831
+ const rawOutput = result[0].generated_text;
832
+ // HuggingFace transformers returns the prompt + generated text concatenated.
833
+ // Strip the original prompt so we only return the NEW tokens.
834
+ response = (typeof rawOutput === 'string' && rawOutput.startsWith(prompt))
835
+ ? rawOutput.slice(prompt.length).trim()
836
+ : (typeof rawOutput === 'string' ? rawOutput.trim() : '');
837
+ }
820
838
  const latency = Date.now() - startTime;
821
839
  const tokensGenerated = response.split(/\s+/).length;
822
840
  const tokensPerSec = (tokensGenerated / (latency / 1000)).toFixed(1);
@@ -885,9 +903,12 @@ class SlyOS {
885
903
  if (info.category !== 'llm')
886
904
  throw new Error(`Not an LLM`);
887
905
  const maxTokens = Math.min(options.maxTokens || 100, contextWindow || 2048);
906
+ const isMessages = Array.isArray(prompt);
888
907
  const startTime = Date.now();
889
908
  let firstTokenTime = 0;
890
909
  let accumulated = '';
910
+ let prevText = '';
911
+ let callbackCount = 0;
891
912
  this.emitProgress('generating', 0, `Streaming (max ${maxTokens} tokens)...`);
892
913
  try {
893
914
  const result = await pipe(prompt, {
@@ -895,28 +916,57 @@ class SlyOS {
895
916
  temperature: options.temperature || 0.7,
896
917
  top_p: options.topP || 0.9,
897
918
  do_sample: true,
898
- // Transformers.js streamer callback
919
+ repetition_penalty: 1.1,
920
+ // Transformers.js v3 streamer callback — receives decoded output tokens
899
921
  callback_function: (output) => {
922
+ callbackCount++;
900
923
  if (!firstTokenTime)
901
924
  firstTokenTime = Date.now() - startTime;
902
- if (output && output.length > 0) {
903
- // output is token IDs, we need to decode
904
- // The callback in transformers.js v3 gives decoded text tokens
905
- const tokenText = typeof output === 'string' ? output : '';
906
- if (tokenText) {
907
- accumulated += tokenText;
908
- options.onToken?.(tokenText, accumulated);
909
- this.emitEvent('token', { token: tokenText, partial: accumulated });
925
+ // Transformers.js v3 callback_function may receive:
926
+ // 1. A string (decoded text so far) in some pipeline configurations
927
+ // 2. Token IDs array/tensor in others
928
+ // We handle both cases
929
+ let tokenText = '';
930
+ if (typeof output === 'string') {
931
+ tokenText = output;
932
+ }
933
+ else if (output && typeof output === 'object') {
934
+ // For newer Transformers.js: try to extract text if available
935
+ if (output.text)
936
+ tokenText = output.text;
937
+ }
938
+ if (tokenText && tokenText !== prevText) {
939
+ const newPart = tokenText.startsWith(prevText) ? tokenText.slice(prevText.length) : tokenText;
940
+ prevText = tokenText;
941
+ if (newPart) {
942
+ accumulated += newPart;
943
+ options.onToken?.(newPart, accumulated);
944
+ this.emitEvent('token', { token: newPart, partial: accumulated });
910
945
  }
911
946
  }
912
947
  }
913
948
  });
914
- const rawOutput = result[0].generated_text;
915
- const response = rawOutput.startsWith(prompt) ? rawOutput.slice(prompt.length).trim() : rawOutput.trim();
949
+ let response;
950
+ if (isMessages) {
951
+ const generated = result[0].generated_text;
952
+ if (Array.isArray(generated)) {
953
+ const assistantMsg = generated.filter((m) => m.role === 'assistant').pop();
954
+ response = assistantMsg?.content?.trim() || '';
955
+ }
956
+ else {
957
+ response = typeof generated === 'string' ? generated.trim() : '';
958
+ }
959
+ }
960
+ else {
961
+ const rawOutput = result[0].generated_text;
962
+ response = (typeof rawOutput === 'string' && rawOutput.startsWith(prompt))
963
+ ? rawOutput.slice(prompt.length).trim()
964
+ : (typeof rawOutput === 'string' ? rawOutput.trim() : '');
965
+ }
916
966
  if (!firstTokenTime)
917
967
  firstTokenTime = Date.now() - startTime;
918
968
  const totalMs = Date.now() - startTime;
919
- const tokensGenerated = response.split(/\s+/).length;
969
+ const tokensGenerated = response.split(/\s+/).filter(Boolean).length;
920
970
  this.emitProgress('ready', 100, `Streamed ${tokensGenerated} tokens in ${(totalMs / 1000).toFixed(1)}s`);
921
971
  return { text: response, firstTokenMs: firstTokenTime, totalMs, tokensGenerated };
922
972
  }
@@ -983,21 +1033,13 @@ class SlyOS {
983
1033
  // ── OpenAI Compatibility ────────────────────────────────────────────
984
1034
  async chatCompletion(modelId, request) {
985
1035
  try {
986
- // Convert OpenAI message format to a prompt string
987
- const prompt = request.messages
988
- .map(msg => {
989
- if (msg.role === 'system') {
990
- return `System: ${msg.content}`;
991
- }
992
- else if (msg.role === 'user') {
993
- return `User: ${msg.content}`;
994
- }
995
- else {
996
- return `Assistant: ${msg.content}`;
997
- }
998
- })
999
- .join('\n\n');
1000
- const response = await this.generate(modelId, prompt, {
1036
+ // Pass messages directly to generate() Transformers.js v3 applies the model's
1037
+ // chat template automatically, which produces much better results than raw text
1038
+ const messages = request.messages.map(msg => ({
1039
+ role: msg.role,
1040
+ content: msg.content,
1041
+ }));
1042
+ const response = await this.generate(modelId, messages, {
1001
1043
  temperature: request.temperature,
1002
1044
  maxTokens: request.max_tokens,
1003
1045
  topP: request.top_p,
@@ -1291,23 +1333,26 @@ class SlyOS {
1291
1333
  model_id: options.modelId
1292
1334
  }, { headers: { Authorization: `Bearer ${this.token}` } });
1293
1335
  const retrievalMs = Date.now() - retrievalStart;
1294
- let { retrieved_chunks, prompt_template, context } = searchResponse.data;
1336
+ let { retrieved_chunks, context } = searchResponse.data;
1295
1337
  // Step 2: Build context with dynamic limits
1296
1338
  const contextBuildStart = Date.now();
1297
1339
  if (context && context.length > ragConfig.maxContextChars) {
1298
1340
  context = context.substring(0, ragConfig.maxContextChars);
1299
1341
  }
1300
- // If no prompt_template from server, build minimal one
1301
- if (!prompt_template) {
1302
- prompt_template = `${context}\n\nQ: ${options.query}\nA:`;
1303
- }
1342
+ // Build messages array for proper chat template handling
1343
+ // This uses the model's built-in chat template (e.g. <|im_start|> for SmolLM/Qwen)
1344
+ // which produces dramatically better results than raw text prompts
1345
+ const messages = [
1346
+ { role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
1347
+ { role: 'user', content: options.query },
1348
+ ];
1304
1349
  const contextBuildMs = Date.now() - contextBuildStart;
1305
1350
  // Step 3: Generate response — stream if callback provided
1306
1351
  const genStart = Date.now();
1307
1352
  let response;
1308
1353
  let firstTokenMs = 0;
1309
1354
  if (options.onToken) {
1310
- const streamResult = await this.generateStream(options.modelId, prompt_template, {
1355
+ const streamResult = await this.generateStream(options.modelId, messages, {
1311
1356
  temperature: options.temperature,
1312
1357
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1313
1358
  onToken: options.onToken,
@@ -1316,7 +1361,7 @@ class SlyOS {
1316
1361
  firstTokenMs = streamResult.firstTokenMs;
1317
1362
  }
1318
1363
  else {
1319
- response = await this.generate(options.modelId, prompt_template, {
1364
+ response = await this.generate(options.modelId, messages, {
1320
1365
  temperature: options.temperature,
1321
1366
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1322
1367
  });
@@ -1405,14 +1450,17 @@ class SlyOS {
1405
1450
  .trim();
1406
1451
  if (context.length > ragConfig.maxContextChars)
1407
1452
  context = context.substring(0, ragConfig.maxContextChars);
1408
- const prompt = `${context}\n\nQ: ${options.query}\nA:`;
1453
+ const messages = [
1454
+ { role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
1455
+ { role: 'user', content: options.query },
1456
+ ];
1409
1457
  const contextBuildMs = Date.now() - contextBuildStart;
1410
1458
  // Step 5: Generate — stream if callback provided
1411
1459
  const genStart = Date.now();
1412
1460
  let response;
1413
1461
  let firstTokenMs = 0;
1414
1462
  if (options.onToken) {
1415
- const streamResult = await this.generateStream(options.modelId, prompt, {
1463
+ const streamResult = await this.generateStream(options.modelId, messages, {
1416
1464
  temperature: options.temperature || 0.6,
1417
1465
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1418
1466
  onToken: options.onToken,
@@ -1421,7 +1469,7 @@ class SlyOS {
1421
1469
  firstTokenMs = streamResult.firstTokenMs;
1422
1470
  }
1423
1471
  else {
1424
- response = await this.generate(options.modelId, prompt, {
1472
+ response = await this.generate(options.modelId, messages, {
1425
1473
  temperature: options.temperature || 0.6,
1426
1474
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1427
1475
  });
@@ -1505,14 +1553,17 @@ class SlyOS {
1505
1553
  .trim();
1506
1554
  if (context.length > ragConfig.maxContextChars)
1507
1555
  context = context.substring(0, ragConfig.maxContextChars);
1508
- const prompt = `${context}\n\nQ: ${options.query}\nA:`;
1556
+ const messages = [
1557
+ { role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
1558
+ { role: 'user', content: options.query },
1559
+ ];
1509
1560
  const contextBuildMs = Date.now() - contextBuildStart;
1510
1561
  // Generate
1511
1562
  const genStart = Date.now();
1512
1563
  let response;
1513
1564
  let firstTokenMs = 0;
1514
1565
  if (options.onToken) {
1515
- const streamResult = await this.generateStream(options.modelId, prompt, {
1566
+ const streamResult = await this.generateStream(options.modelId, messages, {
1516
1567
  temperature: options.temperature || 0.6,
1517
1568
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1518
1569
  onToken: options.onToken,
@@ -1521,7 +1572,7 @@ class SlyOS {
1521
1572
  firstTokenMs = streamResult.firstTokenMs;
1522
1573
  }
1523
1574
  else {
1524
- response = await this.generate(options.modelId, prompt, {
1575
+ response = await this.generate(options.modelId, messages, {
1525
1576
  temperature: options.temperature || 0.6,
1526
1577
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1527
1578
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@beltoinc/slyos-sdk",
3
- "version": "1.5.2",
3
+ "version": "1.5.3",
4
4
  "description": "SlyOS - On-Device AI SDK for Web and Node.js",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,4 @@
1
+ SLYOS_API_KEY=[REDACTED — a live `sk_live_…` secret was published in this package's .env; the key is compromised and must be rotated immediately, and .env added to .npmignore/"files"]
2
+ SLYOS_MODEL=quantum-1.7b
3
+ SLYOS_SERVER=https://api.slyos.world
4
+ SLYOS_KB_ID=02e04c4d-ff4e-4b7a-b95a-7b89933041eb
@@ -0,0 +1,4 @@
1
+ # Slyos SDK Configuration
2
+ SLYOS_API_KEY=your_api_key_here
3
+ SLYOS_MODEL=quantum-1.7b
4
+ SLYOS_SERVER=https://api.slyos.world
@@ -0,0 +1,89 @@
1
+ # Slyos Interactive Chatbot
2
+
3
+ A simple yet powerful interactive chatbot powered by the Slyos SDK.
4
+
5
+ ## Features
6
+
7
+ - Interactive command-line interface with colored output
8
+ - Conversation history management
9
+ - Easy API configuration
10
+ - Cross-platform support (Mac, Windows, Linux)
11
+
12
+ ## Installation
13
+
14
+ 1. Clone or download this project
15
+ 2. Install dependencies: `npm install`
16
+ 3. Configure your API key (see Configuration)
17
+
18
+ ## Configuration
19
+
20
+ ### Environment Variables
21
+
22
+ Set these environment variables before running:
23
+
24
+ ```bash
25
+ export SLYOS_API_KEY=your_api_key_here
26
+ export SLYOS_MODEL=quantum-1.7b
27
+ export SLYOS_SERVER=https://api.slyos.world
28
+ ```
29
+
30
+ Or create a `.env` file based on `.env.example`.
31
+
32
+ ## Running the Chatbot
33
+
34
+ ### Direct Method
35
+ ```bash
36
+ npm start
37
+ ```
38
+
39
+ ### With Environment Variables
40
+ ```bash
41
+ SLYOS_API_KEY=your_key npm start
42
+ ```
43
+
44
+ ### Manual
45
+ ```bash
46
+ node app.mjs
47
+ ```
48
+
49
+ ## Usage
50
+
51
+ Once the chatbot starts:
52
+
53
+ - **Chat**: Type your message and press Enter
54
+ - **Clear History**: Type `clear` to reset conversation
55
+ - **Exit**: Type `exit` or `quit` to end session
56
+ - **Interrupt**: Press Ctrl+C to exit anytime
57
+
58
+ ## API Response Format
59
+
60
+ The chatbot supports multiple response formats from the SDK:
61
+
62
+ - `response.content` - Primary response text
63
+ - `response.text` - Alternative response field
64
+ - Direct string response - Fallback format
65
+
66
+ ## Troubleshooting
67
+
68
+ ### "Error initializing SDK"
69
+ - Check that your API key is valid
70
+ - Verify the Slyos server is accessible
71
+ - Ensure internet connection is active
72
+
73
+ ### "Cannot find module '@beltoinc/slyos-sdk'"
74
+ - Run `npm install` to install dependencies
75
+ - Check installed dependencies: `npm list`
76
+
77
+ ### Placeholder API Key Warning
78
+ - Set the `SLYOS_API_KEY` environment variable with your actual key
79
+ - Or update `config.apiKey` in `app.mjs`
80
+
81
+ ## System Requirements
82
+
83
+ - Node.js 14+ (14.17.0 or higher recommended)
84
+ - npm 6+
85
+ - Internet connection for API access
86
+
87
+ ## License
88
+
89
+ MIT