@beltoinc/slyos-sdk 1.5.2 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -241,12 +241,18 @@ declare class SlyOS {
241
241
  loadModel(modelId: string, options?: {
242
242
  quant?: QuantizationLevel;
243
243
  }): Promise<void>;
244
- generate(modelId: string, prompt: string, options?: GenerateOptions): Promise<string>;
244
+ generate(modelId: string, prompt: string | Array<{
245
+ role: string;
246
+ content: string;
247
+ }>, options?: GenerateOptions): Promise<string>;
245
248
  /**
246
249
  * Stream text generation token-by-token.
247
250
  * Calls onToken callback for each generated token.
248
251
  */
249
- generateStream(modelId: string, prompt: string, options?: GenerateOptions & {
252
+ generateStream(modelId: string, prompt: string | Array<{
253
+ role: string;
254
+ content: string;
255
+ }>, options?: GenerateOptions & {
250
256
  onToken?: (token: string, partial: string) => void;
251
257
  }): Promise<{
252
258
  text: string;
package/dist/index.js CHANGED
@@ -801,6 +801,7 @@ class SlyOS {
801
801
  throw new Error(`Model "${modelId}" is not an LLM. Use transcribe() for STT models.`);
802
802
  }
803
803
  const maxTokens = Math.min(options.maxTokens || 100, contextWindow || 2048);
804
+ const isMessages = Array.isArray(prompt);
804
805
  this.emitProgress('generating', 0, `Generating response (max ${maxTokens} tokens)...`);
805
806
  this.emitEvent('inference_start', { modelId, maxTokens });
806
807
  const startTime = Date.now();
@@ -810,13 +811,30 @@ class SlyOS {
810
811
  temperature: options.temperature || 0.7,
811
812
  top_p: options.topP || 0.9,
812
813
  do_sample: true,
814
+ repetition_penalty: 1.1,
813
815
  });
814
- const rawOutput = result[0].generated_text;
815
- // HuggingFace transformers returns the prompt + generated text concatenated.
816
- // Strip the original prompt so we only return the NEW tokens.
817
- const response = rawOutput.startsWith(prompt)
818
- ? rawOutput.slice(prompt.length).trim()
819
- : rawOutput.trim();
816
+ let response;
817
+ if (isMessages) {
818
+ // When using messages format, the pipeline returns the assistant's reply
819
+ // in the last message of the generated conversation
820
+ const generated = result[0].generated_text;
821
+ if (Array.isArray(generated)) {
822
+ // Transformers.js returns messages array — extract assistant reply
823
+ const assistantMsg = generated.filter((m) => m.role === 'assistant').pop();
824
+ response = assistantMsg?.content?.trim() || '';
825
+ }
826
+ else {
827
+ response = typeof generated === 'string' ? generated.trim() : '';
828
+ }
829
+ }
830
+ else {
831
+ const rawOutput = result[0].generated_text;
832
+ // HuggingFace transformers returns the prompt + generated text concatenated.
833
+ // Strip the original prompt so we only return the NEW tokens.
834
+ response = (typeof rawOutput === 'string' && rawOutput.startsWith(prompt))
835
+ ? rawOutput.slice(prompt.length).trim()
836
+ : (typeof rawOutput === 'string' ? rawOutput.trim() : '');
837
+ }
820
838
  const latency = Date.now() - startTime;
821
839
  const tokensGenerated = response.split(/\s+/).length;
822
840
  const tokensPerSec = (tokensGenerated / (latency / 1000)).toFixed(1);
@@ -885,9 +903,12 @@ class SlyOS {
885
903
  if (info.category !== 'llm')
886
904
  throw new Error(`Not an LLM`);
887
905
  const maxTokens = Math.min(options.maxTokens || 100, contextWindow || 2048);
906
+ const isMessages = Array.isArray(prompt);
888
907
  const startTime = Date.now();
889
908
  let firstTokenTime = 0;
890
909
  let accumulated = '';
910
+ let prevText = '';
911
+ let callbackCount = 0;
891
912
  this.emitProgress('generating', 0, `Streaming (max ${maxTokens} tokens)...`);
892
913
  try {
893
914
  const result = await pipe(prompt, {
@@ -895,28 +916,57 @@ class SlyOS {
895
916
  temperature: options.temperature || 0.7,
896
917
  top_p: options.topP || 0.9,
897
918
  do_sample: true,
898
- // Transformers.js streamer callback
919
+ repetition_penalty: 1.1,
920
+ // Transformers.js v3 streamer callback — receives decoded output tokens
899
921
  callback_function: (output) => {
922
+ callbackCount++;
900
923
  if (!firstTokenTime)
901
924
  firstTokenTime = Date.now() - startTime;
902
- if (output && output.length > 0) {
903
- // output is token IDs, we need to decode
904
- // The callback in transformers.js v3 gives decoded text tokens
905
- const tokenText = typeof output === 'string' ? output : '';
906
- if (tokenText) {
907
- accumulated += tokenText;
908
- options.onToken?.(tokenText, accumulated);
909
- this.emitEvent('token', { token: tokenText, partial: accumulated });
925
+ // Transformers.js v3 callback_function may receive:
926
+ // 1. A string (decoded text so far) in some pipeline configurations
927
+ // 2. Token IDs array/tensor in others
928
+ // We handle both cases
929
+ let tokenText = '';
930
+ if (typeof output === 'string') {
931
+ tokenText = output;
932
+ }
933
+ else if (output && typeof output === 'object') {
934
+ // For newer Transformers.js: try to extract text if available
935
+ if (output.text)
936
+ tokenText = output.text;
937
+ }
938
+ if (tokenText && tokenText !== prevText) {
939
+ const newPart = tokenText.startsWith(prevText) ? tokenText.slice(prevText.length) : tokenText;
940
+ prevText = tokenText;
941
+ if (newPart) {
942
+ accumulated += newPart;
943
+ options.onToken?.(newPart, accumulated);
944
+ this.emitEvent('token', { token: newPart, partial: accumulated });
910
945
  }
911
946
  }
912
947
  }
913
948
  });
914
- const rawOutput = result[0].generated_text;
915
- const response = rawOutput.startsWith(prompt) ? rawOutput.slice(prompt.length).trim() : rawOutput.trim();
949
+ let response;
950
+ if (isMessages) {
951
+ const generated = result[0].generated_text;
952
+ if (Array.isArray(generated)) {
953
+ const assistantMsg = generated.filter((m) => m.role === 'assistant').pop();
954
+ response = assistantMsg?.content?.trim() || '';
955
+ }
956
+ else {
957
+ response = typeof generated === 'string' ? generated.trim() : '';
958
+ }
959
+ }
960
+ else {
961
+ const rawOutput = result[0].generated_text;
962
+ response = (typeof rawOutput === 'string' && rawOutput.startsWith(prompt))
963
+ ? rawOutput.slice(prompt.length).trim()
964
+ : (typeof rawOutput === 'string' ? rawOutput.trim() : '');
965
+ }
916
966
  if (!firstTokenTime)
917
967
  firstTokenTime = Date.now() - startTime;
918
968
  const totalMs = Date.now() - startTime;
919
- const tokensGenerated = response.split(/\s+/).length;
969
+ const tokensGenerated = response.split(/\s+/).filter(Boolean).length;
920
970
  this.emitProgress('ready', 100, `Streamed ${tokensGenerated} tokens in ${(totalMs / 1000).toFixed(1)}s`);
921
971
  return { text: response, firstTokenMs: firstTokenTime, totalMs, tokensGenerated };
922
972
  }
@@ -983,21 +1033,13 @@ class SlyOS {
983
1033
  // ── OpenAI Compatibility ────────────────────────────────────────────
984
1034
  async chatCompletion(modelId, request) {
985
1035
  try {
986
- // Convert OpenAI message format to a prompt string
987
- const prompt = request.messages
988
- .map(msg => {
989
- if (msg.role === 'system') {
990
- return `System: ${msg.content}`;
991
- }
992
- else if (msg.role === 'user') {
993
- return `User: ${msg.content}`;
994
- }
995
- else {
996
- return `Assistant: ${msg.content}`;
997
- }
998
- })
999
- .join('\n\n');
1000
- const response = await this.generate(modelId, prompt, {
1036
+ // Pass messages directly to generate() Transformers.js v3 applies the model's
1037
+ // chat template automatically, which produces much better results than raw text
1038
+ const messages = request.messages.map(msg => ({
1039
+ role: msg.role,
1040
+ content: msg.content,
1041
+ }));
1042
+ const response = await this.generate(modelId, messages, {
1001
1043
  temperature: request.temperature,
1002
1044
  maxTokens: request.max_tokens,
1003
1045
  topP: request.top_p,
@@ -1291,23 +1333,26 @@ class SlyOS {
1291
1333
  model_id: options.modelId
1292
1334
  }, { headers: { Authorization: `Bearer ${this.token}` } });
1293
1335
  const retrievalMs = Date.now() - retrievalStart;
1294
- let { retrieved_chunks, prompt_template, context } = searchResponse.data;
1336
+ let { retrieved_chunks, context } = searchResponse.data;
1295
1337
  // Step 2: Build context with dynamic limits
1296
1338
  const contextBuildStart = Date.now();
1297
1339
  if (context && context.length > ragConfig.maxContextChars) {
1298
1340
  context = context.substring(0, ragConfig.maxContextChars);
1299
1341
  }
1300
- // If no prompt_template from server, build minimal one
1301
- if (!prompt_template) {
1302
- prompt_template = `${context}\n\nQ: ${options.query}\nA:`;
1303
- }
1342
+ // Build messages array for proper chat template handling
1343
+ // This uses the model's built-in chat template (e.g. <|im_start|> for SmolLM/Qwen)
1344
+ // which produces dramatically better results than raw text prompts
1345
+ const messages = [
1346
+ { role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
1347
+ { role: 'user', content: options.query },
1348
+ ];
1304
1349
  const contextBuildMs = Date.now() - contextBuildStart;
1305
1350
  // Step 3: Generate response — stream if callback provided
1306
1351
  const genStart = Date.now();
1307
1352
  let response;
1308
1353
  let firstTokenMs = 0;
1309
1354
  if (options.onToken) {
1310
- const streamResult = await this.generateStream(options.modelId, prompt_template, {
1355
+ const streamResult = await this.generateStream(options.modelId, messages, {
1311
1356
  temperature: options.temperature,
1312
1357
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1313
1358
  onToken: options.onToken,
@@ -1316,7 +1361,7 @@ class SlyOS {
1316
1361
  firstTokenMs = streamResult.firstTokenMs;
1317
1362
  }
1318
1363
  else {
1319
- response = await this.generate(options.modelId, prompt_template, {
1364
+ response = await this.generate(options.modelId, messages, {
1320
1365
  temperature: options.temperature,
1321
1366
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1322
1367
  });
@@ -1405,14 +1450,17 @@ class SlyOS {
1405
1450
  .trim();
1406
1451
  if (context.length > ragConfig.maxContextChars)
1407
1452
  context = context.substring(0, ragConfig.maxContextChars);
1408
- const prompt = `${context}\n\nQ: ${options.query}\nA:`;
1453
+ const messages = [
1454
+ { role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
1455
+ { role: 'user', content: options.query },
1456
+ ];
1409
1457
  const contextBuildMs = Date.now() - contextBuildStart;
1410
1458
  // Step 5: Generate — stream if callback provided
1411
1459
  const genStart = Date.now();
1412
1460
  let response;
1413
1461
  let firstTokenMs = 0;
1414
1462
  if (options.onToken) {
1415
- const streamResult = await this.generateStream(options.modelId, prompt, {
1463
+ const streamResult = await this.generateStream(options.modelId, messages, {
1416
1464
  temperature: options.temperature || 0.6,
1417
1465
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1418
1466
  onToken: options.onToken,
@@ -1421,7 +1469,7 @@ class SlyOS {
1421
1469
  firstTokenMs = streamResult.firstTokenMs;
1422
1470
  }
1423
1471
  else {
1424
- response = await this.generate(options.modelId, prompt, {
1472
+ response = await this.generate(options.modelId, messages, {
1425
1473
  temperature: options.temperature || 0.6,
1426
1474
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1427
1475
  });
@@ -1505,14 +1553,17 @@ class SlyOS {
1505
1553
  .trim();
1506
1554
  if (context.length > ragConfig.maxContextChars)
1507
1555
  context = context.substring(0, ragConfig.maxContextChars);
1508
- const prompt = `${context}\n\nQ: ${options.query}\nA:`;
1556
+ const messages = [
1557
+ { role: 'system', content: `Answer questions using only the following context. Be concise.\n\n${context}` },
1558
+ { role: 'user', content: options.query },
1559
+ ];
1509
1560
  const contextBuildMs = Date.now() - contextBuildStart;
1510
1561
  // Generate
1511
1562
  const genStart = Date.now();
1512
1563
  let response;
1513
1564
  let firstTokenMs = 0;
1514
1565
  if (options.onToken) {
1515
- const streamResult = await this.generateStream(options.modelId, prompt, {
1566
+ const streamResult = await this.generateStream(options.modelId, messages, {
1516
1567
  temperature: options.temperature || 0.6,
1517
1568
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1518
1569
  onToken: options.onToken,
@@ -1521,7 +1572,7 @@ class SlyOS {
1521
1572
  firstTokenMs = streamResult.firstTokenMs;
1522
1573
  }
1523
1574
  else {
1524
- response = await this.generate(options.modelId, prompt, {
1575
+ response = await this.generate(options.modelId, messages, {
1525
1576
  temperature: options.temperature || 0.6,
1526
1577
  maxTokens: options.maxTokens || ragConfig.maxGenTokens,
1527
1578
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@beltoinc/slyos-sdk",
3
- "version": "1.5.2",
3
+ "version": "1.5.3",
4
4
  "description": "SlyOS - On-Device AI SDK for Web and Node.js",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -0,0 +1,4 @@
1
+ SLYOS_API_KEY=[REDACTED — a live `sk_live_…` secret was published in this package's .env; the key is compromised and must be rotated immediately, and .env added to .npmignore/"files"]
2
+ SLYOS_MODEL=quantum-1.7b
3
+ SLYOS_SERVER=https://api.slyos.world
4
+ SLYOS_KB_ID=02e04c4d-ff4e-4b7a-b95a-7b89933041eb
@@ -0,0 +1,4 @@
1
+ # Slyos SDK Configuration
2
+ SLYOS_API_KEY=your_api_key_here
3
+ SLYOS_MODEL=quantum-1.7b
4
+ SLYOS_SERVER=https://api.slyos.world
@@ -0,0 +1,89 @@
1
+ # Slyos Interactive Chatbot
2
+
3
+ A simple yet powerful interactive chatbot powered by the Slyos SDK.
4
+
5
+ ## Features
6
+
7
+ - Interactive command-line interface with colored output
8
+ - Conversation history management
9
+ - Easy API configuration
10
+ - Cross-platform support (Mac, Windows, Linux)
11
+
12
+ ## Installation
13
+
14
+ 1. Clone or download this project
15
+ 2. Install dependencies: `npm install`
16
+ 3. Configure your API key (see Configuration)
17
+
18
+ ## Configuration
19
+
20
+ ### Environment Variables
21
+
22
+ Set these environment variables before running:
23
+
24
+ ```bash
25
+ export SLYOS_API_KEY=your_api_key_here
26
+ export SLYOS_MODEL=quantum-1.7b
27
+ export SLYOS_SERVER=https://api.slyos.world
28
+ ```
29
+
30
+ Or create a `.env` file based on `.env.example`.
31
+
32
+ ## Running the Chatbot
33
+
34
+ ### Direct Method
35
+ ```bash
36
+ npm start
37
+ ```
38
+
39
+ ### With Environment Variables
40
+ ```bash
41
+ SLYOS_API_KEY=your_key npm start
42
+ ```
43
+
44
+ ### Manual
45
+ ```bash
46
+ node app.mjs
47
+ ```
48
+
49
+ ## Usage
50
+
51
+ Once the chatbot starts:
52
+
53
+ - **Chat**: Type your message and press Enter
54
+ - **Clear History**: Type `clear` to reset conversation
55
+ - **Exit**: Type `exit` or `quit` to end session
56
+ - **Interrupt**: Press Ctrl+C to exit anytime
57
+
58
+ ## API Response Format
59
+
60
+ The chatbot supports multiple response formats from the SDK:
61
+
62
+ - `response.content` - Primary response text
63
+ - `response.text` - Alternative response field
64
+ - Direct string response - Fallback format
65
+
66
+ ## Troubleshooting
67
+
68
+ ### "Error initializing SDK"
69
+ - Check that your API key is valid
70
+ - Verify the Slyos server is accessible
71
+ - Ensure internet connection is active
72
+
73
+ ### "Cannot find module '@beltoinc/slyos-sdk'"
74
+ - Run `npm install` to install dependencies
75
+ - Check installed dependencies: `npm list`
76
+
77
+ ### Placeholder API Key Warning
78
+ - Set the `SLYOS_API_KEY` environment variable with your actual key
79
+ - Or update `config.apiKey` in `app.mjs`
80
+
81
+ ## System Requirements
82
+
83
+ - Node.js 14+ (14.17.0 or higher recommended)
84
+ - npm 6+
85
+ - Internet connection for API access
86
+
87
+ ## License
88
+
89
+ MIT