@emilshirokikh/slyos-sdk 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,10 +24,10 @@ const sdk = new SlyOS({
24
24
  await sdk.initialize();
25
25
 
26
26
  // 2. Load model (downloads ~200MB once)
27
- await sdk.loadModel('quantum-360m');
27
+ await sdk.loadModel('quantum-1.7b');
28
28
 
29
29
  // 3. Generate responses
30
- const response = await sdk.generate('quantum-360m',
30
+ const response = await sdk.generate('quantum-1.7b',
31
31
  'What is artificial intelligence?',
32
32
  {
33
33
  temperature: 0.7,
@@ -74,15 +74,15 @@ await sdk.initialize();
74
74
  #### `loadModel(modelId)`
75
75
  Downloads and caches AI model locally.
76
76
  ```javascript
77
- await sdk.loadModel('quantum-360m');
77
+ await sdk.loadModel('quantum-1.7b');
78
78
  ```
79
79
 
80
80
  **Parameters:**
81
81
  - `modelId` (string): Model identifier
82
- - `quantum-135m` - 80MB, fastest
83
- - `quantum-360m` - 200MB, recommended
84
- - `quantum-1.7b` - 1GB, high quality
85
- - `quantum-3b` - 1.7GB, best quality
82
+ - `quantum-1.7b` - 900MB, recommended
83
+ - `quantum-3b` - 1.6GB, high quality
84
+ - `quantum-code-3b` - 1.6GB, code-optimized
85
+ - `quantum-8b` - 4.2GB, best quality
86
86
 
87
87
  **Returns:** `Promise<void>`
88
88
 
@@ -94,7 +94,7 @@ await sdk.loadModel('quantum-360m');
94
94
  #### `generate(modelId, prompt, options?)`
95
95
  Generates AI response locally.
96
96
  ```javascript
97
- const response = await sdk.generate('quantum-360m',
97
+ const response = await sdk.generate('quantum-1.7b',
98
98
  'Tell me about your menu',
99
99
  {
100
100
  temperature: 0.7,
@@ -137,10 +137,10 @@ import SlyOS from '@emilshirokikh/slyos-sdk';
137
137
 
138
138
  const sdk = new SlyOS({ apiKey: 'sk_live_...' });
139
139
  await sdk.initialize();
140
- await sdk.loadModel('quantum-360m');
140
+ await sdk.loadModel('quantum-1.7b');
141
141
 
142
142
  async function chat(userMessage) {
143
- return await sdk.generate('quantum-360m', userMessage);
143
+ return await sdk.generate('quantum-1.7b', userMessage);
144
144
  }
145
145
 
146
146
  const response = await chat('What are your hours?');
@@ -157,7 +157,7 @@ Help with menu, hours, and nutrition. Be friendly and concise.`;
157
157
  const userMessage = 'What breakfast items do you have?';
158
158
  const fullPrompt = `${systemPrompt}\n\nCustomer: ${userMessage}\nAssistant:`;
159
159
 
160
- const response = await sdk.generate('quantum-360m', fullPrompt, {
160
+ const response = await sdk.generate('quantum-1.7b', fullPrompt, {
161
161
  temperature: 0.7,
162
162
  maxTokens: 150
163
163
  });
@@ -179,7 +179,7 @@ function Chatbot() {
179
179
  async function init() {
180
180
  const client = new SlyOS({ apiKey: 'sk_live_...' });
181
181
  await client.initialize();
182
- await client.loadModel('quantum-360m');
182
+ await client.loadModel('quantum-1.7b');
183
183
  setSdk(client);
184
184
  setLoading(false);
185
185
  }
@@ -187,7 +187,7 @@ function Chatbot() {
187
187
  }, []);
188
188
 
189
189
  async function handleChat(message) {
190
- const reply = await sdk.generate('quantum-360m', message);
190
+ const reply = await sdk.generate('quantum-1.7b', message);
191
191
  setResponse(reply);
192
192
  }
193
193
 
@@ -220,11 +220,11 @@ const sdk = new SlyOS({
220
220
 
221
221
  ### Multiple Models
222
222
  ```javascript
223
- await sdk.loadModel('quantum-360m');
223
+ await sdk.loadModel('quantum-1.7b');
224
- await sdk.loadModel('quantum-1.7b');
224
+ await sdk.loadModel('quantum-3b');
225
225
 
226
226
  // Use different models
227
- const fast = await sdk.generate('quantum-360m', 'Quick question?');
227
+ const fast = await sdk.generate('quantum-1.7b', 'Quick question?');
228
- const detailed = await sdk.generate('quantum-1.7b', 'Complex question?');
228
+ const detailed = await sdk.generate('quantum-3b', 'Complex question?');
229
229
  ```
230
230
 
@@ -249,7 +249,7 @@ const detailed = await sdk.generate('quantum-1.7b', 'Complex question?');
249
249
  ```javascript
250
250
  // Check browser console for errors
251
251
  // Ensure 2GB+ RAM available
252
- // Try smaller model (quantum-135m)
252
+ // Try the smallest available model (quantum-1.7b)
253
253
  ```
254
254
 
255
255
  ### CORS errors
package/create-chatbot.sh CHANGED
@@ -27,7 +27,8 @@ NC='\033[0m' # No Color
27
27
  # Default values
28
28
  API_KEY=""
29
29
  MODEL="quantum-1.7b"
30
- SLYOS_SERVER="https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com"
30
+ KB_ID=""
31
+ SLYOS_SERVER="https://api.slyos.world"
31
32
  PROJECT_NAME="slyos-chatbot"
32
33
 
33
34
  #################################################################################
@@ -70,12 +71,17 @@ while [[ $# -gt 0 ]]; do
70
71
  MODEL="$2"
71
72
  shift 2
72
73
  ;;
74
+ --kb-id)
75
+ KB_ID="$2"
76
+ shift 2
77
+ ;;
73
78
  -h|--help)
74
79
  echo "Usage: $0 [OPTIONS]"
75
80
  echo ""
76
81
  echo "Options:"
77
82
  echo " --api-key KEY Slyos API key (prompted if not provided)"
78
83
  echo " --model MODEL AI model to use (default: quantum-1.7b)"
84
+ echo " --kb-id ID Knowledge base ID for RAG (optional)"
79
85
  echo " -h, --help Show this help message"
80
86
  exit 0
81
87
  ;;
@@ -177,7 +183,7 @@ print_success "Package configuration updated"
177
183
  # Install Slyos SDK + dotenv
178
184
  print_step "Installing dependencies"
179
185
  print_info "This may take a moment..."
180
- npm install @emilshirokikh/slyos-sdk dotenv > /dev/null 2>&1
186
+ npm install @emilshirokikh/slyos-sdk dotenv node-fetch > /dev/null 2>&1
181
187
  print_success "Dependencies installed"
182
188
 
183
189
  # Create the chatbot application
@@ -188,6 +194,7 @@ cat > app.mjs << 'CHATBOT_EOF'
188
194
 
189
195
  import 'dotenv/config';
190
196
  import readline from 'readline';
197
+ import fetch from 'node-fetch';
191
198
  import SlyOS from '@emilshirokikh/slyos-sdk';
192
199
 
193
200
  // Color codes for terminal output
@@ -207,11 +214,13 @@ const colors = {
207
214
  const config = {
208
215
  apiKey: process.env.SLYOS_API_KEY || 'YOUR_API_KEY',
209
216
  model: process.env.SLYOS_MODEL || 'quantum-1.7b',
210
- server: process.env.SLYOS_SERVER || 'https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com'
217
+ server: process.env.SLYOS_SERVER || 'https://api.slyos.world',
218
+ kbId: process.env.SLYOS_KB_ID || ''
211
219
  };
212
220
 
213
221
  // Initialize SlyOS SDK
214
222
  let sdk;
223
+ let authToken = null; // Store auth token for direct RAG API calls
215
224
  try {
216
225
  sdk = new SlyOS({
217
226
  apiKey: config.apiKey,
@@ -222,6 +231,28 @@ try {
222
231
  process.exit(1);
223
232
  }
224
233
 
234
+ // Get auth token directly for RAG API calls
235
+ async function getAuthToken() {
236
+ if (authToken) return authToken;
237
+ try {
238
+ const res = await fetch(`${config.server}/api/auth/sdk`, {
239
+ method: 'POST',
240
+ headers: { 'Content-Type': 'application/json' },
241
+ body: JSON.stringify({ apiKey: config.apiKey })
242
+ });
243
+ if (!res.ok) {
244
+ console.log(`${colors.yellow}Auth failed: ${res.status} ${res.statusText}${colors.reset}`);
245
+ return null;
246
+ }
247
+ const data = await res.json();
248
+ authToken = data.token;
249
+ return authToken;
250
+ } catch (e) {
251
+ console.log(`${colors.yellow}Auth error: ${e.message}${colors.reset}`);
252
+ return null;
253
+ }
254
+ }
255
+
225
256
  // Create readline interface
226
257
  const rl = readline.createInterface({
227
258
  input: process.stdin,
@@ -245,6 +276,11 @@ function printWelcome() {
245
276
 
246
277
  console.log(`${colors.blue}Model:${colors.reset} ${colors.yellow}${config.model}${colors.reset}`);
247
278
  console.log(`${colors.blue}Server:${colors.reset} ${colors.yellow}${config.server}${colors.reset}`);
279
+ if (config.kbId) {
280
+ console.log(`${colors.blue}Knowledge Base:${colors.reset} ${colors.green}${config.kbId}${colors.reset} ${colors.green}(RAG enabled)${colors.reset}`);
281
+ } else {
282
+ console.log(`${colors.blue}Knowledge Base:${colors.reset} ${colors.dim}None (plain generation)${colors.reset}`);
283
+ }
248
284
  if (config.apiKey === 'YOUR_API_KEY') {
249
285
  console.log(`${colors.red}⚠ Using placeholder API key - set SLYOS_API_KEY environment variable${colors.reset}`);
250
286
  }
@@ -262,28 +298,113 @@ async function sendMessage(userMessage) {
262
298
  try {
263
299
  console.log(`${colors.dim}Thinking...${colors.reset}`);
264
300
 
265
- // Use chatCompletion (OpenAI-compatible) — handles prompt formatting for any model
266
- const response = await sdk.chatCompletion(config.model, {
267
- messages: [
268
- { role: 'system', content: 'You are a helpful AI assistant. Give short, direct answers.' },
269
- { role: 'user', content: userMessage }
270
- ],
271
- max_tokens: 200,
272
- temperature: 0.7
273
- });
274
-
275
- let assistantMessage = response?.choices?.[0]?.message?.content || '';
301
+ let assistantMessage = '';
302
+ let sourceInfo = '';
303
+
304
+ if (config.kbId) {
305
+ // RAG mode: call API directly to get relevant chunks, then generate locally with context
306
+ console.log(`${colors.dim}Searching knowledge base...${colors.reset}`);
307
+ try {
308
+ const token = await getAuthToken();
309
+ if (!token) throw new Error('Could not authenticate — check your API key');
310
+ // Adapt chunk count to model's context window
311
+ const modelCtx = sdk.getModelContextWindow?.() || 2048;
312
+ const topK = modelCtx <= 2048 ? 2 : modelCtx <= 4096 ? 3 : 5;
313
+ const ragRes = await fetch(`${config.server}/api/rag/knowledge-bases/${config.kbId}/query`, {
314
+ method: 'POST',
315
+ headers: {
316
+ 'Content-Type': 'application/json',
317
+ 'Authorization': `Bearer ${token}`
318
+ },
319
+ body: JSON.stringify({ query: userMessage, top_k: topK, model_id: config.model })
320
+ });
321
+ if (!ragRes.ok) {
322
+ const errText = await ragRes.text();
323
+ throw new Error(`RAG query failed: ${ragRes.status} - ${errText}`);
324
+ }
325
+ const ragData = await ragRes.json();
326
+ const chunks = ragData.retrieved_chunks || [];
327
+
328
+ // Check if chunks are relevant enough (similarity > 0.3)
329
+ const goodChunks = chunks.filter(c => (c.similarity_score || 0) > 0.3);
330
+
331
+ if (goodChunks.length > 0) {
332
+ // Adapt context size to model's context window
333
+ const ctxWindow = sdk.getModelContextWindow?.() || 2048;
334
+ const maxContextChars = Math.max(500, (ctxWindow - 200) * 3);
335
+ const maxGenTokens = Math.min(200, Math.floor(ctxWindow / 4));
336
+
337
+ // Clean and truncate context — strip weird chars, fit model window
338
+ let context = goodChunks.map(c => c.content).join('\n')
339
+ .replace(/[^\x20-\x7E\n]/g, ' ') // Strip non-ASCII/control chars
340
+ .replace(/\s{3,}/g, ' ') // Collapse excessive whitespace
341
+ .trim();
342
+ if (context.length > maxContextChars) context = context.substring(0, maxContextChars);
343
+
344
+ // Simple context-then-QA format — this works best with small models
345
+ const prompt = `${context}\n\nQuestion: ${userMessage}\nAnswer:`;
346
+ const response = await sdk.generate(config.model, prompt, {
347
+ temperature: 0.5,
348
+ maxTokens: maxGenTokens
349
+ });
350
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
351
+
352
+ // Collect source names
353
+ const sources = [...new Set(goodChunks.map(c => c.document_name || c.source).filter(Boolean))];
354
+ if (sources.length > 0) {
355
+ sourceInfo = `\n${colors.dim}[Sources: ${sources.join(', ')}]${colors.reset}`;
356
+ }
357
+ } else {
358
+ // No relevant chunks — answer conversationally
359
+ console.log(`${colors.dim}No RAG context found, using plain generation...${colors.reset}`);
360
+ const prompt = `The user said: "${userMessage}"\nGive a brief, friendly response:\n`;
361
+ const response = await sdk.generate(config.model, prompt, {
362
+ temperature: 0.7,
363
+ maxTokens: 100
364
+ });
365
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
366
+ }
367
+ } catch (ragErr) {
368
+ console.log(`${colors.yellow}RAG lookup failed: ${ragErr.message}${colors.reset}`);
369
+ const prompt = `The user said: "${userMessage}"\nGive a brief, friendly response:\n`;
370
+ const response = await sdk.generate(config.model, prompt, {
371
+ temperature: 0.7,
372
+ maxTokens: 100
373
+ });
374
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
375
+ }
376
+ } else {
377
+ // Plain mode: direct generation (no RAG)
378
+ const prompt = `The user said: "${userMessage}"\nGive a brief, helpful response:\n`;
379
+ const response = await sdk.generate(config.model, prompt, {
380
+ temperature: 0.7,
381
+ maxTokens: 150
382
+ });
383
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
384
+ }
276
385
 
277
- // Light cleanup stop at any hallucinated role prefixes
386
+ // Clean up model output artifacts
278
387
  assistantMessage = assistantMessage
279
- .split(/\n\s*(User|Human|System):/i)[0]
388
+ // Strip repeated garbage chars (!!!, ???, etc)
389
+ .replace(/(.)\1{5,}/g, '')
390
+ // Strip leading role prefixes the model loves to emit
391
+ .replace(/^(assistant|system|answer|response|AI)\s*[:]\s*/i, '')
392
+ // Remove leading partial sentences (fragments before the real answer)
393
+ .replace(/^[a-z][^.!?]{0,40}\.\s*/i, function(match) {
394
+ // Only strip if it looks like a fragment (< 50 chars ending in period)
395
+ return match.length < 50 && !match.includes(' is ') ? '' : match;
396
+ })
397
+ // Stop at any hallucinated role prefixes mid-response
398
+ .split(/\n\s*(User|Human|System|Question):/i)[0]
399
+ // Strip any remaining leading role prefix after newline
400
+ .replace(/^\s*(assistant|AI)\s*[:]\s*/im, '')
280
401
  .trim();
281
402
 
282
- if (!assistantMessage) {
403
+ if (!assistantMessage || assistantMessage.length < 3) {
283
404
  assistantMessage = '(No response generated — try rephrasing your question)';
284
405
  }
285
406
 
286
- console.log(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}\n`);
407
+ console.log(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}${sourceInfo}\n`);
287
408
  } catch (error) {
288
409
  console.error(`\n${colors.red}Error:${colors.reset} ${error.message}\n`);
289
410
  }
@@ -371,7 +492,7 @@ cat > .env.example << 'ENV_EOF'
371
492
  # Slyos SDK Configuration
372
493
  SLYOS_API_KEY=your_api_key_here
373
494
  SLYOS_MODEL=quantum-1.7b
374
- SLYOS_SERVER=https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com
495
+ SLYOS_SERVER=https://api.slyos.world
375
496
  ENV_EOF
376
497
  print_success "Environment configuration template created"
377
498
 
@@ -404,7 +525,7 @@ Set these environment variables before running:
404
525
  ```bash
405
526
  export SLYOS_API_KEY=your_api_key_here
406
527
  export SLYOS_MODEL=quantum-1.7b
407
- export SLYOS_SERVER=https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com
528
+ export SLYOS_SERVER=https://api.slyos.world
408
529
  ```
409
530
 
410
531
  Or create a `.env` file based on `.env.example`.
@@ -476,6 +597,7 @@ cat > .env << ENV_SETUP_EOF
476
597
  SLYOS_API_KEY=${API_KEY}
477
598
  SLYOS_MODEL=${MODEL}
478
599
  SLYOS_SERVER=${SLYOS_SERVER}
600
+ SLYOS_KB_ID=${KB_ID}
479
601
  ENV_SETUP_EOF
480
602
  print_success "Environment configured"
481
603
 
@@ -489,6 +611,9 @@ echo -e "${CYAN}Project Details:${NC}"
489
611
  echo " Location: ${YELLOW}$(pwd)${NC}"
490
612
  echo " API Key: ${YELLOW}${API_KEY}${NC}"
491
613
  echo " Model: ${YELLOW}${MODEL}${NC}"
614
+ if [ -n "$KB_ID" ]; then
615
+ echo " Knowledge Base: ${GREEN}${KB_ID} (RAG enabled)${NC}"
616
+ fi
492
617
  echo ""
493
618
  echo -e "${CYAN}Next Steps:${NC}"
494
619
  echo " 1. Review the .env file and update your API key if needed"
package/dist/index.d.ts CHANGED
@@ -106,6 +106,51 @@ interface OpenAICompatibleClient {
106
106
  };
107
107
  };
108
108
  }
109
+ interface RAGOptions {
110
+ knowledgeBaseId: string;
111
+ query: string;
112
+ topK?: number;
113
+ modelId: string;
114
+ temperature?: number;
115
+ maxTokens?: number;
116
+ }
117
+ interface RAGChunk {
118
+ id: string;
119
+ documentId: string;
120
+ documentName: string;
121
+ content: string;
122
+ similarityScore: number;
123
+ metadata?: Record<string, any>;
124
+ }
125
+ interface RAGResponse {
126
+ query: string;
127
+ retrievedChunks: RAGChunk[];
128
+ generatedResponse: string;
129
+ context: string;
130
+ latencyMs: number;
131
+ tierUsed: 1 | 2 | 3;
132
+ }
133
+ interface OfflineIndex {
134
+ metadata: {
135
+ kb_id: string;
136
+ kb_name: string;
137
+ chunk_size: number;
138
+ embedding_dim: number;
139
+ total_chunks: number;
140
+ synced_at: string;
141
+ expires_at: string;
142
+ sync_token: string;
143
+ };
144
+ chunks: Array<{
145
+ id: string;
146
+ document_id: string;
147
+ document_name: string;
148
+ content: string;
149
+ chunk_index: number;
150
+ embedding: number[] | null;
151
+ metadata: Record<string, any>;
152
+ }>;
153
+ }
109
154
  declare class SlyOS {
110
155
  private apiKey;
111
156
  private apiUrl;
@@ -116,11 +161,13 @@ declare class SlyOS {
116
161
  private onProgress;
117
162
  private onEvent;
118
163
  private fallbackConfig;
164
+ private modelContextWindow;
119
165
  constructor(config: SlyOSConfigWithFallback);
120
166
  private emitProgress;
121
167
  private emitEvent;
122
168
  analyzeDevice(): Promise<DeviceProfile>;
123
169
  getDeviceProfile(): DeviceProfile | null;
170
+ getModelContextWindow(): number;
124
171
  recommendModel(category?: ModelCategory): {
125
172
  modelId: string;
126
173
  quant: QuantizationLevel;
@@ -135,6 +182,17 @@ declare class SlyOS {
135
182
  minRAM_MB: Record<string, number>;
136
183
  }[];
137
184
  }>;
185
+ searchModels(query: string, options?: {
186
+ limit?: number;
187
+ task?: string;
188
+ }): Promise<Array<{
189
+ id: string;
190
+ name: string;
191
+ downloads: number;
192
+ likes: number;
193
+ task: string;
194
+ size_category: string;
195
+ }>>;
138
196
  canRunModel(modelId: string, quant?: QuantizationLevel): {
139
197
  canRun: boolean;
140
198
  reason: string;
@@ -153,6 +211,41 @@ declare class SlyOS {
153
211
  private fallbackToBedrockCloud;
154
212
  private invokeBedrockCloud;
155
213
  private mapModelToOpenAI;
214
+ private localEmbeddingModel;
215
+ private offlineIndexes;
216
+ /**
217
+ * Tier 2: Cloud-indexed RAG with local inference.
218
+ * Retrieves relevant chunks from server, generates response locally.
219
+ */
220
+ ragQuery(options: RAGOptions): Promise<RAGResponse>;
221
+ /**
222
+ * Tier 1: Fully local RAG. Zero network calls.
223
+ * Documents are chunked/embedded on-device, retrieval and generation all local.
224
+ */
225
+ ragQueryLocal(options: RAGOptions & {
226
+ documents: Array<{
227
+ content: string;
228
+ name?: string;
229
+ }>;
230
+ }): Promise<RAGResponse>;
231
+ /**
232
+ * Tier 3: Offline RAG using a synced knowledge base.
233
+ * First call syncKnowledgeBase(), then use this for offline queries.
234
+ */
235
+ ragQueryOffline(options: RAGOptions): Promise<RAGResponse>;
236
+ /**
237
+ * Sync a knowledge base for offline use (Tier 3).
238
+ * Downloads chunks + embeddings from server, stores locally.
239
+ */
240
+ syncKnowledgeBase(knowledgeBaseId: string, deviceId?: string): Promise<{
241
+ chunkCount: number;
242
+ sizeMb: number;
243
+ expiresAt: string;
244
+ }>;
245
+ private loadEmbeddingModel;
246
+ private embedTextLocal;
247
+ private cosineSimilarity;
248
+ private chunkTextLocal;
156
249
  static openaiCompatible(config: {
157
250
  apiKey: string;
158
251
  apiUrl?: string;
@@ -160,4 +253,4 @@ declare class SlyOS {
160
253
  }): OpenAICompatibleClient;
161
254
  }
162
255
  export default SlyOS;
163
- export type { SlyOSConfig, SlyOSConfigWithFallback, GenerateOptions, TranscribeOptions, DeviceProfile, ProgressEvent, SlyEvent, QuantizationLevel, ModelCategory, OpenAIMessage, OpenAIChatCompletionRequest, OpenAIChatCompletionResponse, OpenAIChoice, OpenAIUsage, BedrockTextGenerationConfig, BedrockInvokeRequest, BedrockInvokeResponse, BedrockResult, FallbackConfig, FallbackProvider, OpenAICompatibleClient, };
256
+ export type { SlyOSConfig, SlyOSConfigWithFallback, GenerateOptions, TranscribeOptions, DeviceProfile, ProgressEvent, SlyEvent, QuantizationLevel, ModelCategory, OpenAIMessage, OpenAIChatCompletionRequest, OpenAIChatCompletionResponse, OpenAIChoice, OpenAIUsage, BedrockTextGenerationConfig, BedrockInvokeRequest, BedrockInvokeResponse, BedrockResult, FallbackConfig, FallbackProvider, OpenAICompatibleClient, RAGOptions, RAGChunk, RAGResponse, OfflineIndex, };