@emilshirokikh/slyos-sdk 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,10 +24,10 @@ const sdk = new SlyOS({
24
24
  await sdk.initialize();
25
25
 
26
26
  // 2. Load model (downloads ~200MB once)
27
- await sdk.loadModel('quantum-360m');
27
+ await sdk.loadModel('quantum-1.7b');
28
28
 
29
29
  // 3. Generate responses
30
- const response = await sdk.generate('quantum-360m',
30
+ const response = await sdk.generate('quantum-1.7b',
31
31
  'What is artificial intelligence?',
32
32
  {
33
33
  temperature: 0.7,
@@ -74,15 +74,15 @@ await sdk.initialize();
74
74
  #### `loadModel(modelId)`
75
75
  Downloads and caches AI model locally.
76
76
  ```javascript
77
- await sdk.loadModel('quantum-360m');
77
+ await sdk.loadModel('quantum-1.7b');
78
78
  ```
79
79
 
80
80
  **Parameters:**
81
81
  - `modelId` (string): Model identifier
82
- - `quantum-135m` - 80MB, fastest
83
- - `quantum-360m` - 200MB, recommended
84
- - `quantum-1.7b` - 1GB, high quality
85
- - `quantum-3b` - 1.7GB, best quality
82
+ - `quantum-1.7b` - 900MB, recommended
83
+ - `quantum-3b` - 1.6GB, high quality
84
+ - `quantum-code-3b` - 1.6GB, code-optimized
85
+ - `quantum-8b` - 4.2GB, best quality
86
86
 
87
87
  **Returns:** `Promise<void>`
88
88
 
@@ -94,7 +94,7 @@ await sdk.loadModel('quantum-360m');
94
94
  #### `generate(modelId, prompt, options?)`
95
95
  Generates AI response locally.
96
96
  ```javascript
97
- const response = await sdk.generate('quantum-360m',
97
+ const response = await sdk.generate('quantum-1.7b',
98
98
  'Tell me about your menu',
99
99
  {
100
100
  temperature: 0.7,
@@ -137,10 +137,10 @@ import SlyOS from '@emilshirokikh/slyos-sdk';
137
137
 
138
138
  const sdk = new SlyOS({ apiKey: 'sk_live_...' });
139
139
  await sdk.initialize();
140
- await sdk.loadModel('quantum-360m');
140
+ await sdk.loadModel('quantum-1.7b');
141
141
 
142
142
  async function chat(userMessage) {
143
- return await sdk.generate('quantum-360m', userMessage);
143
+ return await sdk.generate('quantum-1.7b', userMessage);
144
144
  }
145
145
 
146
146
  const response = await chat('What are your hours?');
@@ -157,7 +157,7 @@ Help with menu, hours, and nutrition. Be friendly and concise.`;
157
157
  const userMessage = 'What breakfast items do you have?';
158
158
  const fullPrompt = `${systemPrompt}\n\nCustomer: ${userMessage}\nAssistant:`;
159
159
 
160
- const response = await sdk.generate('quantum-360m', fullPrompt, {
160
+ const response = await sdk.generate('quantum-1.7b', fullPrompt, {
161
161
  temperature: 0.7,
162
162
  maxTokens: 150
163
163
  });
@@ -179,7 +179,7 @@ function Chatbot() {
179
179
  async function init() {
180
180
  const client = new SlyOS({ apiKey: 'sk_live_...' });
181
181
  await client.initialize();
182
- await client.loadModel('quantum-360m');
182
+ await client.loadModel('quantum-1.7b');
183
183
  setSdk(client);
184
184
  setLoading(false);
185
185
  }
@@ -187,7 +187,7 @@ function Chatbot() {
187
187
  }, []);
188
188
 
189
189
  async function handleChat(message) {
190
- const reply = await sdk.generate('quantum-360m', message);
190
+ const reply = await sdk.generate('quantum-1.7b', message);
191
191
  setResponse(reply);
192
192
  }
193
193
 
@@ -220,11 +220,11 @@ const sdk = new SlyOS({
220
220
 
221
221
  ### Multiple Models
222
222
  ```javascript
223
- await sdk.loadModel('quantum-360m');
223
+ await sdk.loadModel('quantum-1.7b');
224
- await sdk.loadModel('quantum-1.7b');
224
+ await sdk.loadModel('quantum-3b');
225
225
 
226
226
  // Use different models
227
- const fast = await sdk.generate('quantum-360m', 'Quick question?');
227
+ const fast = await sdk.generate('quantum-1.7b', 'Quick question?');
228
- const detailed = await sdk.generate('quantum-1.7b', 'Complex question?');
228
+ const detailed = await sdk.generate('quantum-3b', 'Complex question?');
229
229
  ```
230
230
 
@@ -249,7 +249,7 @@ const detailed = await sdk.generate('quantum-1.7b', 'Complex question?');
249
249
  ```javascript
250
250
  // Check browser console for errors
251
251
  // Ensure 2GB+ RAM available
252
- // Try smaller model (quantum-135m)
252
+ // Try the smallest available model (quantum-1.7b)
253
253
  ```
254
254
 
255
255
  ### CORS errors
package/create-chatbot.sh CHANGED
@@ -27,7 +27,8 @@ NC='\033[0m' # No Color
27
27
  # Default values
28
28
  API_KEY=""
29
29
  MODEL="quantum-1.7b"
30
- SLYOS_SERVER="https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com"
30
+ KB_ID=""
31
+ SLYOS_SERVER="https://api.slyos.world"
31
32
  PROJECT_NAME="slyos-chatbot"
32
33
 
33
34
  #################################################################################
@@ -70,12 +71,17 @@ while [[ $# -gt 0 ]]; do
70
71
  MODEL="$2"
71
72
  shift 2
72
73
  ;;
74
+ --kb-id)
75
+ KB_ID="$2"
76
+ shift 2
77
+ ;;
73
78
  -h|--help)
74
79
  echo "Usage: $0 [OPTIONS]"
75
80
  echo ""
76
81
  echo "Options:"
77
82
  echo " --api-key KEY Slyos API key (prompted if not provided)"
78
83
  echo " --model MODEL AI model to use (default: quantum-1.7b)"
84
+ echo " --kb-id ID Knowledge base ID for RAG (optional)"
79
85
  echo " -h, --help Show this help message"
80
86
  exit 0
81
87
  ;;
@@ -177,7 +183,7 @@ print_success "Package configuration updated"
177
183
  # Install Slyos SDK + dotenv
178
184
  print_step "Installing dependencies"
179
185
  print_info "This may take a moment..."
180
- npm install @emilshirokikh/slyos-sdk dotenv > /dev/null 2>&1
186
+ npm install @emilshirokikh/slyos-sdk dotenv node-fetch > /dev/null 2>&1
181
187
  print_success "Dependencies installed"
182
188
 
183
189
  # Create the chatbot application
@@ -188,6 +194,7 @@ cat > app.mjs << 'CHATBOT_EOF'
188
194
 
189
195
  import 'dotenv/config';
190
196
  import readline from 'readline';
197
+ import fetch from 'node-fetch';
191
198
  import SlyOS from '@emilshirokikh/slyos-sdk';
192
199
 
193
200
  // Color codes for terminal output
@@ -207,11 +214,13 @@ const colors = {
207
214
  const config = {
208
215
  apiKey: process.env.SLYOS_API_KEY || 'YOUR_API_KEY',
209
216
  model: process.env.SLYOS_MODEL || 'quantum-1.7b',
210
- server: process.env.SLYOS_SERVER || 'https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com'
217
+ server: process.env.SLYOS_SERVER || 'https://api.slyos.world',
218
+ kbId: process.env.SLYOS_KB_ID || ''
211
219
  };
212
220
 
213
221
  // Initialize SlyOS SDK
214
222
  let sdk;
223
+ let authToken = null; // Store auth token for direct RAG API calls
215
224
  try {
216
225
  sdk = new SlyOS({
217
226
  apiKey: config.apiKey,
@@ -222,6 +231,28 @@ try {
222
231
  process.exit(1);
223
232
  }
224
233
 
234
+ // Get auth token directly for RAG API calls
235
+ async function getAuthToken() {
236
+ if (authToken) return authToken;
237
+ try {
238
+ const res = await fetch(`${config.server}/api/auth/sdk`, {
239
+ method: 'POST',
240
+ headers: { 'Content-Type': 'application/json' },
241
+ body: JSON.stringify({ apiKey: config.apiKey })
242
+ });
243
+ if (!res.ok) {
244
+ console.log(`${colors.yellow}Auth failed: ${res.status} ${res.statusText}${colors.reset}`);
245
+ return null;
246
+ }
247
+ const data = await res.json();
248
+ authToken = data.token;
249
+ return authToken;
250
+ } catch (e) {
251
+ console.log(`${colors.yellow}Auth error: ${e.message}${colors.reset}`);
252
+ return null;
253
+ }
254
+ }
255
+
225
256
  // Create readline interface
226
257
  const rl = readline.createInterface({
227
258
  input: process.stdin,
@@ -245,6 +276,11 @@ function printWelcome() {
245
276
 
246
277
  console.log(`${colors.blue}Model:${colors.reset} ${colors.yellow}${config.model}${colors.reset}`);
247
278
  console.log(`${colors.blue}Server:${colors.reset} ${colors.yellow}${config.server}${colors.reset}`);
279
+ if (config.kbId) {
280
+ console.log(`${colors.blue}Knowledge Base:${colors.reset} ${colors.green}${config.kbId}${colors.reset} ${colors.green}(RAG enabled)${colors.reset}`);
281
+ } else {
282
+ console.log(`${colors.blue}Knowledge Base:${colors.reset} ${colors.dim}None (plain generation)${colors.reset}`);
283
+ }
248
284
  if (config.apiKey === 'YOUR_API_KEY') {
249
285
  console.log(`${colors.red}⚠ Using placeholder API key - set SLYOS_API_KEY environment variable${colors.reset}`);
250
286
  }
@@ -262,28 +298,113 @@ async function sendMessage(userMessage) {
262
298
  try {
263
299
  console.log(`${colors.dim}Thinking...${colors.reset}`);
264
300
 
265
- // Use chatCompletion (OpenAI-compatible) — handles prompt formatting for any model
266
- const response = await sdk.chatCompletion(config.model, {
267
- messages: [
268
- { role: 'system', content: 'You are a helpful AI assistant. Give short, direct answers.' },
269
- { role: 'user', content: userMessage }
270
- ],
271
- max_tokens: 200,
272
- temperature: 0.7
273
- });
274
-
275
- let assistantMessage = response?.choices?.[0]?.message?.content || '';
301
+ let assistantMessage = '';
302
+ let sourceInfo = '';
303
+
304
+ if (config.kbId) {
305
+ // RAG mode: call API directly to get relevant chunks, then generate locally with context
306
+ console.log(`${colors.dim}Searching knowledge base...${colors.reset}`);
307
+ try {
308
+ const token = await getAuthToken();
309
+ if (!token) throw new Error('Could not authenticate — check your API key');
310
+ // Adapt chunk count to model's context window
311
+ const modelCtx = sdk.getModelContextWindow?.() || 2048;
312
+ const topK = modelCtx <= 2048 ? 2 : modelCtx <= 4096 ? 3 : 5;
313
+ const ragRes = await fetch(`${config.server}/api/rag/knowledge-bases/${config.kbId}/query`, {
314
+ method: 'POST',
315
+ headers: {
316
+ 'Content-Type': 'application/json',
317
+ 'Authorization': `Bearer ${token}`
318
+ },
319
+ body: JSON.stringify({ query: userMessage, top_k: topK, model_id: config.model })
320
+ });
321
+ if (!ragRes.ok) {
322
+ const errText = await ragRes.text();
323
+ throw new Error(`RAG query failed: ${ragRes.status} - ${errText}`);
324
+ }
325
+ const ragData = await ragRes.json();
326
+ const chunks = ragData.retrieved_chunks || [];
327
+
328
+ // Check if chunks are relevant enough (similarity > 0.3)
329
+ const goodChunks = chunks.filter(c => (c.similarity_score || 0) > 0.3);
330
+
331
+ if (goodChunks.length > 0) {
332
+ // Adapt context size to model's context window
333
+ const ctxWindow = sdk.getModelContextWindow?.() || 2048;
334
+ const maxContextChars = Math.max(500, (ctxWindow - 200) * 3);
335
+ const maxGenTokens = Math.min(200, Math.floor(ctxWindow / 4));
336
+
337
+ // Clean and truncate context — strip weird chars, fit model window
338
+ let context = goodChunks.map(c => c.content).join('\n')
339
+ .replace(/[^\x20-\x7E\n]/g, ' ') // Strip non-ASCII/control chars
340
+ .replace(/\s{3,}/g, ' ') // Collapse excessive whitespace
341
+ .trim();
342
+ if (context.length > maxContextChars) context = context.substring(0, maxContextChars);
343
+
344
+ // Simple context-then-QA format — this works best with small models
345
+ const prompt = `${context}\n\nQuestion: ${userMessage}\nAnswer:`;
346
+ const response = await sdk.generate(config.model, prompt, {
347
+ temperature: 0.5,
348
+ maxTokens: maxGenTokens
349
+ });
350
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
351
+
352
+ // Collect source names
353
+ const sources = [...new Set(goodChunks.map(c => c.document_name || c.source).filter(Boolean))];
354
+ if (sources.length > 0) {
355
+ sourceInfo = `\n${colors.dim}[Sources: ${sources.join(', ')}]${colors.reset}`;
356
+ }
357
+ } else {
358
+ // No relevant chunks — answer conversationally
359
+ console.log(`${colors.dim}No RAG context found, using plain generation...${colors.reset}`);
360
+ const prompt = `The user said: "${userMessage}"\nGive a brief, friendly response:\n`;
361
+ const response = await sdk.generate(config.model, prompt, {
362
+ temperature: 0.7,
363
+ maxTokens: 100
364
+ });
365
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
366
+ }
367
+ } catch (ragErr) {
368
+ console.log(`${colors.yellow}RAG lookup failed: ${ragErr.message}${colors.reset}`);
369
+ const prompt = `The user said: "${userMessage}"\nGive a brief, friendly response:\n`;
370
+ const response = await sdk.generate(config.model, prompt, {
371
+ temperature: 0.7,
372
+ maxTokens: 100
373
+ });
374
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
375
+ }
376
+ } else {
377
+ // Plain mode: direct generation (no RAG)
378
+ const prompt = `The user said: "${userMessage}"\nGive a brief, helpful response:\n`;
379
+ const response = await sdk.generate(config.model, prompt, {
380
+ temperature: 0.7,
381
+ maxTokens: 150
382
+ });
383
+ assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
384
+ }
276
385
 
277
- // Light cleanup stop at any hallucinated role prefixes
386
+ // Clean up model output artifacts
278
387
  assistantMessage = assistantMessage
279
- .split(/\n\s*(User|Human|System):/i)[0]
388
+ // Strip repeated garbage chars (!!!, ???, etc)
389
+ .replace(/(.)\1{5,}/g, '')
390
+ // Strip leading role prefixes the model loves to emit
391
+ .replace(/^(assistant|system|answer|response|AI)\s*[:]\s*/i, '')
392
+ // Remove leading partial sentences (fragments before the real answer)
393
+ .replace(/^[a-z][^.!?]{0,40}\.\s*/i, function(match) {
394
+ // Only strip if it looks like a fragment (< 50 chars ending in period)
395
+ return match.length < 50 && !match.includes(' is ') ? '' : match;
396
+ })
397
+ // Stop at any hallucinated role prefixes mid-response
398
+ .split(/\n\s*(User|Human|System|Question):/i)[0]
399
+ // Strip any remaining leading role prefix after newline
400
+ .replace(/^\s*(assistant|AI)\s*[:]\s*/im, '')
280
401
  .trim();
281
402
 
282
- if (!assistantMessage) {
403
+ if (!assistantMessage || assistantMessage.length < 3) {
283
404
  assistantMessage = '(No response generated — try rephrasing your question)';
284
405
  }
285
406
 
286
- console.log(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}\n`);
407
+ console.log(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}${sourceInfo}\n`);
287
408
  } catch (error) {
288
409
  console.error(`\n${colors.red}Error:${colors.reset} ${error.message}\n`);
289
410
  }
@@ -371,7 +492,7 @@ cat > .env.example << 'ENV_EOF'
371
492
  # Slyos SDK Configuration
372
493
  SLYOS_API_KEY=your_api_key_here
373
494
  SLYOS_MODEL=quantum-1.7b
374
- SLYOS_SERVER=https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com
495
+ SLYOS_SERVER=https://api.slyos.world
375
496
  ENV_EOF
376
497
  print_success "Environment configuration template created"
377
498
 
@@ -404,7 +525,7 @@ Set these environment variables before running:
404
525
  ```bash
405
526
  export SLYOS_API_KEY=your_api_key_here
406
527
  export SLYOS_MODEL=quantum-1.7b
407
- export SLYOS_SERVER=https://slyos-prod.eba-qjz3cmgq.us-east-2.elasticbeanstalk.com
528
+ export SLYOS_SERVER=https://api.slyos.world
408
529
  ```
409
530
 
410
531
  Or create a `.env` file based on `.env.example`.
@@ -476,6 +597,7 @@ cat > .env << ENV_SETUP_EOF
476
597
  SLYOS_API_KEY=${API_KEY}
477
598
  SLYOS_MODEL=${MODEL}
478
599
  SLYOS_SERVER=${SLYOS_SERVER}
600
+ SLYOS_KB_ID=${KB_ID}
479
601
  ENV_SETUP_EOF
480
602
  print_success "Environment configured"
481
603
 
@@ -489,6 +611,9 @@ echo -e "${CYAN}Project Details:${NC}"
489
611
  echo " Location: ${YELLOW}$(pwd)${NC}"
490
612
  echo " API Key: ${YELLOW}${API_KEY}${NC}"
491
613
  echo " Model: ${YELLOW}${MODEL}${NC}"
614
+ if [ -n "$KB_ID" ]; then
615
+ echo " Knowledge Base: ${GREEN}${KB_ID} (RAG enabled)${NC}"
616
+ fi
492
617
  echo ""
493
618
  echo -e "${CYAN}Next Steps:${NC}"
494
619
  echo " 1. Review the .env file and update your API key if needed"
package/dist/index.d.ts CHANGED
@@ -106,6 +106,51 @@ interface OpenAICompatibleClient {
106
106
  };
107
107
  };
108
108
  }
109
+ interface RAGOptions {
110
+ knowledgeBaseId: string;
111
+ query: string;
112
+ topK?: number;
113
+ modelId: string;
114
+ temperature?: number;
115
+ maxTokens?: number;
116
+ }
117
+ interface RAGChunk {
118
+ id: string;
119
+ documentId: string;
120
+ documentName: string;
121
+ content: string;
122
+ similarityScore: number;
123
+ metadata?: Record<string, any>;
124
+ }
125
+ interface RAGResponse {
126
+ query: string;
127
+ retrievedChunks: RAGChunk[];
128
+ generatedResponse: string;
129
+ context: string;
130
+ latencyMs: number;
131
+ tierUsed: 1 | 2 | 3;
132
+ }
133
+ interface OfflineIndex {
134
+ metadata: {
135
+ kb_id: string;
136
+ kb_name: string;
137
+ chunk_size: number;
138
+ embedding_dim: number;
139
+ total_chunks: number;
140
+ synced_at: string;
141
+ expires_at: string;
142
+ sync_token: string;
143
+ };
144
+ chunks: Array<{
145
+ id: string;
146
+ document_id: string;
147
+ document_name: string;
148
+ content: string;
149
+ chunk_index: number;
150
+ embedding: number[] | null;
151
+ metadata: Record<string, any>;
152
+ }>;
153
+ }
109
154
  declare class SlyOS {
110
155
  private apiKey;
111
156
  private apiUrl;
@@ -116,11 +161,13 @@ declare class SlyOS {
116
161
  private onProgress;
117
162
  private onEvent;
118
163
  private fallbackConfig;
164
+ private modelContextWindow;
119
165
  constructor(config: SlyOSConfigWithFallback);
120
166
  private emitProgress;
121
167
  private emitEvent;
122
168
  analyzeDevice(): Promise<DeviceProfile>;
123
169
  getDeviceProfile(): DeviceProfile | null;
170
+ getModelContextWindow(): number;
124
171
  recommendModel(category?: ModelCategory): {
125
172
  modelId: string;
126
173
  quant: QuantizationLevel;
@@ -135,6 +182,17 @@ declare class SlyOS {
135
182
  minRAM_MB: Record<string, number>;
136
183
  }[];
137
184
  }>;
185
+ searchModels(query: string, options?: {
186
+ limit?: number;
187
+ task?: string;
188
+ }): Promise<Array<{
189
+ id: string;
190
+ name: string;
191
+ downloads: number;
192
+ likes: number;
193
+ task: string;
194
+ size_category: string;
195
+ }>>;
138
196
  canRunModel(modelId: string, quant?: QuantizationLevel): {
139
197
  canRun: boolean;
140
198
  reason: string;
@@ -153,6 +211,41 @@ declare class SlyOS {
153
211
  private fallbackToBedrockCloud;
154
212
  private invokeBedrockCloud;
155
213
  private mapModelToOpenAI;
214
+ private localEmbeddingModel;
215
+ private offlineIndexes;
216
+ /**
217
+ * Tier 2: Cloud-indexed RAG with local inference.
218
+ * Retrieves relevant chunks from server, generates response locally.
219
+ */
220
+ ragQuery(options: RAGOptions): Promise<RAGResponse>;
221
+ /**
222
+ * Tier 1: Fully local RAG. Zero network calls.
223
+ * Documents are chunked/embedded on-device, retrieval and generation all local.
224
+ */
225
+ ragQueryLocal(options: RAGOptions & {
226
+ documents: Array<{
227
+ content: string;
228
+ name?: string;
229
+ }>;
230
+ }): Promise<RAGResponse>;
231
+ /**
232
+ * Tier 3: Offline RAG using a synced knowledge base.
233
+ * First call syncKnowledgeBase(), then use this for offline queries.
234
+ */
235
+ ragQueryOffline(options: RAGOptions): Promise<RAGResponse>;
236
+ /**
237
+ * Sync a knowledge base for offline use (Tier 3).
238
+ * Downloads chunks + embeddings from server, stores locally.
239
+ */
240
+ syncKnowledgeBase(knowledgeBaseId: string, deviceId?: string): Promise<{
241
+ chunkCount: number;
242
+ sizeMb: number;
243
+ expiresAt: string;
244
+ }>;
245
+ private loadEmbeddingModel;
246
+ private embedTextLocal;
247
+ private cosineSimilarity;
248
+ private chunkTextLocal;
156
249
  static openaiCompatible(config: {
157
250
  apiKey: string;
158
251
  apiUrl?: string;
@@ -160,4 +253,4 @@ declare class SlyOS {
160
253
  }): OpenAICompatibleClient;
161
254
  }
162
255
  export default SlyOS;
163
- export type { SlyOSConfig, SlyOSConfigWithFallback, GenerateOptions, TranscribeOptions, DeviceProfile, ProgressEvent, SlyEvent, QuantizationLevel, ModelCategory, OpenAIMessage, OpenAIChatCompletionRequest, OpenAIChatCompletionResponse, OpenAIChoice, OpenAIUsage, BedrockTextGenerationConfig, BedrockInvokeRequest, BedrockInvokeResponse, BedrockResult, FallbackConfig, FallbackProvider, OpenAICompatibleClient, };
256
+ export type { SlyOSConfig, SlyOSConfigWithFallback, GenerateOptions, TranscribeOptions, DeviceProfile, ProgressEvent, SlyEvent, QuantizationLevel, ModelCategory, OpenAIMessage, OpenAIChatCompletionRequest, OpenAIChatCompletionResponse, OpenAIChoice, OpenAIUsage, BedrockTextGenerationConfig, BedrockInvokeRequest, BedrockInvokeResponse, BedrockResult, FallbackConfig, FallbackProvider, OpenAICompatibleClient, RAGOptions, RAGChunk, RAGResponse, OfflineIndex, };