@emilshirokikh/slyos-sdk 1.3.2 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -16
- package/create-chatbot.sh +145 -20
- package/dist/index.d.ts +94 -1
- package/dist/index.js +386 -28
- package/package.json +1 -1
- package/src/index.ts +489 -28
package/README.md
CHANGED
|
@@ -24,10 +24,10 @@ const sdk = new SlyOS({
|
|
|
24
24
|
await sdk.initialize();
|
|
25
25
|
|
|
26
26
|
// 2. Load model (downloads ~200MB once)
|
|
27
|
-
await sdk.loadModel('quantum-
|
|
27
|
+
await sdk.loadModel('quantum-1.7b');
|
|
28
28
|
|
|
29
29
|
// 3. Generate responses
|
|
30
|
-
const response = await sdk.generate('quantum-
|
|
30
|
+
const response = await sdk.generate('quantum-1.7b',
|
|
31
31
|
'What is artificial intelligence?',
|
|
32
32
|
{
|
|
33
33
|
temperature: 0.7,
|
|
@@ -74,15 +74,15 @@ await sdk.initialize();
|
|
|
74
74
|
#### `loadModel(modelId)`
|
|
75
75
|
Downloads and caches AI model locally.
|
|
76
76
|
```javascript
|
|
77
|
-
await sdk.loadModel('quantum-
|
|
77
|
+
await sdk.loadModel('quantum-1.7b');
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
**Parameters:**
|
|
81
81
|
- `modelId` (string): Model identifier
|
|
82
|
-
- `quantum-
|
|
83
|
-
- `quantum-
|
|
84
|
-
- `quantum-
|
|
85
|
-
- `quantum-
|
|
82
|
+
- `quantum-1.7b` - 900MB, recommended
|
|
83
|
+
- `quantum-3b` - 1.6GB, high quality
|
|
84
|
+
- `quantum-code-3b` - 1.6GB, code-optimized
|
|
85
|
+
- `quantum-8b` - 4.2GB, best quality
|
|
86
86
|
|
|
87
87
|
**Returns:** `Promise<void>`
|
|
88
88
|
|
|
@@ -94,7 +94,7 @@ await sdk.loadModel('quantum-360m');
|
|
|
94
94
|
#### `generate(modelId, prompt, options?)`
|
|
95
95
|
Generates AI response locally.
|
|
96
96
|
```javascript
|
|
97
|
-
const response = await sdk.generate('quantum-
|
|
97
|
+
const response = await sdk.generate('quantum-1.7b',
|
|
98
98
|
'Tell me about your menu',
|
|
99
99
|
{
|
|
100
100
|
temperature: 0.7,
|
|
@@ -137,10 +137,10 @@ import SlyOS from '@emilshirokikh/slyos-sdk';
|
|
|
137
137
|
|
|
138
138
|
const sdk = new SlyOS({ apiKey: 'sk_live_...' });
|
|
139
139
|
await sdk.initialize();
|
|
140
|
-
await sdk.loadModel('quantum-
|
|
140
|
+
await sdk.loadModel('quantum-1.7b');
|
|
141
141
|
|
|
142
142
|
async function chat(userMessage) {
|
|
143
|
-
return await sdk.generate('quantum-
|
|
143
|
+
return await sdk.generate('quantum-1.7b', userMessage);
|
|
144
144
|
}
|
|
145
145
|
|
|
146
146
|
const response = await chat('What are your hours?');
|
|
@@ -157,7 +157,7 @@ Help with menu, hours, and nutrition. Be friendly and concise.`;
|
|
|
157
157
|
const userMessage = 'What breakfast items do you have?';
|
|
158
158
|
const fullPrompt = `${systemPrompt}\n\nCustomer: ${userMessage}\nAssistant:`;
|
|
159
159
|
|
|
160
|
-
const response = await sdk.generate('quantum-
|
|
160
|
+
const response = await sdk.generate('quantum-1.7b', fullPrompt, {
|
|
161
161
|
temperature: 0.7,
|
|
162
162
|
maxTokens: 150
|
|
163
163
|
});
|
|
@@ -179,7 +179,7 @@ function Chatbot() {
|
|
|
179
179
|
async function init() {
|
|
180
180
|
const client = new SlyOS({ apiKey: 'sk_live_...' });
|
|
181
181
|
await client.initialize();
|
|
182
|
-
await client.loadModel('quantum-
|
|
182
|
+
await client.loadModel('quantum-1.7b');
|
|
183
183
|
setSdk(client);
|
|
184
184
|
setLoading(false);
|
|
185
185
|
}
|
|
@@ -187,7 +187,7 @@ function Chatbot() {
|
|
|
187
187
|
}, []);
|
|
188
188
|
|
|
189
189
|
async function handleChat(message) {
|
|
190
|
-
const reply = await sdk.generate('quantum-
|
|
190
|
+
const reply = await sdk.generate('quantum-1.7b', message);
|
|
191
191
|
setResponse(reply);
|
|
192
192
|
}
|
|
193
193
|
|
|
@@ -220,11 +220,11 @@ const sdk = new SlyOS({
|
|
|
220
220
|
|
|
221
221
|
### Multiple Models
|
|
222
222
|
```javascript
|
|
223
|
-
await sdk.loadModel('quantum-
|
|
223
|
+
await sdk.loadModel('quantum-1.7b');
|
|
224
224
|
await sdk.loadModel('quantum-1.7b');
|
|
225
225
|
|
|
226
226
|
// Use different models
|
|
227
|
-
const fast = await sdk.generate('quantum-
|
|
227
|
+
const fast = await sdk.generate('quantum-1.7b', 'Quick question?');
|
|
228
228
|
const detailed = await sdk.generate('quantum-1.7b', 'Complex question?');
|
|
229
229
|
```
|
|
230
230
|
|
|
@@ -249,7 +249,7 @@ const detailed = await sdk.generate('quantum-1.7b', 'Complex question?');
|
|
|
249
249
|
```javascript
|
|
250
250
|
// Check browser console for errors
|
|
251
251
|
// Ensure 2GB+ RAM available
|
|
252
|
-
// Try smaller model (quantum-
|
|
252
|
+
// Try smaller model (quantum-1.7b)
|
|
253
253
|
```
|
|
254
254
|
|
|
255
255
|
### CORS errors
|
package/create-chatbot.sh
CHANGED
|
@@ -27,7 +27,8 @@ NC='\033[0m' # No Color
|
|
|
27
27
|
# Default values
|
|
28
28
|
API_KEY=""
|
|
29
29
|
MODEL="quantum-1.7b"
|
|
30
|
-
|
|
30
|
+
KB_ID=""
|
|
31
|
+
SLYOS_SERVER="https://api.slyos.world"
|
|
31
32
|
PROJECT_NAME="slyos-chatbot"
|
|
32
33
|
|
|
33
34
|
#################################################################################
|
|
@@ -70,12 +71,17 @@ while [[ $# -gt 0 ]]; do
|
|
|
70
71
|
MODEL="$2"
|
|
71
72
|
shift 2
|
|
72
73
|
;;
|
|
74
|
+
--kb-id)
|
|
75
|
+
KB_ID="$2"
|
|
76
|
+
shift 2
|
|
77
|
+
;;
|
|
73
78
|
-h|--help)
|
|
74
79
|
echo "Usage: $0 [OPTIONS]"
|
|
75
80
|
echo ""
|
|
76
81
|
echo "Options:"
|
|
77
82
|
echo " --api-key KEY Slyos API key (prompted if not provided)"
|
|
78
83
|
echo " --model MODEL AI model to use (default: quantum-1.7b)"
|
|
84
|
+
echo " --kb-id ID Knowledge base ID for RAG (optional)"
|
|
79
85
|
echo " -h, --help Show this help message"
|
|
80
86
|
exit 0
|
|
81
87
|
;;
|
|
@@ -177,7 +183,7 @@ print_success "Package configuration updated"
|
|
|
177
183
|
# Install Slyos SDK + dotenv
|
|
178
184
|
print_step "Installing dependencies"
|
|
179
185
|
print_info "This may take a moment..."
|
|
180
|
-
npm install @emilshirokikh/slyos-sdk dotenv > /dev/null 2>&1
|
|
186
|
+
npm install @emilshirokikh/slyos-sdk dotenv node-fetch > /dev/null 2>&1
|
|
181
187
|
print_success "Dependencies installed"
|
|
182
188
|
|
|
183
189
|
# Create the chatbot application
|
|
@@ -188,6 +194,7 @@ cat > app.mjs << 'CHATBOT_EOF'
|
|
|
188
194
|
|
|
189
195
|
import 'dotenv/config';
|
|
190
196
|
import readline from 'readline';
|
|
197
|
+
import fetch from 'node-fetch';
|
|
191
198
|
import SlyOS from '@emilshirokikh/slyos-sdk';
|
|
192
199
|
|
|
193
200
|
// Color codes for terminal output
|
|
@@ -207,11 +214,13 @@ const colors = {
|
|
|
207
214
|
const config = {
|
|
208
215
|
apiKey: process.env.SLYOS_API_KEY || 'YOUR_API_KEY',
|
|
209
216
|
model: process.env.SLYOS_MODEL || 'quantum-1.7b',
|
|
210
|
-
server: process.env.SLYOS_SERVER || 'https://slyos
|
|
217
|
+
server: process.env.SLYOS_SERVER || 'https://api.slyos.world',
|
|
218
|
+
kbId: process.env.SLYOS_KB_ID || ''
|
|
211
219
|
};
|
|
212
220
|
|
|
213
221
|
// Initialize SlyOS SDK
|
|
214
222
|
let sdk;
|
|
223
|
+
let authToken = null; // Store auth token for direct RAG API calls
|
|
215
224
|
try {
|
|
216
225
|
sdk = new SlyOS({
|
|
217
226
|
apiKey: config.apiKey,
|
|
@@ -222,6 +231,28 @@ try {
|
|
|
222
231
|
process.exit(1);
|
|
223
232
|
}
|
|
224
233
|
|
|
234
|
+
// Get auth token directly for RAG API calls
|
|
235
|
+
async function getAuthToken() {
|
|
236
|
+
if (authToken) return authToken;
|
|
237
|
+
try {
|
|
238
|
+
const res = await fetch(`${config.server}/api/auth/sdk`, {
|
|
239
|
+
method: 'POST',
|
|
240
|
+
headers: { 'Content-Type': 'application/json' },
|
|
241
|
+
body: JSON.stringify({ apiKey: config.apiKey })
|
|
242
|
+
});
|
|
243
|
+
if (!res.ok) {
|
|
244
|
+
console.log(`${colors.yellow}Auth failed: ${res.status} ${res.statusText}${colors.reset}`);
|
|
245
|
+
return null;
|
|
246
|
+
}
|
|
247
|
+
const data = await res.json();
|
|
248
|
+
authToken = data.token;
|
|
249
|
+
return authToken;
|
|
250
|
+
} catch (e) {
|
|
251
|
+
console.log(`${colors.yellow}Auth error: ${e.message}${colors.reset}`);
|
|
252
|
+
return null;
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
225
256
|
// Create readline interface
|
|
226
257
|
const rl = readline.createInterface({
|
|
227
258
|
input: process.stdin,
|
|
@@ -245,6 +276,11 @@ function printWelcome() {
|
|
|
245
276
|
|
|
246
277
|
console.log(`${colors.blue}Model:${colors.reset} ${colors.yellow}${config.model}${colors.reset}`);
|
|
247
278
|
console.log(`${colors.blue}Server:${colors.reset} ${colors.yellow}${config.server}${colors.reset}`);
|
|
279
|
+
if (config.kbId) {
|
|
280
|
+
console.log(`${colors.blue}Knowledge Base:${colors.reset} ${colors.green}${config.kbId}${colors.reset} ${colors.green}(RAG enabled)${colors.reset}`);
|
|
281
|
+
} else {
|
|
282
|
+
console.log(`${colors.blue}Knowledge Base:${colors.reset} ${colors.dim}None (plain generation)${colors.reset}`);
|
|
283
|
+
}
|
|
248
284
|
if (config.apiKey === 'YOUR_API_KEY') {
|
|
249
285
|
console.log(`${colors.red}⚠ Using placeholder API key - set SLYOS_API_KEY environment variable${colors.reset}`);
|
|
250
286
|
}
|
|
@@ -262,28 +298,113 @@ async function sendMessage(userMessage) {
|
|
|
262
298
|
try {
|
|
263
299
|
console.log(`${colors.dim}Thinking...${colors.reset}`);
|
|
264
300
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
301
|
+
let assistantMessage = '';
|
|
302
|
+
let sourceInfo = '';
|
|
303
|
+
|
|
304
|
+
if (config.kbId) {
|
|
305
|
+
// RAG mode: call API directly to get relevant chunks, then generate locally with context
|
|
306
|
+
console.log(`${colors.dim}Searching knowledge base...${colors.reset}`);
|
|
307
|
+
try {
|
|
308
|
+
const token = await getAuthToken();
|
|
309
|
+
if (!token) throw new Error('Could not authenticate — check your API key');
|
|
310
|
+
// Adapt chunk count to model's context window
|
|
311
|
+
const modelCtx = sdk.getModelContextWindow?.() || 2048;
|
|
312
|
+
const topK = modelCtx <= 2048 ? 2 : modelCtx <= 4096 ? 3 : 5;
|
|
313
|
+
const ragRes = await fetch(`${config.server}/api/rag/knowledge-bases/${config.kbId}/query`, {
|
|
314
|
+
method: 'POST',
|
|
315
|
+
headers: {
|
|
316
|
+
'Content-Type': 'application/json',
|
|
317
|
+
'Authorization': `Bearer ${token}`
|
|
318
|
+
},
|
|
319
|
+
body: JSON.stringify({ query: userMessage, top_k: topK, model_id: config.model })
|
|
320
|
+
});
|
|
321
|
+
if (!ragRes.ok) {
|
|
322
|
+
const errText = await ragRes.text();
|
|
323
|
+
throw new Error(`RAG query failed: ${ragRes.status} - ${errText}`);
|
|
324
|
+
}
|
|
325
|
+
const ragData = await ragRes.json();
|
|
326
|
+
const chunks = ragData.retrieved_chunks || [];
|
|
327
|
+
|
|
328
|
+
// Check if chunks are relevant enough (similarity > 0.3)
|
|
329
|
+
const goodChunks = chunks.filter(c => (c.similarity_score || 0) > 0.3);
|
|
330
|
+
|
|
331
|
+
if (goodChunks.length > 0) {
|
|
332
|
+
// Adapt context size to model's context window
|
|
333
|
+
const ctxWindow = sdk.getModelContextWindow?.() || 2048;
|
|
334
|
+
const maxContextChars = Math.max(500, (ctxWindow - 200) * 3);
|
|
335
|
+
const maxGenTokens = Math.min(200, Math.floor(ctxWindow / 4));
|
|
336
|
+
|
|
337
|
+
// Clean and truncate context — strip weird chars, fit model window
|
|
338
|
+
let context = goodChunks.map(c => c.content).join('\n')
|
|
339
|
+
.replace(/[^\x20-\x7E\n]/g, ' ') // Strip non-ASCII/control chars
|
|
340
|
+
.replace(/\s{3,}/g, ' ') // Collapse excessive whitespace
|
|
341
|
+
.trim();
|
|
342
|
+
if (context.length > maxContextChars) context = context.substring(0, maxContextChars);
|
|
343
|
+
|
|
344
|
+
// Simple context-then-QA format — this works best with small models
|
|
345
|
+
const prompt = `${context}\n\nQuestion: ${userMessage}\nAnswer:`;
|
|
346
|
+
const response = await sdk.generate(config.model, prompt, {
|
|
347
|
+
temperature: 0.5,
|
|
348
|
+
maxTokens: maxGenTokens
|
|
349
|
+
});
|
|
350
|
+
assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
|
|
351
|
+
|
|
352
|
+
// Collect source names
|
|
353
|
+
const sources = [...new Set(goodChunks.map(c => c.document_name || c.source).filter(Boolean))];
|
|
354
|
+
if (sources.length > 0) {
|
|
355
|
+
sourceInfo = `\n${colors.dim}[Sources: ${sources.join(', ')}]${colors.reset}`;
|
|
356
|
+
}
|
|
357
|
+
} else {
|
|
358
|
+
// No relevant chunks — answer conversationally
|
|
359
|
+
console.log(`${colors.dim}No RAG context found, using plain generation...${colors.reset}`);
|
|
360
|
+
const prompt = `The user said: "${userMessage}"\nGive a brief, friendly response:\n`;
|
|
361
|
+
const response = await sdk.generate(config.model, prompt, {
|
|
362
|
+
temperature: 0.7,
|
|
363
|
+
maxTokens: 100
|
|
364
|
+
});
|
|
365
|
+
assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
|
|
366
|
+
}
|
|
367
|
+
} catch (ragErr) {
|
|
368
|
+
console.log(`${colors.yellow}RAG lookup failed: ${ragErr.message}${colors.reset}`);
|
|
369
|
+
const prompt = `The user said: "${userMessage}"\nGive a brief, friendly response:\n`;
|
|
370
|
+
const response = await sdk.generate(config.model, prompt, {
|
|
371
|
+
temperature: 0.7,
|
|
372
|
+
maxTokens: 100
|
|
373
|
+
});
|
|
374
|
+
assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
|
|
375
|
+
}
|
|
376
|
+
} else {
|
|
377
|
+
// Plain mode: direct generation (no RAG)
|
|
378
|
+
const prompt = `The user said: "${userMessage}"\nGive a brief, helpful response:\n`;
|
|
379
|
+
const response = await sdk.generate(config.model, prompt, {
|
|
380
|
+
temperature: 0.7,
|
|
381
|
+
maxTokens: 150
|
|
382
|
+
});
|
|
383
|
+
assistantMessage = (typeof response === 'string' ? response : response?.text || response?.content || '') || '';
|
|
384
|
+
}
|
|
276
385
|
|
|
277
|
-
//
|
|
386
|
+
// Clean up model output artifacts
|
|
278
387
|
assistantMessage = assistantMessage
|
|
279
|
-
|
|
388
|
+
// Strip repeated garbage chars (!!!, ???, etc)
|
|
389
|
+
.replace(/(.)\1{5,}/g, '')
|
|
390
|
+
// Strip leading role prefixes the model loves to emit
|
|
391
|
+
.replace(/^(assistant|system|answer|response|AI)\s*[:]\s*/i, '')
|
|
392
|
+
// Remove leading partial sentences (fragments before the real answer)
|
|
393
|
+
.replace(/^[a-z][^.!?]{0,40}\.\s*/i, function(match) {
|
|
394
|
+
// Only strip if it looks like a fragment (< 50 chars ending in period)
|
|
395
|
+
return match.length < 50 && !match.includes(' is ') ? '' : match;
|
|
396
|
+
})
|
|
397
|
+
// Stop at any hallucinated role prefixes mid-response
|
|
398
|
+
.split(/\n\s*(User|Human|System|Question):/i)[0]
|
|
399
|
+
// Strip any remaining leading role prefix after newline
|
|
400
|
+
.replace(/^\s*(assistant|AI)\s*[:]\s*/im, '')
|
|
280
401
|
.trim();
|
|
281
402
|
|
|
282
|
-
if (!assistantMessage) {
|
|
403
|
+
if (!assistantMessage || assistantMessage.length < 3) {
|
|
283
404
|
assistantMessage = '(No response generated — try rephrasing your question)';
|
|
284
405
|
}
|
|
285
406
|
|
|
286
|
-
console.log(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}\n`);
|
|
407
|
+
console.log(`\n${colors.bright}${colors.magenta}AI:${colors.reset} ${assistantMessage}${sourceInfo}\n`);
|
|
287
408
|
} catch (error) {
|
|
288
409
|
console.error(`\n${colors.red}Error:${colors.reset} ${error.message}\n`);
|
|
289
410
|
}
|
|
@@ -371,7 +492,7 @@ cat > .env.example << 'ENV_EOF'
|
|
|
371
492
|
# Slyos SDK Configuration
|
|
372
493
|
SLYOS_API_KEY=your_api_key_here
|
|
373
494
|
SLYOS_MODEL=quantum-1.7b
|
|
374
|
-
SLYOS_SERVER=https://slyos
|
|
495
|
+
SLYOS_SERVER=https://api.slyos.world
|
|
375
496
|
ENV_EOF
|
|
376
497
|
print_success "Environment configuration template created"
|
|
377
498
|
|
|
@@ -404,7 +525,7 @@ Set these environment variables before running:
|
|
|
404
525
|
```bash
|
|
405
526
|
export SLYOS_API_KEY=your_api_key_here
|
|
406
527
|
export SLYOS_MODEL=quantum-1.7b
|
|
407
|
-
export SLYOS_SERVER=https://slyos
|
|
528
|
+
export SLYOS_SERVER=https://api.slyos.world
|
|
408
529
|
```
|
|
409
530
|
|
|
410
531
|
Or create a `.env` file based on `.env.example`.
|
|
@@ -476,6 +597,7 @@ cat > .env << ENV_SETUP_EOF
|
|
|
476
597
|
SLYOS_API_KEY=${API_KEY}
|
|
477
598
|
SLYOS_MODEL=${MODEL}
|
|
478
599
|
SLYOS_SERVER=${SLYOS_SERVER}
|
|
600
|
+
SLYOS_KB_ID=${KB_ID}
|
|
479
601
|
ENV_SETUP_EOF
|
|
480
602
|
print_success "Environment configured"
|
|
481
603
|
|
|
@@ -489,6 +611,9 @@ echo -e "${CYAN}Project Details:${NC}"
|
|
|
489
611
|
echo " Location: ${YELLOW}$(pwd)${NC}"
|
|
490
612
|
echo " API Key: ${YELLOW}${API_KEY}${NC}"
|
|
491
613
|
echo " Model: ${YELLOW}${MODEL}${NC}"
|
|
614
|
+
if [ -n "$KB_ID" ]; then
|
|
615
|
+
echo " Knowledge Base: ${GREEN}${KB_ID} (RAG enabled)${NC}"
|
|
616
|
+
fi
|
|
492
617
|
echo ""
|
|
493
618
|
echo -e "${CYAN}Next Steps:${NC}"
|
|
494
619
|
echo " 1. Review the .env file and update your API key if needed"
|
package/dist/index.d.ts
CHANGED
|
@@ -106,6 +106,51 @@ interface OpenAICompatibleClient {
|
|
|
106
106
|
};
|
|
107
107
|
};
|
|
108
108
|
}
|
|
109
|
+
interface RAGOptions {
|
|
110
|
+
knowledgeBaseId: string;
|
|
111
|
+
query: string;
|
|
112
|
+
topK?: number;
|
|
113
|
+
modelId: string;
|
|
114
|
+
temperature?: number;
|
|
115
|
+
maxTokens?: number;
|
|
116
|
+
}
|
|
117
|
+
interface RAGChunk {
|
|
118
|
+
id: string;
|
|
119
|
+
documentId: string;
|
|
120
|
+
documentName: string;
|
|
121
|
+
content: string;
|
|
122
|
+
similarityScore: number;
|
|
123
|
+
metadata?: Record<string, any>;
|
|
124
|
+
}
|
|
125
|
+
interface RAGResponse {
|
|
126
|
+
query: string;
|
|
127
|
+
retrievedChunks: RAGChunk[];
|
|
128
|
+
generatedResponse: string;
|
|
129
|
+
context: string;
|
|
130
|
+
latencyMs: number;
|
|
131
|
+
tierUsed: 1 | 2 | 3;
|
|
132
|
+
}
|
|
133
|
+
interface OfflineIndex {
|
|
134
|
+
metadata: {
|
|
135
|
+
kb_id: string;
|
|
136
|
+
kb_name: string;
|
|
137
|
+
chunk_size: number;
|
|
138
|
+
embedding_dim: number;
|
|
139
|
+
total_chunks: number;
|
|
140
|
+
synced_at: string;
|
|
141
|
+
expires_at: string;
|
|
142
|
+
sync_token: string;
|
|
143
|
+
};
|
|
144
|
+
chunks: Array<{
|
|
145
|
+
id: string;
|
|
146
|
+
document_id: string;
|
|
147
|
+
document_name: string;
|
|
148
|
+
content: string;
|
|
149
|
+
chunk_index: number;
|
|
150
|
+
embedding: number[] | null;
|
|
151
|
+
metadata: Record<string, any>;
|
|
152
|
+
}>;
|
|
153
|
+
}
|
|
109
154
|
declare class SlyOS {
|
|
110
155
|
private apiKey;
|
|
111
156
|
private apiUrl;
|
|
@@ -116,11 +161,13 @@ declare class SlyOS {
|
|
|
116
161
|
private onProgress;
|
|
117
162
|
private onEvent;
|
|
118
163
|
private fallbackConfig;
|
|
164
|
+
private modelContextWindow;
|
|
119
165
|
constructor(config: SlyOSConfigWithFallback);
|
|
120
166
|
private emitProgress;
|
|
121
167
|
private emitEvent;
|
|
122
168
|
analyzeDevice(): Promise<DeviceProfile>;
|
|
123
169
|
getDeviceProfile(): DeviceProfile | null;
|
|
170
|
+
getModelContextWindow(): number;
|
|
124
171
|
recommendModel(category?: ModelCategory): {
|
|
125
172
|
modelId: string;
|
|
126
173
|
quant: QuantizationLevel;
|
|
@@ -135,6 +182,17 @@ declare class SlyOS {
|
|
|
135
182
|
minRAM_MB: Record<string, number>;
|
|
136
183
|
}[];
|
|
137
184
|
}>;
|
|
185
|
+
searchModels(query: string, options?: {
|
|
186
|
+
limit?: number;
|
|
187
|
+
task?: string;
|
|
188
|
+
}): Promise<Array<{
|
|
189
|
+
id: string;
|
|
190
|
+
name: string;
|
|
191
|
+
downloads: number;
|
|
192
|
+
likes: number;
|
|
193
|
+
task: string;
|
|
194
|
+
size_category: string;
|
|
195
|
+
}>>;
|
|
138
196
|
canRunModel(modelId: string, quant?: QuantizationLevel): {
|
|
139
197
|
canRun: boolean;
|
|
140
198
|
reason: string;
|
|
@@ -153,6 +211,41 @@ declare class SlyOS {
|
|
|
153
211
|
private fallbackToBedrockCloud;
|
|
154
212
|
private invokeBedrockCloud;
|
|
155
213
|
private mapModelToOpenAI;
|
|
214
|
+
private localEmbeddingModel;
|
|
215
|
+
private offlineIndexes;
|
|
216
|
+
/**
|
|
217
|
+
* Tier 2: Cloud-indexed RAG with local inference.
|
|
218
|
+
* Retrieves relevant chunks from server, generates response locally.
|
|
219
|
+
*/
|
|
220
|
+
ragQuery(options: RAGOptions): Promise<RAGResponse>;
|
|
221
|
+
/**
|
|
222
|
+
* Tier 1: Fully local RAG. Zero network calls.
|
|
223
|
+
* Documents are chunked/embedded on-device, retrieval and generation all local.
|
|
224
|
+
*/
|
|
225
|
+
ragQueryLocal(options: RAGOptions & {
|
|
226
|
+
documents: Array<{
|
|
227
|
+
content: string;
|
|
228
|
+
name?: string;
|
|
229
|
+
}>;
|
|
230
|
+
}): Promise<RAGResponse>;
|
|
231
|
+
/**
|
|
232
|
+
* Tier 3: Offline RAG using a synced knowledge base.
|
|
233
|
+
* First call syncKnowledgeBase(), then use this for offline queries.
|
|
234
|
+
*/
|
|
235
|
+
ragQueryOffline(options: RAGOptions): Promise<RAGResponse>;
|
|
236
|
+
/**
|
|
237
|
+
* Sync a knowledge base for offline use (Tier 3).
|
|
238
|
+
* Downloads chunks + embeddings from server, stores locally.
|
|
239
|
+
*/
|
|
240
|
+
syncKnowledgeBase(knowledgeBaseId: string, deviceId?: string): Promise<{
|
|
241
|
+
chunkCount: number;
|
|
242
|
+
sizeMb: number;
|
|
243
|
+
expiresAt: string;
|
|
244
|
+
}>;
|
|
245
|
+
private loadEmbeddingModel;
|
|
246
|
+
private embedTextLocal;
|
|
247
|
+
private cosineSimilarity;
|
|
248
|
+
private chunkTextLocal;
|
|
156
249
|
static openaiCompatible(config: {
|
|
157
250
|
apiKey: string;
|
|
158
251
|
apiUrl?: string;
|
|
@@ -160,4 +253,4 @@ declare class SlyOS {
|
|
|
160
253
|
}): OpenAICompatibleClient;
|
|
161
254
|
}
|
|
162
255
|
export default SlyOS;
|
|
163
|
-
export type { SlyOSConfig, SlyOSConfigWithFallback, GenerateOptions, TranscribeOptions, DeviceProfile, ProgressEvent, SlyEvent, QuantizationLevel, ModelCategory, OpenAIMessage, OpenAIChatCompletionRequest, OpenAIChatCompletionResponse, OpenAIChoice, OpenAIUsage, BedrockTextGenerationConfig, BedrockInvokeRequest, BedrockInvokeResponse, BedrockResult, FallbackConfig, FallbackProvider, OpenAICompatibleClient, };
|
|
256
|
+
export type { SlyOSConfig, SlyOSConfigWithFallback, GenerateOptions, TranscribeOptions, DeviceProfile, ProgressEvent, SlyEvent, QuantizationLevel, ModelCategory, OpenAIMessage, OpenAIChatCompletionRequest, OpenAIChatCompletionResponse, OpenAIChoice, OpenAIUsage, BedrockTextGenerationConfig, BedrockInvokeRequest, BedrockInvokeResponse, BedrockResult, FallbackConfig, FallbackProvider, OpenAICompatibleClient, RAGOptions, RAGChunk, RAGResponse, OfflineIndex, };
|