@emilshirokikh/slyos-sdk 1.3.2 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -16
- package/create-chatbot.sh +145 -20
- package/dist/index.d.ts +94 -1
- package/dist/index.js +386 -28
- package/package.json +1 -1
- package/src/index.ts +489 -28
package/dist/index.js
CHANGED
|
@@ -81,6 +81,25 @@ function selectQuantization(memoryMB, modelId) {
|
|
|
81
81
|
}
|
|
82
82
|
return 'q4'; // fallback
|
|
83
83
|
}
|
|
84
|
+
// ─── Context Window Detection ──────────────────────────────────────
|
|
85
|
+
/**
 * Best-effort lookup of a model's context window from its Hugging Face
 * `config.json`.
 *
 * The config is fetched from `https://huggingface.co/<hfModelId>/raw/main/config.json`
 * with a 5 second timeout. Several field names are tried in order
 * (`max_position_embeddings`, `n_positions`, `max_seq_len`, `model_max_length`)
 * and the first one holding a positive safe integer wins. Anything else
 * (strings, floats, out-of-range sentinel values, missing fields) is ignored so
 * that callers always receive a usable token count.
 *
 * @param {string} hfModelId - Hugging Face repo id, interpolated into the URL as-is.
 * @returns {Promise<number>} The detected context window, or 2048 when the
 *   config cannot be fetched or contains no usable value. Never rejects.
 */
async function detectContextWindowFromHF(hfModelId) {
    const DEFAULT_CONTEXT_WINDOW = 2048;
    try {
        const configUrl = `https://huggingface.co/${hfModelId}/raw/main/config.json`;
        const response = await axios.get(configUrl, { timeout: 5000 });
        const config = response.data;
        // A non-JSON body (e.g. an HTML error page) arrives as a string; treat it as "no config".
        if (config === null || typeof config !== 'object') {
            return DEFAULT_CONTEXT_WINDOW;
        }
        // Try multiple context window field names, in priority order.
        const candidates = [
            config.max_position_embeddings,
            config.n_positions,
            config.max_seq_len,
            config.model_max_length,
        ];
        const detected = candidates.find((value) => Number.isSafeInteger(value) && value > 0);
        return detected ?? DEFAULT_CONTEXT_WINDOW;
    }
    catch {
        // Deliberately swallow: detection is optional, so any failure
        // (network, timeout, malformed response) falls back to the default.
        return DEFAULT_CONTEXT_WINDOW;
    }
}
|
|
84
103
|
// ─── Device Profiling ───────────────────────────────────────────────
|
|
85
104
|
async function profileDevice() {
|
|
86
105
|
const isNode = typeof window === 'undefined';
|
|
@@ -148,6 +167,12 @@ class SlyOS {
|
|
|
148
167
|
this.token = null;
|
|
149
168
|
this.models = new Map();
|
|
150
169
|
this.deviceProfile = null;
|
|
170
|
+
this.modelContextWindow = 0;
|
|
171
|
+
// ═══════════════════════════════════════════════════════════
|
|
172
|
+
// RAG — Retrieval Augmented Generation
|
|
173
|
+
// ═══════════════════════════════════════════════════════════
|
|
174
|
+
this.localEmbeddingModel = null;
|
|
175
|
+
this.offlineIndexes = new Map();
|
|
151
176
|
this.apiKey = config.apiKey;
|
|
152
177
|
this.apiUrl = config.apiUrl || 'https://api.slyos.world';
|
|
153
178
|
this.deviceId = `device-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
|
@@ -177,6 +202,9 @@ class SlyOS {
|
|
|
177
202
|
getDeviceProfile() {
|
|
178
203
|
return this.deviceProfile;
|
|
179
204
|
}
|
|
205
|
+
getModelContextWindow() {
|
|
206
|
+
return this.modelContextWindow;
|
|
207
|
+
}
|
|
180
208
|
// ── Smart Model Recommendation ──────────────────────────────────
|
|
181
209
|
recommendModel(category = 'llm') {
|
|
182
210
|
if (!this.deviceProfile) {
|
|
@@ -271,6 +299,31 @@ class SlyOS {
|
|
|
271
299
|
}
|
|
272
300
|
return Object.fromEntries(Object.entries(grouped).map(([cat, models]) => [cat, { models }]));
|
|
273
301
|
}
|
|
302
|
+
async searchModels(query, options) {
|
|
303
|
+
try {
|
|
304
|
+
const limit = options?.limit || 20;
|
|
305
|
+
const filters = ['onnx']; // Filter for ONNX models only
|
|
306
|
+
if (options?.task) {
|
|
307
|
+
filters.push(options.task);
|
|
308
|
+
}
|
|
309
|
+
const filterString = filters.map(f => `"${f}"`).join(',');
|
|
310
|
+
const url = `https://huggingface.co/api/models?search=${encodeURIComponent(query)}&filter=${encodeURIComponent(`[${filterString}]`)}&sort=downloads&direction=-1&limit=${limit}`;
|
|
311
|
+
const response = await axios.get(url, { timeout: 10000 });
|
|
312
|
+
const models = Array.isArray(response.data) ? response.data : [];
|
|
313
|
+
return models.map((model) => ({
|
|
314
|
+
id: model.id,
|
|
315
|
+
name: model.id.split('/')[1] || model.id,
|
|
316
|
+
downloads: model.downloads || 0,
|
|
317
|
+
likes: model.likes || 0,
|
|
318
|
+
task: model.task || 'unknown',
|
|
319
|
+
size_category: model.size_category || 'unknown',
|
|
320
|
+
}));
|
|
321
|
+
}
|
|
322
|
+
catch (error) {
|
|
323
|
+
this.emitEvent('error', { stage: 'model_search', error: error.message });
|
|
324
|
+
throw new Error(`Model search failed: ${error.message}`);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
274
327
|
canRunModel(modelId, quant) {
|
|
275
328
|
const info = modelMap[modelId];
|
|
276
329
|
if (!info)
|
|
@@ -297,25 +350,37 @@ class SlyOS {
|
|
|
297
350
|
}
|
|
298
351
|
async loadModel(modelId, options) {
|
|
299
352
|
const info = modelMap[modelId];
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
353
|
+
let hfModelId;
|
|
354
|
+
let task;
|
|
355
|
+
let estimatedSize;
|
|
356
|
+
// Handle curated models
|
|
357
|
+
if (info) {
|
|
358
|
+
hfModelId = info.hfModel;
|
|
359
|
+
task = info.task;
|
|
360
|
+
// Determine quantization
|
|
361
|
+
let quant = options?.quant || 'fp32';
|
|
362
|
+
if (!options?.quant && this.deviceProfile) {
|
|
363
|
+
quant = selectQuantization(this.deviceProfile.memoryMB, modelId);
|
|
364
|
+
this.emitProgress('downloading', 0, `Auto-selected ${quant.toUpperCase()} quantization for your device`);
|
|
365
|
+
}
|
|
366
|
+
// Check feasibility
|
|
367
|
+
const check = this.canRunModel(modelId, quant);
|
|
368
|
+
if (!check.canRun) {
|
|
369
|
+
this.emitProgress('error', 0, check.reason);
|
|
370
|
+
throw new Error(check.reason);
|
|
371
|
+
}
|
|
372
|
+
estimatedSize = info.sizesMB[quant];
|
|
373
|
+
this.emitProgress('downloading', 0, `Downloading ${modelId} (${quant.toUpperCase()}, ~${estimatedSize}MB)...`);
|
|
374
|
+
this.emitEvent('model_download_start', { modelId, quant, estimatedSizeMB: estimatedSize });
|
|
375
|
+
}
|
|
376
|
+
else {
|
|
377
|
+
// Handle custom HuggingFace models
|
|
378
|
+
hfModelId = modelId;
|
|
379
|
+
task = 'text-generation'; // Default task
|
|
380
|
+
estimatedSize = 2048; // Default estimate
|
|
381
|
+
this.emitProgress('downloading', 0, `Loading custom HuggingFace model: ${modelId}...`);
|
|
382
|
+
this.emitEvent('model_download_start', { modelId, custom: true, estimatedSizeMB: estimatedSize });
|
|
383
|
+
}
|
|
319
384
|
// Map quant to dtype for HuggingFace
|
|
320
385
|
const dtypeMap = {
|
|
321
386
|
q4: 'q4',
|
|
@@ -326,9 +391,14 @@ class SlyOS {
|
|
|
326
391
|
let lastReportedPercent = 0;
|
|
327
392
|
const startTime = Date.now();
|
|
328
393
|
try {
|
|
329
|
-
|
|
394
|
+
// For custom HF models, detect context window
|
|
395
|
+
let detectedContextWindow = 2048;
|
|
396
|
+
if (!info) {
|
|
397
|
+
detectedContextWindow = await detectContextWindowFromHF(hfModelId);
|
|
398
|
+
}
|
|
399
|
+
const pipe = await pipeline(task, hfModelId, {
|
|
330
400
|
device: 'cpu',
|
|
331
|
-
dtype:
|
|
401
|
+
dtype: 'q4', // Default to q4 for stability
|
|
332
402
|
progress_callback: (progressData) => {
|
|
333
403
|
// HuggingFace transformers sends progress events during download
|
|
334
404
|
if (progressData && typeof progressData === 'object') {
|
|
@@ -357,12 +427,22 @@ class SlyOS {
|
|
|
357
427
|
},
|
|
358
428
|
});
|
|
359
429
|
const loadTime = Date.now() - startTime;
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
430
|
+
let contextWindow;
|
|
431
|
+
if (info) {
|
|
432
|
+
// For curated models, use recommendContextWindow
|
|
433
|
+
const quant = options?.quant || (this.deviceProfile ? selectQuantization(this.deviceProfile.memoryMB, modelId) : 'q4');
|
|
434
|
+
contextWindow = this.deviceProfile
|
|
435
|
+
? recommendContextWindow(this.deviceProfile.memoryMB, quant)
|
|
436
|
+
: 2048;
|
|
437
|
+
}
|
|
438
|
+
else {
|
|
439
|
+
// For custom HF models, use detected context window
|
|
440
|
+
contextWindow = detectedContextWindow;
|
|
441
|
+
}
|
|
442
|
+
this.modelContextWindow = contextWindow;
|
|
443
|
+
this.models.set(modelId, { pipe, info, quant: 'q4', contextWindow });
|
|
444
|
+
this.emitProgress('ready', 100, `${modelId} loaded (q4, ${(loadTime / 1000).toFixed(1)}s, ctx: ${contextWindow})`);
|
|
445
|
+
this.emitEvent('model_loaded', { modelId, quant: 'q4', loadTimeMs: loadTime, contextWindow });
|
|
366
446
|
// Telemetry
|
|
367
447
|
if (this.token) {
|
|
368
448
|
await axios.post(`${this.apiUrl}/api/telemetry`, {
|
|
@@ -370,7 +450,7 @@ class SlyOS {
|
|
|
370
450
|
event_type: 'model_load',
|
|
371
451
|
model_id: modelId,
|
|
372
452
|
success: true,
|
|
373
|
-
metadata: { quant, loadTimeMs: loadTime, contextWindow },
|
|
453
|
+
metadata: { quant: 'q4', loadTimeMs: loadTime, contextWindow, custom: !info },
|
|
374
454
|
}, {
|
|
375
455
|
headers: { Authorization: `Bearer ${this.token}` },
|
|
376
456
|
}).catch(() => { });
|
|
@@ -760,6 +840,284 @@ class SlyOS {
|
|
|
760
840
|
};
|
|
761
841
|
return modelMapping[slyModelId] || 'gpt-4o-mini';
|
|
762
842
|
}
|
|
843
|
+
/**
|
|
844
|
+
* Tier 2: Cloud-indexed RAG with local inference.
|
|
845
|
+
* Retrieves relevant chunks from server, generates response locally.
|
|
846
|
+
*/
|
|
847
|
+
async ragQuery(options) {
|
|
848
|
+
const startTime = Date.now();
|
|
849
|
+
try {
|
|
850
|
+
if (!this.token)
|
|
851
|
+
throw new Error('Not authenticated. Call init() first.');
|
|
852
|
+
// Step 1: Retrieve relevant chunks from backend
|
|
853
|
+
const searchResponse = await axios.post(`${this.apiUrl}/api/rag/knowledge-bases/${options.knowledgeBaseId}/query`, {
|
|
854
|
+
query: options.query,
|
|
855
|
+
top_k: options.topK || 5,
|
|
856
|
+
model_id: options.modelId
|
|
857
|
+
}, { headers: { Authorization: `Bearer ${this.token}` } });
|
|
858
|
+
let { retrieved_chunks, prompt_template, context } = searchResponse.data;
|
|
859
|
+
// Apply context window limits
|
|
860
|
+
const contextWindow = this.modelContextWindow || 2048;
|
|
861
|
+
const maxContextChars = (contextWindow - 200) * 3; // Rough token-to-char ratio, reserving 200 tokens
|
|
862
|
+
if (context && context.length > maxContextChars) {
|
|
863
|
+
context = context.substring(0, maxContextChars) + '...';
|
|
864
|
+
}
|
|
865
|
+
// Step 2: Generate response locally using the augmented prompt
|
|
866
|
+
const response = await this.generate(options.modelId, prompt_template, {
|
|
867
|
+
temperature: options.temperature,
|
|
868
|
+
maxTokens: options.maxTokens,
|
|
869
|
+
});
|
|
870
|
+
return {
|
|
871
|
+
query: options.query,
|
|
872
|
+
retrievedChunks: retrieved_chunks.map((c) => ({
|
|
873
|
+
id: c.id,
|
|
874
|
+
documentId: c.document_id,
|
|
875
|
+
documentName: c.document_name,
|
|
876
|
+
content: c.content,
|
|
877
|
+
similarityScore: c.similarity_score,
|
|
878
|
+
metadata: c.metadata
|
|
879
|
+
})),
|
|
880
|
+
generatedResponse: response,
|
|
881
|
+
context,
|
|
882
|
+
latencyMs: Date.now() - startTime,
|
|
883
|
+
tierUsed: 2,
|
|
884
|
+
};
|
|
885
|
+
}
|
|
886
|
+
catch (error) {
|
|
887
|
+
this.emitEvent('error', { stage: 'rag_query', error: error.message });
|
|
888
|
+
throw new Error(`RAG query failed: ${error.message}`);
|
|
889
|
+
}
|
|
890
|
+
}
|
|
891
|
+
/**
|
|
892
|
+
* Tier 1: Fully local RAG. Zero network calls.
|
|
893
|
+
* Documents are chunked/embedded on-device, retrieval and generation all local.
|
|
894
|
+
*/
|
|
895
|
+
async ragQueryLocal(options) {
|
|
896
|
+
const startTime = Date.now();
|
|
897
|
+
try {
|
|
898
|
+
// Step 1: Load embedding model if needed
|
|
899
|
+
if (!this.localEmbeddingModel) {
|
|
900
|
+
await this.loadEmbeddingModel();
|
|
901
|
+
}
|
|
902
|
+
// Adapt chunk size based on context window for efficiency
|
|
903
|
+
const contextWindow = this.modelContextWindow || 2048;
|
|
904
|
+
const chunkSize = contextWindow <= 1024 ? 256 : contextWindow <= 2048 ? 512 : 1024;
|
|
905
|
+
const overlap = Math.floor(chunkSize / 4);
|
|
906
|
+
// Step 2: Chunk documents if not already chunked
|
|
907
|
+
const allChunks = [];
|
|
908
|
+
for (const doc of options.documents) {
|
|
909
|
+
const chunks = this.chunkTextLocal(doc.content, chunkSize, overlap);
|
|
910
|
+
for (const chunk of chunks) {
|
|
911
|
+
const embedding = await this.embedTextLocal(chunk);
|
|
912
|
+
allChunks.push({ content: chunk, documentName: doc.name || 'Document', embedding });
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
// Step 3: Embed query
|
|
916
|
+
const queryEmbedding = await this.embedTextLocal(options.query);
|
|
917
|
+
// Step 4: Cosine similarity search
|
|
918
|
+
const scored = allChunks
|
|
919
|
+
.filter(c => c.embedding)
|
|
920
|
+
.map(c => ({
|
|
921
|
+
...c,
|
|
922
|
+
similarityScore: this.cosineSimilarity(queryEmbedding, c.embedding)
|
|
923
|
+
}))
|
|
924
|
+
.sort((a, b) => b.similarityScore - a.similarityScore)
|
|
925
|
+
.slice(0, options.topK || 5);
|
|
926
|
+
// Step 5: Build context with size limits
|
|
927
|
+
const maxContextChars = (contextWindow - 200) * 3; // Rough token-to-char ratio, reserving 200 tokens
|
|
928
|
+
let contextLength = 0;
|
|
929
|
+
const contextParts = [];
|
|
930
|
+
for (const c of scored) {
|
|
931
|
+
const part = `[Source: ${c.documentName}]\n${c.content}`;
|
|
932
|
+
if (contextLength + part.length <= maxContextChars) {
|
|
933
|
+
contextParts.push(part);
|
|
934
|
+
contextLength += part.length + 10; // Account for separator
|
|
935
|
+
}
|
|
936
|
+
else {
|
|
937
|
+
break;
|
|
938
|
+
}
|
|
939
|
+
}
|
|
940
|
+
const context = contextParts.join('\n\n---\n\n');
|
|
941
|
+
const prompt = `You are a helpful assistant. Answer based ONLY on the following context:\n\n${context}\n\nQuestion: ${options.query}\n\nAnswer:`;
|
|
942
|
+
// Step 6: Generate locally
|
|
943
|
+
const response = await this.generate(options.modelId, prompt, {
|
|
944
|
+
temperature: options.temperature,
|
|
945
|
+
maxTokens: options.maxTokens,
|
|
946
|
+
});
|
|
947
|
+
return {
|
|
948
|
+
query: options.query,
|
|
949
|
+
retrievedChunks: scored.map((c, i) => ({
|
|
950
|
+
id: `local-${i}`,
|
|
951
|
+
documentId: 'local',
|
|
952
|
+
documentName: c.documentName,
|
|
953
|
+
content: c.content,
|
|
954
|
+
similarityScore: c.similarityScore,
|
|
955
|
+
metadata: {}
|
|
956
|
+
})),
|
|
957
|
+
generatedResponse: response,
|
|
958
|
+
context,
|
|
959
|
+
latencyMs: Date.now() - startTime,
|
|
960
|
+
tierUsed: 1,
|
|
961
|
+
};
|
|
962
|
+
}
|
|
963
|
+
catch (error) {
|
|
964
|
+
this.emitEvent('error', { stage: 'rag_local', error: error.message });
|
|
965
|
+
throw new Error(`Local RAG failed: ${error.message}`);
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
/**
|
|
969
|
+
* Tier 3: Offline RAG using a synced knowledge base.
|
|
970
|
+
* First call syncKnowledgeBase(), then use this for offline queries.
|
|
971
|
+
*/
|
|
972
|
+
async ragQueryOffline(options) {
|
|
973
|
+
const startTime = Date.now();
|
|
974
|
+
const index = this.offlineIndexes.get(options.knowledgeBaseId);
|
|
975
|
+
if (!index) {
|
|
976
|
+
throw new Error(`Knowledge base "${options.knowledgeBaseId}" not synced. Call syncKnowledgeBase() first.`);
|
|
977
|
+
}
|
|
978
|
+
// Check expiry
|
|
979
|
+
if (new Date(index.metadata.expires_at) < new Date()) {
|
|
980
|
+
throw new Error('Offline index has expired. Please re-sync.');
|
|
981
|
+
}
|
|
982
|
+
try {
|
|
983
|
+
// Load embedding model
|
|
984
|
+
if (!this.localEmbeddingModel) {
|
|
985
|
+
await this.loadEmbeddingModel();
|
|
986
|
+
}
|
|
987
|
+
// Embed query
|
|
988
|
+
const queryEmbedding = await this.embedTextLocal(options.query);
|
|
989
|
+
// Search offline index
|
|
990
|
+
const scored = index.chunks
|
|
991
|
+
.filter(c => c.embedding && c.embedding.length > 0)
|
|
992
|
+
.map(c => ({
|
|
993
|
+
...c,
|
|
994
|
+
similarityScore: this.cosineSimilarity(queryEmbedding, c.embedding)
|
|
995
|
+
}))
|
|
996
|
+
.sort((a, b) => b.similarityScore - a.similarityScore)
|
|
997
|
+
.slice(0, options.topK || 5);
|
|
998
|
+
// Build context with size limits
|
|
999
|
+
const contextWindow = this.modelContextWindow || 2048;
|
|
1000
|
+
const maxContextChars = (contextWindow - 200) * 3; // Rough token-to-char ratio, reserving 200 tokens
|
|
1001
|
+
let contextLength = 0;
|
|
1002
|
+
const contextParts = [];
|
|
1003
|
+
for (const c of scored) {
|
|
1004
|
+
const part = `[Source: ${c.document_name}]\n${c.content}`;
|
|
1005
|
+
if (contextLength + part.length <= maxContextChars) {
|
|
1006
|
+
contextParts.push(part);
|
|
1007
|
+
contextLength += part.length + 10; // Account for separator
|
|
1008
|
+
}
|
|
1009
|
+
else {
|
|
1010
|
+
break;
|
|
1011
|
+
}
|
|
1012
|
+
}
|
|
1013
|
+
const context = contextParts.join('\n\n---\n\n');
|
|
1014
|
+
const prompt = `You are a helpful assistant. Answer based ONLY on the following context:\n\n${context}\n\nQuestion: ${options.query}\n\nAnswer:`;
|
|
1015
|
+
// Generate locally
|
|
1016
|
+
const response = await this.generate(options.modelId, prompt, {
|
|
1017
|
+
temperature: options.temperature,
|
|
1018
|
+
maxTokens: options.maxTokens,
|
|
1019
|
+
});
|
|
1020
|
+
return {
|
|
1021
|
+
query: options.query,
|
|
1022
|
+
retrievedChunks: scored.map(c => ({
|
|
1023
|
+
id: c.id,
|
|
1024
|
+
documentId: c.document_id,
|
|
1025
|
+
documentName: c.document_name,
|
|
1026
|
+
content: c.content,
|
|
1027
|
+
similarityScore: c.similarityScore,
|
|
1028
|
+
metadata: c.metadata
|
|
1029
|
+
})),
|
|
1030
|
+
generatedResponse: response,
|
|
1031
|
+
context,
|
|
1032
|
+
latencyMs: Date.now() - startTime,
|
|
1033
|
+
tierUsed: 3,
|
|
1034
|
+
};
|
|
1035
|
+
}
|
|
1036
|
+
catch (error) {
|
|
1037
|
+
this.emitEvent('error', { stage: 'rag_offline', error: error.message });
|
|
1038
|
+
throw new Error(`Offline RAG failed: ${error.message}`);
|
|
1039
|
+
}
|
|
1040
|
+
}
|
|
1041
|
+
/**
|
|
1042
|
+
* Sync a knowledge base for offline use (Tier 3).
|
|
1043
|
+
* Downloads chunks + embeddings from server, stores locally.
|
|
1044
|
+
*/
|
|
1045
|
+
async syncKnowledgeBase(knowledgeBaseId, deviceId) {
|
|
1046
|
+
try {
|
|
1047
|
+
if (!this.token)
|
|
1048
|
+
throw new Error('Not authenticated. Call init() first.');
|
|
1049
|
+
const response = await axios.post(`${this.apiUrl}/api/rag/knowledge-bases/${knowledgeBaseId}/sync`, { device_id: deviceId || this.deviceId || 'sdk-device' }, { headers: { Authorization: `Bearer ${this.token}` } });
|
|
1050
|
+
const { sync_package, chunk_count, package_size_mb, expires_at } = response.data;
|
|
1051
|
+
this.offlineIndexes.set(knowledgeBaseId, sync_package);
|
|
1052
|
+
return {
|
|
1053
|
+
chunkCount: chunk_count,
|
|
1054
|
+
sizeMb: package_size_mb,
|
|
1055
|
+
expiresAt: expires_at
|
|
1056
|
+
};
|
|
1057
|
+
}
|
|
1058
|
+
catch (error) {
|
|
1059
|
+
throw new Error(`Sync failed: ${error.message}`);
|
|
1060
|
+
}
|
|
1061
|
+
}
|
|
1062
|
+
// --- RAG Helper Methods ---
|
|
1063
|
+
async loadEmbeddingModel() {
|
|
1064
|
+
this.emitProgress('downloading', 0, 'Loading embedding model (all-MiniLM-L6-v2)...');
|
|
1065
|
+
try {
|
|
1066
|
+
const { pipeline } = await import('@huggingface/transformers');
|
|
1067
|
+
this.localEmbeddingModel = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
|
1068
|
+
this.emitProgress('ready', 100, 'Embedding model loaded');
|
|
1069
|
+
}
|
|
1070
|
+
catch (error) {
|
|
1071
|
+
this.emitProgress('error', 0, `Embedding model failed: ${error.message}`);
|
|
1072
|
+
throw error;
|
|
1073
|
+
}
|
|
1074
|
+
}
|
|
1075
|
+
async embedTextLocal(text) {
|
|
1076
|
+
if (!this.localEmbeddingModel)
|
|
1077
|
+
throw new Error('Embedding model not loaded');
|
|
1078
|
+
const result = await this.localEmbeddingModel(text, { pooling: 'mean', normalize: true });
|
|
1079
|
+
// Handle different tensor output formats (v2 vs v3 of transformers)
|
|
1080
|
+
if (result.data)
|
|
1081
|
+
return Array.from(result.data);
|
|
1082
|
+
if (result.tolist)
|
|
1083
|
+
return result.tolist().flat();
|
|
1084
|
+
if (Array.isArray(result))
|
|
1085
|
+
return result.flat();
|
|
1086
|
+
throw new Error('Unexpected embedding output format');
|
|
1087
|
+
}
|
|
1088
|
+
cosineSimilarity(a, b) {
|
|
1089
|
+
let dot = 0, normA = 0, normB = 0;
|
|
1090
|
+
for (let i = 0; i < a.length; i++) {
|
|
1091
|
+
dot += a[i] * b[i];
|
|
1092
|
+
normA += a[i] * a[i];
|
|
1093
|
+
normB += b[i] * b[i];
|
|
1094
|
+
}
|
|
1095
|
+
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
1096
|
+
return denom === 0 ? 0 : dot / denom;
|
|
1097
|
+
}
|
|
1098
|
+
chunkTextLocal(text, chunkSize = 512, overlap = 128) {
|
|
1099
|
+
if (!text || text.length === 0)
|
|
1100
|
+
return [];
|
|
1101
|
+
if (overlap >= chunkSize)
|
|
1102
|
+
overlap = Math.floor(chunkSize * 0.25);
|
|
1103
|
+
const chunks = [];
|
|
1104
|
+
let start = 0;
|
|
1105
|
+
while (start < text.length) {
|
|
1106
|
+
let end = start + chunkSize;
|
|
1107
|
+
if (end < text.length) {
|
|
1108
|
+
const bp = Math.max(text.lastIndexOf('.', end), text.lastIndexOf('\n', end));
|
|
1109
|
+
if (bp > start + chunkSize / 2)
|
|
1110
|
+
end = bp + 1;
|
|
1111
|
+
}
|
|
1112
|
+
const chunk = text.slice(start, end).trim();
|
|
1113
|
+
if (chunk.length > 20)
|
|
1114
|
+
chunks.push(chunk);
|
|
1115
|
+
start = end - overlap;
|
|
1116
|
+
if (start >= text.length)
|
|
1117
|
+
break;
|
|
1118
|
+
}
|
|
1119
|
+
return chunks;
|
|
1120
|
+
}
|
|
763
1121
|
// ── Static OpenAI Compatible Factory ────────────────────────────────
|
|
764
1122
|
static openaiCompatible(config) {
|
|
765
1123
|
const instance = new SlyOS({
|