@supercollab/cli 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -1
- package/bin/supercollab.js +289 -50
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -58,7 +58,37 @@ Chat is encrypted on upload and searchable after local sync:
|
|
|
58
58
|
```bash
|
|
59
59
|
supercollab chat send --room room_... --text "I am checking auth."
|
|
60
60
|
supercollab chat read --room room_...
|
|
61
|
-
supercollab chat search --room room_... --query auth
|
|
61
|
+
supercollab chat search --room room_... --query auth --mode hybrid
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Local search uses the same embedding profile as Lean Memory:
|
|
65
|
+
|
|
66
|
+
```text
|
|
67
|
+
model: Xenova/bge-small-en-v1.5
|
|
68
|
+
backend: @huggingface/transformers ONNX
|
|
69
|
+
dtype: q8
|
|
70
|
+
dimensions: 384
|
|
71
|
+
pooling: mean
|
|
72
|
+
normalize: true
|
|
73
|
+
query prefix: Represent this sentence for searching relevant passages:
|
|
74
|
+
chunks: 3200 chars with 480 char overlap
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Search modes:
|
|
78
|
+
|
|
79
|
+
```text
|
|
80
|
+
keyword: local SQLite FTS5/BM25 over decrypted local transcript
|
|
81
|
+
vector: local BGE cosine search over decrypted local transcript chunks
|
|
82
|
+
hybrid: reciprocal-rank fusion over keyword and vector results
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
The hosted SuperCollab service never computes embeddings and never receives the
|
|
86
|
+
room key. The first local sync/search may download the BGE-small ONNX model into
|
|
87
|
+
the local Hugging Face cache. To verify or prewarm the local embedding system:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
supercollab embeddings status
|
|
91
|
+
supercollab embeddings warmup
|
|
62
92
|
```
|
|
63
93
|
|
|
64
94
|
Print MCP config:
|
package/bin/supercollab.js
CHANGED
|
@@ -6,10 +6,28 @@ import crypto from 'node:crypto';
|
|
|
6
6
|
import * as readlineCore from 'node:readline';
|
|
7
7
|
import { stdin as input, stdout as output } from 'node:process';
|
|
8
8
|
|
|
9
|
-
const VERSION = '0.4.
|
|
9
|
+
const VERSION = '0.4.1';
|
|
10
10
|
const DEFAULT_SERVER = process.env.SUPERCOLLAB_URL || 'https://hyper.polynode.dev';
|
|
11
11
|
const DEFAULT_CONFIG = process.env.SUPERCOLLAB_CONFIG || path.join(os.homedir(), '.supercollab', 'config.json');
|
|
12
12
|
const SESSION_TTL_SKEW = 60;
|
|
13
|
+
// Local embedding profile. The README states this is the same profile Lean
// Memory uses. The record is frozen because it is echoed verbatim in CLI/MCP
// output and must not be mutated at runtime. Key order is part of that output.
const EMBEDDING_PROFILE = Object.freeze({
  id: 'lean-memory-bge-small-en-v1.5-q8-mean-normalized-v1',
  model: 'Xenova/bge-small-en-v1.5',
  backend: '@huggingface/transformers',
  dtype: 'q8',
  dims: 384,
  pooling: 'mean',
  normalize: true,
  // Prepended to search queries only; documents are embedded without it.
  query_prefix: 'Represent this sentence for searching relevant passages: ',
  chunk_chars: 3200,
  chunk_overlap_chars: 480,
  local_only: true,
});

// Scalar aliases used throughout the file, read from the profile so the two
// can never drift apart.
const {
  model: EMBEDDING_MODEL,
  dtype: EMBEDDING_DTYPE,
  dims: EMBEDDING_DIMS,
  chunk_chars: EMBEDDING_CHUNK_CHARS,
  chunk_overlap_chars: EMBEDDING_CHUNK_OVERLAP,
} = EMBEDDING_PROFILE;
|
|
13
31
|
|
|
14
32
|
function printHelp() {
|
|
15
33
|
console.log(`SuperCollab CLI ${VERSION}
|
|
@@ -27,13 +45,15 @@ Usage:
|
|
|
27
45
|
supercollab room key --room ID
|
|
28
46
|
supercollab chat send --room ID --text TEXT [--channel agents]
|
|
29
47
|
supercollab chat read --room ID [--after 0] [--limit 50]
|
|
30
|
-
supercollab chat search --room ID --query TEXT [--limit 20]
|
|
48
|
+
supercollab chat search --room ID --query TEXT [--mode hybrid|keyword|vector] [--limit 20]
|
|
31
49
|
supercollab sync --room ID
|
|
32
50
|
supercollab activate --room ID [--cwd PATH]
|
|
33
51
|
supercollab deactivate [--cwd PATH]
|
|
34
52
|
supercollab active [--cwd PATH]
|
|
35
53
|
supercollab session list
|
|
36
54
|
supercollab session revoke --session ID
|
|
55
|
+
supercollab embeddings status
|
|
56
|
+
supercollab embeddings warmup
|
|
37
57
|
supercollab mcp stdio
|
|
38
58
|
supercollab mcp print-config --client codex
|
|
39
59
|
supercollab config path
|
|
@@ -402,6 +422,14 @@ function getMeta(db, key, fallback = '') {
|
|
|
402
422
|
return row ? String(row.value) : fallback;
|
|
403
423
|
}
|
|
404
424
|
|
|
425
|
+
/**
 * List the column names of a table via `PRAGMA table_info`.
 *
 * @param {object} db - Database handle passed straight through to `dbAll`.
 * @param {string} table - Table name. It is interpolated into the PRAGMA
 *   statement, so callers must pass a trusted identifier, never user input.
 * @returns {string[]} Column names, or an empty array when the lookup throws.
 *   An empty array is also what an absent table yields.
 */
function tableColumns(db, table) {
  const names = [];
  try {
    for (const info of dbAll(db, `PRAGMA table_info(${table})`)) {
      names.push(String(info.name));
    }
  } catch {
    // Any failure is reported as "no columns"; callers treat that as
    // "table missing".
    return [];
  }
  return names;
}
|
|
432
|
+
|
|
405
433
|
function initChatSchema(db) {
|
|
406
434
|
db.exec(`
|
|
407
435
|
CREATE TABLE IF NOT EXISTS meta (
|
|
@@ -426,12 +454,24 @@ function initChatSchema(db) {
|
|
|
426
454
|
);
|
|
427
455
|
CREATE INDEX IF NOT EXISTS idx_messages_room_id ON messages(room_id, id);
|
|
428
456
|
CREATE INDEX IF NOT EXISTS idx_messages_channel_created ON messages(channel, created_at);
|
|
457
|
+
`);
|
|
458
|
+
const embeddingColumns = tableColumns(db, 'message_embeddings');
|
|
459
|
+
if (embeddingColumns.length > 0 && (!embeddingColumns.includes('seq') || !embeddingColumns.includes('profile'))) {
|
|
460
|
+
db.exec('DROP TABLE IF EXISTS message_embeddings');
|
|
461
|
+
}
|
|
462
|
+
db.exec(`
|
|
429
463
|
CREATE TABLE IF NOT EXISTS message_embeddings (
|
|
430
|
-
message_id TEXT
|
|
464
|
+
message_id TEXT NOT NULL,
|
|
465
|
+
seq INTEGER NOT NULL DEFAULT 0,
|
|
466
|
+
pos INTEGER NOT NULL DEFAULT 0,
|
|
431
467
|
dims INTEGER NOT NULL,
|
|
468
|
+
model TEXT NOT NULL,
|
|
469
|
+
profile TEXT NOT NULL,
|
|
432
470
|
vector TEXT NOT NULL,
|
|
433
|
-
updated_at TEXT NOT NULL
|
|
471
|
+
updated_at TEXT NOT NULL,
|
|
472
|
+
PRIMARY KEY(message_id, seq)
|
|
434
473
|
);
|
|
474
|
+
CREATE INDEX IF NOT EXISTS idx_message_embeddings_profile ON message_embeddings(profile);
|
|
435
475
|
`);
|
|
436
476
|
try {
|
|
437
477
|
db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(message_id UNINDEXED, channel UNINDEXED, sender_label, body, metadata, tokenize='porter')");
|
|
@@ -439,24 +479,92 @@ function initChatSchema(db) {
|
|
|
439
479
|
} catch {
|
|
440
480
|
setMeta(db, 'fts5', '0');
|
|
441
481
|
}
|
|
482
|
+
setMeta(db, 'embedding_profile', EMBEDDING_PROFILE.id);
|
|
442
483
|
}
|
|
443
484
|
|
|
444
|
-
|
|
485
|
+
// How long a failed model load stays cached before the next caller may retry.
// Within this window callers fail fast instead of re-attempting the load once
// per message; after it, a long-lived process (e.g. `mcp stdio`) can recover
// from a transient import/download failure without a restart.
const EMBEDDING_PIPELINE_RETRY_MS = 30000;

// Memoized load of the feature-extraction pipeline (shared by all callers).
let embeddingPipelinePromise = null;
// Timestamp (ms) of the last failed load, or 0 when the cached promise has not failed.
let embeddingPipelineFailedAt = 0;

/**
 * Lazily load and memoize the local feature-extraction pipeline.
 *
 * `@huggingface/transformers` is imported dynamically so that commands which
 * never embed anything do not pay for loading it. When
 * `SUPERCOLLAB_MODEL_CACHE` is set, that directory is created and used as the
 * library's cache directory.
 *
 * @returns {Promise<Function>} Resolves to the extractor returned by
 *   `pipeline('feature-extraction', ...)`.
 * @throws Rejects with the underlying import/model-load error. The rejection
 *   is reused for `EMBEDDING_PIPELINE_RETRY_MS`, after which the next call
 *   starts a fresh load attempt.
 */
async function getEmbeddingPipeline() {
  const cooledDown = embeddingPipelineFailedAt !== 0
    && Date.now() - embeddingPipelineFailedAt >= EMBEDDING_PIPELINE_RETRY_MS;
  if (embeddingPipelinePromise && cooledDown) {
    embeddingPipelinePromise = null;
  }
  if (!embeddingPipelinePromise) {
    embeddingPipelineFailedAt = 0;
    embeddingPipelinePromise = (async () => {
      try {
        const mod = await import('@huggingface/transformers');
        const { pipeline, env } = mod;
        if (process.env.SUPERCOLLAB_MODEL_CACHE && env) {
          fs.mkdirSync(process.env.SUPERCOLLAB_MODEL_CACHE, { recursive: true });
          env.cacheDir = process.env.SUPERCOLLAB_MODEL_CACHE;
        }
        // `await` here so a load rejection is observed by the catch below.
        return await pipeline('feature-extraction', EMBEDDING_MODEL, { dtype: EMBEDDING_DTYPE });
      } catch (err) {
        embeddingPipelineFailedAt = Date.now();
        throw err;
      }
    })();
  }
  return embeddingPipelinePromise;
}
|
|
445
501
|
|
|
446
|
-
function
|
|
447
|
-
return
|
|
502
|
+
/**
 * Build the text embedded for a search query: the profile's query prefix
 * followed directly by the query.
 *
 * @param {string} query - Raw search query.
 * @returns {string} Prefixed query text.
 */
function formatQueryForEmbedding(query) {
  const { query_prefix: prefix } = EMBEDDING_PROFILE;
  return `${prefix}${query}`;
}
|
|
449
505
|
|
|
450
|
-
function
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
506
|
+
/**
 * Build the text embedded for a document chunk.
 *
 * @param {string} text - Chunk text.
 * @param {string} [title=''] - Optional title; when truthy it is placed on its
 *   own line before the text.
 * @returns {string} `title + "\n" + text`, or `text` unchanged when there is no title.
 */
function formatDocForEmbedding(text, title = '') {
  if (!title) {
    return text;
  }
  return `${title}\n${text}`;
}
|
|
509
|
+
|
|
510
|
+
/**
 * Find a natural break inside the last 30% of `slice`.
 *
 * Preference order: paragraph break, sentence end, line break, space.
 *
 * @param {string} slice - Candidate chunk (at most the maximum chunk length).
 * @returns {number} Offset into `slice` just after the chosen break, or -1
 *   when no break exists in the search window.
 */
function findChunkBreakOffset(slice) {
  const searchStart = Math.floor(slice.length * 0.7);
  const tail = slice.slice(searchStart);

  const paragraphBreak = tail.lastIndexOf('\n\n');
  if (paragraphBreak >= 0) return searchStart + paragraphBreak + 2;

  const sentenceEnd = Math.max(
    ...['. ', '.\n', '? ', '?\n', '! ', '!\n'].map((marker) => tail.lastIndexOf(marker)),
  );
  if (sentenceEnd >= 0) return searchStart + sentenceEnd + 2;

  const lineBreak = tail.lastIndexOf('\n');
  if (lineBreak >= 0) return searchStart + lineBreak + 1;

  const spaceBreak = tail.lastIndexOf(' ');
  return spaceBreak >= 0 ? searchStart + spaceBreak + 1 : -1;
}

/**
 * Report whether cutting `text` at `index` would separate a UTF-16 high
 * surrogate from its low surrogate.
 *
 * @param {string} text
 * @param {number} index - Cut position (the first code unit of the right-hand part).
 * @returns {boolean}
 */
function splitsSurrogatePair(text, index) {
  if (!(index > 0) || index >= text.length) return false;
  const before = text.charCodeAt(index - 1);
  const after = text.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Split text into overlapping chunks for embedding.
 *
 * Text no longer than `maxChars` is returned as a single chunk. Longer text is
 * cut into chunks of at most `maxChars` UTF-16 code units, preferring a natural
 * break in the last 30% of each chunk. Each following chunk starts
 * `overlapChars` before the previous chunk's end, as long as that still moves
 * forward.
 *
 * @param {*} content - Text to split; falsy values are treated as ''.
 * @param {number} [maxChars=EMBEDDING_CHUNK_CHARS] - Maximum chunk length.
 * @param {number} [overlapChars=EMBEDDING_CHUNK_OVERLAP] - Desired overlap
 *   between consecutive chunks. Negative or non-numeric values are treated as 0
 *   so no text is skipped between chunks.
 * @returns {{ text: string, pos: number }[]} Chunks with their start offset in
 *   the original text; always at least one entry.
 * @throws {RangeError} If the text needs splitting and `maxChars` is not a
 *   number >= 1 (such a value could never make progress).
 */
function chunkText(content, maxChars = EMBEDDING_CHUNK_CHARS, overlapChars = EMBEDDING_CHUNK_OVERLAP) {
  const text = String(content || '');
  if (text.length <= maxChars) return [{ text, pos: 0 }];

  const limit = Math.floor(Number(maxChars));
  if (!(limit >= 1)) {
    throw new RangeError(`chunkText maxChars must be >= 1, got ${maxChars}`);
  }
  const requestedOverlap = Math.floor(Number(overlapChars));
  const overlap = requestedOverlap > 0 ? requestedOverlap : 0;

  const chunks = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + limit, text.length);
    if (end < text.length) {
      const breakOffset = findChunkBreakOffset(text.slice(start, end));
      if (breakOffset > 0) {
        end = start + breakOffset;
      } else if (end - start > 1 && splitsSurrogatePair(text, end)) {
        // Hard cut: back off one code unit rather than emit a lone surrogate.
        end -= 1;
      }
    }
    chunks.push({ text: text.slice(start, end), pos: start });
    if (end >= text.length) break;

    let next = end - overlap;
    // Do not start a chunk on the low half of a surrogate pair.
    if (splitsSurrogatePair(text, next)) next -= 1;
    // Guarantee forward progress: if the overlap would not advance past the
    // current chunk's start, continue from its end with no overlap.
    start = next > start ? next : end;
  }
  return chunks;
}
|
|
557
|
+
|
|
558
|
+
/**
 * Embed one piece of text with the local pipeline.
 *
 * @param {string} text - Query or document text.
 * @param {object} [options]
 * @param {boolean} [options.isQuery=false] - Format as a search query (adds
 *   the profile's query prefix) instead of as a document.
 * @param {string} [options.title=''] - Document title; ignored for queries.
 * @returns {Promise<number[]>} Embedding with exactly `EMBEDDING_DIMS` entries.
 * @throws {Error} If the pipeline cannot be loaded or the returned vector has
 *   an unexpected length.
 */
async function embedText(text, { isQuery = false, title = '' } = {}) {
  const extractor = await getEmbeddingPipeline();

  let input;
  if (isQuery) {
    input = formatQueryForEmbedding(text);
  } else {
    input = formatDocForEmbedding(text, title);
  }

  // Only the first 4000 characters of the formatted text are sent to the model.
  const clipped = input.slice(0, 4000);
  const { pooling, normalize } = EMBEDDING_PROFILE;
  const output = await extractor(clipped, { pooling, normalize });

  const vector = Array.from(output.data, Number);
  if (vector.length === EMBEDDING_DIMS) {
    return vector;
  }
  throw new Error(`unexpected embedding dims ${vector.length}`);
}
|
|
461
569
|
|
|
462
570
|
function cosine(a, b) {
|
|
@@ -465,13 +573,65 @@ function cosine(a, b) {
|
|
|
465
573
|
return score;
|
|
466
574
|
}
|
|
467
575
|
|
|
468
|
-
function
|
|
469
|
-
const
|
|
470
|
-
|
|
576
|
+
/**
 * Embed one message and persist one row per chunk in `message_embeddings`.
 *
 * Rows for this message under any other embedding profile are always removed.
 * The message is skipped only when it already has exactly as many rows under
 * the current profile as the text has chunks. Any other count is treated as an
 * incomplete set (for example left by an interrupted run) and rebuilt.
 *
 * All chunk vectors are computed before anything is written, so a model
 * failure part-way through a long message cannot leave a partial set that
 * later looks complete.
 *
 * @param {object} db - Database handle used by `dbRun`/`dbGet`/`setMeta`.
 * @param {object} local - Message; reads `message_id`, `sender_label`, `body`, `channel`.
 * @param {string} metadata - Serialized metadata appended to the embedded text.
 * @returns {Promise<{ embedded: boolean, chunks: number }>} `embedded` is true
 *   only when new vectors were written; `chunks` is the number of stored chunks.
 * @throws Propagates embedding and database errors.
 */
async function storeEmbeddings(db, local, metadata) {
  const messageId = local.message_id;
  if (!messageId) return { embedded: false, chunks: 0 };

  const profileId = EMBEDDING_PROFILE.id;
  dbRun(db, 'DELETE FROM message_embeddings WHERE message_id=? AND profile<>?', [messageId, profileId]);

  const body = `${local.sender_label || ''}\n${local.body || ''}\n${metadata || ''}`;
  const title = local.sender_label || local.channel || 'SuperCollab message';
  const chunks = chunkText(body);

  const existing = dbGet(db, 'SELECT COUNT(*) AS count FROM message_embeddings WHERE message_id=? AND profile=?', [messageId, profileId]);
  const existingCount = Number(existing?.count || 0);
  if (existingCount === chunks.length) return { embedded: false, chunks: existingCount };

  // Embed everything first. Sequential on purpose: all chunks share one local
  // model instance.
  const vectors = [];
  for (const chunk of chunks) {
    vectors.push(await embedText(chunk.text, { title }));
  }

  const updatedAt = nowIso();
  // Clear any incomplete or surplus rows before writing the complete set.
  dbRun(db, 'DELETE FROM message_embeddings WHERE message_id=?', [messageId]);
  chunks.forEach((chunk, seq) => {
    dbRun(
      db,
      `INSERT INTO message_embeddings(message_id,seq,pos,dims,model,profile,vector,updated_at)
       VALUES(?,?,?,?,?,?,?,?)
       ON CONFLICT(message_id, seq) DO UPDATE SET
         pos=excluded.pos,
         dims=excluded.dims,
         model=excluded.model,
         profile=excluded.profile,
         vector=excluded.vector,
         updated_at=excluded.updated_at`,
      [messageId, seq, chunk.pos, EMBEDDING_DIMS, EMBEDDING_MODEL, profileId, JSON.stringify(vectors[seq]), updatedAt],
    );
  });
  setMeta(db, 'embedding_last_ok_at', updatedAt);
  return { embedded: true, chunks: chunks.length };
}
|
|
607
|
+
|
|
608
|
+
/**
 * Best-effort wrapper around `storeEmbeddings` that never throws.
 *
 * Message storage must not fail just because embedding failed, so any error
 * is converted into a result object and recorded under the
 * `embedding_last_error` meta key.
 *
 * @param {object} db - Database handle.
 * @param {object} local - Message to embed.
 * @param {string} metadata - Serialized metadata for the message.
 * @returns {Promise<{ embedded: boolean, chunks: number, error?: string }>}
 *   The `storeEmbeddings` result, or `{ embedded: false, chunks: 0, error }`
 *   on failure.
 */
async function tryStoreEmbeddings(db, local, metadata) {
  try {
    return await storeEmbeddings(db, local, metadata);
  } catch (err) {
    // `err` may be any thrown value, including null/undefined.
    const message = err?.message || String(err);
    try {
      setMeta(db, 'embedding_last_error', message);
    } catch {
      // Recording the failure is itself best-effort; the error is still
      // reported to the caller through the return value.
    }
    return { embedded: false, chunks: 0, error: message };
  }
}
|
|
616
|
+
|
|
617
|
+
/**
 * Embed stored messages that have no embedding rows under the current profile.
 *
 * Messages are processed oldest first (ascending `id`). Each one goes through
 * `tryStoreEmbeddings`, so a failure on one message does not stop the rest.
 *
 * @param {object} db - Database handle.
 * @param {number} [limit=500] - Maximum number of messages to process. It is
 *   truncated to an integer and clamped to 1..2000; a falsy or non-numeric
 *   value falls back to 500.
 * @returns {Promise<{ messages_checked: number, chunks_embedded: number }>}
 *   Number of messages selected and number of chunks newly embedded.
 */
async function embedMissingMessages(db, limit = 500) {
  const requested = Number(limit || 500);
  // A NaN limit must never reach the SQL LIMIT binding.
  const maxRows = Number.isNaN(requested)
    ? 500
    : Math.max(1, Math.min(Math.trunc(requested), 2000));

  const rows = dbAll(
    db,
    `SELECT m.*
     FROM messages m
     LEFT JOIN message_embeddings e
       ON e.message_id=m.message_id AND e.profile=?
     WHERE e.message_id IS NULL
     ORDER BY m.id ASC
     LIMIT ?`,
    [EMBEDDING_PROFILE.id, maxRows],
  );

  let embedded = 0;
  // Sequential on purpose: every message shares one local model instance.
  for (const row of rows) {
    const result = await tryStoreEmbeddings(db, row, row.metadata || '');
    if (result.embedded) embedded += result.chunks;
  }
  return { messages_checked: rows.length, chunks_embedded: embedded };
}
|
|
476
636
|
|
|
477
637
|
async function openChatDb(config, file, roomId) {
|
|
@@ -512,7 +672,7 @@ function localPlainMessage(config, roomId, msg) {
|
|
|
512
672
|
return { ...msg, metadata: JSON.stringify(metadata) };
|
|
513
673
|
}
|
|
514
674
|
|
|
515
|
-
function insertLocalMessage(db, msg, config = null, roomId = msg.room_id || '') {
|
|
675
|
+
async function insertLocalMessage(db, msg, config = null, roomId = msg.room_id || '') {
|
|
516
676
|
const local = config ? localPlainMessage(config, roomId, msg) : msg;
|
|
517
677
|
const metadata = typeof local.metadata === 'string' ? local.metadata : JSON.stringify(local.metadata || {});
|
|
518
678
|
dbRun(
|
|
@@ -532,7 +692,7 @@ function insertLocalMessage(db, msg, config = null, roomId = msg.room_id || '')
|
|
|
532
692
|
]);
|
|
533
693
|
} catch {}
|
|
534
694
|
}
|
|
535
|
-
|
|
695
|
+
await tryStoreEmbeddings(db, local, metadata);
|
|
536
696
|
}
|
|
537
697
|
|
|
538
698
|
async function syncRoom(config, file, roomId, limit = 500) {
|
|
@@ -540,11 +700,12 @@ async function syncRoom(config, file, roomId, limit = 500) {
|
|
|
540
700
|
try {
|
|
541
701
|
const after = Number(getMeta(cap.db, 'last_message_id', '0')) || 0;
|
|
542
702
|
const data = await apiAsAgent(config, 'GET', `/v1/rooms/${roomId}/messages?after=${encodeURIComponent(after)}&limit=${encodeURIComponent(limit)}`);
|
|
543
|
-
for (const msg of data.messages || []) insertLocalMessage(cap.db, { ...msg, room_id: roomId }, config, roomId);
|
|
703
|
+
for (const msg of data.messages || []) await insertLocalMessage(cap.db, { ...msg, room_id: roomId }, config, roomId);
|
|
704
|
+
const embedding = await embedMissingMessages(cap.db, 500);
|
|
544
705
|
setMeta(cap.db, 'last_message_id', String(data.next_after || after));
|
|
545
706
|
setMeta(cap.db, 'last_sync_at', nowIso());
|
|
546
707
|
saveChatDb(cap);
|
|
547
|
-
return { room_id: roomId, pulled: (data.messages || []).length, last_message_id: Number(data.next_after || after), db: cap.dbPath };
|
|
708
|
+
return { room_id: roomId, pulled: (data.messages || []).length, last_message_id: Number(data.next_after || after), db: cap.dbPath, embedding };
|
|
548
709
|
} finally {
|
|
549
710
|
cap.db.close();
|
|
550
711
|
}
|
|
@@ -605,7 +766,7 @@ async function doChatSend(config, file, opts) {
|
|
|
605
766
|
});
|
|
606
767
|
const cap = await openChatDb(config, file, roomId);
|
|
607
768
|
try {
|
|
608
|
-
insertLocalMessage(cap.db, { ...data.message, room_id: roomId }, config, roomId);
|
|
769
|
+
await insertLocalMessage(cap.db, { ...data.message, room_id: roomId }, config, roomId);
|
|
609
770
|
setMeta(cap.db, 'last_message_id', String(Math.max(Number(getMeta(cap.db, 'last_message_id', '0')) || 0, Number(data.message.id))));
|
|
610
771
|
saveChatDb(cap);
|
|
611
772
|
} finally {
|
|
@@ -634,16 +795,19 @@ function ftsQuery(value) {
|
|
|
634
795
|
async function doChatSearch(config, file, opts) {
|
|
635
796
|
const roomId = requireValue(opts, 'room');
|
|
636
797
|
const query = requireValue(opts, 'query');
|
|
798
|
+
const mode = String(opts.mode || 'hybrid').toLowerCase();
|
|
799
|
+
if (!['hybrid', 'keyword', 'vector'].includes(mode)) throw new Error('search --mode must be hybrid, keyword, or vector');
|
|
637
800
|
await syncRoom(config, file, roomId, 500);
|
|
638
801
|
const cap = await openChatDb(config, file, roomId);
|
|
639
802
|
try {
|
|
803
|
+
const embedding = await embedMissingMessages(cap.db, 500);
|
|
640
804
|
const maxResults = Math.max(1, Math.min(Number(opts.limit || 20), 100));
|
|
641
|
-
let
|
|
642
|
-
if (getMeta(cap.db, 'fts5', '0') === '1') {
|
|
805
|
+
let keywordRows = [];
|
|
806
|
+
if (mode !== 'vector' && getMeta(cap.db, 'fts5', '0') === '1') {
|
|
643
807
|
const q = ftsQuery(query);
|
|
644
808
|
if (q) {
|
|
645
809
|
try {
|
|
646
|
-
|
|
810
|
+
keywordRows = dbAll(
|
|
647
811
|
cap.db,
|
|
648
812
|
`SELECT m.*, bm25(messages_fts) AS score
|
|
649
813
|
FROM messages_fts JOIN messages m ON m.id=messages_fts.rowid
|
|
@@ -652,33 +816,85 @@ async function doChatSearch(config, file, opts) {
|
|
|
652
816
|
[q, maxResults],
|
|
653
817
|
);
|
|
654
818
|
} catch {
|
|
655
|
-
|
|
819
|
+
keywordRows = [];
|
|
656
820
|
}
|
|
657
821
|
}
|
|
658
822
|
}
|
|
659
|
-
if (!
|
|
660
|
-
|
|
823
|
+
if (mode !== 'vector' && !keywordRows.length) {
|
|
824
|
+
keywordRows = dbAll(cap.db, 'SELECT *, 0 AS score FROM messages WHERE body LIKE ? OR metadata LIKE ? ORDER BY id DESC LIMIT ?', [
|
|
661
825
|
`%${query}%`, `%${query}%`, maxResults,
|
|
662
|
-
]);
|
|
826
|
+
]).map((row) => ({ ...row, keyword_fallback: true }));
|
|
663
827
|
}
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
828
|
+
let vectorRows = [];
|
|
829
|
+
let vectorError = null;
|
|
830
|
+
if (mode !== 'keyword') try {
|
|
831
|
+
const qvec = await embedText(query, { isQuery: true });
|
|
832
|
+
const bestByMessage = new Map();
|
|
833
|
+
for (const row of dbAll(
|
|
834
|
+
cap.db,
|
|
835
|
+
`SELECT m.*, e.vector, e.seq, e.pos
|
|
836
|
+
FROM message_embeddings e JOIN messages m ON m.message_id=e.message_id
|
|
837
|
+
WHERE e.profile=?
|
|
838
|
+
ORDER BY m.id DESC LIMIT 3000`,
|
|
839
|
+
[EMBEDDING_PROFILE.id],
|
|
840
|
+
)) {
|
|
673
841
|
let score = 0;
|
|
674
842
|
try { score = cosine(qvec, JSON.parse(row.vector)); } catch {}
|
|
843
|
+
if (score <= 0) continue;
|
|
675
844
|
const { vector, ...clean } = row;
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
845
|
+
const prior = bestByMessage.get(row.message_id);
|
|
846
|
+
if (!prior || score > prior.vector_score) {
|
|
847
|
+
bestByMessage.set(row.message_id, { ...clean, vector_score: score, chunk_seq: Number(row.seq || 0), chunk_pos: Number(row.pos || 0) });
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
vectorRows = Array.from(bestByMessage.values())
|
|
851
|
+
.sort((a, b) => b.vector_score - a.vector_score)
|
|
852
|
+
.slice(0, mode === 'vector' ? maxResults : Math.max(maxResults, 50));
|
|
853
|
+
} catch (err) {
|
|
854
|
+
vectorError = err.message || String(err);
|
|
855
|
+
setMeta(cap.db, 'embedding_last_error', vectorError);
|
|
856
|
+
}
|
|
857
|
+
const keywordRank = new Map(keywordRows.map((row, idx) => [row.message_id, idx + 1]));
|
|
858
|
+
const vectorRank = new Map(vectorRows.map((row, idx) => [row.message_id, idx + 1]));
|
|
859
|
+
const byMessage = new Map();
|
|
860
|
+
for (const row of [...keywordRows, ...vectorRows]) {
|
|
861
|
+
const existing = byMessage.get(row.message_id) || {};
|
|
862
|
+
byMessage.set(row.message_id, { ...existing, ...row });
|
|
863
|
+
}
|
|
864
|
+
const rrfK = 60;
|
|
865
|
+
const hybridRows = Array.from(byMessage.values()).map((row) => {
|
|
866
|
+
const kr = keywordRank.get(row.message_id);
|
|
867
|
+
const vr = vectorRank.get(row.message_id);
|
|
868
|
+
const keywordScore = kr ? 1 / (rrfK + kr) : 0;
|
|
869
|
+
const vectorScore = vr ? 1 / (rrfK + vr) : 0;
|
|
870
|
+
return {
|
|
871
|
+
...row,
|
|
872
|
+
search_sources: [kr ? (row.keyword_fallback ? 'like' : 'fts5_bm25') : null, vr ? 'bge_vector_cosine' : null].filter(Boolean),
|
|
873
|
+
keyword_rank: kr || null,
|
|
874
|
+
vector_rank: vr || null,
|
|
875
|
+
hybrid_score: keywordScore + vectorScore,
|
|
876
|
+
};
|
|
877
|
+
}).sort((a, b) => b.hybrid_score - a.hybrid_score).slice(0, maxResults);
|
|
878
|
+
const results = mode === 'keyword'
|
|
879
|
+
? keywordRows.slice(0, maxResults).map((row, idx) => ({ ...row, search_sources: [row.keyword_fallback ? 'like' : 'fts5_bm25'], keyword_rank: idx + 1 }))
|
|
880
|
+
: mode === 'vector'
|
|
881
|
+
? vectorRows.slice(0, maxResults).map((row, idx) => ({ ...row, search_sources: ['bge_vector_cosine'], vector_rank: idx + 1 }))
|
|
882
|
+
: hybridRows;
|
|
883
|
+
return {
|
|
884
|
+
room_id: roomId,
|
|
885
|
+
query,
|
|
886
|
+
search: {
|
|
887
|
+
local_only: true,
|
|
888
|
+
mode,
|
|
889
|
+
methods: ['fts5_bm25', 'bge_vector_cosine', 'rrf_hybrid'],
|
|
890
|
+
fts: getMeta(cap.db, 'fts5', '0') === '1',
|
|
891
|
+
vector: EMBEDDING_PROFILE.id,
|
|
892
|
+
embedding_profile: EMBEDDING_PROFILE,
|
|
893
|
+
embedding,
|
|
894
|
+
vector_error: vectorError,
|
|
895
|
+
},
|
|
896
|
+
results,
|
|
897
|
+
};
|
|
682
898
|
} finally {
|
|
683
899
|
cap.db.close();
|
|
684
900
|
}
|
|
@@ -737,6 +953,7 @@ async function activeStatus(config, file, opts = {}) {
|
|
|
737
953
|
room_id: active?.roomId || null,
|
|
738
954
|
activation_root: active?.cwd || null,
|
|
739
955
|
config: file,
|
|
956
|
+
embedding_profile: EMBEDDING_PROFILE,
|
|
740
957
|
instructions: agentInstructions(active),
|
|
741
958
|
};
|
|
742
959
|
}
|
|
@@ -764,7 +981,7 @@ function mcpTools() {
|
|
|
764
981
|
toolSchema('room_join', 'Accept a room invite token.', { invite_token: s, fingerprint: s }, ['invite_token']),
|
|
765
982
|
toolSchema('chat_send', 'Send a message to the active agent chat room.', { text: s, channel: s, kind: s }, ['text']),
|
|
766
983
|
toolSchema('chat_read', 'Sync and read recent messages from the active room.', { limit: { type: 'integer' } }),
|
|
767
|
-
toolSchema('chat_search', 'Sync and search the active room transcript.', { query: s, limit: { type: 'integer' } }, ['query']),
|
|
984
|
+
toolSchema('chat_search', 'Sync and search the active room transcript with local keyword, BGE vector, or hybrid retrieval.', { query: s, mode: s, limit: { type: 'integer' } }, ['query']),
|
|
768
985
|
toolSchema('chat_sync', 'Sync the active room transcript into local SQLite.'),
|
|
769
986
|
];
|
|
770
987
|
}
|
|
@@ -778,7 +995,7 @@ async function callTool(config, name, args) {
|
|
|
778
995
|
if (name === 'room_join') return doRoomJoin(config, file, { invite: args.invite_token });
|
|
779
996
|
if (name === 'chat_send') return doChatSend(config, file, { room: requireActiveRoom(config, args), text: args.text, channel: args.channel || 'agents', kind: args.kind || 'chat.message' });
|
|
780
997
|
if (name === 'chat_read') return doChatRead(config, file, { room: requireActiveRoom(config, args), limit: args.limit || 50 });
|
|
781
|
-
if (name === 'chat_search') return doChatSearch(config, file, { room: requireActiveRoom(config, args), query: args.query, limit: args.limit || 20 });
|
|
998
|
+
if (name === 'chat_search') return doChatSearch(config, file, { room: requireActiveRoom(config, args), query: args.query, mode: args.mode || 'hybrid', limit: args.limit || 20 });
|
|
782
999
|
if (name === 'chat_sync') return syncRoom(config, file, requireActiveRoom(config, args));
|
|
783
1000
|
throw new Error(`unknown tool: ${name}`);
|
|
784
1001
|
}
|
|
@@ -858,6 +1075,24 @@ function printCodexConfig(opts) {
|
|
|
858
1075
|
console.log(`[mcp_servers.supercollab]\ncommand = "supercollab"\nargs = ["mcp", "stdio", "--config", "${file.replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"]`);
|
|
859
1076
|
}
|
|
860
1077
|
|
|
1078
|
+
/**
 * Describe the local embedding setup without loading the model.
 *
 * @returns {Promise<{ ok: boolean, profile: object, model_download: string, cache_dir: string }>}
 *   The embedding profile, a note on when the model is downloaded, and the
 *   cache directory (`SUPERCOLLAB_MODEL_CACHE` when set, otherwise a
 *   placeholder naming the library default).
 */
async function embeddingStatus() {
  const cacheDir = process.env.SUPERCOLLAB_MODEL_CACHE || 'default @huggingface/transformers cache';
  const status = { ok: true, profile: EMBEDDING_PROFILE };
  status.model_download = 'lazy on first embedding, or now via `supercollab embeddings warmup`';
  status.cache_dir = cacheDir;
  return status;
}
|
|
1086
|
+
|
|
1087
|
+
/**
 * Run one query embedding to check that local embedding works. Per the CLI's
 * own status text, this is also how the model can be fetched ahead of first use.
 *
 * @returns {Promise<{ ok: boolean, dims: number, profile: object }>} The
 *   length of the produced vector and the embedding profile.
 * @throws Propagates any error from `embedText`.
 */
async function embeddingWarmup() {
  const { length: dims } = await embedText('supercollab embedding warmup', { isQuery: true });
  return { ok: true, dims, profile: EMBEDDING_PROFILE };
}
|
|
1095
|
+
|
|
861
1096
|
async function main() {
|
|
862
1097
|
const { positionals, opts } = parse(process.argv.slice(2));
|
|
863
1098
|
if (opts.help || positionals.length === 0) { printHelp(); return; }
|
|
@@ -896,6 +1131,10 @@ async function main() {
|
|
|
896
1131
|
if (sub === 'list') return console.log(JSON.stringify(await api(config, 'GET', '/v1/agent-sessions', undefined, config.userToken), null, 2));
|
|
897
1132
|
if (sub === 'revoke') return console.log(JSON.stringify(await api(config, 'DELETE', `/v1/agent-sessions/${requireValue(opts, 'session')}`, undefined, config.userToken), null, 2));
|
|
898
1133
|
}
|
|
1134
|
+
if (cmd === 'embeddings') {
|
|
1135
|
+
if (sub === 'status') return console.log(JSON.stringify(await embeddingStatus(), null, 2));
|
|
1136
|
+
if (sub === 'warmup') return console.log(JSON.stringify(await embeddingWarmup(), null, 2));
|
|
1137
|
+
}
|
|
899
1138
|
if (cmd === 'mcp' && sub === 'stdio') return runMcp(opts);
|
|
900
1139
|
if (cmd === 'mcp' && sub === 'print-config') return printCodexConfig(opts);
|
|
901
1140
|
throw new Error(`unknown command: ${positionals.join(' ')}`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@supercollab/cli",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"description": "SuperCollab CLI and MCP bridge for encrypted local-search agent group chat.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
"node": ">=20"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
+
"@huggingface/transformers": "3.8.1",
|
|
17
18
|
"sql.js": "^1.14.1"
|
|
18
19
|
},
|
|
19
20
|
"keywords": [
|