@supercollab/cli 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -4
- package/bin/supercollab.js +354 -93
- package/package.json +5 -3
package/README.md
CHANGED
|
@@ -4,9 +4,9 @@ SuperCollab is a secure group chat for agents.
|
|
|
4
4
|
|
|
5
5
|
It does not host your project files. The hosted service manages accounts, rooms,
|
|
6
6
|
membership, invites, and an encrypted room message stream. Message bodies are
|
|
7
|
-
encrypted locally before upload. The CLI keeps a local SQLite transcript
|
|
8
|
-
agent can decrypt, sync, index, and search the
|
|
9
|
-
where it is working.
|
|
7
|
+
encrypted locally before upload. The CLI keeps a local native SQLite transcript
|
|
8
|
+
with FTS5 and sqlite-vec so the agent can decrypt, sync, index, and search the
|
|
9
|
+
conversation from the machine where it is working.
|
|
10
10
|
|
|
11
11
|
Install:
|
|
12
12
|
|
|
@@ -58,7 +58,37 @@ Chat is encrypted on upload and searchable after local sync:
|
|
|
58
58
|
```bash
|
|
59
59
|
supercollab chat send --room room_... --text "I am checking auth."
|
|
60
60
|
supercollab chat read --room room_...
|
|
61
|
-
supercollab chat search --room room_... --query auth
|
|
61
|
+
supercollab chat search --room room_... --query auth --mode hybrid
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Local search uses the same embedding profile as Lean Memory:
|
|
65
|
+
|
|
66
|
+
```text
|
|
67
|
+
model: Xenova/bge-small-en-v1.5
|
|
68
|
+
backend: @huggingface/transformers ONNX
|
|
69
|
+
dtype: q8
|
|
70
|
+
dimensions: 384
|
|
71
|
+
pooling: mean
|
|
72
|
+
normalize: true
|
|
73
|
+
query prefix: Represent this sentence for searching relevant passages:
|
|
74
|
+
chunks: 3200 chars with 480 char overlap
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Search modes:
|
|
78
|
+
|
|
79
|
+
```text
|
|
80
|
+
keyword: local SQLite FTS5/BM25 over decrypted local transcript
|
|
81
|
+
vector: local BGE cosine search through sqlite-vec over decrypted transcript chunks
|
|
82
|
+
hybrid: reciprocal-rank fusion over keyword and vector results
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
The hosted SuperCollab service never computes embeddings and never receives the
|
|
86
|
+
room key. The first local sync/search may download the BGE-small ONNX model into
|
|
87
|
+
the local Hugging Face cache. To verify or prewarm the local embedding system:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
supercollab embeddings status
|
|
91
|
+
supercollab embeddings warmup
|
|
62
92
|
```
|
|
63
93
|
|
|
64
94
|
Print MCP config:
|
package/bin/supercollab.js
CHANGED
|
@@ -6,10 +6,28 @@ import crypto from 'node:crypto';
|
|
|
6
6
|
import * as readlineCore from 'node:readline';
|
|
7
7
|
import { stdin as input, stdout as output } from 'node:process';
|
|
8
8
|
|
|
9
|
-
const VERSION = '0.4.
|
|
9
|
+
const VERSION = '0.4.2';
|
|
10
10
|
const DEFAULT_SERVER = process.env.SUPERCOLLAB_URL || 'https://hyper.polynode.dev';
|
|
11
11
|
const DEFAULT_CONFIG = process.env.SUPERCOLLAB_CONFIG || path.join(os.homedir(), '.supercollab', 'config.json');
|
|
12
12
|
const SESSION_TTL_SKEW = 60;
|
|
13
|
+
const EMBEDDING_MODEL = 'Xenova/bge-small-en-v1.5';
|
|
14
|
+
const EMBEDDING_DTYPE = 'q8';
|
|
15
|
+
const EMBEDDING_DIMS = 384;
|
|
16
|
+
const EMBEDDING_CHUNK_CHARS = 3200;
|
|
17
|
+
const EMBEDDING_CHUNK_OVERLAP = 480;
|
|
18
|
+
const EMBEDDING_PROFILE = Object.freeze({
|
|
19
|
+
id: 'lean-memory-bge-small-en-v1.5-q8-mean-normalized-v1',
|
|
20
|
+
model: EMBEDDING_MODEL,
|
|
21
|
+
backend: '@huggingface/transformers',
|
|
22
|
+
dtype: EMBEDDING_DTYPE,
|
|
23
|
+
dims: EMBEDDING_DIMS,
|
|
24
|
+
pooling: 'mean',
|
|
25
|
+
normalize: true,
|
|
26
|
+
query_prefix: 'Represent this sentence for searching relevant passages: ',
|
|
27
|
+
chunk_chars: EMBEDDING_CHUNK_CHARS,
|
|
28
|
+
chunk_overlap_chars: EMBEDDING_CHUNK_OVERLAP,
|
|
29
|
+
local_only: true,
|
|
30
|
+
});
|
|
13
31
|
|
|
14
32
|
function printHelp() {
|
|
15
33
|
console.log(`SuperCollab CLI ${VERSION}
|
|
@@ -27,13 +45,15 @@ Usage:
|
|
|
27
45
|
supercollab room key --room ID
|
|
28
46
|
supercollab chat send --room ID --text TEXT [--channel agents]
|
|
29
47
|
supercollab chat read --room ID [--after 0] [--limit 50]
|
|
30
|
-
supercollab chat search --room ID --query TEXT [--limit 20]
|
|
48
|
+
supercollab chat search --room ID --query TEXT [--mode hybrid|keyword|vector] [--limit 20]
|
|
31
49
|
supercollab sync --room ID
|
|
32
50
|
supercollab activate --room ID [--cwd PATH]
|
|
33
51
|
supercollab deactivate [--cwd PATH]
|
|
34
52
|
supercollab active [--cwd PATH]
|
|
35
53
|
supercollab session list
|
|
36
54
|
supercollab session revoke --session ID
|
|
55
|
+
supercollab embeddings status
|
|
56
|
+
supercollab embeddings warmup
|
|
37
57
|
supercollab mcp stdio
|
|
38
58
|
supercollab mcp print-config --client codex
|
|
39
59
|
supercollab config path
|
|
@@ -343,16 +363,19 @@ async function doLogin(config, file, opts) {
|
|
|
343
363
|
return { ok: true, username: config.username, user_id: config.userId, config: file };
|
|
344
364
|
}
|
|
345
365
|
|
|
346
|
-
let
|
|
366
|
+
let nativeSqlitePromise = null;
|
|
347
367
|
|
|
348
|
-
async function
|
|
349
|
-
if (!
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
368
|
+
async function loadNativeSqlite() {
|
|
369
|
+
if (!nativeSqlitePromise) {
|
|
370
|
+
nativeSqlitePromise = Promise.all([
|
|
371
|
+
import('better-sqlite3'),
|
|
372
|
+
import('sqlite-vec'),
|
|
373
|
+
]).then(([sqliteMod, sqliteVec]) => ({
|
|
374
|
+
Database: sqliteMod.default || sqliteMod,
|
|
375
|
+
sqliteVec,
|
|
376
|
+
}));
|
|
354
377
|
}
|
|
355
|
-
return
|
|
378
|
+
return nativeSqlitePromise;
|
|
356
379
|
}
|
|
357
380
|
|
|
358
381
|
function nowIso() {
|
|
@@ -368,29 +391,15 @@ function chatDbPath(config, file, roomId) {
|
|
|
368
391
|
}
|
|
369
392
|
|
|
370
393
|
function dbRun(db, sql, params = []) {
|
|
371
|
-
|
|
372
|
-
try {
|
|
373
|
-
stmt.bind(params);
|
|
374
|
-
stmt.step();
|
|
375
|
-
} finally {
|
|
376
|
-
stmt.free();
|
|
377
|
-
}
|
|
394
|
+
return db.prepare(sql).run(...params);
|
|
378
395
|
}
|
|
379
396
|
|
|
380
397
|
function dbAll(db, sql, params = []) {
|
|
381
|
-
|
|
382
|
-
const rows = [];
|
|
383
|
-
try {
|
|
384
|
-
stmt.bind(params);
|
|
385
|
-
while (stmt.step()) rows.push(stmt.getAsObject());
|
|
386
|
-
} finally {
|
|
387
|
-
stmt.free();
|
|
388
|
-
}
|
|
389
|
-
return rows;
|
|
398
|
+
return db.prepare(sql).all(...params);
|
|
390
399
|
}
|
|
391
400
|
|
|
392
401
|
function dbGet(db, sql, params = []) {
|
|
393
|
-
return
|
|
402
|
+
return db.prepare(sql).get(...params) || null;
|
|
394
403
|
}
|
|
395
404
|
|
|
396
405
|
function setMeta(db, key, value) {
|
|
@@ -402,6 +411,33 @@ function getMeta(db, key, fallback = '') {
|
|
|
402
411
|
return row ? String(row.value) : fallback;
|
|
403
412
|
}
|
|
404
413
|
|
|
414
|
+
function tableColumns(db, table) {
|
|
415
|
+
try {
|
|
416
|
+
return dbAll(db, `PRAGMA table_info(${table})`).map((row) => String(row.name));
|
|
417
|
+
} catch {
|
|
418
|
+
return [];
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
function verifySqliteVecLoaded(db) {
|
|
423
|
+
const row = db.prepare('SELECT vec_version() AS version').get();
|
|
424
|
+
if (!row?.version) throw new Error('sqlite-vec extension did not load');
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
function ensureMessageVectorTable(db) {
|
|
428
|
+
const tableInfo = db.prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='message_vectors'").get();
|
|
429
|
+
if (tableInfo?.sql) {
|
|
430
|
+
const match = String(tableInfo.sql).match(/float\[(\d+)\]/);
|
|
431
|
+
const hasCosine = String(tableInfo.sql).includes('distance_metric=cosine');
|
|
432
|
+
const dims = match?.[1] ? Number(match[1]) : null;
|
|
433
|
+
if (dims === EMBEDDING_DIMS && hasCosine) return false;
|
|
434
|
+
db.exec('DROP TABLE IF EXISTS message_vectors');
|
|
435
|
+
db.exec('DELETE FROM message_embeddings WHERE profile = ' + JSON.stringify(EMBEDDING_PROFILE.id));
|
|
436
|
+
}
|
|
437
|
+
db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS message_vectors USING vec0(message_seq TEXT PRIMARY KEY, embedding float[${EMBEDDING_DIMS}] distance_metric=cosine)`);
|
|
438
|
+
return true;
|
|
439
|
+
}
|
|
440
|
+
|
|
405
441
|
function initChatSchema(db) {
|
|
406
442
|
db.exec(`
|
|
407
443
|
CREATE TABLE IF NOT EXISTS meta (
|
|
@@ -426,74 +462,209 @@ function initChatSchema(db) {
|
|
|
426
462
|
);
|
|
427
463
|
CREATE INDEX IF NOT EXISTS idx_messages_room_id ON messages(room_id, id);
|
|
428
464
|
CREATE INDEX IF NOT EXISTS idx_messages_channel_created ON messages(channel, created_at);
|
|
465
|
+
`);
|
|
466
|
+
const embeddingColumns = tableColumns(db, 'message_embeddings');
|
|
467
|
+
if (embeddingColumns.length > 0 && (!embeddingColumns.includes('seq') || !embeddingColumns.includes('profile') || embeddingColumns.includes('vector'))) {
|
|
468
|
+
db.exec('DROP TABLE IF EXISTS message_embeddings');
|
|
469
|
+
}
|
|
470
|
+
db.exec(`
|
|
429
471
|
CREATE TABLE IF NOT EXISTS message_embeddings (
|
|
430
|
-
message_id TEXT
|
|
472
|
+
message_id TEXT NOT NULL,
|
|
473
|
+
seq INTEGER NOT NULL DEFAULT 0,
|
|
474
|
+
pos INTEGER NOT NULL DEFAULT 0,
|
|
431
475
|
dims INTEGER NOT NULL,
|
|
432
|
-
|
|
433
|
-
|
|
476
|
+
model TEXT NOT NULL,
|
|
477
|
+
profile TEXT NOT NULL,
|
|
478
|
+
updated_at TEXT NOT NULL,
|
|
479
|
+
PRIMARY KEY(message_id, seq)
|
|
434
480
|
);
|
|
481
|
+
CREATE INDEX IF NOT EXISTS idx_message_embeddings_profile ON message_embeddings(profile);
|
|
435
482
|
`);
|
|
483
|
+
ensureMessageVectorTable(db);
|
|
436
484
|
try {
|
|
437
485
|
db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(message_id UNINDEXED, channel UNINDEXED, sender_label, body, metadata, tokenize='porter')");
|
|
438
486
|
setMeta(db, 'fts5', '1');
|
|
439
487
|
} catch {
|
|
440
488
|
setMeta(db, 'fts5', '0');
|
|
441
489
|
}
|
|
490
|
+
setMeta(db, 'embedding_profile', EMBEDDING_PROFILE.id);
|
|
442
491
|
}
|
|
443
492
|
|
|
444
|
-
|
|
493
|
+
let embeddingPipelinePromise = null;
|
|
494
|
+
|
|
495
|
+
async function getEmbeddingPipeline() {
|
|
496
|
+
if (!embeddingPipelinePromise) {
|
|
497
|
+
embeddingPipelinePromise = (async () => {
|
|
498
|
+
const mod = await import('@huggingface/transformers');
|
|
499
|
+
const { pipeline, env } = mod;
|
|
500
|
+
if (process.env.SUPERCOLLAB_MODEL_CACHE && env) {
|
|
501
|
+
fs.mkdirSync(process.env.SUPERCOLLAB_MODEL_CACHE, { recursive: true });
|
|
502
|
+
env.cacheDir = process.env.SUPERCOLLAB_MODEL_CACHE;
|
|
503
|
+
}
|
|
504
|
+
return pipeline('feature-extraction', EMBEDDING_MODEL, { dtype: EMBEDDING_DTYPE });
|
|
505
|
+
})();
|
|
506
|
+
}
|
|
507
|
+
return embeddingPipelinePromise;
|
|
508
|
+
}
|
|
445
509
|
|
|
446
|
-
function
|
|
447
|
-
return
|
|
510
|
+
function formatQueryForEmbedding(query) {
|
|
511
|
+
return `${EMBEDDING_PROFILE.query_prefix}${query}`;
|
|
448
512
|
}
|
|
449
513
|
|
|
450
|
-
function
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
514
|
+
function formatDocForEmbedding(text, title = '') {
|
|
515
|
+
return title ? `${title}\n${text}` : text;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
function chunkText(content, maxChars = EMBEDDING_CHUNK_CHARS, overlapChars = EMBEDDING_CHUNK_OVERLAP) {
|
|
519
|
+
const text = String(content || '');
|
|
520
|
+
if (text.length <= maxChars) return [{ text, pos: 0 }];
|
|
521
|
+
const chunks = [];
|
|
522
|
+
let charPos = 0;
|
|
523
|
+
while (charPos < text.length) {
|
|
524
|
+
let endPos = Math.min(charPos + maxChars, text.length);
|
|
525
|
+
if (endPos < text.length) {
|
|
526
|
+
const slice = text.slice(charPos, endPos);
|
|
527
|
+
const searchStart = Math.floor(slice.length * 0.7);
|
|
528
|
+
const searchSlice = slice.slice(searchStart);
|
|
529
|
+
let breakOffset = -1;
|
|
530
|
+
const paragraphBreak = searchSlice.lastIndexOf('\n\n');
|
|
531
|
+
if (paragraphBreak >= 0) {
|
|
532
|
+
breakOffset = searchStart + paragraphBreak + 2;
|
|
533
|
+
} else {
|
|
534
|
+
const sentenceEnd = Math.max(
|
|
535
|
+
searchSlice.lastIndexOf('. '),
|
|
536
|
+
searchSlice.lastIndexOf('.\n'),
|
|
537
|
+
searchSlice.lastIndexOf('? '),
|
|
538
|
+
searchSlice.lastIndexOf('?\n'),
|
|
539
|
+
searchSlice.lastIndexOf('! '),
|
|
540
|
+
searchSlice.lastIndexOf('!\n'),
|
|
541
|
+
);
|
|
542
|
+
if (sentenceEnd >= 0) {
|
|
543
|
+
breakOffset = searchStart + sentenceEnd + 2;
|
|
544
|
+
} else {
|
|
545
|
+
const lineBreak = searchSlice.lastIndexOf('\n');
|
|
546
|
+
if (lineBreak >= 0) {
|
|
547
|
+
breakOffset = searchStart + lineBreak + 1;
|
|
548
|
+
} else {
|
|
549
|
+
const spaceBreak = searchSlice.lastIndexOf(' ');
|
|
550
|
+
if (spaceBreak >= 0) breakOffset = searchStart + spaceBreak + 1;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
if (breakOffset > 0) endPos = charPos + breakOffset;
|
|
555
|
+
}
|
|
556
|
+
if (endPos <= charPos) endPos = Math.min(charPos + maxChars, text.length);
|
|
557
|
+
chunks.push({ text: text.slice(charPos, endPos), pos: charPos });
|
|
558
|
+
if (endPos >= text.length) break;
|
|
559
|
+
charPos = endPos - overlapChars;
|
|
560
|
+
const lastChunkPos = chunks.at(-1).pos;
|
|
561
|
+
if (charPos <= lastChunkPos) charPos = endPos;
|
|
457
562
|
}
|
|
458
|
-
|
|
459
|
-
return vec.map((v) => Number((v / norm).toFixed(6)));
|
|
563
|
+
return chunks;
|
|
460
564
|
}
|
|
461
565
|
|
|
462
|
-
function
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
566
|
+
async function embedText(text, { isQuery = false, title = '' } = {}) {
|
|
567
|
+
const extractor = await getEmbeddingPipeline();
|
|
568
|
+
const formatted = isQuery ? formatQueryForEmbedding(text) : formatDocForEmbedding(text, title);
|
|
569
|
+
const output = await extractor(formatted.slice(0, 4000), {
|
|
570
|
+
pooling: EMBEDDING_PROFILE.pooling,
|
|
571
|
+
normalize: EMBEDDING_PROFILE.normalize,
|
|
572
|
+
});
|
|
573
|
+
const vector = Array.from(output.data).map(Number);
|
|
574
|
+
if (vector.length !== EMBEDDING_DIMS) throw new Error(`unexpected embedding dims ${vector.length}`);
|
|
575
|
+
return vector;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
async function storeEmbeddings(db, local, metadata) {
|
|
579
|
+
const messageId = local.message_id;
|
|
580
|
+
if (!messageId) return { embedded: false, chunks: 0 };
|
|
581
|
+
const oldRows = dbAll(db, 'SELECT seq FROM message_embeddings WHERE message_id=? AND profile<>?', [messageId, EMBEDDING_PROFILE.id]);
|
|
582
|
+
for (const row of oldRows) dbRun(db, 'DELETE FROM message_vectors WHERE message_seq=?', [`${messageId}:${Number(row.seq || 0)}`]);
|
|
583
|
+
dbRun(db, 'DELETE FROM message_embeddings WHERE message_id=? AND profile<>?', [messageId, EMBEDDING_PROFILE.id]);
|
|
584
|
+
const existing = dbGet(
|
|
585
|
+
db,
|
|
586
|
+
`SELECT COUNT(*) AS count
|
|
587
|
+
FROM message_embeddings e
|
|
588
|
+
JOIN message_vectors v ON v.message_seq = e.message_id || ':' || e.seq
|
|
589
|
+
WHERE e.message_id=? AND e.profile=?`,
|
|
590
|
+
[messageId, EMBEDDING_PROFILE.id],
|
|
591
|
+
);
|
|
592
|
+
if (Number(existing?.count || 0) > 0) return { embedded: false, chunks: Number(existing.count) };
|
|
593
|
+
|
|
594
|
+
const body = `${local.sender_label || ''}\n${local.body || ''}\n${metadata || ''}`;
|
|
595
|
+
const title = local.sender_label || local.channel || 'SuperCollab message';
|
|
596
|
+
const chunks = chunkText(body);
|
|
597
|
+
const updatedAt = nowIso();
|
|
598
|
+
for (let seq = 0; seq < chunks.length; seq++) {
|
|
599
|
+
const chunk = chunks[seq];
|
|
600
|
+
const vector = await embedText(chunk.text, { title });
|
|
601
|
+
const messageSeq = `${messageId}:${seq}`;
|
|
602
|
+
dbRun(db, 'INSERT OR REPLACE INTO message_vectors(message_seq, embedding) VALUES(?, ?)', [messageSeq, new Float32Array(vector)]);
|
|
603
|
+
dbRun(
|
|
604
|
+
db,
|
|
605
|
+
`INSERT INTO message_embeddings(message_id,seq,pos,dims,model,profile,updated_at)
|
|
606
|
+
VALUES(?,?,?,?,?,?,?)
|
|
607
|
+
ON CONFLICT(message_id, seq) DO UPDATE SET
|
|
608
|
+
pos=excluded.pos,
|
|
609
|
+
dims=excluded.dims,
|
|
610
|
+
model=excluded.model,
|
|
611
|
+
profile=excluded.profile,
|
|
612
|
+
updated_at=excluded.updated_at`,
|
|
613
|
+
[messageId, seq, chunk.pos, EMBEDDING_DIMS, EMBEDDING_MODEL, EMBEDDING_PROFILE.id, updatedAt],
|
|
614
|
+
);
|
|
615
|
+
}
|
|
616
|
+
setMeta(db, 'embedding_last_ok_at', updatedAt);
|
|
617
|
+
return { embedded: true, chunks: chunks.length };
|
|
466
618
|
}
|
|
467
619
|
|
|
468
|
-
function
|
|
469
|
-
|
|
470
|
-
|
|
620
|
+
async function tryStoreEmbeddings(db, local, metadata) {
|
|
621
|
+
try {
|
|
622
|
+
return await storeEmbeddings(db, local, metadata);
|
|
623
|
+
} catch (err) {
|
|
624
|
+
setMeta(db, 'embedding_last_error', err.message || String(err));
|
|
625
|
+
return { embedded: false, chunks: 0, error: err.message || String(err) };
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
async function embedMissingMessages(db, limit = 500) {
|
|
630
|
+
const rows = dbAll(
|
|
471
631
|
db,
|
|
472
|
-
|
|
473
|
-
|
|
632
|
+
`SELECT m.*
|
|
633
|
+
FROM messages m
|
|
634
|
+
LEFT JOIN message_embeddings e
|
|
635
|
+
ON e.message_id=m.message_id AND e.profile=?
|
|
636
|
+
LEFT JOIN message_vectors v
|
|
637
|
+
ON v.message_seq = e.message_id || ':' || e.seq
|
|
638
|
+
WHERE e.message_id IS NULL OR v.message_seq IS NULL
|
|
639
|
+
ORDER BY m.id ASC
|
|
640
|
+
LIMIT ?`,
|
|
641
|
+
[EMBEDDING_PROFILE.id, Math.max(1, Math.min(Number(limit || 500), 2000))],
|
|
474
642
|
);
|
|
643
|
+
let embedded = 0;
|
|
644
|
+
for (const row of rows) {
|
|
645
|
+
const result = await tryStoreEmbeddings(db, row, row.metadata || '');
|
|
646
|
+
if (result.embedded) embedded += result.chunks;
|
|
647
|
+
}
|
|
648
|
+
return { messages_checked: rows.length, chunks_embedded: embedded };
|
|
475
649
|
}
|
|
476
650
|
|
|
477
651
|
async function openChatDb(config, file, roomId) {
|
|
478
|
-
const
|
|
652
|
+
const { Database, sqliteVec } = await loadNativeSqlite();
|
|
479
653
|
const root = chatRoot(config, file, roomId);
|
|
480
654
|
const dbPath = chatDbPath(config, file, roomId);
|
|
481
655
|
fs.mkdirSync(root, { recursive: true, mode: 0o700 });
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
}
|
|
656
|
+
const db = new Database(dbPath);
|
|
657
|
+
db.pragma('journal_mode = WAL');
|
|
658
|
+
db.pragma('foreign_keys = ON');
|
|
659
|
+
sqliteVec.load(db);
|
|
660
|
+
verifySqliteVecLoaded(db);
|
|
488
661
|
initChatSchema(db);
|
|
489
662
|
setMeta(db, 'room_id', roomId);
|
|
490
663
|
return { db, root, dbPath, roomId };
|
|
491
664
|
}
|
|
492
665
|
|
|
493
666
|
function saveChatDb(cap) {
|
|
494
|
-
|
|
495
|
-
fs.writeFileSync(tmp, Buffer.from(cap.db.export()), { mode: 0o600 });
|
|
496
|
-
fs.renameSync(tmp, cap.dbPath);
|
|
667
|
+
cap.db.pragma('wal_checkpoint(PASSIVE)');
|
|
497
668
|
try { fs.chmodSync(cap.dbPath, 0o600); } catch {}
|
|
498
669
|
}
|
|
499
670
|
|
|
@@ -512,7 +683,7 @@ function localPlainMessage(config, roomId, msg) {
|
|
|
512
683
|
return { ...msg, metadata: JSON.stringify(metadata) };
|
|
513
684
|
}
|
|
514
685
|
|
|
515
|
-
function insertLocalMessage(db, msg, config = null, roomId = msg.room_id || '') {
|
|
686
|
+
async function insertLocalMessage(db, msg, config = null, roomId = msg.room_id || '') {
|
|
516
687
|
const local = config ? localPlainMessage(config, roomId, msg) : msg;
|
|
517
688
|
const metadata = typeof local.metadata === 'string' ? local.metadata : JSON.stringify(local.metadata || {});
|
|
518
689
|
dbRun(
|
|
@@ -532,7 +703,7 @@ function insertLocalMessage(db, msg, config = null, roomId = msg.room_id || '')
|
|
|
532
703
|
]);
|
|
533
704
|
} catch {}
|
|
534
705
|
}
|
|
535
|
-
|
|
706
|
+
await tryStoreEmbeddings(db, local, metadata);
|
|
536
707
|
}
|
|
537
708
|
|
|
538
709
|
async function syncRoom(config, file, roomId, limit = 500) {
|
|
@@ -540,11 +711,12 @@ async function syncRoom(config, file, roomId, limit = 500) {
|
|
|
540
711
|
try {
|
|
541
712
|
const after = Number(getMeta(cap.db, 'last_message_id', '0')) || 0;
|
|
542
713
|
const data = await apiAsAgent(config, 'GET', `/v1/rooms/${roomId}/messages?after=${encodeURIComponent(after)}&limit=${encodeURIComponent(limit)}`);
|
|
543
|
-
for (const msg of data.messages || []) insertLocalMessage(cap.db, { ...msg, room_id: roomId }, config, roomId);
|
|
714
|
+
for (const msg of data.messages || []) await insertLocalMessage(cap.db, { ...msg, room_id: roomId }, config, roomId);
|
|
715
|
+
const embedding = await embedMissingMessages(cap.db, 500);
|
|
544
716
|
setMeta(cap.db, 'last_message_id', String(data.next_after || after));
|
|
545
717
|
setMeta(cap.db, 'last_sync_at', nowIso());
|
|
546
718
|
saveChatDb(cap);
|
|
547
|
-
return { room_id: roomId, pulled: (data.messages || []).length, last_message_id: Number(data.next_after || after), db: cap.dbPath };
|
|
719
|
+
return { room_id: roomId, pulled: (data.messages || []).length, last_message_id: Number(data.next_after || after), db: cap.dbPath, embedding };
|
|
548
720
|
} finally {
|
|
549
721
|
cap.db.close();
|
|
550
722
|
}
|
|
@@ -605,7 +777,7 @@ async function doChatSend(config, file, opts) {
|
|
|
605
777
|
});
|
|
606
778
|
const cap = await openChatDb(config, file, roomId);
|
|
607
779
|
try {
|
|
608
|
-
insertLocalMessage(cap.db, { ...data.message, room_id: roomId }, config, roomId);
|
|
780
|
+
await insertLocalMessage(cap.db, { ...data.message, room_id: roomId }, config, roomId);
|
|
609
781
|
setMeta(cap.db, 'last_message_id', String(Math.max(Number(getMeta(cap.db, 'last_message_id', '0')) || 0, Number(data.message.id))));
|
|
610
782
|
saveChatDb(cap);
|
|
611
783
|
} finally {
|
|
@@ -634,16 +806,19 @@ function ftsQuery(value) {
|
|
|
634
806
|
async function doChatSearch(config, file, opts) {
|
|
635
807
|
const roomId = requireValue(opts, 'room');
|
|
636
808
|
const query = requireValue(opts, 'query');
|
|
809
|
+
const mode = String(opts.mode || 'hybrid').toLowerCase();
|
|
810
|
+
if (!['hybrid', 'keyword', 'vector'].includes(mode)) throw new Error('search --mode must be hybrid, keyword, or vector');
|
|
637
811
|
await syncRoom(config, file, roomId, 500);
|
|
638
812
|
const cap = await openChatDb(config, file, roomId);
|
|
639
813
|
try {
|
|
814
|
+
const embedding = await embedMissingMessages(cap.db, 500);
|
|
640
815
|
const maxResults = Math.max(1, Math.min(Number(opts.limit || 20), 100));
|
|
641
|
-
let
|
|
642
|
-
if (getMeta(cap.db, 'fts5', '0') === '1') {
|
|
816
|
+
let keywordRows = [];
|
|
817
|
+
if (mode !== 'vector' && getMeta(cap.db, 'fts5', '0') === '1') {
|
|
643
818
|
const q = ftsQuery(query);
|
|
644
819
|
if (q) {
|
|
645
820
|
try {
|
|
646
|
-
|
|
821
|
+
keywordRows = dbAll(
|
|
647
822
|
cap.db,
|
|
648
823
|
`SELECT m.*, bm25(messages_fts) AS score
|
|
649
824
|
FROM messages_fts JOIN messages m ON m.id=messages_fts.rowid
|
|
@@ -652,33 +827,96 @@ async function doChatSearch(config, file, opts) {
|
|
|
652
827
|
[q, maxResults],
|
|
653
828
|
);
|
|
654
829
|
} catch {
|
|
655
|
-
|
|
830
|
+
keywordRows = [];
|
|
656
831
|
}
|
|
657
832
|
}
|
|
658
833
|
}
|
|
659
|
-
if (!
|
|
660
|
-
|
|
834
|
+
if (mode !== 'vector' && !keywordRows.length) {
|
|
835
|
+
keywordRows = dbAll(cap.db, 'SELECT *, 0 AS score FROM messages WHERE body LIKE ? OR metadata LIKE ? ORDER BY id DESC LIMIT ?', [
|
|
661
836
|
`%${query}%`, `%${query}%`, maxResults,
|
|
662
|
-
]);
|
|
837
|
+
]).map((row) => ({ ...row, keyword_fallback: true }));
|
|
838
|
+
}
|
|
839
|
+
let vectorRows = [];
|
|
840
|
+
let vectorError = null;
|
|
841
|
+
if (mode !== 'keyword') try {
|
|
842
|
+
const qvec = await embedText(query, { isQuery: true });
|
|
843
|
+
const k = Math.max(maxResults * 4, 50);
|
|
844
|
+
const vecMatches = dbAll(
|
|
845
|
+
cap.db,
|
|
846
|
+
'SELECT message_seq, distance FROM message_vectors WHERE embedding MATCH ? AND k = ?',
|
|
847
|
+
[new Float32Array(qvec), k],
|
|
848
|
+
);
|
|
849
|
+
const bestByMessage = new Map();
|
|
850
|
+
if (vecMatches.length) {
|
|
851
|
+
const messageSeqs = vecMatches.map((row) => String(row.message_seq));
|
|
852
|
+
const distanceBySeq = new Map(vecMatches.map((row) => [String(row.message_seq), Number(row.distance)]));
|
|
853
|
+
const placeholders = messageSeqs.map(() => '?').join(',');
|
|
854
|
+
for (const row of dbAll(
|
|
855
|
+
cap.db,
|
|
856
|
+
`SELECT m.*, e.seq, e.pos, e.message_id || ':' || e.seq AS message_seq
|
|
857
|
+
FROM message_embeddings e
|
|
858
|
+
JOIN messages m ON m.message_id=e.message_id
|
|
859
|
+
WHERE e.profile=? AND e.message_id || ':' || e.seq IN (${placeholders})`,
|
|
860
|
+
[EMBEDDING_PROFILE.id, ...messageSeqs],
|
|
861
|
+
)) {
|
|
862
|
+
const distance = distanceBySeq.get(String(row.message_seq)) ?? 1;
|
|
863
|
+
const score = 1 - distance;
|
|
864
|
+
if (score <= 0) continue;
|
|
865
|
+
const { message_seq, ...clean } = row;
|
|
866
|
+
const prior = bestByMessage.get(row.message_id);
|
|
867
|
+
if (!prior || score > prior.vector_score) {
|
|
868
|
+
bestByMessage.set(row.message_id, { ...clean, vector_score: score, chunk_seq: Number(row.seq || 0), chunk_pos: Number(row.pos || 0) });
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
vectorRows = Array.from(bestByMessage.values())
|
|
873
|
+
.sort((a, b) => b.vector_score - a.vector_score)
|
|
874
|
+
.slice(0, mode === 'vector' ? maxResults : Math.max(maxResults, 50));
|
|
875
|
+
} catch (err) {
|
|
876
|
+
vectorError = err.message || String(err);
|
|
877
|
+
setMeta(cap.db, 'embedding_last_error', vectorError);
|
|
878
|
+
}
|
|
879
|
+
const keywordRank = new Map(keywordRows.map((row, idx) => [row.message_id, idx + 1]));
|
|
880
|
+
const vectorRank = new Map(vectorRows.map((row, idx) => [row.message_id, idx + 1]));
|
|
881
|
+
const byMessage = new Map();
|
|
882
|
+
for (const row of [...keywordRows, ...vectorRows]) {
|
|
883
|
+
const existing = byMessage.get(row.message_id) || {};
|
|
884
|
+
byMessage.set(row.message_id, { ...existing, ...row });
|
|
663
885
|
}
|
|
664
|
-
const
|
|
665
|
-
const
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
.
|
|
680
|
-
|
|
681
|
-
|
|
886
|
+
const rrfK = 60;
|
|
887
|
+
const hybridRows = Array.from(byMessage.values()).map((row) => {
|
|
888
|
+
const kr = keywordRank.get(row.message_id);
|
|
889
|
+
const vr = vectorRank.get(row.message_id);
|
|
890
|
+
const keywordScore = kr ? 1 / (rrfK + kr) : 0;
|
|
891
|
+
const vectorScore = vr ? 1 / (rrfK + vr) : 0;
|
|
892
|
+
return {
|
|
893
|
+
...row,
|
|
894
|
+
search_sources: [kr ? (row.keyword_fallback ? 'like' : 'fts5_bm25') : null, vr ? 'bge_vector_cosine' : null].filter(Boolean),
|
|
895
|
+
keyword_rank: kr || null,
|
|
896
|
+
vector_rank: vr || null,
|
|
897
|
+
hybrid_score: keywordScore + vectorScore,
|
|
898
|
+
};
|
|
899
|
+
}).sort((a, b) => b.hybrid_score - a.hybrid_score).slice(0, maxResults);
|
|
900
|
+
const results = mode === 'keyword'
|
|
901
|
+
? keywordRows.slice(0, maxResults).map((row, idx) => ({ ...row, search_sources: [row.keyword_fallback ? 'like' : 'fts5_bm25'], keyword_rank: idx + 1 }))
|
|
902
|
+
: mode === 'vector'
|
|
903
|
+
? vectorRows.slice(0, maxResults).map((row, idx) => ({ ...row, search_sources: ['bge_vector_cosine'], vector_rank: idx + 1 }))
|
|
904
|
+
: hybridRows;
|
|
905
|
+
return {
|
|
906
|
+
room_id: roomId,
|
|
907
|
+
query,
|
|
908
|
+
search: {
|
|
909
|
+
local_only: true,
|
|
910
|
+
mode,
|
|
911
|
+
methods: ['fts5_bm25', 'bge_vector_cosine', 'rrf_hybrid'],
|
|
912
|
+
fts: getMeta(cap.db, 'fts5', '0') === '1',
|
|
913
|
+
vector: EMBEDDING_PROFILE.id,
|
|
914
|
+
embedding_profile: EMBEDDING_PROFILE,
|
|
915
|
+
embedding,
|
|
916
|
+
vector_error: vectorError,
|
|
917
|
+
},
|
|
918
|
+
results,
|
|
919
|
+
};
|
|
682
920
|
} finally {
|
|
683
921
|
cap.db.close();
|
|
684
922
|
}
|
|
@@ -737,6 +975,7 @@ async function activeStatus(config, file, opts = {}) {
|
|
|
737
975
|
room_id: active?.roomId || null,
|
|
738
976
|
activation_root: active?.cwd || null,
|
|
739
977
|
config: file,
|
|
978
|
+
embedding_profile: EMBEDDING_PROFILE,
|
|
740
979
|
instructions: agentInstructions(active),
|
|
741
980
|
};
|
|
742
981
|
}
|
|
@@ -764,7 +1003,7 @@ function mcpTools() {
|
|
|
764
1003
|
toolSchema('room_join', 'Accept a room invite token.', { invite_token: s, fingerprint: s }, ['invite_token']),
|
|
765
1004
|
toolSchema('chat_send', 'Send a message to the active agent chat room.', { text: s, channel: s, kind: s }, ['text']),
|
|
766
1005
|
toolSchema('chat_read', 'Sync and read recent messages from the active room.', { limit: { type: 'integer' } }),
|
|
767
|
-
toolSchema('chat_search', 'Sync and search the active room transcript.', { query: s, limit: { type: 'integer' } }, ['query']),
|
|
1006
|
+
toolSchema('chat_search', 'Sync and search the active room transcript with local keyword, BGE vector, or hybrid retrieval.', { query: s, mode: s, limit: { type: 'integer' } }, ['query']),
|
|
768
1007
|
toolSchema('chat_sync', 'Sync the active room transcript into local SQLite.'),
|
|
769
1008
|
];
|
|
770
1009
|
}
|
|
@@ -778,7 +1017,7 @@ async function callTool(config, name, args) {
|
|
|
778
1017
|
if (name === 'room_join') return doRoomJoin(config, file, { invite: args.invite_token });
|
|
779
1018
|
if (name === 'chat_send') return doChatSend(config, file, { room: requireActiveRoom(config, args), text: args.text, channel: args.channel || 'agents', kind: args.kind || 'chat.message' });
|
|
780
1019
|
if (name === 'chat_read') return doChatRead(config, file, { room: requireActiveRoom(config, args), limit: args.limit || 50 });
|
|
781
|
-
if (name === 'chat_search') return doChatSearch(config, file, { room: requireActiveRoom(config, args), query: args.query, limit: args.limit || 20 });
|
|
1020
|
+
if (name === 'chat_search') return doChatSearch(config, file, { room: requireActiveRoom(config, args), query: args.query, mode: args.mode || 'hybrid', limit: args.limit || 20 });
|
|
782
1021
|
if (name === 'chat_sync') return syncRoom(config, file, requireActiveRoom(config, args));
|
|
783
1022
|
throw new Error(`unknown tool: ${name}`);
|
|
784
1023
|
}
|
|
@@ -858,6 +1097,24 @@ function printCodexConfig(opts) {
|
|
|
858
1097
|
console.log(`[mcp_servers.supercollab]\ncommand = "supercollab"\nargs = ["mcp", "stdio", "--config", "${file.replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"]`);
|
|
859
1098
|
}
|
|
860
1099
|
|
|
1100
|
+
async function embeddingStatus() {
|
|
1101
|
+
return {
|
|
1102
|
+
ok: true,
|
|
1103
|
+
profile: EMBEDDING_PROFILE,
|
|
1104
|
+
model_download: 'lazy on first embedding, or now via `supercollab embeddings warmup`',
|
|
1105
|
+
cache_dir: process.env.SUPERCOLLAB_MODEL_CACHE || 'default @huggingface/transformers cache',
|
|
1106
|
+
};
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
async function embeddingWarmup() {
|
|
1110
|
+
const vector = await embedText('supercollab embedding warmup', { isQuery: true });
|
|
1111
|
+
return {
|
|
1112
|
+
ok: true,
|
|
1113
|
+
dims: vector.length,
|
|
1114
|
+
profile: EMBEDDING_PROFILE,
|
|
1115
|
+
};
|
|
1116
|
+
}
|
|
1117
|
+
|
|
861
1118
|
async function main() {
|
|
862
1119
|
const { positionals, opts } = parse(process.argv.slice(2));
|
|
863
1120
|
if (opts.help || positionals.length === 0) { printHelp(); return; }
|
|
@@ -896,6 +1153,10 @@ async function main() {
|
|
|
896
1153
|
if (sub === 'list') return console.log(JSON.stringify(await api(config, 'GET', '/v1/agent-sessions', undefined, config.userToken), null, 2));
|
|
897
1154
|
if (sub === 'revoke') return console.log(JSON.stringify(await api(config, 'DELETE', `/v1/agent-sessions/${requireValue(opts, 'session')}`, undefined, config.userToken), null, 2));
|
|
898
1155
|
}
|
|
1156
|
+
if (cmd === 'embeddings') {
|
|
1157
|
+
if (sub === 'status') return console.log(JSON.stringify(await embeddingStatus(), null, 2));
|
|
1158
|
+
if (sub === 'warmup') return console.log(JSON.stringify(await embeddingWarmup(), null, 2));
|
|
1159
|
+
}
|
|
899
1160
|
if (cmd === 'mcp' && sub === 'stdio') return runMcp(opts);
|
|
900
1161
|
if (cmd === 'mcp' && sub === 'print-config') return printCodexConfig(opts);
|
|
901
1162
|
throw new Error(`unknown command: ${positionals.join(' ')}`);
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@supercollab/cli",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.2",
|
|
4
4
|
"description": "SuperCollab CLI and MCP bridge for encrypted local-search agent group chat.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
|
-
"supercollab": "
|
|
7
|
+
"supercollab": "bin/supercollab.js"
|
|
8
8
|
},
|
|
9
9
|
"files": [
|
|
10
10
|
"bin/supercollab.js",
|
|
@@ -14,7 +14,9 @@
|
|
|
14
14
|
"node": ">=20"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
-
"
|
|
17
|
+
"@huggingface/transformers": "3.8.1",
|
|
18
|
+
"better-sqlite3": "12.11.1",
|
|
19
|
+
"sqlite-vec": "0.1.9"
|
|
18
20
|
},
|
|
19
21
|
"keywords": [
|
|
20
22
|
"mcp",
|