fossel 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -18
- package/dist/cli.js +257 -8
- package/dist/index.js +240 -10
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -20,7 +20,31 @@
|
|
|
20
20
|
npx -y fossel
|
|
21
21
|
```
|
|
22
22
|
|
|
23
|
-
4. In chat,
|
|
23
|
+
4. In chat, say:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
remember: [anything about this repo]
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Then ask:
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
what does Fossel remember about [topic]?
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
5. **Verify it works** — paste this in your AI chat:
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
remember: Fossel is working in this repo
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Then immediately ask:
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
what does Fossel remember?
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
You should see your memory returned.
|
|
24
48
|
|
|
25
49
|
**Database path:** `~/.fossel/memory.db` (override with `FOSSEL_DB_PATH`).
|
|
26
50
|
|
|
@@ -76,17 +100,14 @@ Every original tool is still available for power users.
|
|
|
76
100
|
|
|
77
101
|
| Tool | Purpose |
|
|
78
102
|
|------|---------|
|
|
79
|
-
| `remember` |
|
|
80
|
-
| `get_context` |
|
|
81
|
-
| `
|
|
82
|
-
| `
|
|
83
|
-
| `
|
|
84
|
-
| `
|
|
85
|
-
| `
|
|
86
|
-
| `summarize_repo_context` | Markdown
|
|
87
|
-
| `pin_memory` / `unpin_memory` | Pin important items. |
|
|
88
|
-
| `update_memory` | Partial update by numeric id. |
|
|
89
|
-
| `delete_memory` | Delete by legacy string id. |
|
|
103
|
+
| `remember` | Save a memory in natural language — auto-infers type, tags, and repo |
|
|
104
|
+
| `get_context` | Retrieve relevant memories, pinned first then recent |
|
|
105
|
+
| `search_memory` | FTS search across notes, optional repo filter |
|
|
106
|
+
| `pin_memory` / `unpin_memory` | Pin important memories to always appear first |
|
|
107
|
+
| `delete_memory` | Delete by id |
|
|
108
|
+
| `update_memory` | Edit an existing memory by id |
|
|
109
|
+
| `dedupe_repo` | Merge near-duplicate memories |
|
|
110
|
+
| `summarize_repo_context` | Markdown summary — useful for PR descriptions |
|
|
90
111
|
|
|
91
112
|
### Memory types
|
|
92
113
|
|
|
@@ -175,10 +196,6 @@ Reports on:
|
|
|
175
196
|
|
|
176
197
|
Pass `--fix` to apply safe automated cleanup in one go: merge sibling repo keys, rewrite stale alias mentions, and remove exact-text duplicates. Without `--fix` it's read-only and exits non-zero on issues so it can run in CI.
|
|
177
198
|
|
|
178
|
-
### `fossel init`
|
|
179
|
-
|
|
180
|
-
`fossel init` auto-deduplicates exact duplicate memories at the end of the run; pass `--no-dedupe` to opt out.
|
|
181
|
-
|
|
182
199
|
---
|
|
183
200
|
|
|
184
201
|
## Cursor MCP config
|
|
@@ -208,7 +225,7 @@ Pass `--fix` to apply safe automated cleanup in one go: merge sibling repo keys,
|
|
|
208
225
|
"command": "npx",
|
|
209
226
|
"args": ["-y", "fossel"],
|
|
210
227
|
"env": {
|
|
211
|
-
"FOSSEL_WORKSPACE": "/
|
|
228
|
+
"FOSSEL_WORKSPACE": "/path/to/your/project"
|
|
212
229
|
}
|
|
213
230
|
}
|
|
214
231
|
}
|
|
@@ -235,10 +252,54 @@ npm run ci # typecheck + tests + build + smoke
|
|
|
235
252
|
## Notes
|
|
236
253
|
|
|
237
254
|
- **Local-first:** data stays on your machine.
|
|
238
|
-
- **Search:** FTS5
|
|
255
|
+
- **Search:** FTS5 keyword search by default. Optional **hybrid semantic search**
|
|
256
|
+
via `FOSSEL_EMBEDDINGS=1` (see below).
|
|
239
257
|
- **`FOSSEL_DB_PATH`:** optional override for DB location (e.g. tests).
|
|
240
258
|
- **Schema:** migrations live in `src/db/migrate.ts`; reference shape in `src/db/schema.sql`.
|
|
241
259
|
|
|
260
|
+
## Hybrid semantic search (optional)
|
|
261
|
+
|
|
262
|
+
By default Fossel retrieves memories with FTS5 keyword search. Keyword search
|
|
263
|
+
misses paraphrases — a query like "how does authentication work?" won't match a
|
|
264
|
+
note that says "JWT lives in localStorage" because they share no words.
|
|
265
|
+
|
|
266
|
+
Set `FOSSEL_EMBEDDINGS=1` to enable **hybrid retrieval**: a local, dependency-free
|
|
267
|
+
embedding is computed for every memory and fused with the keyword results
|
|
268
|
+
(Reciprocal Rank Fusion). This adds semantic recall while keeping FTS5's
|
|
269
|
+
exact-match precision for identifiers, file paths, and ticket numbers.
|
|
270
|
+
|
|
271
|
+
```json
|
|
272
|
+
{
|
|
273
|
+
"mcpServers": {
|
|
274
|
+
"fossel": {
|
|
275
|
+
"command": "npx",
|
|
276
|
+
"args": ["-y", "fossel"],
|
|
277
|
+
"env": {
|
|
278
|
+
"FOSSEL_WORKSPACE": "${workspaceFolder}",
|
|
279
|
+
"FOSSEL_EMBEDDINGS": "1"
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
Properties:
|
|
287
|
+
|
|
288
|
+
- **Zero install weight / fully offline.** The embedding is a deterministic
|
|
289
|
+
feature-hashing of token unigrams and bigrams — no model download, no native
|
|
290
|
+
dependency, no network. It runs instantly and keeps the local-first promise.
|
|
291
|
+
- **Opt-in.** With the flag unset, Fossel behaves exactly as before: no vectors
|
|
292
|
+
are written and retrieval is FTS-only.
|
|
293
|
+
- **Self-healing index.** Memories created before enabling the flag are embedded
|
|
294
|
+
on demand the first time the repo is searched.
|
|
295
|
+
- **Pluggable.** `embedText` in `src/lib/embeddings.ts` is the single entry
|
|
296
|
+
point, so a stronger embedder (transformers.js, ONNX, or a remote model) can
|
|
297
|
+
be swapped in later without touching callers. Bump `EMBEDDING_VERSION` to
|
|
298
|
+
trigger automatic re-indexing of stale vectors.
|
|
299
|
+
|
|
300
|
+
Vectors are stored in a `memory_embeddings` side table keyed by memory rowid and
|
|
301
|
+
cleaned up via trigger when a memory is deleted.
|
|
302
|
+
|
|
242
303
|
## Community
|
|
243
304
|
|
|
244
305
|
If Fossel saves you time, **[star the repo](https://github.com/7vignesh/fossel)** and **[open an issue](https://github.com/7vignesh/fossel/issues)** for bugs or ideas — that helps others discover it too.
|
package/dist/cli.js
CHANGED
|
@@ -164,6 +164,25 @@ var init_migrate = __esm({
|
|
|
164
164
|
tx(rows);
|
|
165
165
|
}
|
|
166
166
|
}
|
|
167
|
+
},
|
|
168
|
+
{
|
|
169
|
+
name: "007_add_memory_embeddings",
|
|
170
|
+
apply: (db) => {
|
|
171
|
+
db.exec(`
|
|
172
|
+
CREATE TABLE IF NOT EXISTS memory_embeddings (
|
|
173
|
+
memory_rowid INTEGER PRIMARY KEY,
|
|
174
|
+
dim INTEGER NOT NULL,
|
|
175
|
+
version INTEGER NOT NULL,
|
|
176
|
+
vector BLOB NOT NULL,
|
|
177
|
+
updated_at INTEGER NOT NULL
|
|
178
|
+
);
|
|
179
|
+
|
|
180
|
+
CREATE TRIGGER IF NOT EXISTS memories_embeddings_ad
|
|
181
|
+
AFTER DELETE ON memories BEGIN
|
|
182
|
+
DELETE FROM memory_embeddings WHERE memory_rowid = old.rowid;
|
|
183
|
+
END;
|
|
184
|
+
`);
|
|
185
|
+
}
|
|
167
186
|
}
|
|
168
187
|
];
|
|
169
188
|
}
|
|
@@ -538,6 +557,170 @@ var init_repo = __esm({
|
|
|
538
557
|
}
|
|
539
558
|
});
|
|
540
559
|
|
|
560
|
+
// src/lib/embeddings.ts
|
|
561
|
+
/**
 * Tells whether hybrid semantic search is switched on.
 *
 * The FOSSEL_EMBEDDINGS environment variable is read on every call.
 * Surrounding whitespace and letter case are ignored.
 *
 * @returns {boolean} true when the variable is "1", "true", "on" or "yes";
 *   false for any other value or when the variable is unset.
 */
function embeddingsEnabled() {
  const raw = process.env.FOSSEL_EMBEDDINGS;
  if (raw == null) {
    return false;
  }
  const flag = raw.trim().toLowerCase();
  return ["1", "true", "on", "yes"].includes(flag);
}
|
|
565
|
+
/**
 * 32-bit FNV-1a hash of a string.
 *
 * The hash is taken over the string's UTF-16 code units (charCodeAt), not
 * over its UTF-8 bytes, so results match byte-wise FNV-1a only for ASCII.
 *
 * @param {string} str - Text to hash.
 * @returns {number} Unsigned 32-bit hash value.
 */
function fnv1a(str) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  let acc = FNV_OFFSET_BASIS;
  let pos = 0;
  while (pos < str.length) {
    // XOR the code unit in first, then multiply; Math.imul keeps the
    // product within 32 bits.
    acc = Math.imul(acc ^ str.charCodeAt(pos), FNV_PRIME);
    pos += 1;
  }
  // Reinterpret the signed 32-bit result as unsigned.
  return acc >>> 0;
}
|
|
573
|
+
/**
 * Splits text into lowercase ASCII word tokens for embedding.
 *
 * Every character other than a-z, 0-9 and whitespace is treated as a
 * separator, and tokens shorter than two characters are discarded.
 *
 * @param {string} text - Raw text.
 * @returns {string[]} Tokens in their original order.
 */
function tokenize2(text) {
  const cleaned = text
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  const tokens = [];
  for (const piece of cleaned.split(" ")) {
    if (piece.length >= 2) {
      tokens.push(piece);
    }
  }
  return tokens;
}
|
|
576
|
+
/**
 * Builds a fixed-length embedding for a piece of text by feature hashing.
 *
 * Each token (weight 1) and each pair of adjacent tokens (weight 0.6) is
 * hashed into one of EMBEDDING_DIM buckets; the result is then scaled to
 * unit length.
 *
 * @param {string} text - Text to embed.
 * @returns {Float32Array} Vector of length EMBEDDING_DIM. It is all zeros
 *   when the text yields no tokens.
 */
function embedText(text) {
  const embedding = new Float32Array(EMBEDDING_DIM);
  const words = tokenize2(text);
  if (words.length === 0) {
    // Nothing to hash: return the all-zero vector as is.
    return embedding;
  }

  // The feature's hash selects the bucket; a second hash of the "#"-prefixed
  // feature decides whether the weight is added or subtracted.
  const bump = (feature, weight) => {
    const bucket = fnv1a(feature) % EMBEDDING_DIM;
    const isEven = (fnv1a(`#${feature}`) & 1) === 0;
    embedding[bucket] += isEven ? weight : -weight;
  };

  words.forEach((word, position) => {
    bump(word, 1);
    if (position < words.length - 1) {
      bump(`${word} ${words[position + 1]}`, 0.6);
    }
  });

  let squaredLength = 0;
  for (const component of embedding) {
    squaredLength += component * component;
  }
  const length = Math.sqrt(squaredLength);
  if (length > 0) {
    for (let slot = 0; slot < embedding.length; slot += 1) {
      embedding[slot] /= length;
    }
  }
  return embedding;
}
|
|
606
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * The dot product is divided by the product of both magnitudes, so the
 * result is a true cosine even when an input is not unit length. For
 * unit-length inputs this equals the plain dot product up to rounding.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Value in [-1, 1]. Returns 0 when the lengths differ or
 *   when either vector has no usable magnitude (all zeros or NaN).
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    return 0;
  }
  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    squaredA += a[i] * a[i];
    squaredB += b[i] * b[i];
  }
  const scale = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  // Written as a negated comparison so that a NaN scale is rejected too.
  if (!(scale > 0)) {
    return 0;
  }
  return dot / scale;
}
|
|
616
|
+
/**
 * Exposes a typed array's bytes as a Node.js Buffer.
 *
 * The Buffer is created over the typed array's own ArrayBuffer, honouring
 * its byte offset and byte length.
 *
 * @param {Float32Array} vector - Vector to expose.
 * @returns {Buffer} Buffer covering exactly the vector's bytes.
 */
function vectorToBuffer(vector) {
  const { buffer: backingStore, byteOffset, byteLength } = vector;
  return Buffer.from(backingStore, byteOffset, byteLength);
}
|
|
619
|
+
/**
 * Decodes a stored byte blob back into a Float32Array.
 *
 * The bytes are copied into a freshly allocated Float32Array rather than
 * viewed in place. A Float32Array view needs a byte offset that is a
 * multiple of 4, which an arbitrary Buffer does not guarantee; a new
 * allocation always starts at offset 0. The copy also detaches the result
 * from the input's memory.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer} buffer - Raw float32 bytes.
 * @returns {Float32Array} One element per complete 4-byte group; trailing
 *   bytes that do not fill a group are ignored.
 */
function bufferToVector(buffer) {
  const bytes = Buffer.isBuffer(buffer) ? buffer : Buffer.from(buffer);
  const floatCount = Math.floor(
    bytes.byteLength / Float32Array.BYTES_PER_ELEMENT
  );
  const vector = new Float32Array(floatCount);
  // Byte-wise copy keeps the platform's native float layout untouched.
  new Uint8Array(vector.buffer).set(bytes.subarray(0, vector.byteLength));
  return vector;
}
|
|
627
|
+
// Module-scope bindings for src/lib/embeddings.ts. They are declared here and
// assigned inside the initializer registered with __esm below.
var EMBEDDING_DIM;
var EMBEDDING_VERSION;
var init_embeddings = __esm({
  "src/lib/embeddings.ts"() {
    "use strict";
    // Version stamp written next to every stored vector; rows whose stored
    // version differs are re-embedded by backfillRepoEmbeddings().
    EMBEDDING_VERSION = 1;
    // Number of float32 components in each vector produced by embedText().
    EMBEDDING_DIM = 256;
  }
});
|
|
635
|
+
|
|
636
|
+
// src/lib/vector-index.ts
|
|
637
|
+
/**
 * Stores, or refreshes, the embedding for one memory row.
 *
 * Does nothing unless embeddings are enabled. Otherwise the note is embedded
 * and upserted into memory_embeddings keyed by the memory's rowid, together
 * with the current dimension, version and a Unix timestamp in seconds.
 *
 * @param {object} db - Database handle exposing prepare().
 * @param {number} rowId - rowid of the memory being indexed.
 * @param {string} note - Memory text to embed.
 * @returns {void}
 */
function indexMemoryEmbedding(db, rowId, note) {
  if (embeddingsEnabled()) {
    const embedding = embedText(note);
    const upsert = db.prepare(
      `
      INSERT INTO memory_embeddings (memory_rowid, dim, version, vector, updated_at)
      VALUES (?, ?, ?, ?, ?)
      ON CONFLICT(memory_rowid) DO UPDATE SET
        dim = excluded.dim,
        version = excluded.version,
        vector = excluded.vector,
        updated_at = excluded.updated_at
    `
    );
    const blob = vectorToBuffer(embedding);
    const nowSeconds = Math.floor(Date.now() / 1e3);
    upsert.run(rowId, EMBEDDING_DIM, EMBEDDING_VERSION, blob, nowSeconds);
  }
}
|
|
660
|
+
/**
 * Embeds every memory of a repo that has no up-to-date vector yet.
 *
 * A memory qualifies when it has no memory_embeddings row, or when the stored
 * version or dimension differs from the current constants. All qualifying
 * rows are indexed inside a single transaction.
 *
 * @param {object} db - Database handle exposing prepare() and transaction().
 * @param {string} repo - Repo key whose memories are checked.
 * @returns {number} Count of memories that were (re)indexed; 0 when
 *   embeddings are disabled or nothing was stale.
 */
function backfillRepoEmbeddings(db, repo) {
  if (!embeddingsEnabled()) {
    return 0;
  }
  const stale = db
    .prepare(
      `
      SELECT m.rowid AS row_id, m.note
      FROM memories AS m
      LEFT JOIN memory_embeddings AS e ON e.memory_rowid = m.rowid
      WHERE m.repo = ?
        AND (e.memory_rowid IS NULL OR e.version != ? OR e.dim != ?)
    `
    )
    .all(repo, EMBEDDING_VERSION, EMBEDDING_DIM);
  const pending = stale.length;
  if (pending > 0) {
    const reindexAll = db.transaction((batch) => {
      batch.forEach(({ row_id: rowId, note }) => {
        indexMemoryEmbedding(db, rowId, note);
      });
    });
    reindexAll(stale);
  }
  return pending;
}
|
|
684
|
+
/**
 * Ranks a repo's memories by embedding similarity to a query.
 *
 * Missing or stale vectors are backfilled first. Each stored vector with the
 * current dimension and version is then scored against the query embedding;
 * memories whose score is not positive are dropped.
 *
 * @param {object} db - Database handle exposing prepare().
 * @param {string} repo - Repo key to search within.
 * @param {string} query - Free-text query.
 * @param {number} limit - Maximum number of results to return.
 * @returns {object[]} Memory rows (without the raw vector) plus a `score`
 *   field, best match first. Empty when embeddings are disabled or the query
 *   embeds to the zero vector.
 */
function vectorSearch(db, repo, query, limit) {
  if (!embeddingsEnabled()) {
    return [];
  }
  backfillRepoEmbeddings(db, repo);

  const queryVector = embedText(query);
  const querySquaredLength = queryVector.reduce(
    (sum, component) => sum + component * component,
    0
  );
  if (querySquaredLength === 0) {
    // The query produced no features, so there is nothing to compare against.
    return [];
  }

  const candidates = db
    .prepare(
      `
      SELECT m.rowid AS row_id, m.id, m.repo, m.type, m.note, m.tags,
             m.created_at, m.updated_at, m.pinned, e.vector AS vector
      FROM memory_embeddings AS e
      JOIN memories AS m ON m.rowid = e.memory_rowid
      WHERE m.repo = ? AND e.dim = ? AND e.version = ?
    `
    )
    .all(repo, EMBEDDING_DIM, EMBEDDING_VERSION);

  const hits = candidates
    .map(({ vector, ...memory }) => ({
      ...memory,
      score: cosineSimilarity(queryVector, bufferToVector(vector))
    }))
    .filter((hit) => hit.score > 0)
    .sort((left, right) => right.score - left.score);
  return hits.slice(0, limit);
}
|
|
717
|
+
// Initializer for src/lib/vector-index.ts, registered through the bundler's
// __esm helper. Its only work is to run the embeddings initializer that this
// module depends on.
var init_vector_index = __esm({
  "src/lib/vector-index.ts"() {
    "use strict";
    init_embeddings();
  }
});
|
|
723
|
+
|
|
541
724
|
// src/lib/context.ts
|
|
542
725
|
function parseTags(raw) {
|
|
543
726
|
try {
|
|
@@ -587,7 +770,10 @@ function fetchRepoContext(db, repo, limit, query) {
|
|
|
587
770
|
for (const row of pinned) {
|
|
588
771
|
push(row, "pinned");
|
|
589
772
|
}
|
|
590
|
-
|
|
773
|
+
const pushRecent = () => {
|
|
774
|
+
if (rows.length >= limit) {
|
|
775
|
+
return;
|
|
776
|
+
}
|
|
591
777
|
const recent = db.prepare(
|
|
592
778
|
`
|
|
593
779
|
SELECT rowid AS row_id, id, repo, type, note, tags, created_at, updated_at, pinned
|
|
@@ -596,13 +782,20 @@ function fetchRepoContext(db, repo, limit, query) {
|
|
|
596
782
|
ORDER BY updated_at DESC
|
|
597
783
|
LIMIT ?
|
|
598
784
|
`
|
|
599
|
-
).all(repo, limit
|
|
785
|
+
).all(repo, limit);
|
|
600
786
|
for (const row of recent) {
|
|
601
787
|
push(row, "recent");
|
|
788
|
+
if (rows.length >= limit) {
|
|
789
|
+
break;
|
|
790
|
+
}
|
|
602
791
|
}
|
|
792
|
+
};
|
|
793
|
+
if (!query) {
|
|
794
|
+
pushRecent();
|
|
603
795
|
}
|
|
604
796
|
if (query && rows.length < limit) {
|
|
605
797
|
const ftsQuery = buildFtsQuery(query);
|
|
798
|
+
const ftsRows = [];
|
|
606
799
|
if (ftsQuery) {
|
|
607
800
|
try {
|
|
608
801
|
const matches = db.prepare(
|
|
@@ -616,15 +809,49 @@ function fetchRepoContext(db, repo, limit, query) {
|
|
|
616
809
|
LIMIT ?
|
|
617
810
|
`
|
|
618
811
|
).all(ftsQuery, repo, limit);
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
812
|
+
ftsRows.push(...matches);
|
|
813
|
+
} catch {
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
const vectorRows = embeddingsEnabled() ? vectorSearch(db, repo, query, limit) : [];
|
|
817
|
+
if (vectorRows.length > 0) {
|
|
818
|
+
const RRF_K = 60;
|
|
819
|
+
const fused = /* @__PURE__ */ new Map();
|
|
820
|
+
const accumulate = (list, rankOf) => {
|
|
821
|
+
list.forEach((memory, index) => {
|
|
822
|
+
const contribution = 1 / (RRF_K + index + 1);
|
|
823
|
+
const prior = fused.get(memory.row_id);
|
|
824
|
+
if (prior) {
|
|
825
|
+
prior.score += contribution;
|
|
826
|
+
} else {
|
|
827
|
+
fused.set(memory.row_id, {
|
|
828
|
+
memory,
|
|
829
|
+
score: contribution,
|
|
830
|
+
rank: rankOf?.(memory, index)
|
|
831
|
+
});
|
|
623
832
|
}
|
|
833
|
+
});
|
|
834
|
+
};
|
|
835
|
+
accumulate(ftsRows, (m) => m.rank);
|
|
836
|
+
accumulate(vectorRows);
|
|
837
|
+
const ordered = Array.from(fused.values()).sort(
|
|
838
|
+
(a, b) => b.score - a.score
|
|
839
|
+
);
|
|
840
|
+
for (const { memory, rank } of ordered) {
|
|
841
|
+
push(memory, "search", rank);
|
|
842
|
+
if (rows.length >= limit) {
|
|
843
|
+
break;
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
} else {
|
|
847
|
+
for (const row of ftsRows) {
|
|
848
|
+
push(row, "search", row.rank);
|
|
849
|
+
if (rows.length >= limit) {
|
|
850
|
+
break;
|
|
624
851
|
}
|
|
625
|
-
} catch {
|
|
626
852
|
}
|
|
627
853
|
}
|
|
854
|
+
pushRecent();
|
|
628
855
|
}
|
|
629
856
|
return rows.slice(0, limit);
|
|
630
857
|
}
|
|
@@ -687,6 +914,8 @@ var init_context = __esm({
|
|
|
687
914
|
"src/lib/context.ts"() {
|
|
688
915
|
"use strict";
|
|
689
916
|
init_client();
|
|
917
|
+
init_embeddings();
|
|
918
|
+
init_vector_index();
|
|
690
919
|
SECTION_TITLES = {
|
|
691
920
|
convention: "Conventions",
|
|
692
921
|
bug_fix: "Bug Fixes",
|
|
@@ -1725,6 +1954,7 @@ function registerRememberTool(server) {
|
|
|
1725
1954
|
now,
|
|
1726
1955
|
existing.row_id
|
|
1727
1956
|
);
|
|
1957
|
+
indexMemoryEmbedding(db, existing.row_id, longerNote);
|
|
1728
1958
|
return {
|
|
1729
1959
|
content: [
|
|
1730
1960
|
{
|
|
@@ -1765,6 +1995,9 @@ function registerRememberTool(server) {
|
|
|
1765
1995
|
normalizeText(note)
|
|
1766
1996
|
);
|
|
1767
1997
|
const inserted = db.prepare("SELECT rowid AS row_id FROM memories WHERE id = ?").get(id);
|
|
1998
|
+
if (inserted) {
|
|
1999
|
+
indexMemoryEmbedding(db, inserted.row_id, note);
|
|
2000
|
+
}
|
|
1768
2001
|
return {
|
|
1769
2002
|
content: [
|
|
1770
2003
|
{
|
|
@@ -1796,6 +2029,7 @@ var init_remember = __esm({
|
|
|
1796
2029
|
init_dedupe();
|
|
1797
2030
|
init_inference();
|
|
1798
2031
|
init_repo();
|
|
2032
|
+
init_vector_index();
|
|
1799
2033
|
init_workspace();
|
|
1800
2034
|
rememberInputSchema = {
|
|
1801
2035
|
note: z6.string().trim().min(1, "note is required"),
|
|
@@ -1942,6 +2176,13 @@ function registerSearchMemoryTool(server) {
|
|
|
1942
2176
|
rows = runFts(orQuery, resolvedRepo, limit);
|
|
1943
2177
|
}
|
|
1944
2178
|
}
|
|
2179
|
+
if (rows.length === 0 && resolvedRepo && embeddingsEnabled()) {
|
|
2180
|
+
const semantic = vectorSearch(db, resolvedRepo, query, limit);
|
|
2181
|
+
rows = semantic.map(({ score, ...row }) => ({
|
|
2182
|
+
...row,
|
|
2183
|
+
rank: score
|
|
2184
|
+
}));
|
|
2185
|
+
}
|
|
1945
2186
|
let usedFallback = false;
|
|
1946
2187
|
if (rows.length === 0 && resolvedRepo) {
|
|
1947
2188
|
const fallback = fetchRepoContext(db, resolvedRepo, limit);
|
|
@@ -1997,7 +2238,9 @@ var init_search = __esm({
|
|
|
1997
2238
|
"use strict";
|
|
1998
2239
|
init_client();
|
|
1999
2240
|
init_context();
|
|
2241
|
+
init_embeddings();
|
|
2000
2242
|
init_repo();
|
|
2243
|
+
init_vector_index();
|
|
2001
2244
|
init_workspace();
|
|
2002
2245
|
searchMemoryInputSchema = {
|
|
2003
2246
|
query: z8.string().trim().min(1, "query is required"),
|
|
@@ -2048,6 +2291,9 @@ function registerStoreContextTool(server) {
|
|
|
2048
2291
|
WHERE id = ?
|
|
2049
2292
|
`
|
|
2050
2293
|
).get(id);
|
|
2294
|
+
if (stored) {
|
|
2295
|
+
indexMemoryEmbedding(db, stored.row_id, note);
|
|
2296
|
+
}
|
|
2051
2297
|
return {
|
|
2052
2298
|
content: [
|
|
2053
2299
|
{
|
|
@@ -2078,6 +2324,7 @@ var init_store = __esm({
|
|
|
2078
2324
|
init_client();
|
|
2079
2325
|
init_dedupe();
|
|
2080
2326
|
init_repo();
|
|
2327
|
+
init_vector_index();
|
|
2081
2328
|
init_workspace();
|
|
2082
2329
|
storeContextInputSchema = {
|
|
2083
2330
|
repo: z9.string().trim().min(1).optional(),
|
|
@@ -2257,6 +2504,7 @@ function registerUpdateMemoryTool(server) {
|
|
|
2257
2504
|
WHERE rowid = ?
|
|
2258
2505
|
`
|
|
2259
2506
|
).run(nextType, nextNote, nextNormalized, now, existing.row_id);
|
|
2507
|
+
indexMemoryEmbedding(db, existing.row_id, nextNote);
|
|
2260
2508
|
} else {
|
|
2261
2509
|
db.prepare(
|
|
2262
2510
|
`
|
|
@@ -2314,6 +2562,7 @@ var init_update = __esm({
|
|
|
2314
2562
|
init_client();
|
|
2315
2563
|
init_dedupe();
|
|
2316
2564
|
init_memory();
|
|
2565
|
+
init_vector_index();
|
|
2317
2566
|
updateMemoryInputSchema = {
|
|
2318
2567
|
// Accept numeric row_id or legacy string id so callers can paste whichever
|
|
2319
2568
|
// form they have.
|
|
@@ -2387,7 +2636,7 @@ async function startServer() {
|
|
|
2387
2636
|
initDb(dbPath);
|
|
2388
2637
|
const server = new McpServer({
|
|
2389
2638
|
name: "fossel",
|
|
2390
|
-
version: "1.
|
|
2639
|
+
version: "1.2.1"
|
|
2391
2640
|
});
|
|
2392
2641
|
registerRememberTool(server);
|
|
2393
2642
|
registerGetContextTool(server);
|
package/dist/index.js
CHANGED
|
@@ -138,6 +138,25 @@ var migrations = [
|
|
|
138
138
|
tx(rows);
|
|
139
139
|
}
|
|
140
140
|
}
|
|
141
|
+
},
|
|
142
|
+
{
|
|
143
|
+
name: "007_add_memory_embeddings",
|
|
144
|
+
apply: (db) => {
|
|
145
|
+
db.exec(`
|
|
146
|
+
CREATE TABLE IF NOT EXISTS memory_embeddings (
|
|
147
|
+
memory_rowid INTEGER PRIMARY KEY,
|
|
148
|
+
dim INTEGER NOT NULL,
|
|
149
|
+
version INTEGER NOT NULL,
|
|
150
|
+
vector BLOB NOT NULL,
|
|
151
|
+
updated_at INTEGER NOT NULL
|
|
152
|
+
);
|
|
153
|
+
|
|
154
|
+
CREATE TRIGGER IF NOT EXISTS memories_embeddings_ad
|
|
155
|
+
AFTER DELETE ON memories BEGIN
|
|
156
|
+
DELETE FROM memory_embeddings WHERE memory_rowid = old.rowid;
|
|
157
|
+
END;
|
|
158
|
+
`);
|
|
159
|
+
}
|
|
141
160
|
}
|
|
142
161
|
];
|
|
143
162
|
function runMigrations(db) {
|
|
@@ -195,6 +214,158 @@ function getDb() {
|
|
|
195
214
|
return dbInstance;
|
|
196
215
|
}
|
|
197
216
|
|
|
217
|
+
// src/lib/embeddings.ts
|
|
218
|
+
// Number of float32 components in each embedding: embedText() allocates a
// vector of this length and it is written to memory_embeddings.dim.
var EMBEDDING_DIM = 256;
|
|
219
|
+
// Version stamp stored with each vector; backfillRepoEmbeddings() re-embeds
// rows whose stored version differs from this value.
var EMBEDDING_VERSION = 1;
|
|
220
|
+
/**
 * Tells whether hybrid semantic search is switched on.
 *
 * The FOSSEL_EMBEDDINGS environment variable is read on every call.
 * Surrounding whitespace and letter case are ignored.
 *
 * @returns {boolean} true when the variable is "1", "true", "on" or "yes";
 *   false for any other value or when the variable is unset.
 */
function embeddingsEnabled() {
  const raw = process.env.FOSSEL_EMBEDDINGS;
  if (raw == null) {
    return false;
  }
  const flag = raw.trim().toLowerCase();
  return ["1", "true", "on", "yes"].includes(flag);
}
|
|
224
|
+
/**
 * 32-bit FNV-1a hash of a string.
 *
 * The hash is taken over the string's UTF-16 code units (charCodeAt), not
 * over its UTF-8 bytes, so results match byte-wise FNV-1a only for ASCII.
 *
 * @param {string} str - Text to hash.
 * @returns {number} Unsigned 32-bit hash value.
 */
function fnv1a(str) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  let acc = FNV_OFFSET_BASIS;
  let pos = 0;
  while (pos < str.length) {
    // XOR the code unit in first, then multiply; Math.imul keeps the
    // product within 32 bits.
    acc = Math.imul(acc ^ str.charCodeAt(pos), FNV_PRIME);
    pos += 1;
  }
  // Reinterpret the signed 32-bit result as unsigned.
  return acc >>> 0;
}
|
|
232
|
+
/**
 * Splits text into lowercase ASCII word tokens for embedding.
 *
 * Every character other than a-z, 0-9 and whitespace is treated as a
 * separator, and tokens shorter than two characters are discarded.
 *
 * @param {string} text - Raw text.
 * @returns {string[]} Tokens in their original order.
 */
function tokenize(text) {
  const cleaned = text
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  const tokens = [];
  for (const piece of cleaned.split(" ")) {
    if (piece.length >= 2) {
      tokens.push(piece);
    }
  }
  return tokens;
}
|
|
235
|
+
/**
 * Builds a fixed-length embedding for a piece of text by feature hashing.
 *
 * Each token (weight 1) and each pair of adjacent tokens (weight 0.6) is
 * hashed into one of EMBEDDING_DIM buckets; the result is then scaled to
 * unit length.
 *
 * @param {string} text - Text to embed.
 * @returns {Float32Array} Vector of length EMBEDDING_DIM. It is all zeros
 *   when the text yields no tokens.
 */
function embedText(text) {
  const embedding = new Float32Array(EMBEDDING_DIM);
  const words = tokenize(text);
  if (words.length === 0) {
    // Nothing to hash: return the all-zero vector as is.
    return embedding;
  }

  // The feature's hash selects the bucket; a second hash of the "#"-prefixed
  // feature decides whether the weight is added or subtracted.
  const bump = (feature, weight) => {
    const bucket = fnv1a(feature) % EMBEDDING_DIM;
    const isEven = (fnv1a(`#${feature}`) & 1) === 0;
    embedding[bucket] += isEven ? weight : -weight;
  };

  words.forEach((word, position) => {
    bump(word, 1);
    if (position < words.length - 1) {
      bump(`${word} ${words[position + 1]}`, 0.6);
    }
  });

  let squaredLength = 0;
  for (const component of embedding) {
    squaredLength += component * component;
  }
  const length = Math.sqrt(squaredLength);
  if (length > 0) {
    for (let slot = 0; slot < embedding.length; slot += 1) {
      embedding[slot] /= length;
    }
  }
  return embedding;
}
|
|
265
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * The dot product is divided by the product of both magnitudes, so the
 * result is a true cosine even when an input is not unit length. For
 * unit-length inputs this equals the plain dot product up to rounding.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Value in [-1, 1]. Returns 0 when the lengths differ or
 *   when either vector has no usable magnitude (all zeros or NaN).
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    return 0;
  }
  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    squaredA += a[i] * a[i];
    squaredB += b[i] * b[i];
  }
  const scale = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  // Written as a negated comparison so that a NaN scale is rejected too.
  if (!(scale > 0)) {
    return 0;
  }
  return dot / scale;
}
|
|
275
|
+
/**
 * Exposes a typed array's bytes as a Node.js Buffer.
 *
 * The Buffer is created over the typed array's own ArrayBuffer, honouring
 * its byte offset and byte length.
 *
 * @param {Float32Array} vector - Vector to expose.
 * @returns {Buffer} Buffer covering exactly the vector's bytes.
 */
function vectorToBuffer(vector) {
  const { buffer: backingStore, byteOffset, byteLength } = vector;
  return Buffer.from(backingStore, byteOffset, byteLength);
}
|
|
278
|
+
/**
 * Decodes a stored byte blob back into a Float32Array.
 *
 * The bytes are copied into a freshly allocated Float32Array rather than
 * viewed in place. A Float32Array view needs a byte offset that is a
 * multiple of 4, which an arbitrary Buffer does not guarantee; a new
 * allocation always starts at offset 0. The copy also detaches the result
 * from the input's memory.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer} buffer - Raw float32 bytes.
 * @returns {Float32Array} One element per complete 4-byte group; trailing
 *   bytes that do not fill a group are ignored.
 */
function bufferToVector(buffer) {
  const bytes = Buffer.isBuffer(buffer) ? buffer : Buffer.from(buffer);
  const floatCount = Math.floor(
    bytes.byteLength / Float32Array.BYTES_PER_ELEMENT
  );
  const vector = new Float32Array(floatCount);
  // Byte-wise copy keeps the platform's native float layout untouched.
  new Uint8Array(vector.buffer).set(bytes.subarray(0, vector.byteLength));
  return vector;
}
|
|
286
|
+
|
|
287
|
+
// src/lib/vector-index.ts
|
|
288
|
+
/**
 * Stores, or refreshes, the embedding for one memory row.
 *
 * Does nothing unless embeddings are enabled. Otherwise the note is embedded
 * and upserted into memory_embeddings keyed by the memory's rowid, together
 * with the current dimension, version and a Unix timestamp in seconds.
 *
 * @param {object} db - Database handle exposing prepare().
 * @param {number} rowId - rowid of the memory being indexed.
 * @param {string} note - Memory text to embed.
 * @returns {void}
 */
function indexMemoryEmbedding(db, rowId, note) {
  if (embeddingsEnabled()) {
    const embedding = embedText(note);
    const upsert = db.prepare(
      `
      INSERT INTO memory_embeddings (memory_rowid, dim, version, vector, updated_at)
      VALUES (?, ?, ?, ?, ?)
      ON CONFLICT(memory_rowid) DO UPDATE SET
        dim = excluded.dim,
        version = excluded.version,
        vector = excluded.vector,
        updated_at = excluded.updated_at
    `
    );
    const blob = vectorToBuffer(embedding);
    const nowSeconds = Math.floor(Date.now() / 1e3);
    upsert.run(rowId, EMBEDDING_DIM, EMBEDDING_VERSION, blob, nowSeconds);
  }
}
|
|
311
|
+
/**
 * Embeds every memory of a repo that has no up-to-date vector yet.
 *
 * A memory qualifies when it has no memory_embeddings row, or when the stored
 * version or dimension differs from the current constants. All qualifying
 * rows are indexed inside a single transaction.
 *
 * @param {object} db - Database handle exposing prepare() and transaction().
 * @param {string} repo - Repo key whose memories are checked.
 * @returns {number} Count of memories that were (re)indexed; 0 when
 *   embeddings are disabled or nothing was stale.
 */
function backfillRepoEmbeddings(db, repo) {
  if (!embeddingsEnabled()) {
    return 0;
  }
  const stale = db
    .prepare(
      `
      SELECT m.rowid AS row_id, m.note
      FROM memories AS m
      LEFT JOIN memory_embeddings AS e ON e.memory_rowid = m.rowid
      WHERE m.repo = ?
        AND (e.memory_rowid IS NULL OR e.version != ? OR e.dim != ?)
    `
    )
    .all(repo, EMBEDDING_VERSION, EMBEDDING_DIM);
  const pending = stale.length;
  if (pending > 0) {
    const reindexAll = db.transaction((batch) => {
      batch.forEach(({ row_id: rowId, note }) => {
        indexMemoryEmbedding(db, rowId, note);
      });
    });
    reindexAll(stale);
  }
  return pending;
}
|
|
335
|
+
/**
 * Ranks a repo's memories by embedding similarity to a query.
 *
 * Missing or stale vectors are backfilled first. Each stored vector with the
 * current dimension and version is then scored against the query embedding;
 * memories whose score is not positive are dropped.
 *
 * @param {object} db - Database handle exposing prepare().
 * @param {string} repo - Repo key to search within.
 * @param {string} query - Free-text query.
 * @param {number} limit - Maximum number of results to return.
 * @returns {object[]} Memory rows (without the raw vector) plus a `score`
 *   field, best match first. Empty when embeddings are disabled or the query
 *   embeds to the zero vector.
 */
function vectorSearch(db, repo, query, limit) {
  if (!embeddingsEnabled()) {
    return [];
  }
  backfillRepoEmbeddings(db, repo);

  const queryVector = embedText(query);
  const querySquaredLength = queryVector.reduce(
    (sum, component) => sum + component * component,
    0
  );
  if (querySquaredLength === 0) {
    // The query produced no features, so there is nothing to compare against.
    return [];
  }

  const candidates = db
    .prepare(
      `
      SELECT m.rowid AS row_id, m.id, m.repo, m.type, m.note, m.tags,
             m.created_at, m.updated_at, m.pinned, e.vector AS vector
      FROM memory_embeddings AS e
      JOIN memories AS m ON m.rowid = e.memory_rowid
      WHERE m.repo = ? AND e.dim = ? AND e.version = ?
    `
    )
    .all(repo, EMBEDDING_DIM, EMBEDDING_VERSION);

  const hits = candidates
    .map(({ vector, ...memory }) => ({
      ...memory,
      score: cosineSimilarity(queryVector, bufferToVector(vector))
    }))
    .filter((hit) => hit.score > 0)
    .sort((left, right) => right.score - left.score);
  return hits.slice(0, limit);
}
|
|
368
|
+
|
|
198
369
|
// src/lib/context.ts
|
|
199
370
|
var SECTION_TITLES = {
|
|
200
371
|
convention: "Conventions",
|
|
@@ -252,7 +423,10 @@ function fetchRepoContext(db, repo, limit, query) {
|
|
|
252
423
|
for (const row of pinned) {
|
|
253
424
|
push(row, "pinned");
|
|
254
425
|
}
|
|
255
|
-
|
|
426
|
+
const pushRecent = () => {
|
|
427
|
+
if (rows.length >= limit) {
|
|
428
|
+
return;
|
|
429
|
+
}
|
|
256
430
|
const recent = db.prepare(
|
|
257
431
|
`
|
|
258
432
|
SELECT rowid AS row_id, id, repo, type, note, tags, created_at, updated_at, pinned
|
|
@@ -261,13 +435,20 @@ function fetchRepoContext(db, repo, limit, query) {
|
|
|
261
435
|
ORDER BY updated_at DESC
|
|
262
436
|
LIMIT ?
|
|
263
437
|
`
|
|
264
|
-
).all(repo, limit
|
|
438
|
+
).all(repo, limit);
|
|
265
439
|
for (const row of recent) {
|
|
266
440
|
push(row, "recent");
|
|
441
|
+
if (rows.length >= limit) {
|
|
442
|
+
break;
|
|
443
|
+
}
|
|
267
444
|
}
|
|
445
|
+
};
|
|
446
|
+
if (!query) {
|
|
447
|
+
pushRecent();
|
|
268
448
|
}
|
|
269
449
|
if (query && rows.length < limit) {
|
|
270
450
|
const ftsQuery = buildFtsQuery(query);
|
|
451
|
+
const ftsRows = [];
|
|
271
452
|
if (ftsQuery) {
|
|
272
453
|
try {
|
|
273
454
|
const matches = db.prepare(
|
|
@@ -281,15 +462,49 @@ function fetchRepoContext(db, repo, limit, query) {
|
|
|
281
462
|
LIMIT ?
|
|
282
463
|
`
|
|
283
464
|
).all(ftsQuery, repo, limit);
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
465
|
+
ftsRows.push(...matches);
|
|
466
|
+
} catch {
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
const vectorRows = embeddingsEnabled() ? vectorSearch(db, repo, query, limit) : [];
|
|
470
|
+
if (vectorRows.length > 0) {
|
|
471
|
+
const RRF_K = 60;
|
|
472
|
+
const fused = /* @__PURE__ */ new Map();
|
|
473
|
+
const accumulate = (list, rankOf) => {
|
|
474
|
+
list.forEach((memory, index) => {
|
|
475
|
+
const contribution = 1 / (RRF_K + index + 1);
|
|
476
|
+
const prior = fused.get(memory.row_id);
|
|
477
|
+
if (prior) {
|
|
478
|
+
prior.score += contribution;
|
|
479
|
+
} else {
|
|
480
|
+
fused.set(memory.row_id, {
|
|
481
|
+
memory,
|
|
482
|
+
score: contribution,
|
|
483
|
+
rank: rankOf?.(memory, index)
|
|
484
|
+
});
|
|
288
485
|
}
|
|
486
|
+
});
|
|
487
|
+
};
|
|
488
|
+
accumulate(ftsRows, (m) => m.rank);
|
|
489
|
+
accumulate(vectorRows);
|
|
490
|
+
const ordered = Array.from(fused.values()).sort(
|
|
491
|
+
(a, b) => b.score - a.score
|
|
492
|
+
);
|
|
493
|
+
for (const { memory, rank } of ordered) {
|
|
494
|
+
push(memory, "search", rank);
|
|
495
|
+
if (rows.length >= limit) {
|
|
496
|
+
break;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
} else {
|
|
500
|
+
for (const row of ftsRows) {
|
|
501
|
+
push(row, "search", row.rank);
|
|
502
|
+
if (rows.length >= limit) {
|
|
503
|
+
break;
|
|
289
504
|
}
|
|
290
|
-
} catch {
|
|
291
505
|
}
|
|
292
506
|
}
|
|
507
|
+
pushRecent();
|
|
293
508
|
}
|
|
294
509
|
return rows.slice(0, limit);
|
|
295
510
|
}
|
|
@@ -501,7 +716,7 @@ var DEFAULT_CANDIDATE_LIMIT = 200;
|
|
|
501
716
|
/**
 * Canonicalizes text for comparison.
 *
 * Lowercases the input, turns every character other than a-z, 0-9 and
 * whitespace into a space, collapses whitespace runs to one space and trims
 * both ends.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const lowered = text.toLowerCase();
  const alphanumericOnly = lowered.replace(/[^a-z0-9\s]/g, " ");
  const singleSpaced = alphanumericOnly.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
504
|
-
function
|
|
719
|
+
/**
 * Splits text into normalized word tokens for duplicate detection.
 *
 * The text is first passed through normalizeText(); tokens shorter than two
 * characters are discarded.
 *
 * @param {string} text - Raw or already normalized text.
 * @returns {string[]} Tokens in their original order.
 */
function tokenize2(text) {
  const words = [];
  for (const word of normalizeText(text).split(" ")) {
    if (word.length >= 2) {
      words.push(word);
    }
  }
  return words;
}
|
|
507
722
|
function trigrams(text) {
|
|
@@ -537,7 +752,7 @@ function similarity(a, b) {
|
|
|
537
752
|
if (normalizedA === normalizedB) {
|
|
538
753
|
return 1;
|
|
539
754
|
}
|
|
540
|
-
const wordScore = jaccard(new Set(
|
|
755
|
+
const wordScore = jaccard(new Set(tokenize2(normalizedA)), new Set(tokenize2(normalizedB)));
|
|
541
756
|
const triScore = jaccard(trigrams(normalizedA), trigrams(normalizedB));
|
|
542
757
|
return wordScore * 0.55 + triScore * 0.45;
|
|
543
758
|
}
|
|
@@ -1545,6 +1760,7 @@ function registerRememberTool(server) {
|
|
|
1545
1760
|
now,
|
|
1546
1761
|
existing.row_id
|
|
1547
1762
|
);
|
|
1763
|
+
indexMemoryEmbedding(db, existing.row_id, longerNote);
|
|
1548
1764
|
return {
|
|
1549
1765
|
content: [
|
|
1550
1766
|
{
|
|
@@ -1585,6 +1801,9 @@ function registerRememberTool(server) {
|
|
|
1585
1801
|
normalizeText(note)
|
|
1586
1802
|
);
|
|
1587
1803
|
const inserted = db.prepare("SELECT rowid AS row_id FROM memories WHERE id = ?").get(id);
|
|
1804
|
+
if (inserted) {
|
|
1805
|
+
indexMemoryEmbedding(db, inserted.row_id, note);
|
|
1806
|
+
}
|
|
1588
1807
|
return {
|
|
1589
1808
|
content: [
|
|
1590
1809
|
{
|
|
@@ -1741,6 +1960,13 @@ function registerSearchMemoryTool(server) {
|
|
|
1741
1960
|
rows = runFts(orQuery, resolvedRepo, limit);
|
|
1742
1961
|
}
|
|
1743
1962
|
}
|
|
1963
|
+
if (rows.length === 0 && resolvedRepo && embeddingsEnabled()) {
|
|
1964
|
+
const semantic = vectorSearch(db, resolvedRepo, query, limit);
|
|
1965
|
+
rows = semantic.map(({ score, ...row }) => ({
|
|
1966
|
+
...row,
|
|
1967
|
+
rank: score
|
|
1968
|
+
}));
|
|
1969
|
+
}
|
|
1744
1970
|
let usedFallback = false;
|
|
1745
1971
|
if (rows.length === 0 && resolvedRepo) {
|
|
1746
1972
|
const fallback = fetchRepoContext(db, resolvedRepo, limit);
|
|
@@ -1838,6 +2064,9 @@ function registerStoreContextTool(server) {
|
|
|
1838
2064
|
WHERE id = ?
|
|
1839
2065
|
`
|
|
1840
2066
|
).get(id);
|
|
2067
|
+
if (stored) {
|
|
2068
|
+
indexMemoryEmbedding(db, stored.row_id, note);
|
|
2069
|
+
}
|
|
1841
2070
|
return {
|
|
1842
2071
|
content: [
|
|
1843
2072
|
{
|
|
@@ -2029,6 +2258,7 @@ function registerUpdateMemoryTool(server) {
|
|
|
2029
2258
|
WHERE rowid = ?
|
|
2030
2259
|
`
|
|
2031
2260
|
).run(nextType, nextNote, nextNormalized, now, existing.row_id);
|
|
2261
|
+
indexMemoryEmbedding(db, existing.row_id, nextNote);
|
|
2032
2262
|
} else {
|
|
2033
2263
|
db.prepare(
|
|
2034
2264
|
`
|
|
@@ -2133,7 +2363,7 @@ async function startServer() {
|
|
|
2133
2363
|
initDb(dbPath);
|
|
2134
2364
|
const server = new McpServer({
|
|
2135
2365
|
name: "fossel",
|
|
2136
|
-
version: "1.
|
|
2366
|
+
version: "1.2.1"
|
|
2137
2367
|
});
|
|
2138
2368
|
registerRememberTool(server);
|
|
2139
2369
|
registerGetContextTool(server);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "fossel",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "Local-first MCP memory for Cursor & Claude: repo context in SQLite
|
|
3
|
+
"version": "1.2.1",
|
|
4
|
+
"description": "Local-first MCP memory for Cursor & Claude: repo context in SQLite with hybrid keyword + semantic search, pins, PR summaries. No cloud.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
7
7
|
"model-context-protocol",
|
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
"memory",
|
|
12
12
|
"sqlite",
|
|
13
13
|
"fts5",
|
|
14
|
+
"semantic-search",
|
|
15
|
+
"embeddings",
|
|
14
16
|
"open-source",
|
|
15
17
|
"contributors",
|
|
16
18
|
"local-first",
|