@titan-design/brain 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,307 @@
1
+ import {
2
+ addFrontmatterField
3
+ } from "./chunk-4SD4JRLS.js";
4
+
5
+ // src/services/search.ts
6
+ import { existsSync } from "fs";
7
+
8
+ // src/services/reranker.ts
9
// Model identifier handed to the text-classification pipeline used for reranking.
const DEFAULT_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2";

// Promise for the shared pipeline: null until the first call, and reset to
// null when a load attempt fails so that a later call can retry.
let cachedPipeline = null;

/**
 * Lazily create the shared text-classification pipeline.
 *
 * The promise for the load is cached as soon as loading starts, so callers
 * that arrive while the first load is still in flight share that load instead
 * of each starting their own. A failed load is not cached.
 *
 * @returns {Promise<Function>} Promise resolving to the pipeline instance.
 */
function getPipeline() {
  if (cachedPipeline) return cachedPipeline;
  const loading = (async () => {
    const { pipeline } = await import("@huggingface/transformers");
    return pipeline("text-classification", DEFAULT_MODEL, {
      dtype: "q8"
    });
  })();
  cachedPipeline = loading;
  // Drop the cached promise on failure; callers still observe the rejection
  // through `loading` itself.
  loading.catch(() => {
    if (cachedPipeline === loading) cachedPipeline = null;
  });
  return loading;
}
19
/**
 * Re-score search results with the classifier pipeline and return them in
 * descending order of that score.
 *
 * Inputs with zero or one result are returned untouched (same array), without
 * loading the pipeline.
 *
 * @param {string} query - The user's search query.
 * @param {Array<object>} results - Results whose `excerpt` is paired with the query.
 * @param {number} [topK] - Maximum number of results to return; defaults to all.
 * @returns {Promise<Array<object>>} Copies of the results with `score`
 *   replaced by the classifier score.
 */
async function rerank(query, results, topK) {
  if (results.length <= 1) {
    return results;
  }
  const classifier = await getPipeline();
  const inputs = [];
  for (const candidate of results) {
    inputs.push([query, candidate.excerpt]);
  }
  const outputs = await classifier(inputs, { top_k: 1 });
  const ranked = results.map((result, index) => {
    return { result, rerankScore: outputs[index].score };
  });
  ranked.sort((left, right) => right.rerankScore - left.rerankScore);
  const keep = topK ?? results.length;
  const best = ranked.slice(0, keep);
  return best.map((entry) => ({ ...entry.result, score: entry.rerankScore }));
}
35
+
36
+ // src/services/search.ts
37
/** Constant added to each rank in the reciprocal-rank-fusion denominator. */
const RRF_K = 60;

/** Maximum excerpt length, measured in string length units (UTF-16 code units). */
const EXCERPT_MAX_LENGTH = 500;

/** Factor applied to the requested limit when querying the FTS and vector indexes. */
const OVERFETCH_MULTIPLIER = 3;
40
/**
 * Map a vector distance `d` to `1 - d^2 / 2`.
 *
 * NOTE(review): this equals cosine similarity only when `d` is the Euclidean
 * distance between unit-length vectors — confirm against the vector index.
 *
 * @param {number} distance - Distance reported by the vector search.
 * @returns {number} The derived similarity value.
 */
function distanceToCosineSim(distance) {
  const squared = distance * distance;
  return 1 - squared / 2;
}
43
/**
 * Limit an excerpt to at most `maxLength` UTF-16 code units.
 *
 * A cut that would fall between the two halves of a surrogate pair is moved
 * back by one code unit, so the result never ends in a lone high surrogate
 * (which would otherwise surface as a broken character when encoded).
 *
 * @param {string} content - Full chunk text.
 * @param {number} [maxLength=EXCERPT_MAX_LENGTH] - Upper bound on the result length.
 * @returns {string} `content` itself when short enough, otherwise a prefix.
 */
function truncateExcerpt(content, maxLength = EXCERPT_MAX_LENGTH) {
  if (content.length <= maxLength) return content;
  let end = maxLength;
  // 0xD800-0xDBFF is the high-surrogate range: the first half of a pair.
  const lastUnit = content.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }
  return content.slice(0, end);
}
47
/**
 * Embed a search query and return the vector as a Float32Array.
 *
 * When the embedder's model name contains "nomic", the query is sent with a
 * "search_query: " prefix; otherwise it is sent unchanged.
 *
 * @param {{model: string, embed: Function}} embedder - Embedding backend.
 * @param {string} query - Raw query text.
 * @returns {Promise<Float32Array>} First embedding returned by the embedder.
 */
async function embedQuery(embedder, query) {
  let queryText = query;
  if (embedder.model.includes("nomic")) {
    queryText = `search_query: ${query}`;
  }
  const vectors = await embedder.embed([queryText]);
  const [first] = vectors;
  return new Float32Array(first);
}
52
/**
 * Split a comma-separated tag string into trimmed, non-empty tags.
 *
 * @param {string|null|undefined} tagsStr - Stored tag list, e.g. "a, b".
 * @returns {string[]} Tags in their original order; empty for falsy input.
 */
function parseTags(tagsStr) {
  if (!tagsStr) {
    return [];
  }
  const tags = [];
  for (const piece of tagsStr.split(",")) {
    const tag = piece.trim();
    if (tag) {
      tags.push(tag);
    }
  }
  return tags;
}
56
/**
 * Cut a ranked result list at its largest relative score drop.
 *
 * For each adjacent pair, the relative drop is (previous - current) / previous;
 * pairs whose previous score is not positive are skipped. If the largest drop
 * is at least `threshold`, everything from that position onward is removed.
 *
 * @param {Array<{score: number}>} results - Results in ranked order.
 * @param {number} threshold - Minimum relative drop that triggers a cut.
 * @returns {Array<{score: number}>} The same array when no cut applies,
 *   otherwise a shortened copy.
 */
function applyDropoffFilter(results, threshold) {
  if (results.length <= 1) {
    return results;
  }
  let steepest = 0;
  let boundary = -1;
  for (const [index, current] of results.entries()) {
    if (index === 0) continue;
    const previousScore = results[index - 1].score;
    if (previousScore <= 0) continue;
    const relativeDrop = (previousScore - current.score) / previousScore;
    if (relativeDrop > steepest) {
      steepest = relativeDrop;
      boundary = index;
    }
  }
  const shouldCut = boundary > 0 && steepest >= threshold;
  return shouldCut ? results.slice(0, boundary) : results;
}
74
/**
 * Combine full-text and vector hits with weighted reciprocal rank fusion.
 *
 * Each note's score is the sum, over the lists it appears in, of
 * weight * 1 / (RRF_K + rank), where rank is the 1-based position in that list.
 *
 * @param {Array<{noteId: *}>} ftsResults - Full-text hits in ranked order.
 * @param {Map<*, {chunkId: *}>} vectorByNote - Vector hits keyed by note id,
 *   iterated in ranked order.
 * @param {{bm25: number, vector: number}} weights - Per-list weights.
 * @returns {Array<{noteId: *, score: number, chunkId: *}>} Unsorted fused
 *   entries; `chunkId` is null for notes found only by full-text search.
 */
function fuseByRRF(ftsResults, vectorByNote, weights) {
  const byNote = new Map();
  ftsResults.forEach(({ noteId }, index) => {
    byNote.set(noteId, {
      noteId,
      bm25Rank: index + 1,
      vectorRank: null,
      chunkId: null
    });
  });

  let position = 0;
  for (const [noteId, hit] of vectorByNote) {
    position += 1;
    const entry = byNote.get(noteId);
    if (entry) {
      entry.vectorRank = position;
      entry.chunkId = hit.chunkId;
      continue;
    }
    byNote.set(noteId, {
      noteId,
      bm25Rank: null,
      vectorRank: position,
      chunkId: hit.chunkId
    });
  }

  return Array.from(byNote.values(), (entry) => {
    let score = 0;
    if (entry.bm25Rank !== null) {
      score += weights.bm25 * (1 / (RRF_K + entry.bm25Rank));
    }
    if (entry.vectorRank !== null) {
      score += weights.vector * (1 / (RRF_K + entry.vectorRank));
    }
    return { noteId: entry.noteId, score, chunkId: entry.chunkId };
  });
}
114
/**
 * Combine full-text and vector hits by weighted, normalized scores.
 *
 * Full-text `rank` values are min-max normalized so the lowest rank maps to 1
 * and the highest to 0 (all map to 1 when every rank is equal). Vector
 * distances are converted with `distanceToCosineSim` and clamped at 0.
 *
 * @param {Array<{noteId: *, rank: number}>} ftsResults - Full-text hits.
 * @param {Map<*, {distance: number, chunkId: *}>} vectorByNote - Vector hits
 *   keyed by note id.
 * @param {{bm25: number, vector: number}} weights - Per-signal weights.
 * @returns {Array<{noteId: *, score: number, chunkId: *}>} Unsorted fused
 *   entries; `chunkId` is null for notes found only by full-text search.
 */
function fuseByScore(ftsResults, vectorByNote, weights) {
  const byNote = new Map();
  for (const hit of ftsResults) {
    byNote.set(hit.noteId, {
      noteId: hit.noteId,
      bm25Score: hit.rank,
      vectorDistance: null,
      chunkId: null
    });
  }
  for (const [noteId, hit] of vectorByNote) {
    const entry = byNote.get(noteId);
    if (entry) {
      entry.vectorDistance = hit.distance;
      entry.chunkId = hit.chunkId;
      continue;
    }
    byNote.set(noteId, {
      noteId,
      bm25Score: null,
      vectorDistance: hit.distance,
      chunkId: hit.chunkId
    });
  }

  const entries = [...byNote.values()];
  const bm25Scores = entries
    .filter((entry) => entry.bm25Score !== null)
    .map((entry) => entry.bm25Score);
  const hasBm25 = bm25Scores.length > 0;
  const bm25Best = hasBm25 ? Math.min(...bm25Scores) : 0;
  const bm25Worst = hasBm25 ? Math.max(...bm25Scores) : 0;
  const bm25Range = bm25Worst - bm25Best;

  return entries.map((entry) => {
    let score = 0;
    if (entry.bm25Score !== null) {
      const normBm25 = bm25Range === 0 ? 1 : (bm25Worst - entry.bm25Score) / bm25Range;
      score += weights.bm25 * normBm25;
    }
    if (entry.vectorDistance !== null) {
      const cosineSim = distanceToCosineSim(entry.vectorDistance);
      score += weights.vector * Math.max(0, cosineSim);
    }
    return { noteId: entry.noteId, score, chunkId: entry.chunkId };
  });
}
162
/**
 * Hybrid note search: full-text plus vector retrieval, fused and filtered.
 *
 * Steps, in order:
 *  1. Build the set of allowed note ids from the tier/category/confidence/
 *     since/tags options, minus notes of private modules, then narrowed by
 *     metadata filters when given.
 *  2. Run the full-text and vector searches with an over-fetched limit and
 *     drop hits outside the allowed set.
 *  3. Keep the closest vector hit per note and fuse both lists ("score"
 *     strategy by default, otherwise reciprocal rank fusion).
 *  4. Apply the minimum score (0.25 by default for the "score" strategy only),
 *     the optional drop-off cut, and the limit.
 *  5. Record a "search_hit" access for every returned note, then optionally rerank.
 *
 * @param {object} db - Database facade providing the search and lookup methods.
 * @param {object} embedder - Embedding backend passed to `embedQuery`.
 * @param {string} query - Search text; blank queries return an empty list.
 * @param {object} options - Search options (`limit`, filters, `fusionStrategy`,
 *   `minScore`, `dropoff`, `rerank`, `excludePm` / `includePm`).
 * @param {{bm25: number, vector: number}} [fusionWeights] - Signal weights.
 * @param {object} [moduleRegistry] - When given, notes of private modules are excluded.
 * @returns {Promise<Array<object>>} Search results, best first.
 */
async function search(db, embedder, query, options, fusionWeights = { bm25: 0.3, vector: 0.7 }, moduleRegistry) {
  if (query.trim() === "") return [];

  const { limit } = options;
  const overfetchLimit = limit * OVERFETCH_MULTIPLIER;

  let allowedNoteIds = db.getFilteredNoteIds({
    tier: options.tier,
    category: options.category,
    confidence: options.confidence,
    since: options.since,
    tags: options.tags
  });

  if (moduleRegistry) {
    const excludePm = options.excludePm || options.includePm === false;
    // The "pm" module is exempt from exclusion unless the caller opted out of it.
    const hiddenIds = getPrivateModuleNoteIds(db, moduleRegistry, excludePm ? void 0 : "pm");
    if (hiddenIds.size > 0) {
      if (!allowedNoteIds) {
        // No filter so far: start from every note so the exclusion has an effect.
        allowedNoteIds = new Set(db.getAllNotes().map((n) => n.id));
      }
      for (const id of hiddenIds) {
        allowedNoteIds.delete(id);
      }
    }
  }

  if (options.filters?.length) {
    allowedNoteIds = db.getFilteredNoteIdsByMetadata(options.filters, allowedNoteIds ?? void 0);
  }

  const isAllowed = (hit) => allowedNoteIds.has(hit.noteId);

  const ftsResults = db.searchFTS(query, overfetchLimit);
  const filteredFts = allowedNoteIds ? ftsResults.filter(isAllowed) : ftsResults;

  const queryVec = await embedQuery(embedder, query);
  const vectorResults = db.searchVector(queryVec, overfetchLimit);
  const filteredVector = allowedNoteIds ? vectorResults.filter(isAllowed) : vectorResults;

  // Several chunks of one note can match; keep only the closest one.
  const bestVectorByNote = new Map();
  for (const hit of filteredVector) {
    const current = bestVectorByNote.get(hit.noteId);
    if (!current || hit.distance < current.distance) {
      bestVectorByNote.set(hit.noteId, hit);
    }
  }

  const strategy = options.fusionStrategy ?? "score";
  const fuse = strategy === "score" ? fuseByScore : fuseByRRF;
  const scored = fuse(filteredFts, bestVectorByNote, fusionWeights);
  scored.sort((a, b) => b.score - a.score);

  const DEFAULT_MIN_SCORE = 0.25;
  let effectiveMinScore = null;
  if (options.minScore != null) {
    effectiveMinScore = options.minScore;
  } else if (strategy === "score") {
    effectiveMinScore = DEFAULT_MIN_SCORE;
  }

  let candidates = scored;
  if (effectiveMinScore != null) {
    candidates = candidates.filter((s) => s.score >= effectiveMinScore);
  }
  if (options.dropoff != null) {
    candidates = applyDropoffFilter(candidates, options.dropoff);
  }

  const results = buildSearchResults(db, candidates.slice(0, limit));
  for (const result of results) {
    db.recordAccess(result.noteId, "search_hit");
  }

  if (options.rerank && results.length > 1) {
    return rerank(query, results, limit);
  }
  return results;
}
223
/**
 * Collect facet counts for each requested field.
 *
 * @param {object} db - Database facade providing `getFacetCounts(field, noteIds)`.
 * @param {string[]} facetFields - Fields to compute facets for.
 * @param {*} noteIds - Passed through unchanged to `getFacetCounts`.
 * @returns {Array<{field: string, values: *}>} One entry per field, in input order.
 */
function computeFacets(db, facetFields, noteIds) {
  const facets = [];
  for (const field of facetFields) {
    const values = db.getFacetCounts(field, noteIds);
    facets.push({ field, values });
  }
  return facets;
}
229
/**
 * Turn scored note entries into full search results.
 *
 * Entries whose note cannot be loaded are skipped. The excerpt comes from the
 * matched chunk when the entry has a `chunkId`, otherwise from the note's
 * first chunk. Missing content on either path yields an empty excerpt instead
 * of an error.
 *
 * @param {object} db - Database facade providing the note and chunk lookups.
 * @param {Array<{noteId: *, score: number, chunkId: *}>} topResults - Scored
 *   entries in final order.
 * @returns {Array<object>} Results with score, file path, note id, heading,
 *   excerpt, tier, tags and confidence.
 */
function buildSearchResults(db, topResults) {
  const noteIds = topResults.map((r) => r.noteId);
  const notesById = db.getNotesByIds(noteIds);
  const results = [];
  for (const item of topResults) {
    const note = notesById.get(item.noteId);
    if (!note) continue;
    const rawExcerpt = item.chunkId
      ? db.getChunkContent(item.chunkId)
      : db.getFirstChunkForNote(item.noteId)?.content;
    results.push({
      score: item.score,
      filePath: note.filePath,
      noteId: note.id,
      heading: db.getChunkHeading(item.chunkId, item.noteId),
      // Fall back on both paths: a chunk lookup may come back empty too.
      excerpt: truncateExcerpt(rawExcerpt ?? ""),
      tier: note.tier,
      tags: parseTags(note.tags),
      confidence: note.confidence
    });
  }
  return results;
}
250
/**
 * Vector search over stored memories.
 *
 * Fetches three times `limit` candidates, discards memories that are missing,
 * not the latest version, forgotten, or (when `containerTag` is given) in a
 * different container, then returns the best `limit` by similarity.
 *
 * @param {object} db - Database facade providing `searchMemoryVectors` and
 *   `getMemoriesByIds`.
 * @param {object} embedder - Embedding backend passed to `embedQuery`.
 * @param {string} query - Search text; blank queries return an empty list.
 * @param {number} [limit=10] - Maximum number of memories to return.
 * @param {string} [containerTag] - Restrict results to this container.
 * @returns {Promise<Array<object>>} Memories sorted by descending score.
 */
async function searchMemories(db, embedder, query, limit = 10, containerTag) {
  if (query.trim() === "") return [];

  const queryVec = await embedQuery(embedder, query);
  const hits = db.searchMemoryVectors(queryVec, limit * 3);
  if (hits.length === 0) return [];

  const memoriesById = db.getMemoriesByIds(hits.map((hit) => hit.memoryId));
  const ranked = [];
  for (const hit of hits) {
    const memory = memoriesById.get(hit.memoryId);
    const isLive = memory && memory.isLatest && !memory.isForgotten;
    if (!isLive) continue;
    const wrongContainer = containerTag && memory.containerTag !== containerTag;
    if (wrongContainer) continue;
    const similarity = distanceToCosineSim(hit.distance);
    ranked.push({
      score: Math.max(0, similarity),
      memory: memory.memory,
      memoryId: memory.id,
      sourceNoteId: memory.sourceNoteId,
      containerTag: memory.containerTag,
      createdAt: memory.createdAt
    });
  }
  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, limit);
}
276
/** Access count at which a "fast" note is promoted when no threshold is given. */
const DEFAULT_PROMOTION_THRESHOLD = 10;

/**
 * Promote a "fast"-tier note to "slow" once it has been accessed often enough.
 *
 * On promotion the note is upserted with tier "slow" and a null review
 * interval, and the `tier` frontmatter field of its file is updated when the
 * file exists.
 *
 * @param {object} db - Database facade providing `getNoteById`,
 *   `getAccessCount` and `upsertNote`.
 * @param {*} noteId - Id of the note to check.
 * @param {number} [threshold=DEFAULT_PROMOTION_THRESHOLD] - Minimum access count.
 * @returns {boolean} True when the note was promoted, false otherwise.
 */
function checkAndPromote(db, noteId, threshold = DEFAULT_PROMOTION_THRESHOLD) {
  const note = db.getNoteById(noteId);
  const isFastTier = Boolean(note) && note.tier === "fast";
  if (!isFastTier) {
    return false;
  }
  if (db.getAccessCount(noteId) < threshold) {
    return false;
  }
  db.upsertNote({ ...note, tier: "slow", reviewInterval: null });
  const { filePath } = note;
  if (existsSync(filePath)) {
    addFrontmatterField(filePath, "tier", "slow");
  }
  return true;
}
289
/**
 * Collect the ids of all notes that belong to private modules.
 *
 * @param {object} db - Database facade providing `getModuleNoteIds({ module })`.
 * @param {object} registry - Module registry whose `getFilters()` yields
 *   `{ module, filter }` entries.
 * @param {string} [skipModule] - Module name to leave out even if it is private.
 * @returns {Set<*>} Ids of notes in private modules other than `skipModule`.
 */
function getPrivateModuleNoteIds(db, registry, skipModule) {
  const hiddenIds = new Set();
  for (const entry of registry.getFilters()) {
    const isHidden = entry.filter.visibility === "private" && entry.module !== skipModule;
    if (!isHidden) continue;
    for (const id of db.getModuleNoteIds({ module: entry.module })) {
      hiddenIds.add(id);
    }
  }
  return hiddenIds;
}
301
+
302
+ export {
303
+ search,
304
+ computeFacets,
305
+ searchMemories,
306
+ checkAndPromote
307
+ };