@agenticmail/enterprise 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-ANW4OHXR.js +764 -0
- package/dist/chunk-EVQPFQ55.js +9040 -0
- package/dist/chunk-JMTNHH7I.js +12666 -0
- package/dist/chunk-TYW5XTOW.js +395 -0
- package/dist/chunk-V2YIXYDJ.js +1943 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +5 -4
- package/dist/routes-ALTC4I2R.js +5674 -0
- package/dist/runtime-JLFTHMIT.js +47 -0
- package/dist/server-OGQWCOT6.js +11 -0
- package/dist/setup-HCMMUEW6.js +20 -0
- package/package.json +1 -1
- package/src/agent-tools/tools/memory.ts +42 -15
- package/src/engine/agent-memory.ts +4 -355
- package/src/lib/text-search.ts +358 -0
package/dist/runtime-JLFTHMIT.js
ADDED
@@ -0,0 +1,47 @@
+import {
+  AgentRuntime,
+  EmailChannel,
+  FollowUpScheduler,
+  SessionManager,
+  SubAgentManager,
+  ToolRegistry,
+  callLLM,
+  createAgentRuntime,
+  createNoopHooks,
+  createRuntimeHooks,
+  estimateMessageTokens,
+  estimateTokens,
+  executeTool,
+  runAgentLoop,
+  toolsToDefinitions
+} from "./chunk-JMTNHH7I.js";
+import {
+  PROVIDER_REGISTRY,
+  listAllProviders,
+  resolveApiKeyForProvider,
+  resolveProvider
+} from "./chunk-ZNR5DDTA.js";
+import "./chunk-TYW5XTOW.js";
+import "./chunk-JLSQOQ5L.js";
+import "./chunk-KFQGP6VL.js";
+export {
+  AgentRuntime,
+  EmailChannel,
+  FollowUpScheduler,
+  PROVIDER_REGISTRY,
+  SessionManager,
+  SubAgentManager,
+  ToolRegistry,
+  callLLM,
+  createAgentRuntime,
+  createNoopHooks,
+  createRuntimeHooks,
+  estimateMessageTokens,
+  estimateTokens,
+  executeTool,
+  listAllProviders,
+  resolveApiKeyForProvider,
+  resolveProvider,
+  runAgentLoop,
+  toolsToDefinitions
+};
package/dist/setup-HCMMUEW6.js
ADDED
@@ -0,0 +1,20 @@
+import {
+  promptCompanyInfo,
+  promptDatabase,
+  promptDeployment,
+  promptDomain,
+  promptRegistration,
+  provision,
+  runSetupWizard
+} from "./chunk-ANW4OHXR.js";
+import "./chunk-NTVN3JHS.js";
+import "./chunk-KFQGP6VL.js";
+export {
+  promptCompanyInfo,
+  promptDatabase,
+  promptDeployment,
+  promptDomain,
+  promptRegistration,
+  provision,
+  runSetupWizard
+};
package/package.json
CHANGED

package/src/agent-tools/tools/memory.ts
CHANGED
@@ -10,6 +10,7 @@ import path from 'node:path';
 import crypto from 'node:crypto';
 import type { AnyAgentTool, ToolCreationOptions } from '../types.js';
 import { readStringParam, readNumberParam, jsonResult, textResult, errorResult } from '../common.js';
+import { MemorySearchIndex } from '../../lib/text-search.js';
 
 const MEMORY_ACTIONS = ['set', 'get', 'search', 'list', 'delete'] as const;
 type MemoryAction = (typeof MEMORY_ACTIONS)[number];
@@ -62,26 +63,42 @@ async function saveMemoryStore(storePath: string, store: MemoryStore): Promise<void>
   }
 }
 
+// ── Per-store BM25 search index (rebuilt on load, updated incrementally) ──
+
+var searchIndexCache = new Map<string, MemorySearchIndex>();
+
+function buildSearchIndex(storePath: string, entries: Record<string, MemoryEntry>): MemorySearchIndex {
+  var index = new MemorySearchIndex();
+  for (var entry of Object.values(entries)) {
+    index.addDocument(entry.key, { title: entry.key, content: entry.value, tags: entry.tags });
+  }
+  searchIndexCache.set(storePath, index);
+  return index;
+}
+
+function getSearchIndex(storePath: string, entries: Record<string, MemoryEntry>): MemorySearchIndex {
+  var cached = searchIndexCache.get(storePath);
+  // Rebuild if missing or entry count drifted (another process wrote the file)
+  if (!cached || cached.docCount !== Object.keys(entries).length) {
+    return buildSearchIndex(storePath, entries);
+  }
+  return cached;
+}
+
 function searchEntries(
+  storePath: string,
   entries: Record<string, MemoryEntry>,
   query: string,
   limit: number,
 ): MemoryEntry[] {
-  var queryLower = query.toLowerCase();
-  var scored: { entry: MemoryEntry; score: number }[] = [];
-
-  for (var entry of Object.values(entries)) {
-    var score = 0;
-    if (entry
-    if (entry.value.toLowerCase().includes(queryLower)) score += 5;
-    for (var tag of entry.tags) {
-      if (tag.toLowerCase().includes(queryLower)) score += 3;
-    }
-    if (score > 0) scored.push({ entry, score });
+  var index = getSearchIndex(storePath, entries);
+  var results = index.search(query);
+  var out: MemoryEntry[] = [];
+  for (var i = 0; i < Math.min(results.length, limit); i++) {
+    var entry = entries[results[i].id];
+    if (entry) out.push(entry);
   }
-
-  scored.sort(function(a, b) { return b.score - a.score; });
-  return scored.slice(0, limit).map(function(s) { return s.entry; });
+  return out;
 }
 
 export function createMemoryTool(options?: ToolCreationOptions): AnyAgentTool | null {
@@ -149,6 +166,11 @@ export function createMemoryTool(options?: ToolCreationOptions): AnyAgentTool | null
         updatedAt: now,
       };
       await saveMemoryStore(storePath, store);
+
+      // Keep BM25 index in sync
+      var idx = getSearchIndex(storePath, store.entries);
+      idx.addDocument(key, { title: key, content: value, tags: tags });
+
       return textResult('Stored memory: ' + key);
     }
 
@@ -162,7 +184,7 @@ export function createMemoryTool(options?: ToolCreationOptions): AnyAgentTool | null
       case 'search': {
         var query = readStringParam(params, 'query', { required: true });
         var limit = readNumberParam(params, 'limit', { integer: true }) ?? 10;
-        var results = searchEntries(store.entries, query, limit);
+        var results = searchEntries(storePath, store.entries, query, limit);
         if (results.length === 0) return textResult('No memories matching: ' + query);
         return jsonResult({ count: results.length, results });
       }
@@ -183,6 +205,11 @@ export function createMemoryTool(options?: ToolCreationOptions): AnyAgentTool | null
        if (!store.entries[key]) return textResult('Memory not found: ' + key);
        delete store.entries[key];
        await saveMemoryStore(storePath, store);
+
+       // Keep BM25 index in sync
+       var idx = searchIndexCache.get(storePath);
+       if (idx) idx.removeDocument(key);
+
        return textResult('Deleted memory: ' + key);
      }
 
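For reference, the MemorySearchIndex surface that memory.ts now relies on (addDocument, removeDocument, search, docCount, all visible in the hunks above) composes as follows. A minimal sketch; the keys, values, and import path are illustrative, not taken from the package:

import { MemorySearchIndex } from './src/lib/text-search.js';

const index = new MemorySearchIndex();

// The tool indexes each memory entry under its key, with the key doubling as title.
index.addDocument('deploy-notes', {
  title: 'deploy-notes',
  content: 'Deployment requires the staging database to be migrated first.',
  tags: ['ops', 'release'],
});
index.addDocument('smtp-rotation', {
  title: 'smtp-rotation',
  content: 'SMTP relay credentials rotate on the first of each month.',
  tags: ['email'],
});

// Stemming plus prefix expansion lets "deploying" match "deployment".
const hits = index.search('deploying database'); // → [{ id: 'deploy-notes', score: ... }]

index.removeDocument('smtp-rotation');
console.log(index.docCount); // 1; the drift check in getSearchIndex compares this count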
package/src/engine/agent-memory.ts
CHANGED
@@ -14,6 +14,7 @@
  */
 
 import type { EngineDatabase } from './db-adapter.js';
+import { MemorySearchIndex, tokenize } from '../lib/text-search.js';
 
 // ─── Types ──────────────────────────────────────────────
 
@@ -128,361 +129,9 @@ const IMPORTANCE_WEIGHT: Record<MemoryImportance, number> = {
   low: 1,
 };
 
-// ─── BM25F Search Engine ─────────────────────────────────
-//
-//
-// Zero dependencies — pure TypeScript implementation.
-//
-// Features:
-// - Pre-built inverted index maintained incrementally (no re-indexing on query)
-// - Lightweight Porter-style stemmer (suffix stripping for English)
-// - Field weighting via BM25F: title ×3, tags ×2, content ×1
-// - Pre-computed IDF values updated on index mutations
-// - Prefix matching: "deploy" matches "deployment", "deployments"
-// - Per-agent partitioning for scoped searches
-// - Bigram proximity boost: terms appearing adjacent score higher
-
-// ── BM25 Parameters ──
-
-const BM25_K1 = 1.2; // Term frequency saturation
-const BM25_B = 0.75; // Document length normalization
-const FIELD_WEIGHT_TITLE = 3.0;
-const FIELD_WEIGHT_TAGS = 2.0;
-const FIELD_WEIGHT_CONTENT = 1.0;
-const PREFIX_MATCH_PENALTY = 0.7; // Prefix matches score 70% of exact matches
-
-// ── Stop Words ──
-
-const STOP_WORDS = new Set([
-  'a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an',
-  'and', 'any', 'are', 'as', 'at', 'be', 'because', 'been', 'before',
-  'being', 'below', 'between', 'both', 'but', 'by', 'can', 'could', 'did',
-  'do', 'does', 'doing', 'down', 'during', 'each', 'either', 'every',
-  'few', 'for', 'from', 'further', 'get', 'got', 'had', 'has', 'have',
-  'having', 'he', 'her', 'here', 'hers', 'herself', 'him', 'himself',
-  'his', 'how', 'i', 'if', 'in', 'into', 'is', 'it', 'its', 'itself',
-  'just', 'may', 'me', 'might', 'more', 'most', 'must', 'my', 'myself',
-  'neither', 'no', 'nor', 'not', 'now', 'of', 'off', 'on', 'once', 'only',
-  'or', 'other', 'ought', 'our', 'ours', 'ourselves', 'out', 'over', 'own',
-  'same', 'shall', 'she', 'should', 'so', 'some', 'such', 'than', 'that',
-  'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'these',
-  'they', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up',
-  'us', 'very', 'was', 'we', 'were', 'what', 'when', 'where', 'which',
-  'while', 'who', 'whom', 'why', 'will', 'with', 'would', 'yet', 'you',
-  'your', 'yours', 'yourself', 'yourselves',
-]);
-
-// ── Porter Stemmer (lightweight suffix stripping) ──
-// Handles common English suffixes to normalize "deployments" → "deploy",
-// "running" → "run", "policies" → "polici", "configured" → "configur".
-// Not a full Porter stemmer — covers the 80/20 of suffixes that matter most.
-
-const STEM_RULES: [RegExp, string, number][] = [
-  // Step 1: plurals and past participles
-  [/ies$/, 'i', 3],       // policies → polici, queries → queri
-  [/sses$/, 'ss', 4],     // addresses → address
-  [/([^s])s$/, '$1', 3],  // items → item, but not "ss"
-  [/eed$/, 'ee', 4],      // agreed → agree
-  [/ed$/, '', 3],         // configured → configur, but min length 3
-  [/ing$/, '', 4],        // running → runn → run (handled below)
-  // Step 2: derivational suffixes
-  [/ational$/, 'ate', 6], // relational → relate
-  [/tion$/, 't', 5],      // adoption → adopt
-  [/ness$/, '', 5],       // awareness → aware
-  [/ment$/, '', 5],       // deployment → deploy
-  [/able$/, '', 5],       // configurable → configur
-  [/ible$/, '', 5],       // accessible → access
-  [/ful$/, '', 5],        // powerful → power
-  [/ous$/, '', 5],        // dangerous → danger
-  [/ive$/, '', 5],        // interactive → interact
-  [/ize$/, '', 4],        // normalize → normal
-  [/ise$/, '', 4],        // organise → organ
-  [/ally$/, '', 5],       // automatically → automat
-  [/ly$/, '', 4],         // quickly → quick
-  [/er$/, '', 4],         // handler → handl
-];
-
-/** Clean up common doubling artifacts after suffix stripping. */
-const DOUBLE_CONSONANT = /([^aeiou])\1$/;
-
-function stem(word: string): string {
-  if (word.length < 3) return word;
-  let stemmed = word;
-  for (const [pattern, replacement, minLen] of STEM_RULES) {
-    if (stemmed.length >= minLen && pattern.test(stemmed)) {
-      stemmed = stemmed.replace(pattern, replacement);
-      break; // Apply only the first matching rule
-    }
-  }
-  // Clean doubled consonants: runn → run, configurr → configur
-  if (stemmed.length > 2 && DOUBLE_CONSONANT.test(stemmed)) {
-    stemmed = stemmed.slice(0, -1);
-  }
-  return stemmed;
-}
-
-// ── Tokenizer ──
-
-/** Tokenize text into stemmed, lowercase terms, filtering stop words. */
-function tokenize(text: string): string[] {
-  return text.toLowerCase()
-    .split(/[^a-z0-9]+/)
-    .filter((t) => t.length > 1 && !STOP_WORDS.has(t))
-    .map(stem);
-}
-
-/** Tokenize preserving original (unstemmed) forms alongside stems. */
-function tokenizeWithOriginals(text: string): { stem: string; original: string }[] {
-  return text.toLowerCase()
-    .split(/[^a-z0-9]+/)
-    .filter((t) => t.length > 1 && !STOP_WORDS.has(t))
-    .map((t) => ({ stem: stem(t), original: t }));
-}
-
-// ── Inverted Index Data Structures ──
-
-interface DocRecord {
-  /** Weighted term frequencies across all fields: title (3x), tags (2x), content (1x) */
-  weightedTf: Map<string, number>;
-  /** Total weighted document length (for BM25 length normalization) */
-  weightedLen: number;
-  /** All unique stems in the document (for prefix matching) */
-  allStems: Set<string>;
-  /** Ordered list of stems for bigram proximity detection */
-  stemSequence: string[];
-}
-
-/**
- * Pre-built inverted index for fast text search.
- * Maintained incrementally — no re-indexing needed on queries.
- *
- * Structure:
- *   term → Set<docId>  (posting list — which docs contain this term)
- *   prefixMap: prefix → Set<stem>  (3-char prefixes → full stems for prefix matching)
- *   docs: docId → DocRecord  (per-doc weighted TF and length)
- *   idf: term → number  (pre-computed IDF, refreshed on mutations)
- */
-class MemorySearchIndex {
-  /** Posting lists: stemmed term → Set of memory IDs containing it */
-  private postings = new Map<string, Set<string>>();
-  /** Per-document metadata for BM25 scoring */
-  private docs = new Map<string, DocRecord>();
-  /** Pre-computed IDF values. Stale flag triggers lazy recomputation. */
-  private idf = new Map<string, number>();
-  private idfStale = true;
-  /** 3-character prefix map for prefix matching: prefix → Set of full stems */
-  private prefixMap = new Map<string, Set<string>>();
-  /** Total weighted document length (for computing average) */
-  private totalWeightedLen = 0;
-
-  get docCount(): number { return this.docs.size; }
-  get avgDocLen(): number { return this.docs.size > 0 ? this.totalWeightedLen / this.docs.size : 1; }
-
-  /**
-   * Index a memory entry. Extracts stems from title, content, and tags
-   * with field-specific weighting and builds posting lists.
-   */
-  addDocument(id: string, entry: { title: string; content: string; tags: string[] }): void {
-    // Remove old version if updating
-    if (this.docs.has(id)) this.removeDocument(id);
-
-    const titleTokens = tokenize(entry.title);
-    const contentTokens = tokenize(entry.content);
-    const tagTokens = entry.tags.flatMap((t) => tokenize(t));
-
-    // Build weighted term frequency map
-    const weightedTf = new Map<string, number>();
-    for (const t of titleTokens) weightedTf.set(t, (weightedTf.get(t) || 0) + FIELD_WEIGHT_TITLE);
-    for (const t of tagTokens) weightedTf.set(t, (weightedTf.get(t) || 0) + FIELD_WEIGHT_TAGS);
-    for (const t of contentTokens) weightedTf.set(t, (weightedTf.get(t) || 0) + FIELD_WEIGHT_CONTENT);
-
-    const weightedLen = titleTokens.length * FIELD_WEIGHT_TITLE
-      + tagTokens.length * FIELD_WEIGHT_TAGS
-      + contentTokens.length * FIELD_WEIGHT_CONTENT;
-
-    const allStems = new Set<string>();
-    for (const t of weightedTf.keys()) allStems.add(t);
-
-    // Stem sequence for bigram proximity (title first, then content — most important ordering)
-    const stemSequence = [...titleTokens, ...contentTokens];
-
-    const docRecord: DocRecord = { weightedTf, weightedLen, allStems, stemSequence };
-    this.docs.set(id, docRecord);
-    this.totalWeightedLen += weightedLen;
-
-    // Update posting lists
-    for (const term of allStems) {
-      let posting = this.postings.get(term);
-      if (!posting) { posting = new Set(); this.postings.set(term, posting); }
-      posting.add(id);
-
-      // Update prefix map (3-char prefixes for prefix matching)
-      if (term.length >= 3) {
-        const prefix = term.slice(0, 3);
-        let prefixSet = this.prefixMap.get(prefix);
-        if (!prefixSet) { prefixSet = new Set(); this.prefixMap.set(prefix, prefixSet); }
-        prefixSet.add(term);
-      }
-    }
-
-    this.idfStale = true;
-  }
-
-  /** Remove a document from the index. */
-  removeDocument(id: string): void {
-    const doc = this.docs.get(id);
-    if (!doc) return;
-
-    this.totalWeightedLen -= doc.weightedLen;
-    this.docs.delete(id);
-
-    // Remove from posting lists
-    for (const term of doc.allStems) {
-      const posting = this.postings.get(term);
-      if (posting) {
-        posting.delete(id);
-        if (posting.size === 0) {
-          this.postings.delete(term);
-          // Clean prefix map
-          if (term.length >= 3) {
-            const prefixSet = this.prefixMap.get(term.slice(0, 3));
-            if (prefixSet) { prefixSet.delete(term); if (prefixSet.size === 0) this.prefixMap.delete(term.slice(0, 3)); }
-          }
-        }
-      }
-    }
-
-    this.idfStale = true;
-  }
-
-  /** Recompute IDF values for all terms. Called lazily before search. */
-  private refreshIdf(): void {
-    if (!this.idfStale) return;
-    const N = this.docs.size;
-    this.idf.clear();
-    for (const [term, posting] of this.postings) {
-      const df = posting.size;
-      // BM25 IDF: log((N - df + 0.5) / (df + 0.5) + 1)
-      this.idf.set(term, Math.log((N - df + 0.5) / (df + 0.5) + 1));
-    }
-    this.idfStale = false;
-  }
-
-  /**
-   * Expand query terms with prefix matches.
-   * "deploy" → ["deploy", "deployment", "deploying", ...] (if they exist in the index)
-   */
-  private expandQueryTerms(queryStems: string[]): Map<string, number> {
-    const expanded = new Map<string, number>();
-
-    for (const qs of queryStems) {
-      // Exact match always gets full weight
-      if (this.postings.has(qs)) {
-        expanded.set(qs, Math.max(expanded.get(qs) || 0, 1.0));
-      }
-
-      // Prefix expansion: find all stems that start with the query stem (min 3 chars)
-      if (qs.length >= 3) {
-        const prefix = qs.slice(0, 3);
-        const candidates = this.prefixMap.get(prefix);
-        if (candidates) {
-          for (const candidate of candidates) {
-            if (candidate !== qs && candidate.startsWith(qs)) {
-              expanded.set(candidate, Math.max(expanded.get(candidate) || 0, PREFIX_MATCH_PENALTY));
-            }
-          }
-        }
-      }
-    }
-
-    return expanded;
-  }
-
-  /**
-   * Compute bigram proximity boost: if two query terms appear adjacent
-   * in the document's stem sequence, boost the score.
-   */
-  private bigramProximityBoost(docId: string, queryStems: string[]): number {
-    if (queryStems.length < 2) return 0;
-    const doc = this.docs.get(docId);
-    if (!doc || doc.stemSequence.length < 2) return 0;
-
-    let boost = 0;
-    const seq = doc.stemSequence;
-    const querySet = new Set(queryStems);
-
-    for (let i = 0; i < seq.length - 1; i++) {
-      if (querySet.has(seq[i]) && querySet.has(seq[i + 1]) && seq[i] !== seq[i + 1]) {
-        boost += 0.5; // Each adjacent pair of query terms adds 0.5
-      }
-    }
-
-    return Math.min(boost, 2.0); // Cap at 2.0 bonus
-  }
-
-  /**
-   * Search the index for documents matching a query.
-   * Returns scored results sorted by BM25F relevance.
-   *
-   * @param query - Raw query string
-   * @param candidateIds - Optional: only score these document IDs (for agent-scoped search)
-   * @returns Array of { id, score } sorted by descending score
-   */
-  search(query: string, candidateIds?: Set<string>): Array<{ id: string; score: number }> {
-    const queryStems = tokenize(query);
-    if (queryStems.length === 0) return [];
-
-    this.refreshIdf();
-
-    const expandedTerms = this.expandQueryTerms(queryStems);
-    if (expandedTerms.size === 0) return [];
-
-    const avgDl = this.avgDocLen;
-
-    // Collect candidate document IDs from posting lists
-    const candidates = new Set<string>();
-    for (const term of expandedTerms.keys()) {
-      const posting = this.postings.get(term);
-      if (posting) {
-        for (const docId of posting) {
-          if (!candidateIds || candidateIds.has(docId)) candidates.add(docId);
-        }
-      }
-    }
-
-    // Score each candidate
-    const results: Array<{ id: string; score: number }> = [];
-
-    for (const docId of candidates) {
-      const doc = this.docs.get(docId);
-      if (!doc) continue;
-
-      let score = 0;
-
-      for (const [term, weight] of expandedTerms) {
-        const tf = doc.weightedTf.get(term) || 0;
-        if (tf === 0) continue;
-        const termIdf = this.idf.get(term) || 0;
-
-        // BM25F: IDF × (weightedTF × (k1 + 1)) / (weightedTF + k1 × (1 - b + b × docLen/avgDocLen))
-        const numerator = tf * (BM25_K1 + 1);
-        const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * (doc.weightedLen / avgDl));
-        score += termIdf * (numerator / denominator) * weight;
-      }
-
-      // Bigram proximity boost
-      score += this.bigramProximityBoost(docId, queryStems);
-
-      if (score > 0) results.push({ id: docId, score });
-    }
-
-    results.sort((a, b) => b.score - a.score);
-    return results;
-  }
-
-  /** Check if a document exists in the index. */
-  has(id: string): boolean { return this.docs.has(id); }
-}
+// ─── BM25F Search Engine ─────────────────────────────────
+// Imported from shared module: src/lib/text-search.ts
+// Provides: MemorySearchIndex, tokenize, stem, stop words, field weights
 
 // ─── Agent Memory Manager ───────────────────────────────
 
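To make the relocated scorer concrete, this is the per-term BM25F arithmetic exactly as the removed (now shared) implementation computes it, with k1 = 1.2, b = 0.75, and IDF = log((N - df + 0.5) / (df + 0.5) + 1). A worked sketch with invented numbers:

// One query stem hits a document's title once, so its weighted tf is
// 1 × FIELD_WEIGHT_TITLE = 3.0. Assume the doc's weighted length is 30,
// the corpus average is 20, and the term occurs in 3 of 10 indexed docs.
const k1 = 1.2, b = 0.75;
const tf = 3.0;
const docLen = 30, avgDocLen = 20;
const N = 10, df = 3;

const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1);      // ≈ 1.145
const numerator = tf * (k1 + 1);                            // 6.6
const denom = tf + k1 * (1 - b + b * (docLen / avgDocLen)); // 4.65
const termScore = idf * (numerator / denom);                // ≈ 1.626
// Each term's contribution is then scaled by its query weight: ×1.0 for an
// exact match, ×0.7 (PREFIX_MATCH_PENALTY) for a prefix expansion, and the
// document gains up to +2.0 of bigram proximity boost (0.5 per adjacent pair).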