@agenticmail/enterprise 0.4.2 → 0.4.3
This diff shows the content of publicly available package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/dist/chunk-ANW4OHXR.js +764 -0
- package/dist/chunk-EVQPFQ55.js +9040 -0
- package/dist/chunk-JMTNHH7I.js +12666 -0
- package/dist/chunk-TYW5XTOW.js +395 -0
- package/dist/chunk-V2YIXYDJ.js +1943 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +5 -4
- package/dist/routes-ALTC4I2R.js +5674 -0
- package/dist/runtime-JLFTHMIT.js +47 -0
- package/dist/server-OGQWCOT6.js +11 -0
- package/dist/setup-HCMMUEW6.js +20 -0
- package/package.json +1 -1
- package/src/agent-tools/tools/memory.ts +42 -15
- package/src/engine/agent-memory.ts +4 -355
- package/src/lib/text-search.ts +358 -0
package/dist/chunk-TYW5XTOW.js
@@ -0,0 +1,395 @@
+// src/lib/text-search.ts
+var BM25_K1 = 1.2;
+var BM25_B = 0.75;
+var FIELD_WEIGHT_TITLE = 3;
+var FIELD_WEIGHT_TAGS = 2;
+var FIELD_WEIGHT_CONTENT = 1;
+var PREFIX_MATCH_PENALTY = 0.7;
+var STOP_WORDS = /* @__PURE__ */ new Set([
+  "a",
+  "about",
+  "above",
+  "after",
+  "again",
+  "against",
+  "all",
+  "am",
+  "an",
+  "and",
+  "any",
+  "are",
+  "as",
+  "at",
+  "be",
+  "because",
+  "been",
+  "before",
+  "being",
+  "below",
+  "between",
+  "both",
+  "but",
+  "by",
+  "can",
+  "could",
+  "did",
+  "do",
+  "does",
+  "doing",
+  "down",
+  "during",
+  "each",
+  "either",
+  "every",
+  "few",
+  "for",
+  "from",
+  "further",
+  "get",
+  "got",
+  "had",
+  "has",
+  "have",
+  "having",
+  "he",
+  "her",
+  "here",
+  "hers",
+  "herself",
+  "him",
+  "himself",
+  "his",
+  "how",
+  "i",
+  "if",
+  "in",
+  "into",
+  "is",
+  "it",
+  "its",
+  "itself",
+  "just",
+  "may",
+  "me",
+  "might",
+  "more",
+  "most",
+  "must",
+  "my",
+  "myself",
+  "neither",
+  "no",
+  "nor",
+  "not",
+  "now",
+  "of",
+  "off",
+  "on",
+  "once",
+  "only",
+  "or",
+  "other",
+  "ought",
+  "our",
+  "ours",
+  "ourselves",
+  "out",
+  "over",
+  "own",
+  "same",
+  "shall",
+  "she",
+  "should",
+  "so",
+  "some",
+  "such",
+  "than",
+  "that",
+  "the",
+  "their",
+  "theirs",
+  "them",
+  "themselves",
+  "then",
+  "there",
+  "these",
+  "they",
+  "this",
+  "those",
+  "through",
+  "to",
+  "too",
+  "under",
+  "until",
+  "up",
+  "us",
+  "very",
+  "was",
+  "we",
+  "were",
+  "what",
+  "when",
+  "where",
+  "which",
+  "while",
+  "who",
+  "whom",
+  "why",
+  "will",
+  "with",
+  "would",
+  "yet",
+  "you",
+  "your",
+  "yours",
+  "yourself",
+  "yourselves"
+]);
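The constants above parameterize a BM25F-style ranker: `BM25_K1` controls how quickly repeated occurrences of a term saturate, `BM25_B` controls document-length normalization, the three `FIELD_WEIGHT_*` values fold title, tag, and content hits into one weighted term frequency, and `PREFIX_MATCH_PENALTY` discounts prefix-only matches. As a rough sketch of the arithmetic these feed into (mirroring the `search` method further down, not code shipped in the package):

```ts
// Per-term BM25 contribution, as assembled later in search().
const BM25_K1 = 1.2;
const BM25_B = 0.75;

function bm25Term(tf: number, idf: number, docLen: number, avgDocLen: number): number {
  const numerator = tf * (BM25_K1 + 1);
  const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * (docLen / avgDocLen));
  return idf * (numerator / denominator);
}

// Saturation: doubling tf does not double the contribution.
bm25Term(1, 2.0, 100, 100); // = 2.0
bm25Term(2, 2.0, 100, 100); // = 2.75, not 4.0
```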
+var STEM_RULES = [
+  // Step 1: plurals and past participles
+  [/ies$/, "i", 3],
+  // policies → polici, queries → queri
+  [/sses$/, "ss", 4],
+  // addresses → address
+  [/([^s])s$/, "$1", 3],
+  // items → item, but not "ss"
+  [/eed$/, "ee", 4],
+  // agreed → agree
+  [/ed$/, "", 3],
+  // configured → configur, but min length 3
+  [/ing$/, "", 4],
+  // running → runn → run (handled below)
+  // Step 2: derivational suffixes
+  [/ational$/, "ate", 6],
+  // relational → relate
+  [/tion$/, "t", 5],
+  // adoption → adopt
+  [/ness$/, "", 5],
+  // awareness → aware
+  [/ment$/, "", 5],
+  // deployment → deploy
+  [/able$/, "", 5],
+  // configurable → configur
+  [/ible$/, "", 5],
+  // accessible → access
+  [/ful$/, "", 5],
+  // powerful → power
+  [/ous$/, "", 5],
+  // dangerous → danger
+  [/ive$/, "", 5],
+  // interactive → interact
+  [/ize$/, "", 4],
+  // normalize → normal
+  [/ise$/, "", 4],
+  // organise → organ
+  [/ally$/, "", 5],
+  // automatically → automat
+  [/ly$/, "", 4],
+  // quickly → quick
+  [/er$/, "", 4]
+  // handler → handl
+];
+var DOUBLE_CONSONANT = /([^aeiou])\1$/;
+function stem(word) {
+  if (word.length < 3) return word;
+  let stemmed = word;
+  for (const [pattern, replacement, minLen] of STEM_RULES) {
+    if (stemmed.length >= minLen && pattern.test(stemmed)) {
+      stemmed = stemmed.replace(pattern, replacement);
+      break;
+    }
+  }
+  if (stemmed.length > 2 && DOUBLE_CONSONANT.test(stemmed)) {
+    stemmed = stemmed.slice(0, -1);
+  }
+  return stemmed;
+}
+function tokenize(text) {
+  return text.toLowerCase().split(/[^a-z0-9]+/).filter((t) => t.length > 1 && !STOP_WORDS.has(t)).map(stem);
+}
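Tokenization lowercases, splits on non-alphanumerics, drops stop words and single characters, then applies the first matching stem rule plus the double-consonant cleanup. A hand-traced illustration of the expected behavior (worked out from the rules above, not package output):

```ts
tokenize("Deploying the new email agents quickly");
// → ["deploy", "new", "email", "agent", "quick"]
//   "the" is a stop word; "deploying" loses "ing", "agents" its
//   plural "s", and "quickly" its "ly".

stem("running");
// → "run": /ing$/ strips to "runn", then the double-consonant rule
//   drops the trailing "n".
```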
+var MemorySearchIndex = class {
+  /** Posting lists: stemmed term → Set of memory IDs containing it */
+  postings = /* @__PURE__ */ new Map();
+  /** Per-document metadata for BM25 scoring */
+  docs = /* @__PURE__ */ new Map();
+  /** Pre-computed IDF values. Stale flag triggers lazy recomputation. */
+  idf = /* @__PURE__ */ new Map();
+  idfStale = true;
+  /** 3-character prefix map for prefix matching: prefix → Set of full stems */
+  prefixMap = /* @__PURE__ */ new Map();
+  /** Total weighted document length (for computing average) */
+  totalWeightedLen = 0;
+  get docCount() {
+    return this.docs.size;
+  }
+  get avgDocLen() {
+    return this.docs.size > 0 ? this.totalWeightedLen / this.docs.size : 1;
+  }
+  /**
+   * Index a memory entry. Extracts stems from title, content, and tags
+   * with field-specific weighting and builds posting lists.
+   */
+  addDocument(id, entry) {
+    if (this.docs.has(id)) this.removeDocument(id);
+    const titleTokens = tokenize(entry.title);
+    const contentTokens = tokenize(entry.content);
+    const tagTokens = entry.tags.flatMap((t) => tokenize(t));
+    const weightedTf = /* @__PURE__ */ new Map();
+    for (const t of titleTokens) weightedTf.set(t, (weightedTf.get(t) || 0) + FIELD_WEIGHT_TITLE);
+    for (const t of tagTokens) weightedTf.set(t, (weightedTf.get(t) || 0) + FIELD_WEIGHT_TAGS);
+    for (const t of contentTokens) weightedTf.set(t, (weightedTf.get(t) || 0) + FIELD_WEIGHT_CONTENT);
+    const weightedLen = titleTokens.length * FIELD_WEIGHT_TITLE + tagTokens.length * FIELD_WEIGHT_TAGS + contentTokens.length * FIELD_WEIGHT_CONTENT;
+    const allStems = /* @__PURE__ */ new Set();
+    for (const t of weightedTf.keys()) allStems.add(t);
+    const stemSequence = [...titleTokens, ...contentTokens];
+    const docRecord = { weightedTf, weightedLen, allStems, stemSequence };
+    this.docs.set(id, docRecord);
+    this.totalWeightedLen += weightedLen;
+    for (const term of allStems) {
+      let posting = this.postings.get(term);
+      if (!posting) {
+        posting = /* @__PURE__ */ new Set();
+        this.postings.set(term, posting);
+      }
+      posting.add(id);
+      if (term.length >= 3) {
+        const prefix = term.slice(0, 3);
+        let prefixSet = this.prefixMap.get(prefix);
+        if (!prefixSet) {
+          prefixSet = /* @__PURE__ */ new Set();
+          this.prefixMap.set(prefix, prefixSet);
+        }
+        prefixSet.add(term);
+      }
+    }
+    this.idfStale = true;
+  }
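A minimal indexing sketch, assuming the `{ title, content, tags }` entry shape that `addDocument` reads; the IDs and entries are invented for illustration:

```ts
const index = new MemorySearchIndex();

index.addDocument("mem-1", {
  title: "Deployment checklist",
  content: "Steps for deploying the mail server to production.",
  tags: ["ops", "deploy"],
});

// Re-indexing the same ID first removes the old posting-list entries,
// so updates cannot leave stale terms behind.
index.addDocument("mem-1", {
  title: "Deployment checklist (v2)",
  content: "Updated steps for deploying the mail server.",
  tags: ["ops"],
});
```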
+  /** Remove a document from the index. */
+  removeDocument(id) {
+    const doc = this.docs.get(id);
+    if (!doc) return;
+    this.totalWeightedLen -= doc.weightedLen;
+    this.docs.delete(id);
+    for (const term of doc.allStems) {
+      const posting = this.postings.get(term);
+      if (posting) {
+        posting.delete(id);
+        if (posting.size === 0) {
+          this.postings.delete(term);
+          if (term.length >= 3) {
+            const prefixSet = this.prefixMap.get(term.slice(0, 3));
+            if (prefixSet) {
+              prefixSet.delete(term);
+              if (prefixSet.size === 0) this.prefixMap.delete(term.slice(0, 3));
+            }
+          }
+        }
+      }
+    }
+    this.idfStale = true;
+  }
+  /** Recompute IDF values for all terms. Called lazily before search. */
+  refreshIdf() {
+    if (!this.idfStale) return;
+    const N = this.docs.size;
+    this.idf.clear();
+    for (const [term, posting] of this.postings) {
+      const df = posting.size;
+      this.idf.set(term, Math.log((N - df + 0.5) / (df + 0.5) + 1));
+    }
+    this.idfStale = false;
+  }
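`refreshIdf` computes the smoothed BM25 inverse document frequency, idf(t) = ln(1 + (N - df + 0.5) / (df + 0.5)), which stays positive even for terms that appear in most documents. Illustrative values (invented numbers, not taken from the package):

```ts
const idf = (N: number, df: number) =>
  Math.log((N - df + 0.5) / (df + 0.5) + 1);

idf(100, 3);  // ≈ 3.36, rare term, strong signal
idf(100, 90); // ≈ 0.11, near-ubiquitous term, little signal
```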
+  /**
+   * Expand query terms with prefix matches.
+   * "deploy" → ["deploy", "deployment", "deploying", ...] (if they exist in the index)
+   */
+  expandQueryTerms(queryStems) {
+    const expanded = /* @__PURE__ */ new Map();
+    for (const qs of queryStems) {
+      if (this.postings.has(qs)) {
+        expanded.set(qs, Math.max(expanded.get(qs) || 0, 1));
+      }
+      if (qs.length >= 3) {
+        const prefix = qs.slice(0, 3);
+        const candidates = this.prefixMap.get(prefix);
+        if (candidates) {
+          for (const candidate of candidates) {
+            if (candidate !== qs && candidate.startsWith(qs)) {
+              expanded.set(candidate, Math.max(expanded.get(candidate) || 0, PREFIX_MATCH_PENALTY));
+            }
+          }
+        }
+      }
+    }
+    return expanded;
+  }
+  /**
+   * Compute bigram proximity boost: if two query terms appear adjacent
+   * in the document's stem sequence, boost the score.
+   */
+  bigramProximityBoost(docId, queryStems) {
+    if (queryStems.length < 2) return 0;
+    const doc = this.docs.get(docId);
+    if (!doc || doc.stemSequence.length < 2) return 0;
+    let boost = 0;
+    const seq = doc.stemSequence;
+    const querySet = new Set(queryStems);
+    for (let i = 0; i < seq.length - 1; i++) {
+      if (querySet.has(seq[i]) && querySet.has(seq[i + 1]) && seq[i] !== seq[i + 1]) {
+        boost += 0.5;
+      }
+    }
+    return Math.min(boost, 2);
+  }
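Two refinements sit on top of plain BM25 here: prefix expansion lets a short query stem stand in for longer indexed stems (scored at the 0.7 penalty), and the bigram boost adds 0.5 per adjacent pair of distinct query stems, capped at 2. A hand-traced sketch continuing the `mem-1` example above (the expansions shown assume those stems are in the index):

```ts
// "dep" is not an indexed stem, but the 3-character bucket "dep" holds
// "deploy" (the stem of both "Deployment" and "deploying"), so the
// query still matches, at the penalized weight:
index.search("dep");
// internally: expandQueryTerms(["dep"]) → Map { "deploy" => 0.7 }

// For "deploy mail", mem-1's stem sequence [..., "deploy", "mail", ...]
// contains the two query stems adjacently, earning a +0.5 proximity
// boost on top of the BM25 score.
index.search("deploy mail");
```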
+  /**
+   * Search the index for documents matching a query.
+   * Returns scored results sorted by BM25F relevance.
+   *
+   * @param query - Raw query string
+   * @param candidateIds - Optional: only score these document IDs (for agent-scoped search)
+   * @returns Array of { id, score } sorted by descending score
+   */
+  search(query, candidateIds) {
+    const queryStems = tokenize(query);
+    if (queryStems.length === 0) return [];
+    this.refreshIdf();
+    const expandedTerms = this.expandQueryTerms(queryStems);
+    if (expandedTerms.size === 0) return [];
+    const avgDl = this.avgDocLen;
+    const candidates = /* @__PURE__ */ new Set();
+    for (const term of expandedTerms.keys()) {
+      const posting = this.postings.get(term);
+      if (posting) {
+        for (const docId of posting) {
+          if (!candidateIds || candidateIds.has(docId)) candidates.add(docId);
+        }
+      }
+    }
+    const results = [];
+    for (const docId of candidates) {
+      const doc = this.docs.get(docId);
+      if (!doc) continue;
+      let score = 0;
+      for (const [term, weight] of expandedTerms) {
+        const tf = doc.weightedTf.get(term) || 0;
+        if (tf === 0) continue;
+        const termIdf = this.idf.get(term) || 0;
+        const numerator = tf * (BM25_K1 + 1);
+        const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * (doc.weightedLen / avgDl));
+        score += termIdf * (numerator / denominator) * weight;
+      }
+      score += this.bigramProximityBoost(docId, queryStems);
+      if (score > 0) results.push({ id: docId, score });
+    }
+    results.sort((a, b) => b.score - a.score);
+    return results;
+  }
+  /** Check if a document exists in the index. */
+  has(id) {
+    return this.docs.has(id);
+  }
+};
+
+export {
+  MemorySearchIndex
+};
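Putting the exported class together, a plausible end-to-end sketch; the import path, IDs, and entries are assumptions for illustration, and scores are elided since they depend on the whole corpus:

```ts
import { MemorySearchIndex } from "./chunk-TYW5XTOW.js";

const index = new MemorySearchIndex();
index.addDocument("mem-1", {
  title: "Deployment checklist",
  content: "Steps for deploying the mail server to production.",
  tags: ["ops"],
});
index.addDocument("mem-2", {
  title: "Inbox triage rules",
  content: "How the agent prioritizes unread mail.",
  tags: ["email"],
});

// Unscoped search ranks across all indexed memories; mem-1 matches
// both stems plus the proximity boost, mem-2 only "mail", so mem-1
// should rank first.
index.search("deploy mail");
// → [{ id: "mem-1", score: … }, { id: "mem-2", score: … }]

// Agent-scoped search: only IDs in the candidate set are scored.
index.search("deploy mail", new Set(["mem-2"]));
// → [{ id: "mem-2", score: … }]
```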