@1mbrain/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +42 -0
- package/src/config.ts +50 -0
- package/src/db/index.ts +38 -0
- package/src/db/postgres-provider.ts +525 -0
- package/src/db/sqlite-provider.ts +548 -0
- package/src/embedding/index.ts +56 -0
- package/src/embedding/keyword-provider.ts +71 -0
- package/src/embedding/ollama-provider.ts +78 -0
- package/src/embedding/openai-provider.ts +99 -0
- package/src/engine.ts +1073 -0
- package/src/events.ts +142 -0
- package/src/index.ts +83 -0
- package/src/logger.ts +31 -0
- package/src/passport.ts +118 -0
- package/src/ranking-policy.ts +563 -0
- package/src/schemas.ts +114 -0
- package/src/types.ts +229 -0
- package/tests/benchmark.ts +125 -0
- package/tests/embedding.test.ts +119 -0
- package/tests/engine.test.ts +1017 -0
- package/tests/passport.test.ts +83 -0
- package/tests/ranking-policy.test.ts +268 -0
- package/tsconfig.json +9 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/vitest.config.ts +18 -0
package/src/engine.ts
ADDED
|
@@ -0,0 +1,1073 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Engine
|
|
3
|
+
*
|
|
4
|
+
* The core of 1MBrain — orchestrates remember, recall, forget, and associate
|
|
5
|
+
* operations. Coordinates between the database provider, embedding provider,
|
|
6
|
+
* and event bus.
|
|
7
|
+
*
|
|
8
|
+
* This is the single entry point that API routes call into.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
12
|
+
import type {
|
|
13
|
+
Memory,
|
|
14
|
+
DatabaseProvider,
|
|
15
|
+
EmbeddingProvider,
|
|
16
|
+
CreateMemoryInput,
|
|
17
|
+
SearchMemoryInput,
|
|
18
|
+
SearchResult,
|
|
19
|
+
CreateAssociationInput,
|
|
20
|
+
MemoryPassport,
|
|
21
|
+
MemoryType,
|
|
22
|
+
} from './types.js';
|
|
23
|
+
import type { EventBus } from './events.js';
|
|
24
|
+
import { createChildLogger } from './logger.js';
|
|
25
|
+
import { RankingPolicy, analyzeQueryIntent } from './ranking-policy.js';
|
|
26
|
+
|
|
27
|
+
const log = createChildLogger('memory-engine');
|
|
28
|
+
|
|
29
|
+
export class MemoryEngine {
|
|
30
|
+
/** Storage backend used for memories and their associations. */
private readonly db: DatabaseProvider;

/** Produces embedding vectors for memory content and queries. */
private readonly embedder: EmbeddingProvider;

/** Receives lifecycle events published by the engine. */
private readonly eventBus: EventBus;

/** Re-ranks recall candidates; looks up associations through `db`. */
private readonly rankingPolicy: RankingPolicy;

/** Handle of the running decay timer, or `null` when no loop is active. */
private decayInterval: ReturnType<typeof setInterval> | null = null;

/**
 * Wires the engine to its collaborators.
 *
 * @param db Database provider for memories and associations.
 * @param embedder Embedding provider used for content and queries.
 * @param eventBus Bus that engine events are published to.
 */
constructor(db: DatabaseProvider, embedder: EmbeddingProvider, eventBus: EventBus) {
  this.db = db;
  this.embedder = embedder;
  this.eventBus = eventBus;

  // The ranking policy only needs a way to read associations, not the whole provider.
  const lookupAssociations = (memoryId: string) => this.db.getAssociations(memoryId);
  this.rankingPolicy = new RankingPolicy(lookupAssociations);
}
|
|
42
|
+
|
|
43
|
+
// ─── Remember ─────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
/**
 * Stores a new memory and links it into the association graph.
 *
 * Steps, in order: embed the content, persist the memory, invalidate older
 * memories it supersedes, create caller-supplied associations, auto-associate
 * with similar memories, and publish a `memory:created` event.
 *
 * @param input Content, owner and optional importance/tags/metadata/associations.
 * @returns The memory as returned by the database provider.
 */
async remember(input: CreateMemoryInput): Promise<Memory> {
  const { agentId, type, content } = input;
  log.info({ agentId, type }, 'Remembering...');

  const embedding = await this.embedder.embed(content);

  const memory = await this.db.createMemory({
    id: uuidv4(),
    agentId,
    type,
    content,
    embeddingModel: this.embedder.model,
    embedding,
    importance: input.importance ?? 0.5,
    decayScore: 1.0,
    tags: input.tags ?? [],
    metadata: input.metadata,
  });

  await this.invalidateSupersededMemories(memory);

  // Caller-supplied links are best-effort: one bad target must not fail the write.
  for (const link of input.associations ?? []) {
    try {
      await this.db.createAssociation({
        sourceId: memory.id,
        targetId: link.targetId,
        strength: link.strength ?? 0.5,
        origin: 'explicit',
        relationType: link.relationType ?? 'relates_to',
      });
    } catch (err) {
      log.warn({ err, targetId: link.targetId }, 'Failed to create association');
    }
  }

  await this.autoAssociate(memory);

  const { tags, importance, decayScore } = memory;
  await this.eventBus.publish({
    type: 'memory:created',
    memoryId: memory.id,
    agentId: memory.agentId,
    memoryType: memory.type,
    timestamp: new Date(),
    data: { content: memory.content, tags, importance, decayScore },
  });

  return memory;
}
|
|
103
|
+
|
|
104
|
+
// ─── Recall ───────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
/**
 * Marks older memories as stale when a newly written memory replaces them.
 *
 * Runs only when the new memory has an embedding and `looksLikeStateUpdate`
 * accepts it. Each superseded memory gets `decayScore: 0`, stale metadata
 * pointing at the new memory, and an explicit `supersedes` association.
 *
 * @param memory The memory that was just persisted.
 */
private async invalidateSupersededMemories(memory: Memory): Promise<void> {
  const { embedding } = memory;
  if (!embedding) return;
  if (!looksLikeStateUpdate(memory)) return;

  // Loose threshold: candidates are screened again by shouldSupersede below.
  const tagFilter = memory.tags.length > 0 ? memory.tags : undefined;
  const candidates = await this.db.searchByVector(memory.agentId, embedding, {
    limit: 12,
    threshold: 0.25,
    type: memory.type,
    tags: tagFilter,
  });

  for (const { memory: existing } of candidates) {
    const isSelf = existing.id === memory.id;
    if (isSelf || !shouldSupersede(memory, existing)) continue;

    const staleMetadata = {
      ...(existing.metadata ?? {}),
      role: 'stale',
      supersededBy: memory.id,
      supersededAt: new Date().toISOString(),
      supersededReason: 'write_time_invalidation',
    };
    await this.db.updateMemory(existing.id, memory.agentId, {
      decayScore: 0,
      metadata: staleMetadata,
    });

    await this.db.createAssociation({
      sourceId: memory.id,
      targetId: existing.id,
      strength: 1,
      origin: 'explicit',
      relationType: 'supersedes',
    });
  }
}
|
|
141
|
+
|
|
142
|
+
/**
 * Retrieves the memories most relevant to a query for one agent.
 *
 * Pipeline:
 *  1. Vector search with an expanded query embedding.
 *  2. Lexical candidate search (unless spreading activation is disabled),
 *     which boosts vector hits and adds lexical-only candidates.
 *  3. Spreading activation over explicit associations, when the query intent
 *     asks for graph traversal and there is at least one vector hit.
 *  4. Re-ranking via the ranking policy, which may abstain (empty result).
 *
 * A `memory:accessed` event is published for every returned result, and
 * co-occurrence associations are recorded between the returned memories.
 *
 * @param input Query text, agent and optional tuning knobs (limit, thresholds,
 *   blend weight, hop count, type/tag filters).
 * @returns At most `limit` results (default 10); empty when the policy abstains.
 */
async recall(input: SearchMemoryInput): Promise<SearchResult[]> {
  log.info({ agentId: input.agentId, query: input.query.slice(0, 50) }, 'Recalling...');
  const limit = input.limit ?? 10;
  const vectorThreshold = input.threshold ?? 0.3;
  const activationThreshold = input.activationThreshold ?? 0.15;
  const blendWeight = input.blendWeight ?? 0.35;
  const useSpreadingActivation = input.useSpreadingActivation !== false;
  const queryIntent = analyzeQueryIntent(input.query);
  const includeStaleCandidates = shouldIncludeStaleCandidates(input.query);
  const candidateLimit = useSpreadingActivation ? Math.max(limit, limit * 3) : limit;
  // Over-fetch when stale memories will be filtered out afterwards.
  const vectorSearchLimit = includeStaleCandidates ? candidateLimit : candidateLimit * 4;

  const queryEmbedding = await this.buildExpandedQueryEmbedding(input.query);

  // Pass 1: vector similarity search.
  const rawVectorResults = await this.db.searchByVector(input.agentId, queryEmbedding, {
    limit: vectorSearchLimit,
    threshold: vectorThreshold,
    type: input.type,
    tags: input.tags,
  });
  const vectorResults = rawVectorResults
    .filter((result) => includeStaleCandidates || !isStaleMemory(result.memory))
    .slice(0, candidateLimit);

  const resultsById = new Map<string, SearchResult>();
  const vectorScores = new Map<string, number>();
  // Lexical boost applied to each vector hit, kept so later passes can re-apply it.
  const lexicalBoosts = new Map<string, number>();

  for (const result of vectorResults) {
    vectorScores.set(result.memory.id, result.similarity);
    resultsById.set(result.memory.id, {
      memory: result.memory,
      score: result.similarity,
      source: 'vector',
    });
  }

  if (useSpreadingActivation) {
    const lexicalResults = await this.lexicalCandidateSearch(
      input,
      includeStaleCandidates,
      Math.max(candidateLimit, limit * 2),
    );

    for (const result of lexicalResults) {
      const existing = resultsById.get(result.memory.id);
      const lexicalBoost = result.score * 0.22;
      const trace = `lexical_seed:+${lexicalBoost.toFixed(3)}`;

      if (existing) {
        existing.score += lexicalBoost;
        existing.rankingTrace = [...(existing.rankingTrace ?? []), trace];
        lexicalBoosts.set(result.memory.id, lexicalBoost);
        continue;
      }

      resultsById.set(result.memory.id, {
        memory: result.memory,
        score: Math.max(0.05, Math.min(0.35, result.score * 0.35)),
        source: 'lexical',
        rankingTrace: [trace],
      });
    }
  }

  // Pass 2: spreading activation (if enabled and the query benefits from graph traversal).
  if (useSpreadingActivation && queryIntent.needsGraphTraversal && vectorResults.length > 0) {
    const activationResults = await this.spreadingActivation(
      vectorResults.map((r) => ({ id: r.memory.id, score: r.similarity })),
      input.agentId,
      input.maxHops ?? 2,
      activationThreshold,
      true,
      includeStaleCandidates,
    );

    for (const activated of activationResults) {
      if (!includeStaleCandidates && isStaleMemory(activated.memory)) continue;

      const id = activated.memory.id;
      const prior = resultsById.get(id);
      const vectorScore = vectorScores.get(id);
      const blendedScore =
        vectorScore === undefined
          ? activated.score * blendWeight
          : vectorScore * (1 - blendWeight) + activated.score * blendWeight;

      // Activation results include the seeds themselves, so this merge touches
      // every vector hit. Previously it replaced the entry outright, silently
      // discarding the lexical boost and ranking trace computed above.
      // Lexical-only candidates keep the better of their base score and the
      // graph-derived score; vector hits get their lexical boost re-applied.
      const score =
        prior?.source === 'lexical'
          ? Math.max(prior.score, blendedScore)
          : blendedScore + (lexicalBoosts.get(id) ?? 0);

      resultsById.set(id, {
        memory: activated.memory,
        score,
        source: vectorScore === undefined ? 'association' : 'combined',
        ...(prior?.rankingTrace ? { rankingTrace: prior.rankingTrace } : {}),
      });
    }
  }

  const results = [...resultsById.values()];

  const rankedOutcome =
    useSpreadingActivation && results.length > 0
      ? await this.rankingPolicy.rank(input.query, results)
      : null;
  const finalResults = rankedOutcome?.abstained
    ? []
    : (rankedOutcome?.results ?? results).slice(0, limit);

  // Emit one access event per returned memory.
  for (const result of finalResults) {
    await this.eventBus.publish({
      type: 'memory:accessed',
      memoryId: result.memory.id,
      agentId: input.agentId,
      memoryType: result.memory.type,
      timestamp: new Date(),
      data: {
        content: result.memory.content,
        tags: result.memory.tags,
        score: result.score,
        source: result.source,
        blendWeight,
        rankingTrace: result.rankingTrace,
        importance: result.memory.importance,
        decayScore: result.memory.decayScore,
      },
    });
  }

  // Memories recalled together become weakly associated.
  await this.trackCoOccurrence(finalResults.map((r) => r.memory));

  return finalResults;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Scores every memory of the agent against the query's lexical profile and
 * returns the best matches.
 *
 * Memories are dropped when they fail the type/tag filters, are stale (unless
 * stale candidates are allowed), or mention entities of their own while
 * sharing none with the query.
 *
 * @param input Search input; `query`, `agentId`, `type` and `tags` are used.
 * @param includeStaleCandidates Whether stale memories may be returned.
 * @param limit Maximum number of candidates to return.
 * @returns Candidates with a score of at least the minimum, best first.
 */
private async lexicalCandidateSearch(
  input: SearchMemoryInput,
  includeStaleCandidates: boolean,
  limit: number,
): Promise<Array<{ memory: Memory; score: number }>> {
  const queryProfile = buildLexicalProfile(input.query);
  const profileIsEmpty = queryProfile.tokens.length === 0 && queryProfile.exactTerms.length === 0;
  if (profileIsEmpty) return [];

  const queryEntities = queryProfile.entities;
  const hasQueryEntities = queryEntities.length > 0;
  // A stricter cut-off applies when the query names entities.
  const minScore = hasQueryEntities ? 0.36 : 0.32;

  const scored: Array<{ memory: Memory; score: number }> = [];

  for (const memory of await this.db.getAllMemories(input.agentId)) {
    if (input.type && memory.type !== input.type) continue;
    if (input.tags?.length && !input.tags.some((tag) => memory.tags.includes(tag))) continue;
    if (!includeStaleCandidates && isStaleMemory(memory)) continue;

    if (hasQueryEntities) {
      const memoryEntities = extractEntityTerms(memory.content);
      // Memories without entities pass; those with only foreign entities do not.
      if (memoryEntities.length > 0) {
        const sharesEntity = queryEntities.some((qe) =>
          memoryEntities.some((me) => me === qe || me.startsWith(qe) || qe.startsWith(me)),
        );
        if (!sharesEntity) continue;
      }
    }

    const score = lexicalEvidenceScore(queryProfile, memory);
    if (score >= minScore) {
      scored.push({ memory, score });
    }
  }

  scored.sort((left, right) => right.score - left.score);
  return scored.slice(0, limit);
}
|
|
310
|
+
|
|
311
|
+
// ─── Forget ───────────────────────────────────────────
|
|
312
|
+
|
|
313
|
+
/**
 * Deletes a memory and its associations, then publishes `memory:deleted`.
 *
 * Ownership is verified before anything is removed: `deleteAssociations` is
 * keyed by memory id only, so calling it first would wipe the associations of
 * a memory that belongs to another agent (or is not deleted at all).
 *
 * @param memoryId Id of the memory to delete.
 * @param agentId Agent namespace the memory must belong to.
 * @returns `true` when the memory was deleted, `false` when it was not found
 *   for this agent or the provider reported no deletion.
 */
async forget(memoryId: string, agentId: string): Promise<boolean> {
  log.info({ memoryId, agentId }, 'Forgetting...');

  const owned = await this.db.getMemoryById(memoryId, agentId);
  if (!owned) {
    return false;
  }

  // Delete associations first (cascade should handle this, but be explicit).
  await this.db.deleteAssociations(memoryId);

  const deleted = await this.db.deleteMemory(memoryId, agentId);

  if (deleted) {
    await this.eventBus.publish({
      type: 'memory:deleted',
      memoryId,
      agentId,
      timestamp: new Date(),
    });
  }

  return deleted;
}
|
|
332
|
+
|
|
333
|
+
// ─── Associate ────────────────────────────────────────
|
|
334
|
+
|
|
335
|
+
/**
 * Removes every memory of an agent by forgetting them one at a time, so each
 * deletion goes through the same path as `forget`.
 *
 * @param agentId Agent whose memories are removed.
 */
async resetAgent(agentId: string): Promise<void> {
  log.info({ agentId }, 'Resetting agent memories...');

  const allMemories = await this.db.getAllMemories(agentId);
  for (const { id } of allMemories) {
    await this.forget(id, agentId);
  }
}
|
|
343
|
+
|
|
344
|
+
/**
 * Creates an association between two memories and publishes
 * `association:created`.
 *
 * When `input.agentId` is given, both memories must exist in that agent's
 * namespace. Without it no ownership check is performed.
 *
 * @param input Source/target ids plus optional strength (default 0.5),
 *   origin (default `'explicit'`) and relation type (default `'relates_to'`).
 * @throws Error when `agentId` is given and either memory is missing for it.
 */
async associate(input: CreateAssociationInput): Promise<void> {
  log.info({ sourceId: input.sourceId, targetId: input.targetId }, 'Creating association...');

  if (input.agentId) {
    const [source, target] = await Promise.all([
      this.db.getMemoryById(input.sourceId, input.agentId),
      this.db.getMemoryById(input.targetId, input.agentId),
    ]);

    if (!source || !target) {
      throw new Error('Both associated memories must exist in the same agent namespace');
    }
  }

  // Resolve the default once so the stored value and the event payload agree.
  // Previously the event carried the raw, possibly undefined, input strength.
  const strength = input.strength ?? 0.5;

  await this.db.createAssociation({
    sourceId: input.sourceId,
    targetId: input.targetId,
    strength,
    origin: input.origin ?? 'explicit',
    relationType: input.relationType ?? 'relates_to',
  });

  await this.eventBus.publish({
    type: 'association:created',
    memoryId: input.sourceId,
    agentId: input.agentId ?? '',
    timestamp: new Date(),
    data: { targetId: input.targetId, strength },
  });
}
|
|
374
|
+
|
|
375
|
+
// ─── Query Expansion ──────────────────────────────────
|
|
376
|
+
|
|
377
|
+
/**
 * Produces the embedding used for vector search.
 *
 * For the `local-keyword` embedder the query is embedded as-is. For any other
 * model the query and its `expandQuery` reformulation are embedded in
 * parallel, averaged component-wise, and scaled back to unit length.
 *
 * @param query Raw query text.
 * @returns The query embedding; the unscaled average if its magnitude is zero.
 */
private async buildExpandedQueryEmbedding(query: string): Promise<number[]> {
  // Averaging sparse keyword vectors adds nothing, so skip the expansion.
  if (this.embedder.model === 'local-keyword') {
    return this.embedder.embed(query);
  }

  const reformulated = expandQuery(query);
  const [base, variant] = await Promise.all([
    this.embedder.embed(query),
    this.embedder.embed(reformulated),
  ]);

  // Component-wise mean; a missing component in the variant counts as 0.
  const blended: number[] = [];
  let sumOfSquares = 0;
  base.forEach((component, index) => {
    const mean = (component + (variant[index] ?? 0)) / 2;
    blended.push(mean);
    sumOfSquares += mean * mean;
  });

  const norm = Math.sqrt(sumOfSquares);
  if (norm > 0) {
    return blended.map((component) => component / norm);
  }
  return blended;
}
|
|
406
|
+
|
|
407
|
+
// ─── Export (Memory Passport) ─────────────────────────
|
|
408
|
+
|
|
409
|
+
/**
 * Builds a Memory Passport: a snapshot of all memories and associations of
 * an agent.
 *
 * Embeddings and embedding-model names are nulled on every exported memory;
 * `importPassport` re-embeds the content.
 *
 * @param agentId Agent whose data is exported.
 * @returns The passport, including per-type memory counts.
 */
async exportPassport(agentId: string): Promise<MemoryPassport> {
  log.info({ agentId }, 'Exporting Memory Passport...');

  const memories = await this.db.getAllMemories(agentId);
  const associations = await this.db.getAllAssociations(agentId);

  // Tally memories per type.
  const memoryTypes = {} as Record<MemoryType, number>;
  for (const { type } of memories) {
    memoryTypes[type] = (memoryTypes[type] || 0) + 1;
  }

  const portableMemories = memories.map((m) => ({
    ...m,
    embedding: null,
    embeddingModel: null,
  }));

  return {
    version: '1.0.0',
    exportedAt: new Date(),
    sourceAgent: agentId,
    embeddingModel: this.embedder.model,
    memories: portableMemories,
    associations,
    metadata: {
      totalMemories: memories.length,
      totalAssociations: associations.length,
      memoryTypes,
    },
  };
}
|
|
442
|
+
|
|
443
|
+
// ─── Import (Memory Passport) ─────────────────────────
|
|
444
|
+
|
|
445
|
+
/**
 * Imports a Memory Passport into an agent namespace.
 *
 * All memory content is re-embedded with the local embedder (batch first,
 * falling back to one-by-one on failure). A memory counts as a duplicate when
 * a vector search at threshold 0.98 finds an existing memory; duplicates are
 * handled according to `conflictStrategy`. Associations are re-created using
 * the old-id → new-id mapping and are skipped when either end was not
 * imported.
 *
 * @param passport Passport to import.
 * @param targetAgentId Destination agent; falls back to `passport.sourceAgent`
 *   when omitted or empty.
 * @param conflictStrategy `'skip'` keeps the existing memory, `'merge'` raises
 *   its importance to the maximum and unions the tags (counted as imported),
 *   `'overwrite'` deletes it and creates the incoming one.
 * @returns Counts of imported, skipped and failed memories. Each memory is
 *   counted at most once.
 */
async importPassport(
  passport: MemoryPassport,
  targetAgentId?: string,
  conflictStrategy: 'skip' | 'merge' | 'overwrite' = 'skip',
): Promise<{ imported: number; skipped: number; errors: number }> {
  const agentId = targetAgentId || passport.sourceAgent;
  log.info(
    { agentId, totalMemories: passport.memories.length, conflictStrategy },
    'Importing Memory Passport...',
  );

  let imported = 0;
  let skipped = 0;
  let errors = 0;

  // Re-embed all memory content using the local embedding model.
  const contents = passport.memories.map((m) => m.content);
  let embeddings: number[][];

  try {
    embeddings = await this.embedder.embedBatch(contents);
  } catch (err) {
    log.error({ err }, 'Failed to batch embed during import, falling back to sequential');
    embeddings = [];
    for (const content of contents) {
      try {
        embeddings.push(await this.embedder.embed(content));
      } catch (innerErr) {
        log.error({ innerErr, content: content.slice(0, 50) }, 'Failed to embed');
        // Placeholder keeps indices aligned with passport.memories. The error
        // is counted once, by the import loop below, when it sees the empty
        // embedding; counting here as well reported every failure twice.
        embeddings.push([]);
      }
    }
  }

  // NOTE(review): imported memories are created without `metadata`, so markers
  // such as a stale role are not carried over — confirm whether the passport
  // type exposes it and whether it should be preserved.
  const idMapping = new Map<string, string>(); // old ID → new ID

  for (const [index, m] of passport.memories.entries()) {
    const embedding = embeddings[index];

    if (!embedding || embedding.length === 0) {
      errors++;
      continue;
    }

    try {
      // Near-identical vector == same content, for the purpose of de-duplication.
      const [duplicate] = await this.db.searchByVector(agentId, embedding, {
        limit: 1,
        threshold: 0.98,
      });

      if (duplicate) {
        if (conflictStrategy === 'skip') {
          idMapping.set(m.id, duplicate.memory.id);
          skipped++;
          continue;
        }
        if (conflictStrategy === 'merge') {
          await this.db.updateMemory(duplicate.memory.id, agentId, {
            importance: Math.max(duplicate.memory.importance, m.importance),
            tags: [...new Set([...duplicate.memory.tags, ...m.tags])],
          });
          idMapping.set(m.id, duplicate.memory.id);
          imported++;
          continue;
        }
        // overwrite: delete existing, then fall through to create the new one.
        await this.db.deleteMemory(duplicate.memory.id, agentId);
      }

      const newMemory = await this.db.createMemory({
        id: uuidv4(),
        agentId,
        type: m.type,
        content: m.content,
        embeddingModel: this.embedder.model,
        embedding,
        importance: m.importance,
        decayScore: m.decayScore,
        tags: m.tags,
      });

      idMapping.set(m.id, newMemory.id);
      imported++;
    } catch (err) {
      log.error({ err, memoryId: m.id }, 'Failed to import memory');
      errors++;
    }
  }

  // Import associations with ID remapping; both ends must have been mapped.
  for (const assoc of passport.associations) {
    const newSourceId = idMapping.get(assoc.sourceId);
    const newTargetId = idMapping.get(assoc.targetId);
    if (!newSourceId || !newTargetId) continue;

    try {
      await this.db.createAssociation({
        sourceId: newSourceId,
        targetId: newTargetId,
        strength: assoc.strength,
        origin: assoc.origin,
        relationType: assoc.relationType ?? 'relates_to',
      });
    } catch (err) {
      log.warn({ err }, 'Failed to import association');
    }
  }

  log.info({ imported, skipped, errors }, 'Memory Passport import complete');
  return { imported, skipped, errors };
}
|
|
562
|
+
|
|
563
|
+
// ─── Decay Management ─────────────────────────────────
|
|
564
|
+
|
|
565
|
+
/**
 * Starts (or restarts) the periodic decay of memories and associations.
 *
 * Any loop that is already running is cancelled first. Errors inside a cycle
 * are logged and do not stop the loop.
 *
 * @param intervalMs Delay between cycles in milliseconds (default one hour).
 * @param decayRate Rate passed to the provider's decay functions.
 * @param minScore Minimum score passed to the provider's decay functions.
 */
startDecayLoop(intervalMs = 3600000, decayRate = 0.01, minScore = 0.01): void {
  if (this.decayInterval) {
    clearInterval(this.decayInterval);
  }

  log.info({ intervalMs, decayRate, minScore }, 'Starting decay loop');

  const runDecayCycle = async (): Promise<void> => {
    try {
      const affected = await this.db.applyDecay(decayRate, minScore);
      const affectedAssociations = await this.db.applyAssociationDecay(decayRate, minScore);
      log.debug({ affected, affectedAssociations }, 'Decay cycle complete');
    } catch (err) {
      log.error({ err }, 'Decay cycle error');
    }
  };

  this.decayInterval = setInterval(runDecayCycle, intervalMs);
}
|
|
582
|
+
|
|
583
|
+
/** Cancels the decay loop if one is running; otherwise does nothing. */
stopDecayLoop(): void {
  const timer = this.decayInterval;
  if (!timer) return;

  clearInterval(timer);
  this.decayInterval = null;
  log.info('Decay loop stopped');
}
|
|
590
|
+
|
|
591
|
+
// ─── Spreading Activation ─────────────────────────────
|
|
592
|
+
|
|
593
|
+
/**
 * Spreads activation from seed memories along associations.
 *
 * Each hop propagates `parent activation × edge strength × origin weight ×
 * 1/(hop + 2)` to the neighbour; a node keeps the highest activation it has
 * received. Neighbours reaching `threshold` are expanded on the next hop, and
 * every node is expanded at most once.
 *
 * @param seeds Starting memories with their initial activation.
 * @param agentId Agent namespace used to load the activated memories.
 * @param maxHops Maximum number of hops from the seeds.
 * @param threshold Minimum activation to expand a node and to return it.
 * @param explicitOnly When true, only `'explicit'` associations are followed.
 * @param allowStalePaths When false, `'supersedes'` edges are not followed.
 * @returns Activated memories (seeds included) that meet the threshold and
 *   exist for `agentId`, each with source `'association'`.
 */
private async spreadingActivation(
  seeds: Array<{ id: string; score: number }>,
  agentId: string,
  maxHops: number,
  threshold: number,
  explicitOnly = false,
  allowStalePaths = false,
): Promise<SearchResult[]> {
  const activation = new Map<string, number>(); // memoryId → activation score
  for (const { id, score } of seeds) {
    activation.set(id, score);
  }

  const visited = new Set<string>();
  let frontier = seeds.map((seed) => seed.id);
  let hop = 0;

  while (hop < maxHops && frontier.length > 0) {
    const distanceDecay = 1 / (hop + 2); // weaker the further from the seeds
    const upcoming: string[] = [];

    for (const nodeId of frontier) {
      if (visited.has(nodeId)) continue;
      visited.add(nodeId);

      const nodeActivation = activation.get(nodeId) ?? 0;
      const edges = await this.db.getAssociations(nodeId);

      for (const edge of edges) {
        const isImplicitButExcluded = explicitOnly && edge.origin !== 'explicit';
        const isStalePathButExcluded = edge.relationType === 'supersedes' && !allowStalePaths;
        if (isImplicitButExcluded || isStalePathButExcluded) continue;

        // Associations are traversed in both directions.
        const neighborId = edge.sourceId === nodeId ? edge.targetId : edge.sourceId;

        const propagated =
          nodeActivation * edge.strength * associationOriginWeight(edge.origin) * distanceDecay;

        if (propagated > (activation.get(neighborId) ?? 0)) {
          activation.set(neighborId, propagated);
        }

        if (propagated >= threshold && !visited.has(neighborId)) {
          upcoming.push(neighborId);
        }
      }
    }

    frontier = upcoming;
    hop += 1;
  }

  const activatedMemories: SearchResult[] = [];

  for (const [memoryId, score] of activation) {
    if (score < threshold) continue;

    // Ids that do not resolve for this agent are dropped.
    const memory = await this.db.getMemoryById(memoryId, agentId);
    if (!memory) continue;

    activatedMemories.push({ memory, score, source: 'association' });
  }

  return activatedMemories;
}
|
|
666
|
+
|
|
667
|
+
// ─── Auto-Association ─────────────────────────────────
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
/**
 * Links a memory to its closest semantic neighbours.
 *
 * Searches up to 5 memories of the same agent at similarity ≥ 0.7 and creates
 * a `'similarity'` association to each one other than the memory itself,
 * using the similarity as strength. Failures are logged and skipped.
 *
 * @param memory The memory to link; ignored when it has no embedding.
 */
private async autoAssociate(memory: Memory): Promise<void> {
  const { embedding } = memory;
  if (!embedding) return;

  const neighbours = await this.db.searchByVector(memory.agentId, embedding, {
    limit: 5,
    threshold: 0.7, // high bar: only near matches get linked automatically
  });

  for (const { memory: neighbour, similarity } of neighbours) {
    if (neighbour.id === memory.id) continue;

    try {
      await this.db.createAssociation({
        sourceId: memory.id,
        targetId: neighbour.id,
        strength: similarity,
        origin: 'similarity',
        relationType: 'relates_to',
      });
    } catch (err) {
      log.warn({ err, targetId: neighbour.id }, 'Failed to auto-associate');
    }
  }
}
|
|
696
|
+
|
|
697
|
+
// ─── Co-Occurrence Tracking ───────────────────────────
|
|
698
|
+
|
|
699
|
+
/**
 * Records weak `'co-occurrence'` associations between memories that were
 * returned together.
 *
 * Each memory is paired with at most the three memories that follow it in
 * the list. Failures are ignored on purpose: this is best-effort bookkeeping.
 *
 * @param memories Recalled memories, in result order.
 */
private async trackCoOccurrence(memories: Memory[]): Promise<void> {
  if (memories.length < 2) return;

  for (const [index, source] of memories.entries()) {
    // Only the next three entries, so the pair count grows linearly.
    const partners = memories.slice(index + 1, index + 4);

    for (const target of partners) {
      try {
        await this.db.createAssociation({
          sourceId: source.id,
          targetId: target.id,
          strength: 0.2, // weak initial co-occurrence strength
          origin: 'co-occurrence',
          relationType: 'relates_to',
        });
      } catch {
        // Silently ignore — these are best-effort
      }
    }
  }
}
|
|
720
|
+
|
|
721
|
+
// ─── Lifecycle ────────────────────────────────────────
|
|
722
|
+
|
|
723
|
+
/**
 * Stops the decay loop and closes the event bus and the database.
 *
 * The database is closed even when closing the event bus fails, so a bus
 * error cannot leak the database connection. The bus error still propagates
 * to the caller afterwards.
 */
async shutdown(): Promise<void> {
  log.info('Shutting down Memory Engine...');
  this.stopDecayLoop();
  try {
    await this.eventBus.close();
  } finally {
    await this.db.close();
  }
  log.info('Memory Engine shut down');
}
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
/**
 * Weight applied to an association during spreading activation, by origin.
 *
 * @param origin Association origin label.
 * @returns 1 for `'explicit'`, 0.35 for `'similarity'`, 0.25 for
 *   `'co-occurrence'`, and 0.3 for anything else.
 */
function associationOriginWeight(origin: string): number {
  switch (origin) {
    case 'explicit':
      return 1;
    case 'similarity':
      return 0.35;
    case 'co-occurrence':
      return 0.25;
    default:
      return 0.3;
  }
}
|
|
740
|
+
|
|
741
|
+
/**
 * Heuristic: does this memory describe a new or changed state of something?
 *
 * True when `metadata.role` is `final` or `current` (case-insensitive), or
 * when the lower-cased content contains one of the change cue words.
 *
 * @param memory Memory to inspect.
 */
function looksLikeStateUpdate(memory: Memory): boolean {
  const text = memory.content.toLowerCase();
  const declaredRole = String(memory.metadata?.['role'] ?? '').toLowerCase();

  if (['final', 'current'].includes(declaredRole)) {
    return true;
  }

  const changeCue =
    /\b(now|currently|current|latest|final|resolved|after|introduced|raised|lowered|changed|moved|renamed|postponed|pushed back|increased|decreased|no longer|ended|supersedes|replaces)\b/;
  return changeCue.test(text);
}
|
|
751
|
+
|
|
752
|
+
/**
 * Decides whether `incoming` replaces `existing`.
 *
 * All of the following must hold: `existing` is not already stale, it is not
 * at least as recent as `incoming` (per `getMemoryTime`), the two overlap in
 * topic, `existing` looks like supersedable state, and it is not a durable
 * historical fact. Checks run in that order and stop at the first failure.
 *
 * @param incoming Newly written memory.
 * @param existing Candidate older memory.
 */
function shouldSupersede(incoming: Memory, existing: Memory): boolean {
  const existingRole = String(existing.metadata?.['role'] ?? '').toLowerCase();
  const alreadyStale = existingRole === 'stale';

  return (
    !alreadyStale &&
    !(getMemoryTime(incoming) <= getMemoryTime(existing)) &&
    hasTopicOverlap(incoming, existing) &&
    looksLikeSupersedableState(existing) &&
    !isDurableHistoricalMemory(existing)
  );
}
|
|
761
|
+
|
|
762
|
+
/**
 * Heuristic: does this memory describe a state that a later update may
 * replace?
 *
 * True when `metadata.role` is `stale` or `interim` (case-insensitive), or
 * when the lower-cased content contains a "previous state" cue phrase.
 *
 * @param memory Memory to inspect.
 */
function looksLikeSupersedableState(memory: Memory): boolean {
  const text = memory.content.toLowerCase();
  const declaredRole = String(memory.metadata?.['role'] ?? '').toLowerCase();

  if (['stale', 'interim'].includes(declaredRole)) {
    return true;
  }

  const previousStateCue =
    /\b(initial|originally|original|former|formerly|used to|previously|was priced|was called|was scheduled|no annual-plan discount|no annual discount)\b/;
  return previousStateCue.test(text);
}
|
|
772
|
+
|
|
773
|
+
/**
 * Reports whether a memory's content contains a verb that marks a one-off
 * historical event (began, founded, was born, graduated, completed,
 * published), matched case-insensitively on whole words.
 *
 * @param memory - Memory to inspect.
 * @returns `true` when such a verb appears in the content.
 */
function isDurableHistoricalMemory(memory: Memory): boolean {
  const historicalEventCue = /\bbegan\b|\bfounded\b|\bwas born\b|\bgraduated\b|\bcompleted\b|\bpublished\b/;
  const loweredContent = memory.content.toLowerCase();
  return historicalEventCue.test(loweredContent);
}
|
|
777
|
+
|
|
778
|
+
/**
 * Reports whether two memories appear to be about the same topic.
 *
 * They overlap when they share at least one tag, or when at least two
 * significant content tokens (see `significantMemoryTokens`) occur in both.
 *
 * @param a - First memory.
 * @param b - Second memory.
 * @returns `true` when the memories share a tag or two or more tokens.
 */
function hasTopicOverlap(a: Memory, b: Memory): boolean {
  for (const tag of a.tags) {
    if (b.tags.includes(tag)) {
      return true;
    }
  }

  const tokensOfA = significantMemoryTokens(a.content);
  const tokensOfB = significantMemoryTokens(b.content);
  const sharedCount = Array.from(tokensOfA).filter((token) => tokensOfB.has(token)).length;

  return sharedCount >= 2;
}
|
|
791
|
+
|
|
792
|
+
/**
 * Extracts the distinct "significant" word tokens from memory content.
 *
 * The text is lower-cased and split on every run of characters that are not
 * letters, combining marks, or digits. Tokens of three characters or fewer
 * and a small list of function words are discarded.
 *
 * Splitting is Unicode-aware so that words containing non-ASCII letters
 * (for example "café" or "résumé") stay whole instead of being broken into
 * fragments. Pure-ASCII input is tokenized as before.
 *
 * @param content - Raw memory text.
 * @returns The set of significant tokens found in `content`.
 */
function significantMemoryTokens(content: string): Set<string> {
  // Only words longer than three characters are listed: anything shorter is
  // already removed by the length filter below.
  const stopWords = new Set([
    'about',
    'after',
    'also',
    'from',
    'have',
    'into',
    'their',
    'they',
    'this',
    'with',
  ]);

  const words = content
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    .filter((word) => word.length > 3 && !stopWords.has(word));

  return new Set(words);
}
|
|
829
|
+
|
|
830
|
+
/**
 * Lexical features extracted from a query string; produced by
 * `buildLexicalProfile` and consumed by `lexicalEvidenceScore`.
 */
interface LexicalProfile {
  /** Query tokens as returned by `lexicalTokens`. */
  tokens: string[];

  /** Literal terms as returned by `extractExactTerms`. */
  exactTerms: string[];

  /** Entity terms as returned by `extractEntityTerms`. */
  entities: string[];
}
|
|
835
|
+
|
|
836
|
+
/**
 * Builds the lexical profile of a query by running the three extractors
 * (`lexicalTokens`, `extractExactTerms`, `extractEntityTerms`) over it.
 *
 * @param query - Raw query text.
 * @returns The tokens, exact terms and entity terms found in the query.
 */
function buildLexicalProfile(query: string): LexicalProfile {
  const tokens = lexicalTokens(query);
  const exactTerms = extractExactTerms(query);
  const entities = extractEntityTerms(query);

  return { tokens, exactTerms, entities };
}
|
|
843
|
+
|
|
844
|
+
/**
 * Scores how well a memory's content and tags match a query's lexical
 * profile. The result is clamped to the range [0, 1].
 *
 * The score is a weighted sum of four coverage ratios (each is hits divided
 * by the number of query items, or 0 when the query has none):
 * - tokens found in the content or the tags (0.58)
 * - exact terms found as substrings of the content (0.22)
 * - entities found as substrings of the content or the tags (0.16)
 * - tokens found in the tags (0.12)
 *
 * Two penalties may then apply:
 * - 0.18 when no exact term matched but the content holds an exact term of
 *   the same family (`hasConflictingExactTerm`);
 * - 0.16 when no entity matched but the content yields entity terms of its
 *   own.
 *
 * @param profile - Lexical profile of the query.
 * @param memory - Candidate memory.
 * @returns Lexical evidence score between 0 and 1 inclusive.
 */
function lexicalEvidenceScore(profile: LexicalProfile, memory: Memory): number {
  const { tokens, exactTerms, entities } = profile;
  const ratio = (hits: number, total: number): number => (total > 0 ? hits / total : 0);

  const contentTokens = new Set(lexicalTokens(memory.content));
  const tagTokens = new Set(memory.tags.flatMap((tag) => lexicalTokens(tag)));
  const loweredContent = memory.content.toLowerCase();
  const loweredTags = memory.tags.join(' ').toLowerCase();

  // One pass over the query tokens yields both the overall and tag-only hits.
  let tokenHits = 0;
  let tagHits = 0;
  for (const token of tokens) {
    const inTags = tagTokens.has(token);
    if (inTags) tagHits += 1;
    if (inTags || contentTokens.has(token)) tokenHits += 1;
  }

  let exactHits = 0;
  for (const term of exactTerms) {
    if (loweredContent.includes(term)) exactHits += 1;
  }

  let entityHits = 0;
  for (const entity of entities) {
    if (loweredContent.includes(entity) || loweredTags.includes(entity)) entityHits += 1;
  }

  const tokenCoverage = ratio(tokenHits, tokens.length);
  const exactCoverage = ratio(exactHits, exactTerms.length);
  const entityCoverage = ratio(entityHits, entities.length);
  const tagCoverage = ratio(tagHits, tokens.length);

  let score = tokenCoverage * 0.58 + exactCoverage * 0.22 + entityCoverage * 0.16 + tagCoverage * 0.12;

  const exactMismatch =
    exactTerms.length > 0 && exactHits === 0 && hasConflictingExactTerm(exactTerms, loweredContent);
  if (exactMismatch) {
    score -= 0.18;
  }

  const entityMismatch =
    entities.length > 0 && entityHits === 0 && extractEntityTerms(memory.content).length > 0;
  if (entityMismatch) {
    score -= 0.16;
  }

  return Math.max(0, Math.min(1, score));
}
|
|
873
|
+
|
|
874
|
+
/**
 * Function words ignored when tokenizing text for lexical matching.
 * Built once at module load rather than on every `lexicalTokens` call.
 */
const LEXICAL_STOP_WORDS: ReadonlySet<string> = new Set([
  'a',
  'about',
  'after',
  'an',
  'and',
  'are',
  'as',
  'at',
  'be',
  'by',
  'did',
  'does',
  'for',
  'from',
  'has',
  'have',
  'how',
  'in',
  'is',
  'it',
  'of',
  'on',
  'or',
  'the',
  'their',
  'there',
  'to',
  'was',
  'what',
  'when',
  'where',
  'whether',
  'which',
  'who',
  'will',
  'with',
]);

/**
 * Splits text into distinct, normalized tokens for lexical matching.
 *
 * Steps: lower-case; drop possessive `'s`; replace every run of characters
 * other than letters, combining marks, digits and `. $ : % / -` with a
 * space; split on whitespace; trim `. : / -` from both ends of each token;
 * stem with `normalizeLexicalToken`; drop tokens of two characters or fewer
 * and stop words; de-duplicate, keeping first-seen order.
 *
 * The punctuation characters are kept inside tokens so that values such as
 * `v2.1`, `$49/month`, `3:45` and `12%` survive as single tokens. They are
 * trimmed at the token edges so that sentence punctuation does not stick to
 * a word: "started." must yield the same token as "started", otherwise a
 * stored sentence would not match the same word in a query.
 *
 * @param text - Query text, memory content, or a tag.
 * @returns Distinct normalized tokens in order of first appearance.
 */
function lexicalTokens(text: string): string[] {
  const tokens = text
    .toLowerCase()
    .replace(/'s\b/g, '')
    .replace(/[^\p{L}\p{M}\p{N}.$:%/-]+/gu, ' ')
    .split(/\s+/)
    // Trim before stemming so suffix rules see the bare word.
    .map((raw) => normalizeLexicalToken(raw.replace(/^[.:/-]+|[.:/-]+$/g, '')))
    .filter((token) => token.length > 2 && !LEXICAL_STOP_WORDS.has(token));

  return [...new Set(tokens)];
}

/**
 * Applies a crude suffix-stripping stem to a single lower-case token.
 *
 * Tokens that start with `v` followed by a digit (version strings) are
 * returned untouched. Otherwise the first matching rule applies:
 * - ends with `ing` and is longer than 5 characters: drop `ing`
 * - ends with `ed` and is longer than 4 characters: drop `ed`
 * - ends with `d` and is longer than 4 characters: drop `d`
 * - ends with `s` and is longer than 4 characters: drop `s`
 *
 * @param token - Lower-case token.
 * @returns The stemmed token, or the token unchanged when no rule applies.
 */
function normalizeLexicalToken(token: string): string {
  if (/^v\d/.test(token)) return token;
  if (token.endsWith('ing') && token.length > 5) return token.slice(0, -3);
  if (token.endsWith('ed') && token.length > 4) return token.slice(0, -2);
  if (token.endsWith('d') && token.length > 4) return token.slice(0, -1);
  if (token.endsWith('s') && token.length > 4) return token.slice(0, -1);
  return token;
}
|
|
935
|
+
|
|
936
|
+
/**
 * Extracts literal "exact" terms from text: version strings, dollar amounts,
 * percentages, `n:n` values, comma-grouped numbers, counted quantities
 * (mg, episodes, employees, participants, people, targets), dates, the codes
 * `glp-<n>` and `hba1c`, `<n>k` shorthand, and quoted phrases.
 *
 * Matching runs on the lower-cased text, so every returned term is lower
 * case. Terms are de-duplicated and returned in order of first discovery.
 *
 * A single-quoted phrase is only recognized when the opening quote is not
 * preceded by a letter or digit and the closing quote is not followed by
 * one. Without that rule, two apostrophes in ordinary prose ("the project's
 * deadline and the team's plan") would be read as one long quoted phrase.
 *
 * @param text - Query text or memory content.
 * @returns Distinct exact terms, lower-cased.
 */
function extractExactTerms(text: string): string[] {
  const normalized = text.toLowerCase();
  const terms = new Set<string>();

  const patterns = [
    /\bv\d+(?:\.\d+)+\b/g,
    /\$\d+(?:,\d{3})*(?:\.\d+)?(?:\/month)?/g,
    /\b\d+(?:,\d{3})*(?:\.\d+)?%(?=\W|$)/g,
    /\b\d+:\d+\b/g,
    /\b\d+(?:,\d{3})+\b/g,
    /\b\d+(?:\.\d+)?\s*(?:mg|episodes|employees|participants|people|targets?)\b/g,
    /\b[A-Za-z]+\s+\d{1,2},?\s+\d{4}\b/g,
    /\b(?:january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{4}\b/g,
    /\bglp-\d+\b/g,
    /\bhba1c\b/g,
    /\b\d+k\b/g,
  ];

  for (const pattern of patterns) {
    for (const match of normalized.match(pattern) ?? []) {
      terms.add(match);
    }
  }

  // Single quotes must sit on word edges so apostrophes are not mistaken for
  // quote delimiters; double quotes need no such guard.
  const quotedPhrase = /(?<![a-z0-9])'[^']+'(?![a-z0-9])|"[^"]+"/g;
  for (const quoted of normalized.match(quotedPhrase) ?? []) {
    terms.add(quoted.slice(1, -1));
  }

  return [...terms];
}
|
|
965
|
+
|
|
966
|
+
/**
 * Extracts likely entity names from text.
 *
 * Two sources are combined, de-duplicated, and returned lower-cased in order
 * of first discovery:
 * - capitalized ASCII words, optionally followed by one `'`- or `-`-joined
 *   part (a trailing possessive `'s` is removed), longer than two characters
 *   and not in the ignore list;
 * - phrases inside double quotes, longer than two characters.
 *
 * Capitalization is the only signal used, so a capitalized word that opens a
 * sentence would otherwise be reported as an entity. The ignore list
 * therefore covers the question and auxiliary words that commonly open a
 * query ("Why", "Are", "Was", "Can", ...), not just a few of them.
 *
 * @param text - Query text or memory content, in its original casing.
 * @returns Distinct entity terms, lower-cased.
 */
function extractEntityTerms(text: string): string[] {
  const ignored = new Set([
    'are',
    'can',
    'could',
    'did',
    'does',
    'had',
    'has',
    'have',
    'how',
    'is',
    'should',
    'was',
    'were',
    'what',
    'when',
    'where',
    'which',
    'who',
    'whom',
    'whose',
    'why',
    'will',
    'would',
  ]);
  const terms = new Set<string>();

  for (const match of text.match(/\b[A-Z][a-zA-Z0-9]*(?:['-][A-Z]?[a-zA-Z0-9]+)?\b/g) ?? []) {
    const normalized = match.toLowerCase().replace(/'s$/, '');
    if (normalized.length > 2 && !ignored.has(normalized)) {
      terms.add(normalized);
    }
  }

  for (const quoted of text.match(/"[^"]+"/g) ?? []) {
    const normalized = quoted.slice(1, -1).toLowerCase();
    if (normalized.length > 2) {
      terms.add(normalized);
    }
  }

  return [...terms];
}
|
|
998
|
+
|
|
999
|
+
/**
 * Reports whether the content contains an exact term belonging to the same
 * family (see `exactTermFamily`) as any of the query's exact terms.
 *
 * @param queryTerms - Exact terms extracted from the query.
 * @param content - Text to scan with `extractExactTerms`.
 * @returns `true` when at least one family is shared; `false` when the
 *   content has no exact terms or no family matches.
 */
function hasConflictingExactTerm(queryTerms: string[], content: string): boolean {
  const contentFamilies = new Set(extractExactTerms(content).map((term) => exactTermFamily(term)));
  if (contentFamilies.size === 0) {
    return false;
  }

  for (const queryTerm of queryTerms) {
    if (contentFamilies.has(exactTermFamily(queryTerm))) {
      return true;
    }
  }
  return false;
}
|
|
1007
|
+
|
|
1008
|
+
/**
 * Classifies an exact term into a coarse family. The first matching rule
 * wins:
 * - starts with `v` and a digit: `'version'`
 * - starts with `$`: `'money'`
 * - ends with `%`: `'percent'`
 * - has the form `digits:digits`: `'duration'`
 * - contains the word `hba1c`: `'code'`
 * - anything else: `'number'`
 *
 * @param term - Exact term to classify.
 * @returns The family label.
 */
function exactTermFamily(term: string): string {
  const rules: Array<[(value: string) => boolean, string]> = [
    [(value) => /^v\d/.test(value), 'version'],
    [(value) => value.startsWith('$'), 'money'],
    [(value) => value.endsWith('%'), 'percent'],
    [(value) => /^\d+:\d+$/.test(value), 'duration'],
    [(value) => /\bhba1c\b/.test(value), 'code'],
  ];

  for (const [matches, family] of rules) {
    if (matches(term)) {
      return family;
    }
  }
  return 'number';
}
|
|
1016
|
+
|
|
1017
|
+
/**
 * Returns the timestamp, in epoch milliseconds, used to order memories.
 *
 * When the `benchTimestamp` metadata entry is a string that `Date.parse`
 * turns into a finite number, that value is used; otherwise the memory's
 * `createdAt` time is returned.
 *
 * @param memory - Memory whose time is wanted.
 * @returns Epoch milliseconds.
 */
function getMemoryTime(memory: Memory): number {
  const rawTimestamp = memory.metadata?.['benchTimestamp'];
  const parsedTimestamp = typeof rawTimestamp === 'string' ? Date.parse(rawTimestamp) : Number.NaN;

  return Number.isFinite(parsedTimestamp) ? parsedTimestamp : memory.createdAt.getTime();
}
|
|
1026
|
+
|
|
1027
|
+
/**
 * Reports whether a memory is marked stale: either its `role` metadata
 * (case-insensitive) is `stale`, or a `supersededBy` metadata entry is
 * present (any value other than `undefined`).
 *
 * @param memory - Memory to inspect.
 * @returns `true` when the memory is stale.
 */
function isStaleMemory(memory: Memory): boolean {
  const declaredRole = String(memory.metadata?.['role'] ?? '').toLowerCase();
  if (declaredRole === 'stale') {
    return true;
  }

  return memory.metadata?.['supersededBy'] !== undefined;
}
|
|
1033
|
+
|
|
1034
|
+
/**
 * Reports whether a query asks about past or changed state, judged by the
 * presence (case-insensitive, whole words) of one of the history cue words
 * or phrases in the pattern below.
 *
 * @param query - Raw query text.
 * @returns `true` when the query contains a history cue.
 */
function shouldIncludeStaleCandidates(query: string): boolean {
  const historyCue =
    /\b(previous|original|former|formerly|used to|what changed|change from|changed from|prior|earlier|old value|old state|history|historical)\b/;

  return historyCue.test(query.toLowerCase());
}
|
|
1040
|
+
|
|
1041
|
+
/**
 * Deterministic query expansion: rewrites a question into a statement-like
 * lead-in and pairs it with the original query text.
 *
 * The trimmed query has a leading question phrase replaced by a fixed
 * lead-in and a trailing `?` removed. For example:
 *   "When did the project start?"
 *     → "The time that the project start. When did the project start?"
 *
 * When no rewrite applies (the query does not open with a recognized
 * question phrase), the result is the query followed by
 * " — relevant memory about " and the query without its trailing `?`.
 *
 * No model inference is involved; the function is pure string manipulation.
 *
 * @param query - Raw query text.
 * @returns The expanded query string.
 */
function expandQuery(query: string): string {
  const trimmed = query.trim();

  // Applied in order; each pattern is anchored to the start of the string.
  const rewrites: Array<[RegExp, string]> = [
    [/^when\s+(did|was|were|is|are|has|have)\s+/i, 'The time that '],
    [/^what\s+(is|was|were|are|did|has|have)\s+/i, 'Information about '],
    [/^who\s+(is|was|were|are|did)\s+/i, 'The person who '],
    [/^where\s+(is|was|were|are|did)\s+/i, 'The location where '],
    [/^why\s+(is|was|were|are|did|has|have)\s+/i, 'The reason why '],
    [/^how\s+(is|was|were|are|did|has|have|many|much|long|often)\s+/i, 'Details on how '],
    [/^which\s+/i, 'The specific '],
    [/\?$/, ''],
  ];

  let declarative = trimmed;
  for (const [pattern, replacement] of rewrites) {
    declarative = declarative.replace(pattern, replacement);
  }

  // Nothing but the trailing "?" changed: fall back to a generic suffix.
  const untouched = declarative.toLowerCase() === trimmed.toLowerCase().replace(/\?$/, '');
  if (untouched) {
    return `${trimmed} — relevant memory about ${trimmed.replace(/\?$/, '').trim()}`;
  }

  // Statement form first, original wording second.
  return `${declarative}. ${trimmed}`;
}
|