memory-braid 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
1
+ import { normalizeForHash, sha256 } from "./chunking.js";
2
+ import type { MemoryBraidConfig } from "./config.js";
3
+ import {
4
+ asRecord,
5
+ asString,
6
+ buildTaxonomy,
7
+ formatTaxonomySummary,
8
+ inferMemoryLayer,
9
+ normalizeMemoryKind,
10
+ normalizeTaxonomy,
11
+ primaryTaxonomyAnchor,
12
+ summarizeClusterText,
13
+ taxonomyOverlap,
14
+ taxonomyTerms,
15
+ } from "./memory-model.js";
16
+ import { scoreSemanticPromotion } from "./memory-selection.js";
17
+ import { resolveResultTimeMs } from "./temporal.js";
18
+ import type {
19
+ ConsolidationState,
20
+ LifecycleEntry,
21
+ MemoryBraidResult,
22
+ MemoryKind,
23
+ TaxonomyBuckets,
24
+ } from "./types.js";
25
+
26
+ type Cluster = {
27
+ kind: MemoryKind;
28
+ anchor?: string;
29
+ taxonomy: TaxonomyBuckets;
30
+ memories: MemoryBraidResult[];
31
+ firstSeenAt: number;
32
+ lastSeenAt: number;
33
+ recallSupport: number;
34
+ sessionKeys: Set<string>;
35
+ };
36
+
37
+ export type SemanticDraft = {
38
+ compendiumKey: string;
39
+ existingMemoryId?: string;
40
+ text: string;
41
+ metadata: Record<string, unknown>;
42
+ sourceMemories: MemoryBraidResult[];
43
+ latestAt: number;
44
+ kind: MemoryKind;
45
+ anchor?: string;
46
+ };
47
+
48
+ export type SupersedeDraft = {
49
+ memoryId: string;
50
+ text: string;
51
+ metadata: Record<string, unknown>;
52
+ };
53
+
54
+ function tokenize(text: string): Set<string> {
55
+ const tokens = text.match(/[\p{L}\p{N}]+/gu) ?? [];
56
+ const out = new Set<string>();
57
+ for (const token of tokens) {
58
+ const normalized = token
59
+ .toLowerCase()
60
+ .normalize("NFKD")
61
+ .replace(/\p{M}+/gu, "");
62
+ if (normalized.length >= 4) {
63
+ out.add(normalized);
64
+ }
65
+ }
66
+ return out;
67
+ }
68
+
69
+ function lexicalSimilarity(left: string, right: string): number {
70
+ const leftTokens = tokenize(left);
71
+ const rightTokens = tokenize(right);
72
+ if (leftTokens.size === 0 || rightTokens.size === 0) {
73
+ return 0;
74
+ }
75
+ let shared = 0;
76
+ for (const token of leftTokens) {
77
+ if (rightTokens.has(token)) {
78
+ shared += 1;
79
+ }
80
+ }
81
+ return shared / Math.max(leftTokens.size, rightTokens.size);
82
+ }
83
+
84
+ function resolveKind(memory: MemoryBraidResult): MemoryKind {
85
+ const metadata = asRecord(memory.metadata);
86
+ return normalizeMemoryKind(metadata.memoryKind) ?? "other";
87
+ }
88
+
89
+ function resolveTaxonomy(memory: MemoryBraidResult): TaxonomyBuckets {
90
+ const metadata = asRecord(memory.metadata);
91
+ return buildTaxonomy({
92
+ text: memory.snippet,
93
+ entities: metadata.entities,
94
+ existingTaxonomy: metadata.taxonomy,
95
+ });
96
+ }
97
+
98
+ function resolveAnchor(memory: MemoryBraidResult): string | undefined {
99
+ return primaryTaxonomyAnchor(resolveTaxonomy(memory));
100
+ }
101
+
102
+ function resolveLifecycleRecall(
103
+ memoryId: string | undefined,
104
+ lifecycle: Record<string, LifecycleEntry>,
105
+ ): number {
106
+ if (!memoryId) {
107
+ return 0;
108
+ }
109
+ const entry = lifecycle[memoryId];
110
+ return entry ? Math.max(0, entry.recallCount ?? 0) : 0;
111
+ }
112
+
113
+ function buildCompendiumKey(params: {
114
+ kind: MemoryKind;
115
+ anchor?: string;
116
+ taxonomy: TaxonomyBuckets;
117
+ text: string;
118
+ }): string {
119
+ const signature = [
120
+ params.kind,
121
+ params.anchor ?? "",
122
+ ...taxonomyTerms(params.taxonomy).slice(0, 6).map((value) => normalizeForHash(value)),
123
+ normalizeForHash(params.text),
124
+ ]
125
+ .filter(Boolean)
126
+ .join("|");
127
+ return sha256(signature);
128
+ }
129
+
130
+ async function shouldJoinCluster(params: {
131
+ cluster: Cluster;
132
+ memory: MemoryBraidResult;
133
+ semanticSimilarity?: (leftText: string, rightText: string) => Promise<number | undefined>;
134
+ }): Promise<boolean> {
135
+ const memoryTaxonomy = resolveTaxonomy(params.memory);
136
+ const memoryAnchor = primaryTaxonomyAnchor(memoryTaxonomy);
137
+ if (params.cluster.kind !== resolveKind(params.memory)) {
138
+ return false;
139
+ }
140
+ if (params.cluster.anchor && memoryAnchor && params.cluster.anchor === memoryAnchor) {
141
+ return true;
142
+ }
143
+ if (taxonomyOverlap(params.cluster.taxonomy, memoryTaxonomy) >= 0.34) {
144
+ return true;
145
+ }
146
+ const latest = params.cluster.memories[params.cluster.memories.length - 1];
147
+ const lexical = lexicalSimilarity(latest?.snippet ?? "", params.memory.snippet);
148
+ if (lexical >= 0.42) {
149
+ return true;
150
+ }
151
+ if (params.semanticSimilarity) {
152
+ const semantic = await params.semanticSimilarity(latest?.snippet ?? "", params.memory.snippet);
153
+ if (typeof semantic === "number" && semantic >= 0.86) {
154
+ return true;
155
+ }
156
+ }
157
+ return false;
158
+ }
159
+
160
/**
 * Consolidates episodic memories into semantic ("compendium") drafts.
 *
 * Pipeline:
 *  1. Filter to episodic-layer results and sort ascending by resolved time.
 *  2. Greedily assign each memory to the first matching cluster
 *     (shouldJoinCluster), or start a new cluster.
 *  3. Index existing semantic memories by compendiumKey (metadata first,
 *     then consolidation state as a fallback for keys not seen in results).
 *  4. For each sufficiently supported cluster, summarize it, score it for
 *     promotion, and emit a SemanticDraft (reusing an existing memory id
 *     when the compendium key already exists).
 *
 * @returns candidate count (episodic inputs), clusters formed, and the drafts.
 */
export async function buildConsolidationDrafts(params: {
  episodic: MemoryBraidResult[];
  existingSemantic: MemoryBraidResult[];
  lifecycleEntries: Record<string, LifecycleEntry>;
  cfg: MemoryBraidConfig;
  minSupportCount: number;
  minRecallCount: number;
  semanticMaxSourceIds: number;
  state: ConsolidationState;
  semanticSimilarity?: (leftText: string, rightText: string) => Promise<number | undefined>;
}): Promise<{ candidates: number; clustersFormed: number; drafts: SemanticDraft[] }> {
  // Oldest-first so clusters grow chronologically and "latest member"
  // comparisons in shouldJoinCluster are meaningful.
  const episodic = params.episodic
    .filter((memory) => inferMemoryLayer(memory) === "episodic")
    .sort((left, right) => (resolveResultTimeMs(left) ?? 0) - (resolveResultTimeMs(right) ?? 0));
  const clusters: Cluster[] = [];

  for (const memory of episodic) {
    // Sequential scan: first cluster that accepts the memory wins.
    let matched: Cluster | undefined;
    for (const cluster of clusters) {
      if (await shouldJoinCluster({
        cluster,
        memory,
        semanticSimilarity: params.semanticSimilarity,
      })) {
        matched = cluster;
        break;
      }
    }
    const ts = resolveResultTimeMs(memory) ?? Date.now();
    if (!matched) {
      // Seed a fresh cluster from this memory.
      clusters.push({
        kind: resolveKind(memory),
        anchor: resolveAnchor(memory),
        taxonomy: resolveTaxonomy(memory),
        memories: [memory],
        firstSeenAt: ts,
        lastSeenAt: ts,
        recallSupport: resolveLifecycleRecall(memory.id, params.lifecycleEntries),
        sessionKeys: new Set(
          asString(asRecord(memory.metadata).sessionKey)
            ? [asString(asRecord(memory.metadata).sessionKey)!]
            : [],
        ),
      });
      continue;
    }
    // Fold the memory into the matched cluster and refresh its taxonomy by
    // re-deriving from the current taxonomy summary plus the new snippet.
    matched.memories.push(memory);
    matched.taxonomy = buildTaxonomy({
      text: `${formatTaxonomySummary(matched.taxonomy)} ${memory.snippet}`,
      entities: Array.isArray(asRecord(memory.metadata).entities)
        ? (asRecord(memory.metadata).entities as unknown[])
        : [],
      existingTaxonomy: matched.taxonomy,
    });
    matched.firstSeenAt = Math.min(matched.firstSeenAt, ts);
    matched.lastSeenAt = Math.max(matched.lastSeenAt, ts);
    matched.recallSupport += resolveLifecycleRecall(memory.id, params.lifecycleEntries);
    const sessionKey = asString(asRecord(memory.metadata).sessionKey);
    if (sessionKey) {
      matched.sessionKeys.add(sessionKey);
    }
  }

  // Index existing semantic memories by their compendium key so drafts can
  // update in place rather than duplicate.
  const existingByKey = new Map<string, MemoryBraidResult>();
  for (const memory of params.existingSemantic) {
    const metadata = asRecord(memory.metadata);
    const key = asString(metadata.compendiumKey);
    if (key) {
      existingByKey.set(key, memory);
    }
  }
  // Fall back to consolidation state for keys whose memories were not in the
  // fetched results; synthesize a stub result carrying just the id/metadata.
  for (const [key, value] of Object.entries(params.state.semanticByCompendiumKey)) {
    if (!existingByKey.has(key) && value?.memoryId) {
      existingByKey.set(key, {
        id: value.memoryId,
        source: "mem0",
        snippet: "",
        score: 0,
        metadata: { compendiumKey: key, memoryLayer: "semantic", sourceType: "compendium" },
      });
    }
  }

  const drafts: SemanticDraft[] = [];
  for (const cluster of clusters) {
    const supportCount = cluster.memories.length;
    const recallSupport = cluster.recallSupport;
    // Gate 1: need either enough members, or any member plus enough recalls.
    if (
      supportCount < params.minSupportCount &&
      !(supportCount >= 1 && recallSupport >= params.minRecallCount)
    ) {
      continue;
    }
    // Gate 2: task/other kinds are noisier, so both thresholds are raised by one.
    if (
      (cluster.kind === "task" || cluster.kind === "other") &&
      supportCount < params.minSupportCount + 1 &&
      recallSupport < params.minRecallCount + 1
    ) {
      continue;
    }
    const texts = cluster.memories.map((memory) => memory.snippet);
    const summary = summarizeClusterText(texts, cluster.kind);
    if (!summary) {
      continue;
    }
    // Final promotion decision is delegated to the scoring model.
    const selection = scoreSemanticPromotion({
      kind: cluster.kind,
      supportCount,
      recallSupport,
      taxonomy: cluster.taxonomy,
      firstSeenAt: cluster.firstSeenAt,
      lastSeenAt: cluster.lastSeenAt,
      sessionKeys: cluster.sessionKeys,
      text: summary,
      cfg: params.cfg,
    });
    if (selection.decision !== "semantic") {
      continue;
    }
    const taxonomy = cluster.taxonomy;
    const compendiumKey = buildCompendiumKey({
      kind: cluster.kind,
      anchor: cluster.anchor,
      taxonomy,
      text: summary,
    });
    const existing = existingByKey.get(compendiumKey);
    // Keep only the newest ids, capped by config (slice from the end of the
    // chronologically ordered members).
    const sourceMemoryIds = cluster.memories
      .map((memory) => memory.id)
      .filter((value): value is string => Boolean(value))
      .slice(-params.semanticMaxSourceIds);
    const supportIds = new Set(sourceMemoryIds);
    // Merge over any existing metadata; supportCount never decreases below a
    // previously recorded value.
    const metadata: Record<string, unknown> = {
      ...(asRecord(existing?.metadata) ?? {}),
      sourceType: "compendium",
      memoryLayer: "semantic",
      memoryOwner: "user",
      memoryKind: cluster.kind,
      stability: "durable",
      supportCount: Math.max(
        supportIds.size,
        typeof asRecord(existing?.metadata).supportCount === "number"
          ? Math.round(asRecord(existing?.metadata).supportCount as number)
          : 0,
      ),
      sourceMemoryIds,
      firstSeenAt: new Date(cluster.firstSeenAt).toISOString(),
      lastSeenAt: new Date(cluster.lastSeenAt).toISOString(),
      lastConfirmedAt: new Date(cluster.lastSeenAt).toISOString(),
      compendiumKey,
      taxonomy,
      taxonomySummary: formatTaxonomySummary(taxonomy),
      selectionDecision: selection.decision,
      rememberabilityScore: selection.score,
      rememberabilityReasons: selection.reasons,
      promotionScore: selection.score,
      promotionReasons: selection.reasons,
    };
    if (cluster.anchor) {
      metadata.primaryAnchor = cluster.anchor;
    }
    drafts.push({
      compendiumKey,
      existingMemoryId: existing?.id,
      text: summary,
      metadata,
      sourceMemories: cluster.memories,
      latestAt: cluster.lastSeenAt,
      kind: cluster.kind,
      anchor: cluster.anchor,
    });
  }

  return {
    candidates: episodic.length,
    clustersFormed: clusters.length,
    drafts,
  };
}
339
+
340
/**
 * Finds older preference/decision memories that have been replaced by newer
 * ones sharing the same anchor.
 *
 * Memories are grouped by `kind|normalized-anchor`; within each group the
 * newest entry (by resolved time) is kept as authoritative, and each older
 * entry is compared to it. When similarity falls BELOW 0.72 the older entry
 * is marked superseded (supersededBy/supersededAt in metadata).
 *
 * NOTE(review): low similarity ⇒ superseded — i.e. content drift under the
 * same anchor is treated as a replaced preference/decision, while highly
 * similar entries are left untouched as restatements. Confirm this direction
 * is intended; at first read it can look inverted.
 */
export async function findSupersededSemanticMemories(params: {
  semanticMemories: MemoryBraidResult[];
  semanticSimilarity?: (leftText: string, rightText: string) => Promise<number | undefined>;
}): Promise<SupersedeDraft[]> {
  // Group by kind + normalized anchor; only preference/decision kinds can
  // supersede, and entries without an anchor or id are skipped.
  const grouped = new Map<string, MemoryBraidResult[]>();
  for (const memory of params.semanticMemories) {
    const metadata = asRecord(memory.metadata);
    const kind = normalizeMemoryKind(metadata.memoryKind);
    if (kind !== "preference" && kind !== "decision") {
      continue;
    }
    const anchor =
      asString(metadata.primaryAnchor) ??
      primaryTaxonomyAnchor(normalizeTaxonomy(metadata.taxonomy));
    if (!anchor || !memory.id) {
      continue;
    }
    const key = `${kind}|${normalizeForHash(anchor)}`;
    const rows = grouped.get(key) ?? [];
    rows.push(memory);
    grouped.set(key, rows);
  }

  const updates: SupersedeDraft[] = [];
  for (const rows of grouped.values()) {
    // Newest first (copy before sorting to avoid mutating the group).
    const ordered = [...rows].sort((left, right) => {
      const rightTs = resolveResultTimeMs(right) ?? 0;
      const leftTs = resolveResultTimeMs(left) ?? 0;
      return rightTs - leftTs;
    });
    const newest = ordered[0];
    if (!newest?.id) {
      continue;
    }
    for (const older of ordered.slice(1)) {
      if (!older.id) {
        continue;
      }
      // Lexical similarity is the fallback; the embedding comparator, when
      // provided and returning a number, overrides it.
      const lexical = lexicalSimilarity(newest.snippet, older.snippet);
      let semantic = lexical;
      if (params.semanticSimilarity) {
        const compared = await params.semanticSimilarity(newest.snippet, older.snippet);
        if (typeof compared === "number") {
          semantic = compared;
        }
      }
      if (semantic >= 0.72) {
        continue;
      }
      // Preserve existing metadata; add the supersession markers.
      const metadata = {
        ...asRecord(older.metadata),
        supersededBy: newest.id,
        supersededAt: new Date().toISOString(),
      };
      updates.push({
        memoryId: older.id,
        text: older.snippet,
        metadata,
      });
    }
  }

  return updates;
}