memory-braid 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,280 @@
1
+ import crypto from "node:crypto";
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import type { ManagedSourceType, ManifestChunk, TargetWorkspace } from "./types.js";
5
+
6
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param value - Text to hash.
 * @returns The digest encoded as a hexadecimal string.
 */
export function sha256(value: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
9
+
10
/**
 * Collapses every run of whitespace into a single space and removes
 * leading/trailing whitespace.
 *
 * @param value - Text to normalize.
 * @returns The text with words separated by exactly one space.
 */
export function normalizeWhitespace(value: string): string {
  // Splitting on whitespace runs leaves empty strings at the edges when the
  // input starts or ends with whitespace; dropping them performs the trim.
  const words = value.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
13
+
14
/**
 * Produces the canonical form of a text used for hashing: whitespace is
 * collapsed and trimmed, then the result is lower-cased.
 *
 * @param value - Text to canonicalize.
 * @returns The whitespace-normalized, lower-cased text.
 */
export function normalizeForHash(value: string): string {
  const collapsed = normalizeWhitespace(value);
  return collapsed.toLowerCase();
}
17
+
18
/**
 * Derives the key of a chunk from where it came from and what it contains.
 *
 * The key is the SHA-256 of the workspace hash, agent id, source type, path,
 * chunk index and hash-normalized text, joined with `|` in that order.
 *
 * @param params - Origin and content of the chunk.
 * @returns Hex SHA-256 key.
 */
function buildChunkKey(params: {
  workspaceHash: string;
  agentId: string;
  sourceType: ManagedSourceType;
  path: string;
  index: number;
  text: string;
}): string {
  const { workspaceHash, agentId, sourceType, path: sourcePath, index, text } = params;
  const fields = [
    workspaceHash,
    agentId,
    sourceType,
    sourcePath,
    String(index),
    normalizeForHash(text),
  ];
  const material = fields.join("|");
  return sha256(material);
}
37
+
38
/**
 * Splits text into overlapping, trimmed chunks.
 *
 * The input is trimmed first. Consecutive chunks start
 * `chunkSize - overlap` characters apart (at least 1), so neighbouring chunks
 * share up to `overlap` characters. Each chunk is trimmed and empty chunks are
 * dropped.
 *
 * Degenerate arguments are sanitized instead of silently losing text:
 * - `chunkSize` is floored and raised to at least 1; `NaN` falls back to 1200.
 * - `overlap` is floored and clamped to `[0, chunkSize - 1]`; `NaN` becomes 0.
 *   A negative overlap previously left gaps between chunks, and a `NaN`
 *   overlap stopped after the first chunk.
 *
 * @param value - Text to split.
 * @param chunkSize - Maximum chunk length in UTF-16 code units (default 1200).
 * @param overlap - Number of code units shared between neighbours (default 180).
 * @returns The non-empty chunks in document order; `[]` for blank input.
 */
export function chunkText(value: string, chunkSize = 1200, overlap = 180): string[] {
  const text = value.trim();
  if (!text) {
    return [];
  }

  const size = Number.isNaN(chunkSize) ? 1200 : Math.max(1, Math.floor(chunkSize));
  const requestedOverlap = Number.isNaN(overlap) ? 0 : Math.floor(overlap);
  const effectiveOverlap = Math.min(size - 1, Math.max(0, requestedOverlap));

  const out: string[] = [];
  let cursor = 0;
  while (cursor < text.length) {
    const end = Math.min(text.length, cursor + size);
    out.push(text.slice(cursor, end).trim());
    if (end >= text.length) {
      break;
    }
    // `end - effectiveOverlap` is always > cursor because the overlap is
    // strictly smaller than the chunk size; the max() is a belt-and-braces
    // guarantee of forward progress.
    cursor = Math.max(cursor + 1, end - effectiveOverlap);
  }
  return out.filter(Boolean);
}
55
+
56
/**
 * Recursively collects the paths of `.md` files (extension matched
 * case-insensitively) under a directory.
 *
 * Symbolic links are skipped entirely, whether they point at files or
 * directories. Errors from reading a directory propagate to the caller.
 *
 * @param dir - Directory to scan.
 * @param out - Accumulator that matching file paths are appended to.
 */
async function walkMarkdownFiles(dir: string, out: string[]): Promise<void> {
  const children = await fs.readdir(dir, { withFileTypes: true });
  for (const child of children) {
    if (!child.isSymbolicLink()) {
      const childPath = path.join(dir, child.name);
      if (child.isDirectory()) {
        await walkMarkdownFiles(childPath, out);
      } else if (child.isFile() && child.name.toLowerCase().endsWith(".md")) {
        out.push(childPath);
      }
    }
  }
}
72
+
73
/**
 * Lists the markdown memory files of a workspace: `MEMORY.md` / `memory.md`
 * in the workspace root plus every `.md` file under `<workspace>/memory`.
 *
 * Symbolic links are never followed, and missing or unreadable entries are
 * skipped (best effort).
 *
 * On case-insensitive filesystems `MEMORY.md` and `memory.md` name the same
 * file, and `path.resolve` does not normalize case, so comparing paths alone
 * would return that file twice. The two root candidates are therefore
 * de-duplicated by device/inode identity.
 *
 * @param workspaceDir - Root directory of the workspace.
 * @returns Absolute, de-duplicated file paths.
 */
export async function listCanonicalMarkdownMemoryFiles(workspaceDir: string): Promise<string[]> {
  const result: string[] = [];
  const seenIdentities = new Set<string>();
  const candidates = [path.join(workspaceDir, "MEMORY.md"), path.join(workspaceDir, "memory.md")];
  for (const candidate of candidates) {
    try {
      // bigint stats keep 64-bit inode numbers exact.
      const stat = await fs.lstat(candidate, { bigint: true });
      if (!stat.isFile() || stat.isSymbolicLink()) {
        continue;
      }
      // Some filesystems report inode 0 for everything; fall back to the path
      // there so distinct files are never merged.
      const identity = stat.ino === 0n ? candidate : `${stat.dev}:${stat.ino}`;
      if (seenIdentities.has(identity)) {
        continue;
      }
      seenIdentities.add(identity);
      result.push(candidate);
    } catch {
      // Candidate does not exist or cannot be inspected: skip it.
    }
  }

  const memoryDir = path.join(workspaceDir, "memory");
  try {
    const stat = await fs.lstat(memoryDir);
    if (stat.isDirectory() && !stat.isSymbolicLink()) {
      await walkMarkdownFiles(memoryDir, result);
    }
  } catch {
    // No memory directory, or the walk failed part-way: keep what was found.
  }

  return Array.from(new Set(result.map((filePath) => path.resolve(filePath))));
}
99
+
100
/**
 * Extracts plain text from a session message's `content` field.
 *
 * - A string is whitespace-normalized and returned.
 * - An array is scanned for objects shaped `{ type: "text", text: string }`;
 *   their normalized texts are joined with a single space.
 * - Anything else yields `null`.
 *
 * @param content - Raw `content` value of a session message.
 * @returns The extracted text, or `null` when there is no non-blank text.
 */
function normalizeSessionMessageText(content: unknown): string | null {
  if (typeof content === "string") {
    return normalizeWhitespace(content) || null;
  }
  if (!Array.isArray(content)) {
    return null;
  }

  const parts = content.flatMap((block: unknown): string[] => {
    if (!block || typeof block !== "object") {
      return [];
    }
    const { type, text } = block as { type?: unknown; text?: unknown };
    if (type !== "text" || typeof text !== "string") {
      return [];
    }
    const cleaned = normalizeWhitespace(text);
    return cleaned ? [cleaned] : [];
  });

  return parts.length > 0 ? parts.join(" ") : null;
}
127
+
128
/**
 * Lists the `.jsonl` session files of an agent that were modified within the
 * lookback window.
 *
 * Files are looked up in `<stateDir>/agents/<agentId>/sessions` (not
 * recursively). A file that cannot be stat'ed is skipped; if the directory
 * cannot be read at all, an empty list is returned.
 *
 * @param stateDir - Root state directory.
 * @param agentId - Agent whose sessions are listed.
 * @param lookbackDays - Window size in days, measured back from now.
 * @returns Paths of the session files whose mtime falls inside the window.
 */
export async function listRecentSessionFiles(
  stateDir: string,
  agentId: string,
  lookbackDays: number,
): Promise<string[]> {
  const sessionsDir = path.join(stateDir, "agents", agentId, "sessions");
  const cutoffMs = Date.now() - lookbackDays * 24 * 60 * 60 * 1000;
  try {
    const recent: string[] = [];
    for (const entry of await fs.readdir(sessionsDir, { withFileTypes: true })) {
      const isSessionLog = entry.isFile() && entry.name.endsWith(".jsonl");
      if (isSessionLog) {
        const filePath = path.join(sessionsDir, entry.name);
        try {
          const { mtimeMs } = await fs.stat(filePath);
          if (mtimeMs >= cutoffMs) {
            recent.push(filePath);
          }
        } catch {
          // ignore single file failure
        }
      }
    }
    return recent;
  } catch {
    return [];
  }
}
157
+
158
/**
 * Builds manifest chunks for every canonical markdown memory file of a
 * workspace.
 *
 * Each file is read as UTF-8 and split with `chunkText` using its default
 * sizes. Files that cannot be read or stat'ed are skipped. Chunk paths are
 * relative to the workspace directory and always use forward slashes.
 *
 * @param target - Workspace to scan (`workspaceDir`, `workspaceHash`, `agentId` are used).
 * @returns One manifest entry per chunk, stamped with the file's mtime.
 */
export async function buildMarkdownChunks(target: TargetWorkspace): Promise<ManifestChunk[]> {
  const collected: ManifestChunk[] = [];

  for (const filePath of await listCanonicalMarkdownMemoryFiles(target.workspaceDir)) {
    let contents: string;
    let modifiedAt: number;
    try {
      contents = await fs.readFile(filePath, "utf8");
      modifiedAt = (await fs.stat(filePath)).mtimeMs;
    } catch {
      continue;
    }

    const relPath = path.relative(target.workspaceDir, filePath).replace(/\\/g, "/");
    for (const [index, piece] of chunkText(contents).entries()) {
      const chunkKey = buildChunkKey({
        workspaceHash: target.workspaceHash,
        agentId: target.agentId,
        sourceType: "markdown",
        path: relPath,
        index,
        text: piece,
      });
      collected.push({
        chunkKey,
        contentHash: sha256(normalizeForHash(piece)),
        sourceType: "markdown",
        text: piece,
        path: relPath,
        workspaceHash: target.workspaceHash,
        agentId: target.agentId,
        updatedAt: modifiedAt,
      });
    }
  }

  return collected;
}
198
+
199
/**
 * Builds manifest chunks from an agent's recent session transcripts.
 *
 * Every recent `.jsonl` session file is reduced to a transcript of
 * `User: …` / `Assistant: …` lines (one per usable message record), which is
 * then split with `chunkText` using its default sizes. Unreadable files and
 * files without any usable message are skipped. Chunk paths have the form
 * `sessions/<agentId>/<file name>` with forward slashes.
 *
 * @param target - Workspace to scan (`stateDir`, `agentId`, `workspaceHash` are used).
 * @param lookbackDays - Only session files modified within this many days are read.
 * @returns One manifest entry per chunk, stamped with the file's mtime.
 */
export async function buildSessionChunks(
  target: TargetWorkspace,
  lookbackDays: number,
): Promise<ManifestChunk[]> {
  const collected: ManifestChunk[] = [];
  const sessionFiles = await listRecentSessionFiles(target.stateDir, target.agentId, lookbackDays);

  for (const filePath of sessionFiles) {
    let contents: string;
    let modifiedAt: number;
    try {
      contents = await fs.readFile(filePath, "utf8");
      modifiedAt = (await fs.stat(filePath)).mtimeMs;
    } catch {
      continue;
    }

    const conversationParts = contents
      .split("\n")
      .map((line) => formatSessionRecordLine(line))
      .filter((part): part is string => part !== null);
    if (conversationParts.length === 0) {
      continue;
    }

    const sessionRelPath = path
      .join("sessions", target.agentId, path.basename(filePath))
      .replace(/\\/g, "/");
    const transcript = conversationParts.join("\n");

    for (const [index, piece] of chunkText(transcript).entries()) {
      const chunkKey = buildChunkKey({
        workspaceHash: target.workspaceHash,
        agentId: target.agentId,
        sourceType: "session",
        path: sessionRelPath,
        index,
        text: piece,
      });
      collected.push({
        chunkKey,
        contentHash: sha256(normalizeForHash(piece)),
        sourceType: "session",
        text: piece,
        path: sessionRelPath,
        workspaceHash: target.workspaceHash,
        agentId: target.agentId,
        updatedAt: modifiedAt,
      });
    }
  }

  return collected;
}

/**
 * Turns one JSONL line of a session file into a transcript line, or `null`
 * when the line is blank, is not valid JSON, is not a `type: "message"`
 * record, has a role other than `user`/`assistant`, or carries no text.
 */
function formatSessionRecordLine(line: string): string | null {
  if (!line.trim()) {
    return null;
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch {
    return null;
  }
  if (!parsed || typeof parsed !== "object") {
    return null;
  }
  const record = parsed as { type?: unknown; message?: unknown };
  if (record.type !== "message") {
    return null;
  }
  const message = record.message as { role?: unknown; content?: unknown } | undefined;
  if (!message || typeof message.role !== "string") {
    return null;
  }
  if (message.role !== "user" && message.role !== "assistant") {
    return null;
  }
  const text = normalizeSessionMessageText(message.content);
  if (!text) {
    return null;
  }
  const roleLabel = message.role === "user" ? "User" : "Assistant";
  return `${roleLabel}: ${text}`;
}
package/src/config.ts ADDED
@@ -0,0 +1,271 @@
1
+ export type MemoryBraidConfig = { // Resolved plugin configuration; this is the shape parseConfig returns.
2
+ enabled: boolean; // Top-level on/off flag.
3
+ mem0: {
4
+ mode: "cloud" | "oss"; // parseConfig yields "oss" only for the exact string "oss"; anything else becomes "cloud".
5
+ apiKey?: string; // The optional strings in this section are trimmed by parseConfig; blank or non-string values become undefined.
6
+ host?: string;
7
+ organizationId?: string;
8
+ projectId?: string;
9
+ ossConfig: Record<string, unknown>; // Passed through as-is when it is a non-array object, otherwise {}.
10
+ };
11
+ recall: {
12
+ maxResults: number; // Integer; parseConfig clamps to 1..50.
13
+ injectTopK: number; // Integer; parseConfig clamps to 1..20.
14
+ merge: {
15
+ strategy: "rrf"; // Only supported value; parseConfig always writes "rrf".
16
+ rrfK: number; // Integer; parseConfig clamps to 1..500.
17
+ localWeight: number; // parseConfig clamps to 0..5.
18
+ mem0Weight: number; // parseConfig clamps to 0..5.
19
+ };
20
+ };
21
+ capture: {
22
+ enabled: boolean;
23
+ extraction: {
24
+ mode: "heuristic" | "heuristic_plus_ml"; // Any value other than "heuristic_plus_ml" resolves to "heuristic".
25
+ };
26
+ ml: {
27
+ provider?: "openai" | "anthropic" | "gemini"; // Unrecognized values fall back to the default (undefined).
28
+ model?: string;
29
+ timeoutMs: number; // Integer; parseConfig clamps to 250..30000.
30
+ maxItemsPerRun: number; // Integer; parseConfig clamps to 1..50.
31
+ };
32
+ };
33
+ bootstrap: {
34
+ enabled: boolean;
35
+ startupMode: "async"; // Only supported value; parseConfig always writes "async".
36
+ includeMarkdown: boolean;
37
+ includeSessions: boolean;
38
+ sessionLookbackDays: number; // Integer; parseConfig clamps to 1..3650.
39
+ batchSize: number; // Integer; parseConfig clamps to 1..1000.
40
+ concurrency: number; // Integer; parseConfig clamps to 1..16.
41
+ };
42
+ reconcile: {
43
+ enabled: boolean;
44
+ intervalMinutes: number; // Integer; parseConfig clamps to 1..1440.
45
+ batchSize: number; // Integer; parseConfig clamps to 1..5000.
46
+ deleteStale: boolean;
47
+ };
48
+ dedupe: {
49
+ lexical: {
50
+ minJaccard: number; // parseConfig clamps to 0..1.
51
+ };
52
+ semantic: {
53
+ enabled: boolean;
54
+ minScore: number; // parseConfig clamps to 0..1.
55
+ };
56
+ };
57
+ debug: {
58
+ enabled: boolean;
59
+ includePayloads: boolean;
60
+ maxSnippetChars: number; // Integer; parseConfig clamps to 40..8000.
61
+ logSamplingRate: number; // parseConfig clamps to 0..1.
62
+ };
63
+ };
64
+
65
+ const DEFAULTS: MemoryBraidConfig = { // Fallback values parseConfig uses when a setting is missing or has the wrong type.
66
+ enabled: true,
67
+ mem0: {
68
+ mode: "cloud", // parseConfig hard-codes the same fallback rather than reading this field.
69
+ apiKey: undefined, // The mem0 string settings have no default; parseConfig leaves them undefined when absent.
70
+ host: undefined,
71
+ organizationId: undefined,
72
+ projectId: undefined,
73
+ ossConfig: {},
74
+ },
75
+ recall: {
76
+ maxResults: 8,
77
+ injectTopK: 5,
78
+ merge: {
79
+ strategy: "rrf",
80
+ rrfK: 60,
81
+ localWeight: 1,
82
+ mem0Weight: 1,
83
+ },
84
+ },
85
+ capture: {
86
+ enabled: true,
87
+ extraction: {
88
+ mode: "heuristic",
89
+ },
90
+ ml: {
91
+ provider: undefined, // Used by parseConfig when the configured provider is not one of the known names.
92
+ model: undefined,
93
+ timeoutMs: 2500,
94
+ maxItemsPerRun: 6,
95
+ },
96
+ },
97
+ bootstrap: {
98
+ enabled: true,
99
+ startupMode: "async",
100
+ includeMarkdown: true,
101
+ includeSessions: true,
102
+ sessionLookbackDays: 90,
103
+ batchSize: 50,
104
+ concurrency: 3,
105
+ },
106
+ reconcile: {
107
+ enabled: true,
108
+ intervalMinutes: 30,
109
+ batchSize: 100,
110
+ deleteStale: true,
111
+ },
112
+ dedupe: {
113
+ lexical: {
114
+ minJaccard: 0.3,
115
+ },
116
+ semantic: {
117
+ enabled: true,
118
+ minScore: 0.92,
119
+ },
120
+ },
121
+ debug: {
122
+ enabled: false,
123
+ includePayloads: false,
124
+ maxSnippetChars: 500,
125
+ logSamplingRate: 1, // Upper bound of the 0..1 range parseConfig allows.
126
+ },
127
+ };
128
+
129
/**
 * Views an unknown value as a string-keyed record.
 *
 * @param value - Value to inspect.
 * @returns `value` itself when it is a non-null, non-array object; otherwise
 *   a fresh empty object.
 */
function asRecord(value: unknown): Record<string, unknown> {
  const isObjectLike = typeof value === "object" && value !== null && !Array.isArray(value);
  return isObjectLike ? (value as Record<string, unknown>) : {};
}
135
+
136
/**
 * Returns `value` when it is a primitive boolean, otherwise `fallback`.
 *
 * @param value - Candidate value.
 * @param fallback - Result for any non-boolean input.
 */
function asBoolean(value: unknown, fallback: boolean): boolean {
  if (value === true || value === false) {
    return value;
  }
  return fallback;
}
139
+
140
/**
 * Returns the trimmed string, or `undefined` when the value is not a string
 * or is blank after trimming.
 *
 * @param value - Candidate value.
 */
function asString(value: unknown): string | undefined {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed.length > 0 ? trimmed : undefined;
}
147
+
148
/**
 * Resolves a numeric setting and clamps it to `[min, max]`.
 *
 * Anything that is not a finite number (including numeric strings, `NaN`
 * and `±Infinity`) is replaced by `fallback`. The fallback is clamped too.
 *
 * @param value - Candidate value.
 * @param fallback - Used when `value` is not a finite number.
 * @param min - Lower bound (inclusive).
 * @param max - Upper bound (inclusive).
 */
function asNumber(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    candidate = value;
  }
  const raised = Math.max(min, candidate);
  return Math.min(max, raised);
}
152
+
153
/**
 * Integer variant of `asNumber`: resolves and clamps the value, then rounds
 * the clamped result to the nearest integer.
 *
 * @param value - Candidate value.
 * @param fallback - Used when `value` is not a finite number.
 * @param min - Lower bound (inclusive).
 * @param max - Upper bound (inclusive).
 */
function asInt(value: unknown, fallback: number, min: number, max: number): number {
  const bounded = asNumber(value, fallback, min, max);
  return Math.round(bounded);
}
156
+
157
/**
 * Converts arbitrary user-supplied configuration into a fully populated
 * `MemoryBraidConfig`.
 *
 * The function is lenient by design: sections that are not objects are
 * treated as empty, values of the wrong type fall back to `DEFAULTS`,
 * numeric values are clamped to their allowed range, and unknown enum
 * values resolve to the default variant.
 *
 * @param raw - Untrusted configuration value (typically parsed JSON).
 * @returns The resolved configuration.
 */
export function parseConfig(raw: unknown): MemoryBraidConfig {
  const root = asRecord(raw);

  // mem0 backend
  const mem0Raw = asRecord(root.mem0);
  const mem0: MemoryBraidConfig["mem0"] = {
    mode: mem0Raw.mode === "oss" ? "oss" : "cloud",
    apiKey: asString(mem0Raw.apiKey),
    host: asString(mem0Raw.host),
    organizationId: asString(mem0Raw.organizationId),
    projectId: asString(mem0Raw.projectId),
    ossConfig: asRecord(mem0Raw.ossConfig),
  };

  // recall
  const recallRaw = asRecord(root.recall);
  const mergeRaw = asRecord(recallRaw.merge);
  const recall: MemoryBraidConfig["recall"] = {
    maxResults: asInt(recallRaw.maxResults, DEFAULTS.recall.maxResults, 1, 50),
    injectTopK: asInt(recallRaw.injectTopK, DEFAULTS.recall.injectTopK, 1, 20),
    merge: {
      strategy: "rrf",
      rrfK: asInt(mergeRaw.rrfK, DEFAULTS.recall.merge.rrfK, 1, 500),
      localWeight: asNumber(mergeRaw.localWeight, DEFAULTS.recall.merge.localWeight, 0, 5),
      mem0Weight: asNumber(mergeRaw.mem0Weight, DEFAULTS.recall.merge.mem0Weight, 0, 5),
    },
  };

  // capture
  const captureRaw = asRecord(root.capture);
  const extractionRaw = asRecord(captureRaw.extraction);
  const mlRaw = asRecord(captureRaw.ml);
  const requestedProvider = mlRaw.provider;
  const provider =
    requestedProvider === "openai" ||
    requestedProvider === "anthropic" ||
    requestedProvider === "gemini"
      ? requestedProvider
      : DEFAULTS.capture.ml.provider;
  const capture: MemoryBraidConfig["capture"] = {
    enabled: asBoolean(captureRaw.enabled, DEFAULTS.capture.enabled),
    extraction: {
      mode: extractionRaw.mode === "heuristic_plus_ml" ? "heuristic_plus_ml" : "heuristic",
    },
    ml: {
      provider,
      model: asString(mlRaw.model),
      timeoutMs: asInt(mlRaw.timeoutMs, DEFAULTS.capture.ml.timeoutMs, 250, 30_000),
      maxItemsPerRun: asInt(mlRaw.maxItemsPerRun, DEFAULTS.capture.ml.maxItemsPerRun, 1, 50),
    },
  };

  // bootstrap
  const bootstrapRaw = asRecord(root.bootstrap);
  const bootstrap: MemoryBraidConfig["bootstrap"] = {
    enabled: asBoolean(bootstrapRaw.enabled, DEFAULTS.bootstrap.enabled),
    startupMode: "async",
    includeMarkdown: asBoolean(bootstrapRaw.includeMarkdown, DEFAULTS.bootstrap.includeMarkdown),
    includeSessions: asBoolean(bootstrapRaw.includeSessions, DEFAULTS.bootstrap.includeSessions),
    sessionLookbackDays: asInt(
      bootstrapRaw.sessionLookbackDays,
      DEFAULTS.bootstrap.sessionLookbackDays,
      1,
      3650,
    ),
    batchSize: asInt(bootstrapRaw.batchSize, DEFAULTS.bootstrap.batchSize, 1, 1000),
    concurrency: asInt(bootstrapRaw.concurrency, DEFAULTS.bootstrap.concurrency, 1, 16),
  };

  // reconcile
  const reconcileRaw = asRecord(root.reconcile);
  const reconcile: MemoryBraidConfig["reconcile"] = {
    enabled: asBoolean(reconcileRaw.enabled, DEFAULTS.reconcile.enabled),
    intervalMinutes: asInt(
      reconcileRaw.intervalMinutes,
      DEFAULTS.reconcile.intervalMinutes,
      1,
      1440,
    ),
    batchSize: asInt(reconcileRaw.batchSize, DEFAULTS.reconcile.batchSize, 1, 5000),
    deleteStale: asBoolean(reconcileRaw.deleteStale, DEFAULTS.reconcile.deleteStale),
  };

  // dedupe
  const dedupeRaw = asRecord(root.dedupe);
  const lexicalRaw = asRecord(dedupeRaw.lexical);
  const semanticRaw = asRecord(dedupeRaw.semantic);
  const dedupe: MemoryBraidConfig["dedupe"] = {
    lexical: {
      minJaccard: asNumber(lexicalRaw.minJaccard, DEFAULTS.dedupe.lexical.minJaccard, 0, 1),
    },
    semantic: {
      enabled: asBoolean(semanticRaw.enabled, DEFAULTS.dedupe.semantic.enabled),
      minScore: asNumber(semanticRaw.minScore, DEFAULTS.dedupe.semantic.minScore, 0, 1),
    },
  };

  // debug
  const debugRaw = asRecord(root.debug);
  const debug: MemoryBraidConfig["debug"] = {
    enabled: asBoolean(debugRaw.enabled, DEFAULTS.debug.enabled),
    includePayloads: asBoolean(debugRaw.includePayloads, DEFAULTS.debug.includePayloads),
    maxSnippetChars: asInt(debugRaw.maxSnippetChars, DEFAULTS.debug.maxSnippetChars, 40, 8000),
    logSamplingRate: asNumber(debugRaw.logSamplingRate, DEFAULTS.debug.logSamplingRate, 0, 1),
  };

  return {
    enabled: asBoolean(root.enabled, DEFAULTS.enabled),
    mem0,
    recall,
    capture,
    bootstrap,
    reconcile,
    dedupe,
    debug,
  };
}
253
+
254
/**
 * Minimal schema-like wrapper around `parseConfig`, exposing `parse` and
 * `safeParse`.
 *
 * `safeParse` reports a thrown error as a single issue with an empty path
 * instead of letting it propagate.
 */
export const pluginConfigSchema = {
  /** Resolves the configuration; any error from `parseConfig` propagates. */
  parse(value: unknown) {
    return parseConfig(value);
  },
  /** Resolves the configuration and returns a success/failure result object. */
  safeParse(value: unknown) {
    try {
      return { success: true as const, data: parseConfig(value) };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        success: false as const,
        error: {
          issues: [{ path: [], message }],
        },
      };
    }
  },
};
package/src/dedupe.ts ADDED
@@ -0,0 +1,96 @@
1
+ import { normalizeForHash, sha256 } from "./chunking.js";
2
+ import type { MemoryBraidResult } from "./types.js";
3
+
4
+ export type SemanticCompareFn = ( // Async similarity callback; stagedDedupe invokes it as (candidate, already-kept result).
5
+ left: MemoryBraidResult,
6
+ right: MemoryBraidResult,
7
+ ) => Promise<number | undefined>; // stagedDedupe treats a non-number result as "duplicate" and compares numbers against semanticMinScore.
8
+
9
+ export type DedupeOptions = { // Thresholds and hooks consumed by stagedDedupe.
10
+ lexicalMinJaccard: number; // Pairs whose token Jaccard score is below this are never considered duplicates.
11
+ semanticEnabled: boolean; // When false, passing the lexical threshold alone marks a duplicate.
12
+ semanticMinScore: number; // A semanticCompare score at or above this marks a duplicate.
13
+ semanticCompare?: SemanticCompareFn; // When omitted, passing the lexical threshold alone marks a duplicate.
14
+ };
15
+
16
/**
 * Splits text into a set of lower-cased word tokens for lexical comparison.
 *
 * Every character that is not a letter, combining mark, number or whitespace
 * is treated as a separator, and tokens shorter than two UTF-16 code units
 * are discarded.
 *
 * Letters and numbers are matched with Unicode property escapes rather than
 * `a-z0-9`: an ASCII-only class erases all non-English text, so such
 * snippets would tokenize to an empty set and never match lexically.
 * ASCII-only input produces exactly the same tokens as before.
 *
 * @param value - Text to tokenize.
 * @returns The distinct tokens.
 */
function tokenize(value: string): Set<string> {
  const tokens = value
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
    .split(/\s+/)
    .filter((token) => token.length >= 2);
  return new Set(tokens);
}
25
+
26
/**
 * Jaccard similarity of two token sets: `|a ∩ b| / |a ∪ b|`.
 *
 * @param a - First token set.
 * @param b - Second token set.
 * @returns A value in `[0, 1]`; `0` when either set is empty.
 */
function jaccard(a: Set<string>, b: Set<string>): number {
  if (a.size === 0 || b.size === 0) {
    return 0;
  }
  const shared = [...a].filter((token) => b.has(token)).length;
  const unionSize = a.size + b.size - shared;
  return unionSize > 0 ? shared / unionSize : 0;
}
39
+
40
/**
 * Removes duplicate results in stages, keeping the first occurrence of each.
 *
 * For every candidate, in input order:
 * 1. Results whose hash-normalized snippet is empty are dropped.
 * 2. Exact stage: a candidate whose normalized snippet hashes to the same
 *    value as an already kept result is dropped.
 * 3. Lexical stage: the candidate is compared with each kept result using
 *    token Jaccard similarity; pairs below `lexicalMinJaccard` are not
 *    duplicates.
 * 4. Semantic stage (only for pairs that passed the lexical stage): if
 *    semantic checking is disabled or no comparer is supplied, the pair is a
 *    duplicate. Otherwise the comparer decides: a non-number result or a
 *    score `>= semanticMinScore` marks a duplicate.
 *
 * The tokens of each kept result are computed once and cached, instead of
 * re-normalizing and re-tokenizing every kept snippet for every candidate.
 *
 * @param input - Results to de-duplicate, in priority order.
 * @param options - Thresholds and the optional semantic comparer.
 * @returns The kept results (same object references), in input order.
 */
export async function stagedDedupe(
  input: MemoryBraidResult[],
  options: DedupeOptions,
): Promise<MemoryBraidResult[]> {
  const kept: Array<{ result: MemoryBraidResult; tokens: Set<string> }> = [];
  const exact = new Set<string>();

  for (const candidate of input) {
    const normalized = normalizeForHash(candidate.snippet);
    if (!normalized) {
      continue;
    }

    const exactKey = sha256(normalized);
    if (exact.has(exactKey)) {
      continue;
    }

    const candidateTokens = tokenize(normalized);
    let duplicate = false;

    for (const chosen of kept) {
      const lexicalScore = jaccard(candidateTokens, chosen.tokens);
      if (lexicalScore < options.lexicalMinJaccard) {
        continue;
      }

      // Lexically similar and no semantic check available: treat as duplicate.
      if (!options.semanticEnabled || !options.semanticCompare) {
        duplicate = true;
        break;
      }

      const semantic = await options.semanticCompare(candidate, chosen.result);
      // A comparer that cannot produce a score falls back to the lexical verdict.
      if (typeof semantic !== "number" || semantic >= options.semanticMinScore) {
        duplicate = true;
        break;
      }
    }

    if (!duplicate) {
      exact.add(exactKey);
      kept.push({ result: candidate, tokens: candidateTokens });
    }
  }

  return kept.map((entry) => entry.result);
}