memory-braid 0.4.4 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -11
- package/openclaw.plugin.json +2 -1
- package/package.json +1 -1
- package/src/config.ts +19 -6
- package/src/entities.ts +196 -44
- package/src/extract.ts +11 -0
- package/src/index.ts +120 -71
- package/src/mem0-client.ts +60 -7
- package/src/state.ts +2 -2
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@ Memory Braid is an OpenClaw `kind: "memory"` plugin that augments local memory s
|
|
|
7
7
|
- Hybrid recall: local memory + Mem0, merged with weighted RRF.
|
|
8
8
|
- Capture-first Mem0 memory: plugin writes only captured memories to Mem0 (no markdown/session indexing).
|
|
9
9
|
- Capture pipeline modes: `local`, `hybrid`, `ml`.
|
|
10
|
-
- Optional entity extraction: multilingual NER with canonical `entity://...` URIs in memory metadata.
|
|
10
|
+
- Optional entity extraction: local multilingual NER or OpenAI NER with canonical `entity://...` URIs in memory metadata.
|
|
11
11
|
- Structured debug logs for troubleshooting and tuning.
|
|
12
12
|
|
|
13
13
|
## Breaking changes in 0.4.0
|
|
@@ -109,7 +109,7 @@ Note:
|
|
|
109
109
|
- `fixCommand` (copy/paste command for that machine)
|
|
110
110
|
- `pluginDir` (resolved extension directory when available)
|
|
111
111
|
|
|
112
|
-
## Quick start: hybrid capture +
|
|
112
|
+
## Quick start: hybrid capture + entity extraction
|
|
113
113
|
|
|
114
114
|
Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
|
|
115
115
|
|
|
@@ -156,8 +156,9 @@ Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
|
|
|
156
156
|
},
|
|
157
157
|
"entityExtraction": {
|
|
158
158
|
"enabled": true,
|
|
159
|
-
"provider": "
|
|
160
|
-
"model": "
|
|
159
|
+
"provider": "openai",
|
|
160
|
+
"model": "gpt-4o-mini",
|
|
161
|
+
"timeoutMs": 2500,
|
|
161
162
|
"minScore": 0.65,
|
|
162
163
|
"maxEntitiesPerMemory": 8,
|
|
163
164
|
"startup": {
|
|
@@ -171,6 +172,18 @@ Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
|
|
|
171
172
|
}
|
|
172
173
|
```
|
|
173
174
|
|
|
175
|
+
Local-model alternative (fully backward compatible):
|
|
176
|
+
|
|
177
|
+
```json
|
|
178
|
+
{
|
|
179
|
+
"entityExtraction": {
|
|
180
|
+
"enabled": true,
|
|
181
|
+
"provider": "multilingual_ner",
|
|
182
|
+
"model": "Xenova/bert-base-multilingual-cased-ner-hrl"
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
```
|
|
186
|
+
|
|
174
187
|
Then restart:
|
|
175
188
|
|
|
176
189
|
```bash
|
|
@@ -186,7 +199,7 @@ openclaw plugins info memory-braid
|
|
|
186
199
|
openclaw gateway status
|
|
187
200
|
```
|
|
188
201
|
|
|
189
|
-
2. Trigger/inspect
|
|
202
|
+
2. Trigger/inspect entity warmup:
|
|
190
203
|
|
|
191
204
|
```bash
|
|
192
205
|
openclaw agent --agent main --message "/memorybraid warmup" --json
|
|
@@ -206,7 +219,7 @@ rg -n "memory_braid\\.startup|memory_braid\\.capture|memory_braid\\.entity|memor
|
|
|
206
219
|
|
|
207
220
|
Expected events:
|
|
208
221
|
- `memory_braid.startup`
|
|
209
|
-
- `memory_braid.entity.model_load`
|
|
222
|
+
- `memory_braid.entity.model_load` (local `multilingual_ner` provider only)
|
|
210
223
|
- `memory_braid.entity.warmup`
|
|
211
224
|
- `memory_braid.capture.extract`
|
|
212
225
|
- `memory_braid.capture.ml` (for `capture.mode=hybrid|ml`)
|
|
@@ -436,8 +449,9 @@ Use this preset when:
|
|
|
436
449
|
},
|
|
437
450
|
"entityExtraction": {
|
|
438
451
|
"enabled": true,
|
|
439
|
-
"provider": "
|
|
440
|
-
"model": "
|
|
452
|
+
"provider": "openai",
|
|
453
|
+
"model": "gpt-4o-mini",
|
|
454
|
+
"timeoutMs": 2500,
|
|
441
455
|
"minScore": 0.65,
|
|
442
456
|
"maxEntitiesPerMemory": 8,
|
|
443
457
|
"startup": {
|
|
@@ -505,7 +519,8 @@ Entity extraction defaults are:
|
|
|
505
519
|
|
|
506
520
|
- `entityExtraction.enabled`: `false`
|
|
507
521
|
- `entityExtraction.provider`: `"multilingual_ner"`
|
|
508
|
-
- `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"`
|
|
522
|
+
- `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"` (or `"gpt-4o-mini"` when `provider: "openai"` and model is unset)
|
|
523
|
+
- `entityExtraction.timeoutMs`: `2500`
|
|
509
524
|
- `entityExtraction.minScore`: `0.65`
|
|
510
525
|
- `entityExtraction.maxEntitiesPerMemory`: `8`
|
|
511
526
|
- `entityExtraction.startup.downloadOnStartup`: `true`
|
|
@@ -513,9 +528,9 @@ Entity extraction defaults are:
|
|
|
513
528
|
|
|
514
529
|
When enabled:
|
|
515
530
|
|
|
516
|
-
-
|
|
531
|
+
- Local NER model cache/download path is `<OPENCLAW_STATE_DIR>/memory-braid/models/entity-extraction` (typically `~/.openclaw/memory-braid/models/entity-extraction`).
|
|
517
532
|
- Captured memories get `metadata.entities` and `metadata.entityUris` (canonical IDs like `entity://person/john-doe`).
|
|
518
|
-
- Startup
|
|
533
|
+
- Startup warmup runs for both providers (`downloadOnStartup: true`).
|
|
519
534
|
|
|
520
535
|
Warmup command:
|
|
521
536
|
|
package/openclaw.plugin.json
CHANGED
|
@@ -72,13 +72,14 @@
|
|
|
72
72
|
"enabled": { "type": "boolean", "default": false },
|
|
73
73
|
"provider": {
|
|
74
74
|
"type": "string",
|
|
75
|
-
"enum": ["multilingual_ner"],
|
|
75
|
+
"enum": ["multilingual_ner", "openai"],
|
|
76
76
|
"default": "multilingual_ner"
|
|
77
77
|
},
|
|
78
78
|
"model": {
|
|
79
79
|
"type": "string",
|
|
80
80
|
"default": "Xenova/bert-base-multilingual-cased-ner-hrl"
|
|
81
81
|
},
|
|
82
|
+
"timeoutMs": { "type": "integer", "minimum": 250, "maximum": 30000, "default": 2500 },
|
|
82
83
|
"minScore": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.65 },
|
|
83
84
|
"maxEntitiesPerMemory": { "type": "integer", "minimum": 1, "maximum": 50, "default": 8 },
|
|
84
85
|
"startup": {
|
package/package.json
CHANGED
package/src/config.ts
CHANGED
|
@@ -31,8 +31,9 @@ export type MemoryBraidConfig = {
|
|
|
31
31
|
};
|
|
32
32
|
entityExtraction: {
|
|
33
33
|
enabled: boolean;
|
|
34
|
-
provider: "multilingual_ner";
|
|
34
|
+
provider: "multilingual_ner" | "openai";
|
|
35
35
|
model: string;
|
|
36
|
+
timeoutMs: number;
|
|
36
37
|
minScore: number;
|
|
37
38
|
maxEntitiesPerMemory: number;
|
|
38
39
|
startup: {
|
|
@@ -101,6 +102,7 @@ const DEFAULTS: MemoryBraidConfig = {
|
|
|
101
102
|
enabled: false,
|
|
102
103
|
provider: "multilingual_ner",
|
|
103
104
|
model: "Xenova/bert-base-multilingual-cased-ner-hrl",
|
|
105
|
+
timeoutMs: 2500,
|
|
104
106
|
minScore: 0.65,
|
|
105
107
|
maxEntitiesPerMemory: 8,
|
|
106
108
|
startup: {
|
|
@@ -184,6 +186,14 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
184
186
|
rawCaptureMode === "local" || rawCaptureMode === "hybrid" || rawCaptureMode === "ml"
|
|
185
187
|
? rawCaptureMode
|
|
186
188
|
: DEFAULTS.capture.mode;
|
|
189
|
+
const entityProvider = entityExtraction.provider === "openai" ? "openai" : "multilingual_ner";
|
|
190
|
+
const parsedEntityModel = asString(entityExtraction.model);
|
|
191
|
+
const entityModel =
|
|
192
|
+
entityProvider === "openai"
|
|
193
|
+
? parsedEntityModel && parsedEntityModel !== DEFAULTS.entityExtraction.model
|
|
194
|
+
? parsedEntityModel
|
|
195
|
+
: "gpt-4o-mini"
|
|
196
|
+
: parsedEntityModel ?? DEFAULTS.entityExtraction.model;
|
|
187
197
|
|
|
188
198
|
return {
|
|
189
199
|
enabled: asBoolean(root.enabled, DEFAULTS.enabled),
|
|
@@ -221,11 +231,14 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
221
231
|
},
|
|
222
232
|
entityExtraction: {
|
|
223
233
|
enabled: asBoolean(entityExtraction.enabled, DEFAULTS.entityExtraction.enabled),
|
|
224
|
-
provider:
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
234
|
+
provider: entityProvider,
|
|
235
|
+
model: entityModel,
|
|
236
|
+
timeoutMs: asInt(
|
|
237
|
+
entityExtraction.timeoutMs,
|
|
238
|
+
DEFAULTS.entityExtraction.timeoutMs,
|
|
239
|
+
250,
|
|
240
|
+
30_000,
|
|
241
|
+
),
|
|
229
242
|
minScore: asNumber(entityExtraction.minScore, DEFAULTS.entityExtraction.minScore, 0, 1),
|
|
230
243
|
maxEntitiesPerMemory: asInt(
|
|
231
244
|
entityExtraction.maxEntitiesPerMemory,
|
package/src/entities.ts
CHANGED
|
@@ -15,6 +15,15 @@ type NerRecord = {
|
|
|
15
15
|
end?: unknown;
|
|
16
16
|
};
|
|
17
17
|
|
|
18
|
+
type LlmEntityRecord = {
|
|
19
|
+
text?: unknown;
|
|
20
|
+
type?: unknown;
|
|
21
|
+
label?: unknown;
|
|
22
|
+
entity?: unknown;
|
|
23
|
+
entity_group?: unknown;
|
|
24
|
+
score?: unknown;
|
|
25
|
+
};
|
|
26
|
+
|
|
18
27
|
export type ExtractedEntity = {
|
|
19
28
|
text: string;
|
|
20
29
|
type: "person" | "organization" | "location" | "misc";
|
|
@@ -81,6 +90,44 @@ function normalizeEntityText(raw: unknown): string {
|
|
|
81
90
|
return normalizeWhitespace(raw.replace(/^##/, "").replace(/^▁/, ""));
|
|
82
91
|
}
|
|
83
92
|
|
|
93
|
+
function clampScore(value: unknown, fallback = 0): number {
|
|
94
|
+
if (typeof value !== "number" || !Number.isFinite(value)) {
|
|
95
|
+
return Math.max(0, Math.min(1, fallback));
|
|
96
|
+
}
|
|
97
|
+
return Math.max(0, Math.min(1, value));
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function parseJsonObjectArray(raw: string): Array<Record<string, unknown>> {
|
|
101
|
+
const attempts = [raw.trim()];
|
|
102
|
+
|
|
103
|
+
const fencedMatch = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/i);
|
|
104
|
+
if (fencedMatch?.[1]) {
|
|
105
|
+
attempts.push(fencedMatch[1].trim());
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
const firstBracket = raw.indexOf("[");
|
|
109
|
+
const lastBracket = raw.lastIndexOf("]");
|
|
110
|
+
if (firstBracket >= 0 && lastBracket > firstBracket) {
|
|
111
|
+
attempts.push(raw.slice(firstBracket, lastBracket + 1).trim());
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
for (const attempt of attempts) {
|
|
115
|
+
try {
|
|
116
|
+
const parsed = JSON.parse(attempt) as unknown;
|
|
117
|
+
if (!Array.isArray(parsed)) {
|
|
118
|
+
continue;
|
|
119
|
+
}
|
|
120
|
+
return parsed.filter((entry) => entry && typeof entry === "object") as Array<
|
|
121
|
+
Record<string, unknown>
|
|
122
|
+
>;
|
|
123
|
+
} catch {
|
|
124
|
+
continue;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return [];
|
|
129
|
+
}
|
|
130
|
+
|
|
84
131
|
type NormalizedEntityToken = {
|
|
85
132
|
text: string;
|
|
86
133
|
type: ExtractedEntity["type"];
|
|
@@ -213,6 +260,29 @@ function collapseAdjacentEntityTokens(
|
|
|
213
260
|
return collapsed;
|
|
214
261
|
}
|
|
215
262
|
|
|
263
|
+
function dedupeAndLimitEntities(
|
|
264
|
+
entities: Array<Omit<ExtractedEntity, "canonicalUri">>,
|
|
265
|
+
maxEntities: number,
|
|
266
|
+
): ExtractedEntity[] {
|
|
267
|
+
const deduped = new Map<string, ExtractedEntity>();
|
|
268
|
+
for (const entity of entities) {
|
|
269
|
+
const canonicalUri = buildCanonicalEntityUri(entity.type, entity.text);
|
|
270
|
+
const current = deduped.get(canonicalUri);
|
|
271
|
+
if (!current || entity.score > current.score) {
|
|
272
|
+
deduped.set(canonicalUri, {
|
|
273
|
+
text: entity.text,
|
|
274
|
+
type: entity.type,
|
|
275
|
+
score: entity.score,
|
|
276
|
+
canonicalUri,
|
|
277
|
+
});
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
return Array.from(deduped.values())
|
|
282
|
+
.sort((a, b) => b.score - a.score)
|
|
283
|
+
.slice(0, maxEntities);
|
|
284
|
+
}
|
|
285
|
+
|
|
216
286
|
type EntityExtractionOptions = {
|
|
217
287
|
stateDir?: string;
|
|
218
288
|
};
|
|
@@ -256,7 +326,10 @@ export class EntityExtractionManager {
|
|
|
256
326
|
model: this.cfg.model,
|
|
257
327
|
minScore: this.cfg.minScore,
|
|
258
328
|
maxEntitiesPerMemory: this.cfg.maxEntitiesPerMemory,
|
|
259
|
-
cacheDir:
|
|
329
|
+
cacheDir:
|
|
330
|
+
this.cfg.provider === "multilingual_ner"
|
|
331
|
+
? resolveEntityModelCacheDir(this.stateDir)
|
|
332
|
+
: "n/a",
|
|
260
333
|
};
|
|
261
334
|
}
|
|
262
335
|
|
|
@@ -274,10 +347,14 @@ export class EntityExtractionManager {
|
|
|
274
347
|
error?: string;
|
|
275
348
|
}> {
|
|
276
349
|
const startedAt = Date.now();
|
|
350
|
+
const cacheDir =
|
|
351
|
+
this.cfg.provider === "multilingual_ner"
|
|
352
|
+
? resolveEntityModelCacheDir(this.stateDir)
|
|
353
|
+
: "n/a";
|
|
277
354
|
if (!this.cfg.enabled) {
|
|
278
355
|
return {
|
|
279
356
|
ok: false,
|
|
280
|
-
cacheDir
|
|
357
|
+
cacheDir,
|
|
281
358
|
model: this.cfg.model,
|
|
282
359
|
entities: 0,
|
|
283
360
|
durMs: Date.now() - startedAt,
|
|
@@ -285,29 +362,17 @@ export class EntityExtractionManager {
|
|
|
285
362
|
};
|
|
286
363
|
}
|
|
287
364
|
|
|
288
|
-
const pipeline = await this.ensurePipeline(params?.forceReload);
|
|
289
|
-
if (!pipeline) {
|
|
290
|
-
return {
|
|
291
|
-
ok: false,
|
|
292
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
293
|
-
model: this.cfg.model,
|
|
294
|
-
entities: 0,
|
|
295
|
-
durMs: Date.now() - startedAt,
|
|
296
|
-
error: "model_load_failed",
|
|
297
|
-
};
|
|
298
|
-
}
|
|
299
|
-
|
|
300
365
|
try {
|
|
301
|
-
const entities = await this.
|
|
302
|
-
pipeline,
|
|
366
|
+
const entities = await this.extractWithProvider({
|
|
303
367
|
text: params?.text ?? this.cfg.startup.warmupText,
|
|
368
|
+
forceReload: params?.forceReload,
|
|
304
369
|
});
|
|
305
370
|
this.log.info("memory_braid.entity.warmup", {
|
|
306
371
|
runId: params?.runId,
|
|
307
372
|
reason: params?.reason ?? "manual",
|
|
308
373
|
provider: this.cfg.provider,
|
|
309
374
|
model: this.cfg.model,
|
|
310
|
-
cacheDir
|
|
375
|
+
cacheDir,
|
|
311
376
|
entities: entities.length,
|
|
312
377
|
entityTypes: summarizeEntityTypes(entities),
|
|
313
378
|
sampleEntityUris: entities.slice(0, 5).map((entry) => entry.canonicalUri),
|
|
@@ -315,7 +380,7 @@ export class EntityExtractionManager {
|
|
|
315
380
|
});
|
|
316
381
|
return {
|
|
317
382
|
ok: true,
|
|
318
|
-
cacheDir
|
|
383
|
+
cacheDir,
|
|
319
384
|
model: this.cfg.model,
|
|
320
385
|
entities: entities.length,
|
|
321
386
|
durMs: Date.now() - startedAt,
|
|
@@ -327,12 +392,12 @@ export class EntityExtractionManager {
|
|
|
327
392
|
reason: params?.reason ?? "manual",
|
|
328
393
|
provider: this.cfg.provider,
|
|
329
394
|
model: this.cfg.model,
|
|
330
|
-
cacheDir
|
|
395
|
+
cacheDir,
|
|
331
396
|
error: message,
|
|
332
397
|
});
|
|
333
398
|
return {
|
|
334
399
|
ok: false,
|
|
335
|
-
cacheDir
|
|
400
|
+
cacheDir,
|
|
336
401
|
model: this.cfg.model,
|
|
337
402
|
entities: 0,
|
|
338
403
|
durMs: Date.now() - startedAt,
|
|
@@ -351,13 +416,8 @@ export class EntityExtractionManager {
|
|
|
351
416
|
return [];
|
|
352
417
|
}
|
|
353
418
|
|
|
354
|
-
const pipeline = await this.ensurePipeline();
|
|
355
|
-
if (!pipeline) {
|
|
356
|
-
return [];
|
|
357
|
-
}
|
|
358
|
-
|
|
359
419
|
try {
|
|
360
|
-
const entities = await this.
|
|
420
|
+
const entities = await this.extractWithProvider({ text });
|
|
361
421
|
this.log.debug("memory_braid.entity.extract", {
|
|
362
422
|
runId: params.runId,
|
|
363
423
|
provider: this.cfg.provider,
|
|
@@ -378,11 +438,112 @@ export class EntityExtractionManager {
|
|
|
378
438
|
}
|
|
379
439
|
}
|
|
380
440
|
|
|
441
|
+
private async extractWithProvider(params: {
|
|
442
|
+
text: string;
|
|
443
|
+
forceReload?: boolean;
|
|
444
|
+
}): Promise<ExtractedEntity[]> {
|
|
445
|
+
if (this.cfg.provider === "openai") {
|
|
446
|
+
return this.extractWithOpenAi(params.text);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const pipeline = await this.ensurePipeline(params.forceReload);
|
|
450
|
+
if (!pipeline) {
|
|
451
|
+
throw new Error("model_load_failed");
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
return this.extractWithPipeline({ pipeline, text: params.text });
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
private async extractWithOpenAi(text: string): Promise<ExtractedEntity[]> {
|
|
458
|
+
const key = process.env.OPENAI_API_KEY?.trim();
|
|
459
|
+
if (!key) {
|
|
460
|
+
throw new Error("OPENAI_API_KEY is not set");
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
const controller = new AbortController();
|
|
464
|
+
const timer = setTimeout(() => controller.abort(), this.cfg.timeoutMs);
|
|
465
|
+
|
|
466
|
+
try {
|
|
467
|
+
const prompt = [
|
|
468
|
+
"Extract named entities from this text.",
|
|
469
|
+
"Return ONLY JSON array.",
|
|
470
|
+
"Each item: {text:string, type:string, score:number}.",
|
|
471
|
+
"type must be one of: person, organization, location, misc.",
|
|
472
|
+
"score must be between 0 and 1.",
|
|
473
|
+
"Do not include duplicates.",
|
|
474
|
+
text,
|
|
475
|
+
].join("\n");
|
|
476
|
+
|
|
477
|
+
const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
478
|
+
method: "POST",
|
|
479
|
+
headers: {
|
|
480
|
+
Authorization: `Bearer ${key}`,
|
|
481
|
+
"Content-Type": "application/json",
|
|
482
|
+
},
|
|
483
|
+
body: JSON.stringify({
|
|
484
|
+
model: this.cfg.model,
|
|
485
|
+
temperature: 0,
|
|
486
|
+
messages: [
|
|
487
|
+
{
|
|
488
|
+
role: "system",
|
|
489
|
+
content: "You return strict JSON only.",
|
|
490
|
+
},
|
|
491
|
+
{
|
|
492
|
+
role: "user",
|
|
493
|
+
content: prompt,
|
|
494
|
+
},
|
|
495
|
+
],
|
|
496
|
+
}),
|
|
497
|
+
signal: controller.signal,
|
|
498
|
+
});
|
|
499
|
+
const data = (await response.json()) as {
|
|
500
|
+
error?: { message?: string };
|
|
501
|
+
choices?: Array<{ message?: { content?: string } }>;
|
|
502
|
+
};
|
|
503
|
+
|
|
504
|
+
if (!response.ok) {
|
|
505
|
+
throw new Error(data.error?.message ?? `OpenAI HTTP ${response.status}`);
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
const content = data.choices?.[0]?.message?.content ?? "";
|
|
509
|
+
const parsed = parseJsonObjectArray(content);
|
|
510
|
+
|
|
511
|
+
const normalized: Array<Omit<ExtractedEntity, "canonicalUri">> = [];
|
|
512
|
+
for (const row of parsed) {
|
|
513
|
+
const record = row as LlmEntityRecord;
|
|
514
|
+
const entityText = normalizeEntityText(record.text);
|
|
515
|
+
if (!entityText) {
|
|
516
|
+
continue;
|
|
517
|
+
}
|
|
518
|
+
const score = clampScore(record.score, 0.5);
|
|
519
|
+
if (score < this.cfg.minScore) {
|
|
520
|
+
continue;
|
|
521
|
+
}
|
|
522
|
+
const type = normalizeEntityType(
|
|
523
|
+
record.type ?? record.label ?? record.entity_group ?? record.entity,
|
|
524
|
+
);
|
|
525
|
+
normalized.push({
|
|
526
|
+
text: entityText,
|
|
527
|
+
type,
|
|
528
|
+
score,
|
|
529
|
+
});
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
return dedupeAndLimitEntities(normalized, this.cfg.maxEntitiesPerMemory);
|
|
533
|
+
} finally {
|
|
534
|
+
clearTimeout(timer);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
381
538
|
private async ensurePipeline(forceReload = false): Promise<NerPipeline | null> {
|
|
382
539
|
if (!this.cfg.enabled) {
|
|
383
540
|
return null;
|
|
384
541
|
}
|
|
385
542
|
|
|
543
|
+
if (this.cfg.provider !== "multilingual_ner") {
|
|
544
|
+
return null;
|
|
545
|
+
}
|
|
546
|
+
|
|
386
547
|
if (forceReload) {
|
|
387
548
|
this.pipelinePromise = null;
|
|
388
549
|
}
|
|
@@ -463,7 +624,7 @@ export class EntityExtractionManager {
|
|
|
463
624
|
if (!entityText) {
|
|
464
625
|
continue;
|
|
465
626
|
}
|
|
466
|
-
const score =
|
|
627
|
+
const score = clampScore(record.score);
|
|
467
628
|
if (score < this.cfg.minScore) {
|
|
468
629
|
continue;
|
|
469
630
|
}
|
|
@@ -479,22 +640,13 @@ export class EntityExtractionManager {
|
|
|
479
640
|
}
|
|
480
641
|
|
|
481
642
|
const collapsed = collapseAdjacentEntityTokens(normalized, params.text);
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
score: token.score,
|
|
491
|
-
canonicalUri,
|
|
492
|
-
});
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
return Array.from(deduped.values())
|
|
497
|
-
.sort((a, b) => b.score - a.score)
|
|
498
|
-
.slice(0, this.cfg.maxEntitiesPerMemory);
|
|
643
|
+
return dedupeAndLimitEntities(
|
|
644
|
+
collapsed.map((token) => ({
|
|
645
|
+
text: token.text,
|
|
646
|
+
type: token.type,
|
|
647
|
+
score: token.score,
|
|
648
|
+
})),
|
|
649
|
+
this.cfg.maxEntitiesPerMemory,
|
|
650
|
+
);
|
|
499
651
|
}
|
|
500
652
|
}
|
package/src/extract.ts
CHANGED
|
@@ -468,6 +468,17 @@ export async function extractCandidates(params: {
|
|
|
468
468
|
|
|
469
469
|
try {
|
|
470
470
|
if (params.cfg.capture.mode === "hybrid") {
|
|
471
|
+
if (heuristic.length === 0) {
|
|
472
|
+
params.log.debug("memory_braid.capture.ml", {
|
|
473
|
+
runId: params.runId,
|
|
474
|
+
mode: params.cfg.capture.mode,
|
|
475
|
+
provider: params.cfg.capture.ml.provider,
|
|
476
|
+
model: params.cfg.capture.ml.model,
|
|
477
|
+
decision: "skip_ml_enrichment_no_heuristic_candidates",
|
|
478
|
+
});
|
|
479
|
+
return heuristic;
|
|
480
|
+
}
|
|
481
|
+
|
|
471
482
|
const ml = await callMlEnrichment({
|
|
472
483
|
provider: params.cfg.capture.ml.provider,
|
|
473
484
|
model: params.cfg.capture.ml.model,
|
package/src/index.ts
CHANGED
|
@@ -1345,105 +1345,153 @@ const memoryBraidPlugin = {
|
|
|
1345
1345
|
return;
|
|
1346
1346
|
}
|
|
1347
1347
|
|
|
1348
|
-
|
|
1348
|
+
const thirtyDays = 30 * 24 * 60 * 60 * 1000;
|
|
1349
|
+
const candidateEntries = candidates.map((candidate) => ({
|
|
1350
|
+
candidate,
|
|
1351
|
+
hash: sha256(normalizeForHash(candidate.text)),
|
|
1352
|
+
}));
|
|
1353
|
+
|
|
1354
|
+
const prepared = await withStateLock(runtimeStatePaths.stateLockFile, async () => {
|
|
1349
1355
|
const dedupe = await readCaptureDedupeState(runtimeStatePaths);
|
|
1350
|
-
const stats = await readStatsState(runtimeStatePaths);
|
|
1351
|
-
const lifecycle = cfg.lifecycle.enabled
|
|
1352
|
-
? await readLifecycleState(runtimeStatePaths)
|
|
1353
|
-
: null;
|
|
1354
1356
|
const now = Date.now();
|
|
1355
|
-
|
|
1357
|
+
|
|
1358
|
+
let pruned = 0;
|
|
1356
1359
|
for (const [key, ts] of Object.entries(dedupe.seen)) {
|
|
1357
1360
|
if (now - ts > thirtyDays) {
|
|
1358
1361
|
delete dedupe.seen[key];
|
|
1362
|
+
pruned += 1;
|
|
1359
1363
|
}
|
|
1360
1364
|
}
|
|
1361
1365
|
|
|
1362
|
-
let persisted = 0;
|
|
1363
1366
|
let dedupeSkipped = 0;
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
let mem0AddWithoutId = 0;
|
|
1369
|
-
for (const candidate of candidates) {
|
|
1370
|
-
const hash = sha256(normalizeForHash(candidate.text));
|
|
1371
|
-
if (dedupe.seen[hash]) {
|
|
1367
|
+
const pending: typeof candidateEntries = [];
|
|
1368
|
+
const seenInBatch = new Set<string>();
|
|
1369
|
+
for (const entry of candidateEntries) {
|
|
1370
|
+
if (dedupe.seen[entry.hash] || seenInBatch.has(entry.hash)) {
|
|
1372
1371
|
dedupeSkipped += 1;
|
|
1373
1372
|
continue;
|
|
1374
1373
|
}
|
|
1374
|
+
seenInBatch.add(entry.hash);
|
|
1375
|
+
pending.push(entry);
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1378
|
+
if (pruned > 0) {
|
|
1379
|
+
await writeCaptureDedupeState(runtimeStatePaths, dedupe);
|
|
1380
|
+
}
|
|
1381
|
+
|
|
1382
|
+
return {
|
|
1383
|
+
dedupeSkipped,
|
|
1384
|
+
pending,
|
|
1385
|
+
};
|
|
1386
|
+
});
|
|
1387
|
+
|
|
1388
|
+
let entityAnnotatedCandidates = 0;
|
|
1389
|
+
let totalEntitiesAttached = 0;
|
|
1390
|
+
let mem0AddAttempts = 0;
|
|
1391
|
+
let mem0AddWithId = 0;
|
|
1392
|
+
let mem0AddWithoutId = 0;
|
|
1393
|
+
const successfulAdds: Array<{
|
|
1394
|
+
memoryId: string;
|
|
1395
|
+
hash: string;
|
|
1396
|
+
category: (typeof candidates)[number]["category"];
|
|
1397
|
+
}> = [];
|
|
1398
|
+
|
|
1399
|
+
for (const entry of prepared.pending) {
|
|
1400
|
+
const { candidate, hash } = entry;
|
|
1401
|
+
const metadata: Record<string, unknown> = {
|
|
1402
|
+
sourceType: "capture",
|
|
1403
|
+
workspaceHash: scope.workspaceHash,
|
|
1404
|
+
agentId: scope.agentId,
|
|
1405
|
+
sessionKey: scope.sessionKey,
|
|
1406
|
+
category: candidate.category,
|
|
1407
|
+
captureScore: candidate.score,
|
|
1408
|
+
extractionSource: candidate.source,
|
|
1409
|
+
contentHash: hash,
|
|
1410
|
+
indexedAt: new Date().toISOString(),
|
|
1411
|
+
};
|
|
1412
|
+
|
|
1413
|
+
if (cfg.entityExtraction.enabled) {
|
|
1414
|
+
const entities = await entityExtraction.extract({
|
|
1415
|
+
text: candidate.text,
|
|
1416
|
+
runId,
|
|
1417
|
+
});
|
|
1418
|
+
if (entities.length > 0) {
|
|
1419
|
+
entityAnnotatedCandidates += 1;
|
|
1420
|
+
totalEntitiesAttached += entities.length;
|
|
1421
|
+
metadata.entityUris = entities.map((entity) => entity.canonicalUri);
|
|
1422
|
+
metadata.entities = entities;
|
|
1423
|
+
}
|
|
1424
|
+
}
|
|
1375
1425
|
|
|
1376
|
-
|
|
1377
|
-
|
|
1426
|
+
mem0AddAttempts += 1;
|
|
1427
|
+
const addResult = await mem0.addMemory({
|
|
1428
|
+
text: candidate.text,
|
|
1429
|
+
scope,
|
|
1430
|
+
metadata,
|
|
1431
|
+
runId,
|
|
1432
|
+
});
|
|
1433
|
+
if (addResult.id) {
|
|
1434
|
+
mem0AddWithId += 1;
|
|
1435
|
+
successfulAdds.push({
|
|
1436
|
+
memoryId: addResult.id,
|
|
1437
|
+
hash,
|
|
1438
|
+
category: candidate.category,
|
|
1439
|
+
});
|
|
1440
|
+
} else {
|
|
1441
|
+
mem0AddWithoutId += 1;
|
|
1442
|
+
log.warn("memory_braid.capture.persist", {
|
|
1443
|
+
runId,
|
|
1444
|
+
reason: "mem0_add_missing_id",
|
|
1378
1445
|
workspaceHash: scope.workspaceHash,
|
|
1379
1446
|
agentId: scope.agentId,
|
|
1380
1447
|
sessionKey: scope.sessionKey,
|
|
1448
|
+
contentHashPrefix: hash.slice(0, 12),
|
|
1381
1449
|
category: candidate.category,
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
indexedAt: new Date(now).toISOString(),
|
|
1386
|
-
};
|
|
1450
|
+
});
|
|
1451
|
+
}
|
|
1452
|
+
}
|
|
1387
1453
|
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1454
|
+
await withStateLock(runtimeStatePaths.stateLockFile, async () => {
|
|
1455
|
+
const dedupe = await readCaptureDedupeState(runtimeStatePaths);
|
|
1456
|
+
const stats = await readStatsState(runtimeStatePaths);
|
|
1457
|
+
const lifecycle = cfg.lifecycle.enabled
|
|
1458
|
+
? await readLifecycleState(runtimeStatePaths)
|
|
1459
|
+
: null;
|
|
1460
|
+
const now = Date.now();
|
|
1461
|
+
|
|
1462
|
+
for (const [key, ts] of Object.entries(dedupe.seen)) {
|
|
1463
|
+
if (now - ts > thirtyDays) {
|
|
1464
|
+
delete dedupe.seen[key];
|
|
1399
1465
|
}
|
|
1466
|
+
}
|
|
1400
1467
|
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
persisted += 1;
|
|
1412
|
-
if (lifecycle) {
|
|
1413
|
-
const memoryId = addResult.id;
|
|
1414
|
-
const existing = lifecycle.entries[memoryId];
|
|
1415
|
-
lifecycle.entries[memoryId] = {
|
|
1416
|
-
memoryId,
|
|
1417
|
-
contentHash: hash,
|
|
1418
|
-
workspaceHash: scope.workspaceHash,
|
|
1419
|
-
agentId: scope.agentId,
|
|
1420
|
-
sessionKey: scope.sessionKey,
|
|
1421
|
-
category: candidate.category,
|
|
1422
|
-
createdAt: existing?.createdAt ?? now,
|
|
1423
|
-
lastCapturedAt: now,
|
|
1424
|
-
lastRecalledAt: existing?.lastRecalledAt,
|
|
1425
|
-
recallCount: existing?.recallCount ?? 0,
|
|
1426
|
-
updatedAt: now,
|
|
1427
|
-
};
|
|
1428
|
-
}
|
|
1429
|
-
} else {
|
|
1430
|
-
mem0AddWithoutId += 1;
|
|
1431
|
-
log.warn("memory_braid.capture.persist", {
|
|
1432
|
-
runId,
|
|
1433
|
-
reason: "mem0_add_missing_id",
|
|
1468
|
+
let persisted = 0;
|
|
1469
|
+
for (const entry of successfulAdds) {
|
|
1470
|
+
dedupe.seen[entry.hash] = now;
|
|
1471
|
+
persisted += 1;
|
|
1472
|
+
|
|
1473
|
+
if (lifecycle) {
|
|
1474
|
+
const existing = lifecycle.entries[entry.memoryId];
|
|
1475
|
+
lifecycle.entries[entry.memoryId] = {
|
|
1476
|
+
memoryId: entry.memoryId,
|
|
1477
|
+
contentHash: entry.hash,
|
|
1434
1478
|
workspaceHash: scope.workspaceHash,
|
|
1435
1479
|
agentId: scope.agentId,
|
|
1436
1480
|
sessionKey: scope.sessionKey,
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1481
|
+
category: entry.category,
|
|
1482
|
+
createdAt: existing?.createdAt ?? now,
|
|
1483
|
+
lastCapturedAt: now,
|
|
1484
|
+
lastRecalledAt: existing?.lastRecalledAt,
|
|
1485
|
+
recallCount: existing?.recallCount ?? 0,
|
|
1486
|
+
updatedAt: now,
|
|
1487
|
+
};
|
|
1440
1488
|
}
|
|
1441
1489
|
}
|
|
1442
1490
|
|
|
1443
1491
|
stats.capture.runs += 1;
|
|
1444
1492
|
stats.capture.runsWithCandidates += 1;
|
|
1445
1493
|
stats.capture.candidates += candidates.length;
|
|
1446
|
-
stats.capture.dedupeSkipped += dedupeSkipped;
|
|
1494
|
+
stats.capture.dedupeSkipped += prepared.dedupeSkipped;
|
|
1447
1495
|
stats.capture.persisted += persisted;
|
|
1448
1496
|
stats.capture.mem0AddAttempts += mem0AddAttempts;
|
|
1449
1497
|
stats.capture.mem0AddWithId += mem0AddWithId;
|
|
@@ -1464,7 +1512,8 @@ const memoryBraidPlugin = {
|
|
|
1464
1512
|
agentId: scope.agentId,
|
|
1465
1513
|
sessionKey: scope.sessionKey,
|
|
1466
1514
|
candidates: candidates.length,
|
|
1467
|
-
|
|
1515
|
+
pending: prepared.pending.length,
|
|
1516
|
+
dedupeSkipped: prepared.dedupeSkipped,
|
|
1468
1517
|
persisted,
|
|
1469
1518
|
mem0AddAttempts,
|
|
1470
1519
|
mem0AddWithId,
|
package/src/mem0-client.ts
CHANGED
|
@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
|
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import os from "node:os";
|
|
4
4
|
import path from "node:path";
|
|
5
|
-
import { normalizeForHash } from "./chunking.js";
|
|
5
|
+
import { normalizeForHash, sha256 } from "./chunking.js";
|
|
6
6
|
import type { MemoryBraidConfig } from "./config.js";
|
|
7
7
|
import { MemoryBraidLogger } from "./logger.js";
|
|
8
8
|
import type { MemoryBraidResult, ScopeKey } from "./types.js";
|
|
@@ -403,6 +403,9 @@ type Mem0AdapterOptions = {
|
|
|
403
403
|
stateDir?: string;
|
|
404
404
|
};
|
|
405
405
|
|
|
406
|
+
const SEMANTIC_SEARCH_CACHE_TTL_MS = 30_000;
|
|
407
|
+
const SEMANTIC_SEARCH_CACHE_MAX_ENTRIES = 256;
|
|
408
|
+
|
|
406
409
|
export class Mem0Adapter {
|
|
407
410
|
private cloudClient: CloudClientLike | null = null;
|
|
408
411
|
private ossClient: OssClientLike | null = null;
|
|
@@ -410,6 +413,13 @@ export class Mem0Adapter {
|
|
|
410
413
|
private readonly log: MemoryBraidLogger;
|
|
411
414
|
private readonly pluginDir?: string;
|
|
412
415
|
private stateDir?: string;
|
|
416
|
+
private readonly semanticSearchCache = new Map<
|
|
417
|
+
string,
|
|
418
|
+
{
|
|
419
|
+
expiresAt: number;
|
|
420
|
+
results: MemoryBraidResult[];
|
|
421
|
+
}
|
|
422
|
+
>();
|
|
413
423
|
|
|
414
424
|
constructor(cfg: MemoryBraidConfig, log: MemoryBraidLogger, options?: Mem0AdapterOptions) {
|
|
415
425
|
this.cfg = cfg;
|
|
@@ -425,6 +435,7 @@ export class Mem0Adapter {
|
|
|
425
435
|
}
|
|
426
436
|
this.stateDir = next;
|
|
427
437
|
this.ossClient = null;
|
|
438
|
+
this.semanticSearchCache.clear();
|
|
428
439
|
}
|
|
429
440
|
|
|
430
441
|
private async ensureCloudClient(): Promise<CloudClientLike | null> {
|
|
@@ -844,12 +855,38 @@ export class Mem0Adapter {
|
|
|
844
855
|
runId?: string;
|
|
845
856
|
}): Promise<number | undefined> {
|
|
846
857
|
const rightHash = normalizeForHash(params.rightText);
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
858
|
+
if (!rightHash) {
|
|
859
|
+
return undefined;
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
const leftHash = normalizeForHash(params.leftText);
|
|
863
|
+
if (!leftHash) {
|
|
864
|
+
return undefined;
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
const now = Date.now();
|
|
868
|
+
this.pruneSemanticSearchCache(now);
|
|
869
|
+
const scopeSession = params.scope.sessionKey ?? "";
|
|
870
|
+
const cacheKey = `${params.scope.workspaceHash}|${params.scope.agentId}|${scopeSession}|${sha256(leftHash)}`;
|
|
871
|
+
const cached = this.semanticSearchCache.get(cacheKey);
|
|
872
|
+
const results =
|
|
873
|
+
cached && cached.expiresAt > now
|
|
874
|
+
? cached.results
|
|
875
|
+
: await this.searchMemories({
|
|
876
|
+
query: params.leftText,
|
|
877
|
+
maxResults: 5,
|
|
878
|
+
scope: params.scope,
|
|
879
|
+
runId: params.runId,
|
|
880
|
+
});
|
|
881
|
+
|
|
882
|
+
if (!cached || cached.expiresAt <= now) {
|
|
883
|
+
this.semanticSearchCache.set(cacheKey, {
|
|
884
|
+
expiresAt: now + SEMANTIC_SEARCH_CACHE_TTL_MS,
|
|
885
|
+
results,
|
|
886
|
+
});
|
|
887
|
+
this.pruneSemanticSearchCache(now);
|
|
888
|
+
}
|
|
889
|
+
|
|
853
890
|
for (const result of results) {
|
|
854
891
|
if (normalizeForHash(result.snippet) === rightHash) {
|
|
855
892
|
return result.score;
|
|
@@ -857,4 +894,20 @@ export class Mem0Adapter {
|
|
|
857
894
|
}
|
|
858
895
|
return undefined;
|
|
859
896
|
}
|
|
897
|
+
|
|
898
|
+
private pruneSemanticSearchCache(now = Date.now()): void {
|
|
899
|
+
for (const [key, entry] of this.semanticSearchCache.entries()) {
|
|
900
|
+
if (entry.expiresAt <= now) {
|
|
901
|
+
this.semanticSearchCache.delete(key);
|
|
902
|
+
}
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
while (this.semanticSearchCache.size > SEMANTIC_SEARCH_CACHE_MAX_ENTRIES) {
|
|
906
|
+
const oldest = this.semanticSearchCache.keys().next().value as string | undefined;
|
|
907
|
+
if (!oldest) {
|
|
908
|
+
break;
|
|
909
|
+
}
|
|
910
|
+
this.semanticSearchCache.delete(oldest);
|
|
911
|
+
}
|
|
912
|
+
}
|
|
860
913
|
}
|
package/src/state.ts
CHANGED
|
@@ -77,7 +77,7 @@ export async function readCaptureDedupeState(paths: StatePaths): Promise<Capture
|
|
|
77
77
|
const value = await readJsonFile(paths.captureDedupeFile, DEFAULT_CAPTURE_DEDUPE);
|
|
78
78
|
return {
|
|
79
79
|
version: 1,
|
|
80
|
-
seen: value.seen ?? {},
|
|
80
|
+
seen: { ...(value.seen ?? {}) },
|
|
81
81
|
};
|
|
82
82
|
}
|
|
83
83
|
|
|
@@ -92,7 +92,7 @@ export async function readLifecycleState(paths: StatePaths): Promise<LifecycleSt
|
|
|
92
92
|
const value = await readJsonFile(paths.lifecycleFile, DEFAULT_LIFECYCLE);
|
|
93
93
|
return {
|
|
94
94
|
version: 1,
|
|
95
|
-
entries: value.entries ?? {},
|
|
95
|
+
entries: { ...(value.entries ?? {}) },
|
|
96
96
|
lastCleanupAt: value.lastCleanupAt,
|
|
97
97
|
lastCleanupReason: value.lastCleanupReason,
|
|
98
98
|
lastCleanupScanned: value.lastCleanupScanned,
|