memory-braid 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,12 +7,157 @@ Memory Braid is an OpenClaw `kind: "memory"` plugin that augments local memory s
7
7
  - Hybrid recall: local memory + Mem0, merged with weighted RRF.
8
8
  - Install-time bootstrap import: indexes existing `MEMORY.md`, `memory.md`, `memory/**/*.md`, and recent sessions.
9
9
  - Periodic reconcile: keeps remote Mem0 chunks updated and deletes stale remote chunks.
10
- - Capture pipeline: heuristic extraction with optional ML enrichment mode.
10
+ - Capture pipeline modes: `local`, `hybrid`, `ml`.
11
+ - Optional entity extraction: multilingual NER with canonical `entity://...` URIs in memory metadata.
11
12
  - Structured debug logs for troubleshooting and tuning.
12
13
 
13
14
  ## Install
14
15
 
15
- Add this plugin to your OpenClaw plugin load path, then enable it as the active memory plugin.
16
+ ### Install from npm (recommended)
17
+
18
+ On the target machine:
19
+
20
+ 1. Install from npm:
21
+
22
+ ```bash
23
+ openclaw plugins install memory-braid@0.3.1
24
+ ```
25
+
26
+ 2. Enable and set as active memory slot:
27
+
28
+ ```bash
29
+ openclaw plugins enable memory-braid
30
+ openclaw config set plugins.slots.memory memory-braid
31
+ ```
32
+
33
+ 3. Restart gateway:
34
+
35
+ ```bash
36
+ openclaw gateway restart
37
+ ```
38
+
39
+ 4. Confirm plugin is loaded:
40
+
41
+ ```bash
42
+ openclaw plugins info memory-braid
43
+ ```
44
+
45
+ Expected:
46
+ - `Status: loaded`
47
+ - `Tools: memory_search, memory_get`
48
+ - `Services: memory-braid-service`
49
+
50
+ ### Install from local path (development)
51
+
52
+ ```bash
53
+ openclaw plugins install --link /absolute/path/to/memory-braid
54
+ openclaw plugins enable memory-braid
55
+ openclaw config set plugins.slots.memory memory-braid
56
+ openclaw gateway restart
57
+ ```
58
+
59
+ ## Quick start: hybrid capture + multilingual NER
60
+
61
+ Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
62
+
63
+ ```json
64
+ {
65
+ "mem0": {
66
+ "mode": "oss",
67
+ "ossConfig": {
68
+ "version": "v1.1",
69
+ "embedder": {
70
+ "provider": "openai",
71
+ "config": {
72
+ "apiKey": "${OPENAI_API_KEY}",
73
+ "model": "text-embedding-3-small"
74
+ }
75
+ },
76
+ "vectorStore": {
77
+ "provider": "memory",
78
+ "config": {
79
+ "collectionName": "memories",
80
+ "dimension": 1536
81
+ }
82
+ },
83
+ "llm": {
84
+ "provider": "openai",
85
+ "config": {
86
+ "apiKey": "${OPENAI_API_KEY}",
87
+ "model": "gpt-4o-mini"
88
+ }
89
+ },
90
+ "enableGraph": false
91
+ }
92
+ },
93
+ "capture": {
94
+ "enabled": true,
95
+ "mode": "hybrid",
96
+ "maxItemsPerRun": 6,
97
+ "ml": {
98
+ "provider": "openai",
99
+ "model": "gpt-4o-mini",
100
+ "timeoutMs": 2500
101
+ }
102
+ },
103
+ "entityExtraction": {
104
+ "enabled": true,
105
+ "provider": "multilingual_ner",
106
+ "model": "Xenova/bert-base-multilingual-cased-ner-hrl",
107
+ "minScore": 0.65,
108
+ "maxEntitiesPerMemory": 8,
109
+ "startup": {
110
+ "downloadOnStartup": true,
111
+ "warmupText": "John works at Acme in Berlin."
112
+ }
113
+ },
114
+ "debug": {
115
+ "enabled": true
116
+ }
117
+ }
118
+ ```
119
+
120
+ Then restart:
121
+
122
+ ```bash
123
+ openclaw gateway restart
124
+ ```
125
+
126
+ ## Verification checklist
127
+
128
+ 1. Check runtime status:
129
+
130
+ ```bash
131
+ openclaw plugins info memory-braid
132
+ openclaw gateway status
133
+ ```
134
+
135
+ 2. Trigger/inspect NER warmup:
136
+
137
+ ```bash
138
+ openclaw agent --agent main --message "/memorybraid warmup" --json
139
+ ```
140
+
141
+ 3. Send a message that should be captured:
142
+
143
+ ```bash
144
+ openclaw agent --agent main --message "Remember that Ana works at OpenClaw and likes ramen." --json
145
+ ```
146
+
147
+ 4. Inspect logs for capture + NER:
148
+
149
+ ```bash
150
+ rg -n "memory_braid\\.startup|memory_braid\\.capture|memory_braid\\.entity|memory_braid\\.mem0" ~/.openclaw/logs/gateway.log | tail -n 80
151
+ ```
152
+
153
+ Expected events:
154
+ - `memory_braid.startup`
155
+ - `memory_braid.entity.model_load`
156
+ - `memory_braid.entity.warmup`
157
+ - `memory_braid.capture.extract`
158
+ - `memory_braid.capture.ml` (for `capture.mode=hybrid|ml`)
159
+ - `memory_braid.entity.extract`
160
+ - `memory_braid.capture.persist`
16
161
 
17
162
  ## Self-hosting quick guide
18
163
 
@@ -241,14 +386,23 @@ Use this preset when:
241
386
  },
242
387
  "capture": {
243
388
  "enabled": true,
244
- "extraction": {
245
- "mode": "heuristic"
246
- },
389
+ "mode": "hybrid",
390
+ "maxItemsPerRun": 6,
247
391
  "ml": {
248
392
  "provider": "openai",
249
393
  "model": "gpt-4o-mini",
250
- "timeoutMs": 2500,
251
- "maxItemsPerRun": 6
394
+ "timeoutMs": 2500
395
+ }
396
+ },
397
+ "entityExtraction": {
398
+ "enabled": true,
399
+ "provider": "multilingual_ner",
400
+ "model": "Xenova/bert-base-multilingual-cased-ner-hrl",
401
+ "minScore": 0.65,
402
+ "maxEntitiesPerMemory": 8,
403
+ "startup": {
404
+ "downloadOnStartup": true,
405
+ "warmupText": "John works at Acme in Berlin."
252
406
  }
253
407
  },
254
408
  "dedupe": {
@@ -266,6 +420,48 @@ Use this preset when:
266
420
  }
267
421
  ```
268
422
 
423
+ ## Capture defaults
424
+
425
+ Capture defaults are:
426
+
427
+ - `capture.enabled`: `true`
428
+ - `capture.mode`: `"local"`
429
+ - `capture.maxItemsPerRun`: `6`
430
+ - `capture.ml.provider`: unset
431
+ - `capture.ml.model`: unset
432
+ - `capture.ml.timeoutMs`: `2500`
433
+
434
+ Important behavior:
435
+
436
+ - `capture.mode = "local"`: heuristic-only extraction.
437
+ - `capture.mode = "hybrid"`: heuristic extraction + ML enrichment when ML config is set.
438
+ - `capture.mode = "ml"`: ML-first extraction; falls back to heuristic if ML config/call is unavailable.
439
+ - ML calls run only when both `capture.ml.provider` and `capture.ml.model` are set.
440
+
441
+ ## Entity extraction defaults
442
+
443
+ Entity extraction defaults are:
444
+
445
+ - `entityExtraction.enabled`: `false`
446
+ - `entityExtraction.provider`: `"multilingual_ner"`
447
+ - `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"`
448
+ - `entityExtraction.minScore`: `0.65`
449
+ - `entityExtraction.maxEntitiesPerMemory`: `8`
450
+ - `entityExtraction.startup.downloadOnStartup`: `true`
451
+ - `entityExtraction.startup.warmupText`: `"John works at Acme in Berlin."`
452
+
453
+ When enabled:
454
+
455
+ - Model cache/download path is `<OPENCLAW_STATE_DIR>/memory-braid/models/entity-extraction` (typically `~/.openclaw/memory-braid/models/entity-extraction`).
456
+ - Captured memories get `metadata.entities` and `metadata.entityUris` (canonical IDs like `entity://person/john-doe`).
457
+ - Startup can pre-download/warm the model (`downloadOnStartup: true`).
458
+
459
+ Status and warmup commands:
460
+
461
+ - `/memorybraid status`
462
+ - `/memorybraid warmup`
463
+ - `/memorybraid warmup --force`
464
+
269
465
  ## Debugging
270
466
 
271
467
  Set:
@@ -285,14 +481,35 @@ Set:
285
481
  Key events:
286
482
 
287
483
  - `memory_braid.startup`
484
+ - `memory_braid.config`
288
485
  - `memory_braid.bootstrap.begin|complete|error`
289
486
  - `memory_braid.reconcile.begin|progress|complete|error`
290
- - `memory_braid.search.local|mem0|merge|inject`
487
+ - `memory_braid.search.local|mem0|merge|inject|skip`
291
488
  - `memory_braid.capture.extract|ml|persist|skip`
489
+ - `memory_braid.entity.model_load|warmup|extract`
292
490
  - `memory_braid.mem0.request|response|error`
293
491
 
294
492
  `debug.includePayloads=true` includes payload fields; otherwise sensitive text fields are omitted.
295
493
 
494
+ Traceability tips:
495
+
496
+ - Use `runId` to follow one execution end-to-end across capture/search/entity/mem0 events.
497
+ - `memory_braid.capture.persist` includes high-signal counters:
498
+ - `dedupeSkipped`
499
+ - `mem0AddAttempts`
500
+ - `mem0AddWithId`
501
+ - `mem0AddWithoutId`
502
+ - `entityAnnotatedCandidates`
503
+ - `totalEntitiesAttached`
504
+ - `memory_braid.capture.ml` includes `fallbackUsed` and fallback reasons when ML is unavailable.
505
+ - `memory_braid.entity.extract` includes `entityTypes` and `sampleEntityUris`.
506
+
507
+ Example:
508
+
509
+ ```bash
510
+ rg -n "memory_braid\\." ~/.openclaw/logs/gateway.log | rg "runId\":\"<RUN_ID>\"" | tail -n 120
511
+ ```
512
+
296
513
  ## Tests
297
514
 
298
515
  ```bash
@@ -47,25 +47,48 @@
47
47
  "additionalProperties": false,
48
48
  "properties": {
49
49
  "enabled": { "type": "boolean", "default": true },
50
- "extraction": {
50
+ "mode": {
51
+ "type": "string",
52
+ "enum": ["local", "hybrid", "ml"],
53
+ "default": "local"
54
+ },
55
+ "maxItemsPerRun": { "type": "integer", "minimum": 1, "maximum": 50, "default": 6 },
56
+ "ml": {
51
57
  "type": "object",
52
58
  "additionalProperties": false,
53
59
  "properties": {
54
- "mode": {
55
- "type": "string",
56
- "enum": ["heuristic", "heuristic_plus_ml"],
57
- "default": "heuristic"
58
- }
60
+ "provider": { "type": "string", "enum": ["openai", "anthropic", "gemini"] },
61
+ "model": { "type": "string" },
62
+ "timeoutMs": { "type": "integer", "minimum": 250, "maximum": 30000, "default": 2500 }
59
63
  }
64
+ }
65
+ }
66
+ },
67
+ "entityExtraction": {
68
+ "type": "object",
69
+ "additionalProperties": false,
70
+ "properties": {
71
+ "enabled": { "type": "boolean", "default": false },
72
+ "provider": {
73
+ "type": "string",
74
+ "enum": ["multilingual_ner"],
75
+ "default": "multilingual_ner"
60
76
  },
61
- "ml": {
77
+ "model": {
78
+ "type": "string",
79
+ "default": "Xenova/bert-base-multilingual-cased-ner-hrl"
80
+ },
81
+ "minScore": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.65 },
82
+ "maxEntitiesPerMemory": { "type": "integer", "minimum": 1, "maximum": 50, "default": 8 },
83
+ "startup": {
62
84
  "type": "object",
63
85
  "additionalProperties": false,
64
86
  "properties": {
65
- "provider": { "type": "string", "enum": ["openai", "anthropic", "gemini"] },
66
- "model": { "type": "string" },
67
- "timeoutMs": { "type": "integer", "minimum": 250, "maximum": 30000, "default": 2500 },
68
- "maxItemsPerRun": { "type": "integer", "minimum": 1, "maximum": 50, "default": 6 }
87
+ "downloadOnStartup": { "type": "boolean", "default": true },
88
+ "warmupText": {
89
+ "type": "string",
90
+ "default": "John works at Acme in Berlin."
91
+ }
69
92
  }
70
93
  }
71
94
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "memory-braid",
3
- "version": "0.2.0",
3
+ "version": "0.3.1",
4
4
  "description": "OpenClaw memory plugin that augments local memory with Mem0, bootstrap import, reconcile, and capture.",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -31,6 +31,7 @@
31
31
  "openclaw": ">=2026.2.18"
32
32
  },
33
33
  "dependencies": {
34
+ "@xenova/transformers": "^2.17.2",
34
35
  "mem0ai": "^2.2.3"
35
36
  },
36
37
  "devDependencies": {
package/src/config.ts CHANGED
@@ -20,14 +20,23 @@ export type MemoryBraidConfig = {
20
20
  };
21
21
  capture: {
22
22
  enabled: boolean;
23
- extraction: {
24
- mode: "heuristic" | "heuristic_plus_ml";
25
- };
23
+ mode: "local" | "hybrid" | "ml";
24
+ maxItemsPerRun: number;
26
25
  ml: {
27
26
  provider?: "openai" | "anthropic" | "gemini";
28
27
  model?: string;
29
28
  timeoutMs: number;
30
- maxItemsPerRun: number;
29
+ };
30
+ };
31
+ entityExtraction: {
32
+ enabled: boolean;
33
+ provider: "multilingual_ner";
34
+ model: string;
35
+ minScore: number;
36
+ maxEntitiesPerMemory: number;
37
+ startup: {
38
+ downloadOnStartup: boolean;
39
+ warmupText: string;
31
40
  };
32
41
  };
33
42
  bootstrap: {
@@ -84,14 +93,23 @@ const DEFAULTS: MemoryBraidConfig = {
84
93
  },
85
94
  capture: {
86
95
  enabled: true,
87
- extraction: {
88
- mode: "heuristic",
89
- },
96
+ mode: "local",
97
+ maxItemsPerRun: 6,
90
98
  ml: {
91
99
  provider: undefined,
92
100
  model: undefined,
93
101
  timeoutMs: 2500,
94
- maxItemsPerRun: 6,
102
+ },
103
+ },
104
+ entityExtraction: {
105
+ enabled: false,
106
+ provider: "multilingual_ner",
107
+ model: "Xenova/bert-base-multilingual-cased-ner-hrl",
108
+ minScore: 0.65,
109
+ maxEntitiesPerMemory: 8,
110
+ startup: {
111
+ downloadOnStartup: true,
112
+ warmupText: "John works at Acme in Berlin.",
95
113
  },
96
114
  },
97
115
  bootstrap: {
@@ -160,7 +178,8 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
160
178
  const recall = asRecord(root.recall);
161
179
  const merge = asRecord(recall.merge);
162
180
  const capture = asRecord(root.capture);
163
- const extraction = asRecord(capture.extraction);
181
+ const entityExtraction = asRecord(root.entityExtraction);
182
+ const entityStartup = asRecord(entityExtraction.startup);
164
183
  const ml = asRecord(capture.ml);
165
184
  const bootstrap = asRecord(root.bootstrap);
166
185
  const reconcile = asRecord(root.reconcile);
@@ -170,8 +189,11 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
170
189
  const debug = asRecord(root.debug);
171
190
 
172
191
  const mode = mem0.mode === "oss" ? "oss" : "cloud";
173
- const extractionMode =
174
- extraction.mode === "heuristic_plus_ml" ? "heuristic_plus_ml" : "heuristic";
192
+ const rawCaptureMode = asString(capture.mode)?.toLowerCase();
193
+ const captureMode =
194
+ rawCaptureMode === "local" || rawCaptureMode === "hybrid" || rawCaptureMode === "ml"
195
+ ? rawCaptureMode
196
+ : DEFAULTS.capture.mode;
175
197
 
176
198
  return {
177
199
  enabled: asBoolean(root.enabled, DEFAULTS.enabled),
@@ -195,9 +217,8 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
195
217
  },
196
218
  capture: {
197
219
  enabled: asBoolean(capture.enabled, DEFAULTS.capture.enabled),
198
- extraction: {
199
- mode: extractionMode,
200
- },
220
+ mode: captureMode,
221
+ maxItemsPerRun: asInt(capture.maxItemsPerRun, DEFAULTS.capture.maxItemsPerRun, 1, 50),
201
222
  ml: {
202
223
  provider:
203
224
  ml.provider === "openai" || ml.provider === "anthropic" || ml.provider === "gemini"
@@ -205,7 +226,29 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
205
226
  : DEFAULTS.capture.ml.provider,
206
227
  model: asString(ml.model),
207
228
  timeoutMs: asInt(ml.timeoutMs, DEFAULTS.capture.ml.timeoutMs, 250, 30_000),
208
- maxItemsPerRun: asInt(ml.maxItemsPerRun, DEFAULTS.capture.ml.maxItemsPerRun, 1, 50),
229
+ },
230
+ },
231
+ entityExtraction: {
232
+ enabled: asBoolean(entityExtraction.enabled, DEFAULTS.entityExtraction.enabled),
233
+ provider:
234
+ entityExtraction.provider === "multilingual_ner"
235
+ ? "multilingual_ner"
236
+ : DEFAULTS.entityExtraction.provider,
237
+ model: asString(entityExtraction.model) ?? DEFAULTS.entityExtraction.model,
238
+ minScore: asNumber(entityExtraction.minScore, DEFAULTS.entityExtraction.minScore, 0, 1),
239
+ maxEntitiesPerMemory: asInt(
240
+ entityExtraction.maxEntitiesPerMemory,
241
+ DEFAULTS.entityExtraction.maxEntitiesPerMemory,
242
+ 1,
243
+ 50,
244
+ ),
245
+ startup: {
246
+ downloadOnStartup: asBoolean(
247
+ entityStartup.downloadOnStartup,
248
+ DEFAULTS.entityExtraction.startup.downloadOnStartup,
249
+ ),
250
+ warmupText:
251
+ asString(entityStartup.warmupText) ?? DEFAULTS.entityExtraction.startup.warmupText,
209
252
  },
210
253
  },
211
254
  bootstrap: {
@@ -0,0 +1,354 @@
1
+ import os from "node:os";
2
+ import path from "node:path";
3
+ import { normalizeWhitespace } from "./chunking.js";
4
+ import type { MemoryBraidConfig } from "./config.js";
5
+ import { MemoryBraidLogger } from "./logger.js";
6
+
7
+ type NerPipeline = (text: string, options?: Record<string, unknown>) => Promise<unknown>;
8
+
9
+ type NerRecord = {
10
+ word?: unknown;
11
+ entity_group?: unknown;
12
+ entity?: unknown;
13
+ score?: unknown;
14
+ };
15
+
16
+ export type ExtractedEntity = {
17
+ text: string;
18
+ type: "person" | "organization" | "location" | "misc";
19
+ score: number;
20
+ canonicalUri: string;
21
+ };
22
+
23
+ function summarizeEntityTypes(entities: ExtractedEntity[]): Record<string, number> {
24
+ const summary: Record<string, number> = {};
25
+ for (const entity of entities) {
26
+ summary[entity.type] = (summary[entity.type] ?? 0) + 1;
27
+ }
28
+ return summary;
29
+ }
30
+
31
+ function resolveStateDir(explicitStateDir?: string): string {
32
+ const resolved =
33
+ explicitStateDir?.trim() ||
34
+ process.env.OPENCLAW_STATE_DIR?.trim() ||
35
+ path.join(os.homedir(), ".openclaw");
36
+ return path.resolve(resolved);
37
+ }
38
+
39
+ export function resolveEntityModelCacheDir(stateDir?: string): string {
40
+ return path.join(resolveStateDir(stateDir), "memory-braid", "models", "entity-extraction");
41
+ }
42
+
43
+ function slugify(value: string): string {
44
+ const ascii = value
45
+ .normalize("NFKD")
46
+ .replace(/[\u0300-\u036f]/g, "");
47
+ const slug = ascii
48
+ .toLowerCase()
49
+ .replace(/[^a-z0-9]+/g, "-")
50
+ .replace(/^-+|-+$/g, "");
51
+ return slug || "unknown";
52
+ }
53
+
54
+ export function buildCanonicalEntityUri(
55
+ type: ExtractedEntity["type"],
56
+ text: string,
57
+ ): string {
58
+ return `entity://${type}/${slugify(text)}`;
59
+ }
60
+
61
+ function normalizeEntityType(raw: unknown): ExtractedEntity["type"] {
62
+ const label = typeof raw === "string" ? raw.toUpperCase() : "";
63
+ if (label.includes("PER")) {
64
+ return "person";
65
+ }
66
+ if (label.includes("ORG")) {
67
+ return "organization";
68
+ }
69
+ if (label.includes("LOC") || label.includes("GPE")) {
70
+ return "location";
71
+ }
72
+ return "misc";
73
+ }
74
+
75
+ function normalizeEntityText(raw: unknown): string {
76
+ if (typeof raw !== "string") {
77
+ return "";
78
+ }
79
+ return normalizeWhitespace(raw.replace(/^##/, "").replace(/^▁/, ""));
80
+ }
81
+
82
+ type EntityExtractionOptions = {
83
+ stateDir?: string;
84
+ };
85
+
86
+ export class EntityExtractionManager {
87
+ private readonly cfg: MemoryBraidConfig["entityExtraction"];
88
+ private readonly log: MemoryBraidLogger;
89
+ private stateDir?: string;
90
+ private pipelinePromise: Promise<NerPipeline | null> | null = null;
91
+
92
+ constructor(
93
+ cfg: MemoryBraidConfig["entityExtraction"],
94
+ log: MemoryBraidLogger,
95
+ options?: EntityExtractionOptions,
96
+ ) {
97
+ this.cfg = cfg;
98
+ this.log = log;
99
+ this.stateDir = options?.stateDir;
100
+ }
101
+
102
+ setStateDir(stateDir?: string): void {
103
+ const next = stateDir?.trim();
104
+ if (!next || next === this.stateDir) {
105
+ return;
106
+ }
107
+ this.stateDir = next;
108
+ this.pipelinePromise = null;
109
+ }
110
+
111
+ getStatus(): {
112
+ enabled: boolean;
113
+ provider: MemoryBraidConfig["entityExtraction"]["provider"];
114
+ model: string;
115
+ minScore: number;
116
+ maxEntitiesPerMemory: number;
117
+ cacheDir: string;
118
+ } {
119
+ return {
120
+ enabled: this.cfg.enabled,
121
+ provider: this.cfg.provider,
122
+ model: this.cfg.model,
123
+ minScore: this.cfg.minScore,
124
+ maxEntitiesPerMemory: this.cfg.maxEntitiesPerMemory,
125
+ cacheDir: resolveEntityModelCacheDir(this.stateDir),
126
+ };
127
+ }
128
+
129
+ async warmup(params?: {
130
+ runId?: string;
131
+ reason?: string;
132
+ forceReload?: boolean;
133
+ text?: string;
134
+ }): Promise<{
135
+ ok: boolean;
136
+ cacheDir: string;
137
+ model: string;
138
+ entities: number;
139
+ durMs: number;
140
+ error?: string;
141
+ }> {
142
+ const startedAt = Date.now();
143
+ if (!this.cfg.enabled) {
144
+ return {
145
+ ok: false,
146
+ cacheDir: resolveEntityModelCacheDir(this.stateDir),
147
+ model: this.cfg.model,
148
+ entities: 0,
149
+ durMs: Date.now() - startedAt,
150
+ error: "entity_extraction_disabled",
151
+ };
152
+ }
153
+
154
+ const pipeline = await this.ensurePipeline(params?.forceReload);
155
+ if (!pipeline) {
156
+ return {
157
+ ok: false,
158
+ cacheDir: resolveEntityModelCacheDir(this.stateDir),
159
+ model: this.cfg.model,
160
+ entities: 0,
161
+ durMs: Date.now() - startedAt,
162
+ error: "model_load_failed",
163
+ };
164
+ }
165
+
166
+ try {
167
+ const entities = await this.extractWithPipeline({
168
+ pipeline,
169
+ text: params?.text ?? this.cfg.startup.warmupText,
170
+ });
171
+ this.log.info("memory_braid.entity.warmup", {
172
+ runId: params?.runId,
173
+ reason: params?.reason ?? "manual",
174
+ provider: this.cfg.provider,
175
+ model: this.cfg.model,
176
+ cacheDir: resolveEntityModelCacheDir(this.stateDir),
177
+ entities: entities.length,
178
+ entityTypes: summarizeEntityTypes(entities),
179
+ sampleEntityUris: entities.slice(0, 5).map((entry) => entry.canonicalUri),
180
+ durMs: Date.now() - startedAt,
181
+ });
182
+ return {
183
+ ok: true,
184
+ cacheDir: resolveEntityModelCacheDir(this.stateDir),
185
+ model: this.cfg.model,
186
+ entities: entities.length,
187
+ durMs: Date.now() - startedAt,
188
+ };
189
+ } catch (err) {
190
+ const message = err instanceof Error ? err.message : String(err);
191
+ this.log.warn("memory_braid.entity.warmup", {
192
+ runId: params?.runId,
193
+ reason: params?.reason ?? "manual",
194
+ provider: this.cfg.provider,
195
+ model: this.cfg.model,
196
+ cacheDir: resolveEntityModelCacheDir(this.stateDir),
197
+ error: message,
198
+ });
199
+ return {
200
+ ok: false,
201
+ cacheDir: resolveEntityModelCacheDir(this.stateDir),
202
+ model: this.cfg.model,
203
+ entities: 0,
204
+ durMs: Date.now() - startedAt,
205
+ error: message,
206
+ };
207
+ }
208
+ }
209
+
210
+ async extract(params: { text: string; runId?: string }): Promise<ExtractedEntity[]> {
211
+ if (!this.cfg.enabled) {
212
+ return [];
213
+ }
214
+
215
+ const text = normalizeWhitespace(params.text);
216
+ if (!text) {
217
+ return [];
218
+ }
219
+
220
+ const pipeline = await this.ensurePipeline();
221
+ if (!pipeline) {
222
+ return [];
223
+ }
224
+
225
+ try {
226
+ const entities = await this.extractWithPipeline({ pipeline, text });
227
+ this.log.debug("memory_braid.entity.extract", {
228
+ runId: params.runId,
229
+ provider: this.cfg.provider,
230
+ model: this.cfg.model,
231
+ entities: entities.length,
232
+ entityTypes: summarizeEntityTypes(entities),
233
+ sampleEntityUris: entities.slice(0, 5).map((entry) => entry.canonicalUri),
234
+ });
235
+ return entities;
236
+ } catch (err) {
237
+ this.log.warn("memory_braid.entity.extract", {
238
+ runId: params.runId,
239
+ provider: this.cfg.provider,
240
+ model: this.cfg.model,
241
+ error: err instanceof Error ? err.message : String(err),
242
+ });
243
+ return [];
244
+ }
245
+ }
246
+
247
+ private async ensurePipeline(forceReload = false): Promise<NerPipeline | null> {
248
+ if (!this.cfg.enabled) {
249
+ return null;
250
+ }
251
+
252
+ if (forceReload) {
253
+ this.pipelinePromise = null;
254
+ }
255
+
256
+ if (this.pipelinePromise) {
257
+ return this.pipelinePromise;
258
+ }
259
+
260
+ this.pipelinePromise = this.loadPipeline();
261
+ return this.pipelinePromise;
262
+ }
263
+
264
+ private async loadPipeline(): Promise<NerPipeline | null> {
265
+ const cacheDir = resolveEntityModelCacheDir(this.stateDir);
266
+ this.log.info("memory_braid.entity.model_load", {
267
+ provider: this.cfg.provider,
268
+ model: this.cfg.model,
269
+ cacheDir,
270
+ });
271
+
272
+ try {
273
+ const mod = (await import("@xenova/transformers")) as {
274
+ env?: Record<string, unknown>;
275
+ pipeline?: (
276
+ task: string,
277
+ model: string,
278
+ options?: Record<string, unknown>,
279
+ ) => Promise<unknown>;
280
+ };
281
+
282
+ if (!mod.pipeline) {
283
+ throw new Error("@xenova/transformers pipeline export not found");
284
+ }
285
+
286
+ if (mod.env) {
287
+ mod.env.cacheDir = cacheDir;
288
+ mod.env.allowRemoteModels = true;
289
+ mod.env.allowLocalModels = true;
290
+ mod.env.useFS = true;
291
+ }
292
+
293
+ const classifier = await mod.pipeline("token-classification", this.cfg.model, {
294
+ quantized: true,
295
+ });
296
+
297
+ if (typeof classifier !== "function") {
298
+ throw new Error("token-classification pipeline is not callable");
299
+ }
300
+
301
+ return classifier as NerPipeline;
302
+ } catch (err) {
303
+ this.log.error("memory_braid.entity.model_load", {
304
+ provider: this.cfg.provider,
305
+ model: this.cfg.model,
306
+ cacheDir,
307
+ error: err instanceof Error ? err.message : String(err),
308
+ });
309
+ return null;
310
+ }
311
+ }
312
+
313
+ private async extractWithPipeline(params: {
314
+ pipeline: NerPipeline;
315
+ text: string;
316
+ }): Promise<ExtractedEntity[]> {
317
+ const raw = await params.pipeline(params.text, {
318
+ aggregation_strategy: "simple",
319
+ });
320
+ const rows = Array.isArray(raw) ? raw : [];
321
+
322
+ const deduped = new Map<string, ExtractedEntity>();
323
+ for (const row of rows) {
324
+ if (!row || typeof row !== "object") {
325
+ continue;
326
+ }
327
+ const record = row as NerRecord;
328
+ const entityText = normalizeEntityText(record.word);
329
+ if (!entityText) {
330
+ continue;
331
+ }
332
+ const score = typeof record.score === "number" ? Math.max(0, Math.min(1, record.score)) : 0;
333
+ if (score < this.cfg.minScore) {
334
+ continue;
335
+ }
336
+
337
+ const type = normalizeEntityType(record.entity_group ?? record.entity);
338
+ const canonicalUri = buildCanonicalEntityUri(type, entityText);
339
+ const current = deduped.get(canonicalUri);
340
+ if (!current || score > current.score) {
341
+ deduped.set(canonicalUri, {
342
+ text: entityText,
343
+ type,
344
+ score,
345
+ canonicalUri,
346
+ });
347
+ }
348
+ }
349
+
350
+ return Array.from(deduped.values())
351
+ .sort((a, b) => b.score - a.score)
352
+ .slice(0, this.cfg.maxEntitiesPerMemory);
353
+ }
354
+ }
package/src/extract.ts CHANGED
@@ -3,6 +3,8 @@ import type { MemoryBraidConfig } from "./config.js";
3
3
  import { MemoryBraidLogger } from "./logger.js";
4
4
  import type { ExtractedCandidate } from "./types.js";
5
5
 
6
+ type MlProvider = "openai" | "anthropic" | "gemini";
7
+
6
8
  const HEURISTIC_PATTERNS = [
7
9
  /remember|remember that|keep in mind|note that/i,
8
10
  /i prefer|prefer to|don't like|do not like|hate|love/i,
@@ -145,14 +147,11 @@ function parseJsonObjectArray(raw: string): Array<Record<string, unknown>> {
145
147
  }
146
148
 
147
149
  async function callMlEnrichment(params: {
148
- provider: "openai" | "anthropic" | "gemini";
150
+ provider: MlProvider;
149
151
  model: string;
150
152
  timeoutMs: number;
151
153
  candidates: ExtractedCandidate[];
152
154
  }): Promise<Array<Record<string, unknown>>> {
153
- const controller = new AbortController();
154
- const timer = setTimeout(() => controller.abort(), params.timeoutMs);
155
-
156
155
  const prompt = [
157
156
  "Classify the memory candidates.",
158
157
  "Return ONLY JSON array.",
@@ -160,6 +159,52 @@ async function callMlEnrichment(params: {
160
159
  "Category one of: preference, decision, fact, task, other.",
161
160
  JSON.stringify(params.candidates.map((candidate, index) => ({ index, text: candidate.text }))),
162
161
  ].join("\n");
162
+ return callMlJson({
163
+ provider: params.provider,
164
+ model: params.model,
165
+ timeoutMs: params.timeoutMs,
166
+ prompt,
167
+ });
168
+ }
169
+
170
+ async function callMlExtraction(params: {
171
+ provider: MlProvider;
172
+ model: string;
173
+ timeoutMs: number;
174
+ maxItems: number;
175
+ messages: Array<{ role: string; text: string }>;
176
+ }): Promise<Array<Record<string, unknown>>> {
177
+ const recent = params.messages.slice(-30).map((item) => ({
178
+ role: item.role,
179
+ text: item.text,
180
+ }));
181
+
182
+ const prompt = [
183
+ "Extract durable user memories from this conversation.",
184
+ "Return ONLY JSON array.",
185
+ "Each item: {text:string, category:string, score:number}.",
186
+ "Category one of: preference, decision, fact, task, other.",
187
+ "Keep each text concise and atomic.",
188
+ `Maximum items: ${params.maxItems}.`,
189
+ JSON.stringify(recent),
190
+ ].join("\n");
191
+
192
+ return callMlJson({
193
+ provider: params.provider,
194
+ model: params.model,
195
+ timeoutMs: params.timeoutMs,
196
+ prompt,
197
+ });
198
+ }
199
+
200
+ async function callMlJson(params: {
201
+ provider: MlProvider;
202
+ model: string;
203
+ timeoutMs: number;
204
+ prompt: string;
205
+ }): Promise<Array<Record<string, unknown>>> {
206
+ const controller = new AbortController();
207
+ const timer = setTimeout(() => controller.abort(), params.timeoutMs);
163
208
 
164
209
  try {
165
210
  if (params.provider === "openai") {
@@ -183,7 +228,7 @@ async function callMlEnrichment(params: {
183
228
  },
184
229
  {
185
230
  role: "user",
186
- content: prompt,
231
+ content: params.prompt,
187
232
  },
188
233
  ],
189
234
  }),
@@ -212,7 +257,7 @@ async function callMlEnrichment(params: {
212
257
  model: params.model,
213
258
  max_tokens: 1000,
214
259
  temperature: 0,
215
- messages: [{ role: "user", content: prompt }],
260
+ messages: [{ role: "user", content: params.prompt }],
216
261
  }),
217
262
  signal: controller.signal,
218
263
  });
@@ -236,7 +281,7 @@ async function callMlEnrichment(params: {
236
281
  },
237
282
  body: JSON.stringify({
238
283
  generationConfig: { temperature: 0 },
239
- contents: [{ role: "user", parts: [{ text: prompt }] }],
284
+ contents: [{ role: "user", parts: [{ text: params.prompt }] }],
240
285
  }),
241
286
  signal: controller.signal,
242
287
  },
@@ -251,6 +296,19 @@ async function callMlEnrichment(params: {
251
296
  }
252
297
  }
253
298
 
299
+ function normalizeCategory(value: unknown, fallback: ExtractedCandidate["category"] = "other"): ExtractedCandidate["category"] {
300
+ if (
301
+ value === "preference" ||
302
+ value === "decision" ||
303
+ value === "fact" ||
304
+ value === "task" ||
305
+ value === "other"
306
+ ) {
307
+ return value;
308
+ }
309
+ return fallback;
310
+ }
311
+
254
312
  function applyMlResult(
255
313
  candidates: ExtractedCandidate[],
256
314
  result: Array<Record<string, unknown>>,
@@ -282,14 +340,7 @@ function applyMlResult(
282
340
  if (!keep) {
283
341
  continue;
284
342
  }
285
- const category =
286
- ml.category === "preference" ||
287
- ml.category === "decision" ||
288
- ml.category === "fact" ||
289
- ml.category === "task" ||
290
- ml.category === "other"
291
- ? (ml.category as ExtractedCandidate["category"])
292
- : candidate.category;
343
+ const category = normalizeCategory(ml.category, candidate.category);
293
344
  const score = typeof ml.score === "number" ? Math.max(0, Math.min(1, ml.score)) : candidate.score;
294
345
  out.push({
295
346
  ...candidate,
@@ -301,6 +352,39 @@ function applyMlResult(
301
352
  return out;
302
353
  }
303
354
 
355
+ function applyMlExtractionResult(
356
+ result: Array<Record<string, unknown>>,
357
+ maxItems: number,
358
+ ): ExtractedCandidate[] {
359
+ const out: ExtractedCandidate[] = [];
360
+ const seen = new Set<string>();
361
+
362
+ for (const item of result) {
363
+ const rawText = typeof item.text === "string" ? item.text : "";
364
+ const text = normalizeWhitespace(rawText);
365
+ if (!text || text.length < 20 || text.length > 3000) {
366
+ continue;
367
+ }
368
+ const key = sha256(normalizeForHash(text));
369
+ if (seen.has(key)) {
370
+ continue;
371
+ }
372
+ seen.add(key);
373
+
374
+ out.push({
375
+ text,
376
+ category: normalizeCategory(item.category),
377
+ score: typeof item.score === "number" ? Math.max(0, Math.min(1, item.score)) : 0.5,
378
+ source: "ml",
379
+ });
380
+ if (out.length >= maxItems) {
381
+ break;
382
+ }
383
+ }
384
+
385
+ return out;
386
+ }
387
+
304
388
  export async function extractCandidates(params: {
305
389
  messages: unknown[];
306
390
  cfg: MemoryBraidConfig;
@@ -308,43 +392,86 @@ export async function extractCandidates(params: {
308
392
  runId?: string;
309
393
  }): Promise<ExtractedCandidate[]> {
310
394
  const normalized = normalizeMessages(params.messages);
311
- const heuristic = pickHeuristicCandidates(normalized, params.cfg.capture.ml.maxItemsPerRun);
395
+ const heuristic = pickHeuristicCandidates(normalized, params.cfg.capture.maxItemsPerRun);
312
396
 
313
397
  params.log.debug("memory_braid.capture.extract", {
314
398
  runId: params.runId,
399
+ mode: params.cfg.capture.mode,
400
+ maxItemsPerRun: params.cfg.capture.maxItemsPerRun,
315
401
  totalMessages: normalized.length,
316
402
  heuristicCandidates: heuristic.length,
317
403
  });
318
404
 
319
- if (
320
- params.cfg.capture.extraction.mode !== "heuristic_plus_ml" ||
321
- !params.cfg.capture.ml.provider ||
322
- !params.cfg.capture.ml.model
323
- ) {
405
+ if (params.cfg.capture.mode === "local") {
406
+ params.log.debug("memory_braid.capture.mode", {
407
+ runId: params.runId,
408
+ mode: params.cfg.capture.mode,
409
+ decision: "heuristic_only",
410
+ candidates: heuristic.length,
411
+ });
412
+ return heuristic;
413
+ }
414
+
415
+ if (!params.cfg.capture.ml.provider || !params.cfg.capture.ml.model) {
416
+ params.log.warn("memory_braid.capture.ml", {
417
+ runId: params.runId,
418
+ reason: "missing_provider_or_model",
419
+ mode: params.cfg.capture.mode,
420
+ hasProvider: Boolean(params.cfg.capture.ml.provider),
421
+ hasModel: Boolean(params.cfg.capture.ml.model),
422
+ fallback: "heuristic",
423
+ candidates: heuristic.length,
424
+ });
324
425
  return heuristic;
325
426
  }
326
427
 
327
428
  try {
328
- const ml = await callMlEnrichment({
429
+ if (params.cfg.capture.mode === "hybrid") {
430
+ const ml = await callMlEnrichment({
431
+ provider: params.cfg.capture.ml.provider,
432
+ model: params.cfg.capture.ml.model,
433
+ timeoutMs: params.cfg.capture.ml.timeoutMs,
434
+ candidates: heuristic,
435
+ });
436
+ const enriched = applyMlResult(heuristic, ml);
437
+ params.log.debug("memory_braid.capture.ml", {
438
+ runId: params.runId,
439
+ mode: params.cfg.capture.mode,
440
+ provider: params.cfg.capture.ml.provider,
441
+ model: params.cfg.capture.ml.model,
442
+ requested: heuristic.length,
443
+ returned: ml.length,
444
+ enriched: enriched.length,
445
+ fallbackUsed: ml.length === 0,
446
+ });
447
+ return enriched;
448
+ }
449
+
450
+ const mlExtractedRaw = await callMlExtraction({
329
451
  provider: params.cfg.capture.ml.provider,
330
452
  model: params.cfg.capture.ml.model,
331
453
  timeoutMs: params.cfg.capture.ml.timeoutMs,
332
- candidates: heuristic,
454
+ maxItems: params.cfg.capture.maxItemsPerRun,
455
+ messages: normalized,
333
456
  });
334
- const enriched = applyMlResult(heuristic, ml);
457
+ const mlExtracted = applyMlExtractionResult(mlExtractedRaw, params.cfg.capture.maxItemsPerRun);
335
458
  params.log.debug("memory_braid.capture.ml", {
336
459
  runId: params.runId,
460
+ mode: params.cfg.capture.mode,
337
461
  provider: params.cfg.capture.ml.provider,
338
462
  model: params.cfg.capture.ml.model,
339
- requested: heuristic.length,
340
- returned: ml.length,
341
- enriched: enriched.length,
463
+ returned: mlExtractedRaw.length,
464
+ extracted: mlExtracted.length,
465
+ fallbackUsed: mlExtracted.length === 0,
342
466
  });
343
- return enriched;
467
+ return mlExtracted.length > 0 ? mlExtracted : heuristic;
344
468
  } catch (err) {
345
469
  params.log.warn("memory_braid.capture.ml", {
346
470
  runId: params.runId,
471
+ mode: params.cfg.capture.mode,
347
472
  error: err instanceof Error ? err.message : String(err),
473
+ fallback: "heuristic",
474
+ candidates: heuristic.length,
348
475
  });
349
476
  return heuristic;
350
477
  }
package/src/index.ts CHANGED
@@ -5,6 +5,7 @@ import type {
5
5
  } from "openclaw/plugin-sdk";
6
6
  import { parseConfig, pluginConfigSchema } from "./config.js";
7
7
  import { stagedDedupe } from "./dedupe.js";
8
+ import { EntityExtractionManager } from "./entities.js";
8
9
  import { extractCandidates } from "./extract.js";
9
10
  import { MemoryBraidLogger } from "./logger.js";
10
11
  import { resolveLocalTools, runLocalGet, runLocalSearch } from "./local-memory.js";
@@ -75,6 +76,25 @@ function formatRelevantMemories(results: MemoryBraidResult[], maxChars = 600): s
75
76
  ].join("\n");
76
77
  }
77
78
 
79
+ function formatEntityExtractionStatus(params: {
80
+ enabled: boolean;
81
+ provider: string;
82
+ model: string;
83
+ minScore: number;
84
+ maxEntitiesPerMemory: number;
85
+ cacheDir: string;
86
+ }): string {
87
+ return [
88
+ "Memory Braid entity extraction:",
89
+ `- enabled: ${params.enabled}`,
90
+ `- provider: ${params.provider}`,
91
+ `- model: ${params.model}`,
92
+ `- minScore: ${params.minScore}`,
93
+ `- maxEntitiesPerMemory: ${params.maxEntitiesPerMemory}`,
94
+ `- cacheDir: ${params.cacheDir}`,
95
+ ].join("\n");
96
+ }
97
+
78
98
  async function runHybridRecall(params: {
79
99
  api: OpenClawPluginApi;
80
100
  cfg: ReturnType<typeof parseConfig>;
@@ -94,6 +114,13 @@ async function runHybridRecall(params: {
94
114
  }> {
95
115
  const local = resolveLocalTools(params.api, params.ctx);
96
116
  if (!local.searchTool) {
117
+ params.log.warn("memory_braid.search.skip", {
118
+ runId: params.runId,
119
+ reason: "local_search_tool_unavailable",
120
+ agentId: params.ctx.agentId,
121
+ sessionKey: params.ctx.sessionKey,
122
+ workspaceHash: workspaceHashFromDir(params.ctx.workspaceDir),
123
+ });
97
124
  return { local: [], mem0: [], merged: [] };
98
125
  }
99
126
 
@@ -190,6 +217,9 @@ const memoryBraidPlugin = {
190
217
  const log = new MemoryBraidLogger(api.logger, cfg.debug);
191
218
  const initialStateDir = api.runtime.state.resolveStateDir();
192
219
  const mem0 = new Mem0Adapter(cfg, log, { stateDir: initialStateDir });
220
+ const entityExtraction = new EntityExtractionManager(cfg.entityExtraction, log, {
221
+ stateDir: initialStateDir,
222
+ });
193
223
 
194
224
  let serviceTimer: NodeJS.Timeout | null = null;
195
225
  let statePaths: StatePaths | null = null;
@@ -288,6 +318,61 @@ const memoryBraidPlugin = {
288
318
  { names: ["memory_search", "memory_get"] },
289
319
  );
290
320
 
321
+ api.registerCommand({
322
+ name: "memorybraid",
323
+ description: "Memory Braid status and entity extraction warmup.",
324
+ acceptsArgs: true,
325
+ handler: async (ctx) => {
326
+ const args = ctx.args?.trim() ?? "";
327
+ const tokens = args.split(/\s+/).filter(Boolean);
328
+ const action = (tokens[0] ?? "status").toLowerCase();
329
+
330
+ if (action === "status") {
331
+ return {
332
+ text: [
333
+ `capture.mode: ${cfg.capture.mode}`,
334
+ formatEntityExtractionStatus(entityExtraction.getStatus()),
335
+ ].join("\n\n"),
336
+ };
337
+ }
338
+
339
+ if (action === "warmup") {
340
+ const runId = log.newRunId();
341
+ const forceReload = tokens.some((token) => token === "--force");
342
+ const result = await entityExtraction.warmup({
343
+ runId,
344
+ reason: "command",
345
+ forceReload,
346
+ });
347
+ if (!result.ok) {
348
+ return {
349
+ text: [
350
+ "Entity extraction warmup failed.",
351
+ `- model: ${result.model}`,
352
+ `- cacheDir: ${result.cacheDir}`,
353
+ `- durMs: ${result.durMs}`,
354
+ `- error: ${result.error ?? "unknown"}`,
355
+ ].join("\n"),
356
+ isError: true,
357
+ };
358
+ }
359
+ return {
360
+ text: [
361
+ "Entity extraction warmup complete.",
362
+ `- model: ${result.model}`,
363
+ `- cacheDir: ${result.cacheDir}`,
364
+ `- entities: ${result.entities}`,
365
+ `- durMs: ${result.durMs}`,
366
+ ].join("\n"),
367
+ };
368
+ }
369
+
370
+ return {
371
+ text: "Usage: /memorybraid [status|warmup [--force]]",
372
+ };
373
+ },
374
+ });
375
+
291
376
  api.on("before_agent_start", async (event, ctx) => {
292
377
  const runId = log.newRunId();
293
378
  const toolCtx: OpenClawPluginToolContext = {
@@ -375,14 +460,21 @@ const memoryBraidPlugin = {
375
460
  }
376
461
 
377
462
  let persisted = 0;
463
+ let dedupeSkipped = 0;
464
+ let entityAnnotatedCandidates = 0;
465
+ let totalEntitiesAttached = 0;
466
+ let mem0AddAttempts = 0;
467
+ let mem0AddWithId = 0;
468
+ let mem0AddWithoutId = 0;
378
469
  for (const candidate of candidates) {
379
470
  const hash = sha256(normalizeForHash(candidate.text));
380
471
  if (dedupe.seen[hash]) {
472
+ dedupeSkipped += 1;
381
473
  continue;
382
474
  }
383
475
  dedupe.seen[hash] = now;
384
476
 
385
- const metadata = {
477
+ const metadata: Record<string, unknown> = {
386
478
  sourceType: "capture",
387
479
  workspaceHash: scope.workspaceHash,
388
480
  agentId: scope.agentId,
@@ -394,23 +486,59 @@ const memoryBraidPlugin = {
394
486
  indexedAt: new Date().toISOString(),
395
487
  };
396
488
 
397
- await mem0.addMemory({
489
+ if (cfg.entityExtraction.enabled) {
490
+ const entities = await entityExtraction.extract({
491
+ text: candidate.text,
492
+ runId,
493
+ });
494
+ if (entities.length > 0) {
495
+ entityAnnotatedCandidates += 1;
496
+ totalEntitiesAttached += entities.length;
497
+ metadata.entityUris = entities.map((entity) => entity.canonicalUri);
498
+ metadata.entities = entities;
499
+ }
500
+ }
501
+
502
+ mem0AddAttempts += 1;
503
+ const addResult = await mem0.addMemory({
398
504
  text: candidate.text,
399
505
  scope,
400
506
  metadata,
401
507
  runId,
402
508
  });
509
+ if (addResult.id) {
510
+ mem0AddWithId += 1;
511
+ } else {
512
+ mem0AddWithoutId += 1;
513
+ log.warn("memory_braid.capture.persist", {
514
+ runId,
515
+ reason: "mem0_add_missing_id",
516
+ workspaceHash: scope.workspaceHash,
517
+ agentId: scope.agentId,
518
+ sessionKey: scope.sessionKey,
519
+ contentHashPrefix: hash.slice(0, 12),
520
+ category: candidate.category,
521
+ });
522
+ }
403
523
  persisted += 1;
404
524
  }
405
525
 
406
526
  await writeCaptureDedupeState(statePaths, dedupe);
407
527
  log.debug("memory_braid.capture.persist", {
408
528
  runId,
529
+ mode: cfg.capture.mode,
409
530
  workspaceHash: scope.workspaceHash,
410
531
  agentId: scope.agentId,
411
532
  sessionKey: scope.sessionKey,
412
533
  candidates: candidates.length,
534
+ dedupeSkipped,
413
535
  persisted,
536
+ mem0AddAttempts,
537
+ mem0AddWithId,
538
+ mem0AddWithoutId,
539
+ entityExtractionEnabled: cfg.entityExtraction.enabled,
540
+ entityAnnotatedCandidates,
541
+ totalEntitiesAttached,
414
542
  }, true);
415
543
  });
416
544
 
@@ -418,6 +546,7 @@ const memoryBraidPlugin = {
418
546
  id: "memory-braid-service",
419
547
  start: async (ctx) => {
420
548
  mem0.setStateDir(ctx.stateDir);
549
+ entityExtraction.setStateDir(ctx.stateDir);
421
550
  statePaths = createStatePaths(ctx.stateDir);
422
551
  await ensureStateDir(statePaths);
423
552
  targets = await resolveTargets({
@@ -437,6 +566,24 @@ const memoryBraidPlugin = {
437
566
  stateDir: ctx.stateDir,
438
567
  targets: targets.length,
439
568
  });
569
+ log.info("memory_braid.config", {
570
+ runId,
571
+ mem0Mode: cfg.mem0.mode,
572
+ captureEnabled: cfg.capture.enabled,
573
+ captureMode: cfg.capture.mode,
574
+ captureMaxItemsPerRun: cfg.capture.maxItemsPerRun,
575
+ captureMlProvider: cfg.capture.ml.provider ?? "unset",
576
+ captureMlModel: cfg.capture.ml.model ?? "unset",
577
+ entityExtractionEnabled: cfg.entityExtraction.enabled,
578
+ entityProvider: cfg.entityExtraction.provider,
579
+ entityModel: cfg.entityExtraction.model,
580
+ entityMinScore: cfg.entityExtraction.minScore,
581
+ entityMaxPerMemory: cfg.entityExtraction.maxEntitiesPerMemory,
582
+ entityWarmupOnStartup: cfg.entityExtraction.startup.downloadOnStartup,
583
+ debugEnabled: cfg.debug.enabled,
584
+ debugIncludePayloads: cfg.debug.includePayloads,
585
+ debugSamplingRate: cfg.debug.logSamplingRate,
586
+ });
440
587
 
441
588
  // Bootstrap is async by design so tool availability is not blocked.
442
589
  void runBootstrapIfNeeded({
@@ -458,6 +605,21 @@ const memoryBraidPlugin = {
458
605
  reason: "startup",
459
606
  });
460
607
 
608
+ if (cfg.entityExtraction.enabled && cfg.entityExtraction.startup.downloadOnStartup) {
609
+ void entityExtraction
610
+ .warmup({
611
+ runId,
612
+ reason: "startup",
613
+ })
614
+ .catch((err) => {
615
+ log.warn("memory_braid.entity.warmup", {
616
+ runId,
617
+ reason: "startup",
618
+ error: err instanceof Error ? err.message : String(err),
619
+ });
620
+ });
621
+ }
622
+
461
623
  if (cfg.reconcile.enabled) {
462
624
  const intervalMs = cfg.reconcile.intervalMinutes * 60 * 1000;
463
625
  serviceTimer = setInterval(() => {