memory-braid 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +225 -8
- package/openclaw.plugin.json +34 -11
- package/package.json +2 -1
- package/src/config.ts +58 -15
- package/src/entities.ts +354 -0
- package/src/extract.ts +155 -28
- package/src/index.ts +164 -2
package/README.md
CHANGED
|
@@ -7,12 +7,157 @@ Memory Braid is an OpenClaw `kind: "memory"` plugin that augments local memory s
|
|
|
7
7
|
- Hybrid recall: local memory + Mem0, merged with weighted RRF.
|
|
8
8
|
- Install-time bootstrap import: indexes existing `MEMORY.md`, `memory.md`, `memory/**/*.md`, and recent sessions.
|
|
9
9
|
- Periodic reconcile: keeps remote Mem0 chunks updated and deletes stale remote chunks.
|
|
10
|
-
- Capture pipeline:
|
|
10
|
+
- Capture pipeline modes: `local`, `hybrid`, `ml`.
|
|
11
|
+
- Optional entity extraction: multilingual NER with canonical `entity://...` URIs in memory metadata.
|
|
11
12
|
- Structured debug logs for troubleshooting and tuning.
|
|
12
13
|
|
|
13
14
|
## Install
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
### Install from npm (recommended)
|
|
17
|
+
|
|
18
|
+
On the target machine:
|
|
19
|
+
|
|
20
|
+
1. Install from npm:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
openclaw plugins install memory-braid@0.3.1
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
2. Enable and set as active memory slot:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
openclaw plugins enable memory-braid
|
|
30
|
+
openclaw config set plugins.slots.memory memory-braid
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
3. Restart gateway:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
openclaw gateway restart
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
4. Confirm plugin is loaded:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
openclaw plugins info memory-braid
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Expected:
|
|
46
|
+
- `Status: loaded`
|
|
47
|
+
- `Tools: memory_search, memory_get`
|
|
48
|
+
- `Services: memory-braid-service`
|
|
49
|
+
|
|
50
|
+
### Install from local path (development)
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
openclaw plugins install --link /absolute/path/to/memory-braid
|
|
54
|
+
openclaw plugins enable memory-braid
|
|
55
|
+
openclaw config set plugins.slots.memory memory-braid
|
|
56
|
+
openclaw gateway restart
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Quick start: hybrid capture + multilingual NER
|
|
60
|
+
|
|
61
|
+
Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
|
|
62
|
+
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mem0": {
|
|
66
|
+
"mode": "oss",
|
|
67
|
+
"ossConfig": {
|
|
68
|
+
"version": "v1.1",
|
|
69
|
+
"embedder": {
|
|
70
|
+
"provider": "openai",
|
|
71
|
+
"config": {
|
|
72
|
+
"apiKey": "${OPENAI_API_KEY}",
|
|
73
|
+
"model": "text-embedding-3-small"
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
"vectorStore": {
|
|
77
|
+
"provider": "memory",
|
|
78
|
+
"config": {
|
|
79
|
+
"collectionName": "memories",
|
|
80
|
+
"dimension": 1536
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
"llm": {
|
|
84
|
+
"provider": "openai",
|
|
85
|
+
"config": {
|
|
86
|
+
"apiKey": "${OPENAI_API_KEY}",
|
|
87
|
+
"model": "gpt-4o-mini"
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"enableGraph": false
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
"capture": {
|
|
94
|
+
"enabled": true,
|
|
95
|
+
"mode": "hybrid",
|
|
96
|
+
"maxItemsPerRun": 6,
|
|
97
|
+
"ml": {
|
|
98
|
+
"provider": "openai",
|
|
99
|
+
"model": "gpt-4o-mini",
|
|
100
|
+
"timeoutMs": 2500
|
|
101
|
+
}
|
|
102
|
+
},
|
|
103
|
+
"entityExtraction": {
|
|
104
|
+
"enabled": true,
|
|
105
|
+
"provider": "multilingual_ner",
|
|
106
|
+
"model": "Xenova/bert-base-multilingual-cased-ner-hrl",
|
|
107
|
+
"minScore": 0.65,
|
|
108
|
+
"maxEntitiesPerMemory": 8,
|
|
109
|
+
"startup": {
|
|
110
|
+
"downloadOnStartup": true,
|
|
111
|
+
"warmupText": "John works at Acme in Berlin."
|
|
112
|
+
}
|
|
113
|
+
},
|
|
114
|
+
"debug": {
|
|
115
|
+
"enabled": true
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Then restart:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
openclaw gateway restart
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Verification checklist
|
|
127
|
+
|
|
128
|
+
1. Check runtime status:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
openclaw plugins info memory-braid
|
|
132
|
+
openclaw gateway status
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
2. Trigger/inspect NER warmup:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
openclaw agent --agent main --message "/memorybraid warmup" --json
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
3. Send a message that should be captured:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
openclaw agent --agent main --message "Remember that Ana works at OpenClaw and likes ramen." --json
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
4. Inspect logs for capture + NER:
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
rg -n "memory_braid\\.startup|memory_braid\\.capture|memory_braid\\.entity|memory_braid\\.mem0" ~/.openclaw/logs/gateway.log | tail -n 80
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Expected events:
|
|
154
|
+
- `memory_braid.startup`
|
|
155
|
+
- `memory_braid.entity.model_load`
|
|
156
|
+
- `memory_braid.entity.warmup`
|
|
157
|
+
- `memory_braid.capture.extract`
|
|
158
|
+
- `memory_braid.capture.ml` (for `capture.mode=hybrid|ml`)
|
|
159
|
+
- `memory_braid.entity.extract`
|
|
160
|
+
- `memory_braid.capture.persist`
|
|
16
161
|
|
|
17
162
|
## Self-hosting quick guide
|
|
18
163
|
|
|
@@ -241,14 +386,23 @@ Use this preset when:
|
|
|
241
386
|
},
|
|
242
387
|
"capture": {
|
|
243
388
|
"enabled": true,
|
|
244
|
-
"
|
|
245
|
-
|
|
246
|
-
},
|
|
389
|
+
"mode": "hybrid",
|
|
390
|
+
"maxItemsPerRun": 6,
|
|
247
391
|
"ml": {
|
|
248
392
|
"provider": "openai",
|
|
249
393
|
"model": "gpt-4o-mini",
|
|
250
|
-
"timeoutMs": 2500
|
|
251
|
-
|
|
394
|
+
"timeoutMs": 2500
|
|
395
|
+
}
|
|
396
|
+
},
|
|
397
|
+
"entityExtraction": {
|
|
398
|
+
"enabled": true,
|
|
399
|
+
"provider": "multilingual_ner",
|
|
400
|
+
"model": "Xenova/bert-base-multilingual-cased-ner-hrl",
|
|
401
|
+
"minScore": 0.65,
|
|
402
|
+
"maxEntitiesPerMemory": 8,
|
|
403
|
+
"startup": {
|
|
404
|
+
"downloadOnStartup": true,
|
|
405
|
+
"warmupText": "John works at Acme in Berlin."
|
|
252
406
|
}
|
|
253
407
|
},
|
|
254
408
|
"dedupe": {
|
|
@@ -266,6 +420,48 @@ Use this preset when:
|
|
|
266
420
|
}
|
|
267
421
|
```
|
|
268
422
|
|
|
423
|
+
## Capture defaults
|
|
424
|
+
|
|
425
|
+
Capture defaults are:
|
|
426
|
+
|
|
427
|
+
- `capture.enabled`: `true`
|
|
428
|
+
- `capture.mode`: `"local"`
|
|
429
|
+
- `capture.maxItemsPerRun`: `6`
|
|
430
|
+
- `capture.ml.provider`: unset
|
|
431
|
+
- `capture.ml.model`: unset
|
|
432
|
+
- `capture.ml.timeoutMs`: `2500`
|
|
433
|
+
|
|
434
|
+
Important behavior:
|
|
435
|
+
|
|
436
|
+
- `capture.mode = "local"`: heuristic-only extraction.
|
|
437
|
+
- `capture.mode = "hybrid"`: heuristic extraction + ML enrichment when ML config is set.
|
|
438
|
+
- `capture.mode = "ml"`: ML-first extraction; falls back to heuristic if ML config/call is unavailable.
|
|
439
|
+
- ML calls run only when both `capture.ml.provider` and `capture.ml.model` are set.
|
|
440
|
+
|
|
441
|
+
## Entity extraction defaults
|
|
442
|
+
|
|
443
|
+
Entity extraction defaults are:
|
|
444
|
+
|
|
445
|
+
- `entityExtraction.enabled`: `false`
|
|
446
|
+
- `entityExtraction.provider`: `"multilingual_ner"`
|
|
447
|
+
- `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"`
|
|
448
|
+
- `entityExtraction.minScore`: `0.65`
|
|
449
|
+
- `entityExtraction.maxEntitiesPerMemory`: `8`
|
|
450
|
+
- `entityExtraction.startup.downloadOnStartup`: `true`
|
|
451
|
+
- `entityExtraction.startup.warmupText`: `"John works at Acme in Berlin."`
|
|
452
|
+
|
|
453
|
+
When enabled:
|
|
454
|
+
|
|
455
|
+
- Model cache/download path is `<OPENCLAW_STATE_DIR>/memory-braid/models/entity-extraction` (typically `~/.openclaw/memory-braid/models/entity-extraction`).
|
|
456
|
+
- Captured memories get `metadata.entities` and `metadata.entityUris` (canonical IDs like `entity://person/john-doe`).
|
|
457
|
+
- Startup can pre-download/warm the model (`downloadOnStartup: true`).
|
|
458
|
+
|
|
459
|
+
Status and warmup commands:
|
|
460
|
+
|
|
461
|
+
- `/memorybraid status`
|
|
462
|
+
- `/memorybraid warmup`
|
|
463
|
+
- `/memorybraid warmup --force`
|
|
464
|
+
|
|
269
465
|
## Debugging
|
|
270
466
|
|
|
271
467
|
Set:
|
|
@@ -285,14 +481,35 @@ Set:
|
|
|
285
481
|
Key events:
|
|
286
482
|
|
|
287
483
|
- `memory_braid.startup`
|
|
484
|
+
- `memory_braid.config`
|
|
288
485
|
- `memory_braid.bootstrap.begin|complete|error`
|
|
289
486
|
- `memory_braid.reconcile.begin|progress|complete|error`
|
|
290
|
-
- `memory_braid.search.local|mem0|merge|inject`
|
|
487
|
+
- `memory_braid.search.local|mem0|merge|inject|skip`
|
|
291
488
|
- `memory_braid.capture.extract|ml|persist|skip`
|
|
489
|
+
- `memory_braid.entity.model_load|warmup|extract`
|
|
292
490
|
- `memory_braid.mem0.request|response|error`
|
|
293
491
|
|
|
294
492
|
`debug.includePayloads=true` includes payload fields; otherwise sensitive text fields are omitted.
|
|
295
493
|
|
|
494
|
+
Traceability tips:
|
|
495
|
+
|
|
496
|
+
- Use `runId` to follow one execution end-to-end across capture/search/entity/mem0 events.
|
|
497
|
+
- `memory_braid.capture.persist` includes high-signal counters:
|
|
498
|
+
- `dedupeSkipped`
|
|
499
|
+
- `mem0AddAttempts`
|
|
500
|
+
- `mem0AddWithId`
|
|
501
|
+
- `mem0AddWithoutId`
|
|
502
|
+
- `entityAnnotatedCandidates`
|
|
503
|
+
- `totalEntitiesAttached`
|
|
504
|
+
- `memory_braid.capture.ml` includes `fallbackUsed` and fallback reasons when ML is unavailable.
|
|
505
|
+
- `memory_braid.entity.extract` includes `entityTypes` and `sampleEntityUris`.
|
|
506
|
+
|
|
507
|
+
Example:
|
|
508
|
+
|
|
509
|
+
```bash
|
|
510
|
+
rg -n "memory_braid\\.|runId\":\"<RUN_ID>\"" ~/.openclaw/logs/gateway.log | tail -n 120
|
|
511
|
+
```
|
|
512
|
+
|
|
296
513
|
## Tests
|
|
297
514
|
|
|
298
515
|
```bash
|
package/openclaw.plugin.json
CHANGED
|
@@ -47,25 +47,48 @@
|
|
|
47
47
|
"additionalProperties": false,
|
|
48
48
|
"properties": {
|
|
49
49
|
"enabled": { "type": "boolean", "default": true },
|
|
50
|
-
"
|
|
50
|
+
"mode": {
|
|
51
|
+
"type": "string",
|
|
52
|
+
"enum": ["local", "hybrid", "ml"],
|
|
53
|
+
"default": "local"
|
|
54
|
+
},
|
|
55
|
+
"maxItemsPerRun": { "type": "integer", "minimum": 1, "maximum": 50, "default": 6 },
|
|
56
|
+
"ml": {
|
|
51
57
|
"type": "object",
|
|
52
58
|
"additionalProperties": false,
|
|
53
59
|
"properties": {
|
|
54
|
-
"
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
"default": "heuristic"
|
|
58
|
-
}
|
|
60
|
+
"provider": { "type": "string", "enum": ["openai", "anthropic", "gemini"] },
|
|
61
|
+
"model": { "type": "string" },
|
|
62
|
+
"timeoutMs": { "type": "integer", "minimum": 250, "maximum": 30000, "default": 2500 }
|
|
59
63
|
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
"entityExtraction": {
|
|
68
|
+
"type": "object",
|
|
69
|
+
"additionalProperties": false,
|
|
70
|
+
"properties": {
|
|
71
|
+
"enabled": { "type": "boolean", "default": false },
|
|
72
|
+
"provider": {
|
|
73
|
+
"type": "string",
|
|
74
|
+
"enum": ["multilingual_ner"],
|
|
75
|
+
"default": "multilingual_ner"
|
|
60
76
|
},
|
|
61
|
-
"
|
|
77
|
+
"model": {
|
|
78
|
+
"type": "string",
|
|
79
|
+
"default": "Xenova/bert-base-multilingual-cased-ner-hrl"
|
|
80
|
+
},
|
|
81
|
+
"minScore": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.65 },
|
|
82
|
+
"maxEntitiesPerMemory": { "type": "integer", "minimum": 1, "maximum": 50, "default": 8 },
|
|
83
|
+
"startup": {
|
|
62
84
|
"type": "object",
|
|
63
85
|
"additionalProperties": false,
|
|
64
86
|
"properties": {
|
|
65
|
-
"
|
|
66
|
-
"
|
|
67
|
-
|
|
68
|
-
|
|
87
|
+
"downloadOnStartup": { "type": "boolean", "default": true },
|
|
88
|
+
"warmupText": {
|
|
89
|
+
"type": "string",
|
|
90
|
+
"default": "John works at Acme in Berlin."
|
|
91
|
+
}
|
|
69
92
|
}
|
|
70
93
|
}
|
|
71
94
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "memory-braid",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "OpenClaw memory plugin that augments local memory with Mem0, bootstrap import, reconcile, and capture.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
"openclaw": ">=2026.2.18"
|
|
32
32
|
},
|
|
33
33
|
"dependencies": {
|
|
34
|
+
"@xenova/transformers": "^2.17.2",
|
|
34
35
|
"mem0ai": "^2.2.3"
|
|
35
36
|
},
|
|
36
37
|
"devDependencies": {
|
package/src/config.ts
CHANGED
|
@@ -20,14 +20,23 @@ export type MemoryBraidConfig = {
|
|
|
20
20
|
};
|
|
21
21
|
capture: {
|
|
22
22
|
enabled: boolean;
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
};
|
|
23
|
+
mode: "local" | "hybrid" | "ml";
|
|
24
|
+
maxItemsPerRun: number;
|
|
26
25
|
ml: {
|
|
27
26
|
provider?: "openai" | "anthropic" | "gemini";
|
|
28
27
|
model?: string;
|
|
29
28
|
timeoutMs: number;
|
|
30
|
-
|
|
29
|
+
};
|
|
30
|
+
};
|
|
31
|
+
entityExtraction: {
|
|
32
|
+
enabled: boolean;
|
|
33
|
+
provider: "multilingual_ner";
|
|
34
|
+
model: string;
|
|
35
|
+
minScore: number;
|
|
36
|
+
maxEntitiesPerMemory: number;
|
|
37
|
+
startup: {
|
|
38
|
+
downloadOnStartup: boolean;
|
|
39
|
+
warmupText: string;
|
|
31
40
|
};
|
|
32
41
|
};
|
|
33
42
|
bootstrap: {
|
|
@@ -84,14 +93,23 @@ const DEFAULTS: MemoryBraidConfig = {
|
|
|
84
93
|
},
|
|
85
94
|
capture: {
|
|
86
95
|
enabled: true,
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
},
|
|
96
|
+
mode: "local",
|
|
97
|
+
maxItemsPerRun: 6,
|
|
90
98
|
ml: {
|
|
91
99
|
provider: undefined,
|
|
92
100
|
model: undefined,
|
|
93
101
|
timeoutMs: 2500,
|
|
94
|
-
|
|
102
|
+
},
|
|
103
|
+
},
|
|
104
|
+
entityExtraction: {
|
|
105
|
+
enabled: false,
|
|
106
|
+
provider: "multilingual_ner",
|
|
107
|
+
model: "Xenova/bert-base-multilingual-cased-ner-hrl",
|
|
108
|
+
minScore: 0.65,
|
|
109
|
+
maxEntitiesPerMemory: 8,
|
|
110
|
+
startup: {
|
|
111
|
+
downloadOnStartup: true,
|
|
112
|
+
warmupText: "John works at Acme in Berlin.",
|
|
95
113
|
},
|
|
96
114
|
},
|
|
97
115
|
bootstrap: {
|
|
@@ -160,7 +178,8 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
160
178
|
const recall = asRecord(root.recall);
|
|
161
179
|
const merge = asRecord(recall.merge);
|
|
162
180
|
const capture = asRecord(root.capture);
|
|
163
|
-
const
|
|
181
|
+
const entityExtraction = asRecord(root.entityExtraction);
|
|
182
|
+
const entityStartup = asRecord(entityExtraction.startup);
|
|
164
183
|
const ml = asRecord(capture.ml);
|
|
165
184
|
const bootstrap = asRecord(root.bootstrap);
|
|
166
185
|
const reconcile = asRecord(root.reconcile);
|
|
@@ -170,8 +189,11 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
170
189
|
const debug = asRecord(root.debug);
|
|
171
190
|
|
|
172
191
|
const mode = mem0.mode === "oss" ? "oss" : "cloud";
|
|
173
|
-
const
|
|
174
|
-
|
|
192
|
+
const rawCaptureMode = asString(capture.mode)?.toLowerCase();
|
|
193
|
+
const captureMode =
|
|
194
|
+
rawCaptureMode === "local" || rawCaptureMode === "hybrid" || rawCaptureMode === "ml"
|
|
195
|
+
? rawCaptureMode
|
|
196
|
+
: DEFAULTS.capture.mode;
|
|
175
197
|
|
|
176
198
|
return {
|
|
177
199
|
enabled: asBoolean(root.enabled, DEFAULTS.enabled),
|
|
@@ -195,9 +217,8 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
195
217
|
},
|
|
196
218
|
capture: {
|
|
197
219
|
enabled: asBoolean(capture.enabled, DEFAULTS.capture.enabled),
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
},
|
|
220
|
+
mode: captureMode,
|
|
221
|
+
maxItemsPerRun: asInt(capture.maxItemsPerRun, DEFAULTS.capture.maxItemsPerRun, 1, 50),
|
|
201
222
|
ml: {
|
|
202
223
|
provider:
|
|
203
224
|
ml.provider === "openai" || ml.provider === "anthropic" || ml.provider === "gemini"
|
|
@@ -205,7 +226,29 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
205
226
|
: DEFAULTS.capture.ml.provider,
|
|
206
227
|
model: asString(ml.model),
|
|
207
228
|
timeoutMs: asInt(ml.timeoutMs, DEFAULTS.capture.ml.timeoutMs, 250, 30_000),
|
|
208
|
-
|
|
229
|
+
},
|
|
230
|
+
},
|
|
231
|
+
entityExtraction: {
|
|
232
|
+
enabled: asBoolean(entityExtraction.enabled, DEFAULTS.entityExtraction.enabled),
|
|
233
|
+
provider:
|
|
234
|
+
entityExtraction.provider === "multilingual_ner"
|
|
235
|
+
? "multilingual_ner"
|
|
236
|
+
: DEFAULTS.entityExtraction.provider,
|
|
237
|
+
model: asString(entityExtraction.model) ?? DEFAULTS.entityExtraction.model,
|
|
238
|
+
minScore: asNumber(entityExtraction.minScore, DEFAULTS.entityExtraction.minScore, 0, 1),
|
|
239
|
+
maxEntitiesPerMemory: asInt(
|
|
240
|
+
entityExtraction.maxEntitiesPerMemory,
|
|
241
|
+
DEFAULTS.entityExtraction.maxEntitiesPerMemory,
|
|
242
|
+
1,
|
|
243
|
+
50,
|
|
244
|
+
),
|
|
245
|
+
startup: {
|
|
246
|
+
downloadOnStartup: asBoolean(
|
|
247
|
+
entityStartup.downloadOnStartup,
|
|
248
|
+
DEFAULTS.entityExtraction.startup.downloadOnStartup,
|
|
249
|
+
),
|
|
250
|
+
warmupText:
|
|
251
|
+
asString(entityStartup.warmupText) ?? DEFAULTS.entityExtraction.startup.warmupText,
|
|
209
252
|
},
|
|
210
253
|
},
|
|
211
254
|
bootstrap: {
|
package/src/entities.ts
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
import os from "node:os";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { normalizeWhitespace } from "./chunking.js";
|
|
4
|
+
import type { MemoryBraidConfig } from "./config.js";
|
|
5
|
+
import { MemoryBraidLogger } from "./logger.js";
|
|
6
|
+
|
|
7
|
+
type NerPipeline = (text: string, options?: Record<string, unknown>) => Promise<unknown>;
|
|
8
|
+
|
|
9
|
+
type NerRecord = {
|
|
10
|
+
word?: unknown;
|
|
11
|
+
entity_group?: unknown;
|
|
12
|
+
entity?: unknown;
|
|
13
|
+
score?: unknown;
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
export type ExtractedEntity = {
|
|
17
|
+
text: string;
|
|
18
|
+
type: "person" | "organization" | "location" | "misc";
|
|
19
|
+
score: number;
|
|
20
|
+
canonicalUri: string;
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/**
 * Tally extracted entities by their `type` field.
 *
 * @param entities Entities to count.
 * @returns A map from entity type to the number of entities of that type;
 *          types that do not occur are absent from the map.
 */
function summarizeEntityTypes(entities: ExtractedEntity[]): Record<string, number> {
  return entities.reduce<Record<string, number>>((counts, { type }) => {
    counts[type] = (counts[type] ?? 0) + 1;
    return counts;
  }, {});
}
|
|
30
|
+
|
|
31
|
+
/**
 * Pick the state directory and return it as an absolute path.
 *
 * Precedence: the explicit argument, then the `OPENCLAW_STATE_DIR` environment
 * variable, then `.openclaw` under the current user's home directory. Values that
 * are empty after trimming are treated as not provided.
 *
 * @param explicitStateDir Optional caller-supplied directory.
 * @returns The chosen directory, resolved with `path.resolve`.
 */
function resolveStateDir(explicitStateDir?: string): string {
  const fromArgument = explicitStateDir?.trim();
  if (fromArgument) {
    return path.resolve(fromArgument);
  }

  const fromEnvironment = process.env.OPENCLAW_STATE_DIR?.trim();
  if (fromEnvironment) {
    return path.resolve(fromEnvironment);
  }

  const homeDefault = path.join(os.homedir(), ".openclaw");
  return path.resolve(homeDefault);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build the directory used as the model cache for entity extraction.
 *
 * @param stateDir Optional state directory; when omitted, `resolveStateDir`
 *                 chooses one.
 * @returns `<state dir>/memory-braid/models/entity-extraction`.
 */
export function resolveEntityModelCacheDir(stateDir?: string): string {
  const baseDir = resolveStateDir(stateDir);
  const segments = ["memory-braid", "models", "entity-extraction"];
  return path.join(baseDir, ...segments);
}
|
|
42
|
+
|
|
43
|
+
/**
 * Turn free text into a lowercase, dash-separated slug for use in entity URIs.
 *
 * The text is NFKD-normalized and combining diacritics (U+0300–U+036F) are removed,
 * so "São Paulo" becomes "sao-paulo". Runs of characters outside `a-z0-9` collapse
 * to a single dash and leading/trailing dashes are trimmed.
 *
 * If that ASCII slug is empty (the text has no ASCII letters or digits, e.g. "北京"
 * or "Москва"), a Unicode-aware slug is built instead so that distinct non-Latin
 * names do not all collapse to the same "unknown" value. Inputs that already
 * produced a non-empty ASCII slug are unaffected by the fallback.
 *
 * @param value Raw entity text.
 * @returns The slug, or "unknown" when the text has no letters or digits at all.
 */
function slugify(value: string): string {
  const folded = value
    .normalize("NFKD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase();

  const asciiSlug = folded.replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
  if (asciiSlug) {
    return asciiSlug;
  }

  // Recompose with NFKC (NFKD splits e.g. Hangul syllables), then keep letters,
  // digits and script-specific marks (\p{M}) so words that rely on combining
  // vowel signs are not broken apart.
  const unicodeSlug = folded
    .normalize("NFKC")
    .replace(/[^\p{L}\p{M}\p{N}]+/gu, "-")
    .replace(/^-+|-+$/g, "");

  return unicodeSlug || "unknown";
}
|
|
53
|
+
|
|
54
|
+
/**
 * Compose the canonical URI for an entity.
 *
 * @param type Entity type, used verbatim as the first path segment.
 * @param text Entity text; passed through `slugify` to form the second segment.
 * @returns A string of the form `entity://<type>/<slug>`.
 */
export function buildCanonicalEntityUri(type: ExtractedEntity["type"], text: string): string {
  const slug = slugify(text);
  const uri = `entity://${type}/${slug}`;
  return uri;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Map a raw NER label onto one of the entity types used by this module.
 *
 * Matching is case-insensitive and by substring, checked in this order:
 * "PER" → person, "ORG" → organization, "LOC" or "GPE" → location.
 * Anything else, including non-string input, is "misc".
 *
 * @param raw Label as reported by the NER pipeline.
 * @returns The normalized entity type.
 */
function normalizeEntityType(raw: unknown): ExtractedEntity["type"] {
  if (typeof raw !== "string") {
    return "misc";
  }

  const label = raw.toUpperCase();
  // Order matters: the first rule with a matching marker wins.
  const rules: Array<[string[], ExtractedEntity["type"]]> = [
    [["PER"], "person"],
    [["ORG"], "organization"],
    [["LOC", "GPE"], "location"],
  ];

  for (const [markers, type] of rules) {
    if (markers.some((marker) => label.includes(marker))) {
      return type;
    }
  }
  return "misc";
}
|
|
74
|
+
|
|
75
|
+
/**
 * Clean the text of a raw NER record.
 *
 * Removes one leading "##" and then one leading "▁" (presumably tokenizer
 * sub-word markers — confirm against the model's tokenizer), then normalizes
 * whitespace with `normalizeWhitespace`.
 *
 * @param raw The `word` value from a NER record.
 * @returns The cleaned text, or an empty string when `raw` is not a string.
 */
function normalizeEntityText(raw: unknown): string {
  if (typeof raw === "string") {
    const withoutHashPrefix = raw.replace(/^##/, "");
    const withoutUnderscorePrefix = withoutHashPrefix.replace(/^▁/, "");
    return normalizeWhitespace(withoutUnderscorePrefix);
  }
  return "";
}
|
|
81
|
+
|
|
82
|
+
type EntityExtractionOptions = {
|
|
83
|
+
stateDir?: string;
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
/**
 * Loads a token-classification pipeline from `@xenova/transformers` on demand and
 * converts its raw output into deduplicated, score-ranked `ExtractedEntity` records.
 *
 * The load promise is cached on the instance. It is discarded only when
 * `setStateDir` receives a different directory or when a reload is forced through
 * `warmup({ forceReload: true })`. A failed load resolves to `null`, and that
 * `null` stays cached under the same rules.
 */
export class EntityExtractionManager {
  private readonly cfg: MemoryBraidConfig["entityExtraction"];
  private readonly log: MemoryBraidLogger;
  private stateDir?: string;
  private pipelinePromise: Promise<NerPipeline | null> | null = null;

  /**
   * @param cfg Entity-extraction section of the plugin configuration.
   * @param log Logger used for model-load, warmup and extraction events.
   * @param options Optional settings; `stateDir` overrides the default state directory.
   */
  constructor(
    cfg: MemoryBraidConfig["entityExtraction"],
    log: MemoryBraidLogger,
    options?: EntityExtractionOptions,
  ) {
    this.cfg = cfg;
    this.log = log;
    this.stateDir = options?.stateDir;
  }

  /**
   * Change the state directory used to derive the model cache directory.
   * Blank or unchanged values are ignored; a real change drops the cached
   * pipeline so the next use loads it again with the new cache directory.
   */
  setStateDir(stateDir?: string): void {
    const next = stateDir?.trim();
    if (!next || next === this.stateDir) {
      return;
    }
    this.stateDir = next;
    this.pipelinePromise = null;
  }

  /** Report the effective configuration together with the resolved cache directory. */
  getStatus(): {
    enabled: boolean;
    provider: MemoryBraidConfig["entityExtraction"]["provider"];
    model: string;
    minScore: number;
    maxEntitiesPerMemory: number;
    cacheDir: string;
  } {
    return {
      enabled: this.cfg.enabled,
      provider: this.cfg.provider,
      model: this.cfg.model,
      minScore: this.cfg.minScore,
      maxEntitiesPerMemory: this.cfg.maxEntitiesPerMemory,
      cacheDir: resolveEntityModelCacheDir(this.stateDir),
    };
  }

  /**
   * Load the pipeline (optionally forcing a reload) and run one extraction.
   *
   * Never throws for extraction errors: they are logged and reported in the result.
   *
   * @param params.runId Correlation id copied into log events.
   * @param params.reason Logged reason; defaults to "manual".
   * @param params.forceReload When true, discard any cached pipeline first.
   * @param params.text Text to run; defaults to the configured warmup text.
   * @returns `ok: false` with `error` set to "entity_extraction_disabled",
   *          "model_load_failed", or the extraction error message; otherwise
   *          `ok: true` with the number of entities found.
   */
  async warmup(params?: {
    runId?: string;
    reason?: string;
    forceReload?: boolean;
    text?: string;
  }): Promise<{
    ok: boolean;
    cacheDir: string;
    model: string;
    entities: number;
    durMs: number;
    error?: string;
  }> {
    const startedAt = Date.now();
    if (!this.cfg.enabled) {
      return {
        ok: false,
        cacheDir: resolveEntityModelCacheDir(this.stateDir),
        model: this.cfg.model,
        entities: 0,
        durMs: Date.now() - startedAt,
        error: "entity_extraction_disabled",
      };
    }

    const pipeline = await this.ensurePipeline(params?.forceReload);
    if (!pipeline) {
      return {
        ok: false,
        cacheDir: resolveEntityModelCacheDir(this.stateDir),
        model: this.cfg.model,
        entities: 0,
        durMs: Date.now() - startedAt,
        error: "model_load_failed",
      };
    }

    try {
      const entities = await this.extractWithPipeline({
        pipeline,
        text: params?.text ?? this.cfg.startup.warmupText,
      });
      this.log.info("memory_braid.entity.warmup", {
        runId: params?.runId,
        reason: params?.reason ?? "manual",
        provider: this.cfg.provider,
        model: this.cfg.model,
        cacheDir: resolveEntityModelCacheDir(this.stateDir),
        entities: entities.length,
        entityTypes: summarizeEntityTypes(entities),
        sampleEntityUris: entities.slice(0, 5).map((entry) => entry.canonicalUri),
        durMs: Date.now() - startedAt,
      });
      return {
        ok: true,
        cacheDir: resolveEntityModelCacheDir(this.stateDir),
        model: this.cfg.model,
        entities: entities.length,
        durMs: Date.now() - startedAt,
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      this.log.warn("memory_braid.entity.warmup", {
        runId: params?.runId,
        reason: params?.reason ?? "manual",
        provider: this.cfg.provider,
        model: this.cfg.model,
        cacheDir: resolveEntityModelCacheDir(this.stateDir),
        error: message,
      });
      return {
        ok: false,
        cacheDir: resolveEntityModelCacheDir(this.stateDir),
        model: this.cfg.model,
        entities: 0,
        durMs: Date.now() - startedAt,
        error: message,
      };
    }
  }

  /**
   * Extract entities from `params.text`.
   *
   * Best-effort: returns an empty array when extraction is disabled, the text is
   * empty after whitespace normalization, the pipeline could not be loaded, or
   * the pipeline call throws (the error is logged as a warning).
   *
   * @param params.text Text to analyse.
   * @param params.runId Correlation id copied into log events.
   */
  async extract(params: { text: string; runId?: string }): Promise<ExtractedEntity[]> {
    if (!this.cfg.enabled) {
      return [];
    }

    const text = normalizeWhitespace(params.text);
    if (!text) {
      return [];
    }

    const pipeline = await this.ensurePipeline();
    if (!pipeline) {
      return [];
    }

    try {
      const entities = await this.extractWithPipeline({ pipeline, text });
      this.log.debug("memory_braid.entity.extract", {
        runId: params.runId,
        provider: this.cfg.provider,
        model: this.cfg.model,
        entities: entities.length,
        entityTypes: summarizeEntityTypes(entities),
        sampleEntityUris: entities.slice(0, 5).map((entry) => entry.canonicalUri),
      });
      return entities;
    } catch (err) {
      this.log.warn("memory_braid.entity.extract", {
        runId: params.runId,
        provider: this.cfg.provider,
        model: this.cfg.model,
        error: err instanceof Error ? err.message : String(err),
      });
      return [];
    }
  }

  /**
   * Return the cached pipeline promise, starting a load if there is none.
   * Concurrent callers share the same in-flight promise.
   *
   * @param forceReload When true, drop the cached promise and load again.
   * @returns The pipeline, or `null` when disabled or when loading failed.
   */
  private async ensurePipeline(forceReload = false): Promise<NerPipeline | null> {
    if (!this.cfg.enabled) {
      return null;
    }

    if (forceReload) {
      this.pipelinePromise = null;
    }

    if (this.pipelinePromise) {
      return this.pipelinePromise;
    }

    this.pipelinePromise = this.loadPipeline();
    return this.pipelinePromise;
  }

  /**
   * Import `@xenova/transformers`, point its environment at the cache directory
   * and build a "token-classification" pipeline for the configured model.
   * Any failure is logged at error level and reported as `null` instead of thrown.
   */
  private async loadPipeline(): Promise<NerPipeline | null> {
    const cacheDir = resolveEntityModelCacheDir(this.stateDir);
    this.log.info("memory_braid.entity.model_load", {
      provider: this.cfg.provider,
      model: this.cfg.model,
      cacheDir,
    });

    try {
      // Imported dynamically, so a missing or broken dependency lands in the
      // catch below rather than failing when this module is loaded.
      const mod = (await import("@xenova/transformers")) as {
        env?: Record<string, unknown>;
        pipeline?: (
          task: string,
          model: string,
          options?: Record<string, unknown>,
        ) => Promise<unknown>;
      };

      if (!mod.pipeline) {
        throw new Error("@xenova/transformers pipeline export not found");
      }

      // NOTE(review): `env` is an object exported by the library module, so these
      // assignments presumably affect every user of the library in the process.
      if (mod.env) {
        mod.env.cacheDir = cacheDir;
        mod.env.allowRemoteModels = true;
        mod.env.allowLocalModels = true;
        mod.env.useFS = true;
      }

      const classifier = await mod.pipeline("token-classification", this.cfg.model, {
        quantized: true,
      });

      if (typeof classifier !== "function") {
        throw new Error("token-classification pipeline is not callable");
      }

      return classifier as NerPipeline;
    } catch (err) {
      this.log.error("memory_braid.entity.model_load", {
        provider: this.cfg.provider,
        model: this.cfg.model,
        cacheDir,
        error: err instanceof Error ? err.message : String(err),
      });
      return null;
    }
  }

  /**
   * Run the pipeline on `params.text` and normalize the result.
   *
   * Rows without usable text or with a score below `cfg.minScore` are dropped.
   * Remaining rows are deduplicated by canonical URI (highest score wins), sorted
   * by descending score and capped at `cfg.maxEntitiesPerMemory`.
   * Errors thrown by the pipeline propagate to the caller.
   */
  private async extractWithPipeline(params: {
    pipeline: NerPipeline;
    text: string;
  }): Promise<ExtractedEntity[]> {
    const raw = await params.pipeline(params.text, {
      aggregation_strategy: "simple",
    });
    // Anything other than an array is treated as "no entities".
    const rows = Array.isArray(raw) ? raw : [];

    const deduped = new Map<string, ExtractedEntity>();
    for (const row of rows) {
      if (!row || typeof row !== "object") {
        continue;
      }
      const record = row as NerRecord;
      const entityText = normalizeEntityText(record.word);
      if (!entityText) {
        continue;
      }
      // NaN is a "number", survives Math.min/Math.max, and `NaN < minScore` is
      // false, so it would slip past the threshold and make the sort comparator
      // return NaN. Treat it like a missing score.
      const score =
        typeof record.score === "number" && !Number.isNaN(record.score)
          ? Math.max(0, Math.min(1, record.score))
          : 0;
      if (score < this.cfg.minScore) {
        continue;
      }

      // Rows may carry the label as `entity_group` or as `entity`.
      const type = normalizeEntityType(record.entity_group ?? record.entity);
      const canonicalUri = buildCanonicalEntityUri(type, entityText);
      const current = deduped.get(canonicalUri);
      if (!current || score > current.score) {
        deduped.set(canonicalUri, {
          text: entityText,
          type,
          score,
          canonicalUri,
        });
      }
    }

    return Array.from(deduped.values())
      .sort((a, b) => b.score - a.score)
      .slice(0, this.cfg.maxEntitiesPerMemory);
  }
}
|
package/src/extract.ts
CHANGED
|
@@ -3,6 +3,8 @@ import type { MemoryBraidConfig } from "./config.js";
|
|
|
3
3
|
import { MemoryBraidLogger } from "./logger.js";
|
|
4
4
|
import type { ExtractedCandidate } from "./types.js";
|
|
5
5
|
|
|
6
|
+
type MlProvider = "openai" | "anthropic" | "gemini";
|
|
7
|
+
|
|
6
8
|
const HEURISTIC_PATTERNS = [
|
|
7
9
|
/remember|remember that|keep in mind|note that/i,
|
|
8
10
|
/i prefer|prefer to|don't like|do not like|hate|love/i,
|
|
@@ -145,14 +147,11 @@ function parseJsonObjectArray(raw: string): Array<Record<string, unknown>> {
|
|
|
145
147
|
}
|
|
146
148
|
|
|
147
149
|
async function callMlEnrichment(params: {
|
|
148
|
-
provider:
|
|
150
|
+
provider: MlProvider;
|
|
149
151
|
model: string;
|
|
150
152
|
timeoutMs: number;
|
|
151
153
|
candidates: ExtractedCandidate[];
|
|
152
154
|
}): Promise<Array<Record<string, unknown>>> {
|
|
153
|
-
const controller = new AbortController();
|
|
154
|
-
const timer = setTimeout(() => controller.abort(), params.timeoutMs);
|
|
155
|
-
|
|
156
155
|
const prompt = [
|
|
157
156
|
"Classify the memory candidates.",
|
|
158
157
|
"Return ONLY JSON array.",
|
|
@@ -160,6 +159,52 @@ async function callMlEnrichment(params: {
|
|
|
160
159
|
"Category one of: preference, decision, fact, task, other.",
|
|
161
160
|
JSON.stringify(params.candidates.map((candidate, index) => ({ index, text: candidate.text }))),
|
|
162
161
|
].join("\n");
|
|
162
|
+
return callMlJson({
|
|
163
|
+
provider: params.provider,
|
|
164
|
+
model: params.model,
|
|
165
|
+
timeoutMs: params.timeoutMs,
|
|
166
|
+
prompt,
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
 * Ask the configured ML provider to extract memory items from a conversation.
 *
 * Only the last 30 messages are sent, reduced to their `role` and `text` fields.
 * The prompt is a fixed instruction list followed by the JSON-encoded messages;
 * the request itself is delegated to `callMlJson`.
 *
 * @param params.provider ML provider to call.
 * @param params.model Model name passed to the provider.
 * @param params.timeoutMs Timeout forwarded to `callMlJson`.
 * @param params.maxItems Item limit stated in the prompt.
 * @param params.messages Conversation messages, oldest first.
 * @returns Whatever `callMlJson` resolves to for this prompt.
 */
async function callMlExtraction(params: {
  provider: MlProvider;
  model: string;
  timeoutMs: number;
  maxItems: number;
  messages: Array<{ role: string; text: string }>;
}): Promise<Array<Record<string, unknown>>> {
  const { provider, model, timeoutMs, maxItems, messages } = params;

  // Copy only role/text so no other message fields reach the prompt.
  const recent = messages.slice(-30).map(({ role, text }) => ({ role, text }));

  const instructions = [
    "Extract durable user memories from this conversation.",
    "Return ONLY JSON array.",
    "Each item: {text:string, category:string, score:number}.",
    "Category one of: preference, decision, fact, task, other.",
    "Keep each text concise and atomic.",
    `Maximum items: ${maxItems}.`,
  ];
  const prompt = [...instructions, JSON.stringify(recent)].join("\n");

  return callMlJson({ provider, model, timeoutMs, prompt });
}
|
|
199
|
+
|
|
200
|
+
async function callMlJson(params: {
|
|
201
|
+
provider: MlProvider;
|
|
202
|
+
model: string;
|
|
203
|
+
timeoutMs: number;
|
|
204
|
+
prompt: string;
|
|
205
|
+
}): Promise<Array<Record<string, unknown>>> {
|
|
206
|
+
const controller = new AbortController();
|
|
207
|
+
const timer = setTimeout(() => controller.abort(), params.timeoutMs);
|
|
163
208
|
|
|
164
209
|
try {
|
|
165
210
|
if (params.provider === "openai") {
|
|
@@ -183,7 +228,7 @@ async function callMlEnrichment(params: {
|
|
|
183
228
|
},
|
|
184
229
|
{
|
|
185
230
|
role: "user",
|
|
186
|
-
content: prompt,
|
|
231
|
+
content: params.prompt,
|
|
187
232
|
},
|
|
188
233
|
],
|
|
189
234
|
}),
|
|
@@ -212,7 +257,7 @@ async function callMlEnrichment(params: {
|
|
|
212
257
|
model: params.model,
|
|
213
258
|
max_tokens: 1000,
|
|
214
259
|
temperature: 0,
|
|
215
|
-
messages: [{ role: "user", content: prompt }],
|
|
260
|
+
messages: [{ role: "user", content: params.prompt }],
|
|
216
261
|
}),
|
|
217
262
|
signal: controller.signal,
|
|
218
263
|
});
|
|
@@ -236,7 +281,7 @@ async function callMlEnrichment(params: {
|
|
|
236
281
|
},
|
|
237
282
|
body: JSON.stringify({
|
|
238
283
|
generationConfig: { temperature: 0 },
|
|
239
|
-
contents: [{ role: "user", parts: [{ text: prompt }] }],
|
|
284
|
+
contents: [{ role: "user", parts: [{ text: params.prompt }] }],
|
|
240
285
|
}),
|
|
241
286
|
signal: controller.signal,
|
|
242
287
|
},
|
|
@@ -251,6 +296,19 @@ async function callMlEnrichment(params: {
|
|
|
251
296
|
}
|
|
252
297
|
}
|
|
253
298
|
|
|
299
|
+
/**
 * Returns `value` when it is one of the known category literals
 * ("preference", "decision", "fact", "task", "other"); otherwise returns
 * `fallback`, which defaults to "other".
 */
function normalizeCategory(
  value: unknown,
  fallback: ExtractedCandidate["category"] = "other",
): ExtractedCandidate["category"] {
  switch (value) {
    case "preference":
    case "decision":
    case "fact":
    case "task":
    case "other":
      return value;
    default:
      // Anything else (unknown string, non-string, missing) uses the fallback.
      return fallback;
  }
}
|
|
311
|
+
|
|
254
312
|
function applyMlResult(
|
|
255
313
|
candidates: ExtractedCandidate[],
|
|
256
314
|
result: Array<Record<string, unknown>>,
|
|
@@ -282,14 +340,7 @@ function applyMlResult(
|
|
|
282
340
|
if (!keep) {
|
|
283
341
|
continue;
|
|
284
342
|
}
|
|
285
|
-
const category =
|
|
286
|
-
ml.category === "preference" ||
|
|
287
|
-
ml.category === "decision" ||
|
|
288
|
-
ml.category === "fact" ||
|
|
289
|
-
ml.category === "task" ||
|
|
290
|
-
ml.category === "other"
|
|
291
|
-
? (ml.category as ExtractedCandidate["category"])
|
|
292
|
-
: candidate.category;
|
|
343
|
+
const category = normalizeCategory(ml.category, candidate.category);
|
|
293
344
|
const score = typeof ml.score === "number" ? Math.max(0, Math.min(1, ml.score)) : candidate.score;
|
|
294
345
|
out.push({
|
|
295
346
|
...candidate,
|
|
@@ -301,6 +352,39 @@ function applyMlResult(
|
|
|
301
352
|
return out;
|
|
302
353
|
}
|
|
303
354
|
|
|
355
|
+
/**
 * Converts raw ML extraction output into memory candidates.
 *
 * For each raw item, the text is whitespace-normalized; items whose text is
 * not a string, is shorter than 20 characters, or is longer than 3000
 * characters are dropped. Duplicates (same sha256 of the hash-normalized text)
 * are dropped. Category is normalized with an "other" fallback; a numeric
 * score is clamped to [0, 1], and a missing, non-numeric, or NaN score
 * becomes 0.5. Every produced candidate has `source: "ml"`.
 *
 * @param result   Raw items returned by the ML call.
 * @param maxItems Maximum number of candidates to return.
 * @returns At most `maxItems` candidates, in input order.
 */
function applyMlExtractionResult(
  result: Array<Record<string, unknown>>,
  maxItems: number,
): ExtractedCandidate[] {
  const out: ExtractedCandidate[] = [];
  const seen = new Set<string>();

  for (const item of result) {
    // Check the cap before adding anything so that a non-positive maxItems
    // yields an empty list instead of a single item.
    if (out.length >= maxItems) {
      break;
    }

    const rawText = typeof item.text === "string" ? item.text : "";
    const text = normalizeWhitespace(rawText);
    if (text.length < 20 || text.length > 3000) {
      continue;
    }

    const key = sha256(normalizeForHash(text));
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);

    // NaN would survive Math.min/Math.max, so treat it like a missing score.
    const hasScore = typeof item.score === "number" && !Number.isNaN(item.score);

    out.push({
      text,
      category: normalizeCategory(item.category),
      score: hasScore ? Math.max(0, Math.min(1, item.score as number)) : 0.5,
      source: "ml",
    });
  }

  return out;
}
|
|
387
|
+
|
|
304
388
|
export async function extractCandidates(params: {
|
|
305
389
|
messages: unknown[];
|
|
306
390
|
cfg: MemoryBraidConfig;
|
|
@@ -308,43 +392,86 @@ export async function extractCandidates(params: {
|
|
|
308
392
|
runId?: string;
|
|
309
393
|
}): Promise<ExtractedCandidate[]> {
|
|
310
394
|
const normalized = normalizeMessages(params.messages);
|
|
311
|
-
const heuristic = pickHeuristicCandidates(normalized, params.cfg.capture.
|
|
395
|
+
const heuristic = pickHeuristicCandidates(normalized, params.cfg.capture.maxItemsPerRun);
|
|
312
396
|
|
|
313
397
|
params.log.debug("memory_braid.capture.extract", {
|
|
314
398
|
runId: params.runId,
|
|
399
|
+
mode: params.cfg.capture.mode,
|
|
400
|
+
maxItemsPerRun: params.cfg.capture.maxItemsPerRun,
|
|
315
401
|
totalMessages: normalized.length,
|
|
316
402
|
heuristicCandidates: heuristic.length,
|
|
317
403
|
});
|
|
318
404
|
|
|
319
|
-
if (
|
|
320
|
-
params.
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
405
|
+
if (params.cfg.capture.mode === "local") {
|
|
406
|
+
params.log.debug("memory_braid.capture.mode", {
|
|
407
|
+
runId: params.runId,
|
|
408
|
+
mode: params.cfg.capture.mode,
|
|
409
|
+
decision: "heuristic_only",
|
|
410
|
+
candidates: heuristic.length,
|
|
411
|
+
});
|
|
412
|
+
return heuristic;
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
if (!params.cfg.capture.ml.provider || !params.cfg.capture.ml.model) {
|
|
416
|
+
params.log.warn("memory_braid.capture.ml", {
|
|
417
|
+
runId: params.runId,
|
|
418
|
+
reason: "missing_provider_or_model",
|
|
419
|
+
mode: params.cfg.capture.mode,
|
|
420
|
+
hasProvider: Boolean(params.cfg.capture.ml.provider),
|
|
421
|
+
hasModel: Boolean(params.cfg.capture.ml.model),
|
|
422
|
+
fallback: "heuristic",
|
|
423
|
+
candidates: heuristic.length,
|
|
424
|
+
});
|
|
324
425
|
return heuristic;
|
|
325
426
|
}
|
|
326
427
|
|
|
327
428
|
try {
|
|
328
|
-
|
|
429
|
+
if (params.cfg.capture.mode === "hybrid") {
|
|
430
|
+
const ml = await callMlEnrichment({
|
|
431
|
+
provider: params.cfg.capture.ml.provider,
|
|
432
|
+
model: params.cfg.capture.ml.model,
|
|
433
|
+
timeoutMs: params.cfg.capture.ml.timeoutMs,
|
|
434
|
+
candidates: heuristic,
|
|
435
|
+
});
|
|
436
|
+
const enriched = applyMlResult(heuristic, ml);
|
|
437
|
+
params.log.debug("memory_braid.capture.ml", {
|
|
438
|
+
runId: params.runId,
|
|
439
|
+
mode: params.cfg.capture.mode,
|
|
440
|
+
provider: params.cfg.capture.ml.provider,
|
|
441
|
+
model: params.cfg.capture.ml.model,
|
|
442
|
+
requested: heuristic.length,
|
|
443
|
+
returned: ml.length,
|
|
444
|
+
enriched: enriched.length,
|
|
445
|
+
fallbackUsed: ml.length === 0,
|
|
446
|
+
});
|
|
447
|
+
return enriched;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
const mlExtractedRaw = await callMlExtraction({
|
|
329
451
|
provider: params.cfg.capture.ml.provider,
|
|
330
452
|
model: params.cfg.capture.ml.model,
|
|
331
453
|
timeoutMs: params.cfg.capture.ml.timeoutMs,
|
|
332
|
-
|
|
454
|
+
maxItems: params.cfg.capture.maxItemsPerRun,
|
|
455
|
+
messages: normalized,
|
|
333
456
|
});
|
|
334
|
-
const
|
|
457
|
+
const mlExtracted = applyMlExtractionResult(mlExtractedRaw, params.cfg.capture.maxItemsPerRun);
|
|
335
458
|
params.log.debug("memory_braid.capture.ml", {
|
|
336
459
|
runId: params.runId,
|
|
460
|
+
mode: params.cfg.capture.mode,
|
|
337
461
|
provider: params.cfg.capture.ml.provider,
|
|
338
462
|
model: params.cfg.capture.ml.model,
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
463
|
+
returned: mlExtractedRaw.length,
|
|
464
|
+
extracted: mlExtracted.length,
|
|
465
|
+
fallbackUsed: mlExtracted.length === 0,
|
|
342
466
|
});
|
|
343
|
-
return
|
|
467
|
+
return mlExtracted.length > 0 ? mlExtracted : heuristic;
|
|
344
468
|
} catch (err) {
|
|
345
469
|
params.log.warn("memory_braid.capture.ml", {
|
|
346
470
|
runId: params.runId,
|
|
471
|
+
mode: params.cfg.capture.mode,
|
|
347
472
|
error: err instanceof Error ? err.message : String(err),
|
|
473
|
+
fallback: "heuristic",
|
|
474
|
+
candidates: heuristic.length,
|
|
348
475
|
});
|
|
349
476
|
return heuristic;
|
|
350
477
|
}
|
package/src/index.ts
CHANGED
|
@@ -5,6 +5,7 @@ import type {
|
|
|
5
5
|
} from "openclaw/plugin-sdk";
|
|
6
6
|
import { parseConfig, pluginConfigSchema } from "./config.js";
|
|
7
7
|
import { stagedDedupe } from "./dedupe.js";
|
|
8
|
+
import { EntityExtractionManager } from "./entities.js";
|
|
8
9
|
import { extractCandidates } from "./extract.js";
|
|
9
10
|
import { MemoryBraidLogger } from "./logger.js";
|
|
10
11
|
import { resolveLocalTools, runLocalGet, runLocalSearch } from "./local-memory.js";
|
|
@@ -75,6 +76,25 @@ function formatRelevantMemories(results: MemoryBraidResult[], maxChars = 600): s
|
|
|
75
76
|
].join("\n");
|
|
76
77
|
}
|
|
77
78
|
|
|
79
|
+
/**
 * Renders the entity extraction settings as a plain-text block: a heading
 * line followed by one "- name: value" line per field, joined with newlines.
 */
function formatEntityExtractionStatus(params: {
  enabled: boolean;
  provider: string;
  model: string;
  minScore: number;
  maxEntitiesPerMemory: number;
  cacheDir: string;
}): string {
  // Fields are listed in the order they appear in the output.
  const fields = [
    "enabled",
    "provider",
    "model",
    "minScore",
    "maxEntitiesPerMemory",
    "cacheDir",
  ] as const;

  const rows = fields.map((field) => `- ${field}: ${params[field]}`);
  return ["Memory Braid entity extraction:", ...rows].join("\n");
}
|
|
97
|
+
|
|
78
98
|
async function runHybridRecall(params: {
|
|
79
99
|
api: OpenClawPluginApi;
|
|
80
100
|
cfg: ReturnType<typeof parseConfig>;
|
|
@@ -94,6 +114,13 @@ async function runHybridRecall(params: {
|
|
|
94
114
|
}> {
|
|
95
115
|
const local = resolveLocalTools(params.api, params.ctx);
|
|
96
116
|
if (!local.searchTool) {
|
|
117
|
+
params.log.warn("memory_braid.search.skip", {
|
|
118
|
+
runId: params.runId,
|
|
119
|
+
reason: "local_search_tool_unavailable",
|
|
120
|
+
agentId: params.ctx.agentId,
|
|
121
|
+
sessionKey: params.ctx.sessionKey,
|
|
122
|
+
workspaceHash: workspaceHashFromDir(params.ctx.workspaceDir),
|
|
123
|
+
});
|
|
97
124
|
return { local: [], mem0: [], merged: [] };
|
|
98
125
|
}
|
|
99
126
|
|
|
@@ -190,6 +217,9 @@ const memoryBraidPlugin = {
|
|
|
190
217
|
const log = new MemoryBraidLogger(api.logger, cfg.debug);
|
|
191
218
|
const initialStateDir = api.runtime.state.resolveStateDir();
|
|
192
219
|
const mem0 = new Mem0Adapter(cfg, log, { stateDir: initialStateDir });
|
|
220
|
+
const entityExtraction = new EntityExtractionManager(cfg.entityExtraction, log, {
|
|
221
|
+
stateDir: initialStateDir,
|
|
222
|
+
});
|
|
193
223
|
|
|
194
224
|
let serviceTimer: NodeJS.Timeout | null = null;
|
|
195
225
|
let statePaths: StatePaths | null = null;
|
|
@@ -288,6 +318,61 @@ const memoryBraidPlugin = {
|
|
|
288
318
|
{ names: ["memory_search", "memory_get"] },
|
|
289
319
|
);
|
|
290
320
|
|
|
321
|
+
api.registerCommand({
|
|
322
|
+
name: "memorybraid",
|
|
323
|
+
description: "Memory Braid status and entity extraction warmup.",
|
|
324
|
+
acceptsArgs: true,
|
|
325
|
+
handler: async (ctx) => {
|
|
326
|
+
const args = ctx.args?.trim() ?? "";
|
|
327
|
+
const tokens = args.split(/\s+/).filter(Boolean);
|
|
328
|
+
const action = (tokens[0] ?? "status").toLowerCase();
|
|
329
|
+
|
|
330
|
+
if (action === "status") {
|
|
331
|
+
return {
|
|
332
|
+
text: [
|
|
333
|
+
`capture.mode: ${cfg.capture.mode}`,
|
|
334
|
+
formatEntityExtractionStatus(entityExtraction.getStatus()),
|
|
335
|
+
].join("\n\n"),
|
|
336
|
+
};
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
if (action === "warmup") {
|
|
340
|
+
const runId = log.newRunId();
|
|
341
|
+
const forceReload = tokens.some((token) => token === "--force");
|
|
342
|
+
const result = await entityExtraction.warmup({
|
|
343
|
+
runId,
|
|
344
|
+
reason: "command",
|
|
345
|
+
forceReload,
|
|
346
|
+
});
|
|
347
|
+
if (!result.ok) {
|
|
348
|
+
return {
|
|
349
|
+
text: [
|
|
350
|
+
"Entity extraction warmup failed.",
|
|
351
|
+
`- model: ${result.model}`,
|
|
352
|
+
`- cacheDir: ${result.cacheDir}`,
|
|
353
|
+
`- durMs: ${result.durMs}`,
|
|
354
|
+
`- error: ${result.error ?? "unknown"}`,
|
|
355
|
+
].join("\n"),
|
|
356
|
+
isError: true,
|
|
357
|
+
};
|
|
358
|
+
}
|
|
359
|
+
return {
|
|
360
|
+
text: [
|
|
361
|
+
"Entity extraction warmup complete.",
|
|
362
|
+
`- model: ${result.model}`,
|
|
363
|
+
`- cacheDir: ${result.cacheDir}`,
|
|
364
|
+
`- entities: ${result.entities}`,
|
|
365
|
+
`- durMs: ${result.durMs}`,
|
|
366
|
+
].join("\n"),
|
|
367
|
+
};
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
return {
|
|
371
|
+
text: "Usage: /memorybraid [status|warmup [--force]]",
|
|
372
|
+
};
|
|
373
|
+
},
|
|
374
|
+
});
|
|
375
|
+
|
|
291
376
|
api.on("before_agent_start", async (event, ctx) => {
|
|
292
377
|
const runId = log.newRunId();
|
|
293
378
|
const toolCtx: OpenClawPluginToolContext = {
|
|
@@ -375,14 +460,21 @@ const memoryBraidPlugin = {
|
|
|
375
460
|
}
|
|
376
461
|
|
|
377
462
|
let persisted = 0;
|
|
463
|
+
let dedupeSkipped = 0;
|
|
464
|
+
let entityAnnotatedCandidates = 0;
|
|
465
|
+
let totalEntitiesAttached = 0;
|
|
466
|
+
let mem0AddAttempts = 0;
|
|
467
|
+
let mem0AddWithId = 0;
|
|
468
|
+
let mem0AddWithoutId = 0;
|
|
378
469
|
for (const candidate of candidates) {
|
|
379
470
|
const hash = sha256(normalizeForHash(candidate.text));
|
|
380
471
|
if (dedupe.seen[hash]) {
|
|
472
|
+
dedupeSkipped += 1;
|
|
381
473
|
continue;
|
|
382
474
|
}
|
|
383
475
|
dedupe.seen[hash] = now;
|
|
384
476
|
|
|
385
|
-
const metadata = {
|
|
477
|
+
const metadata: Record<string, unknown> = {
|
|
386
478
|
sourceType: "capture",
|
|
387
479
|
workspaceHash: scope.workspaceHash,
|
|
388
480
|
agentId: scope.agentId,
|
|
@@ -394,23 +486,59 @@ const memoryBraidPlugin = {
|
|
|
394
486
|
indexedAt: new Date().toISOString(),
|
|
395
487
|
};
|
|
396
488
|
|
|
397
|
-
|
|
489
|
+
if (cfg.entityExtraction.enabled) {
|
|
490
|
+
const entities = await entityExtraction.extract({
|
|
491
|
+
text: candidate.text,
|
|
492
|
+
runId,
|
|
493
|
+
});
|
|
494
|
+
if (entities.length > 0) {
|
|
495
|
+
entityAnnotatedCandidates += 1;
|
|
496
|
+
totalEntitiesAttached += entities.length;
|
|
497
|
+
metadata.entityUris = entities.map((entity) => entity.canonicalUri);
|
|
498
|
+
metadata.entities = entities;
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
mem0AddAttempts += 1;
|
|
503
|
+
const addResult = await mem0.addMemory({
|
|
398
504
|
text: candidate.text,
|
|
399
505
|
scope,
|
|
400
506
|
metadata,
|
|
401
507
|
runId,
|
|
402
508
|
});
|
|
509
|
+
if (addResult.id) {
|
|
510
|
+
mem0AddWithId += 1;
|
|
511
|
+
} else {
|
|
512
|
+
mem0AddWithoutId += 1;
|
|
513
|
+
log.warn("memory_braid.capture.persist", {
|
|
514
|
+
runId,
|
|
515
|
+
reason: "mem0_add_missing_id",
|
|
516
|
+
workspaceHash: scope.workspaceHash,
|
|
517
|
+
agentId: scope.agentId,
|
|
518
|
+
sessionKey: scope.sessionKey,
|
|
519
|
+
contentHashPrefix: hash.slice(0, 12),
|
|
520
|
+
category: candidate.category,
|
|
521
|
+
});
|
|
522
|
+
}
|
|
403
523
|
persisted += 1;
|
|
404
524
|
}
|
|
405
525
|
|
|
406
526
|
await writeCaptureDedupeState(statePaths, dedupe);
|
|
407
527
|
log.debug("memory_braid.capture.persist", {
|
|
408
528
|
runId,
|
|
529
|
+
mode: cfg.capture.mode,
|
|
409
530
|
workspaceHash: scope.workspaceHash,
|
|
410
531
|
agentId: scope.agentId,
|
|
411
532
|
sessionKey: scope.sessionKey,
|
|
412
533
|
candidates: candidates.length,
|
|
534
|
+
dedupeSkipped,
|
|
413
535
|
persisted,
|
|
536
|
+
mem0AddAttempts,
|
|
537
|
+
mem0AddWithId,
|
|
538
|
+
mem0AddWithoutId,
|
|
539
|
+
entityExtractionEnabled: cfg.entityExtraction.enabled,
|
|
540
|
+
entityAnnotatedCandidates,
|
|
541
|
+
totalEntitiesAttached,
|
|
414
542
|
}, true);
|
|
415
543
|
});
|
|
416
544
|
|
|
@@ -418,6 +546,7 @@ const memoryBraidPlugin = {
|
|
|
418
546
|
id: "memory-braid-service",
|
|
419
547
|
start: async (ctx) => {
|
|
420
548
|
mem0.setStateDir(ctx.stateDir);
|
|
549
|
+
entityExtraction.setStateDir(ctx.stateDir);
|
|
421
550
|
statePaths = createStatePaths(ctx.stateDir);
|
|
422
551
|
await ensureStateDir(statePaths);
|
|
423
552
|
targets = await resolveTargets({
|
|
@@ -437,6 +566,24 @@ const memoryBraidPlugin = {
|
|
|
437
566
|
stateDir: ctx.stateDir,
|
|
438
567
|
targets: targets.length,
|
|
439
568
|
});
|
|
569
|
+
log.info("memory_braid.config", {
|
|
570
|
+
runId,
|
|
571
|
+
mem0Mode: cfg.mem0.mode,
|
|
572
|
+
captureEnabled: cfg.capture.enabled,
|
|
573
|
+
captureMode: cfg.capture.mode,
|
|
574
|
+
captureMaxItemsPerRun: cfg.capture.maxItemsPerRun,
|
|
575
|
+
captureMlProvider: cfg.capture.ml.provider ?? "unset",
|
|
576
|
+
captureMlModel: cfg.capture.ml.model ?? "unset",
|
|
577
|
+
entityExtractionEnabled: cfg.entityExtraction.enabled,
|
|
578
|
+
entityProvider: cfg.entityExtraction.provider,
|
|
579
|
+
entityModel: cfg.entityExtraction.model,
|
|
580
|
+
entityMinScore: cfg.entityExtraction.minScore,
|
|
581
|
+
entityMaxPerMemory: cfg.entityExtraction.maxEntitiesPerMemory,
|
|
582
|
+
entityWarmupOnStartup: cfg.entityExtraction.startup.downloadOnStartup,
|
|
583
|
+
debugEnabled: cfg.debug.enabled,
|
|
584
|
+
debugIncludePayloads: cfg.debug.includePayloads,
|
|
585
|
+
debugSamplingRate: cfg.debug.logSamplingRate,
|
|
586
|
+
});
|
|
440
587
|
|
|
441
588
|
// Bootstrap is async by design so tool availability is not blocked.
|
|
442
589
|
void runBootstrapIfNeeded({
|
|
@@ -458,6 +605,21 @@ const memoryBraidPlugin = {
|
|
|
458
605
|
reason: "startup",
|
|
459
606
|
});
|
|
460
607
|
|
|
608
|
+
if (cfg.entityExtraction.enabled && cfg.entityExtraction.startup.downloadOnStartup) {
|
|
609
|
+
void entityExtraction
|
|
610
|
+
.warmup({
|
|
611
|
+
runId,
|
|
612
|
+
reason: "startup",
|
|
613
|
+
})
|
|
614
|
+
.catch((err) => {
|
|
615
|
+
log.warn("memory_braid.entity.warmup", {
|
|
616
|
+
runId,
|
|
617
|
+
reason: "startup",
|
|
618
|
+
error: err instanceof Error ? err.message : String(err),
|
|
619
|
+
});
|
|
620
|
+
});
|
|
621
|
+
}
|
|
622
|
+
|
|
461
623
|
if (cfg.reconcile.enabled) {
|
|
462
624
|
const intervalMs = cfg.reconcile.intervalMinutes * 60 * 1000;
|
|
463
625
|
serviceTimer = setInterval(() => {
|