memory-braid 0.4.7 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -0
- package/package.json +1 -1
- package/src/capture.ts +251 -0
- package/src/extract.ts +7 -0
- package/src/index.ts +521 -76
- package/src/local-memory.ts +9 -4
- package/src/logger.ts +6 -1
- package/src/mem0-client.ts +295 -45
- package/src/observability.ts +269 -0
- package/src/remediation.ts +257 -0
- package/src/state.ts +30 -0
- package/src/types.ts +47 -0
package/README.md
CHANGED
|
@@ -9,6 +9,65 @@ Memory Braid is an OpenClaw `kind: "memory"` plugin that augments local memory s
|
|
|
9
9
|
- Capture pipeline modes: `local`, `hybrid`, `ml`.
|
|
10
10
|
- Optional entity extraction: local multilingual NER or OpenAI NER with canonical `entity://...` URIs in memory metadata.
|
|
11
11
|
- Structured debug logs for troubleshooting and tuning.
|
|
12
|
+
- Debug-only LLM usage observability: per-turn cache usage, rolling windows, and rising/stable/improving trend logs.
|
|
13
|
+
|
|
14
|
+
## Hardening update
|
|
15
|
+
|
|
16
|
+
This release hardens capture and remediation for historical installs.
|
|
17
|
+
|
|
18
|
+
- Bug class: historical prompt or transcript content could be captured as Mem0 memories and later re-injected.
|
|
19
|
+
- Impact: inflated prompt size, noisier recall, and potentially higher Anthropic cache-write costs.
|
|
20
|
+
- Fix: new captures are assembled from the trusted current turn instead of mining the full `agent_end` transcript.
|
|
21
|
+
- Metadata: new captured memories now include additive provenance fields such as `captureOrigin`, `captureMessageHash`, `captureTurnHash`, `capturePath`, and `pluginCaptureVersion`.
|
|
22
|
+
- Historical installs: no startup mutation is performed automatically. Operators should audit first, then explicitly quarantine or delete suspicious captured memories.
|
|
23
|
+
|
|
24
|
+
## Remediation commands
|
|
25
|
+
|
|
26
|
+
Memory Braid now exposes read-only audit and explicit remediation commands:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
/memorybraid audit
|
|
30
|
+
/memorybraid remediate audit
|
|
31
|
+
/memorybraid remediate quarantine
|
|
32
|
+
/memorybraid remediate quarantine --apply
|
|
33
|
+
/memorybraid remediate delete --apply
|
|
34
|
+
/memorybraid remediate purge-all-captured --apply
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Notes:
|
|
38
|
+
|
|
39
|
+
- Dry-run is the default for remediation commands. Nothing mutates until you pass `--apply`.
|
|
40
|
+
- `audit` reports counts by `sourceType`, `captureOrigin`, and `pluginCaptureVersion`, plus suspicious legacy samples.
|
|
41
|
+
- `quarantine --apply` excludes suspicious captured memories from future Mem0 injection. It records quarantine state locally and also tags Mem0 metadata where supported.
|
|
42
|
+
- `delete --apply` deletes suspicious captured memories only.
|
|
43
|
+
- `purge-all-captured --apply` deletes all plugin-captured Mem0 records for the current workspace scope without touching local markdown memory.
|
|
44
|
+
- Optional flags:
|
|
45
|
+
  - `--limit N` controls how many Mem0 records are fetched during audit/remediation.
|
|
46
|
+
  - `--sample N` controls how many suspicious samples are shown in the audit report.
|
|
47
|
+
|
|
48
|
+
## Debug cost observability
|
|
49
|
+
|
|
50
|
+
When `debug.enabled` is `true`, Memory Braid also emits debug-only LLM usage observability logs from the `llm_output` hook:
|
|
51
|
+
|
|
52
|
+
- `memory_braid.cost.turn`: per-turn input/output/cache tokens, cache ratios, and a best-effort estimated USD cost when the provider/model has a known pricing profile.
|
|
53
|
+
- `memory_braid.cost.window`: rolling 5-turn and 20-turn averages plus `rising|stable|improving` trend labels for prompt size, cache-write rate, cache-hit rate, and estimated cost.
|
|
54
|
+
- `memory_braid.cost.alert`: emitted only when recent cache writes, prompt size, or estimated cost rise materially above the previous short window.
|
|
55
|
+
|
|
56
|
+
Important:
|
|
57
|
+
|
|
58
|
+
- `estimatedCostUsd` is intentionally labeled as an estimate.
|
|
59
|
+
- Unknown models still log token and cache trends, but the cost basis becomes `token_only`.
|
|
60
|
+
|
|
61
|
+
## Self-hosted reset option
|
|
62
|
+
|
|
63
|
+
If you are self-hosting and prefer a full reset instead of selective remediation, you can clear Memory Braid's OSS Mem0 state and restart OpenClaw:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
rm -rf ~/.openclaw/memory-braid
|
|
67
|
+
openclaw gateway restart
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
This is intentionally not done by the plugin itself. It is an operator choice.
|
|
12
71
|
|
|
13
72
|
## Breaking changes in 0.4.0
|
|
14
73
|
|
package/package.json
CHANGED
package/src/capture.ts
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
import { normalizeForHash, normalizeWhitespace, sha256 } from "./chunking.js";
|
|
2
|
+
import type {
|
|
3
|
+
AssembledCaptureInput,
|
|
4
|
+
CaptureInputMessage,
|
|
5
|
+
PendingInboundTurn,
|
|
6
|
+
} from "./types.js";
|
|
7
|
+
|
|
8
|
+
/**
 * A hook message reduced to the two fields the capture pipeline reads:
 * its role and its flattened text.
 */
type NormalizedHookMessage = {
  /** Role string exactly as it appeared on the hook payload. */
  role: string;
  /** Non-empty text extracted from the message content. */
  text: string;
};
|
|
12
|
+
|
|
13
|
+
/**
 * Views an arbitrary value as a string-keyed record.
 *
 * @param value - Any value.
 * @returns `value` itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function asRecord(value: unknown): Record<string, unknown> {
  const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
  return isPlainObject ? (value as Record<string, unknown>) : {};
}
|
|
19
|
+
|
|
20
|
+
/**
 * Flattens hook message content into a single string.
 *
 * @param content - Either a plain string or an array of content blocks.
 * @returns For a string, the result of `normalizeWhitespace`. For an array,
 *   the normalized text of every `{ type: "text", text: string }` block,
 *   joined with single spaces; blocks that normalize to an empty string are
 *   dropped. Any other input yields `""`.
 */
export function extractHookMessageText(content: unknown): string {
  if (typeof content === "string") {
    return normalizeWhitespace(content);
  }
  if (!Array.isArray(content)) {
    return "";
  }

  return content
    .flatMap((block: unknown): string[] => {
      if (typeof block !== "object" || block === null) {
        return [];
      }
      const candidate = block as { type?: unknown; text?: unknown };
      if (candidate.type !== "text" || typeof candidate.text !== "string") {
        return [];
      }
      const cleaned = normalizeWhitespace(candidate.text);
      return cleaned ? [cleaned] : [];
    })
    .join(" ");
}
|
|
43
|
+
|
|
44
|
+
/**
 * Converts raw hook messages into `{ role, text }` pairs.
 *
 * Two shapes are recognized: a direct `{ role, content }` object and a
 * wrapped `{ message: { role, content } }` object. Entries that are not
 * objects, have no string role in either shape, or produce empty text are
 * skipped. An entry with a direct string role is never inspected for a
 * wrapped message.
 *
 * @param messages - Raw hook payload entries.
 * @returns The recognized messages, in input order.
 */
export function normalizeHookMessages(messages: unknown[]): NormalizedHookMessage[] {
  const collected: NormalizedHookMessage[] = [];
  const collect = (role: string, content: unknown): void => {
    const text = extractHookMessageText(content);
    if (text) {
      collected.push({ role, text });
    }
  };

  for (const entry of messages) {
    if (!entry || typeof entry !== "object") {
      continue;
    }

    const flat = entry as { role?: unknown; content?: unknown };
    if (typeof flat.role === "string") {
      collect(flat.role, flat.content);
      continue;
    }

    const inner = (entry as { message?: { role?: unknown; content?: unknown } }).message;
    if (inner && typeof inner.role === "string") {
      collect(inner.role, inner.content);
    }
  }

  return collected;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Reads the `kind` field from a provenance value.
 *
 * @param value - Provenance payload of unknown shape.
 * @returns The trimmed, lower-cased `kind` when it is a non-blank string;
 *   otherwise `undefined`.
 */
function normalizeProvenanceKind(value: unknown): string | undefined {
  const { kind } = asRecord(value);
  if (typeof kind !== "string") {
    return undefined;
  }
  const cleaned = kind.trim().toLowerCase();
  return cleaned === "" ? undefined : cleaned;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Builds a pending inbound turn from a message, but only for messages whose
 * role is `user` and whose provenance kind is `external_user` (both compared
 * after trimming and lower-casing).
 *
 * @param message - Message of unknown shape.
 * @returns The turn text, a hash of that text, and the current timestamp;
 *   `undefined` when the role/provenance check fails or the text is empty.
 */
export function getPendingInboundTurn(message: unknown): PendingInboundTurn | undefined {
  const fields = asRecord(message);
  const rawRole = fields.role;
  const isUser = typeof rawRole === "string" && rawRole.trim().toLowerCase() === "user";
  const isExternal = normalizeProvenanceKind(fields.provenance) === "external_user";
  if (!(isUser && isExternal)) {
    return undefined;
  }

  const text = extractHookMessageText(fields.content);
  return text
    ? { text, messageHash: sha256(normalizeForHash(text)), receivedAt: Date.now() }
    : undefined;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Creates a capture input message and stamps it with a hash of its text.
 *
 * @param role - Speaker role of the message.
 * @param origin - Provenance label stored on the message.
 * @param text - Message text; also the input to the hash.
 * @returns The message with `messageHash` set to `sha256(normalizeForHash(text))`.
 */
function buildCaptureInputMessage(
  role: "user" | "assistant",
  origin: "external_user" | "assistant_derived",
  text: string,
): CaptureInputMessage {
  const messageHash = sha256(normalizeForHash(text));
  return { role, origin, text, messageHash };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Assembles the capture input for the most recent turn.
 *
 * The user text comes from `pendingInboundTurn` when one is supplied, and
 * otherwise from the last message whose role is exactly `"user"`. When
 * `includeAssistant` is set, every non-empty assistant message after that
 * last user message is appended. If the transcript contains no user message,
 * no assistant messages are included.
 *
 * @param params.messages - Raw hook messages for the run.
 * @param params.includeAssistant - Whether to append trailing assistant messages.
 * @param params.pendingInboundTurn - Previously recorded inbound user turn, if any.
 * @returns The assembled messages, the capture path label, a hash over the
 *   message hashes joined by `|`, and whether the transcript fallback was
 *   used; `undefined` when no user text is available.
 */
export function assembleCaptureInput(params: {
  messages: unknown[];
  includeAssistant: boolean;
  pendingInboundTurn?: PendingInboundTurn;
}): AssembledCaptureInput | undefined {
  const { includeAssistant, pendingInboundTurn } = params;
  const turns = normalizeHookMessages(params.messages);
  const lastUserIndex = turns.map((turn) => turn.role).lastIndexOf("user");

  const userText = pendingInboundTurn?.text ?? turns[lastUserIndex]?.text ?? "";
  if (!userText) {
    return undefined;
  }

  // Assistant replies only count when they follow a user message in the transcript.
  const assistantMessages =
    includeAssistant && lastUserIndex >= 0
      ? turns
          .slice(lastUserIndex + 1)
          .filter((turn) => turn.role === "assistant" && Boolean(turn.text))
          .map((turn) => buildCaptureInputMessage("assistant", "assistant_derived", turn.text))
      : [];

  const assembled: CaptureInputMessage[] = [
    buildCaptureInputMessage("user", "external_user", userText),
    ...assistantMessages,
  ];

  const usedPendingTurn = Boolean(pendingInboundTurn);
  return {
    messages: assembled,
    capturePath: usedPendingTurn ? "before_message_write" : "agent_end_last_turn",
    turnHash: sha256(assembled.map((message) => message.messageHash).join("|")),
    fallbackUsed: !usedPendingTurn,
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Splits text into a set of case- and accent-insensitive word tokens.
 *
 * The text is lower-cased, decomposed with NFKD and stripped of combining
 * marks before the letter/number runs are matched. Folding first matters:
 * combining marks are not matched by `[\p{L}\p{N}]`, so matching on the raw
 * text would split a decomposed word at every mark and make composed and
 * decomposed spellings of the same word tokenize differently.
 *
 * @param text - Arbitrary input text.
 * @returns The distinct folded tokens that are at least 3 characters long.
 */
function tokenize(text: string): Set<string> {
  const folded = text
    .toLowerCase()
    .normalize("NFKD")
    .replace(/\p{M}+/gu, "");

  const out = new Set<string>();
  for (const token of folded.match(/[\p{L}\p{N}]+/gu) ?? []) {
    // Very short tokens carry little signal for overlap scoring.
    if (token.length >= 3) {
      out.add(token);
    }
  }
  return out;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Measures how much two token sets overlap.
 *
 * @param left - First token set (not mutated).
 * @param right - Second token set (not mutated).
 * @returns The number of shared tokens divided by the size of the larger
 *   set, in the range 0..1; `0` when either set is empty.
 */
function overlapRatio(left: ReadonlySet<string>, right: ReadonlySet<string>): number {
  if (left.size === 0 || right.size === 0) {
    return 0;
  }

  // The intersection is symmetric, so walk the smaller set and probe the larger one.
  const [smaller, larger] = left.size <= right.size ? [left, right] : [right, left];
  let shared = 0;
  for (const token of smaller) {
    if (larger.has(token)) {
      shared += 1;
    }
  }
  return shared / larger.size;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Finds the capture input message that a candidate most likely came from.
 *
 * An exact hash match wins outright. Otherwise each message is scored by
 * token overlap with the candidate, and the first message with the highest
 * score is returned if that score is at least 0.24.
 *
 * @param candidateText - Text of the extracted candidate.
 * @param messages - Capture input messages to match against.
 * @returns The matching message, or `undefined` when nothing matches well enough.
 */
export function matchCandidateToCaptureInput(
  candidateText: string,
  messages: CaptureInputMessage[],
): CaptureInputMessage | undefined {
  const wantedHash = sha256(normalizeForHash(candidateText));
  const exact = messages.find((message) => message.messageHash === wantedHash);
  if (exact) {
    return exact;
  }

  const candidateTokens = tokenize(candidateText);
  let winner: CaptureInputMessage | undefined;
  let winnerScore = 0;
  messages.forEach((message) => {
    const score = overlapRatio(candidateTokens, tokenize(message.text));
    // Strictly greater: ties keep the earlier message, and a zero score never wins.
    if (score > winnerScore) {
      winnerScore = score;
      winner = message;
    }
  });

  return winnerScore >= 0.24 ? winner : undefined;
}
|
|
207
|
+
|
|
208
|
+
/** Matches a line that begins with a speaker label such as `assistant:` or `user:`. */
const ROLE_PREFIX_LINE = /^(?:assistant|system|developer|tool|user|human|bot|ai|agent)\s*:/i;
/** Matches speaker labels anywhere in the text (global, for counting occurrences). */
const INLINE_ROLE_LABEL = /\b(?:assistant|system|developer|tool|user)\s*:/gi;
/** Matches a line that starts with an optionally quoted message-envelope key. */
const STRUCTURED_METADATA_KEY =
  /^\s*["']?(?:message_id|reply_to_id|sender_id|sender|timestamp|thread|conversation|channel|metadata)\b/i;

/**
 * Heuristically decides whether text looks like a pasted transcript or a
 * structured message dump rather than a single statement.
 *
 * Returns `true` when any of these hold:
 * - at least 2 lines start with a role label;
 * - at least 3 role labels appear anywhere in the text;
 * - at least 2 code-fence markers and at least 2 metadata-key lines;
 * - at least 4 metadata-key lines in a text of at least 6 non-empty lines.
 *
 * @param text - Candidate text.
 * @returns `true` if the text is likely transcript-like; `false` otherwise,
 *   including for blank input.
 */
export function isLikelyTranscriptLikeText(text: string): boolean {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return false;
  }

  // NOTE(review): normalizeWhitespace is defined elsewhere. If it collapses
  // newlines into spaces, splitting its output always yields a single line and
  // the line-based checks below could never fire. Lines are therefore taken
  // from the raw text; if the helper preserves newlines this is equivalent.
  const lines = text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);
  if (lines.length === 0) {
    return false;
  }

  const rolePrefixedLines = lines.filter((line) => ROLE_PREFIX_LINE.test(line)).length;
  const inlineRoleLabels = normalized.match(INLINE_ROLE_LABEL)?.length ?? 0;
  const fencedBlocks = normalized.match(/```/g)?.length ?? 0;
  const metadataLines = lines.filter((line) => STRUCTURED_METADATA_KEY.test(line)).length;

  if (rolePrefixedLines >= 2) {
    return true;
  }
  if (inlineRoleLabels >= 3) {
    return true;
  }
  if (fencedBlocks >= 2 && metadataLines >= 2) {
    return true;
  }
  return metadataLines >= 4 && lines.length >= 6;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Flags text that is too large to be a single atomic memory.
 *
 * @param text - Candidate memory text.
 * @returns `true` when the whitespace-normalized text is longer than 1600
 *   characters or the text has more than 18 non-blank lines; `false`
 *   otherwise, including for blank input.
 */
export function isOversizedAtomicMemory(text: string): boolean {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return false;
  }

  // NOTE(review): normalizeWhitespace is defined elsewhere. If it collapses
  // newlines, counting lines in its output would always give 1 and the line
  // limit would be dead. Lines are counted on the raw text instead; if the
  // helper preserves newlines the count is the same.
  const lineCount = text.split(/\r?\n/).filter((line) => line.trim().length > 0).length;
  return normalized.length > 1600 || lineCount > 18;
}
|
package/src/extract.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { normalizeForHash, normalizeWhitespace, sha256 } from "./chunking.js";
|
|
2
2
|
import type { MemoryBraidConfig } from "./config.js";
|
|
3
|
+
import { isLikelyTranscriptLikeText, isOversizedAtomicMemory } from "./capture.js";
|
|
3
4
|
import { MemoryBraidLogger } from "./logger.js";
|
|
4
5
|
import type { ExtractedCandidate } from "./types.js";
|
|
5
6
|
|
|
@@ -18,6 +19,9 @@ const FEED_TAG_PATTERN = /\[(?:n8n|rss|alert|news|cron|slack|discord|telegram|em
|
|
|
18
19
|
const ROLE_LABEL_PATTERN = /\b(?:assistant|system|tool|developer)\s*:/gi;
|
|
19
20
|
|
|
20
21
|
function isLikelyFeedOrImportedText(text: string): boolean {
|
|
22
|
+
if (isLikelyTranscriptLikeText(text) || isOversizedAtomicMemory(text)) {
|
|
23
|
+
return true;
|
|
24
|
+
}
|
|
21
25
|
if (FEED_TAG_PATTERN.test(text)) {
|
|
22
26
|
return true;
|
|
23
27
|
}
|
|
@@ -398,6 +402,9 @@ function applyMlExtractionResult(
|
|
|
398
402
|
if (!text || text.length < 20 || text.length > 3000) {
|
|
399
403
|
continue;
|
|
400
404
|
}
|
|
405
|
+
if (isLikelyFeedOrImportedText(text)) {
|
|
406
|
+
continue;
|
|
407
|
+
}
|
|
401
408
|
const key = sha256(normalizeForHash(text));
|
|
402
409
|
if (seen.has(key)) {
|
|
403
410
|
continue;
|