memory-braid 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "memory-braid",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "description": "OpenClaw memory plugin that augments local memory with Mem0 capture and recall.",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
package/src/entities.ts CHANGED
@@ -11,6 +11,8 @@ type NerRecord = {
11
11
  entity_group?: unknown;
12
12
  entity?: unknown;
13
13
  score?: unknown;
14
+ start?: unknown;
15
+ end?: unknown;
14
16
  };
15
17
 
16
18
  export type ExtractedEntity = {
@@ -79,6 +81,138 @@ function normalizeEntityText(raw: unknown): string {
79
81
  return normalizeWhitespace(raw.replace(/^##/, "").replace(/^▁/, ""));
80
82
  }
81
83
 
84
/**
 * A single recognized entity token in normalized form; the unit that the
 * adjacent-token merge helpers operate on.
 */
type NormalizedEntityToken = {
  /** Surface text of the token. */
  text: string;
  /** Entity category, using the same value space as `ExtractedEntity["type"]`. */
  type: ExtractedEntity["type"];
  /** Score attached to the token. */
  score: number;
  /** Offset of the token's first character in the source text, when known. */
  start?: number;
  /** Offset just past the token in the source text (used as a `slice` bound), when known. */
  end?: number;
};
91
+
92
/**
 * Lowercase connector words (English / Spanish / Portuguese). Words in this
 * set are never classified as "noisy short words", even when shorter than
 * three characters.
 */
const ENTITY_CONNECTOR_WORDS = new Set<string>(
  "and da de del la las los of the y".split(" "),
);

/** Largest combined word count two tokens may have and still be merged. */
const ENTITY_MAX_MERGED_WORDS = 3;
105
+
106
/**
 * Narrows an untyped value to a finite number.
 *
 * @param value - Any value, e.g. a field read from an untyped record.
 * @returns `value` itself when it is a `number` that is neither `NaN` nor
 *   infinite; `undefined` for everything else (including numeric strings).
 */
function asFiniteNumber(value: unknown): number | undefined {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
}
112
+
113
/**
 * Splits entity text into words made of Unicode letters and digits.
 *
 * A word starts with a letter or digit and may continue with letters, digits
 * and combining marks (`\p{M}`). Including marks keeps decomposed accents
 * (e.g. "e" + U+0301) and vowel signs attached to their word instead of
 * cutting the word in two, which would inflate word counts and create
 * spurious one- or two-letter fragments.
 *
 * @param text - Entity text to split.
 * @returns The words in order of appearance; an empty array when there are none.
 */
function splitEntityWords(text: string): string[] {
  return text.match(/[\p{L}\p{N}][\p{L}\p{N}\p{M}]*/gu) ?? [];
}
116
+
117
/**
 * Decides whether a word is too short to be trusted as part of an entity name.
 *
 * A word is NOT noisy when any of these holds:
 * - its lowercased form has three or more characters;
 * - its lowercased form is one of `ENTITY_CONNECTOR_WORDS`;
 * - it is a single uppercase letter, optionally followed by a period
 *   (an initial such as "J" or "J.").
 *
 * The initial check uses the Unicode uppercase-letter category rather than
 * ASCII `A-Z`, so initials like "Á" or "Ó" are accepted as well.
 *
 * @param word - The word to classify, in its original casing.
 * @returns `true` when the word is short and matches none of the exemptions.
 */
function isLikelyNoisyShortWord(word: string): boolean {
  const lowered = word.toLowerCase();
  if (lowered.length >= 3 || ENTITY_CONNECTOR_WORDS.has(lowered)) {
    return false;
  }
  // Casing matters here, so test the original word rather than `lowered`.
  return !/^\p{Lu}\.?$/u.test(word);
}
127
+
128
/**
 * Concatenates the text of two tokens that are about to be merged.
 *
 * @param left - Token whose text comes first; its `end` offset is consulted.
 * @param right - Token whose text comes second; its `start` offset is consulted.
 * @returns The two texts glued together without a separator when both offsets
 *   are numbers and `right.start - left.end` is zero or negative; otherwise
 *   the two texts separated by a single space.
 */
function joinEntityText(left: NormalizedEntityToken, right: NormalizedEntityToken): string {
  const { end: leftEnd, text: head } = left;
  const { start: rightStart, text: tail } = right;

  const touching =
    typeof leftEnd === "number" && typeof rightStart === "number" && rightStart - leftEnd <= 0;

  return touching ? `${head}${tail}` : `${head} ${tail}`;
}
139
+
140
/**
 * Decides whether two consecutive entity tokens belong to the same entity
 * and should be merged into one.
 *
 * Merging is refused when:
 * - the types differ or either text is empty;
 * - either text contains no words, or the combined word count exceeds
 *   `ENTITY_MAX_MERGED_WORDS`;
 * - the word on either side of the seam is a noisy short word.
 *
 * After that, the decision depends on whether offsets are available:
 * - with `left.end` and `right.start` both numeric, the gap between them must
 *   not be negative or larger than one, and (when `sourceText` is given and
 *   the gap is positive) the source characters in the gap must be whitespace;
 * - without offsets, the tokens merge unless `left.text` ends with, or
 *   `right.text` starts with, one of `. , ! ? ; :`.
 *
 * @param left - The earlier token.
 * @param right - The token that follows it.
 * @param sourceText - Optional text the offsets index into.
 * @returns `true` when the tokens should be merged.
 */
function shouldMergeEntityTokens(
  left: NormalizedEntityToken,
  right: NormalizedEntityToken,
  sourceText?: string,
): boolean {
  const sameType = left.type === right.type;
  if (!sameType || !left.text || !right.text) {
    return false;
  }

  const headWords = splitEntityWords(left.text);
  const tailWords = splitEntityWords(right.text);
  if (headWords.length === 0 || tailWords.length === 0) {
    return false;
  }
  if (headWords.length + tailWords.length > ENTITY_MAX_MERGED_WORDS) {
    return false;
  }

  // Only the two words that would end up next to each other are inspected.
  const seamLeft = headWords[headWords.length - 1];
  const seamRight = tailWords[0];
  if (!seamLeft || !seamRight) {
    return false;
  }
  if (isLikelyNoisyShortWord(seamLeft) || isLikelyNoisyShortWord(seamRight)) {
    return false;
  }

  const { end: leftEnd } = left;
  const { start: rightStart } = right;

  if (typeof leftEnd !== "number" || typeof rightStart !== "number") {
    // No usable offsets: fall back to a punctuation check at the seam.
    const punctuationAtSeam = /[.,!?;:]$/.test(left.text) || /^[.,!?;:]/.test(right.text);
    return !punctuationAtSeam;
  }

  const gap = rightStart - leftEnd;
  if (gap < 0 || gap > 1) {
    return false;
  }
  if (!sourceText || !(gap > 0)) {
    return true;
  }

  // Anything other than whitespace between the tokens blocks the merge.
  const between = sourceText.slice(leftEnd, rightStart);
  return !(between && /[^\s]/u.test(between));
}
190
+
191
/**
 * Walks the tokens in order and folds each one into the previously emitted
 * token whenever `shouldMergeEntityTokens` approves.
 *
 * A merged token gets the joined, whitespace-normalized text, the lower of
 * the two scores, the earliest known `start` and the latest known `end`.
 *
 * @param tokens - Tokens in their original order. The objects are not
 *   mutated; output entries are shallow copies. When there are zero or one
 *   tokens the input array itself is returned.
 * @param sourceText - Optional text forwarded to the merge decision.
 * @returns The collapsed token list.
 */
function collapseAdjacentEntityTokens(
  tokens: NormalizedEntityToken[],
  sourceText?: string,
): NormalizedEntityToken[] {
  if (tokens.length <= 1) {
    return tokens;
  }

  const merged: NormalizedEntityToken[] = [];
  for (let i = 0; i < tokens.length; i += 1) {
    const current = tokens[i];
    const tail = merged[merged.length - 1];

    if (tail && shouldMergeEntityTokens(tail, current, sourceText)) {
      // Text is joined first: the join reads `tail.end` before it is advanced.
      tail.text = normalizeWhitespace(joinEntityText(tail, current));
      tail.score = Math.min(tail.score, current.score);
      tail.start = typeof tail.start === "number" ? tail.start : current.start;
      tail.end = typeof current.end === "number" ? current.end : tail.end;
    } else {
      merged.push({ ...current });
    }
  }

  return merged;
}
215
+
82
216
  type EntityExtractionOptions = {
83
217
  stateDir?: string;
84
218
  };
@@ -319,7 +453,7 @@ export class EntityExtractionManager {
319
453
  });
320
454
  const rows = Array.isArray(raw) ? raw : [];
321
455
 
322
- const deduped = new Map<string, ExtractedEntity>();
456
+ const normalized: NormalizedEntityToken[] = [];
323
457
  for (const row of rows) {
324
458
  if (!row || typeof row !== "object") {
325
459
  continue;
@@ -335,13 +469,25 @@ export class EntityExtractionManager {
335
469
  }
336
470
 
337
471
  const type = normalizeEntityType(record.entity_group ?? record.entity);
338
- const canonicalUri = buildCanonicalEntityUri(type, entityText);
472
+ normalized.push({
473
+ text: entityText,
474
+ type,
475
+ score,
476
+ start: asFiniteNumber(record.start),
477
+ end: asFiniteNumber(record.end),
478
+ });
479
+ }
480
+
481
+ const collapsed = collapseAdjacentEntityTokens(normalized, params.text);
482
+ const deduped = new Map<string, ExtractedEntity>();
483
+ for (const token of collapsed) {
484
+ const canonicalUri = buildCanonicalEntityUri(token.type, token.text);
339
485
  const current = deduped.get(canonicalUri);
340
- if (!current || score > current.score) {
486
+ if (!current || token.score > current.score) {
341
487
  deduped.set(canonicalUri, {
342
- text: entityText,
343
- type,
344
- score,
488
+ text: token.text,
489
+ type: token.type,
490
+ score: token.score,
345
491
  canonicalUri,
346
492
  });
347
493
  }
package/src/extract.ts CHANGED
@@ -12,6 +12,34 @@ const HEURISTIC_PATTERNS = [
12
12
  /my name is|i am|contact me at|email is|phone is/i,
13
13
  /deadline|due date|todo|action item|follow up/i,
14
14
  ];
15
/** Factor applied to the requested item count when sizing the heuristic lookback window. */
const HEURISTIC_LOOKBACK_MULTIPLIER = 4;
/** Lower bound on the number of trailing messages the heuristic pass looks at. */
const HEURISTIC_MIN_LOOKBACK_MESSAGES = 12;
/** Bracketed tag that begins with a known feed/integration name, e.g. "[rss ...]". */
const FEED_TAG_PATTERN = /\[(?:n8n|rss|alert|news|cron|slack|discord|telegram|email|github|jira)[^[]*]/i;
/** Non-user role label followed by a colon, anywhere in the text. */
const ROLE_LABEL_PATTERN = /\b(?:assistant|system|tool|developer)\s*:/gi;

/**
 * Heuristically detects text that looks like an automated feed item or a
 * pasted/imported transcript rather than a genuine message.
 *
 * @param text - Message text to inspect.
 * @returns `true` when any of the following holds:
 *   - the text contains a feed tag (`FEED_TAG_PATTERN`);
 *   - it contains at least two non-user role labels (`ROLE_LABEL_PATTERN`);
 *   - at least two of its lines, after trimming, start with a role label
 *     (assistant, system, tool, developer or user) followed by a colon.
 */
function isLikelyFeedOrImportedText(text: string): boolean {
  if (FEED_TAG_PATTERN.test(text)) {
    return true;
  }

  const inlineRoleLabels = (text.match(ROLE_LABEL_PATTERN) ?? []).length;
  if (inlineRoleLabels >= 2) {
    return true;
  }

  // Blank lines can never match the prefix pattern, so they need no separate filtering.
  let rolePrefixedLines = 0;
  for (const rawLine of text.split(/\r?\n+/)) {
    if (/^(assistant|system|tool|developer|user)\s*:/i.test(rawLine.trim())) {
      rolePrefixedLines += 1;
    }
  }
  return rolePrefixedLines >= 2;
}
15
43
 
16
44
  function extractMessageText(content: unknown): string {
17
45
  if (typeof content === "string") {
@@ -98,8 +126,10 @@ function pickHeuristicCandidates(
98
126
  ): ExtractedCandidate[] {
99
127
  const out: ExtractedCandidate[] = [];
100
128
  const seen = new Set<string>();
129
+ const lookback = Math.max(HEURISTIC_MIN_LOOKBACK_MESSAGES, maxItems * HEURISTIC_LOOKBACK_MULTIPLIER);
130
+ const startIndex = Math.max(0, messages.length - lookback);
101
131
 
102
- for (let i = messages.length - 1; i >= 0; i -= 1) {
132
+ for (let i = messages.length - 1; i >= startIndex; i -= 1) {
103
133
  const message = messages[i];
104
134
  if (!message || (message.role !== "user" && message.role !== "assistant")) {
105
135
  continue;
@@ -107,6 +137,9 @@ function pickHeuristicCandidates(
107
137
  if (message.text.length < 20 || message.text.length > 3000) {
108
138
  continue;
109
139
  }
140
+ if (isLikelyFeedOrImportedText(message.text)) {
141
+ continue;
142
+ }
110
143
 
111
144
  const score = scoreHeuristic(message.text);
112
145
  if (score < 0.2) {
package/src/index.ts CHANGED
@@ -24,7 +24,7 @@ import {
24
24
  writeStatsState,
25
25
  } from "./state.js";
26
26
  import type { LifecycleEntry, MemoryBraidResult, ScopeKey } from "./types.js";
27
- import { normalizeForHash, sha256 } from "./chunking.js";
27
+ import { normalizeForHash, normalizeWhitespace, sha256 } from "./chunking.js";
28
28
 
29
29
  function jsonToolResult(payload: unknown) {
30
30
  return {
@@ -225,6 +225,17 @@ function isGenericUserSummary(text: string): boolean {
225
225
  );
226
226
  }
227
227
 
228
/**
 * Prepares a prompt for use as a recall query by removing every
 * `<relevant-memories>…</relevant-memories>` block (matched
 * case-insensitively, non-greedy, across newlines) and normalizing the
 * remaining whitespace.
 *
 * @param text - Raw prompt text.
 * @returns The cleaned query; an empty string when `text` is falsy.
 */
function sanitizeRecallQuery(text: string): string {
  if (!text) {
    return "";
  }
  const injectedMemoriesBlock = /<relevant-memories>[\s\S]*?<\/relevant-memories>/gi;
  // Each block is replaced by a space so the text on either side does not fuse together.
  return normalizeWhitespace(text.replace(injectedMemoriesBlock, " "));
}
238
+
228
239
  function applyMem0QualityAdjustments(params: {
229
240
  results: MemoryBraidResult[];
230
241
  query: string;
@@ -1220,6 +1231,10 @@ const memoryBraidPlugin = {
1220
1231
 
1221
1232
  api.on("before_agent_start", async (event, ctx) => {
1222
1233
  const runId = log.newRunId();
1234
+ const recallQuery = sanitizeRecallQuery(event.prompt);
1235
+ if (!recallQuery) {
1236
+ return;
1237
+ }
1223
1238
  const toolCtx: OpenClawPluginToolContext = {
1224
1239
  config: api.config,
1225
1240
  workspaceDir: ctx.workspaceDir,
@@ -1235,17 +1250,17 @@ const memoryBraidPlugin = {
1235
1250
  log,
1236
1251
  ctx: toolCtx,
1237
1252
  statePaths: runtimeStatePaths,
1238
- query: event.prompt,
1253
+ query: recallQuery,
1239
1254
  args: {
1240
- query: event.prompt,
1255
+ query: recallQuery,
1241
1256
  maxResults: cfg.recall.maxResults,
1242
1257
  },
1243
1258
  runId,
1244
1259
  });
1245
1260
 
1246
1261
  const selected = selectMemoriesForInjection({
1247
- query: event.prompt,
1248
- results: recall.merged,
1262
+ query: recallQuery,
1263
+ results: recall.mem0,
1249
1264
  limit: cfg.recall.injectTopK,
1250
1265
  });
1251
1266
  if (selected.injected.length === 0) {
@@ -1256,6 +1271,7 @@ const memoryBraidPlugin = {
1256
1271
  sessionKey: scope.sessionKey,
1257
1272
  workspaceHash: scope.workspaceHash,
1258
1273
  count: 0,
1274
+ source: "mem0",
1259
1275
  queryTokens: selected.queryTokens,
1260
1276
  filteredOut: selected.filteredOut,
1261
1277
  genericRejected: selected.genericRejected,
@@ -1272,6 +1288,7 @@ const memoryBraidPlugin = {
1272
1288
  sessionKey: scope.sessionKey,
1273
1289
  workspaceHash: scope.workspaceHash,
1274
1290
  count: selected.injected.length,
1291
+ source: "mem0",
1275
1292
  queryTokens: selected.queryTokens,
1276
1293
  filteredOut: selected.filteredOut,
1277
1294
  genericRejected: selected.genericRejected,