memory-braid 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/entities.ts +152 -6
- package/src/extract.ts +34 -1
- package/src/index.ts +22 -5
package/package.json
CHANGED
package/src/entities.ts
CHANGED
|
@@ -11,6 +11,8 @@ type NerRecord = {
|
|
|
11
11
|
entity_group?: unknown;
|
|
12
12
|
entity?: unknown;
|
|
13
13
|
score?: unknown;
|
|
14
|
+
start?: unknown;
|
|
15
|
+
end?: unknown;
|
|
14
16
|
};
|
|
15
17
|
|
|
16
18
|
export type ExtractedEntity = {
|
|
@@ -79,6 +81,138 @@ function normalizeEntityText(raw: unknown): string {
|
|
|
79
81
|
return normalizeWhitespace(raw.replace(/^##/, "").replace(/^▁/, ""));
|
|
80
82
|
}
|
|
81
83
|
|
|
84
|
+
/**
 * Working record for a single NER hit while adjacent hits are being
 * collapsed into multi-word entities.
 */
type NormalizedEntityToken = {
  /** Entity text for this hit (grows when neighbouring hits are merged in). */
  text: string;
  /** Entity category; same value space as the public `ExtractedEntity` type. */
  type: ExtractedEntity["type"];
  /** Score of the hit; a merged token keeps the lower of the two scores. */
  score: number;
  /** Start offset into the source text, present only when the NER row had a finite number. */
  start?: number;
  /** End offset into the source text, present only when the NER row had a finite number. */
  end?: number;
};
|
|
91
|
+
|
|
92
|
+
/**
 * Lower-cased connector words. The short-word noise check lets any of these
 * through even when they are shorter than three characters.
 */
const ENTITY_CONNECTOR_WORDS = new Set<string>(
  ["and", "da", "de", "del", "la", "las", "los", "of", "the", "y"],
);

/** Largest combined word count two tokens may have and still be merged. */
const ENTITY_MAX_MERGED_WORDS = 3;
|
|
105
|
+
|
|
106
|
+
/**
 * Narrows an untyped value to a usable number.
 *
 * @param value - Anything, typically a field read from an NER result row.
 * @returns `value` itself when it is a finite number, otherwise `undefined`
 *   (this covers non-numbers as well as `NaN` and `±Infinity`).
 */
function asFiniteNumber(value: unknown): number | undefined {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Splits entity text into its words.
 *
 * A word starts with a Unicode letter or number and continues through any
 * further letters, numbers, or combining marks. Keeping combining marks
 * inside the word means decomposed text (e.g. `u` + U+0308) and scripts that
 * write vowels as combining signs are not broken apart mid-word, which would
 * otherwise inflate the word count and produce bogus boundary words.
 *
 * @param text - Entity text to split.
 * @returns The words in order of appearance; an empty array when there are none.
 */
function splitEntityWords(text: string): string[] {
  return text.match(/[\p{L}\p{N}][\p{L}\p{N}\p{M}]*/gu) ?? [];
}
|
|
116
|
+
|
|
117
|
+
/**
 * Decides whether a word at a merge boundary looks like a noise fragment.
 *
 * Words whose lower-cased form has three or more characters are never noise.
 * Shorter words are tolerated in two cases: a single upper-case letter,
 * optionally followed by a period (an initial), or a member of
 * `ENTITY_CONNECTOR_WORDS`. The initial check uses the Unicode upper-case
 * category so that initials such as "É" or "Ñ" are treated like "J".
 *
 * @param word - The word to classify, in its original casing.
 * @returns `true` when the word is short and neither an initial nor a connector.
 */
function isLikelyNoisyShortWord(word: string): boolean {
  const normalized = word.toLowerCase();
  if (normalized.length >= 3) {
    return false;
  }
  // Initials are matched against the original casing, not the lower-cased form.
  if (/^\p{Lu}\.?$/u.test(word)) {
    return false;
  }
  return !ENTITY_CONNECTOR_WORDS.has(normalized);
}
|
|
127
|
+
|
|
128
|
+
/**
 * Concatenates the text of two tokens that are about to be merged.
 *
 * When both the left token's end offset and the right token's start offset
 * are known and the right token begins at or before the point where the left
 * one ends, the texts are glued together directly. In every other case a
 * single space separates them.
 *
 * @param left - Token that comes first.
 * @param right - Token that follows it.
 * @returns The combined text.
 */
function joinEntityText(left: NormalizedEntityToken, right: NormalizedEntityToken): string {
  const { end } = left;
  const { start } = right;
  const touching = typeof end === "number" && typeof start === "number" && start - end <= 0;
  const separator = touching ? "" : " ";
  return left.text + separator + right.text;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Decides whether two consecutive tokens belong to the same entity.
 *
 * Both tokens must have the same type and non-empty text, each must contain
 * at least one word, their combined word count must not exceed
 * `ENTITY_MAX_MERGED_WORDS`, and neither word at the seam may look like a
 * noisy short fragment.
 *
 * If both offsets at the seam are known, the tokens must not overlap and may
 * be at most one position apart; when `sourceText` is supplied, whatever lies
 * between them must be whitespace only. If an offset is missing, the decision
 * falls back to refusing a merge across punctuation at the seam.
 *
 * @param left - Token that comes first.
 * @param right - Token that follows it.
 * @param sourceText - Original text the offsets index into, if available.
 * @returns `true` when the two tokens should be merged.
 */
function shouldMergeEntityTokens(
  left: NormalizedEntityToken,
  right: NormalizedEntityToken,
  sourceText?: string,
): boolean {
  const comparable = left.type === right.type && Boolean(left.text) && Boolean(right.text);
  if (!comparable) {
    return false;
  }

  const wordsBefore = splitEntityWords(left.text);
  const wordsAfter = splitEntityWords(right.text);
  const seamBefore = wordsBefore[wordsBefore.length - 1];
  const seamAfter = wordsAfter[0];
  // An undefined seam word means that side had no words at all.
  if (!seamBefore || !seamAfter) {
    return false;
  }
  if (wordsBefore.length + wordsAfter.length > ENTITY_MAX_MERGED_WORDS) {
    return false;
  }
  if ([seamBefore, seamAfter].some((word) => isLikelyNoisyShortWord(word))) {
    return false;
  }

  const { end: leftEnd } = left;
  const { start: rightStart } = right;
  if (typeof leftEnd !== "number" || typeof rightStart !== "number") {
    // Without offsets, punctuation at the seam is the only available signal.
    const punctuationAtSeam = /[.,!?;:]$/.test(left.text) || /^[.,!?;:]/.test(right.text);
    return !punctuationAtSeam;
  }

  const gap = rightStart - leftEnd;
  if (gap < 0 || gap > 1) {
    return false;
  }
  // Only inspect the source when there is something between the tokens.
  const between = sourceText && gap > 0 ? sourceText.slice(leftEnd, rightStart) : "";
  return !/[^\s]/u.test(between);
}
|
|
190
|
+
|
|
191
|
+
/**
 * Folds runs of consecutive tokens that belong together into single tokens.
 *
 * Tokens are visited in order. Each one is either merged into the most
 * recently emitted token (when `shouldMergeEntityTokens` agrees) or emitted
 * as a shallow copy. A merged token takes the joined, whitespace-normalized
 * text, the lower of the two scores, the earliest known start, and the latest
 * known end.
 *
 * @param tokens - Tokens in source order. The array is not mutated; a list of
 *   zero or one tokens is returned as-is.
 * @param sourceText - Original text the offsets index into, if available.
 * @returns The collapsed token list.
 */
function collapseAdjacentEntityTokens(
  tokens: NormalizedEntityToken[],
  sourceText?: string,
): NormalizedEntityToken[] {
  if (tokens.length < 2) {
    return tokens;
  }

  const merged: NormalizedEntityToken[] = [];
  for (const current of tokens) {
    const tail = merged[merged.length - 1];
    if (tail && shouldMergeEntityTokens(tail, current, sourceText)) {
      tail.text = normalizeWhitespace(joinEntityText(tail, current));
      tail.score = Math.min(tail.score, current.score);
      tail.start = typeof tail.start === "number" ? tail.start : current.start;
      tail.end = typeof current.end === "number" ? current.end : tail.end;
    } else {
      // Copy so later merges never write into the caller's objects.
      merged.push({ ...current });
    }
  }
  return merged;
}
|
|
215
|
+
|
|
82
216
|
type EntityExtractionOptions = {
|
|
83
217
|
stateDir?: string;
|
|
84
218
|
};
|
|
@@ -319,7 +453,7 @@ export class EntityExtractionManager {
|
|
|
319
453
|
});
|
|
320
454
|
const rows = Array.isArray(raw) ? raw : [];
|
|
321
455
|
|
|
322
|
-
const
|
|
456
|
+
const normalized: NormalizedEntityToken[] = [];
|
|
323
457
|
for (const row of rows) {
|
|
324
458
|
if (!row || typeof row !== "object") {
|
|
325
459
|
continue;
|
|
@@ -335,13 +469,25 @@ export class EntityExtractionManager {
|
|
|
335
469
|
}
|
|
336
470
|
|
|
337
471
|
const type = normalizeEntityType(record.entity_group ?? record.entity);
|
|
338
|
-
|
|
472
|
+
normalized.push({
|
|
473
|
+
text: entityText,
|
|
474
|
+
type,
|
|
475
|
+
score,
|
|
476
|
+
start: asFiniteNumber(record.start),
|
|
477
|
+
end: asFiniteNumber(record.end),
|
|
478
|
+
});
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
const collapsed = collapseAdjacentEntityTokens(normalized, params.text);
|
|
482
|
+
const deduped = new Map<string, ExtractedEntity>();
|
|
483
|
+
for (const token of collapsed) {
|
|
484
|
+
const canonicalUri = buildCanonicalEntityUri(token.type, token.text);
|
|
339
485
|
const current = deduped.get(canonicalUri);
|
|
340
|
-
if (!current || score > current.score) {
|
|
486
|
+
if (!current || token.score > current.score) {
|
|
341
487
|
deduped.set(canonicalUri, {
|
|
342
|
-
text:
|
|
343
|
-
type,
|
|
344
|
-
score,
|
|
488
|
+
text: token.text,
|
|
489
|
+
type: token.type,
|
|
490
|
+
score: token.score,
|
|
345
491
|
canonicalUri,
|
|
346
492
|
});
|
|
347
493
|
}
|
package/src/extract.ts
CHANGED
|
@@ -12,6 +12,34 @@ const HEURISTIC_PATTERNS = [
|
|
|
12
12
|
/my name is|i am|contact me at|email is|phone is/i,
|
|
13
13
|
/deadline|due date|todo|action item|follow up/i,
|
|
14
14
|
];
|
|
15
|
+
/** Factor applied to `maxItems` to size the window of trailing messages the heuristic scan visits. */
const HEURISTIC_LOOKBACK_MULTIPLIER = 4;
/** Smallest size that window may have, however small `maxItems` is. */
const HEURISTIC_MIN_LOOKBACK_MESSAGES = 12;
|
|
17
|
+
/** Bracketed tag whose content begins with a known feed/integration name, e.g. `[rss]` or `[github ...]`. */
const FEED_TAG_PATTERN = /\[(?:n8n|rss|alert|news|cron|slack|discord|telegram|email|github|jira)[^[]*]/i;
/** Role label such as `assistant:` anywhere in the text (global, case-insensitive). */
const ROLE_LABEL_PATTERN = /\b(?:assistant|system|tool|developer)\s*:/gi;

/**
 * Heuristically detects text that was piped in from a feed or pasted from a
 * transcript rather than written as a normal message.
 *
 * The text is flagged when any of the following holds:
 * - it contains a feed tag (`FEED_TAG_PATTERN`);
 * - it contains two or more role labels anywhere (`ROLE_LABEL_PATTERN`);
 * - two or more of its lines, after trimming, begin with a role prefix
 *   (`assistant`, `system`, `tool`, `developer`, or `user` followed by `:`).
 *
 * @param text - Message text to inspect.
 * @returns `true` when the text looks like feed or imported content.
 */
function isLikelyFeedOrImportedText(text: string): boolean {
  if (FEED_TAG_PATTERN.test(text)) {
    return true;
  }

  const labelHits = text.match(ROLE_LABEL_PATTERN);
  if (labelHits !== null && labelHits.length >= 2) {
    return true;
  }

  // Blank lines can never match the prefix, so they need no separate filtering.
  const rolePrefix = /^(assistant|system|tool|developer|user)\s*:/i;
  let prefixedLines = 0;
  for (const rawLine of text.split(/\r?\n+/)) {
    if (rolePrefix.test(rawLine.trim())) {
      prefixedLines += 1;
    }
  }
  return prefixedLines >= 2;
}
|
|
15
43
|
|
|
16
44
|
function extractMessageText(content: unknown): string {
|
|
17
45
|
if (typeof content === "string") {
|
|
@@ -98,8 +126,10 @@ function pickHeuristicCandidates(
|
|
|
98
126
|
): ExtractedCandidate[] {
|
|
99
127
|
const out: ExtractedCandidate[] = [];
|
|
100
128
|
const seen = new Set<string>();
|
|
129
|
+
const lookback = Math.max(HEURISTIC_MIN_LOOKBACK_MESSAGES, maxItems * HEURISTIC_LOOKBACK_MULTIPLIER);
|
|
130
|
+
const startIndex = Math.max(0, messages.length - lookback);
|
|
101
131
|
|
|
102
|
-
for (let i = messages.length - 1; i >=
|
|
132
|
+
for (let i = messages.length - 1; i >= startIndex; i -= 1) {
|
|
103
133
|
const message = messages[i];
|
|
104
134
|
if (!message || (message.role !== "user" && message.role !== "assistant")) {
|
|
105
135
|
continue;
|
|
@@ -107,6 +137,9 @@ function pickHeuristicCandidates(
|
|
|
107
137
|
if (message.text.length < 20 || message.text.length > 3000) {
|
|
108
138
|
continue;
|
|
109
139
|
}
|
|
140
|
+
if (isLikelyFeedOrImportedText(message.text)) {
|
|
141
|
+
continue;
|
|
142
|
+
}
|
|
110
143
|
|
|
111
144
|
const score = scoreHeuristic(message.text);
|
|
112
145
|
if (score < 0.2) {
|
package/src/index.ts
CHANGED
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
writeStatsState,
|
|
25
25
|
} from "./state.js";
|
|
26
26
|
import type { LifecycleEntry, MemoryBraidResult, ScopeKey } from "./types.js";
|
|
27
|
-
import { normalizeForHash, sha256 } from "./chunking.js";
|
|
27
|
+
import { normalizeForHash, normalizeWhitespace, sha256 } from "./chunking.js";
|
|
28
28
|
|
|
29
29
|
function jsonToolResult(payload: unknown) {
|
|
30
30
|
return {
|
|
@@ -225,6 +225,17 @@ function isGenericUserSummary(text: string): boolean {
|
|
|
225
225
|
);
|
|
226
226
|
}
|
|
227
227
|
|
|
228
|
+
/**
 * Prepares a prompt for use as a recall query.
 *
 * Every `<relevant-memories>…</relevant-memories>` block (matched
 * case-insensitively, shortest span first) is replaced with a space so that
 * previously injected memories do not feed back into the query. The remainder
 * is then passed through `normalizeWhitespace`.
 *
 * @param text - Raw prompt text.
 * @returns The cleaned query, or an empty string when `text` is empty.
 */
function sanitizeRecallQuery(text: string): string {
  if (text) {
    const stripped = text.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>/gi, " ");
    return normalizeWhitespace(stripped);
  }
  return "";
}
|
|
238
|
+
|
|
228
239
|
function applyMem0QualityAdjustments(params: {
|
|
229
240
|
results: MemoryBraidResult[];
|
|
230
241
|
query: string;
|
|
@@ -1220,6 +1231,10 @@ const memoryBraidPlugin = {
|
|
|
1220
1231
|
|
|
1221
1232
|
api.on("before_agent_start", async (event, ctx) => {
|
|
1222
1233
|
const runId = log.newRunId();
|
|
1234
|
+
const recallQuery = sanitizeRecallQuery(event.prompt);
|
|
1235
|
+
if (!recallQuery) {
|
|
1236
|
+
return;
|
|
1237
|
+
}
|
|
1223
1238
|
const toolCtx: OpenClawPluginToolContext = {
|
|
1224
1239
|
config: api.config,
|
|
1225
1240
|
workspaceDir: ctx.workspaceDir,
|
|
@@ -1235,17 +1250,17 @@ const memoryBraidPlugin = {
|
|
|
1235
1250
|
log,
|
|
1236
1251
|
ctx: toolCtx,
|
|
1237
1252
|
statePaths: runtimeStatePaths,
|
|
1238
|
-
query:
|
|
1253
|
+
query: recallQuery,
|
|
1239
1254
|
args: {
|
|
1240
|
-
query:
|
|
1255
|
+
query: recallQuery,
|
|
1241
1256
|
maxResults: cfg.recall.maxResults,
|
|
1242
1257
|
},
|
|
1243
1258
|
runId,
|
|
1244
1259
|
});
|
|
1245
1260
|
|
|
1246
1261
|
const selected = selectMemoriesForInjection({
|
|
1247
|
-
query:
|
|
1248
|
-
results: recall.
|
|
1262
|
+
query: recallQuery,
|
|
1263
|
+
results: recall.mem0,
|
|
1249
1264
|
limit: cfg.recall.injectTopK,
|
|
1250
1265
|
});
|
|
1251
1266
|
if (selected.injected.length === 0) {
|
|
@@ -1256,6 +1271,7 @@ const memoryBraidPlugin = {
|
|
|
1256
1271
|
sessionKey: scope.sessionKey,
|
|
1257
1272
|
workspaceHash: scope.workspaceHash,
|
|
1258
1273
|
count: 0,
|
|
1274
|
+
source: "mem0",
|
|
1259
1275
|
queryTokens: selected.queryTokens,
|
|
1260
1276
|
filteredOut: selected.filteredOut,
|
|
1261
1277
|
genericRejected: selected.genericRejected,
|
|
@@ -1272,6 +1288,7 @@ const memoryBraidPlugin = {
|
|
|
1272
1288
|
sessionKey: scope.sessionKey,
|
|
1273
1289
|
workspaceHash: scope.workspaceHash,
|
|
1274
1290
|
count: selected.injected.length,
|
|
1291
|
+
source: "mem0",
|
|
1275
1292
|
queryTokens: selected.queryTokens,
|
|
1276
1293
|
filteredOut: selected.filteredOut,
|
|
1277
1294
|
genericRejected: selected.genericRejected,
|