memory-braid 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/entities.ts +67 -5
- package/src/extract.ts +34 -1
- package/src/index.ts +22 -5
package/package.json
CHANGED
package/src/entities.ts
CHANGED
|
@@ -89,6 +89,20 @@ type NormalizedEntityToken = {
|
|
|
89
89
|
end?: number;
|
|
90
90
|
};
|
|
91
91
|
|
|
92
|
+
/**
 * Lower-cased connector words (English, Spanish and Portuguese-style
 * particles). Short words found in this set are exempt from the
 * "noisy short word" rejection applied when deciding whether two adjacent
 * entity tokens may be merged.
 */
const ENTITY_CONNECTOR_WORDS = new Set<string>(
  ["and", "da", "de", "del", "la", "las", "los", "of", "the", "y"],
);

/**
 * Upper bound on the combined word count of two entity tokens; pairs whose
 * words add up to more than this are never merged.
 */
const ENTITY_MAX_MERGED_WORDS = 3;
|
|
105
|
+
|
|
92
106
|
function asFiniteNumber(value: unknown): number | undefined {
|
|
93
107
|
if (typeof value !== "number" || !Number.isFinite(value)) {
|
|
94
108
|
return undefined;
|
|
@@ -96,6 +110,21 @@ function asFiniteNumber(value: unknown): number | undefined {
|
|
|
96
110
|
return value;
|
|
97
111
|
}
|
|
98
112
|
|
|
113
|
+
/**
 * Splits text into word-like runs of Unicode letters and digits.
 *
 * A word starts at a letter or digit and continues through letters, digits
 * and combining marks. Including marks keeps decomposed (NFD) accented text
 * such as "Andre\u0301s", and scripts that use dependent vowel signs, as a
 * single word instead of breaking it at every mark. Whitespace, punctuation
 * and symbols act as separators and are dropped.
 *
 * @param text - Raw entity text to tokenize.
 * @returns The words in order of appearance, or an empty array when the text
 *   contains no letters or digits.
 */
function splitEntityWords(text: string): string[] {
  // The leading class excludes \p{M} so a stray mark can never start a word.
  return text.match(/[\p{L}\p{N}][\p{L}\p{N}\p{M}]*/gu) ?? [];
}
|
|
116
|
+
|
|
117
|
+
/**
 * Reports whether a word is a short fragment that should block merging of
 * adjacent entity tokens.
 *
 * A word counts as noisy only when all of the following hold:
 * - its lower-cased form is shorter than three characters,
 * - that lower-cased form is not listed in ENTITY_CONNECTOR_WORDS, and
 * - the word is not a single ASCII capital letter, optionally followed by a
 *   period (an initial such as "J" or "J.").
 *
 * @param word - A single word taken from an entity token.
 * @returns `true` when the word looks like noise, `false` otherwise.
 */
function isLikelyNoisyShortWord(word: string): boolean {
  const lowered = word.toLowerCase();
  const isShort = lowered.length < 3;
  const isConnector = ENTITY_CONNECTOR_WORDS.has(lowered);
  const isInitial = /^[A-Z]\.?$/.test(word);
  return isShort && !isConnector && !isInitial;
}
|
|
127
|
+
|
|
99
128
|
function joinEntityText(left: NormalizedEntityToken, right: NormalizedEntityToken): string {
|
|
100
129
|
const leftEnd = left.end;
|
|
101
130
|
const rightStart = right.start;
|
|
@@ -108,11 +137,32 @@ function joinEntityText(left: NormalizedEntityToken, right: NormalizedEntityToke
|
|
|
108
137
|
return `${left.text} ${right.text}`;
|
|
109
138
|
}
|
|
110
139
|
|
|
111
|
-
function shouldMergeEntityTokens(
|
|
140
|
+
function shouldMergeEntityTokens(
|
|
141
|
+
left: NormalizedEntityToken,
|
|
142
|
+
right: NormalizedEntityToken,
|
|
143
|
+
sourceText?: string,
|
|
144
|
+
): boolean {
|
|
112
145
|
if (left.type !== right.type || !left.text || !right.text) {
|
|
113
146
|
return false;
|
|
114
147
|
}
|
|
115
148
|
|
|
149
|
+
const leftWords = splitEntityWords(left.text);
|
|
150
|
+
const rightWords = splitEntityWords(right.text);
|
|
151
|
+
if (leftWords.length === 0 || rightWords.length === 0) {
|
|
152
|
+
return false;
|
|
153
|
+
}
|
|
154
|
+
if (leftWords.length + rightWords.length > ENTITY_MAX_MERGED_WORDS) {
|
|
155
|
+
return false;
|
|
156
|
+
}
|
|
157
|
+
const leftLastWord = leftWords[leftWords.length - 1];
|
|
158
|
+
const rightFirstWord = rightWords[0];
|
|
159
|
+
if (!leftLastWord || !rightFirstWord) {
|
|
160
|
+
return false;
|
|
161
|
+
}
|
|
162
|
+
if (isLikelyNoisyShortWord(leftLastWord) || isLikelyNoisyShortWord(rightFirstWord)) {
|
|
163
|
+
return false;
|
|
164
|
+
}
|
|
165
|
+
|
|
116
166
|
const leftEnd = left.end;
|
|
117
167
|
const rightStart = right.start;
|
|
118
168
|
if (typeof leftEnd === "number" && typeof rightStart === "number") {
|
|
@@ -120,7 +170,16 @@ function shouldMergeEntityTokens(left: NormalizedEntityToken, right: NormalizedE
|
|
|
120
170
|
if (gap < 0) {
|
|
121
171
|
return false;
|
|
122
172
|
}
|
|
123
|
-
|
|
173
|
+
if (gap > 1) {
|
|
174
|
+
return false;
|
|
175
|
+
}
|
|
176
|
+
if (sourceText && gap > 0) {
|
|
177
|
+
const between = sourceText.slice(leftEnd, rightStart);
|
|
178
|
+
if (between && /[^\s]/u.test(between)) {
|
|
179
|
+
return false;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
return true;
|
|
124
183
|
}
|
|
125
184
|
|
|
126
185
|
if (/[.,!?;:]$/.test(left.text) || /^[.,!?;:]/.test(right.text)) {
|
|
@@ -129,7 +188,10 @@ function shouldMergeEntityTokens(left: NormalizedEntityToken, right: NormalizedE
|
|
|
129
188
|
return true;
|
|
130
189
|
}
|
|
131
190
|
|
|
132
|
-
function collapseAdjacentEntityTokens(
|
|
191
|
+
function collapseAdjacentEntityTokens(
|
|
192
|
+
tokens: NormalizedEntityToken[],
|
|
193
|
+
sourceText?: string,
|
|
194
|
+
): NormalizedEntityToken[] {
|
|
133
195
|
if (tokens.length <= 1) {
|
|
134
196
|
return tokens;
|
|
135
197
|
}
|
|
@@ -137,7 +199,7 @@ function collapseAdjacentEntityTokens(tokens: NormalizedEntityToken[]): Normaliz
|
|
|
137
199
|
const collapsed: NormalizedEntityToken[] = [];
|
|
138
200
|
for (const token of tokens) {
|
|
139
201
|
const previous = collapsed[collapsed.length - 1];
|
|
140
|
-
if (!previous || !shouldMergeEntityTokens(previous, token)) {
|
|
202
|
+
if (!previous || !shouldMergeEntityTokens(previous, token, sourceText)) {
|
|
141
203
|
collapsed.push({ ...token });
|
|
142
204
|
continue;
|
|
143
205
|
}
|
|
@@ -416,7 +478,7 @@ export class EntityExtractionManager {
|
|
|
416
478
|
});
|
|
417
479
|
}
|
|
418
480
|
|
|
419
|
-
const collapsed = collapseAdjacentEntityTokens(normalized);
|
|
481
|
+
const collapsed = collapseAdjacentEntityTokens(normalized, params.text);
|
|
420
482
|
const deduped = new Map<string, ExtractedEntity>();
|
|
421
483
|
for (const token of collapsed) {
|
|
422
484
|
const canonicalUri = buildCanonicalEntityUri(token.type, token.text);
|
package/src/extract.ts
CHANGED
|
@@ -12,6 +12,34 @@ const HEURISTIC_PATTERNS = [
|
|
|
12
12
|
/my name is|i am|contact me at|email is|phone is/i,
|
|
13
13
|
/deadline|due date|todo|action item|follow up/i,
|
|
14
14
|
];
|
|
15
|
+
/** Factor applied to `maxItems` when sizing the heuristic lookback window. */
const HEURISTIC_LOOKBACK_MULTIPLIER = 4;
/** Smallest number of trailing messages the heuristic scan will consider. */
const HEURISTIC_MIN_LOOKBACK_MESSAGES = 12;
/** Bracketed tag whose content begins with a known feed/integration name, e.g. "[rss ...]". */
const FEED_TAG_PATTERN = /\[(?:n8n|rss|alert|news|cron|slack|discord|telegram|email|github|jira)[^[]*]/i;
/** Inline "role:" labels; "user" is deliberately absent from this pattern. */
const ROLE_LABEL_PATTERN = /\b(?:assistant|system|tool|developer)\s*:/gi;

/**
 * Heuristically detects text that looks like an automated feed item or a
 * pasted/imported transcript rather than something typed in conversation.
 *
 * The text is flagged when any one of these holds:
 * 1. it contains a bracketed feed tag (see FEED_TAG_PATTERN);
 * 2. it contains two or more inline role labels (see ROLE_LABEL_PATTERN);
 * 3. two or more of its trimmed lines begin with a role prefix, where
 *    "user" also counts as a role.
 *
 * @param text - Message text to inspect.
 * @returns `true` when the text looks like feed or imported content.
 */
function isLikelyFeedOrImportedText(text: string): boolean {
  const hasFeedTag = FEED_TAG_PATTERN.test(text);
  if (hasFeedTag) {
    return true;
  }

  const inlineRoleLabels = text.match(ROLE_LABEL_PATTERN) ?? [];
  if (inlineRoleLabels.length >= 2) {
    return true;
  }

  // Blank lines can never match the prefix pattern, so they need no separate filtering.
  const rolePrefix = /^(assistant|system|tool|developer|user)\s*:/i;
  let rolePrefixedCount = 0;
  for (const rawLine of text.split(/\r?\n+/)) {
    if (rolePrefix.test(rawLine.trim())) {
      rolePrefixedCount += 1;
    }
  }
  return rolePrefixedCount >= 2;
}
|
|
15
43
|
|
|
16
44
|
function extractMessageText(content: unknown): string {
|
|
17
45
|
if (typeof content === "string") {
|
|
@@ -98,8 +126,10 @@ function pickHeuristicCandidates(
|
|
|
98
126
|
): ExtractedCandidate[] {
|
|
99
127
|
const out: ExtractedCandidate[] = [];
|
|
100
128
|
const seen = new Set<string>();
|
|
129
|
+
const lookback = Math.max(HEURISTIC_MIN_LOOKBACK_MESSAGES, maxItems * HEURISTIC_LOOKBACK_MULTIPLIER);
|
|
130
|
+
const startIndex = Math.max(0, messages.length - lookback);
|
|
101
131
|
|
|
102
|
-
for (let i = messages.length - 1; i >=
|
|
132
|
+
for (let i = messages.length - 1; i >= startIndex; i -= 1) {
|
|
103
133
|
const message = messages[i];
|
|
104
134
|
if (!message || (message.role !== "user" && message.role !== "assistant")) {
|
|
105
135
|
continue;
|
|
@@ -107,6 +137,9 @@ function pickHeuristicCandidates(
|
|
|
107
137
|
if (message.text.length < 20 || message.text.length > 3000) {
|
|
108
138
|
continue;
|
|
109
139
|
}
|
|
140
|
+
if (isLikelyFeedOrImportedText(message.text)) {
|
|
141
|
+
continue;
|
|
142
|
+
}
|
|
110
143
|
|
|
111
144
|
const score = scoreHeuristic(message.text);
|
|
112
145
|
if (score < 0.2) {
|
package/src/index.ts
CHANGED
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
writeStatsState,
|
|
25
25
|
} from "./state.js";
|
|
26
26
|
import type { LifecycleEntry, MemoryBraidResult, ScopeKey } from "./types.js";
|
|
27
|
-
import { normalizeForHash, sha256 } from "./chunking.js";
|
|
27
|
+
import { normalizeForHash, normalizeWhitespace, sha256 } from "./chunking.js";
|
|
28
28
|
|
|
29
29
|
function jsonToolResult(payload: unknown) {
|
|
30
30
|
return {
|
|
@@ -225,6 +225,17 @@ function isGenericUserSummary(text: string): boolean {
|
|
|
225
225
|
);
|
|
226
226
|
}
|
|
227
227
|
|
|
228
|
+
/**
 * Prepares a prompt for use as a recall query.
 *
 * Every `<relevant-memories>…</relevant-memories>` block (matched
 * case-insensitively and non-greedily, across newlines) is replaced with a
 * single space, and the remainder is passed through `normalizeWhitespace`.
 *
 * @param text - The raw prompt text.
 * @returns The cleaned query, or an empty string when `text` is empty.
 */
function sanitizeRecallQuery(text: string): string {
  return text
    ? normalizeWhitespace(
        text.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>/gi, " "),
      )
    : "";
}
|
|
238
|
+
|
|
228
239
|
function applyMem0QualityAdjustments(params: {
|
|
229
240
|
results: MemoryBraidResult[];
|
|
230
241
|
query: string;
|
|
@@ -1220,6 +1231,10 @@ const memoryBraidPlugin = {
|
|
|
1220
1231
|
|
|
1221
1232
|
api.on("before_agent_start", async (event, ctx) => {
|
|
1222
1233
|
const runId = log.newRunId();
|
|
1234
|
+
const recallQuery = sanitizeRecallQuery(event.prompt);
|
|
1235
|
+
if (!recallQuery) {
|
|
1236
|
+
return;
|
|
1237
|
+
}
|
|
1223
1238
|
const toolCtx: OpenClawPluginToolContext = {
|
|
1224
1239
|
config: api.config,
|
|
1225
1240
|
workspaceDir: ctx.workspaceDir,
|
|
@@ -1235,17 +1250,17 @@ const memoryBraidPlugin = {
|
|
|
1235
1250
|
log,
|
|
1236
1251
|
ctx: toolCtx,
|
|
1237
1252
|
statePaths: runtimeStatePaths,
|
|
1238
|
-
query:
|
|
1253
|
+
query: recallQuery,
|
|
1239
1254
|
args: {
|
|
1240
|
-
query:
|
|
1255
|
+
query: recallQuery,
|
|
1241
1256
|
maxResults: cfg.recall.maxResults,
|
|
1242
1257
|
},
|
|
1243
1258
|
runId,
|
|
1244
1259
|
});
|
|
1245
1260
|
|
|
1246
1261
|
const selected = selectMemoriesForInjection({
|
|
1247
|
-
query:
|
|
1248
|
-
results: recall.
|
|
1262
|
+
query: recallQuery,
|
|
1263
|
+
results: recall.mem0,
|
|
1249
1264
|
limit: cfg.recall.injectTopK,
|
|
1250
1265
|
});
|
|
1251
1266
|
if (selected.injected.length === 0) {
|
|
@@ -1256,6 +1271,7 @@ const memoryBraidPlugin = {
|
|
|
1256
1271
|
sessionKey: scope.sessionKey,
|
|
1257
1272
|
workspaceHash: scope.workspaceHash,
|
|
1258
1273
|
count: 0,
|
|
1274
|
+
source: "mem0",
|
|
1259
1275
|
queryTokens: selected.queryTokens,
|
|
1260
1276
|
filteredOut: selected.filteredOut,
|
|
1261
1277
|
genericRejected: selected.genericRejected,
|
|
@@ -1272,6 +1288,7 @@ const memoryBraidPlugin = {
|
|
|
1272
1288
|
sessionKey: scope.sessionKey,
|
|
1273
1289
|
workspaceHash: scope.workspaceHash,
|
|
1274
1290
|
count: selected.injected.length,
|
|
1291
|
+
source: "mem0",
|
|
1275
1292
|
queryTokens: selected.queryTokens,
|
|
1276
1293
|
filteredOut: selected.filteredOut,
|
|
1277
1294
|
genericRejected: selected.genericRejected,
|