@xdarkicex/openclaw-memory-libravdb 1.6.25 → 1.6.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
+ import { randomUUID } from "node:crypto";
1
2
  import { resolveIdentity } from "./identity.js";
2
3
  import { resolveUserCollection } from "./memory-scopes.js";
4
+ import { manifestStore } from "./manifest.js";
3
5
  const APPROX_CHARS_PER_TOKEN = 4;
4
6
  const PROMPT_AUTHORITY_PREASSEMBLY_MAY_OVERFLOW = "preassembly_may_overflow";
5
7
  const ASSEMBLE_BUDGET_HEADROOM_TOKENS = 256;
@@ -12,6 +14,15 @@ const EXACT_RECALL_SEARCH_K = 32;
12
14
  const EXACT_RECALL_MAX_TOKENS = 4;
13
15
  const RESERVED_CURRENT_TURN_TOKENS = 150;
14
16
  const AFTER_TURN_INGEST_MAX_TOKENS = 2048;
17
// Leading bracketed timestamp such as "[Mon 2024-01-02 03:04 ...] ". Anchored at
// the very start of the text; any spaces following the closing bracket are
// consumed as well.
const OPENCLAW_LEADING_TIMESTAMP_PREFIX_RE = /^\[[A-Za-z]{3} \d{4}-\d{2}-\d{2} \d{2}:\d{2}[^\]]*\] */;

// Header lines that introduce an OpenClaw metadata block. They are compared
// verbatim (case- and punctuation-sensitive) by the envelope-stripping helpers.
const OPENCLAW_METADATA_HEADERS = [
    "Conversation info (untrusted metadata):",
    "Sender (untrusted metadata):",
    "Thread starter (untrusted, for context):",
    "Reply target of current user message (untrusted, for context):",
    "Forwarded message context (untrusted metadata):",
    "Chat history since last reply (untrusted, for context):",
];
15
26
  const COMMON_QUERY_WORDS = new Set([
16
27
  "what", "does", "mean", "remember", "recall", "about", "this", "that",
17
28
  "the", "and", "for", "with", "from", "your", "have", "been", "were",
@@ -94,14 +105,171 @@ function stringifyKernelBlock(block) {
94
105
  /**
95
106
  * Normalizes kernel content (string or block array) to a flat string.
96
107
  */
97
- function normalizeKernelContent(content) {
98
- if (typeof content === "string") {
99
- return content;
108
function normalizeKernelContent(content, options = {}) {
    // Flatten the input to one string: strings pass through, block arrays are
    // rendered block-by-block (empty renderings dropped) and joined with "\n",
    // anything else becomes the empty string.
    let flattened = "";
    if (typeof content === "string") {
        flattened = content;
    }
    else if (Array.isArray(content)) {
        const renderedBlocks = content.map(stringifyKernelBlock);
        flattened = renderedBlocks.filter((part) => part.length > 0).join("\n");
    }
    // Only an explicit `true` opts in to keeping the summarized context line.
    const retainContext = options.retainOpenClawContext === true;
    return stripOpenClawUntrustedMetadataEnvelope(flattened, { retainContext });
}
118
/**
 * Strips leading OpenClaw metadata blocks from a message.
 *
 * Steps: drop a leading timestamp prefix, normalize CRLF to LF, set aside any
 * preamble that precedes the first known header, then repeatedly remove
 * metadata blocks from the front of what is left.
 *
 * @param text Raw message text.
 * @param options `retainContext: true` prepends a one-line summary of the
 *   stripped metadata (as produced by formatRetainedOpenClawContext).
 * @returns The original `text` untouched when no block was removed; otherwise
 *   preamble + optional context line + remaining text (left-trimmed).
 */
function stripOpenClawUntrustedMetadataEnvelope(text, options = {}) {
    const withoutTimestamp = text.replace(OPENCLAW_LEADING_TIMESTAMP_PREFIX_RE, "");
    let body = withoutTimestamp.replace(/\r\n/g, "\n");

    // Anything before the first header is kept verbatim as a preamble. The cut
    // is made after the last newline at or before the header when there is
    // one, otherwise directly at the header.
    let preamble = "";
    const firstHeaderAt = findFirstHeaderPosition(body);
    if (firstHeaderAt > 0) {
        const lineBreakAt = body.lastIndexOf("\n", firstHeaderAt);
        const preambleLength = lineBreakAt >= 0 ? lineBreakAt + 1 : firstHeaderAt;
        preamble = body.slice(0, preambleLength);
        body = body.slice(preambleLength);
    }

    // Peel blocks off the front until the helper reports "nothing removed",
    // which it signals by returning the text unchanged.
    const collectedContext = [];
    let removedAny = false;
    for (let next = stripOneOpenClawMetadataBlock(body); next.text !== body; next = stripOneOpenClawMetadataBlock(body)) {
        removedAny = true;
        collectedContext.push(...next.context);
        body = next.text;
    }
    if (!removedAny) {
        return text;
    }

    let contextLine = "";
    if (options.retainContext === true) {
        contextLine = formatRetainedOpenClawContext(collectedContext);
    }
    const message = body.trimStart();
    const rebuilt = contextLine ? `${contextLine}\n${message}` : message;
    return preamble + rebuilt;
}
153
/**
 * Finds the earliest occurrence of any known metadata header in `text`.
 *
 * @param text Text to scan.
 * @returns The smallest index at which one of OPENCLAW_METADATA_HEADERS
 *   occurs, or -1 when none of them occurs.
 */
function findFirstHeaderPosition(text) {
    const hits = OPENCLAW_METADATA_HEADERS
        .map((header) => text.indexOf(header))
        .filter((index) => index >= 0);
    return hits.length > 0 ? Math.min(...hits) : -1;
}
163
/**
 * Removes a single OpenClaw metadata block from the front of `text`.
 *
 * Leading whitespace is skipped; the text must then start with one of
 * OPENCLAW_METADATA_HEADERS. Two block shapes are recognised:
 *   - fenced:   header, newline, a ``` or ```json fence line, payload,
 *               closing ``` fence (one newline after it is consumed too);
 *   - unfenced: header followed by lines up to the first blank line.
 *
 * @param text Candidate text; the fence patterns match "\n" line endings only.
 * @returns `{ text, context }`. `text` is what remains after the block and
 *   `context` is the summary from summarizeOpenClawMetadataBlock (always empty
 *   for unfenced blocks). When no block can be positively identified (no
 *   header, unclosed fence, or unfenced with no blank line) the input is
 *   returned unchanged so callers can detect "nothing stripped" by comparing.
 */
function stripOneOpenClawMetadataBlock(text) {
    const unchanged = { text, context: [] };
    const candidate = text.trimStart();
    const header = OPENCLAW_METADATA_HEADERS.find((known) => candidate.startsWith(known));
    if (header === undefined) {
        return unchanged;
    }
    const afterHeader = candidate.slice(header.length);
    const fenceOpen = afterHeader.match(/^\n```(?:json)?\n/i);
    if (!fenceOpen) {
        // Unfenced shape: everything up to (and including) the first blank line
        // belongs to the block.
        const lines = afterHeader.replace(/^\n?/, "").split("\n");
        const blankAt = lines.findIndex((line) => line.trim() === "");
        if (blankAt < 0) {
            // No fence and no blank line: refuse to guess rather than erase content.
            return unchanged;
        }
        return { text: lines.slice(blankAt + 1).join("\n"), context: [] };
    }
    const closingFence = "\n```";
    const payloadStart = header.length + fenceOpen[0].length;
    // Start the search on the newline that ends the opening fence line. With an
    // empty payload that same newline is the one preceding the closing fence;
    // searching from payloadStart would skip it and either report "unclosed" or
    // run on to a later, unrelated code fence and swallow the text in between.
    const fenceEnd = candidate.indexOf(closingFence, payloadStart - 1);
    if (fenceEnd < 0) {
        // Unclosed fence: cannot positively identify the envelope.
        return unchanged;
    }
    // For an empty payload fenceEnd < payloadStart and slice() yields "".
    const payload = candidate.slice(payloadStart, fenceEnd);
    let restStart = fenceEnd + closingFence.length;
    if (candidate.startsWith("\n", restStart)) {
        restStart += 1;
    }
    return {
        text: candidate.slice(restStart),
        context: summarizeOpenClawMetadataBlock(header, payload),
    };
}
196
/**
 * Turns the JSON payload of a metadata block into `label=value` strings.
 *
 * Only the "Conversation info" and "Sender" headers produce output; every
 * other header, and any payload that is not a JSON object, yields [].
 *
 * @param header The exact header line that introduced the block.
 * @param jsonText Raw text found between the fences.
 * @returns Non-empty `label=value` entries, in a fixed label order.
 */
function summarizeOpenClawMetadataBlock(header, jsonText) {
    const record = parseJsonRecord(jsonText);
    if (!record) {
        return [];
    }
    // First usable value among the named payload keys (see firstString).
    const pick = (...keys) => firstString(...keys.map((key) => record[key]));
    let fields;
    switch (header) {
        case "Conversation info (untrusted metadata):": {
            // chat_id is only reported under its own label when the payload
            // carries at least one of these chat/service keys.
            const hasIMessageContext = pick("chat_guid", "chatGuid", "chat_identifier", "chatIdentifier", "chat_name", "chatName", "service") != null;
            fields = [
                ["channel", pick("group_channel", "channel", "group_subject")],
                ["channel_id", pick("chat_id", "channel_id")],
                ["account_id", pick("account_id", "accountId")],
                ["provider", pick("provider", "surface")],
                ["chat_id", hasIMessageContext ? pick("chat_id", "chatId") : undefined],
                ["chat_guid", pick("chat_guid", "chatGuid")],
                ["chat_identifier", pick("chat_identifier", "chatIdentifier")],
                ["chat_name", pick("chat_name", "chatName")],
                ["is_group", pick("is_group", "isGroup", "is_group_chat")],
                ["chat_type", pick("chat_type", "chatType")],
                ["service", pick("service")],
                ["server_id", pick("group_space", "guild_id", "server_id")],
                ["sender_id", pick("sender_id", "user_id")],
                ["sender", pick("sender")],
                ["emoji_id", pick("emoji_id", "server_emoji_id", "guild_emoji_id")],
                ["emoji", pick("emoji_name", "emoji")],
            ];
            break;
        }
        case "Sender (untrusted metadata):":
            fields = [
                ["username", pick("username", "tag", "name", "label")],
                ["user_id", pick("id", "user_id", "sender_id")],
                ["sender", pick("sender", "e164")],
            ];
            break;
        default:
            return [];
    }
    return fields
        .map(([label, value]) => labelValue(label, value))
        .filter(isNonEmptyString);
}
231
/**
 * Parses `jsonText` and returns it only when it is a plain JSON object.
 *
 * @param jsonText Text expected to contain JSON.
 * @returns The parsed object, or null for invalid JSON, arrays, null and
 *   primitive values.
 */
function parseJsonRecord(jsonText) {
    let decoded;
    try {
        decoded = JSON.parse(jsonText);
    }
    catch {
        return null;
    }
    const isRecord = Boolean(decoded) && typeof decoded === "object" && !Array.isArray(decoded);
    return isRecord ? decoded : null;
}
242
/**
 * Formats one metadata field as `label=value`.
 *
 * @param label Field name to emit.
 * @param value Raw field value; falsy values mean "field absent".
 * @returns `label=<sanitized value>`, or "" when the value is absent or when
 *   nothing is left of it after sanitizing (so no dangling `label=` entry is
 *   ever produced).
 */
function labelValue(label, value) {
    if (!value) {
        return "";
    }
    const cleaned = sanitizeOpenClawContextValue(value);
    return cleaned.length > 0 ? `${label}=${cleaned}` : "";
}
245
/**
 * Returns the first usable candidate, rendered as a string.
 *
 * Usable means: a string that is non-empty after trimming (returned trimmed),
 * a finite number, or a boolean. Everything else is skipped.
 *
 * @param values Candidates in priority order.
 * @returns The first usable candidate as a string, or undefined if none is.
 */
function firstString(...values) {
    for (const candidate of values) {
        switch (typeof candidate) {
            case "string": {
                const trimmed = candidate.trim();
                if (trimmed.length > 0) {
                    return trimmed;
                }
                break;
            }
            case "number":
                if (Number.isFinite(candidate)) {
                    return String(candidate);
                }
                break;
            case "boolean":
                return String(candidate);
            default:
                break;
        }
    }
    return undefined;
}
259
/**
 * Makes an untrusted metadata value safe to embed in the single-line
 * `[OpenClaw context: label=value; ...]` summary.
 *
 * - Runs of CR, LF and ";" become one space (";" is the entry separator).
 * - "[" and "]" become "(" and ")" so a value cannot close the surrounding
 *   bracket early and smuggle text outside the context line.
 * - The result is trimmed and capped at 120 UTF-16 code units; whitespace
 *   exposed at the end by the cut is removed as well.
 *
 * @param value Raw string value.
 * @returns The sanitized, length-bounded value (possibly empty).
 */
function sanitizeOpenClawContextValue(value) {
    // 120 chars is a conservative bound for a single routing field value
    // (channel name, server id, etc.). Any field exceeding this is likely
    // malformed or adversarial input, not useful routing metadata.
    const MAX_VALUE_LENGTH = 120;
    return value
        .replace(/[\r\n;]+/g, " ")
        .replace(/\[/g, "(")
        .replace(/\]/g, ")")
        .trim()
        .slice(0, MAX_VALUE_LENGTH)
        .trimEnd();
}
265
/**
 * Renders the retained metadata entries as one bracketed line.
 *
 * @param values `label=value` entries; blank entries and repeats are dropped,
 *   first-seen order is kept.
 * @returns `[OpenClaw context: a; b; ...]`, or "" when nothing remains.
 */
function formatRetainedOpenClawContext(values) {
    const distinct = new Set();
    values.filter(isNonEmptyString).forEach((entry) => distinct.add(entry));
    if (distinct.size === 0) {
        return "";
    }
    return `[OpenClaw context: ${Array.from(distinct).join("; ")}]`;
}
271
/**
 * Predicate: is `value` a string with at least one non-whitespace character?
 *
 * Non-string input (undefined, null, numbers, objects) yields false instead of
 * throwing, so the function is safe to use as a filter over mixed arrays.
 *
 * @param value Any value.
 * @returns true only for strings that are non-empty after trimming.
 */
function isNonEmptyString(value) {
    return typeof value === "string" && value.trim().length > 0;
}
106
274
  /**
107
275
  * Approximates token count for a text string.
@@ -373,18 +541,24 @@ function resolveAfterTurnPredictiveCompactionTokenCount(args) {
373
541
  /**
374
542
  * Normalizes a single kernel message into the kernel-compatible format.
375
543
  */
376
- export function normalizeKernelMessage(message) {
544
export function normalizeKernelMessage(message, options = {}) {
    const { role } = message;
    // Content is flattened to a string and has its metadata envelope stripped.
    const content = normalizeKernelContent(message.content, options);
    // Keep a caller-supplied string id; otherwise mint a fresh random one so
    // every normalized message carries an id.
    const id = typeof message.id === "string" ? message.id : randomUUID();
    return { role, content, id };
}
383
551
  /**
384
552
  * Normalizes an array of kernel messages.
553
+ *
554
+ * Non-user messages whose normalized content is empty or whitespace-only
555
+ * are dropped. This prevents assistant/system turns that consisted entirely
556
+ * of stripped metadata from persisting as empty records.
385
557
  */
386
- export function normalizeKernelMessages(messages) {
387
- return messages.map((message) => normalizeKernelMessage(message));
558
export function normalizeKernelMessages(messages, options = {}) {
    return messages.flatMap((raw) => {
        const normalized = normalizeKernelMessage(raw, options);
        // User messages are always kept; any other role is kept only when its
        // normalized content has something besides whitespace.
        const keep = normalized.role === "user" || normalized.content.trim().length > 0;
        return keep ? [normalized] : [];
    });
}
389
563
  /**
390
564
  * Extracts tokens for exact recall matching from text.
@@ -471,9 +645,12 @@ function escapeMemoryFactText(text) {
471
645
  .replaceAll("\t", "&#9;");
472
646
  }
473
647
  // Tool-call pattern detection for sanitization
474
- const TOOL_CALL_BRACKET_RE = /\[tool:([^\]]+)\]/gi;
475
- const TOOL_CALL_JSON_RE = /\{\s*"name"\s*:\s*"([^"]+)"[^}]*\}/g;
476
- const TOOL_RESULT_ANNOTATION_RE = /\[tool:[^\]]+\](?:\s*[^{\[]*)?/g;
648
// [tool:name], optionally followed (after any whitespace) by the shortest
// {...} object, [...] array, or "..." string that follows it.
const TOOL_CALL_BRACKET_RE = /\[tool:([^\]]+)\](?:\s*(?:\{[\s\S]*?\}|\[[\s\S]*?\]|".*?"))?/gi;
// Raw JSON tool-call objects that start with a "name" field; the match ends at
// the first closing brace after that field.
const TOOL_CALL_JSON_RE = /\{\s*"name"\s*:\s*"([^"]+)"[\s\S]*?\}/g;
// Older [tool:name] annotations; consumes everything up to the end of the line.
const TOOL_RESULT_ANNOTATION_RE = /\[tool:[^\]]+\][^\n]*/g;
477
654
  /**
478
655
  * Sanitizes text that may contain tool-call syntax to prevent loop-priming.
479
656
  * Replaces executable-looking patterns with neutral summaries rather than
@@ -697,9 +874,10 @@ export function normalizeAssembleResult(result, sourceMessages) {
697
874
  isRealTranscript = message.role === "user" || message.role === "assistant";
698
875
  }
699
876
  if (isRealTranscript) {
877
+ // BUG PATH A SEALED: Sanitize the content before pushing to the trajectory
700
878
  messages.push({
701
879
  role: message.role === "user" ? "user" : "assistant",
702
- content,
880
+ content: sanitizeToolCallPatterns(content),
703
881
  ...(typeof message.id === "string" ? { id: message.id } : {}),
704
882
  });
705
883
  }
@@ -724,6 +902,20 @@ export function normalizeAssembleResult(result, sourceMessages) {
724
902
  ...(result.debug != null ? { debug: result.debug } : {}),
725
903
  };
726
904
  }
905
/**
 * Pulls a well-formed cursor out of a response object.
 *
 * @param result Any value; expected to be an object with a `cursor` property.
 * @returns `result.cursor` itself when it is an object whose
 *   `lastProcessedIndex` and `sessionVersion` are numbers and whose
 *   `manifestTailHash` is a string (possibly empty); otherwise undefined.
 */
function extractCursorFromResult(result) {
    if (!result || typeof result !== "object" || !("cursor" in result)) {
        return undefined;
    }
    const candidate = result.cursor;
    if (!candidate || typeof candidate !== "object") {
        return undefined;
    }
    const wellFormed = typeof candidate.lastProcessedIndex === "number"
        && typeof candidate.sessionVersion === "number"
        && typeof candidate.manifestTailHash === "string";
    return wellFormed ? candidate : undefined;
}
727
919
  /**
728
920
  * Builds the context engine factory with the given client getter.
729
921
  */
@@ -999,6 +1191,9 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
999
1191
  sessionKey: args.sessionKey,
1000
1192
  });
1001
1193
  const messages = normalizeKernelMessages(args.messages);
1194
+ const strippedPrompt = args.prompt
1195
+ ? normalizeKernelContent(args.prompt, { retainOpenClawContext: false })
1196
+ : "";
1002
1197
  const lastUserMessage = findLastReplaySafeUserMessage(messages);
1003
1198
  const reservedCurrentTurnTokens = lastUserMessage
1004
1199
  ? approximateMessageTokens(lastUserMessage)
@@ -1006,7 +1201,7 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
1006
1201
  const currentContextTokens = resolvePredictiveCompactionTokenCount({
1007
1202
  currentTokenCount: args.currentTokenCount,
1008
1203
  messages,
1009
- prompt: args.prompt,
1204
+ prompt: strippedPrompt,
1010
1205
  });
1011
1206
  const dynamicCompactThreshold = getDynamicCompactThreshold(args.tokenBudget);
1012
1207
  const predictiveTargetSize = resolvePredictiveCompactionTarget({
@@ -1053,7 +1248,7 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
1053
1248
  sessionId,
1054
1249
  sessionKey: args.sessionKey,
1055
1250
  userId,
1056
- prompt: args.prompt ?? "",
1251
+ prompt: strippedPrompt,
1057
1252
  messages,
1058
1253
  tokenBudget: args.tokenBudget,
1059
1254
  config: buildAssemblyConfig(args.tokenBudget),
@@ -1061,7 +1256,7 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
1061
1256
  });
1062
1257
  const assembled = normalizeAssembleResult(resp, args.messages);
1063
1258
  let enforced = enforceTokenBudgetInvariant(await augmentWithExactRecall(assembled, {
1064
- queryText: args.prompt ?? messages[messages.length - 1]?.content ?? "",
1259
+ queryText: strippedPrompt || (messages[messages.length - 1]?.content ?? ""),
1065
1260
  userId,
1066
1261
  sessionId,
1067
1262
  tokenBudget: args.tokenBudget,
@@ -1140,12 +1335,24 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
1140
1335
  userIdOverride: args.userId,
1141
1336
  sessionKey: args.sessionKey,
1142
1337
  });
1338
+ // Load the session manifest, then normalize the after-turn messages (the two calls run one after the other, not in parallel)
1339
+ const manifest = manifestStore.load(sessionId, logger);
1143
1340
  const afterTurnMessages = selectAfterTurnMessages(args.messages, args.prePromptMessageCount, logger);
1144
- const messages = normalizeKernelMessages(afterTurnMessages);
1145
- const ingestMessages = boundAfterTurnMessagesForIngest(messages, logger, sessionId);
1146
- const msgCount = messages.length;
1341
+ const messages = normalizeKernelMessages(afterTurnMessages, { retainOpenClawContext: true });
1342
+ // Find overlap: messages already in our manifest
1343
+ const overlapIndex = manifestStore.findOverlapIndex(manifest, messages);
1344
+ const newMessages = messages.slice(overlapIndex);
1345
+ // Apply token budget cap only to new messages
1346
+ const ingestMessages = boundAfterTurnMessagesForIngest(newMessages, logger, sessionId);
1347
+ const startIndex = manifestStore.deriveStartingIndex(manifest, args.prePromptMessageCount);
1348
+ const cursor = {
1349
+ lastProcessedIndex: startIndex > 0 ? startIndex - 1 : 0,
1350
+ sessionVersion: manifest.version,
1351
+ manifestTailHash: manifest.tailHash,
1352
+ };
1147
1353
  logger.info?.(`LibraVDB afterTurn sessionId=${sessionId} userId=${userId} ` +
1148
- `messageCount=${msgCount} totalMessages=${args.messages.length} ` +
1354
+ `messageCount=${messages.length} newMessages=${newMessages.length} ` +
1355
+ `overlapIndex=${overlapIndex} startIndex=${startIndex} ` +
1149
1356
  `prePromptMessageCount=${args.prePromptMessageCount ?? "unknown"} ` +
1150
1357
  `heartbeat=${args.isHeartbeat ?? false}`);
1151
1358
  try {
@@ -1158,8 +1365,40 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
1158
1365
  sessionKey: args.sessionKey,
1159
1366
  userId,
1160
1367
  messages: ingestMessages,
1368
+ prePromptMessageCount: args.prePromptMessageCount,
1161
1369
  isHeartbeat: args.isHeartbeat,
1370
+ cursor,
1162
1371
  });
1372
+ // Reconcile manifest with daemon-confirmed cursor.
1373
+ // The daemon returns a cursor even when it ingests zero messages
1374
+ // (e.g. gap detected, all messages deduped). Trust its
1375
+ // lastProcessedIndex over our optimistic startIndex math.
1376
+ const daemonCursor = extractCursorFromResult(result);
1377
+ if (daemonCursor) {
1378
+ if (!daemonCursor.manifestTailHash) {
1379
+ // Daemon detected a gap: its DB is behind our manifest.
1380
+ // It did NOT ingest our messages. Reset the manifest so the
1381
+ // next turn does a full re-sync.
1382
+ logger.warn?.(`[LibraVDB] Daemon reported cursor gap for session ${sessionId}. ` +
1383
+ `Resetting manifest for full re-sync next turn.`);
1384
+ manifestStore.save(manifestStore.createEmpty(sessionId));
1385
+ }
1386
+ else if (ingestMessages.length > 0) {
1387
+ // Normal path: reconcile to what the daemon actually confirmed.
1388
+ const confirmedIndex = daemonCursor.lastProcessedIndex;
1389
+ const ackCount = Math.max(0, confirmedIndex - startIndex + 1);
1390
+ if (ackCount > 0) {
1391
+ const ackedMessages = ingestMessages.slice(0, ackCount);
1392
+ const updatedManifest = manifestStore.appendACKedMessages(manifest, ackedMessages, startIndex);
1393
+ manifestStore.save(updatedManifest);
1394
+ }
1395
+ }
1396
+ }
1397
+ else if (ingestMessages.length > 0) {
1398
+ // Legacy daemon (no cursor in response): optimistic ACK.
1399
+ const updatedManifest = manifestStore.appendACKedMessages(manifest, ingestMessages, startIndex);
1400
+ manifestStore.save(updatedManifest);
1401
+ }
1163
1402
  await performAfterTurnPredictiveCompaction({
1164
1403
  sessionId,
1165
1404
  messages,
@@ -1,4 +1,4 @@
1
- import type { LoggerLike } from "./types.js";
1
+ import type { LoggerLike, PluginConfig } from "./types.js";
2
2
  export type IdentitySource = "config" | "file" | "auto" | "session-key" | "default";
3
3
  export type ResolvedIdentity = {
4
4
  userId: string;
@@ -13,3 +13,12 @@ export declare function resolveIdentity(params: {
13
13
  * read-only commands (e.g. status --deep) that should not mutate disk. */
14
14
  noAutoPersist?: boolean;
15
15
  }): ResolvedIdentity;
16
+ /**
17
+ * Resolves a stable tenant key for multi-agent DB routing.
18
+ *
19
+ * Priority chain:
20
+ * 1. cfg.tenantId (explicit config, highest priority)
21
+ * 2. LIBRAVDB_AGENT_ID env var (container/CI override)
22
+ * 3. Fall back to resolved userId (existing identity system)
23
+ */
24
+ export declare function resolveTenantKey(cfg: PluginConfig): string;
package/dist/identity.js CHANGED
@@ -118,3 +118,23 @@ export function resolveIdentity(params) {
118
118
  }
119
119
  return { userId: autoId, source: "auto" };
120
120
  }
121
+ /**
122
+ * Resolves a stable tenant key for multi-agent DB routing.
123
+ *
124
+ * Priority chain:
125
+ * 1. cfg.tenantId (explicit config, highest priority)
126
+ * 2. LIBRAVDB_AGENT_ID env var (container/CI override)
127
+ * 3. Fall back to resolved userId (existing identity system)
128
+ */
129
export function resolveTenantKey(cfg) {
    // Explicit config wins; the environment override is consulted only when
    // the config value is missing or blank after trimming.
    const configured = cfg.tenantId?.trim() || process.env.LIBRAVDB_AGENT_ID?.trim();
    if (configured) {
        return configured;
    }
    // Neither override is set: fall back to the regular identity resolution.
    const { userId } = resolveIdentity({
        configUserId: cfg.userId,
        identityPath: cfg.identityPath,
    });
    return userId;
}
+ }