clawmem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/AGENTS.md +660 -0
  2. package/CLAUDE.md +660 -0
  3. package/LICENSE +21 -0
  4. package/README.md +993 -0
  5. package/SKILL.md +717 -0
  6. package/bin/clawmem +75 -0
  7. package/package.json +72 -0
  8. package/src/amem.ts +797 -0
  9. package/src/beads.ts +263 -0
  10. package/src/clawmem.ts +1849 -0
  11. package/src/collections.ts +405 -0
  12. package/src/config.ts +178 -0
  13. package/src/consolidation.ts +123 -0
  14. package/src/directory-context.ts +248 -0
  15. package/src/errors.ts +41 -0
  16. package/src/formatter.ts +427 -0
  17. package/src/graph-traversal.ts +247 -0
  18. package/src/hooks/context-surfacing.ts +317 -0
  19. package/src/hooks/curator-nudge.ts +89 -0
  20. package/src/hooks/decision-extractor.ts +639 -0
  21. package/src/hooks/feedback-loop.ts +214 -0
  22. package/src/hooks/handoff-generator.ts +345 -0
  23. package/src/hooks/postcompact-inject.ts +226 -0
  24. package/src/hooks/precompact-extract.ts +314 -0
  25. package/src/hooks/pretool-inject.ts +79 -0
  26. package/src/hooks/session-bootstrap.ts +324 -0
  27. package/src/hooks/staleness-check.ts +130 -0
  28. package/src/hooks.ts +367 -0
  29. package/src/indexer.ts +327 -0
  30. package/src/intent.ts +294 -0
  31. package/src/limits.ts +26 -0
  32. package/src/llm.ts +1175 -0
  33. package/src/mcp.ts +2138 -0
  34. package/src/memory.ts +336 -0
  35. package/src/mmr.ts +93 -0
  36. package/src/observer.ts +269 -0
  37. package/src/openclaw/engine.ts +283 -0
  38. package/src/openclaw/index.ts +221 -0
  39. package/src/openclaw/plugin.json +83 -0
  40. package/src/openclaw/shell.ts +207 -0
  41. package/src/openclaw/tools.ts +304 -0
  42. package/src/profile.ts +346 -0
  43. package/src/promptguard.ts +218 -0
  44. package/src/retrieval-gate.ts +106 -0
  45. package/src/search-utils.ts +127 -0
  46. package/src/server.ts +783 -0
  47. package/src/splitter.ts +325 -0
  48. package/src/store.ts +4062 -0
  49. package/src/validation.ts +67 -0
  50. package/src/watcher.ts +58 -0
package/src/profile.ts ADDED
@@ -0,0 +1,346 @@
1
+ /**
2
+ * User Profile Abstraction - Two-tier profile (static facts + dynamic context)
3
+ *
4
+ * Builds a profile document from vault contents:
5
+ * - Static: persistent facts extracted from decisions, hubs, and notes
6
+ * - Dynamic: recent context from last sessions and progress docs
7
+ *
8
+ * Stored at _clawmem/profile.md, injected at session start.
9
+ */
10
+
11
+ import type { Store } from "./store.ts";
12
+ import { hashContent } from "./indexer.ts";
13
+ import { smartTruncate } from "./hooks.ts";
14
+ import { MAX_LEVENSHTEIN_LENGTH } from "./limits.ts";
15
+
16
+ // =============================================================================
17
+ // Types
18
+ // =============================================================================
19
+
20
+ export type Profile = {
21
+ static: string[];
22
+ dynamic: string[];
23
+ updatedAt: string;
24
+ };
25
+
26
+ // =============================================================================
27
+ // Config
28
+ // =============================================================================
29
+
30
+ const STATIC_MAX_TOKENS = 500;
31
+ const DYNAMIC_MAX_TOKENS = 300;
32
+ const STATIC_MAX_FACTS = 30;
33
+ const DYNAMIC_MAX_ITEMS = 10;
34
+ const PROFILE_PATH = "profile.md";
35
+ const PROFILE_COLLECTION = "_clawmem";
36
+ const STALE_SESSION_THRESHOLD = 5;
37
+
38
+ // =============================================================================
39
+ // Profile Building
40
+ // =============================================================================
41
+
42
+ export function buildStaticProfile(store: Store): string[] {
43
+ const facts: string[] = [];
44
+ const seen = new Set<string>();
45
+
46
+ // Extract from decisions
47
+ const decisions = store.getDocumentsByType("decision", 20);
48
+ for (const d of decisions) {
49
+ const body = store.getDocumentBody({
50
+ filepath: `${d.collection}/${d.path}`,
51
+ displayPath: `${d.collection}/${d.path}`,
52
+ } as any);
53
+ if (!body) continue;
54
+
55
+ const bullets = extractBullets(body);
56
+ for (const bullet of bullets) {
57
+ const key = bullet.toLowerCase().trim().slice(0, 60);
58
+ if (seen.has(key)) continue;
59
+ if (isTooSimilar(key, seen)) continue;
60
+ seen.add(key);
61
+ facts.push(bullet);
62
+ }
63
+ }
64
+
65
+ // Extract from hub documents
66
+ const hubs = store.getDocumentsByType("hub", 10);
67
+ for (const h of hubs) {
68
+ const body = store.getDocumentBody({
69
+ filepath: `${h.collection}/${h.path}`,
70
+ displayPath: `${h.collection}/${h.path}`,
71
+ } as any);
72
+ if (!body) continue;
73
+
74
+ const bullets = extractBullets(body);
75
+ for (const bullet of bullets) {
76
+ const key = bullet.toLowerCase().trim().slice(0, 60);
77
+ if (seen.has(key)) continue;
78
+ if (isTooSimilar(key, seen)) continue;
79
+ seen.add(key);
80
+ facts.push(bullet);
81
+ }
82
+ }
83
+
84
+ // Truncate to budget
85
+ const maxChars = STATIC_MAX_TOKENS * 4;
86
+ let charCount = 0;
87
+ const result: string[] = [];
88
+ for (const fact of facts.slice(0, STATIC_MAX_FACTS)) {
89
+ if (charCount + fact.length > maxChars) break;
90
+ result.push(fact);
91
+ charCount += fact.length;
92
+ }
93
+
94
+ return result;
95
+ }
96
+
97
+ export function buildDynamicProfile(store: Store): string[] {
98
+ const items: string[] = [];
99
+
100
+ // Recent sessions
101
+ const sessions = store.getRecentSessions(5);
102
+ for (const s of sessions) {
103
+ if (!s.handoffPath) continue;
104
+
105
+ const body = store.getDocumentBody({
106
+ filepath: s.handoffPath,
107
+ displayPath: s.handoffPath,
108
+ } as any);
109
+ if (!body) continue;
110
+
111
+ // Extract "Current State" and "Next Session Should" sections
112
+ const currentState = extractSection(body, "Current State");
113
+ const nextSession = extractSection(body, "Next Session Should");
114
+
115
+ if (currentState) {
116
+ items.push(`Current: ${smartTruncate(currentState, 150)}`);
117
+ }
118
+ if (nextSession) {
119
+ items.push(`Next: ${smartTruncate(nextSession, 150)}`);
120
+ }
121
+ }
122
+
123
+ // Recent progress documents
124
+ const cutoff = new Date();
125
+ cutoff.setDate(cutoff.getDate() - 7);
126
+ const progress = store.getDocumentsByType("progress", 5);
127
+ const recent = progress.filter(p => p.modifiedAt >= cutoff.toISOString());
128
+
129
+ for (const p of recent) {
130
+ items.push(`Progress: ${p.title} (${p.modifiedAt.slice(0, 10)})`);
131
+ }
132
+
133
+ // Truncate to budget
134
+ const maxChars = DYNAMIC_MAX_TOKENS * 4;
135
+ let charCount = 0;
136
+ const result: string[] = [];
137
+ for (const item of items.slice(0, DYNAMIC_MAX_ITEMS)) {
138
+ if (charCount + item.length > maxChars) break;
139
+ result.push(item);
140
+ charCount += item.length;
141
+ }
142
+
143
+ return result;
144
+ }
145
+
146
+ // =============================================================================
147
+ // Profile Persistence
148
+ // =============================================================================
149
+
150
+ export function updateProfile(store: Store): void {
151
+ const staticFacts = buildStaticProfile(store);
152
+ const dynamicItems = buildDynamicProfile(store);
153
+ const now = new Date().toISOString();
154
+
155
+ const body = formatProfileDocument(staticFacts, dynamicItems);
156
+ const hash = hashContent(body);
157
+
158
+ // Store content
159
+ store.insertContent(hash, body, now);
160
+
161
+ // Upsert document (handle active, inactive, or missing)
162
+ const existing = store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH);
163
+ if (existing) {
164
+ store.updateDocument(existing.id, "User Profile", hash, now);
165
+ } else {
166
+ // Check for inactive row (UNIQUE(collection, path) prevents re-insert)
167
+ const inactive = store.findAnyDocument(PROFILE_COLLECTION, PROFILE_PATH);
168
+ if (inactive) {
169
+ // Reactivate and update
170
+ store.reactivateDocument(inactive.id, "User Profile", hash, now);
171
+ store.updateDocumentMeta(inactive.id, {
172
+ content_type: "hub",
173
+ tags: JSON.stringify(["auto-generated", "profile"]),
174
+ });
175
+ } else {
176
+ try {
177
+ store.insertDocument(PROFILE_COLLECTION, PROFILE_PATH, "User Profile", hash, now, now);
178
+ const doc = store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH);
179
+ if (doc) {
180
+ store.updateDocumentMeta(doc.id, {
181
+ content_type: "hub",
182
+ tags: JSON.stringify(["auto-generated", "profile"]),
183
+ });
184
+ }
185
+ } catch {
186
+ // Collection may not exist yet
187
+ }
188
+ }
189
+ }
190
+ }
191
+
192
+ export function getProfile(store: Store): Profile | null {
193
+ const doc = store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH);
194
+ if (!doc) return null;
195
+
196
+ const body = store.getDocumentBody({
197
+ filepath: `${PROFILE_COLLECTION}/${PROFILE_PATH}`,
198
+ displayPath: `${PROFILE_COLLECTION}/${PROFILE_PATH}`,
199
+ } as any);
200
+ if (!body) return null;
201
+
202
+ return parseProfileDocument(body);
203
+ }
204
+
205
+ export function isProfileStale(store: Store): boolean {
206
+ const doc = store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH);
207
+ if (!doc) return true;
208
+
209
+ // Check how many sessions since last profile update
210
+ const sessions = store.getRecentSessions(STALE_SESSION_THRESHOLD + 1);
211
+ if (sessions.length === 0) return false;
212
+
213
+ // Get the profile's modification timestamp from the document row
214
+ const rows = store.getDocumentsByType("hub", 50);
215
+ const profileRow = rows.find(r => r.path === PROFILE_PATH && r.collection === PROFILE_COLLECTION);
216
+ if (!profileRow) return true;
217
+
218
+ const profileDate = profileRow.modifiedAt;
219
+ const sessionsSince = sessions.filter(s => s.startedAt > profileDate);
220
+ return sessionsSince.length >= STALE_SESSION_THRESHOLD;
221
+ }
222
+
223
+ // =============================================================================
224
+ // Formatting
225
+ // =============================================================================
226
+
227
+ function formatProfileDocument(staticFacts: string[], dynamicItems: string[]): string {
228
+ const lines = [
229
+ "---",
230
+ "content_type: hub",
231
+ "tags: [auto-generated, profile]",
232
+ "---",
233
+ "",
234
+ "# User Profile",
235
+ "",
236
+ ];
237
+
238
+ if (staticFacts.length > 0) {
239
+ lines.push("## Known Context", "");
240
+ for (const fact of staticFacts) {
241
+ lines.push(`- ${fact}`);
242
+ }
243
+ lines.push("");
244
+ }
245
+
246
+ if (dynamicItems.length > 0) {
247
+ lines.push("## Current Focus", "");
248
+ for (const item of dynamicItems) {
249
+ lines.push(`- ${item}`);
250
+ }
251
+ lines.push("");
252
+ }
253
+
254
+ return lines.join("\n");
255
+ }
256
+
257
+ function parseProfileDocument(body: string): Profile {
258
+ const staticFacts: string[] = [];
259
+ const dynamicItems: string[] = [];
260
+ let updatedAt = "";
261
+
262
+ let section = "";
263
+ for (const line of body.split("\n")) {
264
+ if (line.startsWith("## Known Context")) {
265
+ section = "static";
266
+ continue;
267
+ }
268
+ if (line.startsWith("## Current Focus")) {
269
+ section = "dynamic";
270
+ continue;
271
+ }
272
+ if (line.startsWith("## ")) {
273
+ section = "";
274
+ continue;
275
+ }
276
+
277
+ const bullet = line.match(/^-\s+(.+)/);
278
+ if (!bullet?.[1]) continue;
279
+
280
+ if (section === "static") staticFacts.push(bullet[1]);
281
+ else if (section === "dynamic") dynamicItems.push(bullet[1]);
282
+ }
283
+
284
+ return { static: staticFacts, dynamic: dynamicItems, updatedAt };
285
+ }
286
+
287
+ // =============================================================================
288
+ // Helpers
289
+ // =============================================================================
290
+
291
+ function extractBullets(body: string): string[] {
292
+ const bullets: string[] = [];
293
+ for (const line of body.split("\n")) {
294
+ const match = line.match(/^[-*]\s+(.{10,200})/);
295
+ if (match?.[1]) {
296
+ bullets.push(match[1].trim());
297
+ }
298
+ }
299
+ return bullets;
300
+ }
301
+
302
+ function extractSection(body: string, sectionName: string): string | null {
303
+ const regex = new RegExp(
304
+ `^#{1,3}\\s+${escapeRegex(sectionName)}\\b[^\\n]*\\n([\\s\\S]*?)(?=^#{1,3}\\s|$)`,
305
+ "mi"
306
+ );
307
+ const match = body.match(regex);
308
+ if (!match?.[1]) return null;
309
+ const text = match[1].trim();
310
+ return text.length > 10 ? text : null;
311
+ }
312
+
313
+ function escapeRegex(str: string): string {
314
+ return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
315
+ }
316
+
317
+ function isTooSimilar(key: string, existing: Set<string>): boolean {
318
+ for (const e of existing) {
319
+ if (levenshteinDistance(key, e) < 5) return true;
320
+ }
321
+ return false;
322
+ }
323
+
324
+ function levenshteinDistance(a: string, b: string): number {
325
+ // Bound inputs to prevent O(n²) memory blowup
326
+ if (a.length > MAX_LEVENSHTEIN_LENGTH || b.length > MAX_LEVENSHTEIN_LENGTH) return Math.abs(a.length - b.length);
327
+ if (a.length === 0) return b.length;
328
+ if (b.length === 0) return a.length;
329
+
330
+ const matrix: number[][] = [];
331
+ for (let i = 0; i <= b.length; i++) matrix[i] = [i];
332
+ for (let j = 0; j <= a.length; j++) matrix[0]![j] = j;
333
+
334
+ for (let i = 1; i <= b.length; i++) {
335
+ for (let j = 1; j <= a.length; j++) {
336
+ const cost = b[i - 1] === a[j - 1] ? 0 : 1;
337
+ matrix[i]![j] = Math.min(
338
+ matrix[i - 1]![j]! + 1,
339
+ matrix[i]![j - 1]! + 1,
340
+ matrix[i - 1]![j - 1]! + cost
341
+ );
342
+ }
343
+ }
344
+
345
+ return matrix[b.length]![a.length]!;
346
+ }
@@ -0,0 +1,218 @@
1
+ /**
2
+ * ClawMem Prompt Injection Guard
3
+ *
4
+ * Multi-layer detection system ported from SAME's go-promptguard integration.
5
+ * Checks vault content for prompt injection attempts before context injection.
6
+ * Pure pattern-based (no LLM) for sub-ms latency.
7
+ */
8
+
9
+ // =============================================================================
10
+ // Types
11
+ // =============================================================================
12
+
13
+ export interface DetectionResult {
14
+ safe: boolean;
15
+ detector: string | null;
16
+ score: number; // 0.0 = safe, 1.0 = definite injection
17
+ }
18
+
19
+ // =============================================================================
20
+ // Detection Layers
21
+ // =============================================================================
22
+
23
+ /**
24
+ * Layer 1: Legacy string patterns from SAME (13 patterns).
25
+ * Case-insensitive substring match. Score: 1.0 on match.
26
+ */
27
+ const LEGACY_PATTERNS = [
28
+ "ignore previous",
29
+ "ignore all previous",
30
+ "ignore above",
31
+ "disregard previous",
32
+ "disregard all previous",
33
+ "you are now",
34
+ "new instructions",
35
+ "system prompt",
36
+ "<system>",
37
+ "</system>",
38
+ "IMPORTANT:",
39
+ "CRITICAL:",
40
+ "override",
41
+ ];
42
+
43
+ /**
44
+ * Layer 2: Role injection patterns. Score: 0.9 on match.
45
+ */
46
+ const ROLE_INJECTION_PATTERNS = [
47
+ /you are (?:now |a |an |the )/i,
48
+ /act as (?:a |an |the |if )/i,
49
+ /pretend (?:you(?:'re| are) |to be )/i,
50
+ /(?:switch|change) (?:to |into )(?:a |an |the )?(?:new |different )?(?:role|mode|persona)/i,
51
+ /your (?:new |real |true )(?:role|purpose|function|task)/i,
52
+ ];
53
+
54
+ /**
55
+ * Layer 3: Instruction override patterns. Score: 0.85 on match.
56
+ */
57
+ const INSTRUCTION_OVERRIDE_PATTERNS = [
58
+ /(?:ignore|forget|discard|disregard) (?:all |any )?(?:previous|prior|above|earlier)/i,
59
+ /(?:new|updated|revised|real) (?:instructions?|directives?|rules?|guidelines?)/i,
60
+ /(?:do not|don't|never) (?:follow|obey|listen to|adhere to)/i,
61
+ /(?:bypass|circumvent|override|skip) (?:the |any |all )?(?:rules?|restrictions?|guidelines?|filters?|safety)/i,
62
+ ];
63
+
64
+ /**
65
+ * Layer 4: Delimiter injection patterns. Score: 0.8 on match.
66
+ */
67
+ const DELIMITER_PATTERNS = [
68
+ /<\/?(?:system|user|assistant|human|ai|bot|prompt|instruction)>/i,
69
+ /\[(?:SYSTEM|INST|\/INST|SYS)\]/i,
70
+ /```(?:system|instructions?|prompt)\s*\n/i,
71
+ /={3,}(?:SYSTEM|PROMPT|INSTRUCTIONS?)={3,}/i,
72
+ ];
73
+
74
+ /**
75
+ * Layer 5: Unicode obfuscation detection. Score: 0.7 on match.
76
+ */
77
+ const ZERO_WIDTH_CHARS = /[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u2061\u2062\u2063\u2064]/;
78
+
79
+ // Cyrillic characters that look like Latin
80
+ const CYRILLIC_LOOKALIKES = /[\u0400-\u04FF]/;
81
+ // Greek characters that look like Latin
82
+ const GREEK_LOOKALIKES = /[\u0370-\u03FF]/;
83
+
84
+ // =============================================================================
85
+ // Detection Functions
86
+ // =============================================================================
87
+
88
+ /**
89
+ * Multi-layer prompt injection detection.
90
+ * Checks layers in order, short-circuits on first match.
91
+ * Default threshold: 0.6 (same as SAME's go-promptguard config).
92
+ */
93
+ export function detectInjection(text: string, threshold: number = 0.6): DetectionResult {
94
+ if (!text || text.length === 0) {
95
+ return { safe: true, detector: null, score: 0 };
96
+ }
97
+
98
+ // Cap input length for performance
99
+ const input = text.slice(0, 2000);
100
+ const lower = input.toLowerCase();
101
+
102
+ // Layer 1: Legacy string patterns
103
+ for (const pattern of LEGACY_PATTERNS) {
104
+ if (lower.includes(pattern.toLowerCase())) {
105
+ const result = { safe: false, detector: "legacy_pattern", score: 1.0 };
106
+ return result.score >= threshold ? result : { safe: true, detector: null, score: result.score };
107
+ }
108
+ }
109
+
110
+ // Layer 2: Role injection
111
+ for (const pattern of ROLE_INJECTION_PATTERNS) {
112
+ if (pattern.test(input)) {
113
+ const result = { safe: false, detector: "role_injection", score: 0.9 };
114
+ return result.score >= threshold ? result : { safe: true, detector: null, score: result.score };
115
+ }
116
+ }
117
+
118
+ // Layer 3: Instruction override
119
+ for (const pattern of INSTRUCTION_OVERRIDE_PATTERNS) {
120
+ if (pattern.test(input)) {
121
+ const result = { safe: false, detector: "instruction_override", score: 0.85 };
122
+ return result.score >= threshold ? result : { safe: true, detector: null, score: result.score };
123
+ }
124
+ }
125
+
126
+ // Layer 4: Delimiter injection
127
+ for (const pattern of DELIMITER_PATTERNS) {
128
+ if (pattern.test(input)) {
129
+ const result = { safe: false, detector: "delimiter_injection", score: 0.8 };
130
+ return result.score >= threshold ? result : { safe: true, detector: null, score: result.score };
131
+ }
132
+ }
133
+
134
+ // Layer 5: Unicode obfuscation
135
+ if (ZERO_WIDTH_CHARS.test(input)) {
136
+ const result = { safe: false, detector: "unicode_obfuscation", score: 0.7 };
137
+ return result.score >= threshold ? result : { safe: true, detector: null, score: result.score };
138
+ }
139
+
140
+ // Check for mixed scripts (Latin + Cyrillic/Greek in same word — homoglyph attack)
141
+ if (hasMixedScripts(input)) {
142
+ const result = { safe: false, detector: "homoglyph", score: 0.7 };
143
+ return result.score >= threshold ? result : { safe: true, detector: null, score: result.score };
144
+ }
145
+
146
+ // Check normalization deviation
147
+ if (hasNormalizationDeviation(input)) {
148
+ const result = { safe: false, detector: "normalization", score: 0.7 };
149
+ return result.score >= threshold ? result : { safe: true, detector: null, score: result.score };
150
+ }
151
+
152
+ return { safe: true, detector: null, score: 0 };
153
+ }
154
+
155
+ /**
156
+ * Sanitize a snippet for safe injection into context.
157
+ * Returns the original text if safe, or a placeholder if injection detected.
158
+ */
159
+ export function sanitizeSnippet(text: string, threshold: number = 0.6): string {
160
+ const result = detectInjection(text, threshold);
161
+ if (!result.safe) {
162
+ return "[content filtered for security]";
163
+ }
164
+ return text;
165
+ }
166
+
167
+ // =============================================================================
168
+ // Helpers
169
+ // =============================================================================
170
+
171
+ /**
172
+ * Check for mixed Latin + Cyrillic/Greek within individual words.
173
+ * This detects homoglyph attacks where Cyrillic 'а' replaces Latin 'a'.
174
+ */
175
+ function hasMixedScripts(text: string): boolean {
176
+ // Only check if both scripts are present at all
177
+ const hasLatin = /[a-zA-Z]/.test(text);
178
+ const hasCyrillic = CYRILLIC_LOOKALIKES.test(text);
179
+ const hasGreek = GREEK_LOOKALIKES.test(text);
180
+
181
+ if (!hasLatin || (!hasCyrillic && !hasGreek)) return false;
182
+
183
+ // Check individual words for mixed scripts
184
+ const words = text.split(/\s+/);
185
+ for (const word of words) {
186
+ if (word.length < 3) continue;
187
+ const wordHasLatin = /[a-zA-Z]/.test(word);
188
+ const wordHasCyrillic = CYRILLIC_LOOKALIKES.test(word);
189
+ const wordHasGreek = GREEK_LOOKALIKES.test(word);
190
+
191
+ if (wordHasLatin && (wordHasCyrillic || wordHasGreek)) {
192
+ return true;
193
+ }
194
+ }
195
+
196
+ return false;
197
+ }
198
+
199
+ /**
200
+ * Check if NFKD normalization changes the text significantly.
201
+ * Catches confusable characters and encoding tricks.
202
+ */
203
+ function hasNormalizationDeviation(text: string): boolean {
204
+ const normalized = text.normalize('NFKD');
205
+ if (normalized === text) return false;
206
+
207
+ // Count character changes — small diacritic changes are fine,
208
+ // significant changes suggest obfuscation
209
+ let changes = 0;
210
+ const minLen = Math.min(text.length, normalized.length);
211
+ for (let i = 0; i < minLen; i++) {
212
+ if (text[i] !== normalized[i]) changes++;
213
+ }
214
+ changes += Math.abs(text.length - normalized.length);
215
+
216
+ // Flag if >5% of characters changed (threshold to avoid false positives on accented text)
217
+ return changes / text.length > 0.05;
218
+ }
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Retrieval Gate — Adaptive prompt filtering for context-surfacing
3
+ *
4
+ * Determines whether a prompt warrants memory retrieval. Skips greetings,
5
+ * shell commands, affirmations, pure emoji, and system pings. Forces
6
+ * retrieval for memory-intent queries even if short.
7
+ *
8
+ * Ported from memory-lancedb-pro's adaptive-retrieval.ts + noise-filter.ts,
9
+ * complementing ClawMem's existing short-prompt, slash-command, heartbeat,
10
+ * and dedupe gates in context-surfacing.
11
+ */
12
+
13
+ // Prompts that should skip retrieval entirely
14
+ const SKIP_PATTERNS = [
15
+ // Greetings & pleasantries
16
+ /^(hi|hello|hey|good\s*(morning|afternoon|evening|night)|greetings|yo|sup|howdy|what'?s up)\b/i,
17
+ // Shell/dev commands (slash commands handled separately in context-surfacing)
18
+ /^(run|build|test|ls|cd|git|npm|pip|docker|curl|cat|grep|find|make|sudo|bun|node|deno)\b/i,
19
+ // Simple affirmations/negations
20
+ /^(yes|no|yep|nope|ok|okay|sure|fine|thanks|thank you|thx|ty|got it|understood|cool|nice|great|good|perfect|awesome)\s*[.!]?$/i,
21
+ // Continuation prompts
22
+ /^(go ahead|continue|proceed|do it|start|begin|next)\s*[.!]?$/i,
23
+ // Pure emoji
24
+ /^[\p{Emoji}\s]+$/u,
25
+ // Single-word utility pings
26
+ /^(ping|pong|test|debug)\s*[.!?]?$/i,
27
+ ];
28
+
29
+ // Prompts that MUST trigger retrieval even if short (checked before skip)
30
+ const FORCE_RETRIEVE_PATTERNS = [
31
+ /\b(remember|recall|forgot|memory|memories)\b/i,
32
+ /\b(last time|before|previously|earlier|yesterday|ago)\b/i,
33
+ /\b(my (name|email|phone|address|birthday|preference))\b/i,
34
+ /\b(what did (i|we)|did i (tell|say|mention))\b/i,
35
+ ];
36
+
37
+ /**
38
+ * Normalize OpenClaw-injected metadata from prompts.
39
+ * Strips cron wrappers, timestamp prefixes, and conversation metadata.
40
+ */
41
+ function normalizePrompt(prompt: string): string {
42
+ let s = prompt.trim();
43
+ // Strip OpenClaw metadata headers
44
+ s = s.replace(/^(Conversation info|Sender) \(untrusted metadata\):[\s\S]*?\n\s*\n/gim, "");
45
+ // Strip cron wrapper prefix
46
+ s = s.trim().replace(/^\[cron:[^\]]+\]\s*/i, "");
47
+ // Strip timestamp prefix
48
+ s = s.trim().replace(/^\[[A-Za-z]{3}\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}\s[^\]]+\]\s*/, "");
49
+ return s.trim();
50
+ }
51
+
52
+ /**
53
+ * Check if a prompt should skip memory retrieval.
54
+ * Returns true if retrieval should be skipped.
55
+ *
56
+ * This complements (does NOT replace) existing gates in context-surfacing:
57
+ * - MIN_PROMPT_LENGTH (<20 chars)
58
+ * - Slash commands (starts with /)
59
+ * - Heartbeat suppression
60
+ * - Duplicate prompt dedupe
61
+ */
62
+ export function shouldSkipRetrieval(prompt: string): boolean {
63
+ const trimmed = normalizePrompt(prompt);
64
+
65
+ // Force retrieve if query has memory-related intent (before length/pattern checks)
66
+ if (FORCE_RETRIEVE_PATTERNS.some(p => p.test(trimmed))) return false;
67
+
68
+ // Too short to be meaningful (below context-surfacing's MIN_PROMPT_LENGTH)
69
+ if (trimmed.length < 5) return true;
70
+
71
+ // Skip if matches any skip pattern
72
+ if (SKIP_PATTERNS.some(p => p.test(trimmed))) return true;
73
+
74
+ // Skip very short non-question messages
75
+ // CJK characters carry more meaning per character — lower threshold
76
+ const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(trimmed);
77
+ const minLength = hasCJK ? 6 : 15;
78
+ if (trimmed.length < minLength && !trimmed.includes('?') && !trimmed.includes('\uff1f')) return true;
79
+
80
+ return false;
81
+ }
82
+
83
+ // =============================================================================
84
+ // Noise Filter — Post-retrieval result filtering
85
+ // =============================================================================
86
+
87
+ // Agent denial patterns (filter from retrieved results)
88
+ const DENIAL_PATTERNS = [
89
+ /i don'?t have (any )?(information|data|memory|record)/i,
90
+ /i'?m not sure about/i,
91
+ /i don'?t recall/i,
92
+ /i don'?t remember/i,
93
+ /no (relevant )?memories found/i,
94
+ /i don'?t have access to/i,
95
+ ];
96
+
97
+ /**
98
+ * Check if a retrieved memory snippet is noise that should be filtered.
99
+ * Use on search results before injection, NOT on indexed documents.
100
+ */
101
+ export function isRetrievedNoise(text: string): boolean {
102
+ const trimmed = text.trim();
103
+ if (trimmed.length < 10) return true;
104
+ if (DENIAL_PATTERNS.some(p => p.test(trimmed))) return true;
105
+ return false;
106
+ }