clawmem 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +660 -0
- package/CLAUDE.md +660 -0
- package/LICENSE +21 -0
- package/README.md +993 -0
- package/SKILL.md +717 -0
- package/bin/clawmem +75 -0
- package/package.json +72 -0
- package/src/amem.ts +797 -0
- package/src/beads.ts +263 -0
- package/src/clawmem.ts +1849 -0
- package/src/collections.ts +405 -0
- package/src/config.ts +178 -0
- package/src/consolidation.ts +123 -0
- package/src/directory-context.ts +248 -0
- package/src/errors.ts +41 -0
- package/src/formatter.ts +427 -0
- package/src/graph-traversal.ts +247 -0
- package/src/hooks/context-surfacing.ts +317 -0
- package/src/hooks/curator-nudge.ts +89 -0
- package/src/hooks/decision-extractor.ts +639 -0
- package/src/hooks/feedback-loop.ts +214 -0
- package/src/hooks/handoff-generator.ts +345 -0
- package/src/hooks/postcompact-inject.ts +226 -0
- package/src/hooks/precompact-extract.ts +314 -0
- package/src/hooks/pretool-inject.ts +79 -0
- package/src/hooks/session-bootstrap.ts +324 -0
- package/src/hooks/staleness-check.ts +130 -0
- package/src/hooks.ts +367 -0
- package/src/indexer.ts +327 -0
- package/src/intent.ts +294 -0
- package/src/limits.ts +26 -0
- package/src/llm.ts +1175 -0
- package/src/mcp.ts +2138 -0
- package/src/memory.ts +336 -0
- package/src/mmr.ts +93 -0
- package/src/observer.ts +269 -0
- package/src/openclaw/engine.ts +283 -0
- package/src/openclaw/index.ts +221 -0
- package/src/openclaw/plugin.json +83 -0
- package/src/openclaw/shell.ts +207 -0
- package/src/openclaw/tools.ts +304 -0
- package/src/profile.ts +346 -0
- package/src/promptguard.ts +218 -0
- package/src/retrieval-gate.ts +106 -0
- package/src/search-utils.ts +127 -0
- package/src/server.ts +783 -0
- package/src/splitter.ts +325 -0
- package/src/store.ts +4062 -0
- package/src/validation.ts +67 -0
- package/src/watcher.ts +58 -0
package/src/profile.ts
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* User Profile Abstraction - Two-tier profile (static facts + dynamic context)
|
|
3
|
+
*
|
|
4
|
+
* Builds a profile document from vault contents:
|
|
5
|
+
* - Static: persistent facts extracted from decisions, hubs, and notes
|
|
6
|
+
* - Dynamic: recent context from last sessions and progress docs
|
|
7
|
+
*
|
|
8
|
+
* Stored at _clawmem/profile.md, injected at session start.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { Store } from "./store.ts";
|
|
12
|
+
import { hashContent } from "./indexer.ts";
|
|
13
|
+
import { smartTruncate } from "./hooks.ts";
|
|
14
|
+
import { MAX_LEVENSHTEIN_LENGTH } from "./limits.ts";
|
|
15
|
+
|
|
16
|
+
// =============================================================================
|
|
17
|
+
// Types
|
|
18
|
+
// =============================================================================
|
|
19
|
+
|
|
20
|
+
/**
 * Parsed, in-memory view of the stored profile document.
 */
export type Profile = {
  /** Bullets read from the "## Known Context" section of the profile document. */
  static: string[];
  /** Bullets read from the "## Current Focus" section of the profile document. */
  dynamic: string[];
  /**
   * Timestamp of the last update.
   * NOTE(review): parseProfileDocument in this file never assigns it, so it is
   * currently always the empty string.
   */
  updatedAt: string;
};
|
|
25
|
+
|
|
26
|
+
// =============================================================================
|
|
27
|
+
// Config
|
|
28
|
+
// =============================================================================
|
|
29
|
+
|
|
30
|
+
/** Budget for the static section; buildStaticProfile converts it to characters (x4). */
const STATIC_MAX_TOKENS = 500;

/** Budget for the dynamic section; buildDynamicProfile converts it to characters (x4). */
const DYNAMIC_MAX_TOKENS = 300;

/** Upper bound on the number of static facts considered for the profile. */
const STATIC_MAX_FACTS = 30;

/** Upper bound on the number of dynamic items considered for the profile. */
const DYNAMIC_MAX_ITEMS = 10;

/** Path of the profile document inside its collection. */
const PROFILE_PATH = "profile.md";

/** Collection that holds the generated profile document. */
const PROFILE_COLLECTION = "_clawmem";

/** Number of sessions newer than the profile after which it is considered stale. */
const STALE_SESSION_THRESHOLD = 5;
|
|
37
|
+
|
|
38
|
+
// =============================================================================
|
|
39
|
+
// Profile Building
|
|
40
|
+
// =============================================================================
|
|
41
|
+
|
|
42
|
+
/**
 * Builds the static half of the profile: bullet points harvested from up to
 * 20 "decision" documents and up to 10 "hub" documents, in that order.
 *
 * Bullets are de-duplicated on a lower-cased 60-character key (exact match
 * first, then a small edit distance via isTooSimilar). The result is limited
 * to STATIC_MAX_FACTS entries and to STATIC_MAX_TOKENS * 4 characters; the
 * first fact that would exceed the character budget ends the list.
 *
 * The generated profile document is itself stored with content_type "hub"
 * (see updateProfile), so it is skipped here. Otherwise each rebuild would
 * re-ingest the previous profile's own bullets, including stale
 * "Current:/Next:/Progress:" items, as static facts.
 *
 * @param store Vault store to read documents from.
 * @returns The selected facts, in discovery order.
 */
export function buildStaticProfile(store: Store): string[] {
  const facts: string[] = [];
  const seen = new Set<string>();

  // Shared harvesting pass for one content type.
  const harvest = (contentType: "decision" | "hub", limit: number): void => {
    for (const doc of store.getDocumentsByType(contentType, limit)) {
      // Never feed the profile's own previous output back into itself.
      if (doc.collection === PROFILE_COLLECTION && doc.path === PROFILE_PATH) continue;

      const location = `${doc.collection}/${doc.path}`;
      const body = store.getDocumentBody({
        filepath: location,
        displayPath: location,
      } as any);
      if (!body) continue;

      for (const bullet of extractBullets(body)) {
        const key = bullet.toLowerCase().trim().slice(0, 60);
        if (seen.has(key) || isTooSimilar(key, seen)) continue;
        seen.add(key);
        facts.push(bullet);
      }
    }
  };

  harvest("decision", 20);
  harvest("hub", 10);

  // Truncate to budget (4 characters per token).
  const maxChars = STATIC_MAX_TOKENS * 4;
  let charCount = 0;
  const result: string[] = [];
  for (const fact of facts.slice(0, STATIC_MAX_FACTS)) {
    if (charCount + fact.length > maxChars) break;
    result.push(fact);
    charCount += fact.length;
  }

  return result;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Builds the dynamic half of the profile from recent activity.
 *
 * For each of the 5 most recent sessions that has a handoff document, the
 * "Current State" and "Next Session Should" sections are summarised (150
 * characters each via smartTruncate). Then up to 5 "progress" documents
 * modified within the last 7 days are listed by title and date. The result is
 * limited to DYNAMIC_MAX_ITEMS entries and DYNAMIC_MAX_TOKENS * 4 characters.
 *
 * @param store Vault store to read sessions and documents from.
 * @returns The selected items, session items first.
 */
export function buildDynamicProfile(store: Store): string[] {
  const candidates: string[] = [];

  // Handoff summaries from recent sessions.
  for (const session of store.getRecentSessions(5)) {
    const handoff = session.handoffPath;
    if (!handoff) continue;

    const text = store.getDocumentBody({
      filepath: handoff,
      displayPath: handoff,
    } as any);
    if (!text) continue;

    const current = extractSection(text, "Current State");
    const upcoming = extractSection(text, "Next Session Should");
    if (current) candidates.push(`Current: ${smartTruncate(current, 150)}`);
    if (upcoming) candidates.push(`Next: ${smartTruncate(upcoming, 150)}`);
  }

  // Progress documents touched during the last week.
  const weekAgo = new Date();
  weekAgo.setDate(weekAgo.getDate() - 7);
  const cutoffIso = weekAgo.toISOString();
  for (const doc of store.getDocumentsByType("progress", 5)) {
    if (doc.modifiedAt >= cutoffIso) {
      candidates.push(`Progress: ${doc.title} (${doc.modifiedAt.slice(0, 10)})`);
    }
  }

  // Apply the item and character budgets; stop at the first item that overflows.
  const charBudget = DYNAMIC_MAX_TOKENS * 4;
  const kept: string[] = [];
  let used = 0;
  for (const entry of candidates.slice(0, DYNAMIC_MAX_ITEMS)) {
    if (used + entry.length > charBudget) break;
    kept.push(entry);
    used += entry.length;
  }
  return kept;
}
|
|
145
|
+
|
|
146
|
+
// =============================================================================
|
|
147
|
+
// Profile Persistence
|
|
148
|
+
// =============================================================================
|
|
149
|
+
|
|
150
|
+
/**
 * Rebuilds the profile document and writes it to the store.
 *
 * The body is content-addressed via hashContent and stored with insertContent,
 * then the document row at PROFILE_COLLECTION/PROFILE_PATH is upserted:
 *  - active row present: updated in place;
 *  - only an inactive row present: reactivated and re-tagged;
 *  - no row: inserted and tagged. Any error on this path is swallowed on
 *    purpose (the original comment notes the collection may not exist yet).
 *
 * @param store Vault store to write to.
 */
export function updateProfile(store: Store): void {
  const staticFacts = buildStaticProfile(store);
  const dynamicItems = buildDynamicProfile(store);
  const now = new Date().toISOString();

  const body = formatProfileDocument(staticFacts, dynamicItems);
  const hash = hashContent(body);

  // Store content
  store.insertContent(hash, body, now);

  // Case 1: an active row exists, so update it in place.
  const active = store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH);
  if (active) {
    store.updateDocument(active.id, "User Profile", hash, now);
    return;
  }

  // Case 2: an inactive row exists (UNIQUE(collection, path) prevents re-insert).
  const dormant = store.findAnyDocument(PROFILE_COLLECTION, PROFILE_PATH);
  if (dormant) {
    store.reactivateDocument(dormant.id, "User Profile", hash, now);
    store.updateDocumentMeta(dormant.id, {
      content_type: "hub",
      tags: JSON.stringify(["auto-generated", "profile"]),
    });
    return;
  }

  // Case 3: nothing there yet, so insert and then tag the new row.
  try {
    store.insertDocument(PROFILE_COLLECTION, PROFILE_PATH, "User Profile", hash, now, now);
    const created = store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH);
    if (created) {
      store.updateDocumentMeta(created.id, {
        content_type: "hub",
        tags: JSON.stringify(["auto-generated", "profile"]),
      });
    }
  } catch {
    // Collection may not exist yet
  }
}
|
|
191
|
+
|
|
192
|
+
/**
 * Loads and parses the stored profile.
 *
 * @param store Vault store to read from.
 * @returns The parsed profile, or null when there is no active profile
 *          document or its body cannot be read.
 */
export function getProfile(store: Store): Profile | null {
  if (!store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH)) return null;

  const location = `${PROFILE_COLLECTION}/${PROFILE_PATH}`;
  const body = store.getDocumentBody({
    filepath: location,
    displayPath: location,
  } as any);

  return body ? parseProfileDocument(body) : null;
}
|
|
204
|
+
|
|
205
|
+
/**
 * Decides whether the profile should be rebuilt.
 *
 * Stale when there is no active profile document, when its row cannot be
 * found among the first 50 "hub" documents, or when at least
 * STALE_SESSION_THRESHOLD of the most recent sessions started after the
 * profile's modifiedAt. With no recorded sessions the profile is treated as
 * fresh.
 *
 * @param store Vault store to query.
 * @returns true when the profile should be regenerated.
 */
export function isProfileStale(store: Store): boolean {
  if (!store.findActiveDocument(PROFILE_COLLECTION, PROFILE_PATH)) return true;

  // One more than the threshold is enough to decide.
  const sessions = store.getRecentSessions(STALE_SESSION_THRESHOLD + 1);
  if (sessions.length === 0) return false;

  // The modification timestamp comes from the typed document listing.
  const profileRow = store
    .getDocumentsByType("hub", 50)
    .find(row => row.path === PROFILE_PATH && row.collection === PROFILE_COLLECTION);
  if (!profileRow) return true;

  let newerSessions = 0;
  for (const session of sessions) {
    if (session.startedAt > profileRow.modifiedAt) newerSessions += 1;
  }
  return newerSessions >= STALE_SESSION_THRESHOLD;
}
|
|
222
|
+
|
|
223
|
+
// =============================================================================
|
|
224
|
+
// Formatting
|
|
225
|
+
// =============================================================================
|
|
226
|
+
|
|
227
|
+
/**
 * Renders the profile as a markdown document with YAML front matter.
 *
 * The document always starts with the front matter and a "# User Profile"
 * heading. "## Known Context" and "## Current Focus" sections are emitted
 * only when they have at least one entry; each entry becomes a "- " bullet
 * and each section is followed by a blank line.
 *
 * @param staticFacts Entries for the "Known Context" section.
 * @param dynamicItems Entries for the "Current Focus" section.
 * @returns The document text, lines joined with "\n".
 */
function formatProfileDocument(staticFacts: string[], dynamicItems: string[]): string {
  // Renders one optional section; an empty list contributes nothing.
  const renderSection = (heading: string, entries: string[]): string[] =>
    entries.length > 0 ? [heading, "", ...entries.map(entry => `- ${entry}`), ""] : [];

  const header = [
    "---",
    "content_type: hub",
    "tags: [auto-generated, profile]",
    "---",
    "",
    "# User Profile",
    "",
  ];

  return [
    ...header,
    ...renderSection("## Known Context", staticFacts),
    ...renderSection("## Current Focus", dynamicItems),
  ].join("\n");
}
|
|
256
|
+
|
|
257
|
+
/**
 * Parses a profile document produced by formatProfileDocument.
 *
 * "- " bullets under "## Known Context" become static facts and bullets under
 * "## Current Focus" become dynamic items. Any other "## " heading ends the
 * current section, and bullets outside those two sections are ignored.
 *
 * @param body Markdown text of the profile document.
 * @returns The parsed profile; updatedAt is always the empty string because
 *          the document body carries no timestamp.
 */
function parseProfileDocument(body: string): Profile {
  const buckets: Record<"static" | "dynamic", string[]> = { static: [], dynamic: [] };
  const bulletPattern = /^-\s+(.+)/;
  let active: "static" | "dynamic" | null = null;

  for (const raw of body.split("\n")) {
    if (raw.startsWith("## ")) {
      // Every level-2 heading switches section; unknown headings switch it off.
      if (raw.startsWith("## Known Context")) active = "static";
      else if (raw.startsWith("## Current Focus")) active = "dynamic";
      else active = null;
      continue;
    }

    if (active === null) continue;
    const hit = bulletPattern.exec(raw);
    if (hit?.[1]) buckets[active].push(hit[1]);
  }

  return { static: buckets.static, dynamic: buckets.dynamic, updatedAt: "" };
}
|
|
286
|
+
|
|
287
|
+
// =============================================================================
|
|
288
|
+
// Helpers
|
|
289
|
+
// =============================================================================
|
|
290
|
+
|
|
291
|
+
/**
 * Pulls markdown bullets out of a document body.
 *
 * A line counts when it starts (at column 0) with "-" or "*" followed by
 * whitespace and at least 10 further characters. At most 200 characters are
 * captured per bullet, and the captured text is trimmed.
 *
 * @param body Document text.
 * @returns Bullet texts in document order.
 */
function extractBullets(body: string): string[] {
  const bulletPattern = /^[-*]\s+(.{10,200})/;
  return body.split("\n").flatMap(line => {
    const hit = bulletPattern.exec(line);
    return hit?.[1] ? [hit[1].trim()] : [];
  });
}
|
|
301
|
+
|
|
302
|
+
/**
 * Returns the text of a markdown section (heading level 1-3) by name.
 *
 * The section runs from the line after the matching heading up to the next
 * level 1-3 heading, or to the end of the document. The heading match is
 * case-insensitive and the heading may carry trailing text after the name.
 *
 * The terminator uses an explicit end-of-input assertion rather than `$`.
 * Under the `m` flag `$` matches at every line end, which cut the lazy
 * capture off after the first line and produced an empty capture (hence
 * null) whenever a blank line followed the heading.
 *
 * @param body Document text.
 * @param sectionName Heading text to look for; regex metacharacters are escaped.
 * @returns The trimmed section text, or null when the section is missing or
 *          its text is 10 characters or shorter.
 */
function extractSection(body: string, sectionName: string): string | null {
  const regex = new RegExp(
    `^#{1,3}\\s+${escapeRegex(sectionName)}\\b[^\\n]*\\n([\\s\\S]*?)(?=^#{1,3}\\s|(?![\\s\\S]))`,
    "mi"
  );
  const match = body.match(regex);
  if (!match?.[1]) return null;
  const text = match[1].trim();
  return text.length > 10 ? text : null;
}

/**
 * Escapes every regular-expression metacharacter in `str` so it can be
 * embedded in a RegExp source and matched literally.
 */
function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
|
316
|
+
|
|
317
|
+
/**
 * Reports whether `key` is a near-duplicate of any key already collected.
 *
 * @param key Candidate de-duplication key.
 * @param existing Keys accepted so far.
 * @returns true when some existing key is within an edit distance of 4.
 */
function isTooSimilar(key: string, existing: Set<string>): boolean {
  return Array.from(existing).some(candidate => levenshteinDistance(key, candidate) < 5);
}
|
|
323
|
+
|
|
324
|
+
/**
 * Computes the Levenshtein edit distance between two strings.
 *
 * When either input is longer than MAX_LEVENSHTEIN_LENGTH the quadratic
 * computation is skipped and the absolute length difference is returned
 * instead.
 *
 * @param a First string.
 * @param b Second string.
 * @returns The edit distance, or the length difference for over-long inputs.
 */
function levenshteinDistance(a: string, b: string): number {
  // Bound inputs so the quadratic table is never built for huge strings.
  if (a.length > MAX_LEVENSHTEIN_LENGTH || b.length > MAX_LEVENSHTEIN_LENGTH) {
    return Math.abs(a.length - b.length);
  }
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // Row-by-row evaluation of the classic table: `previous` is row (row - 1).
  let previous: number[] = Array.from({ length: a.length + 1 }, (_, col) => col);

  for (let row = 1; row <= b.length; row++) {
    const current: number[] = [row];
    for (let col = 1; col <= a.length; col++) {
      const substitution = previous[col - 1]! + (b[row - 1] === a[col - 1] ? 0 : 1);
      current[col] = Math.min(
        previous[col]! + 1, // cell above
        current[col - 1]! + 1, // cell to the left
        substitution
      );
    }
    previous = current;
  }

  return previous[a.length]!;
}
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ClawMem Prompt Injection Guard
|
|
3
|
+
*
|
|
4
|
+
* Multi-layer detection system ported from SAME's go-promptguard integration.
|
|
5
|
+
* Checks vault content for prompt injection attempts before context injection.
|
|
6
|
+
* Pure pattern-based (no LLM) for sub-ms latency.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// =============================================================================
|
|
10
|
+
// Types
|
|
11
|
+
// =============================================================================
|
|
12
|
+
|
|
13
|
+
/**
 * Outcome of a prompt-injection scan.
 */
export interface DetectionResult {
  /** false when a detector matched with a score at or above the threshold. */
  safe: boolean;
  /** Name of the detector that fired, or null when the result is safe. */
  detector: string | null;
  /** 0.0 = safe, 1.0 = definite injection */
  score: number;
}
|
|
18
|
+
|
|
19
|
+
// =============================================================================
|
|
20
|
+
// Detection Layers
|
|
21
|
+
// =============================================================================
|
|
22
|
+
|
|
23
|
+
/**
 * Layer 1: legacy phrases carried over from SAME (13 entries).
 * Compared as case-insensitive substrings; a hit scores 1.0.
 */
const LEGACY_PATTERNS: readonly string[] = [
  "ignore previous",
  "ignore all previous",
  "ignore above",
  "disregard previous",
  "disregard all previous",
  "you are now",
  "new instructions",
  "system prompt",
  "<system>",
  "</system>",
  "IMPORTANT:",
  "CRITICAL:",
  "override",
];

/**
 * Layer 2: attempts to assign the model a new role or persona.
 * A hit scores 0.9.
 */
const ROLE_INJECTION_PATTERNS: readonly RegExp[] = [
  /you are (?:now |a |an |the )/i,
  /act as (?:a |an |the |if )/i,
  /pretend (?:you(?:'re| are) |to be )/i,
  /(?:switch|change) (?:to |into )(?:a |an |the )?(?:new |different )?(?:role|mode|persona)/i,
  /your (?:new |real |true )(?:role|purpose|function|task)/i,
];

/**
 * Layer 3: attempts to cancel or replace earlier instructions.
 * A hit scores 0.85.
 */
const INSTRUCTION_OVERRIDE_PATTERNS: readonly RegExp[] = [
  /(?:ignore|forget|discard|disregard) (?:all |any )?(?:previous|prior|above|earlier)/i,
  /(?:new|updated|revised|real) (?:instructions?|directives?|rules?|guidelines?)/i,
  /(?:do not|don't|never) (?:follow|obey|listen to|adhere to)/i,
  /(?:bypass|circumvent|override|skip) (?:the |any |all )?(?:rules?|restrictions?|guidelines?|filters?|safety)/i,
];

/**
 * Layer 4: chat-template style delimiters embedded in content.
 * A hit scores 0.8.
 */
const DELIMITER_PATTERNS: readonly RegExp[] = [
  /<\/?(?:system|user|assistant|human|ai|bot|prompt|instruction)>/i,
  /\[(?:SYSTEM|INST|\/INST|SYS)\]/i,
  /```(?:system|instructions?|prompt)\s*\n/i,
  /={3,}(?:SYSTEM|PROMPT|INSTRUCTIONS?)={3,}/i,
];

/**
 * Layer 5: Unicode obfuscation. A hit scores 0.7.
 * Matches zero-width and other invisible formatting characters.
 */
const ZERO_WIDTH_CHARS = /[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u2061\u2062\u2063\u2064]/;

// Any character in the Cyrillic block (used to spot Latin look-alikes).
const CYRILLIC_LOOKALIKES = /[\u0400-\u04FF]/;
// Any character in the Greek and Coptic block (used to spot Latin look-alikes).
const GREEK_LOOKALIKES = /[\u0370-\u03FF]/;
|
|
83
|
+
|
|
84
|
+
// =============================================================================
|
|
85
|
+
// Detection Functions
|
|
86
|
+
// =============================================================================
|
|
87
|
+
|
|
88
|
+
/**
 * Runs the prompt-injection detectors over `text`.
 *
 * Only the first 2000 characters are inspected. Detectors run in descending
 * score order (legacy phrases 1.0, role injection 0.9, instruction override
 * 0.85, delimiter injection 0.8, then the 0.7 Unicode checks) and the first
 * one that matches decides the result. If its score is below `threshold` the
 * result is reported as safe, with that score and a null detector.
 *
 * @param text Content to scan; empty input is always safe.
 * @param threshold Minimum score treated as an injection (default 0.6, the
 *                  same as SAME's go-promptguard config).
 * @returns The detection verdict.
 */
export function detectInjection(text: string, threshold: number = 0.6): DetectionResult {
  if (!text || text.length === 0) {
    return { safe: true, detector: null, score: 0 };
  }

  // Cap input length for performance
  const input = text.slice(0, 2000);
  const lower = input.toLowerCase();

  // Turns a matched detector into a verdict, honouring the threshold.
  const verdict = (detector: string, score: number): DetectionResult =>
    score >= threshold
      ? { safe: false, detector, score }
      : { safe: true, detector: null, score };

  // Layer 1: legacy substring patterns
  if (LEGACY_PATTERNS.some(phrase => lower.includes(phrase.toLowerCase()))) {
    return verdict("legacy_pattern", 1.0);
  }

  // Layers 2-4: regex families, highest score first
  const regexLayers: Array<[string, number, readonly RegExp[]]> = [
    ["role_injection", 0.9, ROLE_INJECTION_PATTERNS],
    ["instruction_override", 0.85, INSTRUCTION_OVERRIDE_PATTERNS],
    ["delimiter_injection", 0.8, DELIMITER_PATTERNS],
  ];
  for (const [detector, score, patterns] of regexLayers) {
    if (patterns.some(re => re.test(input))) return verdict(detector, score);
  }

  // Layer 5: Unicode obfuscation (invisible characters)
  if (ZERO_WIDTH_CHARS.test(input)) return verdict("unicode_obfuscation", 0.7);

  // Mixed scripts (Latin + Cyrillic/Greek in the same word, a homoglyph attack)
  if (hasMixedScripts(input)) return verdict("homoglyph", 0.7);

  // Normalization deviation
  if (hasNormalizationDeviation(input)) return verdict("normalization", 0.7);

  return { safe: true, detector: null, score: 0 };
}
|
|
154
|
+
|
|
155
|
+
/**
 * Gate a snippet before it is injected into context.
 *
 * NOTE(review): detectInjection inspects only the first 2000 characters, so
 * longer snippets are returned with their tail unscanned.
 *
 * @param text Snippet to check.
 * @param threshold Detection threshold forwarded to detectInjection (default 0.6).
 * @returns `text` unchanged when it is judged safe, otherwise the fixed
 *          placeholder "[content filtered for security]".
 */
export function sanitizeSnippet(text: string, threshold: number = 0.6): string {
  return detectInjection(text, threshold).safe ? text : "[content filtered for security]";
}
|
|
166
|
+
|
|
167
|
+
// =============================================================================
|
|
168
|
+
// Helpers
|
|
169
|
+
// =============================================================================
|
|
170
|
+
|
|
171
|
+
/**
 * Detects words that mix Latin letters with Cyrillic or Greek characters,
 * the signature of a homoglyph attack (e.g. Cyrillic 'а' standing in for
 * Latin 'a').
 *
 * Words are whitespace-delimited, and words shorter than 3 characters are
 * ignored.
 *
 * @param text Text to inspect.
 * @returns true when at least one word mixes the scripts.
 */
function hasMixedScripts(text: string): boolean {
  const latin = /[a-zA-Z]/;
  const hasLookalike = (chunk: string): boolean =>
    CYRILLIC_LOOKALIKES.test(chunk) || GREEK_LOOKALIKES.test(chunk);

  // Cheap whole-text precheck: both script families must occur at all.
  if (!latin.test(text) || !hasLookalike(text)) return false;

  // Then require them to meet inside a single word.
  return text
    .split(/\s+/)
    .some(word => word.length >= 3 && latin.test(word) && hasLookalike(word));
}
|
|
198
|
+
|
|
199
|
+
/**
 * Checks whether a significant share of the text consists of characters that
 * only exist as compatibility variants (full-width letters, ligatures,
 * mathematical alphabets, ...), which are typical encoding tricks.
 *
 * A code point counts as rewritten when its compatibility decomposition
 * (NFKD) differs from its canonical decomposition (NFD). Ordinary accented
 * letters and Hangul syllables decompose identically under both forms, so
 * they are never counted.
 *
 * The previous implementation compared `text` with its NFKD form index by
 * index. The first decomposed character shifted every later index, so a
 * single precomposed accent near the start of a string (or any Korean text)
 * was reported as a deviation.
 *
 * @param text Text to inspect.
 * @returns true when more than 5% of the code points are compatibility variants.
 */
function hasNormalizationDeviation(text: string): boolean {
  if (text.length === 0) return false;

  // Fast path: no compatibility mappings anywhere in the text.
  if (text.normalize("NFKD") === text.normalize("NFD")) return false;

  let total = 0;
  let rewritten = 0;
  // for...of walks code points, so astral characters are counted once.
  for (const ch of text) {
    total += 1;
    if (ch.normalize("NFKD") !== ch.normalize("NFD")) rewritten += 1;
  }

  // Flag if >5% of characters changed (threshold tolerates the odd ligature or symbol).
  return rewritten / total > 0.05;
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval Gate — Adaptive prompt filtering for context-surfacing
|
|
3
|
+
*
|
|
4
|
+
* Determines whether a prompt warrants memory retrieval. Skips greetings,
|
|
5
|
+
* shell commands, affirmations, pure emoji, and system pings. Forces
|
|
6
|
+
* retrieval for memory-intent queries even if short.
|
|
7
|
+
*
|
|
8
|
+
* Ported from memory-lancedb-pro's adaptive-retrieval.ts + noise-filter.ts,
|
|
9
|
+
* complementing ClawMem's existing short-prompt, slash-command, heartbeat,
|
|
10
|
+
* and dedupe gates in context-surfacing.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Prompts matching any of these skip retrieval entirely.
 * All patterns are anchored at the start of the (normalized) prompt.
 */
const SKIP_PATTERNS: readonly RegExp[] = [
  // Greetings & pleasantries (prefix match)
  /^(hi|hello|hey|good\s*(morning|afternoon|evening|night)|greetings|yo|sup|howdy|what'?s up)\b/i,
  // Shell/dev commands (slash commands handled separately in context-surfacing)
  /^(run|build|test|ls|cd|git|npm|pip|docker|curl|cat|grep|find|make|sudo|bun|node|deno)\b/i,
  // Simple affirmations/negations (whole prompt)
  /^(yes|no|yep|nope|ok|okay|sure|fine|thanks|thank you|thx|ty|got it|understood|cool|nice|great|good|perfect|awesome)\s*[.!]?$/i,
  // Continuation prompts (whole prompt)
  /^(go ahead|continue|proceed|do it|start|begin|next)\s*[.!]?$/i,
  // Pure emoji
  /^[\p{Emoji}\s]+$/u,
  // Single-word utility pings
  /^(ping|pong|test|debug)\s*[.!?]?$/i,
];

/**
 * Prompts matching any of these MUST trigger retrieval even if short;
 * shouldSkipRetrieval checks them before the skip patterns.
 */
const FORCE_RETRIEVE_PATTERNS: readonly RegExp[] = [
  // Explicit memory vocabulary
  /\b(remember|recall|forgot|memory|memories)\b/i,
  // References to the past
  /\b(last time|before|previously|earlier|yesterday|ago)\b/i,
  // Personal facts
  /\b(my (name|email|phone|address|birthday|preference))\b/i,
  // Questions about earlier conversation
  /\b(what did (i|we)|did i (tell|say|mention))\b/i,
];
|
|
36
|
+
|
|
37
|
+
/**
 * Strips OpenClaw-injected metadata from a prompt.
 *
 * Three things are removed, in this order: "Conversation info" / "Sender"
 * "(untrusted metadata):" blocks up to the following blank line, a leading
 * "[cron:...]" wrapper, and a leading "[Ddd YYYY-MM-DD HH:MM ...]" timestamp.
 *
 * @param prompt Raw prompt text.
 * @returns The trimmed prompt without those prefixes.
 */
function normalizePrompt(prompt: string): string {
  // Metadata header blocks (may occur at the start of any line)
  const withoutHeaders = prompt
    .trim()
    .replace(/^(Conversation info|Sender) \(untrusted metadata\):[\s\S]*?\n\s*\n/gim, "");

  // Cron wrapper prefix
  const withoutCron = withoutHeaders.trim().replace(/^\[cron:[^\]]+\]\s*/i, "");

  // Timestamp prefix
  const withoutTimestamp = withoutCron
    .trim()
    .replace(/^\[[A-Za-z]{3}\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}\s[^\]]+\]\s*/, "");

  return withoutTimestamp.trim();
}
|
|
51
|
+
|
|
52
|
+
/**
 * Decides whether memory retrieval should be skipped for a prompt.
 *
 * Evaluation order, applied to the normalized prompt:
 *  1. memory-intent patterns force retrieval (returns false);
 *  2. prompts shorter than 5 characters are skipped;
 *  3. prompts matching a skip pattern are skipped;
 *  4. short prompts (under 6 characters with CJK text, under 15 otherwise)
 *     are skipped unless they contain a question mark ("?" or full-width "？").
 *
 * This complements (does NOT replace) existing gates in context-surfacing:
 *  - MIN_PROMPT_LENGTH (<20 chars)
 *  - Slash commands (starts with /)
 *  - Heartbeat suppression
 *  - Duplicate prompt dedupe
 *
 * @param prompt Raw user prompt.
 * @returns true if retrieval should be skipped.
 */
export function shouldSkipRetrieval(prompt: string): boolean {
  const text = normalizePrompt(prompt);
  const matchesAny = (patterns: readonly RegExp[]): boolean =>
    patterns.some(re => re.test(text));

  // Memory-related intent wins over every skip rule.
  if (matchesAny(FORCE_RETRIEVE_PATTERNS)) return false;

  // Too short to be meaningful, or an explicit skip pattern.
  if (text.length < 5) return true;
  if (matchesAny(SKIP_PATTERNS)) return true;

  // CJK characters carry more meaning per character, so use a lower threshold.
  const containsCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(text);
  const shortLimit = containsCJK ? 6 : 15;
  const asksQuestion = text.includes('?') || text.includes('\uff1f');

  return text.length < shortLimit && !asksQuestion;
}
|
|
82
|
+
|
|
83
|
+
// =============================================================================
|
|
84
|
+
// Noise Filter — Post-retrieval result filtering
|
|
85
|
+
// =============================================================================
|
|
86
|
+
|
|
87
|
+
/**
 * Phrases typical of an agent stating that it knows nothing; isRetrievedNoise
 * filters retrieved snippets that contain any of them.
 */
const DENIAL_PATTERNS: readonly RegExp[] = [
  /i don'?t have (any )?(information|data|memory|record)/i,
  /i'?m not sure about/i,
  /i don'?t recall/i,
  /i don'?t remember/i,
  /no (relevant )?memories found/i,
  /i don'?t have access to/i,
];
|
|
96
|
+
|
|
97
|
+
/**
 * Tells whether a retrieved memory snippet is noise.
 * Intended for search results before injection, NOT for indexed documents.
 *
 * @param text Retrieved snippet.
 * @returns true when the trimmed snippet is shorter than 10 characters or
 *          contains one of the agent-denial phrases.
 */
export function isRetrievedNoise(text: string): boolean {
  const snippet = text.trim();
  return snippet.length < 10 || DENIAL_PATTERNS.some(re => re.test(snippet));
}
|