clawmem 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +660 -0
- package/CLAUDE.md +660 -0
- package/LICENSE +21 -0
- package/README.md +993 -0
- package/SKILL.md +717 -0
- package/bin/clawmem +75 -0
- package/package.json +72 -0
- package/src/amem.ts +797 -0
- package/src/beads.ts +263 -0
- package/src/clawmem.ts +1849 -0
- package/src/collections.ts +405 -0
- package/src/config.ts +178 -0
- package/src/consolidation.ts +123 -0
- package/src/directory-context.ts +248 -0
- package/src/errors.ts +41 -0
- package/src/formatter.ts +427 -0
- package/src/graph-traversal.ts +247 -0
- package/src/hooks/context-surfacing.ts +317 -0
- package/src/hooks/curator-nudge.ts +89 -0
- package/src/hooks/decision-extractor.ts +639 -0
- package/src/hooks/feedback-loop.ts +214 -0
- package/src/hooks/handoff-generator.ts +345 -0
- package/src/hooks/postcompact-inject.ts +226 -0
- package/src/hooks/precompact-extract.ts +314 -0
- package/src/hooks/pretool-inject.ts +79 -0
- package/src/hooks/session-bootstrap.ts +324 -0
- package/src/hooks/staleness-check.ts +130 -0
- package/src/hooks.ts +367 -0
- package/src/indexer.ts +327 -0
- package/src/intent.ts +294 -0
- package/src/limits.ts +26 -0
- package/src/llm.ts +1175 -0
- package/src/mcp.ts +2138 -0
- package/src/memory.ts +336 -0
- package/src/mmr.ts +93 -0
- package/src/observer.ts +269 -0
- package/src/openclaw/engine.ts +283 -0
- package/src/openclaw/index.ts +221 -0
- package/src/openclaw/plugin.json +83 -0
- package/src/openclaw/shell.ts +207 -0
- package/src/openclaw/tools.ts +304 -0
- package/src/profile.ts +346 -0
- package/src/promptguard.ts +218 -0
- package/src/retrieval-gate.ts +106 -0
- package/src/search-utils.ts +127 -0
- package/src/server.ts +783 -0
- package/src/splitter.ts +325 -0
- package/src/store.ts +4062 -0
- package/src/validation.ts +67 -0
- package/src/watcher.ts +58 -0
package/src/intent.ts
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intent Classification for MAGMA Multi-Graph Memory
|
|
3
|
+
*
|
|
4
|
+
* Classifies queries into intent types to route to appropriate graph structures:
|
|
5
|
+
* - WHY: Causal reasoning (use causal graph)
|
|
6
|
+
* - WHEN: Temporal queries (use temporal graph)
|
|
7
|
+
* - ENTITY: Entity-focused (use entity graph)
|
|
8
|
+
* - WHAT: General factual (balanced approach)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { Database } from "bun:sqlite";
|
|
12
|
+
import { createHash } from "crypto";
|
|
13
|
+
import type { LlamaCpp } from "./llm.ts";
|
|
14
|
+
|
|
15
|
+
export type IntentType = 'WHY' | 'WHEN' | 'ENTITY' | 'WHAT';
|
|
16
|
+
|
|
17
|
+
export interface IntentResult {
|
|
18
|
+
intent: IntentType;
|
|
19
|
+
confidence: number;
|
|
20
|
+
temporal_start?: string;
|
|
21
|
+
temporal_end?: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// Heuristic patterns for fast classification (no LLM needed)
|
|
25
|
+
const WHY_PATTERNS = /\b(why|cause[ds]?|because|reason|led to|result(?:ed)? (?:in|from)|depend|block|chose|decision|trade-?off|instead of)\b/i;
|
|
26
|
+
const WHEN_PATTERNS = /\b(when|timeline|chronolog|date|yesterday|last (?:week|month|year|night)|ago|before \d|after \d|(?:in|since|during|until) (?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|\d{4}))\b/i;
|
|
27
|
+
const ENTITY_PATTERNS = /\b(who|person|team|project|(?:@|#)\w+|relationship|mention|organization|company)\b/i;
|
|
28
|
+
|
|
29
|
+
// Temporal extraction patterns
|
|
30
|
+
type TemporalExtractor = (now: Date, match?: RegExpMatchArray) => { start?: string; end?: string };
|
|
31
|
+
|
|
32
|
+
const TEMPORAL_RELATIVE: [RegExp, TemporalExtractor][] = [
|
|
33
|
+
[/\blast week\b/i, (now: Date) => {
|
|
34
|
+
const s = new Date(now); s.setDate(s.getDate() - 7);
|
|
35
|
+
return { start: s.toISOString().slice(0, 10), end: now.toISOString().slice(0, 10) };
|
|
36
|
+
}],
|
|
37
|
+
[/\blast month\b/i, (now: Date) => {
|
|
38
|
+
const s = new Date(now); s.setMonth(s.getMonth() - 1);
|
|
39
|
+
return { start: s.toISOString().slice(0, 10), end: now.toISOString().slice(0, 10) };
|
|
40
|
+
}],
|
|
41
|
+
[/\byesterday\b/i, (now: Date) => {
|
|
42
|
+
const s = new Date(now); s.setDate(s.getDate() - 1);
|
|
43
|
+
return { start: s.toISOString().slice(0, 10), end: s.toISOString().slice(0, 10) };
|
|
44
|
+
}],
|
|
45
|
+
[/\b(\d+)\s*days?\s*ago\b/i, (now: Date, m?: RegExpMatchArray) => {
|
|
46
|
+
const s = new Date(now); s.setDate(s.getDate() - parseInt(m?.[1] ?? "1"));
|
|
47
|
+
return { start: s.toISOString().slice(0, 10), end: now.toISOString().slice(0, 10) };
|
|
48
|
+
}],
|
|
49
|
+
[/\bin\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s*(\d{4})?\b/i, (_now: Date, m?: RegExpMatchArray) => {
|
|
50
|
+
const months: Record<string, number> = { jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5, jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11 };
|
|
51
|
+
const mo = months[(m?.[1] ?? "jan").slice(0, 3).toLowerCase()] ?? 0;
|
|
52
|
+
const yr = m?.[2] ? parseInt(m[2]) : new Date().getFullYear();
|
|
53
|
+
const s = new Date(yr, mo, 1);
|
|
54
|
+
const e = new Date(yr, mo + 1, 0);
|
|
55
|
+
return { start: s.toISOString().slice(0, 10), end: e.toISOString().slice(0, 10) };
|
|
56
|
+
}],
|
|
57
|
+
];
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Fast heuristic intent classification (no LLM, instant).
|
|
61
|
+
*/
|
|
62
|
+
function classifyIntentHeuristic(query: string): IntentResult {
|
|
63
|
+
const q = query.toLowerCase();
|
|
64
|
+
|
|
65
|
+
// Extract temporal info
|
|
66
|
+
let temporal_start: string | undefined;
|
|
67
|
+
let temporal_end: string | undefined;
|
|
68
|
+
const now = new Date();
|
|
69
|
+
for (const [pattern, extractor] of TEMPORAL_RELATIVE) {
|
|
70
|
+
const match = q.match(pattern);
|
|
71
|
+
if (match) {
|
|
72
|
+
const result = extractor(now, match);
|
|
73
|
+
temporal_start = result.start;
|
|
74
|
+
temporal_end = result.end;
|
|
75
|
+
break;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Score each intent
|
|
80
|
+
const scores: Record<IntentType, number> = { WHY: 0, WHEN: 0, ENTITY: 0, WHAT: 0 };
|
|
81
|
+
|
|
82
|
+
if (WHY_PATTERNS.test(q)) scores.WHY += 3;
|
|
83
|
+
if (WHEN_PATTERNS.test(q)) scores.WHEN += 3;
|
|
84
|
+
if (ENTITY_PATTERNS.test(q)) scores.ENTITY += 3;
|
|
85
|
+
if (temporal_start) scores.WHEN += 2;
|
|
86
|
+
if (/^why\b/i.test(q)) scores.WHY += 2;
|
|
87
|
+
if (/^when\b/i.test(q)) scores.WHEN += 2;
|
|
88
|
+
if (/^who\b/i.test(q)) scores.ENTITY += 2;
|
|
89
|
+
|
|
90
|
+
const maxScore = Math.max(...Object.values(scores));
|
|
91
|
+
if (maxScore === 0) {
|
|
92
|
+
return { intent: 'WHAT', confidence: 0.6, temporal_start, temporal_end };
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const intent = (Object.entries(scores) as [IntentType, number][])
|
|
96
|
+
.sort((a, b) => b[1] - a[1])[0]![0];
|
|
97
|
+
const confidence = Math.min(0.95, 0.6 + maxScore * 0.1);
|
|
98
|
+
|
|
99
|
+
return { intent, confidence, temporal_start, temporal_end };
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Classify query intent using heuristics (fast) with optional LLM refinement.
|
|
104
|
+
* Results are cached for 1 hour.
|
|
105
|
+
*/
|
|
106
|
+
export async function classifyIntent(
|
|
107
|
+
query: string,
|
|
108
|
+
llm: LlamaCpp,
|
|
109
|
+
db: Database
|
|
110
|
+
): Promise<IntentResult> {
|
|
111
|
+
// Check cache first (1 hour TTL)
|
|
112
|
+
const queryHash = createHash('sha256').update(query).digest('hex');
|
|
113
|
+
const cached = db.prepare(`
|
|
114
|
+
SELECT intent, confidence, temporal_start, temporal_end
|
|
115
|
+
FROM intent_classifications
|
|
116
|
+
WHERE query_hash = ? AND cached_at > datetime('now', '-1 hour')
|
|
117
|
+
`).get(queryHash) as IntentResult | undefined;
|
|
118
|
+
|
|
119
|
+
if (cached) return cached;
|
|
120
|
+
|
|
121
|
+
// Fast heuristic classification (instant, no LLM)
|
|
122
|
+
const heuristic = classifyIntentHeuristic(query);
|
|
123
|
+
|
|
124
|
+
// If heuristic is confident (score >= 0.8), use it directly
|
|
125
|
+
if (heuristic.confidence >= 0.8) {
|
|
126
|
+
cacheIntent(db, queryHash, query, heuristic);
|
|
127
|
+
return heuristic;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Try LLM refinement for ambiguous cases
|
|
131
|
+
const prompt = `Classify intent of this query as one word: WHY, WHEN, ENTITY, or WHAT.
|
|
132
|
+
Query: "${query}"
|
|
133
|
+
Intent:`;
|
|
134
|
+
|
|
135
|
+
try {
|
|
136
|
+
const result = await llm.generate(prompt, {
|
|
137
|
+
maxTokens: 10,
|
|
138
|
+
temperature: 0.0,
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
if (result) {
|
|
142
|
+
const text = result.text.trim().toUpperCase();
|
|
143
|
+
const match = text.match(/\b(WHY|WHEN|ENTITY|WHAT)\b/);
|
|
144
|
+
if (match) {
|
|
145
|
+
const refined: IntentResult = {
|
|
146
|
+
intent: match[1] as IntentType,
|
|
147
|
+
confidence: 0.85,
|
|
148
|
+
temporal_start: heuristic.temporal_start,
|
|
149
|
+
temporal_end: heuristic.temporal_end,
|
|
150
|
+
};
|
|
151
|
+
cacheIntent(db, queryHash, query, refined);
|
|
152
|
+
return refined;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
} catch {
|
|
156
|
+
// LLM failed — use heuristic result
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
cacheIntent(db, queryHash, query, heuristic);
|
|
160
|
+
return heuristic;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
function cacheIntent(db: Database, queryHash: string, query: string, result: IntentResult): void {
|
|
164
|
+
db.prepare(`
|
|
165
|
+
INSERT OR REPLACE INTO intent_classifications (
|
|
166
|
+
query_hash, query_text, intent, confidence, temporal_start, temporal_end, cached_at
|
|
167
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
168
|
+
`).run(
|
|
169
|
+
queryHash,
|
|
170
|
+
query,
|
|
171
|
+
result.intent,
|
|
172
|
+
result.confidence,
|
|
173
|
+
result.temporal_start || null,
|
|
174
|
+
result.temporal_end || null,
|
|
175
|
+
new Date().toISOString()
|
|
176
|
+
);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// =============================================================================
// Query Decomposition (OpenViking-inspired QueryPlan)
// =============================================================================

/**
 * One retrieval sub-query produced by query decomposition.
 */
export type QueryClause = {
  // Retrieval backend to run this clause against.
  type: 'bm25' | 'vector' | 'graph';
  // Sub-query text sent to that backend.
  query: string;
  // Optional collection names to scope the search; unscoped when omitted.
  collections?: string[];
  // 1 = most important, 5 = least important.
  priority: 1 | 2 | 3 | 4 | 5;
};
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Decompose a complex query into multiple typed retrieval clauses.
|
|
192
|
+
* Uses heuristics first, LLM only for genuinely ambiguous multi-topic queries.
|
|
193
|
+
* Graph-first, planner-second design (per GPT 5.4 validation).
|
|
194
|
+
*/
|
|
195
|
+
export async function decomposeQuery(
|
|
196
|
+
query: string,
|
|
197
|
+
llm: LlamaCpp,
|
|
198
|
+
db: Database,
|
|
199
|
+
sessionContext?: string
|
|
200
|
+
): Promise<QueryClause[]> {
|
|
201
|
+
// Short queries still need intent classification — "why did this fail?" is 4 words but needs graph
|
|
202
|
+
const words = query.split(/\s+/).filter(w => w.length > 2);
|
|
203
|
+
if (words.length <= 5) {
|
|
204
|
+
const intent = await classifyIntent(query, llm, db);
|
|
205
|
+
const type = intent.intent === 'WHY' || intent.intent === 'ENTITY' ? 'graph' : 'bm25';
|
|
206
|
+
return [
|
|
207
|
+
{ type, query, priority: 1 },
|
|
208
|
+
{ type: 'vector', query, priority: 3 },
|
|
209
|
+
];
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// Heuristic: detect multi-topic queries (conjunctions, "and also", multiple questions)
|
|
213
|
+
const multiTopicSignals = [
|
|
214
|
+
/\band\s+(?:also|what|how|why)\b/i,
|
|
215
|
+
/\bboth\s+.+\s+and\s+/i,
|
|
216
|
+
/\?.*\?/,
|
|
217
|
+
/\b(?:plus|additionally|as well as|along with)\b/i,
|
|
218
|
+
];
|
|
219
|
+
const isMultiTopic = multiTopicSignals.some(p => p.test(query));
|
|
220
|
+
|
|
221
|
+
if (!isMultiTopic) {
|
|
222
|
+
// Single-topic: classify intent and route appropriately
|
|
223
|
+
const intent = await classifyIntent(query, llm, db);
|
|
224
|
+
const type = intent.intent === 'WHY' || intent.intent === 'ENTITY' ? 'graph' : 'bm25';
|
|
225
|
+
return [
|
|
226
|
+
{ type, query, priority: 1 },
|
|
227
|
+
{ type: 'vector', query, priority: 3 },
|
|
228
|
+
];
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
// Multi-topic: use LLM to decompose
|
|
232
|
+
const contextBlock = sessionContext ? `\nSession context: ${sessionContext.slice(0, 300)}` : '';
|
|
233
|
+
const prompt = `Decompose this query into 2-4 retrieval sub-queries. Each should target one specific topic.
|
|
234
|
+
${contextBlock}
|
|
235
|
+
Query: "${query}"
|
|
236
|
+
|
|
237
|
+
Return JSON array: [{"query": "sub-query text", "type": "bm25|vector|graph", "priority": 1-5}]
|
|
238
|
+
Rules:
|
|
239
|
+
- type "graph" for causal/entity questions (why, who, relationships)
|
|
240
|
+
- type "bm25" for keyword-specific factual lookups
|
|
241
|
+
- type "vector" for conceptual/fuzzy similarity
|
|
242
|
+
- priority 1 = most important, 5 = least
|
|
243
|
+
Return ONLY the JSON array. /no_think`;
|
|
244
|
+
|
|
245
|
+
try {
|
|
246
|
+
const result = await llm.generate(prompt, { temperature: 0.3, maxTokens: 300 });
|
|
247
|
+
if (result) {
|
|
248
|
+
const text = result.text.trim();
|
|
249
|
+
const jsonMatch = text.match(/\[[\s\S]*\]/);
|
|
250
|
+
if (jsonMatch) {
|
|
251
|
+
const parsed = JSON.parse(jsonMatch[0]) as QueryClause[];
|
|
252
|
+
if (Array.isArray(parsed) && parsed.length >= 1 && parsed.length <= 4) {
|
|
253
|
+
return parsed
|
|
254
|
+
.filter(c => c.query && c.type && c.priority)
|
|
255
|
+
.map(c => ({
|
|
256
|
+
type: ['bm25', 'vector', 'graph'].includes(c.type) ? c.type : 'bm25',
|
|
257
|
+
query: c.query,
|
|
258
|
+
collections: c.collections,
|
|
259
|
+
priority: Math.min(5, Math.max(1, c.priority)) as 1 | 2 | 3 | 4 | 5,
|
|
260
|
+
}));
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
} catch {
|
|
265
|
+
// LLM failed — fallback to dual-mode
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// Fallback: dual-mode search on original query
|
|
269
|
+
return [
|
|
270
|
+
{ type: 'bm25', query, priority: 1 },
|
|
271
|
+
{ type: 'vector', query, priority: 2 },
|
|
272
|
+
];
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* Get intent-specific weights for graph traversal.
|
|
277
|
+
*/
|
|
278
|
+
export function getIntentWeights(intent: IntentType): {
|
|
279
|
+
causal: number;
|
|
280
|
+
semantic: number;
|
|
281
|
+
temporal: number;
|
|
282
|
+
entity: number;
|
|
283
|
+
} {
|
|
284
|
+
switch (intent) {
|
|
285
|
+
case 'WHY':
|
|
286
|
+
return { causal: 5.0, semantic: 2.0, temporal: 0.5, entity: 1.0 };
|
|
287
|
+
case 'WHEN':
|
|
288
|
+
return { temporal: 5.0, semantic: 2.0, causal: 1.0, entity: 0.5 };
|
|
289
|
+
case 'ENTITY':
|
|
290
|
+
return { entity: 6.0, semantic: 3.0, temporal: 1.0, causal: 2.0 };
|
|
291
|
+
case 'WHAT':
|
|
292
|
+
return { semantic: 5.0, entity: 2.0, temporal: 1.0, causal: 1.0 };
|
|
293
|
+
}
|
|
294
|
+
}
|
package/src/limits.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centralized limits for input validation and resource bounding.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// Search & query
|
|
6
|
+
export const MAX_QUERY_LENGTH = 10_000;
|
|
7
|
+
export const MAX_SEARCH_LIMIT = 100;
|
|
8
|
+
|
|
9
|
+
// LLM
|
|
10
|
+
export const MAX_LLM_INPUT_CHARS = 100_000;
|
|
11
|
+
export const MAX_LLM_GENERATE_TIMEOUT_MS = 120_000; // 2 minutes
|
|
12
|
+
|
|
13
|
+
// Transcripts & hooks
|
|
14
|
+
export const MAX_TRANSCRIPT_BYTES = 50 * 1024 * 1024; // 50 MB
|
|
15
|
+
export const MAX_FILES_EXTRACTED = 200;
|
|
16
|
+
|
|
17
|
+
// Document processing
|
|
18
|
+
export const MAX_FRAGMENTS_PER_DOC = 500;
|
|
19
|
+
export const MAX_SPLITTER_INPUT_CHARS = 500_000;
|
|
20
|
+
export const MAX_FILE_LINES_READ = 100_000;
|
|
21
|
+
|
|
22
|
+
// Profile
|
|
23
|
+
export const MAX_LEVENSHTEIN_LENGTH = 1_000;
|
|
24
|
+
|
|
25
|
+
// Paths
|
|
26
|
+
export const MAX_PATH_LENGTH = 1_000;
|