clawmem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/AGENTS.md +660 -0
  2. package/CLAUDE.md +660 -0
  3. package/LICENSE +21 -0
  4. package/README.md +993 -0
  5. package/SKILL.md +717 -0
  6. package/bin/clawmem +75 -0
  7. package/package.json +72 -0
  8. package/src/amem.ts +797 -0
  9. package/src/beads.ts +263 -0
  10. package/src/clawmem.ts +1849 -0
  11. package/src/collections.ts +405 -0
  12. package/src/config.ts +178 -0
  13. package/src/consolidation.ts +123 -0
  14. package/src/directory-context.ts +248 -0
  15. package/src/errors.ts +41 -0
  16. package/src/formatter.ts +427 -0
  17. package/src/graph-traversal.ts +247 -0
  18. package/src/hooks/context-surfacing.ts +317 -0
  19. package/src/hooks/curator-nudge.ts +89 -0
  20. package/src/hooks/decision-extractor.ts +639 -0
  21. package/src/hooks/feedback-loop.ts +214 -0
  22. package/src/hooks/handoff-generator.ts +345 -0
  23. package/src/hooks/postcompact-inject.ts +226 -0
  24. package/src/hooks/precompact-extract.ts +314 -0
  25. package/src/hooks/pretool-inject.ts +79 -0
  26. package/src/hooks/session-bootstrap.ts +324 -0
  27. package/src/hooks/staleness-check.ts +130 -0
  28. package/src/hooks.ts +367 -0
  29. package/src/indexer.ts +327 -0
  30. package/src/intent.ts +294 -0
  31. package/src/limits.ts +26 -0
  32. package/src/llm.ts +1175 -0
  33. package/src/mcp.ts +2138 -0
  34. package/src/memory.ts +336 -0
  35. package/src/mmr.ts +93 -0
  36. package/src/observer.ts +269 -0
  37. package/src/openclaw/engine.ts +283 -0
  38. package/src/openclaw/index.ts +221 -0
  39. package/src/openclaw/plugin.json +83 -0
  40. package/src/openclaw/shell.ts +207 -0
  41. package/src/openclaw/tools.ts +304 -0
  42. package/src/profile.ts +346 -0
  43. package/src/promptguard.ts +218 -0
  44. package/src/retrieval-gate.ts +106 -0
  45. package/src/search-utils.ts +127 -0
  46. package/src/server.ts +783 -0
  47. package/src/splitter.ts +325 -0
  48. package/src/store.ts +4062 -0
  49. package/src/validation.ts +67 -0
  50. package/src/watcher.ts +58 -0
package/src/intent.ts ADDED
@@ -0,0 +1,294 @@
1
+ /**
2
+ * Intent Classification for MAGMA Multi-Graph Memory
3
+ *
4
+ * Classifies queries into intent types to route to appropriate graph structures:
5
+ * - WHY: Causal reasoning (use causal graph)
6
+ * - WHEN: Temporal queries (use temporal graph)
7
+ * - ENTITY: Entity-focused (use entity graph)
8
+ * - WHAT: General factual (balanced approach)
9
+ */
10
+
11
+ import type { Database } from "bun:sqlite";
12
+ import { createHash } from "crypto";
13
+ import type { LlamaCpp } from "./llm.ts";
14
+
15
/** The four intent labels a query can be classified into. */
export type IntentType = 'WHY' | 'WHEN' | 'ENTITY' | 'WHAT';

/** Outcome of classifying one query. */
export interface IntentResult {
  /** Winning intent label. */
  intent: IntentType;
  /** Classifier confidence; larger means more certain. */
  confidence: number;
  /** First day (`YYYY-MM-DD`) of the date range named in the query, if any. */
  temporal_start?: string;
  /** Last day (`YYYY-MM-DD`) of the date range named in the query, if any. */
  temporal_end?: string;
}
23
+
24
// Heuristic patterns for fast classification (no LLM needed).
//
// Every alternative is followed by a trailing \b, so a bare stem such as
// "depend" or "chronolog" only matches that exact token. Inflected forms are
// therefore spelled out (or given \w*) instead of relying on the stem.
const WHY_PATTERNS = /\b(why|cause[ds]?|because|reasons?|led to|result(?:ed|s)? (?:in|from)|depend(?:s|ed|ing|ent|ence|ency|encies)?|block(?:s|ed|ing|er|ers)?|chose|decisions?|trade-?offs?|instead of)\b/i;

// "before 2024" / "after 5pm": \d\w* consumes the whole token so the trailing \b
// can succeed. Month names accept both the abbreviated and the full spelling.
const WHEN_PATTERNS = /\b(when|timeline|chronolog\w*|dates?|yesterday|last (?:week|month|year|night)|ago|(?:before|after) \d\w*|(?:in|since|during|until) (?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?|\d{4}))\b/i;

// @handles and #tags are matched by a separate branch: "@" and "#" are not word
// characters, so a leading \b can never match in front of them at token start.
const ENTITY_PATTERNS = /\b(who|person|team|project|relationship|mention(?:s|ed|ing)?|organization|company)\b|(?:^|\W)[@#]\w+/i;
28
+
29
// Temporal extraction patterns.
//
// All date arithmetic is done in UTC because the results are serialized with
// toISOString(), which is always UTC. Mixing local-time construction with UTC
// serialization shifts the reported day in zones that are not UTC.
type TemporalExtractor = (now: Date, match?: RegExpMatchArray) => { start?: string; end?: string };

/** Formats a date as `YYYY-MM-DD` (UTC). */
const toIsoDay = (d: Date): string => d.toISOString().slice(0, 10);

/** Zero-based month index keyed by the first three letters of the month name. */
const MONTH_INDEX: Record<string, number> = {
  jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
  jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
};

/** Range from `daysBack` days before `now` up to `now`; empty when the start is not a representable date. */
function rangeEndingNow(now: Date, daysBack: number): { start?: string; end?: string } {
  const start = new Date(now);
  start.setUTCDate(start.getUTCDate() - daysBack);
  // An absurdly large "N days ago" yields an Invalid Date, and toISOString() would throw.
  if (Number.isNaN(start.getTime())) return {};
  return { start: toIsoDay(start), end: toIsoDay(now) };
}

// Ordered list of [pattern, extractor]; the first pattern that matches wins.
const TEMPORAL_RELATIVE: [RegExp, TemporalExtractor][] = [
  [/\blast week\b/i, (now: Date) => rangeEndingNow(now, 7)],
  [/\blast month\b/i, (now: Date) => {
    const start = new Date(now);
    const day = start.getUTCDate();
    // Step back from the 1st, then clamp the day, so Mar 31 becomes Feb 28/29
    // rather than overflowing back into March.
    start.setUTCDate(1);
    start.setUTCMonth(start.getUTCMonth() - 1);
    const daysInMonth = new Date(Date.UTC(start.getUTCFullYear(), start.getUTCMonth() + 1, 0)).getUTCDate();
    start.setUTCDate(Math.min(day, daysInMonth));
    return { start: toIsoDay(start), end: toIsoDay(now) };
  }],
  [/\byesterday\b/i, (now: Date) => {
    const day = new Date(now);
    day.setUTCDate(day.getUTCDate() - 1);
    return { start: toIsoDay(day), end: toIsoDay(day) };
  }],
  [/\b(\d+)\s*days?\s*ago\b/i, (now: Date, m?: RegExpMatchArray) =>
    rangeEndingNow(now, parseInt(m?.[1] ?? "1", 10)),
  ],
  [/\bin\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s*(\d{4})?\b/i, (now: Date, m?: RegExpMatchArray) => {
    const month = MONTH_INDEX[(m?.[1] ?? "jan").slice(0, 3).toLowerCase()] ?? 0;
    // Without an explicit year, use the year of the supplied reference date.
    const year = m?.[2] ? parseInt(m[2], 10) : now.getUTCFullYear();
    const first = new Date(Date.UTC(year, month, 1));
    // Day 0 of the following month is the last day of this month.
    const last = new Date(Date.UTC(year, month + 1, 0));
    return { start: toIsoDay(first), end: toIsoDay(last) };
  }],
];
58
+
59
/**
 * Rule-based intent classifier: regex scoring only, never calls a model.
 *
 * The first temporal rule that matches supplies the date range. Each intent
 * then collects points from its keyword pattern (+3) and from a leading
 * question word (+2); an extracted date range adds +2 to WHEN. The highest
 * score wins, ties going to the earlier of WHY, WHEN, ENTITY, WHAT. With no
 * points at all the result is WHAT at confidence 0.6.
 */
function classifyIntentHeuristic(query: string): IntentResult {
  const lowered = query.toLowerCase();

  // Date range from the first matching temporal rule, if any.
  let temporal_start: string | undefined;
  let temporal_end: string | undefined;
  const now = new Date();
  for (const rule of TEMPORAL_RELATIVE) {
    const found = lowered.match(rule[0]);
    if (!found) continue;
    ({ start: temporal_start, end: temporal_end } = rule[1](now, found));
    break;
  }

  // Accumulate points per intent.
  const tally: Record<IntentType, number> = { WHY: 0, WHEN: 0, ENTITY: 0, WHAT: 0 };
  const award = (hit: boolean, key: IntentType, points: number): void => {
    if (hit) tally[key] += points;
  };
  award(WHY_PATTERNS.test(lowered), 'WHY', 3);
  award(WHEN_PATTERNS.test(lowered), 'WHEN', 3);
  award(ENTITY_PATTERNS.test(lowered), 'ENTITY', 3);
  award(Boolean(temporal_start), 'WHEN', 2);
  award(/^why\b/i.test(lowered), 'WHY', 2);
  award(/^when\b/i.test(lowered), 'WHEN', 2);
  award(/^who\b/i.test(lowered), 'ENTITY', 2);

  // Pick the first intent holding the top score (strict > keeps the earliest on ties).
  let winner: IntentType = 'WHAT';
  let top = 0;
  for (const key of Object.keys(tally) as IntentType[]) {
    if (tally[key] > top) {
      top = tally[key];
      winner = key;
    }
  }

  if (top === 0) {
    return { intent: 'WHAT', confidence: 0.6, temporal_start, temporal_end };
  }

  const confidence = Math.min(0.95, 0.6 + top * 0.1);
  return { intent: winner, confidence, temporal_start, temporal_end };
}
101
+
102
/**
 * Classify query intent using heuristics (fast) with optional LLM refinement.
 *
 * Order of operations:
 *  1. Return a cached classification for the same query if it is younger than one hour.
 *  2. Run the heuristic classifier; if its confidence is at least 0.8, cache and return it.
 *  3. Otherwise ask the LLM for a one-word label; a recognized label is cached and
 *     returned with confidence 0.85, keeping the heuristic's date range.
 *  4. If the LLM fails or answers with no recognized label, cache and return the heuristic result.
 *
 * @param query Raw user query; its SHA-256 hash is the cache key.
 * @param llm   Model used only for the refinement step.
 * @param db    Database holding the `intent_classifications` cache table.
 * @returns The classification, with `temporal_start` / `temporal_end` set when a date range was extracted.
 */
export async function classifyIntent(
  query: string,
  llm: LlamaCpp,
  db: Database
): Promise<IntentResult> {
  const queryHash = createHash('sha256').update(query).digest('hex');

  // cached_at is written by cacheIntent as an ISO-8601 string ("2024-03-01T12:00:00.000Z").
  // The cutoff must use the same format: SQLite's datetime('now', '-1 hour') yields
  // "2024-03-01 11:00:00", and because "T" sorts after " " every row written on the
  // same UTC date would compare as fresh, so the one-hour TTL would not be enforced.
  const cacheTtlMs = 60 * 60 * 1000;
  const freshAfter = new Date(Date.now() - cacheTtlMs).toISOString();
  const cached = db.prepare(`
    SELECT intent, confidence, temporal_start, temporal_end
    FROM intent_classifications
    WHERE query_hash = ? AND cached_at > ?
  `).get(queryHash, freshAfter) as
    | { intent: IntentType; confidence: number; temporal_start: string | null; temporal_end: string | null }
    | null
    | undefined;

  if (cached) {
    // Absent dates are stored as NULL; map them back to undefined to honor IntentResult.
    return {
      intent: cached.intent,
      confidence: cached.confidence,
      temporal_start: cached.temporal_start ?? undefined,
      temporal_end: cached.temporal_end ?? undefined,
    };
  }

  // Fast heuristic classification (instant, no LLM)
  const heuristic = classifyIntentHeuristic(query);

  // If the heuristic is confident (confidence >= 0.8), use it directly
  if (heuristic.confidence >= 0.8) {
    cacheIntent(db, queryHash, query, heuristic);
    return heuristic;
  }

  // Try LLM refinement for ambiguous cases
  const prompt = `Classify intent of this query as one word: WHY, WHEN, ENTITY, or WHAT.
Query: "${query}"
Intent:`;

  try {
    const result = await llm.generate(prompt, {
      maxTokens: 10,
      temperature: 0.0,
    });

    if (result) {
      const text = result.text.trim().toUpperCase();
      const match = text.match(/\b(WHY|WHEN|ENTITY|WHAT)\b/);
      if (match) {
        const refined: IntentResult = {
          intent: match[1] as IntentType,
          confidence: 0.85,
          temporal_start: heuristic.temporal_start,
          temporal_end: heuristic.temporal_end,
        };
        cacheIntent(db, queryHash, query, refined);
        return refined;
      }
    }
  } catch {
    // Best effort: an LLM failure must not fail classification, so fall back to the heuristic result.
  }

  cacheIntent(db, queryHash, query, heuristic);
  return heuristic;
}
162
+
163
/**
 * Stores a classification in `intent_classifications`, replacing any row that
 * conflicts with it (INSERT OR REPLACE). Missing or empty date bounds are
 * written as NULL, and `cached_at` is set to the current time as an ISO-8601 string.
 */
function cacheIntent(db: Database, queryHash: string, query: string, result: IntentResult): void {
  const upsert = db.prepare(`
    INSERT OR REPLACE INTO intent_classifications (
      query_hash, query_text, intent, confidence, temporal_start, temporal_end, cached_at
    ) VALUES (?, ?, ?, ?, ?, ?, ?)
  `);

  const rangeStart = result.temporal_start || null;
  const rangeEnd = result.temporal_end || null;
  const stampedAt = new Date().toISOString();

  upsert.run(queryHash, query, result.intent, result.confidence, rangeStart, rangeEnd, stampedAt);
}
178
+
179
+ // =============================================================================
180
+ // Query Decomposition (OpenViking-inspired QueryPlan)
181
+ // =============================================================================
182
+
183
/** One retrieval step produced by query decomposition. */
export type QueryClause = {
  /** Which retrieval mode this clause targets. */
  type: 'bm25' | 'vector' | 'graph';
  /** Text to search for. */
  query: string;
  /** Optional collection names; presumably restricts the search scope (confirm against callers). */
  collections?: string[];
  /** Importance rank: 1 = most important, 5 = least. */
  priority: 1 | 2 | 3 | 4 | 5;
};
189
+
190
/**
 * Validates one element of the LLM's JSON answer and converts it to a QueryClause.
 * Returns null when the element is unusable, so one bad entry does not discard the rest.
 */
function toQueryClause(raw: unknown): QueryClause | null {
  if (typeof raw !== 'object' || raw === null) return null;
  const { query, type, priority, collections } = raw as Record<string, unknown>;

  if (typeof query !== 'string' || query.trim() === '') return null;
  if (!type) return null;

  // Accept numbers and numeric strings; reject anything that would become NaN.
  const numeric = typeof priority === 'string' && priority.trim() !== '' ? Number(priority) : priority;
  if (typeof numeric !== 'number' || !Number.isFinite(numeric)) return null;
  const rank = Math.min(5, Math.max(1, Math.round(numeric))) as QueryClause['priority'];

  // An unrecognized retrieval type degrades to keyword search.
  const mode: QueryClause['type'] =
    type === 'bm25' || type === 'vector' || type === 'graph' ? type : 'bm25';

  // Only pass collections through when it is a list of strings.
  const scoped =
    Array.isArray(collections) && collections.every(name => typeof name === 'string')
      ? (collections as string[])
      : undefined;

  return { type: mode, query, collections: scoped, priority: rank };
}

/**
 * Decompose a complex query into multiple typed retrieval clauses.
 * Uses heuristics first, LLM only for genuinely ambiguous multi-topic queries.
 * Graph-first, planner-second design (per GPT 5.4 validation).
 *
 * - Queries with at most five words longer than two characters, and longer queries
 *   with no multi-topic signal, are routed by intent: WHY/ENTITY go to `graph`,
 *   everything else to `bm25`, each paired with a lower-priority `vector` clause.
 * - Multi-topic queries are sent to the LLM, whose JSON answer is validated element
 *   by element. If nothing usable comes back (LLM error, unparsable output, wrong
 *   array size, or every element rejected), the result is a bm25 + vector pair on
 *   the original query. The result is never an empty list.
 *
 * @param query          Raw user query.
 * @param llm            Model used for intent refinement and decomposition.
 * @param db             Database passed through to intent classification.
 * @param sessionContext Optional context; its first 300 characters are added to the prompt.
 * @returns Between one and four retrieval clauses.
 */
export async function decomposeQuery(
  query: string,
  llm: LlamaCpp,
  db: Database,
  sessionContext?: string
): Promise<QueryClause[]> {
  // Short queries still need intent classification — "why did this fail?" is 4 words but needs graph
  const words = query.split(/\s+/).filter(w => w.length > 2);

  // Heuristic: detect multi-topic queries (conjunctions, "and also", multiple questions)
  const multiTopicSignals = [
    /\band\s+(?:also|what|how|why)\b/i,
    /\bboth\s+.+\s+and\s+/i,
    /\?.*\?/,
    /\b(?:plus|additionally|as well as|along with)\b/i,
  ];
  const isMultiTopic = words.length > 5 && multiTopicSignals.some(p => p.test(query));

  if (!isMultiTopic) {
    // Single-topic (or short): classify intent and route appropriately
    const intent = await classifyIntent(query, llm, db);
    const type = intent.intent === 'WHY' || intent.intent === 'ENTITY' ? 'graph' : 'bm25';
    return [
      { type, query, priority: 1 },
      { type: 'vector', query, priority: 3 },
    ];
  }

  // Multi-topic: use LLM to decompose
  const contextBlock = sessionContext ? `\nSession context: ${sessionContext.slice(0, 300)}` : '';
  const prompt = `Decompose this query into 2-4 retrieval sub-queries. Each should target one specific topic.
${contextBlock}
Query: "${query}"

Return JSON array: [{"query": "sub-query text", "type": "bm25|vector|graph", "priority": 1-5}]
Rules:
- type "graph" for causal/entity questions (why, who, relationships)
- type "bm25" for keyword-specific factual lookups
- type "vector" for conceptual/fuzzy similarity
- priority 1 = most important, 5 = least
Return ONLY the JSON array. /no_think`;

  try {
    const result = await llm.generate(prompt, { temperature: 0.3, maxTokens: 300 });
    if (result) {
      const jsonMatch = result.text.trim().match(/\[[\s\S]*\]/);
      if (jsonMatch) {
        // Model output is untrusted: parse as unknown and validate every element.
        const parsed: unknown = JSON.parse(jsonMatch[0]);
        if (Array.isArray(parsed) && parsed.length >= 1 && parsed.length <= 4) {
          const clauses = parsed
            .map(toQueryClause)
            .filter((c): c is QueryClause => c !== null);
          // An empty plan would make callers skip retrieval entirely; use the fallback instead.
          if (clauses.length > 0) return clauses;
        }
      }
    }
  } catch {
    // LLM failed or returned invalid JSON — fallback to dual-mode
  }

  // Fallback: dual-mode search on original query
  return [
    { type: 'bm25', query, priority: 1 },
    { type: 'vector', query, priority: 2 },
  ];
}
274
+
275
/**
 * Edge-type weights to use during graph traversal for a given intent.
 *
 * Each intent emphasizes its own edge type: WHY favors causal edges, WHEN
 * temporal, ENTITY entity, and WHAT semantic. A fresh object is returned on
 * every call.
 */
export function getIntentWeights(intent: IntentType): {
  causal: number;
  semantic: number;
  temporal: number;
  entity: number;
} {
  switch (intent) {
    case 'WHY':
      return {
        causal: 5.0,
        semantic: 2.0,
        temporal: 0.5,
        entity: 1.0,
      };
    case 'WHEN':
      return {
        temporal: 5.0,
        semantic: 2.0,
        causal: 1.0,
        entity: 0.5,
      };
    case 'ENTITY':
      return {
        entity: 6.0,
        semantic: 3.0,
        temporal: 1.0,
        causal: 2.0,
      };
    case 'WHAT':
      return {
        semantic: 5.0,
        entity: 2.0,
        temporal: 1.0,
        causal: 1.0,
      };
  }
}
package/src/limits.ts ADDED
@@ -0,0 +1,26 @@
1
/**
 * Shared upper bounds used for input validation and to cap resource usage.
 */

// --- Search & query ---
export const MAX_QUERY_LENGTH = 10_000;
export const MAX_SEARCH_LIMIT = 100;

// --- LLM ---
export const MAX_LLM_INPUT_CHARS = 100_000;
// 120 000 ms = 2 minutes
export const MAX_LLM_GENERATE_TIMEOUT_MS = 120_000;

// --- Transcripts & hooks ---
// 50 MB expressed in bytes
export const MAX_TRANSCRIPT_BYTES = 50 * 1024 * 1024;
export const MAX_FILES_EXTRACTED = 200;

// --- Document processing ---
export const MAX_FRAGMENTS_PER_DOC = 500;
export const MAX_SPLITTER_INPUT_CHARS = 500_000;
export const MAX_FILE_LINES_READ = 100_000;

// --- Profile ---
export const MAX_LEVENSHTEIN_LENGTH = 1_000;

// --- Paths ---
export const MAX_PATH_LENGTH = 1_000;