@totalreclaw/totalreclaw 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/extractor.ts ADDED
@@ -0,0 +1,237 @@
1
+ /**
2
+ * TotalReclaw Plugin - Fact Extractor
3
+ *
4
+ * Uses LLM calls to extract atomic facts from conversation messages.
5
+ * Matches the extraction prompts described in SKILL.md.
6
+ */
7
+
8
+ import { chatCompletion, resolveLLMConfig } from './llm-client.js';
9
+
10
+ // ---------------------------------------------------------------------------
11
+ // Types
12
+ // ---------------------------------------------------------------------------
13
+
14
/** Operation the extractor requests for a fact, relative to existing memories. */
export type ExtractionAction = 'ADD' | 'UPDATE' | 'DELETE' | 'NOOP';

/** One atomic fact returned by the extraction step. */
export interface ExtractedFact {
  /** The fact itself. */
  text: string;
  /** Category the fact was classified into. */
  type: 'fact' | 'preference' | 'decision' | 'episodic' | 'goal';
  /** Importance score on a 1-10 scale. */
  importance: number;
  /** What to do with this fact relative to existing memories. */
  action: ExtractionAction;
  /** ID of the existing memory this fact refers to, when one was provided. */
  existingFactId?: string;
}

/** A single entry of an array-style message `content` field. */
interface ContentBlock {
  type?: string;
  text?: string;
  thinking?: string;
}

/**
 * Loosely-typed conversation message as received from the hook event.
 * Every field is optional because the incoming shape is not guaranteed.
 */
interface ConversationMessage {
  role?: string;
  /** Either a plain string or an array of content blocks. */
  content?: string | ContentBlock[];
  /** Alternative plain-text field used when `content` is absent. */
  text?: string;
}
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Extraction Prompt
38
+ // ---------------------------------------------------------------------------
39
+
40
/**
 * System prompt sent with every fact-extraction request.
 *
 * The importance threshold stated here (6) must stay in sync with the filter
 * applied to the parsed response, which keeps facts with importance >= 6.
 * Rules 5 and 6 previously disagreed (7 vs 6), giving the model contradictory
 * instructions; both now state the same threshold.
 */
const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine. Analyze the conversation and extract atomic facts worth remembering long-term.

Rules:
1. Each fact must be a single, atomic piece of information
2. Focus on user-specific information: preferences, decisions, facts about them, their goals
3. Skip generic knowledge, greetings, and small talk
4. Skip information that is only relevant to the current conversation
5. Score importance 1-10 (6+ = worth storing, below 6 = skip)
6. Only extract facts with importance >= 6

Types:
- fact: Objective information about the user
- preference: Likes, dislikes, or preferences
- decision: Choices the user has made
- episodic: Events or experiences
- goal: Objectives or targets

Actions (compare against existing memories if provided):
- ADD: New fact, no conflict with existing memories
- UPDATE: Modifies or refines an existing memory (provide existingFactId)
- DELETE: Contradicts an existing memory — the old one is now wrong (provide existingFactId)
- NOOP: Already captured in existing memories or not worth storing

Return a JSON array (no markdown, no code fences):
[{"text": "...", "type": "...", "importance": N, "action": "ADD|UPDATE|DELETE|NOOP", "existingFactId": "..."}, ...]

If nothing is worth extracting, return: []`;
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // Helpers
70
+ // ---------------------------------------------------------------------------
71
+
72
/**
 * Extract the plain text of a conversation message (handles various formats).
 *
 * Supported shapes:
 *   { role, content: "string" }
 *   { role, content: [{ type: "text", text: "..." }, ...] }  (content blocks)
 *   { role, text: "..." }                                    (fallback field)
 *
 * Only `user` and `assistant` messages are kept. For array content, only
 * blocks with `type === "text"` and a string `text` contribute; they are
 * joined with newlines. Entries that are not objects (e.g. `null`) are
 * skipped instead of throwing, because the input is untrusted hook data.
 *
 * @param msg - Raw message of unknown shape.
 * @returns The role and text, or `null` when the message is not usable
 *          (wrong role, unsupported shape, or fewer than 3 characters of text).
 */
function messageToText(msg: unknown): { role: string; content: string } | null {
  if (!msg || typeof msg !== 'object') return null;

  const m = msg as ConversationMessage;
  const role = m.role ?? 'unknown';

  // Only keep user and assistant messages
  if (role !== 'user' && role !== 'assistant') return null;

  let textContent: string;

  if (typeof m.content === 'string') {
    textContent = m.content;
  } else if (Array.isArray(m.content)) {
    const textParts: string[] = [];
    for (const block of m.content as unknown[]) {
      // Guard against null / primitive entries before reading properties.
      if (!block || typeof block !== 'object') continue;
      const { type, text } = block as ContentBlock;
      if (type === 'text' && typeof text === 'string') textParts.push(text);
    }
    textContent = textParts.join('\n');
  } else if (typeof m.text === 'string') {
    textContent = m.text;
  } else {
    return null;
  }

  // Too short to carry any extractable information.
  if (textContent.length < 3) return null;

  return { role, content: textContent };
}
113
+
114
/**
 * Render messages as `[role]: content` lines joined by blank lines, stopping
 * before the output would exceed a character budget (rough estimate: 4 chars
 * per token).
 *
 * Messages are taken in order from the start of the list. The `\n\n`
 * separators count against the budget, so the returned string never exceeds
 * `maxChars`. If the very first message alone is larger than the budget it is
 * clipped to `maxChars` rather than dropped, so one oversized message does not
 * produce an empty result.
 *
 * @param messages - Messages already reduced to role + text.
 * @param maxChars - Maximum length of the returned string.
 * @returns The joined conversation text (possibly empty).
 */
function truncateMessages(messages: Array<{ role: string; content: string }>, maxChars: number): string {
  const separator = '\n\n';
  if (maxChars <= 0) return '';

  const lines: string[] = [];
  let totalChars = 0;

  for (const msg of messages) {
    const line = `[${msg.role}]: ${msg.content}`;
    // A separator is only emitted between lines, so the first line costs none.
    const separatorCost = lines.length > 0 ? separator.length : 0;

    if (totalChars + separatorCost + line.length > maxChars) {
      // Keep a clipped prefix of an oversized first message instead of nothing.
      if (lines.length === 0) lines.push(line.slice(0, maxChars));
      break;
    }

    lines.push(line);
    totalChars += separatorCost + line.length;
  }

  return lines.join(separator);
}
130
+
131
/** Actions accepted from the model; anything else is treated as `ADD`. */
const VALID_EXTRACTION_ACTIONS: ReadonlySet<string> = new Set(['ADD', 'UPDATE', 'DELETE', 'NOOP']);

/** Fact types accepted from the model; anything else is treated as `fact`. */
const VALID_FACT_TYPES: ReadonlySet<string> = new Set(['fact', 'preference', 'decision', 'episodic', 'goal']);

/**
 * Parse text as JSON, falling back to the outermost `[...]` span when the
 * text as a whole is not valid JSON (e.g. the model added prose around the
 * array). Returns `undefined` when nothing parses.
 */
function parseJsonLenient(raw: string): unknown {
  try {
    return JSON.parse(raw);
  } catch {
    const start = raw.indexOf('[');
    const end = raw.lastIndexOf(']');
    if (start === -1 || end <= start) return undefined;
    try {
      return JSON.parse(raw.slice(start, end + 1));
    } catch {
      return undefined;
    }
  }
}

/**
 * Parse the LLM response into structured facts.
 *
 * Steps:
 *  - strip a leading/trailing markdown code fence if the response starts with one;
 *  - parse the JSON (tolerating prose around the array);
 *  - keep entries that are objects with a `text` string of at least 5 characters;
 *  - normalize each entry: text capped at 512 chars, unknown `type` -> `fact`,
 *    unknown `action` -> `ADD`, importance clamped to 1-10 (default 5);
 *  - keep only facts with importance >= 6, except `DELETE` actions which
 *    always pass.
 *
 * @param response - Raw text returned by the model.
 * @returns The normalized facts, or an empty array when the response cannot
 *          be parsed or is not a JSON array. Never throws.
 */
function parseFactsResponse(response: string): ExtractedFact[] {
  // Strip markdown code fences if present
  let cleaned = response.trim();
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
  }

  try {
    const parsed = parseJsonLenient(cleaned);
    if (!Array.isArray(parsed)) return [];

    return parsed
      .filter((entry: unknown): entry is Record<string, unknown> => {
        if (!entry || typeof entry !== 'object') return false;
        const text = (entry as Record<string, unknown>).text;
        return typeof text === 'string' && text.length >= 5;
      })
      .map((fact): ExtractedFact => {
        const rawAction = String(fact.action);
        const rawType = String(fact.type);
        return {
          text: String(fact.text).slice(0, 512),
          type: (VALID_FACT_TYPES.has(rawType) ? rawType : 'fact') as ExtractedFact['type'],
          // Missing / non-numeric / zero importance falls back to 5.
          importance: Math.max(1, Math.min(10, Number(fact.importance) || 5)),
          // Default to ADD for backward compatibility
          action: (VALID_EXTRACTION_ACTIONS.has(rawAction) ? rawAction : 'ADD') as ExtractionAction,
          existingFactId: typeof fact.existingFactId === 'string' ? fact.existingFactId : undefined,
        };
      })
      .filter((fact) => fact.importance >= 6 || fact.action === 'DELETE'); // DELETE actions pass regardless of importance
  } catch {
    return [];
  }
}
174
+
175
+ // ---------------------------------------------------------------------------
176
+ // Main extraction function
177
+ // ---------------------------------------------------------------------------
178
+
179
/**
 * Extract facts from a list of conversation messages using LLM.
 *
 * This function is called from agent hooks and must never break the agent,
 * so every step (config resolution, message parsing, the LLM call and
 * response parsing) runs inside one try/catch and any failure yields `[]`.
 *
 * @param rawMessages - The messages array from the hook event (unknown[])
 * @param mode - 'turn' for agent_end (recent only), 'full' for compaction/reset
 * @param existingMemories - Optional list of existing memories for dedup context
 * @returns Array of extracted facts, or empty array on failure.
 */
export async function extractFacts(
  rawMessages: unknown[],
  mode: 'turn' | 'full',
  existingMemories?: Array<{ id: string; text: string }>,
): Promise<ExtractedFact[]> {
  try {
    const config = resolveLLMConfig();
    if (!config) return []; // No LLM available

    // The hook payload is untyped at runtime; refuse anything that is not a list.
    if (!Array.isArray(rawMessages)) return [];

    // Parse messages
    const parsed = rawMessages
      .map((raw) => messageToText(raw))
      .filter((m): m is { role: string; content: string } => m !== null);

    if (parsed.length === 0) return [];

    // For 'turn' mode, only look at last 6 messages (3 turns)
    // For 'full' mode, use all messages but truncate to fit token budget
    const relevantMessages = mode === 'turn' ? parsed.slice(-6) : parsed;

    // Truncate to ~3000 tokens worth of text (12,000 chars at ~4 chars/token)
    const conversationText = truncateMessages(relevantMessages, 12_000);

    if (conversationText.length < 20) return [];

    // Build existing memories context if available.
    // NOTE(review): this section is not size-limited, unlike the conversation text.
    let memoriesContext = '';
    if (existingMemories && existingMemories.length > 0) {
      const memoriesStr = existingMemories
        .map((m) => `[ID: ${m.id}] ${m.text}`)
        .join('\n');
      memoriesContext = `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoriesStr}`;
    }

    const userPrompt =
      mode === 'turn'
        ? `Extract important facts from these recent conversation turns:\n\n${conversationText}${memoriesContext}`
        : `Extract ALL valuable long-term memories from this conversation before it is lost:\n\n${conversationText}${memoriesContext}`;

    const response = await chatCompletion(config, [
      { role: 'system', content: EXTRACTION_SYSTEM_PROMPT },
      { role: 'user', content: userPrompt },
    ]);

    if (!response) return [];

    return parseFactsResponse(response);
  } catch {
    return []; // Fail silently -- hooks must never break the agent
  }
}
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * Generate a BIP-39 12-word mnemonic for use as TOTALRECLAW_MASTER_PASSWORD.
4
+ *
5
+ * Usage: npx tsx generate-mnemonic.ts
6
+ */
7
+ import { generateMnemonic } from '@scure/bip39';
8
+ import { wordlist } from '@scure/bip39/wordlists/english.js';
9
+
10
// 128 is the strength argument passed to generateMnemonic for the 12-word phrase.
const phrase = generateMnemonic(wordlist, 128);

// Each entry is printed with its own console.log call, in this order.
const outputLines: string[] = [
  '\n Your TotalReclaw master mnemonic (12 words):\n',
  ` ${phrase}\n`,
  ' WRITE THIS DOWN. If you lose it, your memories are unrecoverable.',
  ' Set it as TOTALRECLAW_MASTER_PASSWORD in your .env file.\n',
];

for (const outputLine of outputLines) {
  console.log(outputLine);
}
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Hot cache wrapper for the plugin.
3
+ *
4
+ * Self-contained AES-256-GCM encrypted cache (same implementation as
5
+ * client/src/cache/hot-cache.ts but without cross-package import).
6
+ */
7
+
8
+ import crypto from 'node:crypto';
9
+ import fs from 'node:fs';
10
+ import path from 'node:path';
11
+
12
/** A fact kept in the hot cache. */
export interface HotFact {
  id: string;
  text: string;
  importance: number;
}

/** Plaintext JSON structure that is encrypted and written to the cache file. */
interface CachePayload {
  hotFacts: HotFact[];
  factCount: number;
  lastSyncedBlock: number;
  smartAccountAddress: string;
  lastUpdatedAt?: number; // Unix timestamp (ms) of last cache update
  lastQueryEmbedding?: number[]; // Embedding of last search query
}

/** Maximum number of facts retained by the cache. */
const MAX_HOT_FACTS = 30;
/** Byte length of the random IV stored at the start of the cache file. */
const IV_LENGTH = 12;
/** Byte length of the GCM authentication tag stored after the IV. */
const TAG_LENGTH = 16;

/** Runtime shape check for a fact read back from disk. */
function isHotFact(value: unknown): value is HotFact {
  if (!value || typeof value !== 'object') return false;
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.id === 'string' &&
    typeof candidate.text === 'string' &&
    typeof candidate.importance === 'number'
  );
}

/**
 * AES-256-GCM encrypted on-disk cache of the most important facts plus some
 * sync metadata.
 *
 * File layout written by `flush()` and read by `load()`:
 *   [ IV (12 bytes) | auth tag (16 bytes) | ciphertext ]
 *
 * State lives in memory; nothing touches the disk until `flush()` or `load()`
 * is called explicitly.
 */
export class PluginHotCache {
  private hotFacts: HotFact[] = [];
  private factCount = 0;
  private lastSyncedBlock = 0;
  private smartAccountAddress = '';
  private lastUpdatedAt = 0;
  private lastQueryEmbedding: number[] | null = null;
  private key: Buffer;

  /**
   * @param cachePath - Path of the encrypted cache file.
   * @param hexKey - Encryption key as a hex string. It is not validated here;
   *                 a key of the wrong length makes `flush()` throw and makes
   *                 `load()` reset the cache to empty.
   */
  constructor(private cachePath: string, hexKey: string) {
    this.key = Buffer.from(hexKey, 'hex');
  }

  /** @returns A shallow copy of the cached facts. */
  getHotFacts(): HotFact[] { return [...this.hotFacts]; }
  getFactCount(): number { return this.factCount; }
  getLastSyncedBlock(): number { return this.lastSyncedBlock; }
  getSmartAccountAddress(): string { return this.smartAccountAddress; }
  getLastUpdatedAt(): number { return this.lastUpdatedAt; }
  /** @returns A copy of the last query embedding, or `null` if none is stored. */
  getLastQueryEmbedding(): number[] | null { return this.lastQueryEmbedding ? [...this.lastQueryEmbedding] : null; }

  /**
   * Replace the cached facts with the `MAX_HOT_FACTS` most important ones
   * (sorted by descending importance) and stamp the update time.
   * The input array is not mutated.
   */
  setHotFacts(facts: HotFact[]): void {
    const sorted = [...facts].sort((a, b) => b.importance - a.importance);
    this.hotFacts = sorted.slice(0, MAX_HOT_FACTS);
    this.lastUpdatedAt = Date.now();
  }

  setFactCount(count: number): void { this.factCount = count; }
  setLastSyncedBlock(block: number): void { this.lastSyncedBlock = block; }
  setSmartAccountAddress(addr: string): void { this.smartAccountAddress = addr; }
  setLastUpdatedAt(ts: number): void { this.lastUpdatedAt = ts; }
  /** Stores a copy of the embedding, or clears it when `null` is passed. */
  setLastQueryEmbedding(embedding: number[] | null): void { this.lastQueryEmbedding = embedding ? [...embedding] : null; }

  /**
   * Check if the cache is fresh (within TTL).
   * A cache that has never been updated is never fresh.
   * @param ttlMs TTL in milliseconds (default: 5 minutes)
   */
  isFresh(ttlMs: number = 300_000): boolean {
    if (this.lastUpdatedAt === 0) return false;
    return (Date.now() - this.lastUpdatedAt) < ttlMs;
  }

  /**
   * Encrypt the current state with a fresh random IV and write it to
   * `cachePath`, creating the parent directory if needed.
   * Throws on encryption or filesystem errors.
   */
  flush(): void {
    const payload: CachePayload = {
      hotFacts: this.hotFacts,
      factCount: this.factCount,
      lastSyncedBlock: this.lastSyncedBlock,
      smartAccountAddress: this.smartAccountAddress,
      lastUpdatedAt: this.lastUpdatedAt,
      // The payload field is optional, not nullable: omit it when unset.
      lastQueryEmbedding: this.lastQueryEmbedding ?? undefined,
    };

    const plaintext = Buffer.from(JSON.stringify(payload), 'utf-8');
    const iv = crypto.randomBytes(IV_LENGTH);
    const cipher = crypto.createCipheriv('aes-256-gcm', this.key, iv);
    const encrypted = Buffer.concat([cipher.update(plaintext), cipher.final()]);
    const tag = cipher.getAuthTag();

    const output = Buffer.concat([iv, tag, encrypted]);

    // Recursive mkdir is a no-op for an existing directory, so no pre-check is needed.
    fs.mkdirSync(path.dirname(this.cachePath), { recursive: true });
    fs.writeFileSync(this.cachePath, output);
  }

  /**
   * Load and decrypt the cache file into memory.
   *
   * - Missing file, or a file too short to hold IV + tag: state is left untouched.
   * - Read/decrypt/authentication/JSON failure: state is reset to empty.
   * - Fields of the wrong type in the decrypted payload fall back to their
   *   empty defaults; malformed fact entries are dropped and the list is
   *   capped at `MAX_HOT_FACTS`.
   *
   * Never throws.
   */
  load(): void {
    if (!fs.existsSync(this.cachePath)) return;

    try {
      const data = fs.readFileSync(this.cachePath);
      if (data.length < IV_LENGTH + TAG_LENGTH) return;

      const iv = data.subarray(0, IV_LENGTH);
      const tag = data.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
      const ciphertext = data.subarray(IV_LENGTH + TAG_LENGTH);

      const decipher = crypto.createDecipheriv('aes-256-gcm', this.key, iv);
      decipher.setAuthTag(tag);
      const decrypted = Buffer.concat([decipher.update(ciphertext), decipher.final()]);

      const parsed: unknown = JSON.parse(decrypted.toString('utf-8'));
      if (!parsed || typeof parsed !== 'object') {
        throw new Error('Malformed cache payload'); // handled by the reset below
      }
      const payload = parsed as Record<string, unknown>;

      const embedding = payload.lastQueryEmbedding;

      this.hotFacts = Array.isArray(payload.hotFacts)
        ? payload.hotFacts.filter(isHotFact).slice(0, MAX_HOT_FACTS)
        : [];
      this.factCount = typeof payload.factCount === 'number' ? payload.factCount : 0;
      this.lastSyncedBlock = typeof payload.lastSyncedBlock === 'number' ? payload.lastSyncedBlock : 0;
      this.smartAccountAddress =
        typeof payload.smartAccountAddress === 'string' ? payload.smartAccountAddress : '';
      this.lastUpdatedAt = typeof payload.lastUpdatedAt === 'number' ? payload.lastUpdatedAt : 0;
      this.lastQueryEmbedding =
        Array.isArray(embedding) && embedding.every((v) => typeof v === 'number')
          ? (embedding as number[])
          : null;
    } catch {
      this.hotFacts = [];
      this.factCount = 0;
      this.lastSyncedBlock = 0;
      this.smartAccountAddress = '';
      this.lastUpdatedAt = 0;
      this.lastQueryEmbedding = null;
    }
  }
}
@@ -0,0 +1,93 @@
1
+ import type {
2
+ NormalizedFact,
3
+ ImportSource,
4
+ AdapterParseResult,
5
+ ProgressCallback,
6
+ } from './types.js';
7
+
8
/**
 * Abstract base class for import adapters.
 *
 * Each adapter:
 * 1. Fetches or reads source data
 * 2. Parses into NormalizedFact[]
 * 3. Validates each fact
 *
 * The caller (import tool) handles encryption + storage.
 */
export abstract class BaseImportAdapter {
  /** Identifier of the import source; used as the default `source` of validated facts. */
  abstract readonly source: ImportSource;
  /** Human-readable adapter name. */
  abstract readonly displayName: string;

  /**
   * Parse source data into normalized facts.
   *
   * For API sources, this fetches from the API.
   * For file sources, this parses the provided content.
   */
  abstract parse(
    input: { content?: string; api_key?: string; source_user_id?: string; api_url?: string; file_path?: string },
    onProgress?: ProgressCallback,
  ): Promise<AdapterParseResult>;

  /**
   * Validate and clean a single fact.
   *
   * - `text` must be a string with at least 3 non-whitespace-trimmed characters;
   *   it is trimmed and capped at 512 characters.
   * - Unknown `type` values become `'fact'`.
   * - `importance` is normalized to an integer in 1-10. Values in [0, 1] are
   *   treated as a 0-1 scale and multiplied by 10; everything else is treated
   *   as already on the 1-10 scale and clamped. Missing or non-numeric (NaN)
   *   importance falls back to 5 instead of propagating NaN.
   *
   * @returns The cleaned fact, or null if the fact should be skipped.
   */
  protected validateFact(fact: Partial<NormalizedFact>): NormalizedFact | null {
    // Text is required and must be non-empty
    if (!fact.text || typeof fact.text !== 'string' || fact.text.trim().length < 3) {
      return null;
    }

    // Truncate to 512 chars
    const text = fact.text.trim().slice(0, 512);

    // Normalize type
    const validTypes = ['fact', 'preference', 'decision', 'episodic', 'goal'] as const;
    const type = validTypes.includes(fact.type as typeof validTypes[number])
      ? (fact.type as NormalizedFact['type'])
      : 'fact';

    // Normalize importance to 1-10.
    // NaN fails both range checks below and would survive Math.max/Math.round,
    // so replace it with the default before scaling.
    const numericImportance = Number(fact.importance ?? 5);
    let importance = Number.isNaN(numericImportance) ? 5 : numericImportance;
    if (importance < 0 || importance > 1) {
      // Already on 1-10 scale
      importance = Math.max(1, Math.min(10, Math.round(importance)));
    } else {
      // 0-1 scale — convert to 1-10
      // NOTE(review): exactly 1 is read as the top of the 0-1 scale (-> 10),
      // not the bottom of the 1-10 scale; confirm this is intended.
      importance = Math.max(1, Math.round(importance * 10));
    }

    return {
      text,
      type,
      importance,
      source: fact.source ?? this.source,
      sourceId: fact.sourceId,
      sourceTimestamp: fact.sourceTimestamp,
      tags: fact.tags,
    };
  }

  /**
   * Batch-validate an array of partial facts.
   *
   * @returns The facts that passed validation (in input order) and the number
   *          of entries that were rejected.
   */
  protected validateFacts(
    rawFacts: Partial<NormalizedFact>[],
  ): { facts: NormalizedFact[]; invalidCount: number } {
    const facts: NormalizedFact[] = [];
    let invalidCount = 0;

    for (const raw of rawFacts) {
      const validated = this.validateFact(raw);
      if (validated) {
        facts.push(validated);
      } else {
        invalidCount++;
      }
    }

    return { facts, invalidCount };
  }
}
+ }