@totalreclaw/totalreclaw 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/crypto.ts DELETED
@@ -1,351 +0,0 @@
- /**
- * TotalReclaw Plugin - Crypto Operations
- *
- * All cryptographic primitives used by the OpenClaw plugin. These must
- * produce byte-for-byte identical output to the TotalReclaw client library
- * (`client/src/crypto/`) so that memories written by one can be read by
- * the other.
- *
- * Key derivation chain:
- * master_password + salt
- * -> Argon2id(t=3, m=65536, p=4, dkLen=32) -> masterKey
- * -> HKDF-SHA256(masterKey, salt, "totalreclaw-auth-key-v1", 32) -> authKey
- * -> HKDF-SHA256(masterKey, salt, "totalreclaw-encryption-key-v1", 32) -> encryptionKey
- * -> HKDF-SHA256(masterKey, salt, "openmemory-dedup-v1", 32) -> dedupKey
- *
- * Encryption: AES-256-GCM (12-byte IV, 16-byte tag)
- * Blind indices: SHA-256 of lowercase tokens (exact and stemmed)
- * Content fingerprint: HMAC-SHA256(dedupKey, normalizeText(plaintext))
- */
-
- import { argon2id } from '@noble/hashes/argon2.js';
- import { hkdf } from '@noble/hashes/hkdf.js';
- import { sha256 } from '@noble/hashes/sha2.js';
- import { hmac } from '@noble/hashes/hmac.js';
- import { mnemonicToSeedSync, validateMnemonic } from '@scure/bip39';
- import { wordlist } from '@scure/bip39/wordlists/english.js';
- import { stemmer } from 'porter-stemmer';
- import crypto from 'node:crypto';
-
- // ---------------------------------------------------------------------------
- // Key Derivation
- // ---------------------------------------------------------------------------
-
- /** HKDF context strings -- must match client/src/crypto/kdf.ts exactly. */
- const AUTH_KEY_INFO = 'totalreclaw-auth-key-v1';
- const ENCRYPTION_KEY_INFO = 'totalreclaw-encryption-key-v1';
- const DEDUP_KEY_INFO = 'openmemory-dedup-v1';
-
- /** Argon2id parameters -- OWASP recommendations, matching client defaults. */
- const ARGON2_TIME_COST = 3;
- const ARGON2_MEMORY_COST = 65536; // 64 MiB, expressed in KiB
- const ARGON2_PARALLELISM = 4;
- const ARGON2_DK_LEN = 32;
-
- /** AES-256-GCM constants. */
- const IV_LENGTH = 12;
- const TAG_LENGTH = 16;
- const KEY_LENGTH = 32;
-
- /**
- * Check if the input looks like a BIP-39 mnemonic (12 or 24 words from the BIP-39 English wordlist).
- */
- function isBip39Mnemonic(input: string): boolean {
-   const words = input.trim().split(/\s+/);
-   if (words.length !== 12 && words.length !== 24) return false;
-   return validateMnemonic(input.trim(), wordlist);
- }
-
- /**
- * Derive encryption keys from a BIP-39 mnemonic.
- * Uses the 512-bit BIP-39 seed as HKDF input (NOT the derived private key)
- * for proper key separation from the Ethereum signing key.
- */
- function deriveKeysFromMnemonic(
-   mnemonic: string,
- ): { authKey: Buffer; encryptionKey: Buffer; dedupKey: Buffer; salt: Buffer } {
-   // BIP-39: mnemonic -> 512-bit seed via PBKDF2(mnemonic, "mnemonic", 2048 rounds)
-   const seed = mnemonicToSeedSync(mnemonic.trim());
-
-   // Use first 32 bytes of seed as deterministic salt for HKDF
-   // (BIP-39 mnemonics are self-salting via PBKDF2, no random salt needed)
-   const salt = Buffer.from(seed.slice(0, 32));
-
-   // HKDF-SHA256 from the full 512-bit seed, using distinct info strings
-   const enc = (s: string) => Buffer.from(s, 'utf8');
-   const seedBuf = Buffer.from(seed);
-
-   const authKey = Buffer.from(
-     hkdf(sha256, seedBuf, salt, enc(AUTH_KEY_INFO), 32),
-   );
-   const encryptionKey = Buffer.from(
-     hkdf(sha256, seedBuf, salt, enc(ENCRYPTION_KEY_INFO), 32),
-   );
-   const dedupKey = Buffer.from(
-     hkdf(sha256, seedBuf, salt, enc(DEDUP_KEY_INFO), 32),
-   );
-
-   return { authKey, encryptionKey, dedupKey, salt };
- }
-
- /**
- * Derive auth, encryption, and dedup keys from a master password.
- *
- * If the password is a valid BIP-39 mnemonic (12 or 24 words), keys are
- * derived from the 512-bit BIP-39 seed via HKDF. Otherwise, the legacy
- * Argon2id path is used.
- *
- * For the Argon2id path: if no salt is provided a fresh 32-byte random salt
- * is generated. Pass an existing salt when restoring a previously-registered
- * account so that the derived keys match the original registration.
- *
- * @returns Object containing authKey, encryptionKey, dedupKey, and salt (all Buffers).
- */
- export function deriveKeys(
-   password: string,
-   existingSalt?: Buffer,
- ): { authKey: Buffer; encryptionKey: Buffer; dedupKey: Buffer; salt: Buffer } {
-   // Auto-detect BIP-39 mnemonic vs arbitrary password
-   if (isBip39Mnemonic(password)) {
-     // BIP-39 path: the mnemonic is self-salting, so existingSalt is ignored for
-     // derivation, but we still return the deterministic salt for server registration
-     return deriveKeysFromMnemonic(password);
-   }
-
-   // Legacy path: arbitrary password via Argon2id
-   const salt = existingSalt ?? crypto.randomBytes(32);
-
-   // Step 1 -- Argon2id to derive a 32-byte master key.
-   // @noble/hashes argon2id accepts Uint8Array for both password and salt.
-   const masterKey = argon2id(
-     Buffer.from(password, 'utf8'),
-     salt,
-     { t: ARGON2_TIME_COST, m: ARGON2_MEMORY_COST, p: ARGON2_PARALLELISM, dkLen: ARGON2_DK_LEN },
-   );
-
-   // Step 2 -- HKDF-SHA256 for each sub-key using distinct info strings.
-   // @noble/hashes v2 requires Uint8Array for the info param.
-   const enc = (s: string) => Buffer.from(s, 'utf8');
-   const authKey = Buffer.from(
-     hkdf(sha256, masterKey, salt, enc(AUTH_KEY_INFO), 32),
-   );
-   const encryptionKey = Buffer.from(
-     hkdf(sha256, masterKey, salt, enc(ENCRYPTION_KEY_INFO), 32),
-   );
-   const dedupKey = Buffer.from(
-     hkdf(sha256, masterKey, salt, enc(DEDUP_KEY_INFO), 32),
-   );
-
-   return { authKey, encryptionKey, dedupKey, salt: Buffer.from(salt) };
- }
-
- // ---------------------------------------------------------------------------
- // LSH Seed Derivation
- // ---------------------------------------------------------------------------
-
- /**
- * HKDF context string for LSH seed derivation.
- *
- * The LSH hasher needs a deterministic seed so that the same master key
- * always generates the same random hyperplane matrices. We derive this seed
- * from the master key using HKDF with a unique info string.
- *
- * For the BIP-39 path the HKDF input is the 512-bit BIP-39 seed; for the
- * Argon2id path it is the 32-byte master key.
- */
- const LSH_SEED_INFO = 'openmemory-lsh-seed-v1';
-
- /**
- * Derive a 32-byte seed for the LSH hasher from the master key derivation
- * chain.
- *
- * Call this once during initialization and pass the result to `new LSHHasher(seed, dims)`.
- *
- * For the BIP-39 path we use the full 512-bit BIP-39 seed as IKM; for the
- * Argon2id path we use the 32-byte Argon2id-derived master key. In both
- * cases the salt from `deriveKeys()` is reused for domain separation.
- */
- export function deriveLshSeed(
-   password: string,
-   salt: Buffer,
- ): Uint8Array {
-   if (isBip39Mnemonic(password)) {
-     const seed = mnemonicToSeedSync(password.trim());
-     return new Uint8Array(
-       hkdf(sha256, Buffer.from(seed), salt, Buffer.from(LSH_SEED_INFO, 'utf8'), 32),
-     );
-   }
-
-   // Argon2id path: re-derive the master key, then HKDF with the LSH info string.
-   const masterKey = argon2id(
-     Buffer.from(password, 'utf8'),
-     salt,
-     { t: ARGON2_TIME_COST, m: ARGON2_MEMORY_COST, p: ARGON2_PARALLELISM, dkLen: ARGON2_DK_LEN },
-   );
-
-   return new Uint8Array(
-     hkdf(sha256, masterKey, salt, Buffer.from(LSH_SEED_INFO, 'utf8'), 32),
-   );
- }
-
- // ---------------------------------------------------------------------------
- // Auth Key Hash
- // ---------------------------------------------------------------------------
-
- /**
- * Compute the SHA-256 hash of the auth key.
- *
- * The server stores SHA256(authKey) during registration and uses it to look
- * up users on every request. The hex string returned here is what the plugin
- * sends to `/v1/register` as `auth_key_hash`.
- */
- export function computeAuthKeyHash(authKey: Buffer): string {
-   return Buffer.from(sha256(authKey)).toString('hex');
- }
-
- // ---------------------------------------------------------------------------
- // AES-256-GCM Encrypt / Decrypt
- // ---------------------------------------------------------------------------
-
- /**
- * Encrypt a UTF-8 plaintext string with AES-256-GCM.
- *
- * Wire format (base64-encoded):
- * [iv: 12 bytes][tag: 16 bytes][ciphertext: variable]
- *
- * This matches `serializeEncryptedData` in `client/src/crypto/aes.ts`.
- */
- export function encrypt(plaintext: string, encryptionKey: Buffer): string {
-   if (encryptionKey.length !== KEY_LENGTH) {
-     throw new Error(`Invalid key length: expected ${KEY_LENGTH}, got ${encryptionKey.length}`);
-   }
-
-   const iv = crypto.randomBytes(IV_LENGTH);
-   const cipher = crypto.createCipheriv('aes-256-gcm', encryptionKey, iv, {
-     authTagLength: TAG_LENGTH,
-   });
-
-   const ciphertext = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
-   const tag = cipher.getAuthTag();
-
-   // Combine: iv || tag || ciphertext (same order as the client library)
-   const combined = Buffer.concat([iv, tag, ciphertext]);
-   return combined.toString('base64');
- }
-
- /**
- * Decrypt a base64-encoded AES-256-GCM blob back to a UTF-8 string.
- *
- * Expects the wire format produced by `encrypt()` above.
- */
- export function decrypt(encryptedBase64: string, encryptionKey: Buffer): string {
-   if (encryptionKey.length !== KEY_LENGTH) {
-     throw new Error(`Invalid key length: expected ${KEY_LENGTH}, got ${encryptionKey.length}`);
-   }
-
-   const combined = Buffer.from(encryptedBase64, 'base64');
-
-   if (combined.length < IV_LENGTH + TAG_LENGTH) {
-     throw new Error('Encrypted data too short');
-   }
-
-   const iv = combined.subarray(0, IV_LENGTH);
-   const tag = combined.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
-   const ciphertext = combined.subarray(IV_LENGTH + TAG_LENGTH);
-
-   const decipher = crypto.createDecipheriv('aes-256-gcm', encryptionKey, iv, {
-     authTagLength: TAG_LENGTH,
-   });
-   decipher.setAuthTag(tag);
-
-   const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
-   return plaintext.toString('utf8');
- }
-
- // ---------------------------------------------------------------------------
- // Blind Indices
- // ---------------------------------------------------------------------------
-
- /**
- * Generate blind indices (SHA-256 hashes of tokens) for a text string.
- *
- * Tokenization rules (must match `client/src/crypto/blind.ts#tokenize`):
- * 1. Lowercase
- * 2. Remove punctuation (keep Unicode letters, numbers, whitespace)
- * 3. Split on whitespace
- * 4. Filter tokens shorter than 2 characters
- *
- * Each surviving token is SHA-256 hashed and returned as a hex string.
- * The returned array is deduplicated.
- */
- export function generateBlindIndices(text: string): string[] {
-   const tokens = text
-     .toLowerCase()
-     .replace(/[^\p{L}\p{N}\s]/gu, ' ') // Remove punctuation, keep letters/numbers
-     .split(/\s+/)
-     .filter((t) => t.length >= 2);
-
-   const seen = new Set<string>();
-   const indices: string[] = [];
-
-   for (const token of tokens) {
-     // Exact word hash (unchanged behavior).
-     const hash = Buffer.from(sha256(Buffer.from(token, 'utf8'))).toString('hex');
-     if (!seen.has(hash)) {
-       seen.add(hash);
-       indices.push(hash);
-     }
-
-     // Stemmed word hash. The stem is prefixed with "stem:" before hashing
-     // to avoid collisions between a word that happens to equal another
-     // word's stem (e.g., the word "commun" vs the stem of "community").
-     const stem = stemmer(token);
-     if (stem.length >= 2 && stem !== token) {
-       const stemHash = Buffer.from(
-         sha256(Buffer.from(`stem:${stem}`, 'utf8'))
-       ).toString('hex');
-       if (!seen.has(stemHash)) {
-         seen.add(stemHash);
-         indices.push(stemHash);
-       }
-     }
-   }
-
-   return indices;
- }
-
- // ---------------------------------------------------------------------------
- // Content Fingerprint (Dedup)
- // ---------------------------------------------------------------------------
-
- /**
- * Normalize text for deterministic fingerprinting.
- *
- * Steps (matching `client/src/crypto/fingerprint.ts#normalizeText`):
- * 1. Unicode NFC normalization
- * 2. Lowercase
- * 3. Collapse whitespace (spaces/tabs/newlines to a single space)
- * 4. Trim leading/trailing whitespace
- */
- function normalizeText(text: string): string {
-   return text
-     .normalize('NFC')
-     .toLowerCase()
-     .replace(/\s+/g, ' ')
-     .trim();
- }
-
- /**
- * Compute an HMAC-SHA256 content fingerprint for exact-duplicate detection.
- *
- * The server stores this fingerprint and uses it to reject duplicate writes
- * without ever seeing the plaintext.
- *
- * @returns 64-character hex string.
- */
- export function generateContentFingerprint(plaintext: string, dedupKey: Buffer): string {
-   const normalized = normalizeText(plaintext);
-   return Buffer.from(
-     hmac(sha256, dedupKey, Buffer.from(normalized, 'utf8')),
-   ).toString('hex');
- }
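
For context on how this deleted module fit together, here is a minimal sketch composing its exported API. The import path is illustrative; the calls follow the signatures shown in the hunk above.

// Sketch only -- composes the exports of the deleted crypto.ts.
import {
  deriveKeys,
  computeAuthKeyHash,
  encrypt,
  decrypt,
  generateBlindIndices,
  generateContentFingerprint,
} from './crypto.js'; // illustrative path

// A BIP-39 mnemonic takes the seed/HKDF path; any other string takes Argon2id.
const { authKey, encryptionKey, dedupKey, salt } = deriveKeys('hunter2-master-password');

// Hex SHA256(authKey): what the plugin sent to /v1/register as auth_key_hash.
const authKeyHash = computeAuthKeyHash(authKey);

// AES-256-GCM round trip; the blob is base64(iv || tag || ciphertext).
const blob = encrypt('User prefers dark mode', encryptionKey);
const plaintext = decrypt(blob, encryptionKey); // 'User prefers dark mode'

// Searchable hashes: one per distinct token, plus "stem:"-prefixed stems.
const indices = generateBlindIndices('User prefers dark mode');

// The dedup fingerprint is stable under case/whitespace changes via normalizeText.
const fp = generateContentFingerprint('  user PREFERS dark mode ', dedupKey);
const fpSame = generateContentFingerprint('user prefers dark mode', dedupKey);
// fp === fpSame

// Restoring a previously-registered account: pass the stored salt so Argon2id
// re-derives byte-identical keys.
const restored = deriveKeys('hunter2-master-password', salt);
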
package/embedding.ts DELETED
@@ -1,84 +0,0 @@
- /**
- * TotalReclaw Plugin - Local Embedding via @huggingface/transformers
- *
- * Uses the Xenova/bge-small-en-v1.5 ONNX model to generate 384-dimensional
- * text embeddings locally. No API key needed, no data leaves the machine.
- *
- * This preserves the zero-knowledge guarantee: embeddings are generated
- * CLIENT-SIDE before encryption, so no plaintext ever reaches an external API.
- *
- * Model details:
- * - Quantized (int8) ONNX model: ~33.8MB download on first use
- * - Cached in ~/.cache/huggingface/ after first download
- * - Lazy initialization: first call ~2-3s (model load), subsequent ~15ms
- * - Output: 384-dimensional normalized embedding vector
- * - For retrieval, queries should be prefixed with an instruction string
- *   (documents/passages should NOT be prefixed)
- *
- * Dependencies: @huggingface/transformers (handles model download, WordPiece
- * tokenization, ONNX inference, mean pooling, and normalization).
- */
-
- // @ts-ignore - @huggingface/transformers types may not be perfect
- import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
-
- /** ONNX-optimized bge-small-en-v1.5 from HuggingFace Hub. */
- const MODEL_ID = 'Xenova/bge-small-en-v1.5';
-
- /** Fixed output dimensionality for bge-small-en-v1.5. */
- const EMBEDDING_DIM = 384;
-
- /**
- * Query instruction prefix for bge-small-en-v1.5 retrieval tasks.
- *
- * Per the BAAI model card: prepend this to short queries when searching
- * for relevant passages. Do NOT prepend for documents/passages being stored.
- */
- const QUERY_PREFIX = 'Represent this sentence for searching relevant passages: ';
-
- /** Lazily initialized feature extraction pipeline. */
- let extractor: FeatureExtractionPipeline | null = null;
-
- /**
- * Generate a 384-dimensional embedding vector for the given text.
- *
- * On first call, downloads and loads the ONNX model (~33.8MB, cached).
- * Subsequent calls reuse the loaded model and run in ~15ms.
- *
- * For bge-small-en-v1.5, queries should set `isQuery: true` to prepend the
- * retrieval instruction prefix. Documents being stored should use the default
- * (`isQuery: false`) so no prefix is added.
- *
- * @param text - The text to embed.
- * @param options - Optional settings.
- * @param options.isQuery - If true, prepend the BGE query instruction prefix
- *   for improved retrieval accuracy (default: false).
- * @returns 384-dimensional normalized embedding as a number array.
- */
- export async function generateEmbedding(
-   text: string,
-   options?: { isQuery?: boolean },
- ): Promise<number[]> {
-   if (!extractor) {
-     extractor = await pipeline('feature-extraction', MODEL_ID, {
-       // int8 quantization for a smaller download (~33.8MB vs ~67MB).
-       dtype: 'q8', // @huggingface/transformers v3 uses `dtype`; the old `quantized` flag is ignored
-     });
-   }
-
-   const input = options?.isQuery ? QUERY_PREFIX + text : text;
-   const output = await extractor(input, { pooling: 'mean', normalize: true });
-   // output.data is a Float32Array; convert to plain number[]
-   return Array.from(output.data as Float32Array);
- }
-
- /**
- * Get the embedding vector dimensionality.
- *
- * Always returns 384 (fixed for bge-small-en-v1.5).
- * This is needed by downstream code (e.g. LSH hasher) to know the vector
- * size without calling the embedding model.
- */
- export function getEmbeddingDims(): number {
-   return EMBEDDING_DIM;
- }
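
A sketch of the asymmetric query/document convention this module documented (import path illustrative): since both outputs are L2-normalized, their dot product is the cosine similarity.

import { generateEmbedding, getEmbeddingDims } from './embedding.js'; // illustrative path

// Stored passages are embedded as-is; only search queries get the BGE prefix.
const doc = await generateEmbedding('The user works primarily in TypeScript.');
const query = await generateEmbedding('what language does the user code in', { isQuery: true });

// Both vectors are normalized (normalize: true), so dot product == cosine similarity.
let score = 0;
for (let i = 0; i < getEmbeddingDims(); i++) score += doc[i] * query[i];
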
package/extractor.ts DELETED
@@ -1,210 +0,0 @@
- /**
- * TotalReclaw Plugin - Fact Extractor
- *
- * Uses LLM calls to extract atomic facts from conversation messages.
- * Matches the extraction prompts described in SKILL.md.
- */
-
- import { chatCompletion, resolveLLMConfig } from './llm-client.js';
-
- // ---------------------------------------------------------------------------
- // Types
- // ---------------------------------------------------------------------------
-
- export interface ExtractedFact {
-   text: string;
-   type: 'fact' | 'preference' | 'decision' | 'episodic' | 'goal';
-   importance: number; // 1-10
- }
-
- interface ContentBlock {
-   type?: string;
-   text?: string;
-   thinking?: string;
- }
-
- interface ConversationMessage {
-   role?: string;
-   content?: string | ContentBlock[];
-   text?: string;
- }
-
- // ---------------------------------------------------------------------------
- // Extraction Prompt
- // ---------------------------------------------------------------------------
-
- const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine. Analyze the conversation and extract atomic facts worth remembering long-term.
-
- Rules:
- 1. Each fact must be a single, atomic piece of information
- 2. Focus on user-specific information: preferences, decisions, facts about them, their goals
- 3. Skip generic knowledge, greetings, and small talk
- 4. Skip information that is only relevant to the current conversation
- 5. Score importance 1-10 (6+ = worth storing, below 6 = skip)
- 6. Only extract facts with importance >= 6
-
- Types:
- fact: Objective information about the user
- preference: Likes, dislikes, or preferences
- decision: Choices the user has made
- episodic: Events or experiences
- goal: Objectives or targets
-
- Return a JSON array (no markdown, no code fences):
- [{"text": "...", "type": "...", "importance": N}, ...]
-
- If nothing is worth extracting, return: []`;
-
- // ---------------------------------------------------------------------------
- // Helpers
- // ---------------------------------------------------------------------------
-
- /**
- * Extract text content from a conversation message (handles various formats).
- *
- * OpenClaw AgentMessage objects use content arrays:
- * { role: "user", content: [{ type: "text", text: "..." }] }
- * { role: "assistant", content: [{ type: "text", text: "..." }, { type: "toolCall", ... }] }
- *
- * We also handle the simpler { role, content: "string" } format.
- */
- function messageToText(msg: unknown): { role: string; content: string } | null {
-   if (!msg || typeof msg !== 'object') return null;
-
-   const m = msg as ConversationMessage;
-   const role = m.role ?? 'unknown';
-
-   // Only keep user and assistant messages
-   if (role !== 'user' && role !== 'assistant') return null;
-
-   let textContent: string;
-
-   if (typeof m.content === 'string') {
-     // Simple string content
-     textContent = m.content;
-   } else if (Array.isArray(m.content)) {
-     // OpenClaw AgentMessage format: array of content blocks.
-     // Extract text from { type: "text", text: "..." } blocks.
-     const textParts = (m.content as ContentBlock[])
-       .filter((block) => block.type === 'text' && typeof block.text === 'string')
-       .map((block) => block.text as string);
-     textContent = textParts.join('\n');
-   } else if (typeof m.text === 'string') {
-     // Fallback: { text: "..." } field
-     textContent = m.text;
-   } else {
-     return null;
-   }
-
-   if (textContent.length < 3) return null;
-
-   return { role, content: textContent };
- }
-
- /**
- * Truncate messages to fit within a token budget (rough estimate: 4 chars per token).
- */
- function truncateMessages(messages: Array<{ role: string; content: string }>, maxChars: number): string {
-   const lines: string[] = [];
-   let totalChars = 0;
-
-   for (const msg of messages) {
-     const line = `[${msg.role}]: ${msg.content}`;
-     if (totalChars + line.length > maxChars) break;
-     lines.push(line);
-     totalChars += line.length;
-   }
-
-   return lines.join('\n\n');
- }
-
- /**
- * Parse the LLM response into structured facts.
- */
- function parseFactsResponse(response: string): ExtractedFact[] {
-   // Strip markdown code fences if present
-   let cleaned = response.trim();
-   if (cleaned.startsWith('```')) {
-     cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
-   }
-
-   try {
-     const parsed = JSON.parse(cleaned);
-     if (!Array.isArray(parsed)) return [];
-
-     return parsed
-       .filter(
-         (f: unknown) =>
-           f &&
-           typeof f === 'object' &&
-           typeof (f as ExtractedFact).text === 'string' &&
-           (f as ExtractedFact).text.length >= 5,
-       )
-       .map((f: unknown) => {
-         const fact = f as Record<string, unknown>;
-         return {
-           text: String(fact.text).slice(0, 512),
-           type: (['fact', 'preference', 'decision', 'episodic', 'goal'].includes(String(fact.type))
-             ? String(fact.type)
-             : 'fact') as ExtractedFact['type'],
-           importance: Math.max(1, Math.min(10, Number(fact.importance) || 5)),
-         };
-       })
-       .filter((f) => f.importance >= 6); // Only keep important facts
-   } catch {
-     return [];
-   }
- }
-
- // ---------------------------------------------------------------------------
- // Main extraction function
- // ---------------------------------------------------------------------------
-
- /**
- * Extract facts from a list of conversation messages using an LLM.
- *
- * @param rawMessages - The messages array from the hook event (unknown[])
- * @param mode - 'turn' for agent_end (recent only), 'full' for compaction/reset
- * @returns Array of extracted facts, or empty array on failure.
- */
- export async function extractFacts(
-   rawMessages: unknown[],
-   mode: 'turn' | 'full',
- ): Promise<ExtractedFact[]> {
-   const config = resolveLLMConfig();
-   if (!config) return []; // No LLM available
-
-   // Parse messages
-   const parsed = rawMessages
-     .map(messageToText)
-     .filter((m): m is { role: string; content: string } => m !== null);
-
-   if (parsed.length === 0) return [];
-
-   // For 'turn' mode, only look at the last 6 messages (3 turns).
-   // For 'full' mode, use all messages but truncate to fit the token budget.
-   const relevantMessages = mode === 'turn' ? parsed.slice(-6) : parsed;
-
-   // Truncate to ~3000 tokens' worth of text
-   const conversationText = truncateMessages(relevantMessages, 12_000);
-
-   if (conversationText.length < 20) return [];
-
-   const userPrompt =
-     mode === 'turn'
-       ? `Extract important facts from these recent conversation turns:\n\n${conversationText}`
-       : `Extract ALL valuable long-term memories from this conversation before it is lost:\n\n${conversationText}`;
-
-   try {
-     const response = await chatCompletion(config, [
-       { role: 'system', content: EXTRACTION_SYSTEM_PROMPT },
-       { role: 'user', content: userPrompt },
-     ]);
-
-     if (!response) return [];
-
-     return parseFactsResponse(response);
-   } catch {
-     return []; // Fail silently -- hooks must never break the agent
-   }
- }
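
To see where this fit, here is a sketch of an agent_end hook calling the extractor. The event shape below is a simplified assumption for illustration, not the actual OpenClaw hook type.

import { extractFacts } from './extractor.js'; // illustrative path

// Hypothetical hook payload: only the messages array matters to extractFacts.
async function onAgentEnd(event: { messages: unknown[] }): Promise<void> {
  // 'turn' mode considers only the last 6 messages (3 turns).
  const facts = await extractFacts(event.messages, 'turn');
  for (const fact of facts) {
    // Each surviving fact has importance >= 6 and text capped at 512 chars.
    console.log(`[${fact.type} @ ${fact.importance}] ${fact.text}`);
  }
}
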
package/generate-mnemonic.ts DELETED
@@ -1,14 +0,0 @@
- #!/usr/bin/env npx tsx
- /**
- * Generate a BIP-39 12-word mnemonic for use as TOTALRECLAW_MASTER_PASSWORD.
- *
- * Usage: npx tsx generate-mnemonic.ts
- */
- import { generateMnemonic } from '@scure/bip39';
- import { wordlist } from '@scure/bip39/wordlists/english.js';
-
- const mnemonic = generateMnemonic(wordlist, 128);
- console.log('\n Your TotalReclaw master mnemonic (12 words):\n');
- console.log(` ${mnemonic}\n`);
- console.log(' WRITE THIS DOWN. If you lose it, your memories are unrecoverable.');
- console.log(' Set it as TOTALRECLAW_MASTER_PASSWORD in your .env file.\n');
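
The generated mnemonic fed straight into the BIP-39 branch of deriveKeys() in crypto.ts; a sketch of the round trip (import path for the local module is illustrative):

import { generateMnemonic } from '@scure/bip39';
import { wordlist } from '@scure/bip39/wordlists/english.js';
import { deriveKeys } from './crypto.js'; // illustrative path

const mnemonic = generateMnemonic(wordlist, 128); // 128 bits of entropy -> 12 words

// deriveKeys() detects the mnemonic and uses the BIP-39 seed + HKDF path; the
// salt is deterministic (first 32 bytes of the seed), so the same 12 words
// reproduce the same keys on any machine -- no stored salt required.
const { authKey, encryptionKey, dedupKey, salt } = deriveKeys(mnemonic);
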