@totalreclaw/totalreclaw 1.0.4 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -67
- package/api-client.ts +328 -0
- package/consolidation.test.ts +356 -0
- package/consolidation.ts +227 -0
- package/crypto.ts +351 -0
- package/embedding.ts +84 -0
- package/extractor-dedup.test.ts +168 -0
- package/extractor.ts +237 -0
- package/generate-mnemonic.ts +14 -0
- package/hot-cache-wrapper.ts +126 -0
- package/import-adapters/base-adapter.ts +93 -0
- package/import-adapters/import-adapters.test.ts +595 -0
- package/import-adapters/index.ts +22 -0
- package/import-adapters/mcp-memory-adapter.ts +274 -0
- package/import-adapters/mem0-adapter.ts +233 -0
- package/import-adapters/types.ts +89 -0
- package/index.ts +2661 -0
- package/llm-client.ts +418 -0
- package/lsh.test.ts +463 -0
- package/lsh.ts +257 -0
- package/package.json +18 -33
- package/pocv2-e2e-test.ts +917 -0
- package/reranker.test.ts +594 -0
- package/reranker.ts +537 -0
- package/semantic-dedup.test.ts +392 -0
- package/semantic-dedup.ts +100 -0
- package/setup.sh +19 -0
- package/store-dedup-wiring.test.ts +186 -0
- package/subgraph-search.ts +282 -0
- package/subgraph-store.ts +346 -0
- package/SKILL.md +0 -709
- package/dist/index.js +0 -32154
package/extractor.ts
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TotalReclaw Plugin - Fact Extractor
|
|
3
|
+
*
|
|
4
|
+
* Uses LLM calls to extract atomic facts from conversation messages.
|
|
5
|
+
* Matches the extraction prompts described in SKILL.md.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { chatCompletion, resolveLLMConfig } from './llm-client.js';
|
|
9
|
+
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Types
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
/** Operation the extractor requests for a fact, relative to existing memories. */
export type ExtractionAction =
  | 'ADD'
  | 'UPDATE'
  | 'DELETE'
  | 'NOOP';

/** One atomic memory produced by the extraction step. */
export interface ExtractedFact {
  /** The fact itself, as a natural-language statement. */
  text: string;
  /** Category the fact belongs to. */
  type: 'fact' | 'preference' | 'decision' | 'episodic' | 'goal';
  /** Importance score on a 1-10 scale. */
  importance: number;
  /** What should be done with this fact. */
  action: ExtractionAction;
  /** ID of the existing memory an UPDATE or DELETE refers to, when provided. */
  existingFactId?: string;
}

/** One element of an array-style message body; every field is optional. */
interface ContentBlock {
  type?: string;
  text?: string;
  thinking?: string;
}

/** Loosely-typed conversation message as it arrives from the hook event. */
interface ConversationMessage {
  role?: string;
  /** Either a plain string or an array of content blocks. */
  content?: string | ContentBlock[];
  /** Alternative location for the message text when `content` is absent. */
  text?: string;
}
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Extraction Prompt
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
// System prompt sent with every extraction request.
// The importance threshold stated in rules 5 and 6 must stay in sync with the
// `importance >= 6` filter applied when the response is parsed; the two rules
// previously contradicted each other (7 vs 6).
const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine. Analyze the conversation and extract atomic facts worth remembering long-term.

Rules:
1. Each fact must be a single, atomic piece of information
2. Focus on user-specific information: preferences, decisions, facts about them, their goals
3. Skip generic knowledge, greetings, and small talk
4. Skip information that is only relevant to the current conversation
5. Score importance 1-10 (6+ = worth storing, below 6 = skip)
6. Only extract facts with importance >= 6

Types:
- fact: Objective information about the user
- preference: Likes, dislikes, or preferences
- decision: Choices the user has made
- episodic: Events or experiences
- goal: Objectives or targets

Actions (compare against existing memories if provided):
- ADD: New fact, no conflict with existing memories
- UPDATE: Modifies or refines an existing memory (provide existingFactId)
- DELETE: Contradicts an existing memory — the old one is now wrong (provide existingFactId)
- NOOP: Already captured in existing memories or not worth storing

Return a JSON array (no markdown, no code fences):
[{"text": "...", "type": "...", "importance": N, "action": "ADD|UPDATE|DELETE|NOOP", "existingFactId": "..."}, ...]

If nothing is worth extracting, return: []`;
|
|
67
|
+
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Helpers
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
/**
 * Normalize one raw hook message into a plain-text `{ role, content }` pair.
 *
 * Recognized shapes:
 *   - `{ role, content: "..." }`: the string is used as-is.
 *   - `{ role, content: [{ type: "text", text: "..." }, ...] }`: the text of
 *     every `text` block is joined with newlines; other block types
 *     (e.g. tool calls) are ignored.
 *   - `{ role, text: "..." }`: used when `content` is neither string nor array.
 *
 * @returns null for non-objects, for roles other than `user` / `assistant`,
 *          for unrecognized shapes, and when the text is shorter than 3 chars.
 */
function messageToText(msg: unknown): { role: string; content: string } | null {
  if (typeof msg !== 'object' || !msg) return null;

  const message = msg as ConversationMessage;
  const role = message.role ?? 'unknown';

  // System, tool and role-less messages are not part of the conversation text.
  if (!(role === 'user' || role === 'assistant')) return null;

  const { content } = message;
  let text: string | null = null;

  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    const pieces = (content as ContentBlock[]).reduce<string[]>((acc, block) => {
      if (block.type === 'text' && typeof block.text === 'string') {
        acc.push(block.text);
      }
      return acc;
    }, []);
    text = pieces.join('\n');
  } else if (typeof message.text === 'string') {
    text = message.text;
  }

  if (text === null || text.length < 3) return null;

  return { role, content: text };
}
|
|
113
|
+
|
|
114
|
+
/**
 * Render messages as `[role]: content` entries separated by a blank line,
 * keeping the whole result within `maxChars` characters (callers budget with a
 * rough estimate of 4 characters per token).
 *
 * Messages are consumed in order. The separator between entries counts toward
 * the budget. The first entry that does not fit is clipped to the remaining
 * budget rather than dropped, so a single oversized leading message no longer
 * produces an empty transcript; nothing after the clipped entry is included.
 *
 * @param messages - Already-normalized conversation messages, oldest first.
 * @param maxChars - Maximum length of the returned string.
 * @returns The joined transcript, or an empty string when nothing fits.
 */
function truncateMessages(messages: Array<{ role: string; content: string }>, maxChars: number): string {
  const separator = '\n\n';
  const lines: string[] = [];
  let usedChars = 0;

  for (const msg of messages) {
    const prefix = `[${msg.role}]: `;
    const line = prefix + msg.content;
    const gap = lines.length > 0 ? separator.length : 0;
    const remaining = maxChars - usedChars - gap;

    if (line.length <= remaining) {
      lines.push(line);
      usedChars += gap + line.length;
      continue;
    }

    // Budget exhausted: keep a clipped copy only if it still carries some of
    // the message content beyond the role prefix, then stop.
    if (remaining > prefix.length) {
      lines.push(line.slice(0, remaining));
    }
    break;
  }

  return lines.join(separator);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Parse the raw LLM response into validated, normalized facts.
 *
 * Parsing is tolerant of common response noise: a surrounding markdown code
 * fence is stripped, and if the remaining text is still not valid JSON the
 * outermost `[` ... `]` span is tried on its own (covers responses such as
 * "Here are the facts: [...]").
 *
 * Normalization per item:
 *   - items without a string `text` of at least 5 characters are dropped;
 *   - `text` is capped at 512 characters;
 *   - unknown `type` falls back to 'fact', unknown `action` to 'ADD';
 *   - `importance` is coerced to a number (default 5) and clamped to 1-10;
 *   - items with importance below 6 are dropped unless the action is DELETE.
 *
 * @param response - Raw completion text returned by the model.
 * @returns The accepted facts; an empty array when nothing can be parsed.
 */
function parseFactsResponse(response: string): ExtractedFact[] {
  const validActions: readonly string[] = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
  const validTypes: readonly string[] = ['fact', 'preference', 'decision', 'episodic', 'goal'];

  // Strip markdown code fences if present
  let cleaned = response.trim();
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    // Fallback: the model wrapped the array in prose; try the bracketed span.
    const start = cleaned.indexOf('[');
    const end = cleaned.lastIndexOf(']');
    if (start === -1 || end <= start) return [];
    try {
      parsed = JSON.parse(cleaned.slice(start, end + 1));
    } catch {
      return [];
    }
  }

  if (!Array.isArray(parsed)) return [];

  const facts: ExtractedFact[] = [];
  for (const item of parsed as unknown[]) {
    if (!item || typeof item !== 'object') continue;

    const raw = item as Record<string, unknown>;
    if (typeof raw.text !== 'string' || raw.text.length < 5) continue;

    const actionStr = String(raw.action);
    // Default to ADD for backward compatibility with responses lacking an action
    const action: ExtractionAction = validActions.includes(actionStr)
      ? (actionStr as ExtractionAction)
      : 'ADD';

    const typeStr = String(raw.type);
    const importance = Math.max(1, Math.min(10, Number(raw.importance) || 5));

    // DELETE actions pass regardless of importance
    if (importance < 6 && action !== 'DELETE') continue;

    facts.push({
      text: raw.text.slice(0, 512),
      type: (validTypes.includes(typeStr) ? typeStr : 'fact') as ExtractedFact['type'],
      importance,
      action,
      existingFactId: typeof raw.existingFactId === 'string' ? raw.existingFactId : undefined,
    });
  }

  return facts;
}
|
|
174
|
+
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
// Main extraction function
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
/**
 * Run LLM-based fact extraction over a list of raw conversation messages.
 *
 * Returns an empty array, without calling the model, when no LLM config can be
 * resolved, when no usable user/assistant message is present, or when the
 * rendered conversation is shorter than 20 characters. Any error raised by the
 * completion call or by parsing is swallowed and also yields an empty array.
 *
 * @param rawMessages - The messages array from the hook event (unknown[])
 * @param mode - 'turn' looks only at the last 6 usable messages; 'full' uses
 *               all of them. Both are truncated to a 12,000 character budget.
 * @param existingMemories - Optional existing memories, listed in the prompt
 *                           with their IDs so the model can classify overlaps.
 * @returns Array of extracted facts, or empty array on failure.
 */
export async function extractFacts(
  rawMessages: unknown[],
  mode: 'turn' | 'full',
  existingMemories?: Array<{ id: string; text: string }>,
): Promise<ExtractedFact[]> {
  const config = resolveLLMConfig();
  if (!config) return []; // No LLM available

  // Keep only the messages that normalize to non-trivial user/assistant text.
  const usable: Array<{ role: string; content: string }> = [];
  for (const raw of rawMessages) {
    const converted = messageToText(raw);
    if (converted !== null) usable.push(converted);
  }
  if (usable.length === 0) return [];

  const isTurn = mode === 'turn';

  // 'turn': last 6 messages (3 turns). 'full': everything, bounded by the budget below.
  const windowed = isTurn ? usable.slice(-6) : usable;

  // ~3000 tokens worth of text at roughly 4 chars per token
  const conversationText = truncateMessages(windowed, 12_000);
  if (conversationText.length < 20) return [];

  // Optional dedup context listing each existing memory with its ID.
  const memoryLines = (existingMemories ?? []).map((m) => `[ID: ${m.id}] ${m.text}`);
  const memoriesContext =
    memoryLines.length > 0
      ? `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoryLines.join('\n')}`
      : '';

  const instruction = isTurn
    ? 'Extract important facts from these recent conversation turns:'
    : 'Extract ALL valuable long-term memories from this conversation before it is lost:';
  const userPrompt = `${instruction}\n\n${conversationText}${memoriesContext}`;

  try {
    const response = await chatCompletion(config, [
      { role: 'system', content: EXTRACTION_SYSTEM_PROMPT },
      { role: 'user', content: userPrompt },
    ]);

    return response ? parseFactsResponse(response) : [];
  } catch {
    return []; // Fail silently -- hooks must never break the agent
  }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env -S npx tsx
/**
 * Generate a BIP-39 12-word mnemonic for use as TOTALRECLAW_MASTER_PASSWORD.
 *
 * Usage: npx tsx generate-mnemonic.ts
 *
 * The shebang uses `env -S` so that "npx tsx" is split into separate arguments
 * when the file is executed directly; without it, Linux passes "npx tsx" to
 * env as a single program name and the script fails to start.
 */
import { generateMnemonic } from '@scure/bip39';
import { wordlist } from '@scure/bip39/wordlists/english.js';

// 128 bits of entropy, printed below as a 12-word phrase.
const mnemonic = generateMnemonic(wordlist, 128);

// The phrase is written to stdout only; it is not persisted by this script.
console.log('\n Your TotalReclaw master mnemonic (12 words):\n');
console.log(` ${mnemonic}\n`);
console.log(' WRITE THIS DOWN. If you lose it, your memories are unrecoverable.');
console.log(' Set it as TOTALRECLAW_MASTER_PASSWORD in your .env file.\n');
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hot cache wrapper for the plugin.
|
|
3
|
+
*
|
|
4
|
+
* Self-contained AES-256-GCM encrypted cache (same implementation as
|
|
5
|
+
* client/src/cache/hot-cache.ts but without cross-package import).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import crypto from 'node:crypto';
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
|
|
12
|
+
/** A memory entry held in the hot cache. */
export interface HotFact {
  id: string;
  text: string;
  importance: number;
}

/** Shape of the JSON document that is encrypted and written to disk. */
interface CachePayload {
  hotFacts: HotFact[];
  factCount: number;
  lastSyncedBlock: number;
  smartAccountAddress: string;
  lastUpdatedAt?: number; // Unix timestamp (ms) of last cache update
  // Embedding of last search query. `null` is allowed because the in-memory
  // field is `number[] | null` and is serialized as-is.
  lastQueryEmbedding?: number[] | null;
}

const MAX_HOT_FACTS = 30;
const IV_LENGTH = 12;
const TAG_LENGTH = 16;

/**
 * AES-256-GCM encrypted on-disk cache of the most important facts plus a few
 * sync bookkeeping values.
 *
 * File layout: `[12-byte IV][16-byte auth tag][ciphertext]`, where the
 * plaintext is the JSON-serialized `CachePayload`.
 *
 * State lives in memory; nothing touches the disk until `flush()` or `load()`
 * is called.
 */
export class PluginHotCache {
  private hotFacts: HotFact[] = [];
  private factCount = 0;
  private lastSyncedBlock = 0;
  private smartAccountAddress = '';
  private lastUpdatedAt = 0;
  private lastQueryEmbedding: number[] | null = null;
  private key: Buffer;

  /**
   * @param cachePath - File the encrypted cache is read from / written to.
   * @param hexKey - Encryption key as a hex string. It is not validated here;
   *                 a key of the wrong length makes `flush()` throw and makes
   *                 `load()` fall back to an empty cache.
   */
  constructor(private cachePath: string, hexKey: string) {
    this.key = Buffer.from(hexKey, 'hex');
  }

  /** Returns a shallow copy of the cached facts. */
  getHotFacts(): HotFact[] { return [...this.hotFacts]; }
  getFactCount(): number { return this.factCount; }
  getLastSyncedBlock(): number { return this.lastSyncedBlock; }
  getSmartAccountAddress(): string { return this.smartAccountAddress; }
  getLastUpdatedAt(): number { return this.lastUpdatedAt; }
  /** Returns a copy of the last query embedding, or null when none is stored. */
  getLastQueryEmbedding(): number[] | null { return this.lastQueryEmbedding ? [...this.lastQueryEmbedding] : null; }

  /**
   * Replace the cached facts with the `MAX_HOT_FACTS` most important entries of
   * `facts` (sorted by descending importance) and stamp the update time.
   * The input array is not mutated.
   */
  setHotFacts(facts: HotFact[]): void {
    const sorted = [...facts].sort((a, b) => b.importance - a.importance);
    this.hotFacts = sorted.slice(0, MAX_HOT_FACTS);
    this.lastUpdatedAt = Date.now();
  }

  setFactCount(count: number): void { this.factCount = count; }
  setLastSyncedBlock(block: number): void { this.lastSyncedBlock = block; }
  setSmartAccountAddress(addr: string): void { this.smartAccountAddress = addr; }
  setLastUpdatedAt(ts: number): void { this.lastUpdatedAt = ts; }
  /** Stores a copy of the embedding (or clears it when given null). */
  setLastQueryEmbedding(embedding: number[] | null): void { this.lastQueryEmbedding = embedding ? [...embedding] : null; }

  /**
   * Check if the cache is fresh (within TTL).
   * A cache that has never been updated (timestamp 0) is never fresh.
   * @param ttlMs TTL in milliseconds (default: 5 minutes)
   */
  isFresh(ttlMs: number = 300_000): boolean {
    if (this.lastUpdatedAt === 0) return false;
    return (Date.now() - this.lastUpdatedAt) < ttlMs;
  }

  /**
   * Encrypt the current state with a fresh random IV and write it to
   * `cachePath`, creating the parent directory if needed.
   *
   * @throws Propagates crypto and filesystem errors (e.g. invalid key length,
   *         unwritable path).
   */
  flush(): void {
    const payload: CachePayload = {
      hotFacts: this.hotFacts,
      factCount: this.factCount,
      lastSyncedBlock: this.lastSyncedBlock,
      smartAccountAddress: this.smartAccountAddress,
      lastUpdatedAt: this.lastUpdatedAt,
      lastQueryEmbedding: this.lastQueryEmbedding,
    };

    const plaintext = Buffer.from(JSON.stringify(payload), 'utf-8');
    const iv = crypto.randomBytes(IV_LENGTH);
    const cipher = crypto.createCipheriv('aes-256-gcm', this.key, iv);
    const encrypted = Buffer.concat([cipher.update(plaintext), cipher.final()]);
    const tag = cipher.getAuthTag();

    const output = Buffer.concat([iv, tag, encrypted]);

    // Recursive mkdir is a no-op when the directory already exists, so no
    // separate existence check (and no check-then-create race) is needed.
    fs.mkdirSync(path.dirname(this.cachePath), { recursive: true });
    fs.writeFileSync(this.cachePath, output);
  }

  /**
   * Read and decrypt the cache file into memory.
   *
   * A missing file leaves the current state untouched. A file that is too
   * short, fails authentication/decryption, or does not contain valid JSON
   * resets the cache to its empty state; this method does not throw for those
   * cases. Fields of an unexpected type fall back to their empty defaults.
   */
  load(): void {
    if (!fs.existsSync(this.cachePath)) return;

    try {
      const data = fs.readFileSync(this.cachePath);
      if (data.length < IV_LENGTH + TAG_LENGTH) {
        // Truncated file: treat like any other corrupted cache.
        this.reset();
        return;
      }

      const iv = data.subarray(0, IV_LENGTH);
      const tag = data.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
      const ciphertext = data.subarray(IV_LENGTH + TAG_LENGTH);

      const decipher = crypto.createDecipheriv('aes-256-gcm', this.key, iv);
      decipher.setAuthTag(tag);
      // final() throws when the auth tag does not match (wrong key or tampering).
      const decrypted = Buffer.concat([decipher.update(ciphertext), decipher.final()]);

      const payload = JSON.parse(decrypted.toString('utf-8')) as Partial<CachePayload> | null;
      this.hotFacts = Array.isArray(payload?.hotFacts) ? payload.hotFacts : [];
      this.factCount = typeof payload?.factCount === 'number' ? payload.factCount : 0;
      this.lastSyncedBlock = typeof payload?.lastSyncedBlock === 'number' ? payload.lastSyncedBlock : 0;
      this.smartAccountAddress = typeof payload?.smartAccountAddress === 'string' ? payload.smartAccountAddress : '';
      this.lastUpdatedAt = typeof payload?.lastUpdatedAt === 'number' ? payload.lastUpdatedAt : 0;
      this.lastQueryEmbedding = Array.isArray(payload?.lastQueryEmbedding) ? payload.lastQueryEmbedding : null;
    } catch {
      this.reset();
    }
  }

  /** Restore every field to its empty default. */
  private reset(): void {
    this.hotFacts = [];
    this.factCount = 0;
    this.lastSyncedBlock = 0;
    this.smartAccountAddress = '';
    this.lastUpdatedAt = 0;
    this.lastQueryEmbedding = null;
  }
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
NormalizedFact,
|
|
3
|
+
ImportSource,
|
|
4
|
+
AdapterParseResult,
|
|
5
|
+
ProgressCallback,
|
|
6
|
+
} from './types.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Abstract base class for import adapters.
 *
 * Each adapter:
 * 1. Fetches or reads source data
 * 2. Parses into NormalizedFact[]
 * 3. Validates each fact
 *
 * The caller (import tool) handles encryption + storage.
 */
export abstract class BaseImportAdapter {
  /** Identifier of the system this adapter imports from. */
  abstract readonly source: ImportSource;
  /** Human-readable adapter name. */
  abstract readonly displayName: string;

  /**
   * Parse source data into normalized facts.
   *
   * For API sources, this fetches from the API.
   * For file sources, this parses the provided content.
   */
  abstract parse(
    input: { content?: string; api_key?: string; source_user_id?: string; api_url?: string; file_path?: string },
    onProgress?: ProgressCallback,
  ): Promise<AdapterParseResult>;

  /**
   * Validate and clean a single fact.
   *
   * - `text` must be a string with at least 3 characters after trimming; it is
   *   trimmed and capped at 512 characters.
   * - An unrecognized `type` falls back to 'fact'.
   * - `importance` is normalized to an integer in 1-10. Values in [0, 1] are
   *   treated as a 0-1 score and multiplied by 10 (so exactly 1 becomes 10);
   *   anything else is rounded and clamped. Missing or NaN values default to 5.
   * - `source` defaults to this adapter's `source`.
   *
   * @returns The cleaned fact, or null if the fact should be skipped.
   */
  protected validateFact(fact: Partial<NormalizedFact>): NormalizedFact | null {
    // Text is required and must be non-empty
    if (!fact.text || typeof fact.text !== 'string' || fact.text.trim().length < 3) {
      return null;
    }

    // Truncate to 512 chars
    const text = fact.text.trim().slice(0, 512);

    // Normalize type
    const validTypes = ['fact', 'preference', 'decision', 'episodic', 'goal'] as const;
    const type = validTypes.includes(fact.type as typeof validTypes[number])
      ? (fact.type as NormalizedFact['type'])
      : 'fact';

    // Normalize importance to 1-10.
    // NaN fails both range comparisons below and would otherwise propagate
    // through Math.round/Math.max as NaN, so it is replaced by the default.
    const numericImportance = Number(fact.importance ?? 5);
    let importance = Number.isNaN(numericImportance) ? 5 : numericImportance;
    if (importance < 0 || importance > 1) {
      // Already on 1-10 scale
      importance = Math.max(1, Math.min(10, Math.round(importance)));
    } else {
      // 0-1 scale — convert to 1-10
      importance = Math.max(1, Math.round(importance * 10));
    }

    return {
      text,
      type,
      importance,
      source: fact.source ?? this.source,
      sourceId: fact.sourceId,
      sourceTimestamp: fact.sourceTimestamp,
      tags: fact.tags,
    };
  }

  /**
   * Batch-validate an array of partial facts.
   *
   * @returns The facts that passed validation (in input order) and the number
   *          of entries that were rejected.
   */
  protected validateFacts(
    rawFacts: Partial<NormalizedFact>[],
  ): { facts: NormalizedFact[]; invalidCount: number } {
    const facts: NormalizedFact[] = [];
    let invalidCount = 0;

    for (const raw of rawFacts) {
      const validated = this.validateFact(raw);
      if (validated) {
        facts.push(validated);
      } else {
        invalidCount++;
      }
    }

    return { facts, invalidCount };
  }
}
|