@totalreclaw/totalreclaw 3.3.1-rc.2 → 3.3.1-rc.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +330 -0
- package/SKILL.md +50 -83
- package/api-client.ts +18 -11
- package/config.ts +117 -3
- package/crypto.ts +10 -2
- package/dist/api-client.js +226 -0
- package/dist/billing-cache.js +100 -0
- package/dist/claims-helper.js +606 -0
- package/dist/config.js +280 -0
- package/dist/consolidation.js +258 -0
- package/dist/contradiction-sync.js +1034 -0
- package/dist/crypto.js +138 -0
- package/dist/digest-sync.js +361 -0
- package/dist/download-ux.js +63 -0
- package/dist/embedding.js +86 -0
- package/dist/extractor.js +1225 -0
- package/dist/first-run.js +103 -0
- package/dist/fs-helpers.js +563 -0
- package/dist/gateway-url.js +197 -0
- package/dist/generate-mnemonic.js +13 -0
- package/dist/hot-cache-wrapper.js +101 -0
- package/dist/import-adapters/base-adapter.js +64 -0
- package/dist/import-adapters/chatgpt-adapter.js +238 -0
- package/dist/import-adapters/claude-adapter.js +114 -0
- package/dist/import-adapters/gemini-adapter.js +201 -0
- package/dist/import-adapters/index.js +26 -0
- package/dist/import-adapters/mcp-memory-adapter.js +219 -0
- package/dist/import-adapters/mem0-adapter.js +158 -0
- package/dist/import-adapters/types.js +1 -0
- package/dist/index.js +5348 -0
- package/dist/llm-client.js +686 -0
- package/dist/llm-profile-reader.js +346 -0
- package/dist/lsh.js +62 -0
- package/dist/onboarding-cli.js +750 -0
- package/dist/pair-cli.js +344 -0
- package/dist/pair-crypto.js +359 -0
- package/dist/pair-http.js +404 -0
- package/dist/pair-page.js +826 -0
- package/dist/pair-qr.js +107 -0
- package/dist/pair-remote-client.js +410 -0
- package/dist/pair-session-store.js +566 -0
- package/dist/pin.js +542 -0
- package/dist/qa-bug-report.js +301 -0
- package/dist/relay-headers.js +44 -0
- package/dist/reranker.js +442 -0
- package/dist/retype-setscope.js +348 -0
- package/dist/semantic-dedup.js +75 -0
- package/dist/subgraph-search.js +289 -0
- package/dist/subgraph-store.js +694 -0
- package/dist/tool-gating.js +58 -0
- package/download-ux.ts +91 -0
- package/embedding.ts +32 -9
- package/fs-helpers.ts +124 -0
- package/gateway-url.ts +57 -9
- package/index.ts +586 -357
- package/llm-client.ts +211 -23
- package/lsh.ts +7 -2
- package/onboarding-cli.ts +114 -1
- package/package.json +19 -5
- package/pair-cli.ts +76 -8
- package/pair-crypto.ts +34 -24
- package/pair-page.ts +28 -17
- package/pair-qr.ts +152 -0
- package/pair-remote-client.ts +540 -0
- package/qa-bug-report.ts +381 -0
- package/relay-headers.ts +50 -0
- package/reranker.ts +73 -0
- package/retype-setscope.ts +12 -0
- package/subgraph-search.ts +4 -3
- package/subgraph-store.ts +109 -16
|
@@ -0,0 +1,1225 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TotalReclaw Plugin - Fact Extractor
|
|
3
|
+
*
|
|
4
|
+
* Uses LLM calls to extract atomic facts from conversation messages.
|
|
5
|
+
* Matches the extraction prompts described in SKILL.md.
|
|
6
|
+
*/
|
|
7
|
+
import { chatCompletion, resolveLLMConfig } from './llm-client.js';
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Memory Taxonomy v1 — the 6 canonical memory types. Single source of truth.
|
|
10
|
+
//
|
|
11
|
+
// Plugin v3.0.0 adopts v1 as the ONLY taxonomy. Legacy v0 tokens
|
|
12
|
+
// (fact, decision, episodic, goal, context, rule) are accepted only on the
|
|
13
|
+
// read-side via `LEGACY_V0_MEMORY_TYPES` / `V0_TO_V1_TYPE` and
|
|
14
|
+
// `normalizeToV1Type` in `claims-helper.ts`, so pre-v3 vault entries can
|
|
15
|
+
// still be decoded. Extraction and write paths emit v1 exclusively.
|
|
16
|
+
//
|
|
17
|
+
// When adding a new type, update ALL of:
|
|
18
|
+
// - This constant
|
|
19
|
+
// - `mcp/src/v1-types.ts`
|
|
20
|
+
// - `python/src/totalreclaw/agent/extraction.py`
|
|
21
|
+
// - `rust/totalreclaw-core/src/claims.rs`
|
|
22
|
+
// - `skill/plugin/claims-helper.ts`
|
|
23
|
+
// - The `EXTRACTION_SYSTEM_PROMPT` Types: list
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
export const VALID_MEMORY_TYPES = [
|
|
26
|
+
'claim',
|
|
27
|
+
'preference',
|
|
28
|
+
'directive',
|
|
29
|
+
'commitment',
|
|
30
|
+
'episode',
|
|
31
|
+
'summary',
|
|
32
|
+
];
|
|
33
|
+
/**
|
|
34
|
+
* Runtime type guard — returns whether an unknown value is a valid v1
|
|
35
|
+
* `MemoryType`. Legacy v0 tokens return `false`; use `normalizeToV1Type()`
|
|
36
|
+
* in `claims-helper.ts` to coerce them on the read path.
|
|
37
|
+
*/
|
|
38
|
+
export function isValidMemoryType(value) {
|
|
39
|
+
return typeof value === 'string' && VALID_MEMORY_TYPES.includes(value);
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Backward-compat alias. Same list as `VALID_MEMORY_TYPES`.
|
|
43
|
+
* @deprecated Use `VALID_MEMORY_TYPES` instead.
|
|
44
|
+
*/
|
|
45
|
+
export const VALID_MEMORY_TYPES_V1 = VALID_MEMORY_TYPES;
|
|
46
|
+
/**
|
|
47
|
+
* Backward-compat alias. Same guard as `isValidMemoryType`.
|
|
48
|
+
* @deprecated Use `isValidMemoryType` instead.
|
|
49
|
+
*/
|
|
50
|
+
export function isValidMemoryTypeV1(value) {
|
|
51
|
+
return isValidMemoryType(value);
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Legacy v0 memory types — retained as a typed constant so the read-side
|
|
55
|
+
* `V0_TO_V1_TYPE` mapping can reference them without redeclaration.
|
|
56
|
+
*
|
|
57
|
+
* Do NOT emit these on the write/extraction path. They exist solely so
|
|
58
|
+
* `claims-helper.ts::readClaimFromBlob` can decode pre-v1 vault entries
|
|
59
|
+
* whose encrypted blobs still carry v0 token strings.
|
|
60
|
+
*/
|
|
61
|
+
export const LEGACY_V0_MEMORY_TYPES = [
|
|
62
|
+
'fact',
|
|
63
|
+
'preference',
|
|
64
|
+
'decision',
|
|
65
|
+
'episodic',
|
|
66
|
+
'goal',
|
|
67
|
+
'context',
|
|
68
|
+
'summary',
|
|
69
|
+
'rule',
|
|
70
|
+
];
|
|
71
|
+
export const VALID_MEMORY_SOURCES = [
|
|
72
|
+
'user',
|
|
73
|
+
'user-inferred',
|
|
74
|
+
'assistant',
|
|
75
|
+
'external',
|
|
76
|
+
'derived',
|
|
77
|
+
];
|
|
78
|
+
export const VALID_MEMORY_SCOPES = [
|
|
79
|
+
'work',
|
|
80
|
+
'personal',
|
|
81
|
+
'health',
|
|
82
|
+
'family',
|
|
83
|
+
'creative',
|
|
84
|
+
'finance',
|
|
85
|
+
'misc',
|
|
86
|
+
'unspecified',
|
|
87
|
+
];
|
|
88
|
+
export const VALID_MEMORY_VOLATILITIES = [
|
|
89
|
+
'stable',
|
|
90
|
+
'updatable',
|
|
91
|
+
'ephemeral',
|
|
92
|
+
];
|
|
93
|
+
/**
|
|
94
|
+
* Legacy v0 → v1 type mapping used by the read-side adapter when decoding
|
|
95
|
+
* a pre-v1 vault entry that still carries a v0 token string.
|
|
96
|
+
*
|
|
97
|
+
* Decisions (v0) map to v1 `claim` — the reasoning lives in the separate
|
|
98
|
+
* `reasoning` field rather than being encoded in the type.
|
|
99
|
+
*/
|
|
100
|
+
export const V0_TO_V1_TYPE = {
|
|
101
|
+
fact: 'claim',
|
|
102
|
+
preference: 'preference',
|
|
103
|
+
decision: 'claim',
|
|
104
|
+
episodic: 'episode',
|
|
105
|
+
goal: 'commitment',
|
|
106
|
+
context: 'claim',
|
|
107
|
+
summary: 'summary',
|
|
108
|
+
rule: 'directive',
|
|
109
|
+
};
|
|
110
|
+
const ALLOWED_ENTITY_TYPES = new Set([
|
|
111
|
+
'person',
|
|
112
|
+
'project',
|
|
113
|
+
'tool',
|
|
114
|
+
'company',
|
|
115
|
+
'concept',
|
|
116
|
+
'place',
|
|
117
|
+
]);
|
|
118
|
+
/**
|
|
119
|
+
* Default confidence when the LLM does not provide one.
|
|
120
|
+
* Mirrors the fallback used by other extraction clients.
|
|
121
|
+
*/
|
|
122
|
+
export const DEFAULT_EXTRACTION_CONFIDENCE = 0.85;
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
// Helpers
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
/**
|
|
127
|
+
* Extract text content from a conversation message (handles various formats).
|
|
128
|
+
*
|
|
129
|
+
* OpenClaw AgentMessage objects use content arrays:
|
|
130
|
+
* { role: "user", content: [{ type: "text", text: "..." }] }
|
|
131
|
+
* { role: "assistant", content: [{ type: "text", text: "..." }, { type: "toolCall", ... }] }
|
|
132
|
+
*
|
|
133
|
+
* We also handle the simpler { role, content: "string" } format.
|
|
134
|
+
*/
|
|
135
|
+
function messageToText(msg) {
|
|
136
|
+
if (!msg || typeof msg !== 'object')
|
|
137
|
+
return null;
|
|
138
|
+
const m = msg;
|
|
139
|
+
const role = m.role ?? 'unknown';
|
|
140
|
+
// Only keep user and assistant messages
|
|
141
|
+
if (role !== 'user' && role !== 'assistant')
|
|
142
|
+
return null;
|
|
143
|
+
let textContent;
|
|
144
|
+
if (typeof m.content === 'string') {
|
|
145
|
+
// Simple string content
|
|
146
|
+
textContent = m.content;
|
|
147
|
+
}
|
|
148
|
+
else if (Array.isArray(m.content)) {
|
|
149
|
+
// OpenClaw AgentMessage format: array of content blocks
|
|
150
|
+
// Extract text from { type: "text", text: "..." } blocks
|
|
151
|
+
const textParts = m.content
|
|
152
|
+
.filter((block) => block.type === 'text' && typeof block.text === 'string')
|
|
153
|
+
.map((block) => block.text);
|
|
154
|
+
textContent = textParts.join('\n');
|
|
155
|
+
}
|
|
156
|
+
else if (typeof m.text === 'string') {
|
|
157
|
+
// Fallback: { text: "..." } field
|
|
158
|
+
textContent = m.text;
|
|
159
|
+
}
|
|
160
|
+
else {
|
|
161
|
+
return null;
|
|
162
|
+
}
|
|
163
|
+
if (textContent.length < 3)
|
|
164
|
+
return null;
|
|
165
|
+
return { role, content: textContent };
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Truncate messages to fit within a token budget (rough estimate: 4 chars per token).
|
|
169
|
+
*/
|
|
170
|
+
function truncateMessages(messages, maxChars) {
|
|
171
|
+
const lines = [];
|
|
172
|
+
let totalChars = 0;
|
|
173
|
+
for (const msg of messages) {
|
|
174
|
+
const line = `[${msg.role}]: ${msg.content}`;
|
|
175
|
+
if (totalChars + line.length > maxChars)
|
|
176
|
+
break;
|
|
177
|
+
lines.push(line);
|
|
178
|
+
totalChars += line.length;
|
|
179
|
+
}
|
|
180
|
+
return lines.join('\n\n');
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Parse a single entity object from LLM output. Returns null if invalid.
|
|
184
|
+
* Invalid entities are silently dropped so a bad entity never fails the whole fact.
|
|
185
|
+
*/
|
|
186
|
+
export function parseEntity(raw) {
|
|
187
|
+
if (!raw || typeof raw !== 'object')
|
|
188
|
+
return null;
|
|
189
|
+
const e = raw;
|
|
190
|
+
const name = typeof e.name === 'string' ? e.name.trim() : '';
|
|
191
|
+
if (name.length === 0)
|
|
192
|
+
return null;
|
|
193
|
+
const type = String(e.type ?? '').toLowerCase();
|
|
194
|
+
if (!ALLOWED_ENTITY_TYPES.has(type))
|
|
195
|
+
return null;
|
|
196
|
+
const entity = { name: name.slice(0, 128), type };
|
|
197
|
+
if (typeof e.role === 'string' && e.role.trim().length > 0) {
|
|
198
|
+
entity.role = e.role.trim().slice(0, 128);
|
|
199
|
+
}
|
|
200
|
+
return entity;
|
|
201
|
+
}
|
|
202
|
+
/**
|
|
203
|
+
* Clamp a raw confidence value to [0, 1]. Returns the default when missing or NaN.
|
|
204
|
+
*/
|
|
205
|
+
export function normalizeConfidence(raw) {
|
|
206
|
+
if (typeof raw !== 'number' || !Number.isFinite(raw))
|
|
207
|
+
return DEFAULT_EXTRACTION_CONFIDENCE;
|
|
208
|
+
if (raw < 0)
|
|
209
|
+
return 0;
|
|
210
|
+
if (raw > 1)
|
|
211
|
+
return 1;
|
|
212
|
+
return raw;
|
|
213
|
+
}
|
|
214
|
+
// ---------------------------------------------------------------------------
|
|
215
|
+
// Phase 2.2.6: lexical importance bumps
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
/**
|
|
218
|
+
* Escape regex metacharacters so a string can be used as a literal pattern.
|
|
219
|
+
*/
|
|
220
|
+
function escapeRegExp(s) {
|
|
221
|
+
return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
222
|
+
}
|
|
223
|
+
/**
|
|
224
|
+
* Compute a lexical importance bump (0-2) for a single fact based on signals
|
|
225
|
+
* in the surrounding conversation text.
|
|
226
|
+
*
|
|
227
|
+
* This is a Phase 2.2.6 quality fix complementing the prompt rubric tightening
|
|
228
|
+
* (item A). Where the rubric tells the LLM to use the full 1-10 range, the
|
|
229
|
+
* bump tells us *as a post-process*: when the user's actual phrasing carries
|
|
230
|
+
* strong "remember this" signals that the LLM may have under-weighted, push
|
|
231
|
+
* the score up.
|
|
232
|
+
*
|
|
233
|
+
* Signals detected (each adds +1, capped at +2 total):
|
|
234
|
+
*
|
|
235
|
+
* 1. **Strong intent phrases** anywhere in the conversation:
|
|
236
|
+
* "remember this", "never forget", "rule of thumb", "critical",
|
|
237
|
+
* "don't ever forget", explicit "always X" / "never Y" patterns.
|
|
238
|
+
* 2. **Emphasis markers**: `!!` (double exclamation), or 3+ all-caps words
|
|
239
|
+
* in a row (e.g. "DO NOT FORGET", "VERY IMPORTANT").
|
|
240
|
+
* 3. **Repetition**: the fact's first ~20 chars appear at least twice in
|
|
241
|
+
* the conversation text (paraphrased restating).
|
|
242
|
+
*
|
|
243
|
+
* The bump is additive on top of whatever the LLM scored; final importance
|
|
244
|
+
* is capped at 10.
|
|
245
|
+
*
|
|
246
|
+
* Final-importance ceiling: this never makes a fact pass the importance >= 6
|
|
247
|
+
* filter on its own — a fact still needs to have an LLM score >= 5 (because
|
|
248
|
+
* +2 from 5 = 7, above floor; +1 from 5 = 6, above floor). This is intentional:
|
|
249
|
+
* the bump is for "the LLM correctly identified this as worth storing but
|
|
250
|
+
* under-weighted it", not "the LLM said skip but we're overriding."
|
|
251
|
+
*/
|
|
252
|
+
export function computeLexicalImportanceBump(factText, conversationText) {
|
|
253
|
+
let bump = 0;
|
|
254
|
+
const lowerConv = conversationText.toLowerCase();
|
|
255
|
+
// Signal 1: strong intent phrases anywhere in the conversation
|
|
256
|
+
const strongIntent = /\b(remember this|never forget|rule of thumb|don't (?:ever )?forget|critical|important|gotcha|note to self)\b/i;
|
|
257
|
+
if (strongIntent.test(lowerConv))
|
|
258
|
+
bump += 1;
|
|
259
|
+
// Signal 2: emphasis markers — double exclamation OR 3+ consecutive all-caps words
|
|
260
|
+
// (3+ chars each, to avoid false positives on acronyms like "AWS S3 IAM")
|
|
261
|
+
const doubleExclamation = /!!/;
|
|
262
|
+
const allCapsPhrase = /\b[A-Z]{3,}(?:\s+[A-Z]{3,}){2,}\b/;
|
|
263
|
+
if (doubleExclamation.test(conversationText) || allCapsPhrase.test(conversationText)) {
|
|
264
|
+
bump += 1;
|
|
265
|
+
}
|
|
266
|
+
// Signal 3: repetition — extract content words (length >= 5, not common stop
|
|
267
|
+
// words) from the fact, and check if any single one appears 2+ times in the
|
|
268
|
+
// conversation. This is more robust to LLM paraphrasing than a fingerprint
|
|
269
|
+
// match: "User prefers PostgreSQL" extracted from "I prefer PostgreSQL ...
|
|
270
|
+
// yeah PostgreSQL is right for OLTP" still triggers because "postgresql"
|
|
271
|
+
// appears multiple times even though the leading chars differ.
|
|
272
|
+
const lowerFact = factText.toLowerCase();
|
|
273
|
+
const stopWords = new Set([
|
|
274
|
+
'about', 'after', 'again', 'against', 'because', 'before', 'being',
|
|
275
|
+
'between', 'could', 'doing', 'during', 'every', 'further', 'having',
|
|
276
|
+
'their', 'these', 'those', 'through', 'under', 'until', 'where', 'which',
|
|
277
|
+
'while', 'would', 'should', 'about', 'thing', 'things', 'something',
|
|
278
|
+
'someone', 'always', 'never', 'often', 'still', 'really', 'maybe',
|
|
279
|
+
'using', 'works', 'work', 'user', 'users', 'with', 'from', 'into',
|
|
280
|
+
'like', 'just', 'than', 'them', 'they', 'will', 'when', 'what', 'were',
|
|
281
|
+
'this', 'that', 'have', 'this',
|
|
282
|
+
]);
|
|
283
|
+
const factWords = lowerFact.split(/[^a-z0-9_]+/).filter((w) => w.length >= 5 && !stopWords.has(w));
|
|
284
|
+
let triggered = false;
|
|
285
|
+
for (const word of factWords) {
|
|
286
|
+
const occurrences = (lowerConv.match(new RegExp(`\\b${escapeRegExp(word)}\\b`, 'g')) || [])
|
|
287
|
+
.length;
|
|
288
|
+
if (occurrences >= 2) {
|
|
289
|
+
triggered = true;
|
|
290
|
+
break;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
if (triggered)
|
|
294
|
+
bump += 1;
|
|
295
|
+
return Math.min(bump, 2);
|
|
296
|
+
}
|
|
297
|
+
// ---------------------------------------------------------------------------
|
|
298
|
+
// Compaction-Aware Extraction (Phase 2.3)
|
|
299
|
+
// ---------------------------------------------------------------------------
|
|
300
|
+
/**
|
|
301
|
+
* Compaction-specific system prompt (v1 taxonomy). Fires when the conversation
|
|
302
|
+
* context is about to be compacted. LAST CHANCE to capture knowledge before
|
|
303
|
+
* it is lost, so the importance floor is 5 instead of 6 and the prompt is
|
|
304
|
+
* more aggressive about extracting active-project context, claims, and
|
|
305
|
+
* episodes.
|
|
306
|
+
*
|
|
307
|
+
* Differences from `EXTRACTION_SYSTEM_PROMPT`:
|
|
308
|
+
* - Opening framing emphasizes urgency ("last chance")
|
|
309
|
+
* - Format-agnostic: handles bullet lists, prose, mixed formats
|
|
310
|
+
* - Importance threshold lowered to 5
|
|
311
|
+
* - More aggressive on claim / episode / directive types
|
|
312
|
+
* - Anti-pattern: don't skip content just because it's in a summary
|
|
313
|
+
*
|
|
314
|
+
* Output format matches `EXTRACTION_SYSTEM_PROMPT` exactly (same merged
|
|
315
|
+
* topics+facts JSON shape with v1 type / source / scope fields), so the
|
|
316
|
+
* same `parseMergedResponseV1` parser can validate it.
|
|
317
|
+
*/
|
|
318
|
+
export const COMPACTION_SYSTEM_PROMPT = `You are extracting memories from a conversation that is about to be compacted. The context will be LOST after this point — this is your LAST CHANCE to capture everything worth remembering. Be more aggressive than usual: err on the side of storing.
|
|
319
|
+
|
|
320
|
+
Work in TWO explicit phases within one response:
|
|
321
|
+
|
|
322
|
+
PHASE 1 — Topic identification.
|
|
323
|
+
Identify the 2-3 main topics the user was engaging with before extracting any fact. Topics should be short phrases (2-5 words each). If there's no clear user-focused topic, use an empty topics array.
|
|
324
|
+
|
|
325
|
+
PHASE 2 — Fact extraction anchored to those topics (plus preserve active context).
|
|
326
|
+
Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Active project context, decisions in progress, and current working state score 6-8 during compaction — capture them even when they'd normally be marginal.
|
|
327
|
+
|
|
328
|
+
Rules:
|
|
329
|
+
1. Each memory = single self-contained piece of information
|
|
330
|
+
2. Focus on user-specific info useful in future conversations
|
|
331
|
+
3. Skip generic knowledge, greetings, small talk
|
|
332
|
+
4. Score importance 1-10 (5+ = worth storing during compaction)
|
|
333
|
+
5. Every memory MUST attribute a source (provenance critical)
|
|
334
|
+
|
|
335
|
+
Importance rubric (full 1-10 range, NOT just 7-8):
|
|
336
|
+
- 10: Core identity, never-forget ("remember this forever", name/birthday)
|
|
337
|
+
- 9: Affects many future decisions / high-impact rules
|
|
338
|
+
- 8: Preference / decision-with-reasoning / operational rule
|
|
339
|
+
- 7: Specific durable fact
|
|
340
|
+
- 6: Borderline — during compaction, capture anyway
|
|
341
|
+
- 5: Would normally drop; keep as compaction safety net
|
|
342
|
+
- 4 or below: DROP (greetings, filler)
|
|
343
|
+
|
|
344
|
+
═══════════════════════════════════════════════════════════════
|
|
345
|
+
TYPE (6 values)
|
|
346
|
+
═══════════════════════════════════════════════════════════════
|
|
347
|
+
- claim: factual assertion (absorbs v0 fact/context/decision; decisions populate reasoning)
|
|
348
|
+
- preference: likes/dislikes/tastes
|
|
349
|
+
- directive: imperative rule ("always X", "never Y")
|
|
350
|
+
- commitment: future intent ("will do X")
|
|
351
|
+
- episode: notable event
|
|
352
|
+
- summary: derived synthesis (source must be derived|assistant)
|
|
353
|
+
|
|
354
|
+
═══════════════════════════════════════════════════════════════
|
|
355
|
+
SOURCE (provenance, CRITICAL)
|
|
356
|
+
═══════════════════════════════════════════════════════════════
|
|
357
|
+
- user: user explicitly stated it (in [user]: turns)
|
|
358
|
+
- user-inferred: extractor inferred from user signals
|
|
359
|
+
- assistant: assistant authored — DOWNGRADE unless user affirmed/quoted
|
|
360
|
+
- external, derived: rare
|
|
361
|
+
|
|
362
|
+
IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant.
|
|
363
|
+
|
|
364
|
+
═══════════════════════════════════════════════════════════════
|
|
365
|
+
SCOPE
|
|
366
|
+
═══════════════════════════════════════════════════════════════
|
|
367
|
+
work | personal | health | family | creative | finance | misc | unspecified
|
|
368
|
+
|
|
369
|
+
═══════════════════════════════════════════════════════════════
|
|
370
|
+
ENTITIES
|
|
371
|
+
═══════════════════════════════════════════════════════════════
|
|
372
|
+
- type ∈ {person, project, tool, company, concept, place}
|
|
373
|
+
- prefer specific names ("PostgreSQL" not "database")
|
|
374
|
+
- omit umbrella categories when specific name is present
|
|
375
|
+
|
|
376
|
+
═══════════════════════════════════════════════════════════════
|
|
377
|
+
REASONING (only for claims that are decisions)
|
|
378
|
+
═══════════════════════════════════════════════════════════════
|
|
379
|
+
For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.
|
|
380
|
+
|
|
381
|
+
═══════════════════════════════════════════════════════════════
|
|
382
|
+
FORMAT-AGNOSTIC PARSING (IMPORTANT)
|
|
383
|
+
═══════════════════════════════════════════════════════════════
|
|
384
|
+
The conversation may contain bullet lists, numbered lists, section headers, code snippets, or plain prose. Treat ALL formats as potential sources of extractable memory:
|
|
385
|
+
- Bullets/list items: each item is a candidate.
|
|
386
|
+
- Section headers (Context, Decisions, Key Learnings, Open Questions): use the header as a TYPE HINT (Context → claim, Decisions → claim+reasoning, Learnings → directive, Open Questions → commitment).
|
|
387
|
+
- Plain prose: parse each distinct assertion as a candidate.
|
|
388
|
+
- Code snippets: extract config choices, tool versions, architectural decisions embedded in comments or structure.
|
|
389
|
+
- Mixed format: apply all of the above.
|
|
390
|
+
|
|
391
|
+
Do NOT skip content just because it's in a summary. The agent has already filtered — your job is to convert into structured memories, not to re-evaluate worth.
|
|
392
|
+
|
|
393
|
+
═══════════════════════════════════════════════════════════════
|
|
394
|
+
OUTPUT FORMAT (no markdown, no code fences)
|
|
395
|
+
═══════════════════════════════════════════════════════════════
|
|
396
|
+
{
|
|
397
|
+
"topics": ["topic 1", "topic 2"],
|
|
398
|
+
"facts": [
|
|
399
|
+
{
|
|
400
|
+
"text": "...",
|
|
401
|
+
"type": "claim|preference|directive|commitment|episode",
|
|
402
|
+
"source": "user|user-inferred|assistant",
|
|
403
|
+
"scope": "work|personal|health|...",
|
|
404
|
+
"importance": N,
|
|
405
|
+
"confidence": 0.9,
|
|
406
|
+
"action": "ADD",
|
|
407
|
+
"reasoning": "...", // optional, only for claim+decision
|
|
408
|
+
"entities": [{"name": "...", "type": "tool"}]
|
|
409
|
+
}
|
|
410
|
+
]
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
If nothing worth extracting: {"topics": [], "facts": []}`;
|
|
414
|
+
/**
|
|
415
|
+
* Parse facts for compaction context (v1 taxonomy; importance floor 5).
|
|
416
|
+
*
|
|
417
|
+
* Identical to `parseFactsResponse` except the importance floor is 5 instead
|
|
418
|
+
* of 6 — compaction is the last chance to capture context, so we accept
|
|
419
|
+
* borderline facts that would normally be dropped.
|
|
420
|
+
*
|
|
421
|
+
* Accepts the same merged-topic v1 JSON shape as the main prompt. The
|
|
422
|
+
* inner `parseMergedResponseV1` enforces the >=6 floor, so we re-run a
|
|
423
|
+
* lenient >=5 pass on the raw parsed payload to admit the borderline items.
|
|
424
|
+
*/
|
|
425
|
+
export function parseFactsResponseForCompaction(response, logger) {
|
|
426
|
+
const originalPreview = response.trim().slice(0, 200);
|
|
427
|
+
let cleaned = response.trim();
|
|
428
|
+
// Strip <think>...</think> and <thinking>...</thinking> tags
|
|
429
|
+
cleaned = cleaned
|
|
430
|
+
.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '')
|
|
431
|
+
.trim();
|
|
432
|
+
// Strip markdown code fences if present
|
|
433
|
+
if (cleaned.startsWith('```')) {
|
|
434
|
+
cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
|
435
|
+
}
|
|
436
|
+
const tryParse = (input) => {
|
|
437
|
+
try {
|
|
438
|
+
return JSON.parse(input);
|
|
439
|
+
}
|
|
440
|
+
catch {
|
|
441
|
+
return undefined;
|
|
442
|
+
}
|
|
443
|
+
};
|
|
444
|
+
let parsed = tryParse(cleaned);
|
|
445
|
+
let recoveryUsed = 'none';
|
|
446
|
+
if (parsed === undefined) {
|
|
447
|
+
// Try bare-array first (legacy compaction output), then object (v1 merged).
|
|
448
|
+
const arrMatch = cleaned.match(/\[[\s\S]*\]/);
|
|
449
|
+
if (arrMatch) {
|
|
450
|
+
parsed = tryParse(arrMatch[0]);
|
|
451
|
+
if (parsed !== undefined)
|
|
452
|
+
recoveryUsed = 'bracket-scan';
|
|
453
|
+
}
|
|
454
|
+
if (parsed === undefined) {
|
|
455
|
+
const objMatch = cleaned.match(/\{[\s\S]*\}/);
|
|
456
|
+
if (objMatch) {
|
|
457
|
+
parsed = tryParse(objMatch[0]);
|
|
458
|
+
if (parsed !== undefined)
|
|
459
|
+
recoveryUsed = 'bracket-scan';
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
if (recoveryUsed === 'bracket-scan') {
|
|
464
|
+
logger?.info?.(`parseFactsResponseForCompaction: recovered JSON via bracket-scan fallback`);
|
|
465
|
+
}
|
|
466
|
+
if (!parsed || typeof parsed !== 'object') {
|
|
467
|
+
logger?.warn?.(`parseFactsResponseForCompaction: could not parse LLM output as JSON object. Preview: ${JSON.stringify(originalPreview)}`);
|
|
468
|
+
return [];
|
|
469
|
+
}
|
|
470
|
+
const obj = parsed;
|
|
471
|
+
const rawFacts = Array.isArray(obj.facts) ? obj.facts : null;
|
|
472
|
+
// Legacy v0 compaction output (bare JSON array) — best-effort parse.
|
|
473
|
+
const rawArray = rawFacts ?? (Array.isArray(parsed) ? parsed : null);
|
|
474
|
+
if (!rawArray) {
|
|
475
|
+
logger?.warn?.(`parseFactsResponseForCompaction: expected { facts: [...] } object, got ${typeof parsed}`);
|
|
476
|
+
return [];
|
|
477
|
+
}
|
|
478
|
+
const validActions = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
|
|
479
|
+
const facts = rawArray
|
|
480
|
+
.filter((f) => !!f &&
|
|
481
|
+
typeof f === 'object' &&
|
|
482
|
+
typeof f.text === 'string' &&
|
|
483
|
+
f.text.length >= 5)
|
|
484
|
+
.map((f) => {
|
|
485
|
+
const rawType = String(f.type ?? 'claim').toLowerCase();
|
|
486
|
+
// Accept v1 tokens directly; coerce legacy v0 tokens via V0_TO_V1_TYPE.
|
|
487
|
+
let type;
|
|
488
|
+
if (isValidMemoryType(rawType)) {
|
|
489
|
+
type = rawType;
|
|
490
|
+
}
|
|
491
|
+
else if (LEGACY_V0_MEMORY_TYPES.includes(rawType)) {
|
|
492
|
+
type = V0_TO_V1_TYPE[rawType];
|
|
493
|
+
}
|
|
494
|
+
else {
|
|
495
|
+
type = 'claim';
|
|
496
|
+
}
|
|
497
|
+
const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
|
|
498
|
+
const source = VALID_MEMORY_SOURCES.includes(rawSource)
|
|
499
|
+
? rawSource
|
|
500
|
+
: 'user-inferred';
|
|
501
|
+
const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
|
|
502
|
+
const scope = VALID_MEMORY_SCOPES.includes(rawScope)
|
|
503
|
+
? rawScope
|
|
504
|
+
: 'unspecified';
|
|
505
|
+
const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;
|
|
506
|
+
const action = validActions.includes(String(f.action))
|
|
507
|
+
? String(f.action)
|
|
508
|
+
: 'ADD';
|
|
509
|
+
let entities;
|
|
510
|
+
if (Array.isArray(f.entities)) {
|
|
511
|
+
const valid = f.entities
|
|
512
|
+
.map(parseEntity)
|
|
513
|
+
.filter((e) => e !== null);
|
|
514
|
+
if (valid.length > 0)
|
|
515
|
+
entities = valid;
|
|
516
|
+
}
|
|
517
|
+
const result = {
|
|
518
|
+
text: String(f.text).slice(0, 512),
|
|
519
|
+
type,
|
|
520
|
+
source,
|
|
521
|
+
scope,
|
|
522
|
+
reasoning,
|
|
523
|
+
importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
|
|
524
|
+
action,
|
|
525
|
+
existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
|
|
526
|
+
confidence: normalizeConfidence(f.confidence),
|
|
527
|
+
};
|
|
528
|
+
if (entities)
|
|
529
|
+
result.entities = entities;
|
|
530
|
+
return result;
|
|
531
|
+
})
|
|
532
|
+
// Reject illegal type:summary + source:user
|
|
533
|
+
.filter((f) => !(f.type === 'summary' && f.source === 'user'))
|
|
534
|
+
// Compaction: importance >= 5 (not 6)
|
|
535
|
+
.filter((f) => f.importance >= 5 || f.action === 'DELETE');
|
|
536
|
+
return facts;
|
|
537
|
+
}
|
|
538
|
+
/**
|
|
539
|
+
* Extract facts using the compaction-aware prompt.
|
|
540
|
+
*
|
|
541
|
+
* This is called from the `before_compaction` hook — the LAST CHANCE to
|
|
542
|
+
* capture knowledge before conversation context is lost. Key differences
|
|
543
|
+
* from `extractFacts`:
|
|
544
|
+
* - Uses `COMPACTION_SYSTEM_PROMPT` (lower threshold, format-agnostic, more aggressive)
|
|
545
|
+
* - Always processes the full conversation (`mode: 'full'`)
|
|
546
|
+
* - Importance filter is >= 5 instead of >= 6
|
|
547
|
+
* - Lexical importance bumps still apply
|
|
548
|
+
*
|
|
549
|
+
* @param rawMessages - The messages array from the hook event (unknown[])
|
|
550
|
+
* @param existingMemories - Optional list of existing memories for dedup context
|
|
551
|
+
* @param logger - Optional logger for observability
|
|
552
|
+
* @returns Array of extracted facts, or empty array on failure.
|
|
553
|
+
*/
|
|
554
|
+
export async function extractFactsForCompaction(rawMessages, existingMemories, logger) {
|
|
555
|
+
const config = resolveLLMConfig();
|
|
556
|
+
if (!config) {
|
|
557
|
+
logger?.info?.('extractFactsForCompaction: no LLM config resolved (skipping extraction)');
|
|
558
|
+
return [];
|
|
559
|
+
}
|
|
560
|
+
// Parse messages
|
|
561
|
+
const parsed = rawMessages
|
|
562
|
+
.map(messageToText)
|
|
563
|
+
.filter((m) => m !== null);
|
|
564
|
+
if (parsed.length === 0) {
|
|
565
|
+
logger?.info?.(`extractFactsForCompaction: no parseable messages (raw count=${rawMessages.length})`);
|
|
566
|
+
return [];
|
|
567
|
+
}
|
|
568
|
+
// Always full mode — process entire conversation for compaction
|
|
569
|
+
const conversationText = truncateMessages(parsed, 12_000);
|
|
570
|
+
if (conversationText.length < 20) {
|
|
571
|
+
logger?.info?.(`extractFactsForCompaction: conversation too short (${conversationText.length} chars < 20)`);
|
|
572
|
+
return [];
|
|
573
|
+
}
|
|
574
|
+
// Build existing memories context if available
|
|
575
|
+
let memoriesContext = '';
|
|
576
|
+
if (existingMemories && existingMemories.length > 0) {
|
|
577
|
+
const memoriesStr = existingMemories
|
|
578
|
+
.map((m) => `[ID: ${m.id}] ${m.text}`)
|
|
579
|
+
.join('\n');
|
|
580
|
+
memoriesContext = `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoriesStr}`;
|
|
581
|
+
}
|
|
582
|
+
const userPrompt = `Extract ALL valuable long-term memories from this conversation before it is compacted and lost:\n\n${conversationText}${memoriesContext}`;
|
|
583
|
+
let response;
|
|
584
|
+
try {
|
|
585
|
+
response = await chatCompletion(config, [
|
|
586
|
+
{ role: 'system', content: COMPACTION_SYSTEM_PROMPT },
|
|
587
|
+
{ role: 'user', content: userPrompt },
|
|
588
|
+
], {
|
|
589
|
+
// 3.3.1-rc.2: retry transient 429 / timeout (same policy as extractFacts).
|
|
590
|
+
retry: { attempts: 3, baseDelayMs: 1000 },
|
|
591
|
+
timeoutMs: 30_000,
|
|
592
|
+
logger,
|
|
593
|
+
});
|
|
594
|
+
}
|
|
595
|
+
catch (err) {
|
|
596
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
597
|
+
logger?.warn?.(`extractFactsForCompaction: chatCompletion threw: ${msg}`);
|
|
598
|
+
return [];
|
|
599
|
+
}
|
|
600
|
+
if (!response) {
|
|
601
|
+
logger?.info?.('extractFactsForCompaction: chatCompletion returned null/empty response');
|
|
602
|
+
return [];
|
|
603
|
+
}
|
|
604
|
+
logger?.info?.(`extractFactsForCompaction: LLM returned ${response.length} chars; handing to parseFactsResponseForCompaction`);
|
|
605
|
+
let facts = parseFactsResponseForCompaction(response, logger);
|
|
606
|
+
// v1 provenance filter (tag-don't-drop). Uses importance >= 5 floor because
|
|
607
|
+
// the filter's own floor is 5 in lax mode, matching compaction semantics.
|
|
608
|
+
facts = applyProvenanceFilterLax(facts, conversationText);
|
|
609
|
+
// Comparative rescore if >= 5 facts (same as default pipeline), else
|
|
610
|
+
// assign defaultVolatility so v1 write path has a value.
|
|
611
|
+
facts = await comparativeRescoreV1(facts, conversationText, logger);
|
|
612
|
+
facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
|
|
613
|
+
// Lexical importance bumps (same as regular extraction)
|
|
614
|
+
for (const f of facts) {
|
|
615
|
+
const bump = computeLexicalImportanceBump(f.text, conversationText);
|
|
616
|
+
if (bump > 0) {
|
|
617
|
+
const oldImportance = f.importance;
|
|
618
|
+
const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
|
|
619
|
+
f.importance = Math.min(10, f.importance + effectiveBump);
|
|
620
|
+
logger?.info?.(`extractFactsForCompaction: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`);
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
return facts;
|
|
624
|
+
}
|
|
625
|
+
// ---------------------------------------------------------------------------
|
|
626
|
+
// Debrief Extraction
|
|
627
|
+
// ---------------------------------------------------------------------------
|
|
628
|
+
/**
|
|
629
|
+
* Canonical debrief system prompt — must be identical across all clients.
|
|
630
|
+
*/
|
|
631
|
+
export const DEBRIEF_SYSTEM_PROMPT = `You are reviewing a conversation that just ended. The following facts were
|
|
632
|
+
already extracted and stored during this conversation:
|
|
633
|
+
|
|
634
|
+
{already_stored_facts}
|
|
635
|
+
|
|
636
|
+
Your job is to capture what turn-by-turn extraction MISSED. Focus on:
|
|
637
|
+
|
|
638
|
+
1. **Broader context** — What was the conversation about overall? What project,
|
|
639
|
+
problem, or topic tied the discussion together?
|
|
640
|
+
2. **Outcomes & conclusions** — What was decided, agreed upon, or resolved?
|
|
641
|
+
3. **What was attempted** — What approaches were tried? What worked, what didn't, and why?
|
|
642
|
+
4. **Relationships** — How do topics discussed relate to each other or to things
|
|
643
|
+
from previous conversations?
|
|
644
|
+
5. **Open threads** — What was left unfinished or needs follow-up?
|
|
645
|
+
|
|
646
|
+
Do NOT repeat facts already stored. Only add genuinely new information that provides
|
|
647
|
+
broader context a future conversation would benefit from.
|
|
648
|
+
|
|
649
|
+
Return a JSON array (no markdown, no code fences):
|
|
650
|
+
[{"text": "...", "type": "summary|context", "importance": N}]
|
|
651
|
+
|
|
652
|
+
- Use type "summary" for conclusions, outcomes, and decisions-of-the-session
|
|
653
|
+
- Use type "context" for broader project context, open threads, and what-was-tried
|
|
654
|
+
- Importance 7-8 for most debrief items (they are high-value by definition)
|
|
655
|
+
- Maximum 5 items (debriefs should be concise, not exhaustive)
|
|
656
|
+
- Each item should be 1-3 sentences, self-contained
|
|
657
|
+
|
|
658
|
+
If the conversation was too short or trivial to warrant a debrief, return: []`;
|
|
659
|
+
/**
|
|
660
|
+
* Parse a debrief response into validated DebriefItems.
|
|
661
|
+
*/
|
|
662
|
+
export function parseDebriefResponse(response) {
|
|
663
|
+
let cleaned = response.trim();
|
|
664
|
+
if (cleaned.startsWith('```')) {
|
|
665
|
+
cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
|
666
|
+
}
|
|
667
|
+
try {
|
|
668
|
+
const parsed = JSON.parse(cleaned);
|
|
669
|
+
if (!Array.isArray(parsed))
|
|
670
|
+
return [];
|
|
671
|
+
return parsed
|
|
672
|
+
.filter((item) => item &&
|
|
673
|
+
typeof item === 'object' &&
|
|
674
|
+
typeof item.text === 'string' &&
|
|
675
|
+
item.text.length >= 5)
|
|
676
|
+
.map((item) => {
|
|
677
|
+
const d = item;
|
|
678
|
+
const type = d.type === 'summary' ? 'summary' : 'context';
|
|
679
|
+
const rawImportance = typeof d.importance === 'number' ? d.importance : 7;
|
|
680
|
+
const importance = Math.max(1, Math.min(10, rawImportance));
|
|
681
|
+
return { text: String(d.text).slice(0, 512), type, importance };
|
|
682
|
+
})
|
|
683
|
+
.filter((d) => d.importance >= 6)
|
|
684
|
+
.slice(0, 5);
|
|
685
|
+
}
|
|
686
|
+
catch {
|
|
687
|
+
return [];
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
/**
|
|
691
|
+
* Extract a session debrief using LLM.
|
|
692
|
+
*
|
|
693
|
+
* @param rawMessages - All messages from the session
|
|
694
|
+
* @param storedFactTexts - Texts of facts already stored in this session (for dedup)
|
|
695
|
+
* @returns Array of debrief items, or empty array on failure
|
|
696
|
+
*/
|
|
697
|
+
export async function extractDebrief(rawMessages, storedFactTexts) {
|
|
698
|
+
const config = resolveLLMConfig();
|
|
699
|
+
if (!config)
|
|
700
|
+
return [];
|
|
701
|
+
const parsed = rawMessages
|
|
702
|
+
.map(messageToText)
|
|
703
|
+
.filter((m) => m !== null);
|
|
704
|
+
// Minimum 4 turns (8 messages) to warrant a debrief
|
|
705
|
+
if (parsed.length < 8)
|
|
706
|
+
return [];
|
|
707
|
+
const conversationText = truncateMessages(parsed, 12_000);
|
|
708
|
+
if (conversationText.length < 20)
|
|
709
|
+
return [];
|
|
710
|
+
const alreadyStored = storedFactTexts.length > 0
|
|
711
|
+
? storedFactTexts.map((t) => `- ${t}`).join('\n')
|
|
712
|
+
: '(none)';
|
|
713
|
+
const systemPrompt = DEBRIEF_SYSTEM_PROMPT.replace('{already_stored_facts}', alreadyStored);
|
|
714
|
+
try {
|
|
715
|
+
const response = await chatCompletion(config, [
|
|
716
|
+
{ role: 'system', content: systemPrompt },
|
|
717
|
+
{ role: 'user', content: `Review this conversation and provide a debrief:\n\n${conversationText}` },
|
|
718
|
+
], {
|
|
719
|
+
// 3.3.1-rc.2: retry transient 429 / timeout.
|
|
720
|
+
retry: { attempts: 3, baseDelayMs: 1000 },
|
|
721
|
+
timeoutMs: 30_000,
|
|
722
|
+
});
|
|
723
|
+
if (!response)
|
|
724
|
+
return [];
|
|
725
|
+
return parseDebriefResponse(response);
|
|
726
|
+
}
|
|
727
|
+
catch {
|
|
728
|
+
return [];
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
// ---------------------------------------------------------------------------
|
|
732
|
+
// v1 Taxonomy Extraction Pipeline (default as of plugin v3.0.0)
|
|
733
|
+
//
|
|
734
|
+
// Produces facts conforming to Memory Taxonomy v1 (6 types: claim,
|
|
735
|
+
// preference, directive, commitment, episode, summary; 5 sources; 8 scopes).
|
|
736
|
+
//
|
|
737
|
+
// The G-pipeline uses a single merged-topic prompt that returns both the
|
|
738
|
+
// 2-3 main topics the user engaged with AND the extracted facts, so topic
|
|
739
|
+
// anchoring is preserved within one call. After extraction we apply:
|
|
740
|
+
//
|
|
741
|
+
// 1. `applyProvenanceFilterLax` — tag-don't-drop. Assistant-sourced facts
|
|
742
|
+
// get their importance capped at 7 rather than being filtered out; the
|
|
743
|
+
// reranker later uses the source field to deprioritize them.
|
|
744
|
+
// 2. `comparativeRescoreV1` — spread importance across the 1-10 range
|
|
745
|
+
// and assign volatility. Forced when the batch has >= 5 facts.
|
|
746
|
+
// 3. `defaultVolatility` — heuristic fallback.
|
|
747
|
+
//
|
|
748
|
+
// This matches the winning G pipeline from the 200-conv benchmark.
|
|
749
|
+
// ---------------------------------------------------------------------------
|
|
750
|
+
/**
|
|
751
|
+
* The main extraction system prompt (v1 merged-topic pipeline).
|
|
752
|
+
*
|
|
753
|
+
* Exported as both `EXTRACTION_SYSTEM_PROMPT` (canonical) and
|
|
754
|
+
* `EXTRACTION_SYSTEM_PROMPT_V1_MERGED` (deprecated alias) for back-compat.
|
|
755
|
+
*/
|
|
756
|
+
export const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine using Memory Taxonomy v1. Work in TWO explicit phases within one response:
|
|
757
|
+
|
|
758
|
+
PHASE 1 — Topic identification.
|
|
759
|
+
Before extracting any fact, identify the 2-3 main topics the user was engaging with. Topics should be short phrases (2-5 words each). If the conversation has no clear user-focused topic, use an empty topics array.
|
|
760
|
+
|
|
761
|
+
PHASE 2 — Fact extraction anchored to those topics.
|
|
762
|
+
Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Tangential facts may still be extracted but score lower (6-7 range).
|
|
763
|
+
|
|
764
|
+
Rules:
|
|
765
|
+
1. Each memory = single self-contained piece of information
|
|
766
|
+
2. Focus on user-specific info useful in future conversations
|
|
767
|
+
3. Skip generic knowledge, greetings, small talk, ephemeral task coordination
|
|
768
|
+
4. Score importance 1-10 (6+ = worth storing)
|
|
769
|
+
5. Every memory MUST attribute a source (provenance critical)
|
|
770
|
+
|
|
771
|
+
Importance rubric (use FULL 1-10 range):
|
|
772
|
+
- 10: Critical, core identity, never-forget content
|
|
773
|
+
- 9: Affects many future decisions
|
|
774
|
+
- 8: High-value preference/decision/rule
|
|
775
|
+
- 7: Specific durable fact
|
|
776
|
+
- 6: Borderline
|
|
777
|
+
- 5 or below: NOT worth storing — drop
|
|
778
|
+
|
|
779
|
+
DO NOT cluster everything at 7-8-9.
|
|
780
|
+
|
|
781
|
+
═══════════════════════════════════════════════════════════════
|
|
782
|
+
TYPE (6 values)
|
|
783
|
+
═══════════════════════════════════════════════════════════════
|
|
784
|
+
- claim: factual assertion (absorbs fact/context/decision; decisions populate reasoning field)
|
|
785
|
+
- preference: likes/dislikes/tastes
|
|
786
|
+
- directive: imperative rule ("always X", "never Y")
|
|
787
|
+
- commitment: future intent ("will do X")
|
|
788
|
+
- episode: notable event
|
|
789
|
+
- summary: derived synthesis (source must be derived|assistant) — do NOT emit for turn-extraction
|
|
790
|
+
|
|
791
|
+
═══════════════════════════════════════════════════════════════
|
|
792
|
+
SOURCE (provenance, CRITICAL)
|
|
793
|
+
═══════════════════════════════════════════════════════════════
|
|
794
|
+
- user: user explicitly stated it (in [user]: turns)
|
|
795
|
+
- user-inferred: extractor inferred from user signals
|
|
796
|
+
- assistant: assistant authored content — DOWNGRADE unless user affirmed/quoted/used it
|
|
797
|
+
- external, derived: rare
|
|
798
|
+
|
|
799
|
+
IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant
|
|
800
|
+
|
|
801
|
+
═══════════════════════════════════════════════════════════════
|
|
802
|
+
SCOPE (life domain)
|
|
803
|
+
═══════════════════════════════════════════════════════════════
|
|
804
|
+
work | personal | health | family | creative | finance | misc | unspecified
|
|
805
|
+
|
|
806
|
+
═══════════════════════════════════════════════════════════════
|
|
807
|
+
ENTITIES
|
|
808
|
+
═══════════════════════════════════════════════════════════════
|
|
809
|
+
- type ∈ {person, project, tool, company, concept, place}
|
|
810
|
+
- prefer specific names ("PostgreSQL" not "database")
|
|
811
|
+
- omit umbrella categories when specific name is present
|
|
812
|
+
|
|
813
|
+
═══════════════════════════════════════════════════════════════
|
|
814
|
+
REASONING (only for claims that are decisions)
|
|
815
|
+
═══════════════════════════════════════════════════════════════
|
|
816
|
+
For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.
|
|
817
|
+
|
|
818
|
+
═══════════════════════════════════════════════════════════════
|
|
819
|
+
OUTPUT FORMAT (no markdown, no code fences)
|
|
820
|
+
═══════════════════════════════════════════════════════════════
|
|
821
|
+
{
|
|
822
|
+
"topics": ["topic 1", "topic 2"],
|
|
823
|
+
"facts": [
|
|
824
|
+
{
|
|
825
|
+
"text": "...",
|
|
826
|
+
"type": "claim|preference|directive|commitment|episode",
|
|
827
|
+
"source": "user|user-inferred|assistant",
|
|
828
|
+
"scope": "work|personal|health|...",
|
|
829
|
+
"importance": N,
|
|
830
|
+
"confidence": 0.9,
|
|
831
|
+
"action": "ADD",
|
|
832
|
+
"reasoning": "...", // optional, only for claim+decision
|
|
833
|
+
"entities": [{"name": "...", "type": "tool"}]
|
|
834
|
+
}
|
|
835
|
+
]
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
If nothing worth extracting: {"topics": [], "facts": []}`;
|
|
839
|
+
/**
|
|
840
|
+
* @deprecated Use `EXTRACTION_SYSTEM_PROMPT` instead. Kept only as a
|
|
841
|
+
* back-compat alias for callers that imported the v1 rollout name.
|
|
842
|
+
*/
|
|
843
|
+
export const EXTRACTION_SYSTEM_PROMPT_V1_MERGED = EXTRACTION_SYSTEM_PROMPT;
|
|
844
|
+
/**
|
|
845
|
+
* Parse a v1 merged-topic LLM response. Returns both the topic list and the
|
|
846
|
+
* validated/filtered fact list. Illegal combinations (summary+user) are
|
|
847
|
+
* dropped; importance < 6 with action != DELETE is dropped.
|
|
848
|
+
*
|
|
849
|
+
* Exported as both `parseFactsResponse` (canonical, returns facts array) and
|
|
850
|
+
* `parseMergedResponseV1` (returns `{ topics, facts }`). Prefer the former
|
|
851
|
+
* unless the topic list is needed.
|
|
852
|
+
*/
|
|
853
|
+
export function parseMergedResponseV1(response, logger) {
|
|
854
|
+
const originalPreview = response.trim().slice(0, 200);
|
|
855
|
+
let cleaned = response.trim();
|
|
856
|
+
cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
|
|
857
|
+
if (cleaned.startsWith('```')) {
|
|
858
|
+
cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
|
859
|
+
}
|
|
860
|
+
const tryParse = (input) => {
|
|
861
|
+
try {
|
|
862
|
+
return JSON.parse(input);
|
|
863
|
+
}
|
|
864
|
+
catch {
|
|
865
|
+
return undefined;
|
|
866
|
+
}
|
|
867
|
+
};
|
|
868
|
+
let parsed = tryParse(cleaned);
|
|
869
|
+
let recoveryUsed = 'none';
|
|
870
|
+
if (parsed === undefined) {
|
|
871
|
+
// First try an outermost-array greedy match (legacy bare-array format).
|
|
872
|
+
const arrMatch = cleaned.match(/\[[\s\S]*\]/);
|
|
873
|
+
if (arrMatch) {
|
|
874
|
+
parsed = tryParse(arrMatch[0]);
|
|
875
|
+
if (parsed !== undefined)
|
|
876
|
+
recoveryUsed = 'bracket-scan';
|
|
877
|
+
}
|
|
878
|
+
if (parsed === undefined) {
|
|
879
|
+
// Fall back to an outermost-object greedy match (merged-topic format).
|
|
880
|
+
const objMatch = cleaned.match(/\{[\s\S]*\}/);
|
|
881
|
+
if (objMatch) {
|
|
882
|
+
parsed = tryParse(objMatch[0]);
|
|
883
|
+
if (parsed !== undefined)
|
|
884
|
+
recoveryUsed = 'bracket-scan';
|
|
885
|
+
}
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
if (recoveryUsed === 'bracket-scan') {
|
|
889
|
+
logger?.info?.(`parseFactsResponse: recovered JSON via bracket-scan fallback`);
|
|
890
|
+
}
|
|
891
|
+
if (!parsed || typeof parsed !== 'object') {
|
|
892
|
+
logger?.warn?.(`parseFactsResponse: could not parse LLM output as JSON. Preview: ${JSON.stringify(originalPreview)}`);
|
|
893
|
+
return { topics: [], facts: [] };
|
|
894
|
+
}
|
|
895
|
+
// Dual-format acceptance: either the merged object `{ topics, facts }` or
|
|
896
|
+
// a bare JSON array of fact objects (legacy / test fixture shape). The
|
|
897
|
+
// bare array is wrapped as { topics: [], facts: [...] } so the downstream
|
|
898
|
+
// logic stays uniform. A single fact object (no wrapper) is also wrapped.
|
|
899
|
+
let obj;
|
|
900
|
+
if (Array.isArray(parsed)) {
|
|
901
|
+
obj = { topics: [], facts: parsed };
|
|
902
|
+
}
|
|
903
|
+
else if (typeof parsed.facts === 'undefined' &&
|
|
904
|
+
typeof parsed.text === 'string') {
|
|
905
|
+
// Single fact object, not a merged wrapper.
|
|
906
|
+
obj = { topics: [], facts: [parsed] };
|
|
907
|
+
}
|
|
908
|
+
else {
|
|
909
|
+
obj = parsed;
|
|
910
|
+
}
|
|
911
|
+
const rawTopics = obj.topics;
|
|
912
|
+
const topics = Array.isArray(rawTopics)
|
|
913
|
+
? rawTopics
|
|
914
|
+
.filter((t) => typeof t === 'string' && t.length > 0)
|
|
915
|
+
.slice(0, 3)
|
|
916
|
+
: [];
|
|
917
|
+
const rawFacts = obj.facts;
|
|
918
|
+
if (!Array.isArray(rawFacts))
|
|
919
|
+
return { topics, facts: [] };
|
|
920
|
+
const validActions = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
|
|
921
|
+
const facts = rawFacts
|
|
922
|
+
.filter((f) => !!f &&
|
|
923
|
+
typeof f === 'object' &&
|
|
924
|
+
typeof f.text === 'string' &&
|
|
925
|
+
f.text.length >= 5)
|
|
926
|
+
.map((f) => {
|
|
927
|
+
const rawType = String(f.type ?? 'claim').toLowerCase();
|
|
928
|
+
// Accept both v1 tokens and legacy v0 tokens — coerce v0 via V0_TO_V1_TYPE.
|
|
929
|
+
let type;
|
|
930
|
+
if (isValidMemoryType(rawType)) {
|
|
931
|
+
type = rawType;
|
|
932
|
+
}
|
|
933
|
+
else if (LEGACY_V0_MEMORY_TYPES.includes(rawType)) {
|
|
934
|
+
type = V0_TO_V1_TYPE[rawType];
|
|
935
|
+
}
|
|
936
|
+
else {
|
|
937
|
+
type = 'claim';
|
|
938
|
+
}
|
|
939
|
+
const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
|
|
940
|
+
const source = VALID_MEMORY_SOURCES.includes(rawSource)
|
|
941
|
+
? rawSource
|
|
942
|
+
: 'user-inferred';
|
|
943
|
+
const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
|
|
944
|
+
const scope = VALID_MEMORY_SCOPES.includes(rawScope)
|
|
945
|
+
? rawScope
|
|
946
|
+
: 'unspecified';
|
|
947
|
+
const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;
|
|
948
|
+
const action = validActions.includes(String(f.action))
|
|
949
|
+
? String(f.action)
|
|
950
|
+
: 'ADD';
|
|
951
|
+
let entities;
|
|
952
|
+
if (Array.isArray(f.entities)) {
|
|
953
|
+
const valid = f.entities
|
|
954
|
+
.map(parseEntity)
|
|
955
|
+
.filter((e) => e !== null);
|
|
956
|
+
if (valid.length > 0)
|
|
957
|
+
entities = valid;
|
|
958
|
+
}
|
|
959
|
+
const fact = {
|
|
960
|
+
text: String(f.text).slice(0, 512),
|
|
961
|
+
type,
|
|
962
|
+
source,
|
|
963
|
+
scope,
|
|
964
|
+
reasoning,
|
|
965
|
+
importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
|
|
966
|
+
confidence: normalizeConfidence(f.confidence),
|
|
967
|
+
action,
|
|
968
|
+
existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
|
|
969
|
+
};
|
|
970
|
+
if (entities)
|
|
971
|
+
fact.entities = entities;
|
|
972
|
+
return fact;
|
|
973
|
+
})
|
|
974
|
+
// Reject illegal type:summary + source:user
|
|
975
|
+
.filter((f) => !(f.type === 'summary' && f.source === 'user'))
|
|
976
|
+
// Importance threshold (preserves DELETE)
|
|
977
|
+
.filter((f) => f.importance >= 6 || f.action === 'DELETE');
|
|
978
|
+
return { topics, facts };
|
|
979
|
+
}
|
|
980
|
+
/**
|
|
981
|
+
* Parse an LLM extraction response into structured v1 facts. Canonical
|
|
982
|
+
* parser used by the default `extractFacts()` pipeline.
|
|
983
|
+
*
|
|
984
|
+
* This is a thin wrapper around `parseMergedResponseV1` that discards the
|
|
985
|
+
* topic list so existing callers that expect a flat `ExtractedFact[]`
|
|
986
|
+
* signature keep working.
|
|
987
|
+
*/
|
|
988
|
+
export function parseFactsResponse(response, logger) {
|
|
989
|
+
return parseMergedResponseV1(response, logger).facts;
|
|
990
|
+
}
|
|
991
|
+
/**
|
|
992
|
+
* Tag-don't-drop provenance filter (pipeline G / F).
|
|
993
|
+
*
|
|
994
|
+
* For each fact:
|
|
995
|
+
* - If source is already "assistant", cap importance at 7.
|
|
996
|
+
* - Otherwise, keyword-match the fact against user turns. If <30% of
|
|
997
|
+
* content words (length >= 4) appear in user turns AND source != "user",
|
|
998
|
+
* tag source as "assistant" and cap importance at 7 (keep the fact).
|
|
999
|
+
* - Drop facts below importance 5 (unless DELETE action).
|
|
1000
|
+
*/
|
|
1001
|
+
export function applyProvenanceFilterLax(facts, conversationText) {
|
|
1002
|
+
const userTurnsLower = conversationText
|
|
1003
|
+
.split(/\n\n/)
|
|
1004
|
+
.filter((line) => line.startsWith('[user]:'))
|
|
1005
|
+
.join(' ')
|
|
1006
|
+
.toLowerCase();
|
|
1007
|
+
return facts
|
|
1008
|
+
.map((f) => {
|
|
1009
|
+
if (f.source === 'assistant') {
|
|
1010
|
+
return { ...f, importance: Math.min(f.importance, 7) };
|
|
1011
|
+
}
|
|
1012
|
+
const factWords = f.text
|
|
1013
|
+
.toLowerCase()
|
|
1014
|
+
.replace(/[^a-z0-9\s]/g, ' ')
|
|
1015
|
+
.split(/\s+/)
|
|
1016
|
+
.filter((w) => w.length >= 4);
|
|
1017
|
+
const matchedWords = factWords.filter((w) => userTurnsLower.includes(w)).length;
|
|
1018
|
+
const matchRatio = factWords.length > 0 ? matchedWords / factWords.length : 0;
|
|
1019
|
+
if (matchRatio < 0.3 && f.source !== 'user') {
|
|
1020
|
+
return {
|
|
1021
|
+
...f,
|
|
1022
|
+
source: 'assistant',
|
|
1023
|
+
importance: Math.min(f.importance, 7),
|
|
1024
|
+
};
|
|
1025
|
+
}
|
|
1026
|
+
return f;
|
|
1027
|
+
})
|
|
1028
|
+
.filter((f) => f.importance >= 5 || f.action === 'DELETE');
|
|
1029
|
+
}
|
|
1030
|
+
/**
|
|
1031
|
+
* Heuristic fallback volatility when the LLM doesn't assign one.
|
|
1032
|
+
*/
|
|
1033
|
+
export function defaultVolatility(f) {
|
|
1034
|
+
if (f.type === 'commitment')
|
|
1035
|
+
return 'updatable';
|
|
1036
|
+
if (f.type === 'episode')
|
|
1037
|
+
return 'stable';
|
|
1038
|
+
if (f.type === 'directive')
|
|
1039
|
+
return 'stable';
|
|
1040
|
+
if (f.scope === 'health' || f.scope === 'family')
|
|
1041
|
+
return 'stable';
|
|
1042
|
+
return 'updatable';
|
|
1043
|
+
}
|
|
1044
|
+
const COMPARATIVE_PROMPT_V1 = `You are a memory re-ranker for the v1 taxonomy. You receive facts already extracted from one conversation, each with initial importance. Your job is twofold:
|
|
1045
|
+
|
|
1046
|
+
1. RE-RANK importance to spread across the 1-10 range (avoid clustering at 7-8-9)
|
|
1047
|
+
2. ASSIGN volatility to each fact
|
|
1048
|
+
|
|
1049
|
+
Re-ranking rules:
|
|
1050
|
+
- Top 1/3 of facts (most significant for this user): importance 9-10
|
|
1051
|
+
- Middle 1/3: importance 7-8
|
|
1052
|
+
- Bottom 1/3: importance 5-6 (borderline, may be dropped)
|
|
1053
|
+
- A fact may stay at 10 if it's clearly identity-defining (name, birthday) or marked as "never forget"
|
|
1054
|
+
- Never raise without justification; never lower below 5 unless clearly noise
|
|
1055
|
+
- You MUST produce a spread
|
|
1056
|
+
|
|
1057
|
+
Volatility rules:
|
|
1058
|
+
- stable: unlikely to change for years (name, allergies, birthplace, fundamental traits)
|
|
1059
|
+
- updatable: changes occasionally (current job, active project, partner's name, address)
|
|
1060
|
+
- ephemeral: short-lived state (today's task, this week's plan, current trip itinerary)
|
|
1061
|
+
|
|
1062
|
+
Use the FULL conversation context to judge volatility — a single claim may be ambiguous, but in context you can usually tell.
|
|
1063
|
+
|
|
1064
|
+
Return JSON array, same order as input, ONLY with importance + volatility fields:
|
|
1065
|
+
[{"importance": N, "volatility": "stable|updatable|ephemeral"}, ...]
|
|
1066
|
+
No markdown.`;
|
/**
 * Comparative re-scoring pass (v1). Forces re-scoring when facts.length >= 5
 * so the importance distribution spreads across the 1-10 range. When
 * facts.length < 5, assigns defaultVolatility and returns.
 */
export async function comparativeRescoreV1(facts, conversationText, logger) {
    // G-tuned behavior: force rescore when >= 5 facts
    if (facts.length < 2 || facts.length < 5) {
        return facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
    }
    const config = resolveLLMConfig();
    if (!config) {
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    const factsForPrompt = facts
        .map((f, i) => `${i + 1}. [imp: ${f.importance}] [type: ${f.type}] [scope: ${f.scope ?? 'unspecified'}] ${f.text}`)
        .join('\n');
    const userPrompt = `Conversation context:\n${conversationText}\n\nExtracted facts:\n${factsForPrompt}\n\nReturn ${facts.length} JSON objects, each with "importance" + "volatility". Match input order.`;
    let response;
    try {
        response = await chatCompletion(config, [
            { role: 'system', content: COMPARATIVE_PROMPT_V1 },
            { role: 'user', content: userPrompt },
        ], {
            // 3.3.1-rc.2: retry transient 429 / timeout (rescore is an inner
            // call after extractFacts — if extraction backs off successfully
            // the rescore usually also passes on first try, but keep symmetry).
            retry: { attempts: 3, baseDelayMs: 1000 },
            timeoutMs: 30_000,
            logger,
        });
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logger?.warn?.(`comparativeRescoreV1: chatCompletion threw: ${msg}`);
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    if (!response) {
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    let cleaned = response.trim();
    cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
    if (cleaned.startsWith('```')) {
        cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
    }
    const match = cleaned.match(/\[[\s\S]*\]/);
    if (match)
        cleaned = match[0];
    let parsed;
    try {
        parsed = JSON.parse(cleaned);
    }
    catch {
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    if (!Array.isArray(parsed)) {
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    return facts.map((f, i) => {
        const entry = parsed[i];
        const rawImp = entry && typeof entry === 'object' ? Number(entry.importance) : NaN;
        const rawVol = entry && typeof entry === 'object' ? String(entry.volatility ?? '').toLowerCase() : '';
        const newImp = Number.isFinite(rawImp)
            ? Math.max(5, Math.min(10, Math.round(rawImp)))
            : f.importance;
        const newVol = VALID_MEMORY_VOLATILITIES.includes(rawVol)
            ? rawVol
            : defaultVolatility(f);
        return { ...f, importance: newImp, volatility: newVol };
    });
}
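A brief worked example of the response-cleaning path above; the raw model output is hypothetical:

// Hypothetical raw output:
//   <think>the name fact ranks highest</think>
//   ```json
//   [{"importance": 10, "volatility": "stable"}, {"importance": 6, "volatility": "ephemeral"}]
//   ```
// After the think-tag strip the remaining text starts with ```, so the fence strip leaves:
//   [{"importance": 10, "volatility": "stable"}, {"importance": 6, "volatility": "ephemeral"}]
// The [...] match and JSON.parse then succeed; each importance is clamped to 5-10 and
// each volatility is checked against VALID_MEMORY_VOLATILITIES, with defaultVolatility(f)
// as the fallback.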
/**
 * Main extraction entry point (default pipeline as of plugin v3.0.0).
 *
 * Pipeline: single merged-topic LLM call → `applyProvenanceFilterLax`
 * (tag-don't-drop) → `comparativeRescoreV1` (forces re-rank when >= 5 facts)
 * → `defaultVolatility` fallback → lexical importance bumps.
 *
 * Produces v1-shaped facts with `type`, `source`, `scope`, `volatility`,
 * and optional `reasoning` fields populated. The caller should hand the
 * result to `storeExtractedFacts` which emits a v1 canonical claim blob.
 */
export async function extractFacts(rawMessages, mode, existingMemories, profileContext, logger) {
    const config = resolveLLMConfig();
    if (!config) {
        logger?.info?.('extractFacts: no LLM config resolved (skipping extraction)');
        return [];
    }
    const parsed = rawMessages
        .map(messageToText)
        .filter((m) => m !== null);
    if (parsed.length === 0) {
        logger?.info?.(`extractFacts: no parseable messages (raw count=${rawMessages.length})`);
        return [];
    }
    const relevantMessages = mode === 'turn' ? parsed.slice(-6) : parsed;
    const conversationText = truncateMessages(relevantMessages, 12_000);
    if (conversationText.length < 20) {
        logger?.info?.(`extractFacts: conversation too short (${conversationText.length} chars < 20, parsed=${parsed.length}, mode=${mode})`);
        return [];
    }
    let memoriesContext = '';
    if (existingMemories && existingMemories.length > 0) {
        const memoriesStr = existingMemories
            .map((m) => `[ID: ${m.id}] ${m.text}`)
            .join('\n');
        memoriesContext = `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoriesStr}`;
    }
    const userPrompt = mode === 'turn'
        ? `Extract important facts from these recent conversation turns:\n\n${conversationText}${memoriesContext}`
        : `Extract ALL valuable long-term memories from this conversation before it is lost:\n\n${conversationText}${memoriesContext}`;
    const systemPrompt = profileContext || EXTRACTION_SYSTEM_PROMPT;
    let response;
    try {
        response = await chatCompletion(config, [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userPrompt },
        ], {
            // 3.3.1-rc.2: the headline fix for the rc.1 QA NO-GO — 5/6 extraction
            // windows failed on zai 429 + timeouts with no retry. 3 attempts with
            // 1s → 2s → 4s backoff recovers virtually all transient rate-limit
            // hiccups. Graceful timeout: per-attempt 30s, total worst-case 30+1+30+2+30+4≈97s.
            retry: { attempts: 3, baseDelayMs: 1000 },
            timeoutMs: 30_000,
            logger,
        });
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logger?.warn?.(`extractFacts: chatCompletion threw: ${msg}`);
        return [];
    }
    if (!response) {
        logger?.info?.('extractFacts: chatCompletion returned null/empty response');
        return [];
    }
    logger?.info?.(`extractFacts: LLM returned ${response.length} chars; parsing merged response`);
    const { topics, facts: rawFacts } = parseMergedResponseV1(response, logger);
    if (topics.length > 0) {
        logger?.info?.(`extractFacts: topics = ${JSON.stringify(topics)}`);
    }
    // Provenance filter (tag-don't-drop)
    let facts = applyProvenanceFilterLax(rawFacts, conversationText);
    // Comparative rescore (forces re-rank when >= 5 facts)
    facts = await comparativeRescoreV1(facts, conversationText, logger);
    // Ensure every fact has a volatility (defensive: rescore may have skipped)
    facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
    // Lexical importance bumps (same as v0 pipeline)
    for (const f of facts) {
        const bump = computeLexicalImportanceBump(f.text, conversationText);
        if (bump > 0) {
            const oldImportance = f.importance;
            const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
            f.importance = Math.min(10, f.importance + effectiveBump);
            logger?.info?.(`extractFacts: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`);
        }
    }
    return facts;
}