@totalreclaw/totalreclaw 1.6.0 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAWHUB.md +134 -0
- package/README.md +407 -64
- package/SKILL.md +1032 -0
- package/api-client.ts +5 -5
- package/claims-helper.ts +686 -0
- package/config.ts +211 -0
- package/consolidation.ts +141 -33
- package/contradiction-sync.ts +1389 -0
- package/crypto.ts +63 -261
- package/digest-sync.ts +516 -0
- package/embedding.ts +69 -46
- package/extractor.ts +1307 -84
- package/hot-cache-wrapper.ts +1 -1
- package/import-adapters/gemini-adapter.ts +243 -0
- package/import-adapters/index.ts +3 -0
- package/import-adapters/types.ts +1 -1
- package/index.ts +1887 -323
- package/llm-client.ts +106 -53
- package/lsh.ts +21 -210
- package/package.json +20 -7
- package/pin.ts +502 -0
- package/reranker.ts +96 -124
- package/skill.json +213 -0
- package/subgraph-search.ts +112 -5
- package/subgraph-store.ts +559 -275
- package/consolidation.test.ts +0 -356
- package/extractor-dedup.test.ts +0 -168
- package/import-adapters/import-adapters.test.ts +0 -1123
- package/lsh.test.ts +0 -463
- package/pocv2-e2e-test.ts +0 -917
- package/porter-stemmer.d.ts +0 -4
- package/reranker.test.ts +0 -594
- package/semantic-dedup.test.ts +0 -392
- package/setup.sh +0 -19
- package/store-dedup-wiring.test.ts +0 -186
package/extractor.ts
CHANGED
|
@@ -13,14 +13,209 @@ import { chatCompletion, resolveLLMConfig } from './llm-client.js';
|
|
|
13
13
|
|
|
14
14
|
/** Action the extractor assigns to a candidate memory relative to the existing store. */
export type ExtractionAction = 'ADD' | 'UPDATE' | 'DELETE' | 'NOOP';

/** Allowed entity categories; runtime counterpart is `ALLOWED_ENTITY_TYPES`. */
export type EntityType = 'person' | 'project' | 'tool' | 'company' | 'concept' | 'place';

/** A named entity attached to an extracted fact. */
export interface ExtractedEntity {
  /** Entity name; `parseEntity` trims it and caps it at 128 chars. */
  name: string;
  /** One of the six allowed entity categories. */
  type: EntityType;
  /** Optional free-text role; `parseEntity` trims it and caps it at 128 chars. */
  role?: string;
}
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Memory Taxonomy v1 — the 6 canonical memory types. Single source of truth.
|
|
26
|
+
//
|
|
27
|
+
// Plugin v3.0.0 adopts v1 as the ONLY taxonomy. Legacy v0 tokens
|
|
28
|
+
// (fact, decision, episodic, goal, context, rule) are accepted only on the
|
|
29
|
+
// read-side via `LEGACY_V0_MEMORY_TYPES` / `V0_TO_V1_TYPE` and
|
|
30
|
+
// `normalizeToV1Type` in `claims-helper.ts`, so pre-v3 vault entries can
|
|
31
|
+
// still be decoded. Extraction and write paths emit v1 exclusively.
|
|
32
|
+
//
|
|
33
|
+
// When adding a new type, update ALL of:
|
|
34
|
+
// - This constant
|
|
35
|
+
// - `mcp/src/v1-types.ts`
|
|
36
|
+
// - `python/src/totalreclaw/agent/extraction.py`
|
|
37
|
+
// - `rust/totalreclaw-core/src/claims.rs`
|
|
38
|
+
// - `skill/plugin/claims-helper.ts`
|
|
39
|
+
// - The `EXTRACTION_SYSTEM_PROMPT` Types: list
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
export const VALID_MEMORY_TYPES = [
|
|
43
|
+
'claim',
|
|
44
|
+
'preference',
|
|
45
|
+
'directive',
|
|
46
|
+
'commitment',
|
|
47
|
+
'episode',
|
|
48
|
+
'summary',
|
|
49
|
+
] as const;
|
|
50
|
+
|
|
51
|
+
/** v1 MemoryType — the 6 canonical types. */
|
|
52
|
+
export type MemoryType = (typeof VALID_MEMORY_TYPES)[number];
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Runtime type guard — returns whether an unknown value is a valid v1
|
|
56
|
+
* `MemoryType`. Legacy v0 tokens return `false`; use `normalizeToV1Type()`
|
|
57
|
+
* in `claims-helper.ts` to coerce them on the read path.
|
|
58
|
+
*/
|
|
59
|
+
export function isValidMemoryType(value: unknown): value is MemoryType {
|
|
60
|
+
return typeof value === 'string' && (VALID_MEMORY_TYPES as readonly string[]).includes(value);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
 * Backward-compat alias so existing consumers that import `MemoryTypeV1`
 * keep compiling. Identical to `MemoryType` as of plugin v3.0.0.
 * @deprecated Use `MemoryType` instead.
 */
export type MemoryTypeV1 = MemoryType;

/**
 * Backward-compat alias. Same list as `VALID_MEMORY_TYPES`.
 * @deprecated Use `VALID_MEMORY_TYPES` instead.
 */
export const VALID_MEMORY_TYPES_V1: readonly MemoryType[] = VALID_MEMORY_TYPES;

/**
 * Backward-compat alias. Same guard as `isValidMemoryType` (pure delegation).
 * @deprecated Use `isValidMemoryType` instead.
 */
export function isValidMemoryTypeV1(value: unknown): value is MemoryType {
  return isValidMemoryType(value);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Legacy v0 memory types — retained as a typed constant so the read-side
 * `V0_TO_V1_TYPE` mapping can reference them without redeclaration.
 *
 * Do NOT emit these on the write/extraction path. They exist solely so
 * `claims-helper.ts::readClaimFromBlob` can decode pre-v1 vault entries
 * whose encrypted blobs still carry v0 token strings.
 */
export const LEGACY_V0_MEMORY_TYPES = [
  'fact',
  'preference', // also a v1 token; maps to itself in V0_TO_V1_TYPE
  'decision',
  'episodic',
  'goal',
  'context',
  'summary', // also a v1 token; maps to itself in V0_TO_V1_TYPE
  'rule',
] as const;

/** Union of legacy v0 token strings, derived from the constant above. */
export type MemoryTypeV0 = (typeof LEGACY_V0_MEMORY_TYPES)[number];
|
|
104
|
+
|
|
105
|
+
/**
 * v1 provenance tag — who the memory's substance originated from.
 * Runtime list counterpart: `VALID_MEMORY_SOURCES`.
 */
export type MemorySource =
  | 'user'
  | 'user-inferred'
  | 'assistant'
  | 'external'
  | 'derived';

/**
 * v1 life-domain scope for a memory.
 * Runtime list counterpart: `VALID_MEMORY_SCOPES`.
 */
export type MemoryScope =
  | 'work'
  | 'personal'
  | 'health'
  | 'family'
  | 'creative'
  | 'finance'
  | 'misc'
  | 'unspecified';

/**
 * v1 stability signal for a memory.
 * Runtime list counterpart: `VALID_MEMORY_VOLATILITIES`.
 */
export type MemoryVolatility = 'stable' | 'updatable' | 'ephemeral';
|
|
123
|
+
|
|
124
|
+
/** Runtime list counterpart of `MemorySource`, used to validate raw LLM output. */
export const VALID_MEMORY_SOURCES: readonly MemorySource[] = [
  'user',
  'user-inferred',
  'assistant',
  'external',
  'derived',
];

/** Runtime list counterpart of `MemoryScope`, used to validate raw LLM output. */
export const VALID_MEMORY_SCOPES: readonly MemoryScope[] = [
  'work',
  'personal',
  'health',
  'family',
  'creative',
  'finance',
  'misc',
  'unspecified',
];

/** Runtime list counterpart of `MemoryVolatility`. */
export const VALID_MEMORY_VOLATILITIES: readonly MemoryVolatility[] = [
  'stable',
  'updatable',
  'ephemeral',
];
|
|
148
|
+
|
|
149
|
+
/**
 * Legacy v0 → v1 type mapping used by the read-side adapter when decoding
 * a pre-v1 vault entry that still carries a v0 token string.
 *
 * Decisions (v0) map to v1 `claim` — the reasoning lives in the separate
 * `reasoning` field rather than being encoded in the type.
 */
export const V0_TO_V1_TYPE: Record<MemoryTypeV0, MemoryType> = {
  fact: 'claim',
  preference: 'preference', // unchanged between taxonomies
  decision: 'claim', // the "because Y" clause moves to the `reasoning` field
  episodic: 'episode',
  goal: 'commitment',
  context: 'claim',
  summary: 'summary', // unchanged between taxonomies
  rule: 'directive',
};
|
|
166
|
+
|
|
167
|
+
// ---------------------------------------------------------------------------
// ExtractedFact — canonical shape carried through the extraction pipeline
// ---------------------------------------------------------------------------

/**
 * Extracted fact. Shape carries full v1 taxonomy fields (source / scope /
 * reasoning / volatility). `source` is required on the write path —
 * `storeExtractedFacts` supplies `'user-inferred'` as a defensive default
 * when a heuristic upstream fails to populate it.
 */
export interface ExtractedFact {
  /** Fact text; the response parsers cap this at 512 chars. */
  text: string;
  /** v1 taxonomy type. Always present on newly-extracted facts. */
  type: MemoryType;
  /** LLM-scored importance; the parsers clamp it to the 1-10 range. */
  importance: number; // 1-10
  /** How this fact relates to the existing store. */
  action: ExtractionAction;
  /** Id of the existing memory; meaningful for UPDATE/DELETE actions. */
  existingFactId?: string;
  /** Named entities; invalid entries are dropped by `parseEntity`. */
  entities?: ExtractedEntity[];
  confidence?: number; // 0.0-1.0, LLM self-assessed
  /**
   * v1 provenance tag. Required on the write path — when missing,
   * `storeExtractedFacts` supplies `'user-inferred'` as a defensive default.
   */
  source?: MemorySource;
  /** v1 life-domain scope. Default 'unspecified'. */
  scope?: MemoryScope;
  /**
   * Decision-with-reasoning "because Y" clause, for type=claim. Max 256 chars.
   */
  reasoning?: string;
  /**
   * v1 stability signal. Assigned by `comparativeRescoreV1` or, when rescore
   * is skipped (facts.length < 5), by the `defaultVolatility` heuristic.
   */
  volatility?: MemoryVolatility;
}
|
|
23
203
|
|
|
204
|
+
/** Runtime counterpart of `EntityType`; `parseEntity` validates LLM output against it. */
const ALLOWED_ENTITY_TYPES: ReadonlySet<EntityType> = new Set([
  'person',
  'project',
  'tool',
  'company',
  'concept',
  'place',
]);

/**
 * Default confidence when the LLM does not provide one.
 * Mirrors the fallback used by other extraction clients.
 */
export const DEFAULT_EXTRACTION_CONFIDENCE = 0.85;
|
|
218
|
+
|
|
24
219
|
interface ContentBlock {
|
|
25
220
|
type?: string;
|
|
26
221
|
text?: string;
|
|
@@ -33,45 +228,6 @@ interface ConversationMessage {
|
|
|
33
228
|
text?: string;
|
|
34
229
|
}
|
|
35
230
|
|
|
36
|
-
// ---------------------------------------------------------------------------
|
|
37
|
-
// Extraction Prompt
|
|
38
|
-
// ---------------------------------------------------------------------------
|
|
39
|
-
|
|
40
|
-
const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine. Analyze the conversation and extract valuable long-term memories.
|
|
41
|
-
|
|
42
|
-
Rules:
|
|
43
|
-
1. Each memory must be a single, self-contained piece of information
|
|
44
|
-
2. Focus on user-specific information that would be useful in future conversations
|
|
45
|
-
3. Skip generic knowledge, greetings, small talk, and ephemeral task coordination
|
|
46
|
-
4. Score importance 1-10 (6+ = worth storing)
|
|
47
|
-
5. Only extract memories with importance >= 6
|
|
48
|
-
|
|
49
|
-
Types:
|
|
50
|
-
- fact: Objective information about the user (name, location, job, relationships)
|
|
51
|
-
- preference: Likes, dislikes, or preferences ("prefers dark mode", "allergic to peanuts")
|
|
52
|
-
- decision: Choices WITH reasoning ("chose PostgreSQL because data is relational and needs ACID")
|
|
53
|
-
- episodic: Notable events or experiences ("deployed v1.0 to production on March 15")
|
|
54
|
-
- goal: Objectives, targets, or plans ("wants to launch public beta by end of Q1")
|
|
55
|
-
- context: Active project/task context ("working on TotalReclaw v1.2, staging on Base Sepolia")
|
|
56
|
-
- summary: Key outcome or conclusion from a discussion ("agreed to use phased rollout for migration")
|
|
57
|
-
|
|
58
|
-
Extraction guidance:
|
|
59
|
-
- For decisions: ALWAYS include the reasoning. "Chose X" is weak. "Chose X because Y" is strong.
|
|
60
|
-
- For context: Capture what the user is actively working on, including versions, environments, and status.
|
|
61
|
-
- For summaries: Only extract when a conversation reaches a clear conclusion or agreement.
|
|
62
|
-
- For facts: Prefer specific over vague. "Lives in Lisbon" beats "lives in Europe".
|
|
63
|
-
- Decisions and context should be importance >= 7 (they are high-value for future conversations).
|
|
64
|
-
|
|
65
|
-
Actions (compare against existing memories if provided):
|
|
66
|
-
- ADD: New memory, no conflict with existing
|
|
67
|
-
- UPDATE: Refines or corrects an existing memory (provide existingFactId)
|
|
68
|
-
- DELETE: Contradicts an existing memory -- the old one is now wrong (provide existingFactId)
|
|
69
|
-
- NOOP: Already captured or not worth storing
|
|
70
|
-
|
|
71
|
-
Return a JSON array (no markdown, no code fences):
|
|
72
|
-
[{"text": "...", "type": "...", "importance": N, "action": "ADD|UPDATE|DELETE|NOOP", "existingFactId": "..."}, ...]
|
|
73
|
-
|
|
74
|
-
If nothing is worth extracting, return: []`;
|
|
75
231
|
|
|
76
232
|
// ---------------------------------------------------------------------------
|
|
77
233
|
// Helpers
|
|
@@ -137,10 +293,555 @@ function truncateMessages(messages: Array<{ role: string; content: string }>, ma
|
|
|
137
293
|
}
|
|
138
294
|
|
|
139
295
|
/**
|
|
140
|
-
* Parse
|
|
296
|
+
* Parse a single entity object from LLM output. Returns null if invalid.
|
|
297
|
+
* Invalid entities are silently dropped so a bad entity never fails the whole fact.
|
|
298
|
+
*/
|
|
299
|
+
export function parseEntity(raw: unknown): ExtractedEntity | null {
|
|
300
|
+
if (!raw || typeof raw !== 'object') return null;
|
|
301
|
+
const e = raw as Record<string, unknown>;
|
|
302
|
+
const name = typeof e.name === 'string' ? e.name.trim() : '';
|
|
303
|
+
if (name.length === 0) return null;
|
|
304
|
+
const type = String(e.type ?? '').toLowerCase() as EntityType;
|
|
305
|
+
if (!ALLOWED_ENTITY_TYPES.has(type)) return null;
|
|
306
|
+
const entity: ExtractedEntity = { name: name.slice(0, 128), type };
|
|
307
|
+
if (typeof e.role === 'string' && e.role.trim().length > 0) {
|
|
308
|
+
entity.role = e.role.trim().slice(0, 128);
|
|
309
|
+
}
|
|
310
|
+
return entity;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
/**
|
|
314
|
+
* Clamp a raw confidence value to [0, 1]. Returns the default when missing or NaN.
|
|
315
|
+
*/
|
|
316
|
+
export function normalizeConfidence(raw: unknown): number {
|
|
317
|
+
if (typeof raw !== 'number' || !Number.isFinite(raw)) return DEFAULT_EXTRACTION_CONFIDENCE;
|
|
318
|
+
if (raw < 0) return 0;
|
|
319
|
+
if (raw > 1) return 1;
|
|
320
|
+
return raw;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
/**
 * Minimal logger shape accepted by the extraction pipeline. Matches the
 * OpenClaw plugin logger so callers can pass `api.logger` directly.
 *
 * All methods are optional so tests can pass a partial object and callers
 * that don't care about observability can omit the argument entirely.
 */
export interface ExtractorLogger {
  /** Informational message (e.g. JSON recovered via bracket-scan fallback). */
  info?: (msg: string) => void;
  /** Recoverable problem (e.g. unparseable LLM output). */
  warn?: (msg: string) => void;
}
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
// ---------------------------------------------------------------------------
|
|
337
|
+
// Phase 2.2.6: lexical importance bumps
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
|
|
340
|
+
/**
|
|
341
|
+
* Escape regex metacharacters so a string can be used as a literal pattern.
|
|
342
|
+
*/
|
|
343
|
+
function escapeRegExp(s: string): string {
|
|
344
|
+
return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* Compute a lexical importance bump (0-2) for a single fact based on signals
|
|
349
|
+
* in the surrounding conversation text.
|
|
350
|
+
*
|
|
351
|
+
* This is a Phase 2.2.6 quality fix complementing the prompt rubric tightening
|
|
352
|
+
* (item A). Where the rubric tells the LLM to use the full 1-10 range, the
|
|
353
|
+
* bump tells us *as a post-process*: when the user's actual phrasing carries
|
|
354
|
+
* strong "remember this" signals that the LLM may have under-weighted, push
|
|
355
|
+
* the score up.
|
|
356
|
+
*
|
|
357
|
+
* Signals detected (each adds +1, capped at +2 total):
|
|
358
|
+
*
|
|
359
|
+
* 1. **Strong intent phrases** anywhere in the conversation:
|
|
360
|
+
* "remember this", "never forget", "rule of thumb", "critical",
|
|
361
|
+
* "don't ever forget", explicit "always X" / "never Y" patterns.
|
|
362
|
+
* 2. **Emphasis markers**: `!!` (double exclamation), or 3+ all-caps words
|
|
363
|
+
* in a row (e.g. "DO NOT FORGET", "VERY IMPORTANT").
|
|
364
|
+
* 3. **Repetition**: the fact's first ~20 chars appear at least twice in
|
|
365
|
+
* the conversation text (paraphrased restating).
|
|
366
|
+
*
|
|
367
|
+
* The bump is additive on top of whatever the LLM scored; final importance
|
|
368
|
+
* is capped at 10.
|
|
369
|
+
*
|
|
370
|
+
* Final-importance ceiling: this never makes a fact pass the importance >= 6
|
|
371
|
+
* filter on its own — a fact still needs to have an LLM score >= 5 (because
|
|
372
|
+
* +2 from 5 = 7, above floor; +1 from 5 = 6, above floor). This is intentional:
|
|
373
|
+
* the bump is for "the LLM correctly identified this as worth storing but
|
|
374
|
+
* under-weighted it", not "the LLM said skip but we're overriding."
|
|
141
375
|
*/
|
|
142
|
-
function
|
|
376
|
+
export function computeLexicalImportanceBump(
|
|
377
|
+
factText: string,
|
|
378
|
+
conversationText: string,
|
|
379
|
+
): number {
|
|
380
|
+
let bump = 0;
|
|
381
|
+
const lowerConv = conversationText.toLowerCase();
|
|
382
|
+
|
|
383
|
+
// Signal 1: strong intent phrases anywhere in the conversation
|
|
384
|
+
const strongIntent =
|
|
385
|
+
/\b(remember this|never forget|rule of thumb|don't (?:ever )?forget|critical|important|gotcha|note to self)\b/i;
|
|
386
|
+
if (strongIntent.test(lowerConv)) bump += 1;
|
|
387
|
+
|
|
388
|
+
// Signal 2: emphasis markers — double exclamation OR 3+ consecutive all-caps words
|
|
389
|
+
// (3+ chars each, to avoid false positives on acronyms like "AWS S3 IAM")
|
|
390
|
+
const doubleExclamation = /!!/;
|
|
391
|
+
const allCapsPhrase = /\b[A-Z]{3,}(?:\s+[A-Z]{3,}){2,}\b/;
|
|
392
|
+
if (doubleExclamation.test(conversationText) || allCapsPhrase.test(conversationText)) {
|
|
393
|
+
bump += 1;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
// Signal 3: repetition — extract content words (length >= 5, not common stop
|
|
397
|
+
// words) from the fact, and check if any single one appears 2+ times in the
|
|
398
|
+
// conversation. This is more robust to LLM paraphrasing than a fingerprint
|
|
399
|
+
// match: "User prefers PostgreSQL" extracted from "I prefer PostgreSQL ...
|
|
400
|
+
// yeah PostgreSQL is right for OLTP" still triggers because "postgresql"
|
|
401
|
+
// appears multiple times even though the leading chars differ.
|
|
402
|
+
const lowerFact = factText.toLowerCase();
|
|
403
|
+
const stopWords = new Set([
|
|
404
|
+
'about', 'after', 'again', 'against', 'because', 'before', 'being',
|
|
405
|
+
'between', 'could', 'doing', 'during', 'every', 'further', 'having',
|
|
406
|
+
'their', 'these', 'those', 'through', 'under', 'until', 'where', 'which',
|
|
407
|
+
'while', 'would', 'should', 'about', 'thing', 'things', 'something',
|
|
408
|
+
'someone', 'always', 'never', 'often', 'still', 'really', 'maybe',
|
|
409
|
+
'using', 'works', 'work', 'user', 'users', 'with', 'from', 'into',
|
|
410
|
+
'like', 'just', 'than', 'them', 'they', 'will', 'when', 'what', 'were',
|
|
411
|
+
'this', 'that', 'have', 'this',
|
|
412
|
+
]);
|
|
413
|
+
const factWords = lowerFact.split(/[^a-z0-9_]+/).filter((w) => w.length >= 5 && !stopWords.has(w));
|
|
414
|
+
let triggered = false;
|
|
415
|
+
for (const word of factWords) {
|
|
416
|
+
const occurrences = (lowerConv.match(new RegExp(`\\b${escapeRegExp(word)}\\b`, 'g')) || [])
|
|
417
|
+
.length;
|
|
418
|
+
if (occurrences >= 2) {
|
|
419
|
+
triggered = true;
|
|
420
|
+
break;
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
if (triggered) bump += 1;
|
|
424
|
+
|
|
425
|
+
return Math.min(bump, 2);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
// ---------------------------------------------------------------------------
// Compaction-Aware Extraction (Phase 2.3)
// ---------------------------------------------------------------------------

/**
 * Compaction-specific system prompt (v1 taxonomy). Fires when the conversation
 * context is about to be compacted. LAST CHANCE to capture knowledge before
 * it is lost, so the importance floor is 5 instead of 6 and the prompt is
 * more aggressive about extracting active-project context, claims, and
 * episodes.
 *
 * Differences from `EXTRACTION_SYSTEM_PROMPT`:
 * - Opening framing emphasizes urgency ("last chance")
 * - Format-agnostic: handles bullet lists, prose, mixed formats
 * - Importance threshold lowered to 5
 * - More aggressive on claim / episode / directive types
 * - Anti-pattern: don't skip content just because it's in a summary
 *
 * Output format matches `EXTRACTION_SYSTEM_PROMPT` exactly (same merged
 * topics+facts JSON shape with v1 type / source / scope fields), so the
 * same `parseMergedResponseV1` parser can validate it.
 */
export const COMPACTION_SYSTEM_PROMPT = `You are extracting memories from a conversation that is about to be compacted. The context will be LOST after this point — this is your LAST CHANCE to capture everything worth remembering. Be more aggressive than usual: err on the side of storing.

Work in TWO explicit phases within one response:

PHASE 1 — Topic identification.
Identify the 2-3 main topics the user was engaging with before extracting any fact. Topics should be short phrases (2-5 words each). If there's no clear user-focused topic, use an empty topics array.

PHASE 2 — Fact extraction anchored to those topics (plus preserve active context).
Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Active project context, decisions in progress, and current working state score 6-8 during compaction — capture them even when they'd normally be marginal.

Rules:
1. Each memory = single self-contained piece of information
2. Focus on user-specific info useful in future conversations
3. Skip generic knowledge, greetings, small talk
4. Score importance 1-10 (5+ = worth storing during compaction)
5. Every memory MUST attribute a source (provenance critical)

Importance rubric (full 1-10 range, NOT just 7-8):
- 10: Core identity, never-forget ("remember this forever", name/birthday)
- 9: Affects many future decisions / high-impact rules
- 8: Preference / decision-with-reasoning / operational rule
- 7: Specific durable fact
- 6: Borderline — during compaction, capture anyway
- 5: Would normally drop; keep as compaction safety net
- 4 or below: DROP (greetings, filler)

═══════════════════════════════════════════════════════════════
TYPE (6 values)
═══════════════════════════════════════════════════════════════
- claim: factual assertion (absorbs v0 fact/context/decision; decisions populate reasoning)
- preference: likes/dislikes/tastes
- directive: imperative rule ("always X", "never Y")
- commitment: future intent ("will do X")
- episode: notable event
- summary: derived synthesis (source must be derived|assistant)

═══════════════════════════════════════════════════════════════
SOURCE (provenance, CRITICAL)
═══════════════════════════════════════════════════════════════
- user: user explicitly stated it (in [user]: turns)
- user-inferred: extractor inferred from user signals
- assistant: assistant authored — DOWNGRADE unless user affirmed/quoted
- external, derived: rare

IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant.

═══════════════════════════════════════════════════════════════
SCOPE
═══════════════════════════════════════════════════════════════
work | personal | health | family | creative | finance | misc | unspecified

═══════════════════════════════════════════════════════════════
ENTITIES
═══════════════════════════════════════════════════════════════
- type ∈ {person, project, tool, company, concept, place}
- prefer specific names ("PostgreSQL" not "database")
- omit umbrella categories when specific name is present

═══════════════════════════════════════════════════════════════
REASONING (only for claims that are decisions)
═══════════════════════════════════════════════════════════════
For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.

═══════════════════════════════════════════════════════════════
FORMAT-AGNOSTIC PARSING (IMPORTANT)
═══════════════════════════════════════════════════════════════
The conversation may contain bullet lists, numbered lists, section headers, code snippets, or plain prose. Treat ALL formats as potential sources of extractable memory:
- Bullets/list items: each item is a candidate.
- Section headers (Context, Decisions, Key Learnings, Open Questions): use the header as a TYPE HINT (Context → claim, Decisions → claim+reasoning, Learnings → directive, Open Questions → commitment).
- Plain prose: parse each distinct assertion as a candidate.
- Code snippets: extract config choices, tool versions, architectural decisions embedded in comments or structure.
- Mixed format: apply all of the above.

Do NOT skip content just because it's in a summary. The agent has already filtered — your job is to convert into structured memories, not to re-evaluate worth.

═══════════════════════════════════════════════════════════════
OUTPUT FORMAT (no markdown, no code fences)
═══════════════════════════════════════════════════════════════
{
"topics": ["topic 1", "topic 2"],
"facts": [
{
"text": "...",
"type": "claim|preference|directive|commitment|episode",
"source": "user|user-inferred|assistant",
"scope": "work|personal|health|...",
"importance": N,
"confidence": 0.9,
"action": "ADD",
"reasoning": "...", // optional, only for claim+decision
"entities": [{"name": "...", "type": "tool"}]
}
]
}

If nothing worth extracting: {"topics": [], "facts": []}`;
|
|
547
|
+
|
|
548
|
+
/**
|
|
549
|
+
* Parse facts for compaction context (v1 taxonomy; importance floor 5).
|
|
550
|
+
*
|
|
551
|
+
* Identical to `parseFactsResponse` except the importance floor is 5 instead
|
|
552
|
+
* of 6 — compaction is the last chance to capture context, so we accept
|
|
553
|
+
* borderline facts that would normally be dropped.
|
|
554
|
+
*
|
|
555
|
+
* Accepts the same merged-topic v1 JSON shape as the main prompt. The
|
|
556
|
+
* inner `parseMergedResponseV1` enforces the >=6 floor, so we re-run a
|
|
557
|
+
* lenient >=5 pass on the raw parsed payload to admit the borderline items.
|
|
558
|
+
*/
|
|
559
|
+
export function parseFactsResponseForCompaction(
|
|
560
|
+
response: string,
|
|
561
|
+
logger?: ExtractorLogger,
|
|
562
|
+
): ExtractedFact[] {
|
|
563
|
+
const originalPreview = response.trim().slice(0, 200);
|
|
564
|
+
let cleaned = response.trim();
|
|
565
|
+
|
|
566
|
+
// Strip <think>...</think> and <thinking>...</thinking> tags
|
|
567
|
+
cleaned = cleaned
|
|
568
|
+
.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '')
|
|
569
|
+
.trim();
|
|
570
|
+
|
|
143
571
|
// Strip markdown code fences if present
|
|
572
|
+
if (cleaned.startsWith('```')) {
|
|
573
|
+
cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
const tryParse = (input: string): unknown => {
|
|
577
|
+
try {
|
|
578
|
+
return JSON.parse(input);
|
|
579
|
+
} catch {
|
|
580
|
+
return undefined;
|
|
581
|
+
}
|
|
582
|
+
};
|
|
583
|
+
|
|
584
|
+
let parsed = tryParse(cleaned);
|
|
585
|
+
let recoveryUsed: 'none' | 'bracket-scan' = 'none';
|
|
586
|
+
if (parsed === undefined) {
|
|
587
|
+
// Try bare-array first (legacy compaction output), then object (v1 merged).
|
|
588
|
+
const arrMatch = cleaned.match(/\[[\s\S]*\]/);
|
|
589
|
+
if (arrMatch) {
|
|
590
|
+
parsed = tryParse(arrMatch[0]);
|
|
591
|
+
if (parsed !== undefined) recoveryUsed = 'bracket-scan';
|
|
592
|
+
}
|
|
593
|
+
if (parsed === undefined) {
|
|
594
|
+
const objMatch = cleaned.match(/\{[\s\S]*\}/);
|
|
595
|
+
if (objMatch) {
|
|
596
|
+
parsed = tryParse(objMatch[0]);
|
|
597
|
+
if (parsed !== undefined) recoveryUsed = 'bracket-scan';
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
if (recoveryUsed === 'bracket-scan') {
|
|
602
|
+
logger?.info?.(
|
|
603
|
+
`parseFactsResponseForCompaction: recovered JSON via bracket-scan fallback`,
|
|
604
|
+
);
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
if (!parsed || typeof parsed !== 'object') {
|
|
608
|
+
logger?.warn?.(
|
|
609
|
+
`parseFactsResponseForCompaction: could not parse LLM output as JSON object. Preview: ${JSON.stringify(originalPreview)}`,
|
|
610
|
+
);
|
|
611
|
+
return [];
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
const obj = parsed as Record<string, unknown>;
|
|
615
|
+
const rawFacts = Array.isArray(obj.facts) ? (obj.facts as unknown[]) : null;
|
|
616
|
+
|
|
617
|
+
// Legacy v0 compaction output (bare JSON array) — best-effort parse.
|
|
618
|
+
const rawArray = rawFacts ?? (Array.isArray(parsed) ? (parsed as unknown[]) : null);
|
|
619
|
+
if (!rawArray) {
|
|
620
|
+
logger?.warn?.(
|
|
621
|
+
`parseFactsResponseForCompaction: expected { facts: [...] } object, got ${typeof parsed}`,
|
|
622
|
+
);
|
|
623
|
+
return [];
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
const validActions: ExtractionAction[] = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
|
|
627
|
+
|
|
628
|
+
const facts = rawArray
|
|
629
|
+
.filter(
|
|
630
|
+
(f): f is Record<string, unknown> =>
|
|
631
|
+
!!f &&
|
|
632
|
+
typeof f === 'object' &&
|
|
633
|
+
typeof (f as Record<string, unknown>).text === 'string' &&
|
|
634
|
+
((f as Record<string, unknown>).text as string).length >= 5,
|
|
635
|
+
)
|
|
636
|
+
.map((f) => {
|
|
637
|
+
const rawType = String(f.type ?? 'claim').toLowerCase();
|
|
638
|
+
// Accept v1 tokens directly; coerce legacy v0 tokens via V0_TO_V1_TYPE.
|
|
639
|
+
let type: MemoryType;
|
|
640
|
+
if (isValidMemoryType(rawType)) {
|
|
641
|
+
type = rawType;
|
|
642
|
+
} else if ((LEGACY_V0_MEMORY_TYPES as readonly string[]).includes(rawType)) {
|
|
643
|
+
type = V0_TO_V1_TYPE[rawType as MemoryTypeV0];
|
|
644
|
+
} else {
|
|
645
|
+
type = 'claim';
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
|
|
649
|
+
const source: MemorySource = (VALID_MEMORY_SOURCES as readonly string[]).includes(rawSource)
|
|
650
|
+
? (rawSource as MemorySource)
|
|
651
|
+
: 'user-inferred';
|
|
652
|
+
|
|
653
|
+
const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
|
|
654
|
+
const scope: MemoryScope = (VALID_MEMORY_SCOPES as readonly string[]).includes(rawScope)
|
|
655
|
+
? (rawScope as MemoryScope)
|
|
656
|
+
: 'unspecified';
|
|
657
|
+
|
|
658
|
+
const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;
|
|
659
|
+
|
|
660
|
+
const action = validActions.includes(String(f.action) as ExtractionAction)
|
|
661
|
+
? (String(f.action) as ExtractionAction)
|
|
662
|
+
: 'ADD';
|
|
663
|
+
|
|
664
|
+
let entities: ExtractedEntity[] | undefined;
|
|
665
|
+
if (Array.isArray(f.entities)) {
|
|
666
|
+
const valid = (f.entities as unknown[])
|
|
667
|
+
.map(parseEntity)
|
|
668
|
+
.filter((e): e is ExtractedEntity => e !== null);
|
|
669
|
+
if (valid.length > 0) entities = valid;
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
const result: ExtractedFact = {
|
|
673
|
+
text: String(f.text).slice(0, 512),
|
|
674
|
+
type,
|
|
675
|
+
source,
|
|
676
|
+
scope,
|
|
677
|
+
reasoning,
|
|
678
|
+
importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
|
|
679
|
+
action,
|
|
680
|
+
existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
|
|
681
|
+
confidence: normalizeConfidence(f.confidence),
|
|
682
|
+
};
|
|
683
|
+
if (entities) result.entities = entities;
|
|
684
|
+
return result;
|
|
685
|
+
})
|
|
686
|
+
// Reject illegal type:summary + source:user
|
|
687
|
+
.filter((f) => !(f.type === 'summary' && f.source === 'user'))
|
|
688
|
+
// Compaction: importance >= 5 (not 6)
|
|
689
|
+
.filter((f) => f.importance >= 5 || f.action === 'DELETE');
|
|
690
|
+
|
|
691
|
+
return facts;
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
/**
|
|
695
|
+
* Extract facts using the compaction-aware prompt.
|
|
696
|
+
*
|
|
697
|
+
* This is called from the `before_compaction` hook — the LAST CHANCE to
|
|
698
|
+
* capture knowledge before conversation context is lost. Key differences
|
|
699
|
+
* from `extractFacts`:
|
|
700
|
+
* - Uses `COMPACTION_SYSTEM_PROMPT` (lower threshold, format-agnostic, more aggressive)
|
|
701
|
+
* - Always processes the full conversation (`mode: 'full'`)
|
|
702
|
+
* - Importance filter is >= 5 instead of >= 6
|
|
703
|
+
* - Lexical importance bumps still apply
|
|
704
|
+
*
|
|
705
|
+
* @param rawMessages - The messages array from the hook event (unknown[])
|
|
706
|
+
* @param existingMemories - Optional list of existing memories for dedup context
|
|
707
|
+
* @param logger - Optional logger for observability
|
|
708
|
+
* @returns Array of extracted facts, or empty array on failure.
|
|
709
|
+
*/
|
|
710
|
+
export async function extractFactsForCompaction(
|
|
711
|
+
rawMessages: unknown[],
|
|
712
|
+
existingMemories?: Array<{ id: string; text: string }>,
|
|
713
|
+
logger?: ExtractorLogger,
|
|
714
|
+
): Promise<ExtractedFact[]> {
|
|
715
|
+
const config = resolveLLMConfig();
|
|
716
|
+
if (!config) {
|
|
717
|
+
logger?.info?.('extractFactsForCompaction: no LLM config resolved (skipping extraction)');
|
|
718
|
+
return [];
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
// Parse messages
|
|
722
|
+
const parsed = rawMessages
|
|
723
|
+
.map(messageToText)
|
|
724
|
+
.filter((m): m is { role: string; content: string } => m !== null);
|
|
725
|
+
|
|
726
|
+
if (parsed.length === 0) {
|
|
727
|
+
logger?.info?.(`extractFactsForCompaction: no parseable messages (raw count=${rawMessages.length})`);
|
|
728
|
+
return [];
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
// Always full mode — process entire conversation for compaction
|
|
732
|
+
const conversationText = truncateMessages(parsed, 12_000);
|
|
733
|
+
|
|
734
|
+
if (conversationText.length < 20) {
|
|
735
|
+
logger?.info?.(
|
|
736
|
+
`extractFactsForCompaction: conversation too short (${conversationText.length} chars < 20)`,
|
|
737
|
+
);
|
|
738
|
+
return [];
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
// Build existing memories context if available
|
|
742
|
+
let memoriesContext = '';
|
|
743
|
+
if (existingMemories && existingMemories.length > 0) {
|
|
744
|
+
const memoriesStr = existingMemories
|
|
745
|
+
.map((m) => `[ID: ${m.id}] ${m.text}`)
|
|
746
|
+
.join('\n');
|
|
747
|
+
memoriesContext = `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoriesStr}`;
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
const userPrompt = `Extract ALL valuable long-term memories from this conversation before it is compacted and lost:\n\n${conversationText}${memoriesContext}`;
|
|
751
|
+
|
|
752
|
+
let response: string | null | undefined;
|
|
753
|
+
try {
|
|
754
|
+
response = await chatCompletion(config, [
|
|
755
|
+
{ role: 'system', content: COMPACTION_SYSTEM_PROMPT },
|
|
756
|
+
{ role: 'user', content: userPrompt },
|
|
757
|
+
]);
|
|
758
|
+
} catch (err) {
|
|
759
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
760
|
+
logger?.warn?.(`extractFactsForCompaction: chatCompletion threw: ${msg}`);
|
|
761
|
+
return [];
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
if (!response) {
|
|
765
|
+
logger?.info?.('extractFactsForCompaction: chatCompletion returned null/empty response');
|
|
766
|
+
return [];
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
logger?.info?.(
|
|
770
|
+
`extractFactsForCompaction: LLM returned ${response.length} chars; handing to parseFactsResponseForCompaction`,
|
|
771
|
+
);
|
|
772
|
+
let facts = parseFactsResponseForCompaction(response, logger);
|
|
773
|
+
|
|
774
|
+
// v1 provenance filter (tag-don't-drop). Uses importance >= 5 floor because
|
|
775
|
+
// the filter's own floor is 5 in lax mode, matching compaction semantics.
|
|
776
|
+
facts = applyProvenanceFilterLax(facts, conversationText);
|
|
777
|
+
|
|
778
|
+
// Comparative rescore if >= 5 facts (same as default pipeline), else
|
|
779
|
+
// assign defaultVolatility so v1 write path has a value.
|
|
780
|
+
facts = await comparativeRescoreV1(facts, conversationText, logger);
|
|
781
|
+
facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
|
|
782
|
+
|
|
783
|
+
// Lexical importance bumps (same as regular extraction)
|
|
784
|
+
for (const f of facts) {
|
|
785
|
+
const bump = computeLexicalImportanceBump(f.text, conversationText);
|
|
786
|
+
if (bump > 0) {
|
|
787
|
+
const oldImportance = f.importance;
|
|
788
|
+
const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
|
|
789
|
+
f.importance = Math.min(10, f.importance + effectiveBump);
|
|
790
|
+
logger?.info?.(
|
|
791
|
+
`extractFactsForCompaction: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`,
|
|
792
|
+
);
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
return facts;
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
// ---------------------------------------------------------------------------
|
|
800
|
+
// Debrief Extraction
|
|
801
|
+
// ---------------------------------------------------------------------------
|
|
802
|
+
|
|
803
|
+
/**
|
|
804
|
+
* Canonical debrief system prompt — must be identical across all clients.
|
|
805
|
+
*/
|
|
806
|
+
export const DEBRIEF_SYSTEM_PROMPT = `You are reviewing a conversation that just ended. The following facts were
already extracted and stored during this conversation:

{already_stored_facts}

Your job is to capture what turn-by-turn extraction MISSED. Focus on:

1. **Broader context** — What was the conversation about overall? What project,
   problem, or topic tied the discussion together?
2. **Outcomes & conclusions** — What was decided, agreed upon, or resolved?
3. **What was attempted** — What approaches were tried? What worked, what didn't, and why?
4. **Relationships** — How do topics discussed relate to each other or to things
   from previous conversations?
5. **Open threads** — What was left unfinished or needs follow-up?

Do NOT repeat facts already stored. Only add genuinely new information that provides
broader context a future conversation would benefit from.

Return a JSON array (no markdown, no code fences):
[{"text": "...", "type": "summary|context", "importance": N}]

- Use type "summary" for conclusions, outcomes, and decisions-of-the-session
- Use type "context" for broader project context, open threads, and what-was-tried
- Importance 7-8 for most debrief items (they are high-value by definition)
- Maximum 5 items (debriefs should be concise, not exhaustive)
- Each item should be 1-3 sentences, self-contained

If the conversation was too short or trivial to warrant a debrief, return: []`;

/**
 * A single item produced by the end-of-session debrief pass.
 */
export interface DebriefItem {
  /** 1-3 sentence self-contained statement; `parseDebriefResponse` truncates to 512 chars. */
  text: string;
  /** 'summary' = conclusions/outcomes; 'context' = broader context and open threads. */
  type: 'summary' | 'context';
  /** 1-10 scale; `parseDebriefResponse` clamps to this range and drops items below 6. */
  importance: number;
}
|
|
840
|
+
|
|
841
|
+
/**
|
|
842
|
+
* Parse a debrief response into validated DebriefItems.
|
|
843
|
+
*/
|
|
844
|
+
export function parseDebriefResponse(response: string): DebriefItem[] {
|
|
144
845
|
let cleaned = response.trim();
|
|
145
846
|
if (cleaned.startsWith('```')) {
|
|
146
847
|
cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
|
@@ -152,71 +853,555 @@ function parseFactsResponse(response: string): ExtractedFact[] {
|
|
|
152
853
|
|
|
153
854
|
return parsed
|
|
154
855
|
.filter(
|
|
155
|
-
(
|
|
156
|
-
|
|
157
|
-
typeof
|
|
158
|
-
typeof (
|
|
159
|
-
(
|
|
856
|
+
(item: unknown) =>
|
|
857
|
+
item &&
|
|
858
|
+
typeof item === 'object' &&
|
|
859
|
+
typeof (item as Record<string, unknown>).text === 'string' &&
|
|
860
|
+
((item as Record<string, unknown>).text as string).length >= 5,
|
|
160
861
|
)
|
|
161
|
-
.map((
|
|
162
|
-
const
|
|
163
|
-
const
|
|
164
|
-
const
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
return {
|
|
168
|
-
text: String(fact.text).slice(0, 512),
|
|
169
|
-
type: (['fact', 'preference', 'decision', 'episodic', 'goal', 'context', 'summary'].includes(String(fact.type))
|
|
170
|
-
? String(fact.type)
|
|
171
|
-
: 'fact') as ExtractedFact['type'],
|
|
172
|
-
importance: Math.max(1, Math.min(10, Number(fact.importance) || 5)),
|
|
173
|
-
action,
|
|
174
|
-
existingFactId: typeof fact.existingFactId === 'string' ? fact.existingFactId : undefined,
|
|
175
|
-
};
|
|
862
|
+
.map((item: unknown) => {
|
|
863
|
+
const d = item as Record<string, unknown>;
|
|
864
|
+
const type: 'summary' | 'context' = d.type === 'summary' ? 'summary' : 'context';
|
|
865
|
+
const rawImportance = typeof d.importance === 'number' ? d.importance : 7;
|
|
866
|
+
const importance = Math.max(1, Math.min(10, rawImportance));
|
|
867
|
+
return { text: String(d.text).slice(0, 512), type, importance };
|
|
176
868
|
})
|
|
177
|
-
.filter((
|
|
869
|
+
.filter((d) => d.importance >= 6)
|
|
870
|
+
.slice(0, 5);
|
|
871
|
+
} catch {
|
|
872
|
+
return [];
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
/**
|
|
877
|
+
* Extract a session debrief using LLM.
|
|
878
|
+
*
|
|
879
|
+
* @param rawMessages - All messages from the session
|
|
880
|
+
* @param storedFactTexts - Texts of facts already stored in this session (for dedup)
|
|
881
|
+
* @returns Array of debrief items, or empty array on failure
|
|
882
|
+
*/
|
|
883
|
+
export async function extractDebrief(
|
|
884
|
+
rawMessages: unknown[],
|
|
885
|
+
storedFactTexts: string[],
|
|
886
|
+
): Promise<DebriefItem[]> {
|
|
887
|
+
const config = resolveLLMConfig();
|
|
888
|
+
if (!config) return [];
|
|
889
|
+
|
|
890
|
+
const parsed = rawMessages
|
|
891
|
+
.map(messageToText)
|
|
892
|
+
.filter((m): m is { role: string; content: string } => m !== null);
|
|
893
|
+
|
|
894
|
+
// Minimum 4 turns (8 messages) to warrant a debrief
|
|
895
|
+
if (parsed.length < 8) return [];
|
|
896
|
+
|
|
897
|
+
const conversationText = truncateMessages(parsed, 12_000);
|
|
898
|
+
if (conversationText.length < 20) return [];
|
|
899
|
+
|
|
900
|
+
const alreadyStored = storedFactTexts.length > 0
|
|
901
|
+
? storedFactTexts.map((t) => `- ${t}`).join('\n')
|
|
902
|
+
: '(none)';
|
|
903
|
+
|
|
904
|
+
const systemPrompt = DEBRIEF_SYSTEM_PROMPT.replace('{already_stored_facts}', alreadyStored);
|
|
905
|
+
|
|
906
|
+
try {
|
|
907
|
+
const response = await chatCompletion(config, [
|
|
908
|
+
{ role: 'system', content: systemPrompt },
|
|
909
|
+
{ role: 'user', content: `Review this conversation and provide a debrief:\n\n${conversationText}` },
|
|
910
|
+
]);
|
|
911
|
+
|
|
912
|
+
if (!response) return [];
|
|
913
|
+
return parseDebriefResponse(response);
|
|
178
914
|
} catch {
|
|
179
915
|
return [];
|
|
180
916
|
}
|
|
181
917
|
}
|
|
182
918
|
|
|
183
919
|
// ---------------------------------------------------------------------------
|
|
184
|
-
//
|
|
920
|
+
// v1 Taxonomy Extraction Pipeline (default as of plugin v3.0.0)
|
|
921
|
+
//
|
|
922
|
+
// Produces facts conforming to Memory Taxonomy v1 (6 types: claim,
|
|
923
|
+
// preference, directive, commitment, episode, summary; 5 sources; 8 scopes).
|
|
924
|
+
//
|
|
925
|
+
// The G-pipeline uses a single merged-topic prompt that returns both the
|
|
926
|
+
// 2-3 main topics the user engaged with AND the extracted facts, so topic
|
|
927
|
+
// anchoring is preserved within one call. After extraction we apply:
|
|
928
|
+
//
|
|
929
|
+
// 1. `applyProvenanceFilterLax` — tag-don't-drop. Assistant-sourced facts
|
|
930
|
+
// get their importance capped at 7 rather than being filtered out; the
|
|
931
|
+
// reranker later uses the source field to deprioritize them.
|
|
932
|
+
// 2. `comparativeRescoreV1` — spread importance across the 1-10 range
|
|
933
|
+
// and assign volatility. Forced when the batch has >= 5 facts.
|
|
934
|
+
// 3. `defaultVolatility` — heuristic fallback.
|
|
935
|
+
//
|
|
936
|
+
// This matches the winning G pipeline from the 200-conv benchmark.
|
|
185
937
|
// ---------------------------------------------------------------------------
|
|
186
938
|
|
|
187
939
|
/**
|
|
188
|
-
*
|
|
940
|
+
* The main extraction system prompt (v1 merged-topic pipeline).
|
|
189
941
|
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
|
|
193
|
-
|
|
942
|
+
* Exported as both `EXTRACTION_SYSTEM_PROMPT` (canonical) and
|
|
943
|
+
* `EXTRACTION_SYSTEM_PROMPT_V1_MERGED` (deprecated alias) for back-compat.
|
|
944
|
+
*/
|
|
945
|
+
export const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine using Memory Taxonomy v1. Work in TWO explicit phases within one response:

PHASE 1 — Topic identification.
Before extracting any fact, identify the 2-3 main topics the user was engaging with. Topics should be short phrases (2-5 words each). If the conversation has no clear user-focused topic, use an empty topics array.

PHASE 2 — Fact extraction anchored to those topics.
Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Tangential facts may still be extracted but score lower (6-7 range).

Rules:
1. Each memory = single self-contained piece of information
2. Focus on user-specific info useful in future conversations
3. Skip generic knowledge, greetings, small talk, ephemeral task coordination
4. Score importance 1-10 (6+ = worth storing)
5. Every memory MUST attribute a source (provenance critical)

Importance rubric (use FULL 1-10 range):
- 10: Critical, core identity, never-forget content
- 9: Affects many future decisions
- 8: High-value preference/decision/rule
- 7: Specific durable fact
- 6: Borderline
- 5 or below: NOT worth storing — drop

DO NOT cluster everything at 7-8-9.

═══════════════════════════════════════════════════════════════
TYPE (6 values)
═══════════════════════════════════════════════════════════════
- claim: factual assertion (absorbs fact/context/decision; decisions populate reasoning field)
- preference: likes/dislikes/tastes
- directive: imperative rule ("always X", "never Y")
- commitment: future intent ("will do X")
- episode: notable event
- summary: derived synthesis (source must be derived|assistant) — do NOT emit for turn-extraction

═══════════════════════════════════════════════════════════════
SOURCE (provenance, CRITICAL)
═══════════════════════════════════════════════════════════════
- user: user explicitly stated it (in [user]: turns)
- user-inferred: extractor inferred from user signals
- assistant: assistant authored content — DOWNGRADE unless user affirmed/quoted/used it
- external, derived: rare

IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant

═══════════════════════════════════════════════════════════════
SCOPE (life domain)
═══════════════════════════════════════════════════════════════
work | personal | health | family | creative | finance | misc | unspecified

═══════════════════════════════════════════════════════════════
ENTITIES
═══════════════════════════════════════════════════════════════
- type ∈ {person, project, tool, company, concept, place}
- prefer specific names ("PostgreSQL" not "database")
- omit umbrella categories when specific name is present

═══════════════════════════════════════════════════════════════
REASONING (only for claims that are decisions)
═══════════════════════════════════════════════════════════════
For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.

═══════════════════════════════════════════════════════════════
OUTPUT FORMAT (no markdown, no code fences)
═══════════════════════════════════════════════════════════════
{
  "topics": ["topic 1", "topic 2"],
  "facts": [
    {
      "text": "...",
      "type": "claim|preference|directive|commitment|episode",
      "source": "user|user-inferred|assistant",
      "scope": "work|personal|health|...",
      "importance": N,
      "confidence": 0.9,
      "action": "ADD",
      "reasoning": "...", // optional, only for claim+decision
      "entities": [{"name": "...", "type": "tool"}]
    }
  ]
}

If nothing worth extracting: {"topics": [], "facts": []}`;

/**
 * @deprecated Use `EXTRACTION_SYSTEM_PROMPT` instead. Kept only as a
 * back-compat alias for callers that imported the v1 rollout name.
 */
export const EXTRACTION_SYSTEM_PROMPT_V1_MERGED = EXTRACTION_SYSTEM_PROMPT;
|
|
1034
|
+
|
|
1035
|
+
/**
|
|
1036
|
+
* Parse a v1 merged-topic LLM response. Returns both the topic list and the
|
|
1037
|
+
* validated/filtered fact list. Illegal combinations (summary+user) are
|
|
1038
|
+
* dropped; importance < 6 with action != DELETE is dropped.
|
|
1039
|
+
*
|
|
1040
|
+
* Exported as both `parseFactsResponse` (canonical, returns facts array) and
|
|
1041
|
+
* `parseMergedResponseV1` (returns `{ topics, facts }`). Prefer the former
|
|
1042
|
+
* unless the topic list is needed.
|
|
1043
|
+
*/
|
|
1044
|
+
export function parseMergedResponseV1(
  response: string,
  logger?: ExtractorLogger,
): { topics: string[]; facts: ExtractedFact[] } {
  // Keep a short preview of the raw response for warn-level diagnostics.
  const originalPreview = response.trim().slice(0, 200);
  let cleaned = response.trim();
  // Strip <think>/<thinking> blocks emitted by reasoning models, then any
  // surrounding markdown code fence.
  cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
  }

  // Non-throwing JSON.parse: returns undefined on invalid input.
  const tryParse = (input: string): unknown => {
    try { return JSON.parse(input); } catch { return undefined; }
  };

  let parsed = tryParse(cleaned);
  let recoveryUsed: 'none' | 'bracket-scan' = 'none';
  if (parsed === undefined) {
    // First try an outermost-array greedy match (legacy bare-array format).
    const arrMatch = cleaned.match(/\[[\s\S]*\]/);
    if (arrMatch) {
      parsed = tryParse(arrMatch[0]);
      if (parsed !== undefined) recoveryUsed = 'bracket-scan';
    }
    if (parsed === undefined) {
      // Fall back to an outermost-object greedy match (merged-topic format).
      const objMatch = cleaned.match(/\{[\s\S]*\}/);
      if (objMatch) {
        parsed = tryParse(objMatch[0]);
        if (parsed !== undefined) recoveryUsed = 'bracket-scan';
      }
    }
  }
  if (recoveryUsed === 'bracket-scan') {
    // NOTE: log prefix uses the public wrapper name (see parseFactsResponse).
    logger?.info?.(
      `parseFactsResponse: recovered JSON via bracket-scan fallback`,
    );
  }

  // Unparseable output → empty result rather than throwing.
  if (!parsed || typeof parsed !== 'object') {
    logger?.warn?.(
      `parseFactsResponse: could not parse LLM output as JSON. Preview: ${JSON.stringify(originalPreview)}`,
    );
    return { topics: [], facts: [] };
  }

  // Dual-format acceptance: either the merged object `{ topics, facts }` or
  // a bare JSON array of fact objects (legacy / test fixture shape). The
  // bare array is wrapped as { topics: [], facts: [...] } so the downstream
  // logic stays uniform. A single fact object (no wrapper) is also wrapped.
  let obj: Record<string, unknown>;
  if (Array.isArray(parsed)) {
    obj = { topics: [], facts: parsed };
  } else if (
    typeof (parsed as Record<string, unknown>).facts === 'undefined' &&
    typeof (parsed as Record<string, unknown>).text === 'string'
  ) {
    // Single fact object, not a merged wrapper.
    obj = { topics: [], facts: [parsed] };
  } else {
    obj = parsed as Record<string, unknown>;
  }

  // Topics: keep at most 3 non-empty strings; anything else → [].
  const rawTopics = obj.topics;
  const topics = Array.isArray(rawTopics)
    ? (rawTopics as unknown[])
        .filter((t): t is string => typeof t === 'string' && t.length > 0)
        .slice(0, 3)
    : [];

  const rawFacts = obj.facts;
  if (!Array.isArray(rawFacts)) return { topics, facts: [] };

  const validActions: ExtractionAction[] = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];

  const facts = (rawFacts as unknown[])
    // Keep only objects with a usable text field (>= 5 chars).
    .filter(
      (f): f is Record<string, unknown> =>
        !!f &&
        typeof f === 'object' &&
        typeof (f as Record<string, unknown>).text === 'string' &&
        ((f as Record<string, unknown>).text as string).length >= 5,
    )
    .map((f) => {
      const rawType = String(f.type ?? 'claim').toLowerCase();
      // Accept both v1 tokens and legacy v0 tokens — coerce v0 via V0_TO_V1_TYPE.
      let type: MemoryType;
      if (isValidMemoryType(rawType)) {
        type = rawType;
      } else if ((LEGACY_V0_MEMORY_TYPES as readonly string[]).includes(rawType)) {
        type = V0_TO_V1_TYPE[rawType as MemoryTypeV0];
      } else {
        type = 'claim';
      }

      // Source and scope fall back to safe defaults on unknown tokens.
      const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
      const source: MemorySource =
        (VALID_MEMORY_SOURCES as readonly string[]).includes(rawSource)
          ? (rawSource as MemorySource)
          : 'user-inferred';

      const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
      const scope: MemoryScope =
        (VALID_MEMORY_SCOPES as readonly string[]).includes(rawScope)
          ? (rawScope as MemoryScope)
          : 'unspecified';

      // Optional reasoning (why-clause for decision claims), capped at 256 chars.
      const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;

      // Unknown actions default to ADD.
      const action = validActions.includes(String(f.action) as ExtractionAction)
        ? (String(f.action) as ExtractionAction)
        : 'ADD';

      // Entities: validate each via parseEntity; attach only when non-empty.
      let entities: ExtractedEntity[] | undefined;
      if (Array.isArray(f.entities)) {
        const valid = (f.entities as unknown[])
          .map(parseEntity)
          .filter((e): e is ExtractedEntity => e !== null);
        if (valid.length > 0) entities = valid;
      }

      const fact: ExtractedFact = {
        text: String(f.text).slice(0, 512),
        type,
        source,
        scope,
        reasoning,
        // Clamp to [1, 10]; non-numeric importance falls back to 5.
        importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
        confidence: normalizeConfidence(f.confidence),
        action,
        existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
      };
      if (entities) fact.entities = entities;
      return fact;
    })
    // Reject illegal type:summary + source:user
    .filter((f) => !(f.type === 'summary' && f.source === 'user'))
    // Importance threshold (preserves DELETE)
    .filter((f) => f.importance >= 6 || f.action === 'DELETE');

  return { topics, facts };
}
|
|
1186
|
+
|
|
1187
|
+
/**
|
|
1188
|
+
* Parse an LLM extraction response into structured v1 facts. Canonical
|
|
1189
|
+
* parser used by the default `extractFacts()` pipeline.
|
|
1190
|
+
*
|
|
1191
|
+
* This is a thin wrapper around `parseMergedResponseV1` that discards the
|
|
1192
|
+
* topic list so existing callers that expect a flat `ExtractedFact[]`
|
|
1193
|
+
* signature keep working.
|
|
1194
|
+
*/
|
|
1195
|
+
export function parseFactsResponse(
|
|
1196
|
+
response: string,
|
|
1197
|
+
logger?: ExtractorLogger,
|
|
1198
|
+
): ExtractedFact[] {
|
|
1199
|
+
return parseMergedResponseV1(response, logger).facts;
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1202
|
+
/**
|
|
1203
|
+
* Tag-don't-drop provenance filter (pipeline G / F).
|
|
1204
|
+
*
|
|
1205
|
+
* For each fact:
|
|
1206
|
+
* - If source is already "assistant", cap importance at 7.
|
|
1207
|
+
* - Otherwise, keyword-match the fact against user turns. If <30% of
|
|
1208
|
+
* content words (length >= 4) appear in user turns AND source != "user",
|
|
1209
|
+
* tag source as "assistant" and cap importance at 7 (keep the fact).
|
|
1210
|
+
* - Drop facts below importance 5 (unless DELETE action).
|
|
1211
|
+
*/
|
|
1212
|
+
export function applyProvenanceFilterLax(
|
|
1213
|
+
facts: ExtractedFact[],
|
|
1214
|
+
conversationText: string,
|
|
1215
|
+
): ExtractedFact[] {
|
|
1216
|
+
const userTurnsLower = conversationText
|
|
1217
|
+
.split(/\n\n/)
|
|
1218
|
+
.filter((line) => line.startsWith('[user]:'))
|
|
1219
|
+
.join(' ')
|
|
1220
|
+
.toLowerCase();
|
|
1221
|
+
|
|
1222
|
+
return facts
|
|
1223
|
+
.map((f) => {
|
|
1224
|
+
if (f.source === 'assistant') {
|
|
1225
|
+
return { ...f, importance: Math.min(f.importance, 7) };
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
const factWords = f.text
|
|
1229
|
+
.toLowerCase()
|
|
1230
|
+
.replace(/[^a-z0-9\s]/g, ' ')
|
|
1231
|
+
.split(/\s+/)
|
|
1232
|
+
.filter((w) => w.length >= 4);
|
|
1233
|
+
|
|
1234
|
+
const matchedWords = factWords.filter((w) => userTurnsLower.includes(w)).length;
|
|
1235
|
+
const matchRatio = factWords.length > 0 ? matchedWords / factWords.length : 0;
|
|
1236
|
+
|
|
1237
|
+
if (matchRatio < 0.3 && f.source !== 'user') {
|
|
1238
|
+
return {
|
|
1239
|
+
...f,
|
|
1240
|
+
source: 'assistant' as MemorySource,
|
|
1241
|
+
importance: Math.min(f.importance, 7),
|
|
1242
|
+
};
|
|
1243
|
+
}
|
|
1244
|
+
|
|
1245
|
+
return f;
|
|
1246
|
+
})
|
|
1247
|
+
.filter((f) => f.importance >= 5 || f.action === 'DELETE');
|
|
1248
|
+
}
|
|
1249
|
+
|
|
1250
|
+
/**
|
|
1251
|
+
* Heuristic fallback volatility when the LLM doesn't assign one.
|
|
1252
|
+
*/
|
|
1253
|
+
export function defaultVolatility(f: ExtractedFact): MemoryVolatility {
|
|
1254
|
+
if (f.type === 'commitment') return 'updatable';
|
|
1255
|
+
if (f.type === 'episode') return 'stable';
|
|
1256
|
+
if (f.type === 'directive') return 'stable';
|
|
1257
|
+
if (f.scope === 'health' || f.scope === 'family') return 'stable';
|
|
1258
|
+
return 'updatable';
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1261
|
+
/**
 * System prompt for the comparative re-scoring pass (`comparativeRescoreV1`):
 * asks the LLM to spread importance across the full 1-10 range and assign a
 * volatility to each already-extracted fact, returning a JSON array in the
 * same order as the input facts.
 */
const COMPARATIVE_PROMPT_V1 = `You are a memory re-ranker for the v1 taxonomy. You receive facts already extracted from one conversation, each with initial importance. Your job is twofold:

1. RE-RANK importance to spread across the 1-10 range (avoid clustering at 7-8-9)
2. ASSIGN volatility to each fact

Re-ranking rules:
- Top 1/3 of facts (most significant for this user): importance 9-10
- Middle 1/3: importance 7-8
- Bottom 1/3: importance 5-6 (borderline, may be dropped)
- A fact may stay at 10 if it's clearly identity-defining (name, birthday) or marked as "never forget"
- Never raise without justification; never lower below 5 unless clearly noise
- You MUST produce a spread

Volatility rules:
- stable: unlikely to change for years (name, allergies, birthplace, fundamental traits)
- updatable: changes occasionally (current job, active project, partner's name, address)
- ephemeral: short-lived state (today's task, this week's plan, current trip itinerary)

Use the FULL conversation context to judge volatility — a single claim may be ambiguous, but in context you can usually tell.

Return JSON array, same order as input, ONLY with importance + volatility fields:
[{"importance": N, "volatility": "stable|updatable|ephemeral"}, ...]
No markdown.`;
|
|
1284
|
+
|
|
1285
|
+
/**
|
|
1286
|
+
* Comparative re-scoring pass (v1). Forces re-scoring when facts.length >= 5
|
|
1287
|
+
* so the importance distribution spreads across the 1-10 range. When
|
|
1288
|
+
* facts.length < 5, assigns defaultVolatility and returns.
|
|
1289
|
+
*/
|
|
1290
|
+
export async function comparativeRescoreV1(
|
|
1291
|
+
facts: ExtractedFact[],
|
|
1292
|
+
conversationText: string,
|
|
1293
|
+
logger?: ExtractorLogger,
|
|
1294
|
+
): Promise<ExtractedFact[]> {
|
|
1295
|
+
// G-tuned behavior: force rescore when >= 5 facts
|
|
1296
|
+
if (facts.length < 2 || facts.length < 5) {
|
|
1297
|
+
return facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
|
|
1298
|
+
}
|
|
1299
|
+
|
|
1300
|
+
const config = resolveLLMConfig();
|
|
1301
|
+
if (!config) {
|
|
1302
|
+
return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1305
|
+
const factsForPrompt = facts
|
|
1306
|
+
.map((f, i) => `${i + 1}. [imp: ${f.importance}] [type: ${f.type}] [scope: ${f.scope ?? 'unspecified'}] ${f.text}`)
|
|
1307
|
+
.join('\n');
|
|
1308
|
+
|
|
1309
|
+
const userPrompt = `Conversation context:\n${conversationText}\n\nExtracted facts:\n${factsForPrompt}\n\nReturn ${facts.length} JSON objects, each with "importance" + "volatility". Match input order.`;
|
|
1310
|
+
|
|
1311
|
+
let response: string | null | undefined;
|
|
1312
|
+
try {
|
|
1313
|
+
response = await chatCompletion(config, [
|
|
1314
|
+
{ role: 'system', content: COMPARATIVE_PROMPT_V1 },
|
|
1315
|
+
{ role: 'user', content: userPrompt },
|
|
1316
|
+
]);
|
|
1317
|
+
} catch (err) {
|
|
1318
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1319
|
+
logger?.warn?.(`comparativeRescoreV1: chatCompletion threw: ${msg}`);
|
|
1320
|
+
return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
|
|
1321
|
+
}
|
|
1322
|
+
|
|
1323
|
+
if (!response) {
|
|
1324
|
+
return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
|
|
1325
|
+
}
|
|
1326
|
+
|
|
1327
|
+
let cleaned = response.trim();
|
|
1328
|
+
cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
|
|
1329
|
+
if (cleaned.startsWith('```')) {
|
|
1330
|
+
cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
|
1331
|
+
}
|
|
1332
|
+
const match = cleaned.match(/\[[\s\S]*\]/);
|
|
1333
|
+
if (match) cleaned = match[0];
|
|
1334
|
+
|
|
1335
|
+
let parsed: unknown;
|
|
1336
|
+
try {
|
|
1337
|
+
parsed = JSON.parse(cleaned);
|
|
1338
|
+
} catch {
|
|
1339
|
+
return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
|
|
1340
|
+
}
|
|
1341
|
+
if (!Array.isArray(parsed)) {
|
|
1342
|
+
return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
return facts.map((f, i) => {
|
|
1346
|
+
const entry = parsed[i] as Record<string, unknown> | undefined;
|
|
1347
|
+
const rawImp = entry && typeof entry === 'object' ? Number(entry.importance) : NaN;
|
|
1348
|
+
const rawVol = entry && typeof entry === 'object' ? String(entry.volatility ?? '').toLowerCase() : '';
|
|
1349
|
+
|
|
1350
|
+
const newImp = Number.isFinite(rawImp)
|
|
1351
|
+
? Math.max(5, Math.min(10, Math.round(rawImp)))
|
|
1352
|
+
: f.importance;
|
|
1353
|
+
const newVol: MemoryVolatility =
|
|
1354
|
+
(VALID_MEMORY_VOLATILITIES as readonly string[]).includes(rawVol)
|
|
1355
|
+
? (rawVol as MemoryVolatility)
|
|
1356
|
+
: defaultVolatility(f);
|
|
1357
|
+
|
|
1358
|
+
return { ...f, importance: newImp, volatility: newVol };
|
|
1359
|
+
});
|
|
1360
|
+
}
|
|
1361
|
+
|
|
1362
|
+
/**
|
|
1363
|
+
* Main extraction entry point (default pipeline as of plugin v3.0.0).
|
|
1364
|
+
*
|
|
1365
|
+
* Pipeline: single merged-topic LLM call → `applyProvenanceFilterLax`
|
|
1366
|
+
* (tag-don't-drop) → `comparativeRescoreV1` (forces re-rank when >= 5 facts)
|
|
1367
|
+
* → `defaultVolatility` fallback → lexical importance bumps.
|
|
1368
|
+
*
|
|
1369
|
+
* Produces v1-shaped facts with `type`, `source`, `scope`, `volatility`,
|
|
1370
|
+
* and optional `reasoning` fields populated. The caller should hand the
|
|
1371
|
+
* result to `storeExtractedFacts` which emits a v1 canonical claim blob.
|
|
194
1372
|
*/
|
|
195
1373
|
export async function extractFacts(
|
|
196
1374
|
rawMessages: unknown[],
|
|
197
1375
|
mode: 'turn' | 'full',
|
|
198
1376
|
existingMemories?: Array<{ id: string; text: string }>,
|
|
1377
|
+
profileContext?: string,
|
|
1378
|
+
logger?: ExtractorLogger,
|
|
199
1379
|
): Promise<ExtractedFact[]> {
|
|
200
1380
|
const config = resolveLLMConfig();
|
|
201
|
-
if (!config)
|
|
1381
|
+
if (!config) {
|
|
1382
|
+
logger?.info?.('extractFacts: no LLM config resolved (skipping extraction)');
|
|
1383
|
+
return [];
|
|
1384
|
+
}
|
|
202
1385
|
|
|
203
|
-
// Parse messages
|
|
204
1386
|
const parsed = rawMessages
|
|
205
1387
|
.map(messageToText)
|
|
206
1388
|
.filter((m): m is { role: string; content: string } => m !== null);
|
|
207
1389
|
|
|
208
|
-
if (parsed.length === 0)
|
|
1390
|
+
if (parsed.length === 0) {
|
|
1391
|
+
logger?.info?.(`extractFacts: no parseable messages (raw count=${rawMessages.length})`);
|
|
1392
|
+
return [];
|
|
1393
|
+
}
|
|
209
1394
|
|
|
210
|
-
// For 'turn' mode, only look at last 6 messages (3 turns)
|
|
211
|
-
// For 'full' mode, use all messages but truncate to fit token budget
|
|
212
1395
|
const relevantMessages = mode === 'turn' ? parsed.slice(-6) : parsed;
|
|
213
|
-
|
|
214
|
-
// Truncate to ~3000 tokens worth of text
|
|
215
1396
|
const conversationText = truncateMessages(relevantMessages, 12_000);
|
|
216
1397
|
|
|
217
|
-
if (conversationText.length < 20)
|
|
1398
|
+
if (conversationText.length < 20) {
|
|
1399
|
+
logger?.info?.(
|
|
1400
|
+
`extractFacts: conversation too short (${conversationText.length} chars < 20, parsed=${parsed.length}, mode=${mode})`,
|
|
1401
|
+
);
|
|
1402
|
+
return [];
|
|
1403
|
+
}
|
|
218
1404
|
|
|
219
|
-
// Build existing memories context if available
|
|
220
1405
|
let memoriesContext = '';
|
|
221
1406
|
if (existingMemories && existingMemories.length > 0) {
|
|
222
1407
|
const memoriesStr = existingMemories
|
|
@@ -230,16 +1415,54 @@ export async function extractFacts(
|
|
|
230
1415
|
? `Extract important facts from these recent conversation turns:\n\n${conversationText}${memoriesContext}`
|
|
231
1416
|
: `Extract ALL valuable long-term memories from this conversation before it is lost:\n\n${conversationText}${memoriesContext}`;
|
|
232
1417
|
|
|
1418
|
+
const systemPrompt = profileContext || EXTRACTION_SYSTEM_PROMPT;
|
|
1419
|
+
|
|
1420
|
+
let response: string | null | undefined;
|
|
233
1421
|
try {
|
|
234
|
-
|
|
235
|
-
{ role: 'system', content:
|
|
1422
|
+
response = await chatCompletion(config, [
|
|
1423
|
+
{ role: 'system', content: systemPrompt },
|
|
236
1424
|
{ role: 'user', content: userPrompt },
|
|
237
1425
|
]);
|
|
1426
|
+
} catch (err) {
|
|
1427
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1428
|
+
logger?.warn?.(`extractFacts: chatCompletion threw: ${msg}`);
|
|
1429
|
+
return [];
|
|
1430
|
+
}
|
|
238
1431
|
|
|
239
|
-
|
|
1432
|
+
if (!response) {
|
|
1433
|
+
logger?.info?.('extractFacts: chatCompletion returned null/empty response');
|
|
1434
|
+
return [];
|
|
1435
|
+
}
|
|
240
1436
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
1437
|
+
logger?.info?.(
|
|
1438
|
+
`extractFacts: LLM returned ${response.length} chars; parsing merged response`,
|
|
1439
|
+
);
|
|
1440
|
+
const { topics, facts: rawFacts } = parseMergedResponseV1(response, logger);
|
|
1441
|
+
if (topics.length > 0) {
|
|
1442
|
+
logger?.info?.(`extractFacts: topics = ${JSON.stringify(topics)}`);
|
|
244
1443
|
}
|
|
1444
|
+
|
|
1445
|
+
// Provenance filter (tag-don't-drop)
|
|
1446
|
+
let facts = applyProvenanceFilterLax(rawFacts, conversationText);
|
|
1447
|
+
|
|
1448
|
+
// Comparative rescore (forces re-rank when >= 5 facts)
|
|
1449
|
+
facts = await comparativeRescoreV1(facts, conversationText, logger);
|
|
1450
|
+
|
|
1451
|
+
// Ensure every fact has a volatility (defensive: rescore may have skipped)
|
|
1452
|
+
facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
|
|
1453
|
+
|
|
1454
|
+
// Lexical importance bumps (same as v0 pipeline)
|
|
1455
|
+
for (const f of facts) {
|
|
1456
|
+
const bump = computeLexicalImportanceBump(f.text, conversationText);
|
|
1457
|
+
if (bump > 0) {
|
|
1458
|
+
const oldImportance = f.importance;
|
|
1459
|
+
const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
|
|
1460
|
+
f.importance = Math.min(10, f.importance + effectiveBump);
|
|
1461
|
+
logger?.info?.(
|
|
1462
|
+
`extractFacts: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`,
|
|
1463
|
+
);
|
|
1464
|
+
}
|
|
1465
|
+
}
|
|
1466
|
+
|
|
1467
|
+
return facts;
|
|
245
1468
|
}
|