@totalreclaw/totalreclaw 1.6.0 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/extractor.ts CHANGED
@@ -13,14 +13,209 @@ import { chatCompletion, resolveLLMConfig } from './llm-client.js';
13
13
 
14
14
  export type ExtractionAction = 'ADD' | 'UPDATE' | 'DELETE' | 'NOOP';
15
15
 
16
+ export type EntityType = 'person' | 'project' | 'tool' | 'company' | 'concept' | 'place';
17
+
18
+ export interface ExtractedEntity {
19
+ name: string;
20
+ type: EntityType;
21
+ role?: string;
22
+ }
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Memory Taxonomy v1 — the 6 canonical memory types. Single source of truth.
26
+ //
27
+ // Plugin v3.0.0 adopts v1 as the ONLY taxonomy. Legacy v0 tokens
28
+ // (fact, decision, episodic, goal, context, rule) are accepted only on the
29
+ // read-side via `LEGACY_V0_MEMORY_TYPES` / `V0_TO_V1_TYPE` and
30
+ // `normalizeToV1Type` in `claims-helper.ts`, so pre-v3 vault entries can
31
+ // still be decoded. Extraction and write paths emit v1 exclusively.
32
+ //
33
+ // When adding a new type, update ALL of:
34
+ // - This constant
35
+ // - `mcp/src/v1-types.ts`
36
+ // - `python/src/totalreclaw/agent/extraction.py`
37
+ // - `rust/totalreclaw-core/src/claims.rs`
38
+ // - `skill/plugin/claims-helper.ts`
39
+ // - The `EXTRACTION_SYSTEM_PROMPT` Types: list
40
+ // ---------------------------------------------------------------------------
41
+
42
+ export const VALID_MEMORY_TYPES = [
43
+ 'claim',
44
+ 'preference',
45
+ 'directive',
46
+ 'commitment',
47
+ 'episode',
48
+ 'summary',
49
+ ] as const;
50
+
51
+ /** v1 MemoryType — the 6 canonical types. */
52
+ export type MemoryType = (typeof VALID_MEMORY_TYPES)[number];
53
+
54
+ /**
55
+ * Runtime type guard — returns whether an unknown value is a valid v1
56
+ * `MemoryType`. Legacy v0 tokens return `false`; use `normalizeToV1Type()`
57
+ * in `claims-helper.ts` to coerce them on the read path.
58
+ */
59
+ export function isValidMemoryType(value: unknown): value is MemoryType {
60
+ return typeof value === 'string' && (VALID_MEMORY_TYPES as readonly string[]).includes(value);
61
+ }
62
+
63
+ /**
64
+ * Backward-compat alias so existing consumers that import `MemoryTypeV1`
65
+ * keep compiling. Identical to `MemoryType` as of plugin v3.0.0.
66
+ * @deprecated Use `MemoryType` instead.
67
+ */
68
+ export type MemoryTypeV1 = MemoryType;
69
+
70
+ /**
71
+ * Backward-compat alias. Same list as `VALID_MEMORY_TYPES`.
72
+ * @deprecated Use `VALID_MEMORY_TYPES` instead.
73
+ */
74
+ export const VALID_MEMORY_TYPES_V1: readonly MemoryType[] = VALID_MEMORY_TYPES;
75
+
76
+ /**
77
+ * Backward-compat alias. Same guard as `isValidMemoryType`.
78
+ * @deprecated Use `isValidMemoryType` instead.
79
+ */
80
+ export function isValidMemoryTypeV1(value: unknown): value is MemoryType {
81
+ return isValidMemoryType(value);
82
+ }
83
+
84
+ /**
85
+ * Legacy v0 memory types — retained as a typed constant so the read-side
86
+ * `V0_TO_V1_TYPE` mapping can reference them without redeclaration.
87
+ *
88
+ * Do NOT emit these on the write/extraction path. They exist solely so
89
+ * `claims-helper.ts::readClaimFromBlob` can decode pre-v1 vault entries
90
+ * whose encrypted blobs still carry v0 token strings.
91
+ */
92
+ export const LEGACY_V0_MEMORY_TYPES = [
93
+ 'fact',
94
+ 'preference',
95
+ 'decision',
96
+ 'episodic',
97
+ 'goal',
98
+ 'context',
99
+ 'summary',
100
+ 'rule',
101
+ ] as const;
102
+
103
+ export type MemoryTypeV0 = (typeof LEGACY_V0_MEMORY_TYPES)[number];
104
+
105
+ export type MemorySource =
106
+ | 'user'
107
+ | 'user-inferred'
108
+ | 'assistant'
109
+ | 'external'
110
+ | 'derived';
111
+
112
+ export type MemoryScope =
113
+ | 'work'
114
+ | 'personal'
115
+ | 'health'
116
+ | 'family'
117
+ | 'creative'
118
+ | 'finance'
119
+ | 'misc'
120
+ | 'unspecified';
121
+
122
+ export type MemoryVolatility = 'stable' | 'updatable' | 'ephemeral';
123
+
124
+ export const VALID_MEMORY_SOURCES: readonly MemorySource[] = [
125
+ 'user',
126
+ 'user-inferred',
127
+ 'assistant',
128
+ 'external',
129
+ 'derived',
130
+ ];
131
+
132
+ export const VALID_MEMORY_SCOPES: readonly MemoryScope[] = [
133
+ 'work',
134
+ 'personal',
135
+ 'health',
136
+ 'family',
137
+ 'creative',
138
+ 'finance',
139
+ 'misc',
140
+ 'unspecified',
141
+ ];
142
+
143
+ export const VALID_MEMORY_VOLATILITIES: readonly MemoryVolatility[] = [
144
+ 'stable',
145
+ 'updatable',
146
+ 'ephemeral',
147
+ ];
148
+
149
+ /**
150
+ * Legacy v0 → v1 type mapping used by the read-side adapter when decoding
151
+ * a pre-v1 vault entry that still carries a v0 token string.
152
+ *
153
+ * Decisions (v0) map to v1 `claim` — the reasoning lives in the separate
154
+ * `reasoning` field rather than being encoded in the type.
155
+ */
156
+ export const V0_TO_V1_TYPE: Record<MemoryTypeV0, MemoryType> = {
157
+ fact: 'claim',
158
+ preference: 'preference',
159
+ decision: 'claim',
160
+ episodic: 'episode',
161
+ goal: 'commitment',
162
+ context: 'claim',
163
+ summary: 'summary',
164
+ rule: 'directive',
165
+ };
166
+
167
+ // ---------------------------------------------------------------------------
168
+ // ExtractedFact — canonical shape carried through the extraction pipeline
169
+ // ---------------------------------------------------------------------------
170
+
171
+ /**
172
+ * Extracted fact. Shape carries full v1 taxonomy fields (source / scope /
173
+ * reasoning / volatility). `source` is required on the write path —
174
+ * `storeExtractedFacts` supplies `'user-inferred'` as a defensive default
175
+ * when a heuristic upstream fails to populate it.
176
+ */
16
177
  export interface ExtractedFact {
17
178
  text: string;
18
- type: 'fact' | 'preference' | 'decision' | 'episodic' | 'goal' | 'context' | 'summary';
179
+ /** v1 taxonomy type. Always present on newly-extracted facts. */
180
+ type: MemoryType;
19
181
  importance: number; // 1-10
20
182
  action: ExtractionAction;
21
183
  existingFactId?: string;
184
+ entities?: ExtractedEntity[];
185
+ confidence?: number; // 0.0-1.0, LLM self-assessed
186
+ /**
187
+ * v1 provenance tag. Required on the write path — when missing,
188
+ * `storeExtractedFacts` supplies `'user-inferred'` as a defensive default.
189
+ */
190
+ source?: MemorySource;
191
+ /** v1 life-domain scope. Default 'unspecified'. */
192
+ scope?: MemoryScope;
193
+ /**
194
+ * Decision-with-reasoning "because Y" clause, for type=claim. Max 256 chars.
195
+ */
196
+ reasoning?: string;
197
+ /**
198
+ * v1 stability signal. Assigned by `comparativeRescoreV1` or, when rescore
199
+ * is skipped (facts.length < 5), by the `defaultVolatility` heuristic.
200
+ */
201
+ volatility?: MemoryVolatility;
22
202
  }
23
203
 
204
+ const ALLOWED_ENTITY_TYPES: ReadonlySet<EntityType> = new Set([
205
+ 'person',
206
+ 'project',
207
+ 'tool',
208
+ 'company',
209
+ 'concept',
210
+ 'place',
211
+ ]);
212
+
213
+ /**
214
+ * Default confidence when the LLM does not provide one.
215
+ * Mirrors the fallback used by other extraction clients.
216
+ */
217
+ export const DEFAULT_EXTRACTION_CONFIDENCE = 0.85;
218
+
24
219
  interface ContentBlock {
25
220
  type?: string;
26
221
  text?: string;
@@ -33,45 +228,6 @@ interface ConversationMessage {
33
228
  text?: string;
34
229
  }
35
230
 
36
- // ---------------------------------------------------------------------------
37
- // Extraction Prompt
38
- // ---------------------------------------------------------------------------
39
-
40
- const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine. Analyze the conversation and extract valuable long-term memories.
41
-
42
- Rules:
43
- 1. Each memory must be a single, self-contained piece of information
44
- 2. Focus on user-specific information that would be useful in future conversations
45
- 3. Skip generic knowledge, greetings, small talk, and ephemeral task coordination
46
- 4. Score importance 1-10 (6+ = worth storing)
47
- 5. Only extract memories with importance >= 6
48
-
49
- Types:
50
- - fact: Objective information about the user (name, location, job, relationships)
51
- - preference: Likes, dislikes, or preferences ("prefers dark mode", "allergic to peanuts")
52
- - decision: Choices WITH reasoning ("chose PostgreSQL because data is relational and needs ACID")
53
- - episodic: Notable events or experiences ("deployed v1.0 to production on March 15")
54
- - goal: Objectives, targets, or plans ("wants to launch public beta by end of Q1")
55
- - context: Active project/task context ("working on TotalReclaw v1.2, staging on Base Sepolia")
56
- - summary: Key outcome or conclusion from a discussion ("agreed to use phased rollout for migration")
57
-
58
- Extraction guidance:
59
- - For decisions: ALWAYS include the reasoning. "Chose X" is weak. "Chose X because Y" is strong.
60
- - For context: Capture what the user is actively working on, including versions, environments, and status.
61
- - For summaries: Only extract when a conversation reaches a clear conclusion or agreement.
62
- - For facts: Prefer specific over vague. "Lives in Lisbon" beats "lives in Europe".
63
- - Decisions and context should be importance >= 7 (they are high-value for future conversations).
64
-
65
- Actions (compare against existing memories if provided):
66
- - ADD: New memory, no conflict with existing
67
- - UPDATE: Refines or corrects an existing memory (provide existingFactId)
68
- - DELETE: Contradicts an existing memory -- the old one is now wrong (provide existingFactId)
69
- - NOOP: Already captured or not worth storing
70
-
71
- Return a JSON array (no markdown, no code fences):
72
- [{"text": "...", "type": "...", "importance": N, "action": "ADD|UPDATE|DELETE|NOOP", "existingFactId": "..."}, ...]
73
-
74
- If nothing is worth extracting, return: []`;
75
231
 
76
232
  // ---------------------------------------------------------------------------
77
233
  // Helpers
@@ -137,10 +293,555 @@ function truncateMessages(messages: Array<{ role: string; content: string }>, ma
137
293
  }
138
294
 
139
295
  /**
140
- * Parse the LLM response into structured facts.
296
+ * Parse a single entity object from LLM output. Returns null if invalid.
297
+ * Invalid entities are silently dropped so a bad entity never fails the whole fact.
298
+ */
299
+ export function parseEntity(raw: unknown): ExtractedEntity | null {
300
+ if (!raw || typeof raw !== 'object') return null;
301
+ const e = raw as Record<string, unknown>;
302
+ const name = typeof e.name === 'string' ? e.name.trim() : '';
303
+ if (name.length === 0) return null;
304
+ const type = String(e.type ?? '').toLowerCase() as EntityType;
305
+ if (!ALLOWED_ENTITY_TYPES.has(type)) return null;
306
+ const entity: ExtractedEntity = { name: name.slice(0, 128), type };
307
+ if (typeof e.role === 'string' && e.role.trim().length > 0) {
308
+ entity.role = e.role.trim().slice(0, 128);
309
+ }
310
+ return entity;
311
+ }
312
+
313
+ /**
314
+ * Clamp a raw confidence value to [0, 1]. Returns the default when missing, non-numeric, or non-finite.
315
+ */
316
+ export function normalizeConfidence(raw: unknown): number {
317
+ if (typeof raw !== 'number' || !Number.isFinite(raw)) return DEFAULT_EXTRACTION_CONFIDENCE;
318
+ if (raw < 0) return 0;
319
+ if (raw > 1) return 1;
320
+ return raw;
321
+ }
322
+
323
+ /**
324
+ * Minimal logger shape accepted by the extraction pipeline. Matches the
325
+ * OpenClaw plugin logger so callers can pass `api.logger` directly.
326
+ *
327
+ * All methods are optional so tests can pass a partial object and callers
328
+ * that don't care about observability can omit the argument entirely.
329
+ */
330
+ export interface ExtractorLogger {
331
+ info?: (msg: string) => void;
332
+ warn?: (msg: string) => void;
333
+ }
334
+
335
+
336
+ // ---------------------------------------------------------------------------
337
+ // Phase 2.2.6: lexical importance bumps
338
+ // ---------------------------------------------------------------------------
339
+
340
+ /**
341
+ * Escape regex metacharacters so a string can be used as a literal pattern.
342
+ */
343
+ function escapeRegExp(s: string): string {
344
+ return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
345
+ }
346
+
347
+ /**
348
+ * Compute a lexical importance bump (0-2) for a single fact based on signals
349
+ * in the surrounding conversation text.
350
+ *
351
+ * This is a Phase 2.2.6 quality fix complementing the prompt rubric tightening
352
+ * (item A). Where the rubric tells the LLM to use the full 1-10 range, the
353
+ * bump tells us *as a post-process*: when the user's actual phrasing carries
354
+ * strong "remember this" signals that the LLM may have under-weighted, push
355
+ * the score up.
356
+ *
357
+ * Signals detected (each adds +1, capped at +2 total):
358
+ *
359
+ * 1. **Strong intent phrases** anywhere in the conversation:
360
+ * "remember this", "never forget", "rule of thumb", "critical",
361
+ * "don't ever forget", explicit "always X" / "never Y" patterns.
362
+ * 2. **Emphasis markers**: `!!` (double exclamation), or 3+ all-caps words
363
+ * in a row (e.g. "DO NOT FORGET", "VERY IMPORTANT").
364
+ * 3. **Repetition**: the fact's first ~20 chars appear at least twice in
365
+ * the conversation text (paraphrased restating).
366
+ *
367
+ * The bump is additive on top of whatever the LLM scored; final importance
368
+ * is capped at 10.
369
+ *
370
+ * Final-importance ceiling: the bump can lift a fact over the importance
371
+ * >= 6 filter only when the LLM already scored it >= 4 (4 + 2 = 6 meets
372
+ * the floor; 5 + 1 = 6 likewise). Facts scored 3 or below stay under the
373
+ * floor even with the maximum +2 bump — the bump corrects under-weighting
374
+ * of facts the LLM found worth storing; it cannot rescue clear skips.
141
375
  */
142
- function parseFactsResponse(response: string): ExtractedFact[] {
376
+ export function computeLexicalImportanceBump(
377
+ factText: string,
378
+ conversationText: string,
379
+ ): number {
380
+ let bump = 0;
381
+ const lowerConv = conversationText.toLowerCase();
382
+
383
+ // Signal 1: strong intent phrases anywhere in the conversation
384
+ const strongIntent =
385
+ /\b(remember this|never forget|rule of thumb|don't (?:ever )?forget|critical|important|gotcha|note to self)\b/i;
386
+ if (strongIntent.test(lowerConv)) bump += 1;
387
+
388
+ // Signal 2: emphasis markers — double exclamation OR 3+ consecutive all-caps words
389
+ // (3+ chars each, to avoid false positives on acronyms like "AWS S3 IAM")
390
+ const doubleExclamation = /!!/;
391
+ const allCapsPhrase = /\b[A-Z]{3,}(?:\s+[A-Z]{3,}){2,}\b/;
392
+ if (doubleExclamation.test(conversationText) || allCapsPhrase.test(conversationText)) {
393
+ bump += 1;
394
+ }
395
+
396
+ // Signal 3: repetition — extract content words (length >= 5, not common stop
397
+ // words) from the fact, and check if any single one appears 2+ times in the
398
+ // conversation. This is more robust to LLM paraphrasing than a fingerprint
399
+ // match: "User prefers PostgreSQL" extracted from "I prefer PostgreSQL ...
400
+ // yeah PostgreSQL is right for OLTP" still triggers because "postgresql"
401
+ // appears multiple times even though the leading chars differ.
402
+ const lowerFact = factText.toLowerCase();
403
+ const stopWords = new Set([
404
+ 'about', 'after', 'again', 'against', 'because', 'before', 'being',
405
+ 'between', 'could', 'doing', 'during', 'every', 'further', 'having',
406
+ 'their', 'these', 'those', 'through', 'under', 'until', 'where', 'which',
407
+ 'while', 'would', 'should', 'about', 'thing', 'things', 'something',
408
+ 'someone', 'always', 'never', 'often', 'still', 'really', 'maybe',
409
+ 'using', 'works', 'work', 'user', 'users', 'with', 'from', 'into',
410
+ 'like', 'just', 'than', 'them', 'they', 'will', 'when', 'what', 'were',
411
+ 'this', 'that', 'have', 'this',
412
+ ]);
413
+ const factWords = lowerFact.split(/[^a-z0-9_]+/).filter((w) => w.length >= 5 && !stopWords.has(w));
414
+ let triggered = false;
415
+ for (const word of factWords) {
416
+ const occurrences = (lowerConv.match(new RegExp(`\\b${escapeRegExp(word)}\\b`, 'g')) || [])
417
+ .length;
418
+ if (occurrences >= 2) {
419
+ triggered = true;
420
+ break;
421
+ }
422
+ }
423
+ if (triggered) bump += 1;
424
+
425
+ return Math.min(bump, 2);
426
+ }
427
+
428
+
429
+ // ---------------------------------------------------------------------------
430
+ // Compaction-Aware Extraction (Phase 2.3)
431
+ // ---------------------------------------------------------------------------
432
+
433
+ /**
434
+ * Compaction-specific system prompt (v1 taxonomy). Fires when the conversation
435
+ * context is about to be compacted. LAST CHANCE to capture knowledge before
436
+ * it is lost, so the importance floor is 5 instead of 6 and the prompt is
437
+ * more aggressive about extracting active-project context, claims, and
438
+ * episodes.
439
+ *
440
+ * Differences from `EXTRACTION_SYSTEM_PROMPT`:
441
+ * - Opening framing emphasizes urgency ("last chance")
442
+ * - Format-agnostic: handles bullet lists, prose, mixed formats
443
+ * - Importance threshold lowered to 5
444
+ * - More aggressive on claim / episode / directive types
445
+ * - Anti-pattern: don't skip content just because it's in a summary
446
+ *
447
+ * Output format matches `EXTRACTION_SYSTEM_PROMPT` exactly (same merged
448
+ * topics+facts JSON shape with v1 type / source / scope fields), so the
449
+ * same `parseMergedResponseV1` parser can validate it.
450
+ */
451
+ export const COMPACTION_SYSTEM_PROMPT = `You are extracting memories from a conversation that is about to be compacted. The context will be LOST after this point — this is your LAST CHANCE to capture everything worth remembering. Be more aggressive than usual: err on the side of storing.
452
+
453
+ Work in TWO explicit phases within one response:
454
+
455
+ PHASE 1 — Topic identification.
456
+ Identify the 2-3 main topics the user was engaging with before extracting any fact. Topics should be short phrases (2-5 words each). If there's no clear user-focused topic, use an empty topics array.
457
+
458
+ PHASE 2 — Fact extraction anchored to those topics (plus preserve active context).
459
+ Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Active project context, decisions in progress, and current working state score 6-8 during compaction — capture them even when they'd normally be marginal.
460
+
461
+ Rules:
462
+ 1. Each memory = single self-contained piece of information
463
+ 2. Focus on user-specific info useful in future conversations
464
+ 3. Skip generic knowledge, greetings, small talk
465
+ 4. Score importance 1-10 (5+ = worth storing during compaction)
466
+ 5. Every memory MUST attribute a source (provenance critical)
467
+
468
+ Importance rubric (full 1-10 range, NOT just 7-8):
469
+ - 10: Core identity, never-forget ("remember this forever", name/birthday)
470
+ - 9: Affects many future decisions / high-impact rules
471
+ - 8: Preference / decision-with-reasoning / operational rule
472
+ - 7: Specific durable fact
473
+ - 6: Borderline — during compaction, capture anyway
474
+ - 5: Would normally drop; keep as compaction safety net
475
+ - 4 or below: DROP (greetings, filler)
476
+
477
+ ═══════════════════════════════════════════════════════════════
478
+ TYPE (6 values)
479
+ ═══════════════════════════════════════════════════════════════
480
+ - claim: factual assertion (absorbs v0 fact/context/decision; decisions populate reasoning)
481
+ - preference: likes/dislikes/tastes
482
+ - directive: imperative rule ("always X", "never Y")
483
+ - commitment: future intent ("will do X")
484
+ - episode: notable event
485
+ - summary: derived synthesis (source must be derived|assistant)
486
+
487
+ ═══════════════════════════════════════════════════════════════
488
+ SOURCE (provenance, CRITICAL)
489
+ ═══════════════════════════════════════════════════════════════
490
+ - user: user explicitly stated it (in [user]: turns)
491
+ - user-inferred: extractor inferred from user signals
492
+ - assistant: assistant authored — DOWNGRADE unless user affirmed/quoted
493
+ - external, derived: rare
494
+
495
+ IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant.
496
+
497
+ ═══════════════════════════════════════════════════════════════
498
+ SCOPE
499
+ ═══════════════════════════════════════════════════════════════
500
+ work | personal | health | family | creative | finance | misc | unspecified
501
+
502
+ ═══════════════════════════════════════════════════════════════
503
+ ENTITIES
504
+ ═══════════════════════════════════════════════════════════════
505
+ - type ∈ {person, project, tool, company, concept, place}
506
+ - prefer specific names ("PostgreSQL" not "database")
507
+ - omit umbrella categories when specific name is present
508
+
509
+ ═══════════════════════════════════════════════════════════════
510
+ REASONING (only for claims that are decisions)
511
+ ═══════════════════════════════════════════════════════════════
512
+ For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.
513
+
514
+ ═══════════════════════════════════════════════════════════════
515
+ FORMAT-AGNOSTIC PARSING (IMPORTANT)
516
+ ═══════════════════════════════════════════════════════════════
517
+ The conversation may contain bullet lists, numbered lists, section headers, code snippets, or plain prose. Treat ALL formats as potential sources of extractable memory:
518
+ - Bullets/list items: each item is a candidate.
519
+ - Section headers (Context, Decisions, Key Learnings, Open Questions): use the header as a TYPE HINT (Context → claim, Decisions → claim+reasoning, Learnings → directive, Open Questions → commitment).
520
+ - Plain prose: parse each distinct assertion as a candidate.
521
+ - Code snippets: extract config choices, tool versions, architectural decisions embedded in comments or structure.
522
+ - Mixed format: apply all of the above.
523
+
524
+ Do NOT skip content just because it's in a summary. The agent has already filtered — your job is to convert into structured memories, not to re-evaluate worth.
525
+
526
+ ═══════════════════════════════════════════════════════════════
527
+ OUTPUT FORMAT (no markdown, no code fences)
528
+ ═══════════════════════════════════════════════════════════════
529
+ {
530
+ "topics": ["topic 1", "topic 2"],
531
+ "facts": [
532
+ {
533
+ "text": "...",
534
+ "type": "claim|preference|directive|commitment|episode",
535
+ "source": "user|user-inferred|assistant",
536
+ "scope": "work|personal|health|...",
537
+ "importance": N,
538
+ "confidence": 0.9,
539
+ "action": "ADD",
540
+ "reasoning": "...", // optional, only for claim+decision
541
+ "entities": [{"name": "...", "type": "tool"}]
542
+ }
543
+ ]
544
+ }
545
+
546
+ If nothing worth extracting: {"topics": [], "facts": []}`;
547
+
548
+ /**
549
+ * Parse facts for compaction context (v1 taxonomy; importance floor 5).
550
+ *
551
+ * Identical to `parseFactsResponse` except the importance floor is 5 instead
552
+ * of 6 — compaction is the last chance to capture context, so we accept
553
+ * borderline facts that would normally be dropped.
554
+ *
555
+ * Accepts the same merged-topic v1 JSON shape as the main prompt. The
556
+ * inner `parseMergedResponseV1` enforces the >=6 floor, so we re-run a
557
+ * lenient >=5 pass on the raw parsed payload to admit the borderline items.
558
+ */
559
+ export function parseFactsResponseForCompaction(
560
+ response: string,
561
+ logger?: ExtractorLogger,
562
+ ): ExtractedFact[] {
563
+ const originalPreview = response.trim().slice(0, 200);
564
+ let cleaned = response.trim();
565
+
566
+ // Strip <think>...</think> and <thinking>...</thinking> tags
567
+ cleaned = cleaned
568
+ .replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '')
569
+ .trim();
570
+
143
571
  // Strip markdown code fences if present
572
+ if (cleaned.startsWith('```')) {
573
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
574
+ }
575
+
576
+ const tryParse = (input: string): unknown => {
577
+ try {
578
+ return JSON.parse(input);
579
+ } catch {
580
+ return undefined;
581
+ }
582
+ };
583
+
584
+ let parsed = tryParse(cleaned);
585
+ let recoveryUsed: 'none' | 'bracket-scan' = 'none';
586
+ if (parsed === undefined) {
587
+ // Try bare-array first (legacy compaction output), then object (v1 merged).
588
+ const arrMatch = cleaned.match(/\[[\s\S]*\]/);
589
+ if (arrMatch) {
590
+ parsed = tryParse(arrMatch[0]);
591
+ if (parsed !== undefined) recoveryUsed = 'bracket-scan';
592
+ }
593
+ if (parsed === undefined) {
594
+ const objMatch = cleaned.match(/\{[\s\S]*\}/);
595
+ if (objMatch) {
596
+ parsed = tryParse(objMatch[0]);
597
+ if (parsed !== undefined) recoveryUsed = 'bracket-scan';
598
+ }
599
+ }
600
+ }
601
+ if (recoveryUsed === 'bracket-scan') {
602
+ logger?.info?.(
603
+ `parseFactsResponseForCompaction: recovered JSON via bracket-scan fallback`,
604
+ );
605
+ }
606
+
607
+ if (!parsed || typeof parsed !== 'object') {
608
+ logger?.warn?.(
609
+ `parseFactsResponseForCompaction: could not parse LLM output as JSON object. Preview: ${JSON.stringify(originalPreview)}`,
610
+ );
611
+ return [];
612
+ }
613
+
614
+ const obj = parsed as Record<string, unknown>;
615
+ const rawFacts = Array.isArray(obj.facts) ? (obj.facts as unknown[]) : null;
616
+
617
+ // Legacy v0 compaction output (bare JSON array) — best-effort parse.
618
+ const rawArray = rawFacts ?? (Array.isArray(parsed) ? (parsed as unknown[]) : null);
619
+ if (!rawArray) {
620
+ logger?.warn?.(
621
+ `parseFactsResponseForCompaction: expected { facts: [...] } object, got ${typeof parsed}`,
622
+ );
623
+ return [];
624
+ }
625
+
626
+ const validActions: ExtractionAction[] = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
627
+
628
+ const facts = rawArray
629
+ .filter(
630
+ (f): f is Record<string, unknown> =>
631
+ !!f &&
632
+ typeof f === 'object' &&
633
+ typeof (f as Record<string, unknown>).text === 'string' &&
634
+ ((f as Record<string, unknown>).text as string).length >= 5,
635
+ )
636
+ .map((f) => {
637
+ const rawType = String(f.type ?? 'claim').toLowerCase();
638
+ // Accept v1 tokens directly; coerce legacy v0 tokens via V0_TO_V1_TYPE.
639
+ let type: MemoryType;
640
+ if (isValidMemoryType(rawType)) {
641
+ type = rawType;
642
+ } else if ((LEGACY_V0_MEMORY_TYPES as readonly string[]).includes(rawType)) {
643
+ type = V0_TO_V1_TYPE[rawType as MemoryTypeV0];
644
+ } else {
645
+ type = 'claim';
646
+ }
647
+
648
+ const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
649
+ const source: MemorySource = (VALID_MEMORY_SOURCES as readonly string[]).includes(rawSource)
650
+ ? (rawSource as MemorySource)
651
+ : 'user-inferred';
652
+
653
+ const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
654
+ const scope: MemoryScope = (VALID_MEMORY_SCOPES as readonly string[]).includes(rawScope)
655
+ ? (rawScope as MemoryScope)
656
+ : 'unspecified';
657
+
658
+ const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;
659
+
660
+ const action = validActions.includes(String(f.action) as ExtractionAction)
661
+ ? (String(f.action) as ExtractionAction)
662
+ : 'ADD';
663
+
664
+ let entities: ExtractedEntity[] | undefined;
665
+ if (Array.isArray(f.entities)) {
666
+ const valid = (f.entities as unknown[])
667
+ .map(parseEntity)
668
+ .filter((e): e is ExtractedEntity => e !== null);
669
+ if (valid.length > 0) entities = valid;
670
+ }
671
+
672
+ const result: ExtractedFact = {
673
+ text: String(f.text).slice(0, 512),
674
+ type,
675
+ source,
676
+ scope,
677
+ reasoning,
678
+ importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
679
+ action,
680
+ existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
681
+ confidence: normalizeConfidence(f.confidence),
682
+ };
683
+ if (entities) result.entities = entities;
684
+ return result;
685
+ })
686
+ // Reject illegal type:summary + source:user
687
+ .filter((f) => !(f.type === 'summary' && f.source === 'user'))
688
+ // Compaction: importance >= 5 (not 6)
689
+ .filter((f) => f.importance >= 5 || f.action === 'DELETE');
690
+
691
+ return facts;
692
+ }
693
+
694
+ /**
695
+ * Extract facts using the compaction-aware prompt.
696
+ *
697
+ * This is called from the `before_compaction` hook — the LAST CHANCE to
698
+ * capture knowledge before conversation context is lost. Key differences
699
+ * from `extractFacts`:
700
+ * - Uses `COMPACTION_SYSTEM_PROMPT` (lower threshold, format-agnostic, more aggressive)
701
+ * - Always processes the full conversation (`mode: 'full'`)
702
+ * - Importance filter is >= 5 instead of >= 6
703
+ * - Lexical importance bumps still apply
704
+ *
705
+ * @param rawMessages - The messages array from the hook event (unknown[])
706
+ * @param existingMemories - Optional list of existing memories for dedup context
707
+ * @param logger - Optional logger for observability
708
+ * @returns Array of extracted facts, or empty array on failure.
709
+ */
710
+ export async function extractFactsForCompaction(
711
+ rawMessages: unknown[],
712
+ existingMemories?: Array<{ id: string; text: string }>,
713
+ logger?: ExtractorLogger,
714
+ ): Promise<ExtractedFact[]> {
715
+ const config = resolveLLMConfig();
716
+ if (!config) {
717
+ logger?.info?.('extractFactsForCompaction: no LLM config resolved (skipping extraction)');
718
+ return [];
719
+ }
720
+
721
+ // Parse messages
722
+ const parsed = rawMessages
723
+ .map(messageToText)
724
+ .filter((m): m is { role: string; content: string } => m !== null);
725
+
726
+ if (parsed.length === 0) {
727
+ logger?.info?.(`extractFactsForCompaction: no parseable messages (raw count=${rawMessages.length})`);
728
+ return [];
729
+ }
730
+
731
+ // Always full mode — process entire conversation for compaction
732
+ const conversationText = truncateMessages(parsed, 12_000);
733
+
734
+ if (conversationText.length < 20) {
735
+ logger?.info?.(
736
+ `extractFactsForCompaction: conversation too short (${conversationText.length} chars < 20)`,
737
+ );
738
+ return [];
739
+ }
740
+
741
+ // Build existing memories context if available
742
+ let memoriesContext = '';
743
+ if (existingMemories && existingMemories.length > 0) {
744
+ const memoriesStr = existingMemories
745
+ .map((m) => `[ID: ${m.id}] ${m.text}`)
746
+ .join('\n');
747
+ memoriesContext = `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoriesStr}`;
748
+ }
749
+
750
+ const userPrompt = `Extract ALL valuable long-term memories from this conversation before it is compacted and lost:\n\n${conversationText}${memoriesContext}`;
751
+
752
+ let response: string | null | undefined;
753
+ try {
754
+ response = await chatCompletion(config, [
755
+ { role: 'system', content: COMPACTION_SYSTEM_PROMPT },
756
+ { role: 'user', content: userPrompt },
757
+ ]);
758
+ } catch (err) {
759
+ const msg = err instanceof Error ? err.message : String(err);
760
+ logger?.warn?.(`extractFactsForCompaction: chatCompletion threw: ${msg}`);
761
+ return [];
762
+ }
763
+
764
+ if (!response) {
765
+ logger?.info?.('extractFactsForCompaction: chatCompletion returned null/empty response');
766
+ return [];
767
+ }
768
+
769
+ logger?.info?.(
770
+ `extractFactsForCompaction: LLM returned ${response.length} chars; handing to parseFactsResponseForCompaction`,
771
+ );
772
+ let facts = parseFactsResponseForCompaction(response, logger);
773
+
774
+ // v1 provenance filter (tag-don't-drop). Uses importance >= 5 floor because
775
+ // the filter's own floor is 5 in lax mode, matching compaction semantics.
776
+ facts = applyProvenanceFilterLax(facts, conversationText);
777
+
778
+ // Comparative rescore if >= 5 facts (same as default pipeline), else
779
+ // assign defaultVolatility so v1 write path has a value.
780
+ facts = await comparativeRescoreV1(facts, conversationText, logger);
781
+ facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
782
+
783
+ // Lexical importance bumps (same as regular extraction)
784
+ for (const f of facts) {
785
+ const bump = computeLexicalImportanceBump(f.text, conversationText);
786
+ if (bump > 0) {
787
+ const oldImportance = f.importance;
788
+ const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
789
+ f.importance = Math.min(10, f.importance + effectiveBump);
790
+ logger?.info?.(
791
+ `extractFactsForCompaction: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`,
792
+ );
793
+ }
794
+ }
795
+
796
+ return facts;
797
+ }
798
+
799
+ // ---------------------------------------------------------------------------
800
+ // Debrief Extraction
801
+ // ---------------------------------------------------------------------------
802
+
803
+ /**
804
+ * Canonical debrief system prompt — must be identical across all clients.
805
+ */
806
+ export const DEBRIEF_SYSTEM_PROMPT = `You are reviewing a conversation that just ended. The following facts were
807
+ already extracted and stored during this conversation:
808
+
809
+ {already_stored_facts}
810
+
811
+ Your job is to capture what turn-by-turn extraction MISSED. Focus on:
812
+
813
+ 1. **Broader context** — What was the conversation about overall? What project,
814
+ problem, or topic tied the discussion together?
815
+ 2. **Outcomes & conclusions** — What was decided, agreed upon, or resolved?
816
+ 3. **What was attempted** — What approaches were tried? What worked, what didn't, and why?
817
+ 4. **Relationships** — How do topics discussed relate to each other or to things
818
+ from previous conversations?
819
+ 5. **Open threads** — What was left unfinished or needs follow-up?
820
+
821
+ Do NOT repeat facts already stored. Only add genuinely new information that provides
822
+ broader context a future conversation would benefit from.
823
+
824
+ Return a JSON array (no markdown, no code fences):
825
+ [{"text": "...", "type": "summary|context", "importance": N}]
826
+
827
+ - Use type "summary" for conclusions, outcomes, and decisions-of-the-session
828
+ - Use type "context" for broader project context, open threads, and what-was-tried
829
+ - Importance 7-8 for most debrief items (they are high-value by definition)
830
+ - Maximum 5 items (debriefs should be concise, not exhaustive)
831
+ - Each item should be 1-3 sentences, self-contained
832
+
833
+ If the conversation was too short or trivial to warrant a debrief, return: []`;
834
+
835
+ export interface DebriefItem {
836
+ text: string;
837
+ type: 'summary' | 'context';
838
+ importance: number;
839
+ }
840
+
841
+ /**
842
+ * Parse a debrief response into validated DebriefItems.
843
+ */
844
+ export function parseDebriefResponse(response: string): DebriefItem[] {
144
845
  let cleaned = response.trim();
145
846
  if (cleaned.startsWith('```')) {
146
847
  cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
@@ -152,71 +853,555 @@ function parseFactsResponse(response: string): ExtractedFact[] {
152
853
 
153
854
  return parsed
154
855
  .filter(
155
- (f: unknown) =>
156
- f &&
157
- typeof f === 'object' &&
158
- typeof (f as ExtractedFact).text === 'string' &&
159
- (f as ExtractedFact).text.length >= 5,
856
+ (item: unknown) =>
857
+ item &&
858
+ typeof item === 'object' &&
859
+ typeof (item as Record<string, unknown>).text === 'string' &&
860
+ ((item as Record<string, unknown>).text as string).length >= 5,
160
861
  )
161
- .map((f: unknown) => {
162
- const fact = f as Record<string, unknown>;
163
- const validActions: ExtractionAction[] = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
164
- const action = validActions.includes(String(fact.action) as ExtractionAction)
165
- ? (String(fact.action) as ExtractionAction)
166
- : 'ADD'; // Default to ADD for backward compatibility
167
- return {
168
- text: String(fact.text).slice(0, 512),
169
- type: (['fact', 'preference', 'decision', 'episodic', 'goal', 'context', 'summary'].includes(String(fact.type))
170
- ? String(fact.type)
171
- : 'fact') as ExtractedFact['type'],
172
- importance: Math.max(1, Math.min(10, Number(fact.importance) || 5)),
173
- action,
174
- existingFactId: typeof fact.existingFactId === 'string' ? fact.existingFactId : undefined,
175
- };
862
+ .map((item: unknown) => {
863
+ const d = item as Record<string, unknown>;
864
+ const type: 'summary' | 'context' = d.type === 'summary' ? 'summary' : 'context';
865
+ const rawImportance = typeof d.importance === 'number' ? d.importance : 7;
866
+ const importance = Math.max(1, Math.min(10, rawImportance));
867
+ return { text: String(d.text).slice(0, 512), type, importance };
176
868
  })
177
- .filter((f) => f.importance >= 6 || f.action === 'DELETE'); // DELETE actions pass regardless of importance
869
+ .filter((d) => d.importance >= 6)
870
+ .slice(0, 5);
871
+ } catch {
872
+ return [];
873
+ }
874
+ }
875
+
876
+ /**
877
+ * Extract a session debrief using LLM.
878
+ *
879
+ * @param rawMessages - All messages from the session
880
+ * @param storedFactTexts - Texts of facts already stored in this session (for dedup)
881
+ * @returns Array of debrief items, or empty array on failure
882
+ */
883
+ export async function extractDebrief(
884
+ rawMessages: unknown[],
885
+ storedFactTexts: string[],
886
+ ): Promise<DebriefItem[]> {
887
+ const config = resolveLLMConfig();
888
+ if (!config) return [];
889
+
890
+ const parsed = rawMessages
891
+ .map(messageToText)
892
+ .filter((m): m is { role: string; content: string } => m !== null);
893
+
894
+ // Minimum 4 turns (8 messages) to warrant a debrief
895
+ if (parsed.length < 8) return [];
896
+
897
+ const conversationText = truncateMessages(parsed, 12_000);
898
+ if (conversationText.length < 20) return [];
899
+
900
+ const alreadyStored = storedFactTexts.length > 0
901
+ ? storedFactTexts.map((t) => `- ${t}`).join('\n')
902
+ : '(none)';
903
+
904
+ const systemPrompt = DEBRIEF_SYSTEM_PROMPT.replace('{already_stored_facts}', alreadyStored);
905
+
906
+ try {
907
+ const response = await chatCompletion(config, [
908
+ { role: 'system', content: systemPrompt },
909
+ { role: 'user', content: `Review this conversation and provide a debrief:\n\n${conversationText}` },
910
+ ]);
911
+
912
+ if (!response) return [];
913
+ return parseDebriefResponse(response);
178
914
  } catch {
179
915
  return [];
180
916
  }
181
917
  }
182
918
 
183
919
  // ---------------------------------------------------------------------------
184
- // Main extraction function
920
+ // v1 Taxonomy Extraction Pipeline (default as of plugin v3.0.0)
921
+ //
922
+ // Produces facts conforming to Memory Taxonomy v1 (6 types: claim,
923
+ // preference, directive, commitment, episode, summary; 5 sources; 8 scopes).
924
+ //
925
+ // The G-pipeline uses a single merged-topic prompt that returns both the
926
+ // 2-3 main topics the user engaged with AND the extracted facts, so topic
927
+ // anchoring is preserved within one call. After extraction we apply:
928
+ //
929
+ // 1. `applyProvenanceFilterLax` — tag-don't-drop. Assistant-sourced facts
930
+ // get their importance capped at 7 rather than being filtered out; the
931
+ // reranker later uses the source field to deprioritize them.
932
+ // 2. `comparativeRescoreV1` — spread importance across the 1-10 range
933
+ // and assign volatility. Forced when the batch has >= 5 facts.
934
+ // 3. `defaultVolatility` — heuristic fallback.
935
+ //
936
+ // This matches the winning G pipeline from the 200-conv benchmark.
185
937
  // ---------------------------------------------------------------------------
186
938
 
187
939
/**
 * The main extraction system prompt (v1 merged-topic pipeline).
 *
 * One call returns both the 2-3 identified topics and the extracted facts
 * as a single JSON object `{ topics, facts }`, which is decoded by
 * `parseMergedResponseV1`.
 *
 * Exported as both `EXTRACTION_SYSTEM_PROMPT` (canonical) and
 * `EXTRACTION_SYSTEM_PROMPT_V1_MERGED` (deprecated alias) for back-compat.
 */
export const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine using Memory Taxonomy v1. Work in TWO explicit phases within one response:

PHASE 1 — Topic identification.
Before extracting any fact, identify the 2-3 main topics the user was engaging with. Topics should be short phrases (2-5 words each). If the conversation has no clear user-focused topic, use an empty topics array.

PHASE 2 — Fact extraction anchored to those topics.
Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Tangential facts may still be extracted but score lower (6-7 range).

Rules:
1. Each memory = single self-contained piece of information
2. Focus on user-specific info useful in future conversations
3. Skip generic knowledge, greetings, small talk, ephemeral task coordination
4. Score importance 1-10 (6+ = worth storing)
5. Every memory MUST attribute a source (provenance critical)

Importance rubric (use FULL 1-10 range):
- 10: Critical, core identity, never-forget content
- 9: Affects many future decisions
- 8: High-value preference/decision/rule
- 7: Specific durable fact
- 6: Borderline
- 5 or below: NOT worth storing — drop

DO NOT cluster everything at 7-8-9.

═══════════════════════════════════════════════════════════════
TYPE (6 values)
═══════════════════════════════════════════════════════════════
- claim: factual assertion (absorbs fact/context/decision; decisions populate reasoning field)
- preference: likes/dislikes/tastes
- directive: imperative rule ("always X", "never Y")
- commitment: future intent ("will do X")
- episode: notable event
- summary: derived synthesis (source must be derived|assistant) — do NOT emit for turn-extraction

═══════════════════════════════════════════════════════════════
SOURCE (provenance, CRITICAL)
═══════════════════════════════════════════════════════════════
- user: user explicitly stated it (in [user]: turns)
- user-inferred: extractor inferred from user signals
- assistant: assistant authored content — DOWNGRADE unless user affirmed/quoted/used it
- external, derived: rare

IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant

═══════════════════════════════════════════════════════════════
SCOPE (life domain)
═══════════════════════════════════════════════════════════════
work | personal | health | family | creative | finance | misc | unspecified

═══════════════════════════════════════════════════════════════
ENTITIES
═══════════════════════════════════════════════════════════════
- type ∈ {person, project, tool, company, concept, place}
- prefer specific names ("PostgreSQL" not "database")
- omit umbrella categories when specific name is present

═══════════════════════════════════════════════════════════════
REASONING (only for claims that are decisions)
═══════════════════════════════════════════════════════════════
For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.

═══════════════════════════════════════════════════════════════
OUTPUT FORMAT (no markdown, no code fences)
═══════════════════════════════════════════════════════════════
{
  "topics": ["topic 1", "topic 2"],
  "facts": [
    {
      "text": "...",
      "type": "claim|preference|directive|commitment|episode",
      "source": "user|user-inferred|assistant",
      "scope": "work|personal|health|...",
      "importance": N,
      "confidence": 0.9,
      "action": "ADD",
      "reasoning": "...", // optional, only for claim+decision
      "entities": [{"name": "...", "type": "tool"}]
    }
  ]
}

If nothing worth extracting: {"topics": [], "facts": []}`;

/**
 * @deprecated Use `EXTRACTION_SYSTEM_PROMPT` instead. Kept only as a
 * back-compat alias for callers that imported the v1 rollout name.
 */
export const EXTRACTION_SYSTEM_PROMPT_V1_MERGED = EXTRACTION_SYSTEM_PROMPT;
1034
+
1035
/**
 * Parse a v1 merged-topic LLM response. Returns both the topic list and the
 * validated/filtered fact list. Illegal combinations (summary+user) are
 * dropped; importance < 6 with action != DELETE is dropped.
 *
 * Exported as both `parseFactsResponse` (canonical, returns facts array) and
 * `parseMergedResponseV1` (returns `{ topics, facts }`). Prefer the former
 * unless the topic list is needed.
 */
export function parseMergedResponseV1(
  response: string,
  logger?: ExtractorLogger,
): { topics: string[]; facts: ExtractedFact[] } {
  // Preview of the raw response kept for the parse-failure warning below.
  const originalPreview = response.trim().slice(0, 200);
  let cleaned = response.trim();
  // Strip <think>/<thinking> chain-of-thought blocks some models emit before the JSON.
  cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
  // Strip markdown code fences despite the prompt forbidding them.
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
  }

  // JSON.parse that returns undefined instead of throwing.
  const tryParse = (input: string): unknown => {
    try { return JSON.parse(input); } catch { return undefined; }
  };

  let parsed = tryParse(cleaned);
  let recoveryUsed: 'none' | 'bracket-scan' = 'none';
  if (parsed === undefined) {
    // First try an outermost-array greedy match (legacy bare-array format).
    const arrMatch = cleaned.match(/\[[\s\S]*\]/);
    if (arrMatch) {
      parsed = tryParse(arrMatch[0]);
      if (parsed !== undefined) recoveryUsed = 'bracket-scan';
    }
    if (parsed === undefined) {
      // Fall back to an outermost-object greedy match (merged-topic format).
      const objMatch = cleaned.match(/\{[\s\S]*\}/);
      if (objMatch) {
        parsed = tryParse(objMatch[0]);
        if (parsed !== undefined) recoveryUsed = 'bracket-scan';
      }
    }
  }
  if (recoveryUsed === 'bracket-scan') {
    logger?.info?.(
      `parseFactsResponse: recovered JSON via bracket-scan fallback`,
    );
  }

  if (!parsed || typeof parsed !== 'object') {
    logger?.warn?.(
      `parseFactsResponse: could not parse LLM output as JSON. Preview: ${JSON.stringify(originalPreview)}`,
    );
    return { topics: [], facts: [] };
  }

  // Dual-format acceptance: either the merged object `{ topics, facts }` or
  // a bare JSON array of fact objects (legacy / test fixture shape). The
  // bare array is wrapped as { topics: [], facts: [...] } so the downstream
  // logic stays uniform. A single fact object (no wrapper) is also wrapped.
  let obj: Record<string, unknown>;
  if (Array.isArray(parsed)) {
    obj = { topics: [], facts: parsed };
  } else if (
    typeof (parsed as Record<string, unknown>).facts === 'undefined' &&
    typeof (parsed as Record<string, unknown>).text === 'string'
  ) {
    // Single fact object, not a merged wrapper.
    obj = { topics: [], facts: [parsed] };
  } else {
    obj = parsed as Record<string, unknown>;
  }

  // Keep at most 3 non-empty string topics; anything else is discarded.
  const rawTopics = obj.topics;
  const topics = Array.isArray(rawTopics)
    ? (rawTopics as unknown[])
        .filter((t): t is string => typeof t === 'string' && t.length > 0)
        .slice(0, 3)
    : [];

  const rawFacts = obj.facts;
  if (!Array.isArray(rawFacts)) return { topics, facts: [] };

  const validActions: ExtractionAction[] = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];

  const facts = (rawFacts as unknown[])
    // Keep only object entries with a meaningful (>= 5 chars) text field.
    .filter(
      (f): f is Record<string, unknown> =>
        !!f &&
        typeof f === 'object' &&
        typeof (f as Record<string, unknown>).text === 'string' &&
        ((f as Record<string, unknown>).text as string).length >= 5,
    )
    .map((f) => {
      const rawType = String(f.type ?? 'claim').toLowerCase();
      // Accept both v1 tokens and legacy v0 tokens — coerce v0 via V0_TO_V1_TYPE.
      let type: MemoryType;
      if (isValidMemoryType(rawType)) {
        type = rawType;
      } else if ((LEGACY_V0_MEMORY_TYPES as readonly string[]).includes(rawType)) {
        type = V0_TO_V1_TYPE[rawType as MemoryTypeV0];
      } else {
        type = 'claim';
      }

      // Unknown sources fall back to 'user-inferred'.
      const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
      const source: MemorySource =
        (VALID_MEMORY_SOURCES as readonly string[]).includes(rawSource)
          ? (rawSource as MemorySource)
          : 'user-inferred';

      // Unknown scopes fall back to 'unspecified'.
      const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
      const scope: MemoryScope =
        (VALID_MEMORY_SCOPES as readonly string[]).includes(rawScope)
          ? (rawScope as MemoryScope)
          : 'unspecified';

      const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;

      const action = validActions.includes(String(f.action) as ExtractionAction)
        ? (String(f.action) as ExtractionAction)
        : 'ADD';

      let entities: ExtractedEntity[] | undefined;
      if (Array.isArray(f.entities)) {
        const valid = (f.entities as unknown[])
          .map(parseEntity)
          .filter((e): e is ExtractedEntity => e !== null);
        if (valid.length > 0) entities = valid;
      }

      // Note: `Number(f.importance) || 5` sends NaN/0 importances to 5,
      // then clamps to [1, 10].
      const fact: ExtractedFact = {
        text: String(f.text).slice(0, 512),
        type,
        source,
        scope,
        reasoning,
        importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
        confidence: normalizeConfidence(f.confidence),
        action,
        existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
      };
      if (entities) fact.entities = entities;
      return fact;
    })
    // Reject illegal type:summary + source:user
    .filter((f) => !(f.type === 'summary' && f.source === 'user'))
    // Importance threshold (preserves DELETE)
    .filter((f) => f.importance >= 6 || f.action === 'DELETE');

  return { topics, facts };
}
1186
+
1187
+ /**
1188
+ * Parse an LLM extraction response into structured v1 facts. Canonical
1189
+ * parser used by the default `extractFacts()` pipeline.
1190
+ *
1191
+ * This is a thin wrapper around `parseMergedResponseV1` that discards the
1192
+ * topic list so existing callers that expect a flat `ExtractedFact[]`
1193
+ * signature keep working.
1194
+ */
1195
+ export function parseFactsResponse(
1196
+ response: string,
1197
+ logger?: ExtractorLogger,
1198
+ ): ExtractedFact[] {
1199
+ return parseMergedResponseV1(response, logger).facts;
1200
+ }
1201
+
1202
+ /**
1203
+ * Tag-don't-drop provenance filter (pipeline G / F).
1204
+ *
1205
+ * For each fact:
1206
+ * - If source is already "assistant", cap importance at 7.
1207
+ * - Otherwise, keyword-match the fact against user turns. If <30% of
1208
+ * content words (length >= 4) appear in user turns AND source != "user",
1209
+ * tag source as "assistant" and cap importance at 7 (keep the fact).
1210
+ * - Drop facts below importance 5 (unless DELETE action).
1211
+ */
1212
+ export function applyProvenanceFilterLax(
1213
+ facts: ExtractedFact[],
1214
+ conversationText: string,
1215
+ ): ExtractedFact[] {
1216
+ const userTurnsLower = conversationText
1217
+ .split(/\n\n/)
1218
+ .filter((line) => line.startsWith('[user]:'))
1219
+ .join(' ')
1220
+ .toLowerCase();
1221
+
1222
+ return facts
1223
+ .map((f) => {
1224
+ if (f.source === 'assistant') {
1225
+ return { ...f, importance: Math.min(f.importance, 7) };
1226
+ }
1227
+
1228
+ const factWords = f.text
1229
+ .toLowerCase()
1230
+ .replace(/[^a-z0-9\s]/g, ' ')
1231
+ .split(/\s+/)
1232
+ .filter((w) => w.length >= 4);
1233
+
1234
+ const matchedWords = factWords.filter((w) => userTurnsLower.includes(w)).length;
1235
+ const matchRatio = factWords.length > 0 ? matchedWords / factWords.length : 0;
1236
+
1237
+ if (matchRatio < 0.3 && f.source !== 'user') {
1238
+ return {
1239
+ ...f,
1240
+ source: 'assistant' as MemorySource,
1241
+ importance: Math.min(f.importance, 7),
1242
+ };
1243
+ }
1244
+
1245
+ return f;
1246
+ })
1247
+ .filter((f) => f.importance >= 5 || f.action === 'DELETE');
1248
+ }
1249
+
1250
+ /**
1251
+ * Heuristic fallback volatility when the LLM doesn't assign one.
1252
+ */
1253
+ export function defaultVolatility(f: ExtractedFact): MemoryVolatility {
1254
+ if (f.type === 'commitment') return 'updatable';
1255
+ if (f.type === 'episode') return 'stable';
1256
+ if (f.type === 'directive') return 'stable';
1257
+ if (f.scope === 'health' || f.scope === 'family') return 'stable';
1258
+ return 'updatable';
1259
+ }
1260
+
1261
// System prompt for the comparative re-scoring pass: re-ranks importance
// across the extracted batch (forcing a spread over the 1-10 range) and
// assigns a per-fact volatility. The model must reply with a bare JSON
// array aligned 1:1 with the input order; parsed in `comparativeRescoreV1`.
const COMPARATIVE_PROMPT_V1 = `You are a memory re-ranker for the v1 taxonomy. You receive facts already extracted from one conversation, each with initial importance. Your job is twofold:

1. RE-RANK importance to spread across the 1-10 range (avoid clustering at 7-8-9)
2. ASSIGN volatility to each fact

Re-ranking rules:
- Top 1/3 of facts (most significant for this user): importance 9-10
- Middle 1/3: importance 7-8
- Bottom 1/3: importance 5-6 (borderline, may be dropped)
- A fact may stay at 10 if it's clearly identity-defining (name, birthday) or marked as "never forget"
- Never raise without justification; never lower below 5 unless clearly noise
- You MUST produce a spread

Volatility rules:
- stable: unlikely to change for years (name, allergies, birthplace, fundamental traits)
- updatable: changes occasionally (current job, active project, partner's name, address)
- ephemeral: short-lived state (today's task, this week's plan, current trip itinerary)

Use the FULL conversation context to judge volatility — a single claim may be ambiguous, but in context you can usually tell.

Return JSON array, same order as input, ONLY with importance + volatility fields:
[{"importance": N, "volatility": "stable|updatable|ephemeral"}, ...]
No markdown.`;
1284
+
1285
+ /**
1286
+ * Comparative re-scoring pass (v1). Forces re-scoring when facts.length >= 5
1287
+ * so the importance distribution spreads across the 1-10 range. When
1288
+ * facts.length < 5, assigns defaultVolatility and returns.
1289
+ */
1290
+ export async function comparativeRescoreV1(
1291
+ facts: ExtractedFact[],
1292
+ conversationText: string,
1293
+ logger?: ExtractorLogger,
1294
+ ): Promise<ExtractedFact[]> {
1295
+ // G-tuned behavior: force rescore when >= 5 facts
1296
+ if (facts.length < 2 || facts.length < 5) {
1297
+ return facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
1298
+ }
1299
+
1300
+ const config = resolveLLMConfig();
1301
+ if (!config) {
1302
+ return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
1303
+ }
1304
+
1305
+ const factsForPrompt = facts
1306
+ .map((f, i) => `${i + 1}. [imp: ${f.importance}] [type: ${f.type}] [scope: ${f.scope ?? 'unspecified'}] ${f.text}`)
1307
+ .join('\n');
1308
+
1309
+ const userPrompt = `Conversation context:\n${conversationText}\n\nExtracted facts:\n${factsForPrompt}\n\nReturn ${facts.length} JSON objects, each with "importance" + "volatility". Match input order.`;
1310
+
1311
+ let response: string | null | undefined;
1312
+ try {
1313
+ response = await chatCompletion(config, [
1314
+ { role: 'system', content: COMPARATIVE_PROMPT_V1 },
1315
+ { role: 'user', content: userPrompt },
1316
+ ]);
1317
+ } catch (err) {
1318
+ const msg = err instanceof Error ? err.message : String(err);
1319
+ logger?.warn?.(`comparativeRescoreV1: chatCompletion threw: ${msg}`);
1320
+ return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
1321
+ }
1322
+
1323
+ if (!response) {
1324
+ return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
1325
+ }
1326
+
1327
+ let cleaned = response.trim();
1328
+ cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
1329
+ if (cleaned.startsWith('```')) {
1330
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
1331
+ }
1332
+ const match = cleaned.match(/\[[\s\S]*\]/);
1333
+ if (match) cleaned = match[0];
1334
+
1335
+ let parsed: unknown;
1336
+ try {
1337
+ parsed = JSON.parse(cleaned);
1338
+ } catch {
1339
+ return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
1340
+ }
1341
+ if (!Array.isArray(parsed)) {
1342
+ return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
1343
+ }
1344
+
1345
+ return facts.map((f, i) => {
1346
+ const entry = parsed[i] as Record<string, unknown> | undefined;
1347
+ const rawImp = entry && typeof entry === 'object' ? Number(entry.importance) : NaN;
1348
+ const rawVol = entry && typeof entry === 'object' ? String(entry.volatility ?? '').toLowerCase() : '';
1349
+
1350
+ const newImp = Number.isFinite(rawImp)
1351
+ ? Math.max(5, Math.min(10, Math.round(rawImp)))
1352
+ : f.importance;
1353
+ const newVol: MemoryVolatility =
1354
+ (VALID_MEMORY_VOLATILITIES as readonly string[]).includes(rawVol)
1355
+ ? (rawVol as MemoryVolatility)
1356
+ : defaultVolatility(f);
1357
+
1358
+ return { ...f, importance: newImp, volatility: newVol };
1359
+ });
1360
+ }
1361
+
1362
+ /**
1363
+ * Main extraction entry point (default pipeline as of plugin v3.0.0).
1364
+ *
1365
+ * Pipeline: single merged-topic LLM call → `applyProvenanceFilterLax`
1366
+ * (tag-don't-drop) → `comparativeRescoreV1` (forces re-rank when >= 5 facts)
1367
+ * → `defaultVolatility` fallback → lexical importance bumps.
1368
+ *
1369
+ * Produces v1-shaped facts with `type`, `source`, `scope`, `volatility`,
1370
+ * and optional `reasoning` fields populated. The caller should hand the
1371
+ * result to `storeExtractedFacts` which emits a v1 canonical claim blob.
194
1372
  */
195
1373
  export async function extractFacts(
196
1374
  rawMessages: unknown[],
197
1375
  mode: 'turn' | 'full',
198
1376
  existingMemories?: Array<{ id: string; text: string }>,
1377
+ profileContext?: string,
1378
+ logger?: ExtractorLogger,
199
1379
  ): Promise<ExtractedFact[]> {
200
1380
  const config = resolveLLMConfig();
201
- if (!config) return []; // No LLM available
1381
+ if (!config) {
1382
+ logger?.info?.('extractFacts: no LLM config resolved (skipping extraction)');
1383
+ return [];
1384
+ }
202
1385
 
203
- // Parse messages
204
1386
  const parsed = rawMessages
205
1387
  .map(messageToText)
206
1388
  .filter((m): m is { role: string; content: string } => m !== null);
207
1389
 
208
- if (parsed.length === 0) return [];
1390
+ if (parsed.length === 0) {
1391
+ logger?.info?.(`extractFacts: no parseable messages (raw count=${rawMessages.length})`);
1392
+ return [];
1393
+ }
209
1394
 
210
- // For 'turn' mode, only look at last 6 messages (3 turns)
211
- // For 'full' mode, use all messages but truncate to fit token budget
212
1395
  const relevantMessages = mode === 'turn' ? parsed.slice(-6) : parsed;
213
-
214
- // Truncate to ~3000 tokens worth of text
215
1396
  const conversationText = truncateMessages(relevantMessages, 12_000);
216
1397
 
217
- if (conversationText.length < 20) return [];
1398
+ if (conversationText.length < 20) {
1399
+ logger?.info?.(
1400
+ `extractFacts: conversation too short (${conversationText.length} chars < 20, parsed=${parsed.length}, mode=${mode})`,
1401
+ );
1402
+ return [];
1403
+ }
218
1404
 
219
- // Build existing memories context if available
220
1405
  let memoriesContext = '';
221
1406
  if (existingMemories && existingMemories.length > 0) {
222
1407
  const memoriesStr = existingMemories
@@ -230,16 +1415,54 @@ export async function extractFacts(
230
1415
  ? `Extract important facts from these recent conversation turns:\n\n${conversationText}${memoriesContext}`
231
1416
  : `Extract ALL valuable long-term memories from this conversation before it is lost:\n\n${conversationText}${memoriesContext}`;
232
1417
 
1418
+ const systemPrompt = profileContext || EXTRACTION_SYSTEM_PROMPT;
1419
+
1420
+ let response: string | null | undefined;
233
1421
  try {
234
- const response = await chatCompletion(config, [
235
- { role: 'system', content: EXTRACTION_SYSTEM_PROMPT },
1422
+ response = await chatCompletion(config, [
1423
+ { role: 'system', content: systemPrompt },
236
1424
  { role: 'user', content: userPrompt },
237
1425
  ]);
1426
+ } catch (err) {
1427
+ const msg = err instanceof Error ? err.message : String(err);
1428
+ logger?.warn?.(`extractFacts: chatCompletion threw: ${msg}`);
1429
+ return [];
1430
+ }
238
1431
 
239
- if (!response) return [];
1432
+ if (!response) {
1433
+ logger?.info?.('extractFacts: chatCompletion returned null/empty response');
1434
+ return [];
1435
+ }
240
1436
 
241
- return parseFactsResponse(response);
242
- } catch {
243
- return []; // Fail silently -- hooks must never break the agent
1437
+ logger?.info?.(
1438
+ `extractFacts: LLM returned ${response.length} chars; parsing merged response`,
1439
+ );
1440
+ const { topics, facts: rawFacts } = parseMergedResponseV1(response, logger);
1441
+ if (topics.length > 0) {
1442
+ logger?.info?.(`extractFacts: topics = ${JSON.stringify(topics)}`);
244
1443
  }
1444
+
1445
+ // Provenance filter (tag-don't-drop)
1446
+ let facts = applyProvenanceFilterLax(rawFacts, conversationText);
1447
+
1448
+ // Comparative rescore (forces re-rank when >= 5 facts)
1449
+ facts = await comparativeRescoreV1(facts, conversationText, logger);
1450
+
1451
+ // Ensure every fact has a volatility (defensive: rescore may have skipped)
1452
+ facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
1453
+
1454
+ // Lexical importance bumps (same as v0 pipeline)
1455
+ for (const f of facts) {
1456
+ const bump = computeLexicalImportanceBump(f.text, conversationText);
1457
+ if (bump > 0) {
1458
+ const oldImportance = f.importance;
1459
+ const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
1460
+ f.importance = Math.min(10, f.importance + effectiveBump);
1461
+ logger?.info?.(
1462
+ `extractFacts: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`,
1463
+ );
1464
+ }
1465
+ }
1466
+
1467
+ return facts;
245
1468
  }