@totalreclaw/totalreclaw 3.3.1-rc.2 → 3.3.1-rc.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/CHANGELOG.md +330 -0
  2. package/SKILL.md +50 -83
  3. package/api-client.ts +18 -11
  4. package/config.ts +117 -3
  5. package/crypto.ts +10 -2
  6. package/dist/api-client.js +226 -0
  7. package/dist/billing-cache.js +100 -0
  8. package/dist/claims-helper.js +606 -0
  9. package/dist/config.js +280 -0
  10. package/dist/consolidation.js +258 -0
  11. package/dist/contradiction-sync.js +1034 -0
  12. package/dist/crypto.js +138 -0
  13. package/dist/digest-sync.js +361 -0
  14. package/dist/download-ux.js +63 -0
  15. package/dist/embedding.js +86 -0
  16. package/dist/extractor.js +1225 -0
  17. package/dist/first-run.js +103 -0
  18. package/dist/fs-helpers.js +563 -0
  19. package/dist/gateway-url.js +197 -0
  20. package/dist/generate-mnemonic.js +13 -0
  21. package/dist/hot-cache-wrapper.js +101 -0
  22. package/dist/import-adapters/base-adapter.js +64 -0
  23. package/dist/import-adapters/chatgpt-adapter.js +238 -0
  24. package/dist/import-adapters/claude-adapter.js +114 -0
  25. package/dist/import-adapters/gemini-adapter.js +201 -0
  26. package/dist/import-adapters/index.js +26 -0
  27. package/dist/import-adapters/mcp-memory-adapter.js +219 -0
  28. package/dist/import-adapters/mem0-adapter.js +158 -0
  29. package/dist/import-adapters/types.js +1 -0
  30. package/dist/index.js +5348 -0
  31. package/dist/llm-client.js +686 -0
  32. package/dist/llm-profile-reader.js +346 -0
  33. package/dist/lsh.js +62 -0
  34. package/dist/onboarding-cli.js +750 -0
  35. package/dist/pair-cli.js +344 -0
  36. package/dist/pair-crypto.js +359 -0
  37. package/dist/pair-http.js +404 -0
  38. package/dist/pair-page.js +826 -0
  39. package/dist/pair-qr.js +107 -0
  40. package/dist/pair-remote-client.js +410 -0
  41. package/dist/pair-session-store.js +566 -0
  42. package/dist/pin.js +542 -0
  43. package/dist/qa-bug-report.js +301 -0
  44. package/dist/relay-headers.js +44 -0
  45. package/dist/reranker.js +442 -0
  46. package/dist/retype-setscope.js +348 -0
  47. package/dist/semantic-dedup.js +75 -0
  48. package/dist/subgraph-search.js +289 -0
  49. package/dist/subgraph-store.js +694 -0
  50. package/dist/tool-gating.js +58 -0
  51. package/download-ux.ts +91 -0
  52. package/embedding.ts +32 -9
  53. package/fs-helpers.ts +124 -0
  54. package/gateway-url.ts +57 -9
  55. package/index.ts +586 -357
  56. package/llm-client.ts +211 -23
  57. package/lsh.ts +7 -2
  58. package/onboarding-cli.ts +114 -1
  59. package/package.json +19 -5
  60. package/pair-cli.ts +76 -8
  61. package/pair-crypto.ts +34 -24
  62. package/pair-page.ts +28 -17
  63. package/pair-qr.ts +152 -0
  64. package/pair-remote-client.ts +540 -0
  65. package/qa-bug-report.ts +381 -0
  66. package/relay-headers.ts +50 -0
  67. package/reranker.ts +73 -0
  68. package/retype-setscope.ts +12 -0
  69. package/subgraph-search.ts +4 -3
  70. package/subgraph-store.ts +109 -16
@@ -0,0 +1,1225 @@
1
+ /**
2
+ * TotalReclaw Plugin - Fact Extractor
3
+ *
4
+ * Uses LLM calls to extract atomic facts from conversation messages.
5
+ * Matches the extraction prompts described in SKILL.md.
6
+ */
7
+ import { chatCompletion, resolveLLMConfig } from './llm-client.js';
8
// ---------------------------------------------------------------------------
// Memory Taxonomy v1 — the 6 canonical memory types. Single source of truth.
//
// As of plugin v3.0.0 this is the ONLY taxonomy. Legacy v0 tokens
// (fact, decision, episodic, goal, context, rule) survive solely on the
// read side via `LEGACY_V0_MEMORY_TYPES` / `V0_TO_V1_TYPE` and
// `normalizeToV1Type` in `claims-helper.ts`, so pre-v3 vault entries can
// still be decoded. Extraction and write paths emit v1 tokens exclusively.
//
// Adding a new type? Update every one of:
//   - This constant
//   - `mcp/src/v1-types.ts`
//   - `python/src/totalreclaw/agent/extraction.py`
//   - `rust/totalreclaw-core/src/claims.rs`
//   - `skill/plugin/claims-helper.ts`
//   - The `EXTRACTION_SYSTEM_PROMPT` Types: list
// ---------------------------------------------------------------------------
export const VALID_MEMORY_TYPES = [
    'claim',
    'preference',
    'directive',
    'commitment',
    'episode',
    'summary',
];
/**
 * Runtime type guard: is `value` a valid v1 `MemoryType`?
 *
 * Legacy v0 tokens yield `false`; coerce those on the read path with
 * `normalizeToV1Type()` in `claims-helper.ts` instead.
 */
export function isValidMemoryType(value) {
    if (typeof value !== 'string')
        return false;
    return VALID_MEMORY_TYPES.includes(value);
}
/**
 * Backward-compat alias for `VALID_MEMORY_TYPES` (same array instance).
 * @deprecated Use `VALID_MEMORY_TYPES` instead.
 */
export const VALID_MEMORY_TYPES_V1 = VALID_MEMORY_TYPES;
/**
 * Backward-compat alias for `isValidMemoryType`.
 * @deprecated Use `isValidMemoryType` instead.
 */
export function isValidMemoryTypeV1(value) {
    return isValidMemoryType(value);
}
/**
 * Legacy v0 memory types — retained as a typed constant so the read-side
 * `V0_TO_V1_TYPE` mapping can reference them without redeclaration.
 *
 * Do NOT emit these on the write/extraction path. They exist solely so
 * `claims-helper.ts::readClaimFromBlob` can decode pre-v1 vault entries
 * whose encrypted blobs still carry v0 token strings.
 */
export const LEGACY_V0_MEMORY_TYPES = [
    'fact',
    'preference',
    'decision',
    'episodic',
    'goal',
    'context',
    'summary',
    'rule',
];
// Provenance labels a fact may carry. Used by the extraction parsers to
// validate the LLM's "source" field (invalid values fall back to
// 'user-inferred' — see parseFactsResponseForCompaction).
export const VALID_MEMORY_SOURCES = [
    'user',
    'user-inferred',
    'assistant',
    'external',
    'derived',
];
// Life-domain buckets for a memory. Invalid/missing scopes fall back to
// 'unspecified' on the parse path.
export const VALID_MEMORY_SCOPES = [
    'work',
    'personal',
    'health',
    'family',
    'creative',
    'finance',
    'misc',
    'unspecified',
];
// How likely a memory is to change over time. Assigned on the write path
// (see the `defaultVolatility` usage in extractFactsForCompaction).
export const VALID_MEMORY_VOLATILITIES = [
    'stable',
    'updatable',
    'ephemeral',
];
/**
 * Legacy v0 → v1 type mapping used by the read-side adapter when decoding
 * a pre-v1 vault entry that still carries a v0 token string.
 *
 * Decisions (v0) map to v1 `claim` — the reasoning lives in the separate
 * `reasoning` field rather than being encoded in the type.
 */
export const V0_TO_V1_TYPE = {
    fact: 'claim',
    preference: 'preference',
    decision: 'claim',
    episodic: 'episode',
    goal: 'commitment',
    context: 'claim',
    summary: 'summary',
    rule: 'directive',
};
// Entity categories accepted by `parseEntity`. Anything else is silently
// dropped so one bad entity never invalidates its parent fact.
const ALLOWED_ENTITY_TYPES = new Set([
    'person',
    'project',
    'tool',
    'company',
    'concept',
    'place',
]);
/**
 * Default confidence when the LLM does not provide one.
 * Mirrors the fallback used by other extraction clients.
 */
export const DEFAULT_EXTRACTION_CONFIDENCE = 0.85;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Pull plain text out of one conversation message, tolerating several shapes.
 *
 * OpenClaw AgentMessage objects use content arrays:
 *   { role: "user", content: [{ type: "text", text: "..." }] }
 *   { role: "assistant", content: [{ type: "text", text: "..." }, { type: "toolCall", ... }] }
 *
 * The simpler { role, content: "string" } and { role, text: "string" }
 * shapes are handled too.
 *
 * Returns `{ role, content }` for user/assistant turns with at least 3 chars
 * of text, otherwise `null`.
 */
function messageToText(msg) {
    // Reject non-objects; note `typeof null === 'object'`, hence the extra check.
    if (typeof msg !== 'object' || msg === null)
        return null;
    const role = msg.role ?? 'unknown';
    // Tool/system/etc. turns carry no extractable user signal — drop them.
    if (role !== 'user' && role !== 'assistant')
        return null;
    const { content } = msg;
    let body;
    if (typeof content === 'string') {
        // Plain string content.
        body = content;
    }
    else if (Array.isArray(content)) {
        // OpenClaw block array: keep only { type: "text", text: "..." } blocks.
        const pieces = [];
        for (const block of content) {
            if (block.type === 'text' && typeof block.text === 'string')
                pieces.push(block.text);
        }
        body = pieces.join('\n');
    }
    else if (typeof msg.text === 'string') {
        // Fallback shape: bare { text: "..." } field.
        body = msg.text;
    }
    else {
        return null;
    }
    // Near-empty turns (fewer than 3 chars) are noise — discard.
    return body.length < 3 ? null : { role, content: body };
}
/**
 * Render messages as "[role]: content" lines and keep a prefix of them that
 * fits within a character budget (rough token estimate: 4 chars per token).
 *
 * Stops at the first line that would overflow; the budget counts rendered
 * line lengths only, not the "\n\n" separators used in the joined result.
 */
function truncateMessages(messages, maxChars) {
    const rendered = messages.map((m) => `[${m.role}]: ${m.content}`);
    const kept = [];
    let budget = maxChars;
    for (const line of rendered) {
        if (line.length > budget)
            break;
        budget -= line.length;
        kept.push(line);
    }
    return kept.join('\n\n');
}
/**
 * Parse a single entity object from LLM output. Returns null if invalid.
 *
 * Invalid entities are silently dropped so a bad entity never fails the
 * whole fact. Name and role are trimmed and capped at 128 chars; the type
 * must be one of ALLOWED_ENTITY_TYPES (case-insensitive).
 */
export function parseEntity(raw) {
    if (!raw || typeof raw !== 'object')
        return null;
    const candidate = raw;
    const trimmedName = typeof candidate.name === 'string' ? candidate.name.trim() : '';
    if (!trimmedName)
        return null;
    const kind = String(candidate.type ?? '').toLowerCase();
    if (!ALLOWED_ENTITY_TYPES.has(kind))
        return null;
    const entity = { name: trimmedName.slice(0, 128), type: kind };
    const roleText = typeof candidate.role === 'string' ? candidate.role.trim() : '';
    if (roleText)
        entity.role = roleText.slice(0, 128);
    return entity;
}
/**
 * Clamp a raw confidence value to [0, 1].
 *
 * Non-numbers, NaN, and infinities fall back to
 * DEFAULT_EXTRACTION_CONFIDENCE.
 */
export function normalizeConfidence(raw) {
    if (typeof raw === 'number' && Number.isFinite(raw)) {
        if (raw < 0)
            return 0;
        return raw > 1 ? 1 : raw;
    }
    return DEFAULT_EXTRACTION_CONFIDENCE;
}
// ---------------------------------------------------------------------------
// Phase 2.2.6: lexical importance bumps
// ---------------------------------------------------------------------------
/**
 * Escape regex metacharacters so a string can be used as a literal pattern.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with every regex metacharacter backslash-escaped.
 */
function escapeRegExp(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Common English filler words excluded from the repetition signal, so words
 * shared between any fact and any conversation never count as "repetition".
 *
 * Hoisted to module scope so the Set is built once, not per call.
 * (Fix: the original inline list carried 'about' and 'this' twice.)
 */
const REPETITION_STOP_WORDS = new Set([
    'about', 'after', 'again', 'against', 'because', 'before', 'being',
    'between', 'could', 'doing', 'during', 'every', 'further', 'having',
    'their', 'these', 'those', 'through', 'under', 'until', 'where', 'which',
    'while', 'would', 'should', 'thing', 'things', 'something',
    'someone', 'always', 'never', 'often', 'still', 'really', 'maybe',
    'using', 'works', 'work', 'user', 'users', 'with', 'from', 'into',
    'like', 'just', 'than', 'them', 'they', 'will', 'when', 'what', 'were',
    'this', 'that', 'have',
]);
/**
 * Compute a lexical importance bump (0-2) for a single fact based on signals
 * in the surrounding conversation text.
 *
 * This is a Phase 2.2.6 quality fix complementing the prompt rubric tightening
 * (item A). Where the rubric tells the LLM to use the full 1-10 range, the
 * bump tells us *as a post-process*: when the user's actual phrasing carries
 * strong "remember this" signals that the LLM may have under-weighted, push
 * the score up.
 *
 * Signals detected (each adds +1, capped at +2 total):
 *
 * 1. **Strong intent phrases** anywhere in the conversation: "remember this",
 *    "never forget", "rule of thumb", "don't (ever) forget", "critical",
 *    "important", "gotcha", "note to self".
 * 2. **Emphasis markers**: `!!` (double exclamation), or 3+ consecutive
 *    all-caps words of 3+ letters each (e.g. "DO NOT FORGET"; the length
 *    floor avoids false positives on acronym runs like "AWS S3 IAM").
 * 3. **Repetition**: at least one content word of the fact (>= 5 chars, not
 *    a stop word) occurs 2+ times in the conversation. Word-level matching
 *    is robust to LLM paraphrasing: "User prefers PostgreSQL" extracted from
 *    "I prefer PostgreSQL ... yeah PostgreSQL is right for OLTP" still
 *    triggers because "postgresql" repeats.
 *
 * The bump is additive on top of whatever the LLM scored; callers cap the
 * final importance at 10. A bump never makes a fact pass the importance >= 6
 * filter on its own — a fact still needs an LLM score >= 5 (+2 from 5 = 7;
 * +1 from 5 = 6). This is intentional: the bump corrects under-weighting,
 * it does not override "skip".
 *
 * @param {string} factText - The extracted fact text.
 * @param {string} conversationText - Conversation the fact was extracted from.
 * @returns {number} 0, 1, or 2.
 */
export function computeLexicalImportanceBump(factText, conversationText) {
    let bump = 0;
    const lowerConv = conversationText.toLowerCase();
    // Signal 1: strong "remember this" intent anywhere in the conversation.
    const strongIntent = /\b(remember this|never forget|rule of thumb|don't (?:ever )?forget|critical|important|gotcha|note to self)\b/i;
    if (strongIntent.test(lowerConv))
        bump += 1;
    // Signal 2: emphasis — "!!" or a run of 3+ all-caps words (3+ letters each).
    // Checked against the original-case text, since caps carry the signal.
    const doubleExclamation = /!!/;
    const allCapsPhrase = /\b[A-Z]{3,}(?:\s+[A-Z]{3,}){2,}\b/;
    if (doubleExclamation.test(conversationText) || allCapsPhrase.test(conversationText)) {
        bump += 1;
    }
    // Signal 3: repetition of any content word from the fact.
    const factWords = factText
        .toLowerCase()
        .split(/[^a-z0-9_]+/)
        .filter((w) => w.length >= 5 && !REPETITION_STOP_WORDS.has(w));
    const repeated = factWords.some((word) => {
        const hits = lowerConv.match(new RegExp(`\\b${escapeRegExp(word)}\\b`, 'g'));
        return hits !== null && hits.length >= 2;
    });
    if (repeated)
        bump += 1;
    return Math.min(bump, 2);
}
// ---------------------------------------------------------------------------
// Compaction-Aware Extraction (Phase 2.3)
// ---------------------------------------------------------------------------
/**
 * Compaction-specific system prompt (v1 taxonomy). Fires when the conversation
 * context is about to be compacted. LAST CHANCE to capture knowledge before
 * it is lost, so the importance floor is 5 instead of 6 and the prompt is
 * more aggressive about extracting active-project context, claims, and
 * episodes.
 *
 * Differences from `EXTRACTION_SYSTEM_PROMPT`:
 * - Opening framing emphasizes urgency ("last chance")
 * - Format-agnostic: handles bullet lists, prose, mixed formats
 * - Importance threshold lowered to 5
 * - More aggressive on claim / episode / directive types
 * - Anti-pattern: don't skip content just because it's in a summary
 *
 * Output format matches `EXTRACTION_SYSTEM_PROMPT` exactly (same merged
 * topics+facts JSON shape with v1 type / source / scope fields), so the
 * same `parseMergedResponseV1` parser can validate it.
 */
export const COMPACTION_SYSTEM_PROMPT = `You are extracting memories from a conversation that is about to be compacted. The context will be LOST after this point — this is your LAST CHANCE to capture everything worth remembering. Be more aggressive than usual: err on the side of storing.

Work in TWO explicit phases within one response:

PHASE 1 — Topic identification.
Identify the 2-3 main topics the user was engaging with before extracting any fact. Topics should be short phrases (2-5 words each). If there's no clear user-focused topic, use an empty topics array.

PHASE 2 — Fact extraction anchored to those topics (plus preserve active context).
Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Active project context, decisions in progress, and current working state score 6-8 during compaction — capture them even when they'd normally be marginal.

Rules:
1. Each memory = single self-contained piece of information
2. Focus on user-specific info useful in future conversations
3. Skip generic knowledge, greetings, small talk
4. Score importance 1-10 (5+ = worth storing during compaction)
5. Every memory MUST attribute a source (provenance critical)

Importance rubric (full 1-10 range, NOT just 7-8):
- 10: Core identity, never-forget ("remember this forever", name/birthday)
- 9: Affects many future decisions / high-impact rules
- 8: Preference / decision-with-reasoning / operational rule
- 7: Specific durable fact
- 6: Borderline — during compaction, capture anyway
- 5: Would normally drop; keep as compaction safety net
- 4 or below: DROP (greetings, filler)

═══════════════════════════════════════════════════════════════
TYPE (6 values)
═══════════════════════════════════════════════════════════════
- claim: factual assertion (absorbs v0 fact/context/decision; decisions populate reasoning)
- preference: likes/dislikes/tastes
- directive: imperative rule ("always X", "never Y")
- commitment: future intent ("will do X")
- episode: notable event
- summary: derived synthesis (source must be derived|assistant)

═══════════════════════════════════════════════════════════════
SOURCE (provenance, CRITICAL)
═══════════════════════════════════════════════════════════════
- user: user explicitly stated it (in [user]: turns)
- user-inferred: extractor inferred from user signals
- assistant: assistant authored — DOWNGRADE unless user affirmed/quoted
- external, derived: rare

IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant.

═══════════════════════════════════════════════════════════════
SCOPE
═══════════════════════════════════════════════════════════════
work | personal | health | family | creative | finance | misc | unspecified

═══════════════════════════════════════════════════════════════
ENTITIES
═══════════════════════════════════════════════════════════════
- type ∈ {person, project, tool, company, concept, place}
- prefer specific names ("PostgreSQL" not "database")
- omit umbrella categories when specific name is present

═══════════════════════════════════════════════════════════════
REASONING (only for claims that are decisions)
═══════════════════════════════════════════════════════════════
For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.

═══════════════════════════════════════════════════════════════
FORMAT-AGNOSTIC PARSING (IMPORTANT)
═══════════════════════════════════════════════════════════════
The conversation may contain bullet lists, numbered lists, section headers, code snippets, or plain prose. Treat ALL formats as potential sources of extractable memory:
- Bullets/list items: each item is a candidate.
- Section headers (Context, Decisions, Key Learnings, Open Questions): use the header as a TYPE HINT (Context → claim, Decisions → claim+reasoning, Learnings → directive, Open Questions → commitment).
- Plain prose: parse each distinct assertion as a candidate.
- Code snippets: extract config choices, tool versions, architectural decisions embedded in comments or structure.
- Mixed format: apply all of the above.

Do NOT skip content just because it's in a summary. The agent has already filtered — your job is to convert into structured memories, not to re-evaluate worth.

═══════════════════════════════════════════════════════════════
OUTPUT FORMAT (no markdown, no code fences)
═══════════════════════════════════════════════════════════════
{
  "topics": ["topic 1", "topic 2"],
  "facts": [
    {
      "text": "...",
      "type": "claim|preference|directive|commitment|episode",
      "source": "user|user-inferred|assistant",
      "scope": "work|personal|health|...",
      "importance": N,
      "confidence": 0.9,
      "action": "ADD",
      "reasoning": "...", // optional, only for claim+decision
      "entities": [{"name": "...", "type": "tool"}]
    }
  ]
}

If nothing worth extracting: {"topics": [], "facts": []}`;
/**
 * Parse facts for compaction context (v1 taxonomy; importance floor 5).
 *
 * Identical to `parseFactsResponse` except the importance floor is 5 instead
 * of 6 — compaction is the last chance to capture context, so we accept
 * borderline facts that would normally be dropped.
 *
 * Accepts the same merged-topic v1 JSON shape as the main prompt
 * ({ topics, facts }) and, best-effort, the legacy v0 shape (bare JSON
 * array of facts).
 *
 * @param response - Raw LLM completion text.
 * @param logger - Optional logger (`info`/`warn` called via optional chaining).
 * @returns Validated fact objects; empty array when nothing parseable.
 */
export function parseFactsResponseForCompaction(response, logger) {
    // Keep a preview of the untouched response for diagnostics on parse failure.
    const originalPreview = response.trim().slice(0, 200);
    let cleaned = response.trim();
    // Strip <think>...</think> and <thinking>...</thinking> tags
    // (reasoning-model chatter that would break JSON.parse).
    cleaned = cleaned
        .replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '')
        .trim();
    // Strip markdown code fences if present
    if (cleaned.startsWith('```')) {
        cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
    }
    // JSON.parse wrapper returning undefined instead of throwing.
    const tryParse = (input) => {
        try {
            return JSON.parse(input);
        }
        catch {
            return undefined;
        }
    };
    let parsed = tryParse(cleaned);
    let recoveryUsed = 'none';
    if (parsed === undefined) {
        // Try bare-array first (legacy compaction output), then object (v1 merged).
        // Order matters: a greedy object scan would also match inside an array.
        const arrMatch = cleaned.match(/\[[\s\S]*\]/);
        if (arrMatch) {
            parsed = tryParse(arrMatch[0]);
            if (parsed !== undefined)
                recoveryUsed = 'bracket-scan';
        }
        if (parsed === undefined) {
            const objMatch = cleaned.match(/\{[\s\S]*\}/);
            if (objMatch) {
                parsed = tryParse(objMatch[0]);
                if (parsed !== undefined)
                    recoveryUsed = 'bracket-scan';
            }
        }
    }
    if (recoveryUsed === 'bracket-scan') {
        logger?.info?.(`parseFactsResponseForCompaction: recovered JSON via bracket-scan fallback`);
    }
    if (!parsed || typeof parsed !== 'object') {
        logger?.warn?.(`parseFactsResponseForCompaction: could not parse LLM output as JSON object. Preview: ${JSON.stringify(originalPreview)}`);
        return [];
    }
    const obj = parsed;
    const rawFacts = Array.isArray(obj.facts) ? obj.facts : null;
    // Legacy v0 compaction output (bare JSON array) — best-effort parse.
    const rawArray = rawFacts ?? (Array.isArray(parsed) ? parsed : null);
    if (!rawArray) {
        logger?.warn?.(`parseFactsResponseForCompaction: expected { facts: [...] } object, got ${typeof parsed}`);
        return [];
    }
    const validActions = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
    const facts = rawArray
        // Require a text field of at least 5 chars; everything else is junk.
        .filter((f) => !!f &&
            typeof f === 'object' &&
            typeof f.text === 'string' &&
            f.text.length >= 5)
        .map((f) => {
            const rawType = String(f.type ?? 'claim').toLowerCase();
            // Accept v1 tokens directly; coerce legacy v0 tokens via V0_TO_V1_TYPE.
            // Anything unrecognized falls back to 'claim'.
            let type;
            if (isValidMemoryType(rawType)) {
                type = rawType;
            }
            else if (LEGACY_V0_MEMORY_TYPES.includes(rawType)) {
                type = V0_TO_V1_TYPE[rawType];
            }
            else {
                type = 'claim';
            }
            // Unknown source/scope values degrade to safe defaults rather
            // than dropping the fact.
            const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
            const source = VALID_MEMORY_SOURCES.includes(rawSource)
                ? rawSource
                : 'user-inferred';
            const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
            const scope = VALID_MEMORY_SCOPES.includes(rawScope)
                ? rawScope
                : 'unspecified';
            const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;
            const action = validActions.includes(String(f.action))
                ? String(f.action)
                : 'ADD';
            // Invalid entities are dropped individually (see parseEntity);
            // the entities key is only set when at least one survives.
            let entities;
            if (Array.isArray(f.entities)) {
                const valid = f.entities
                    .map(parseEntity)
                    .filter((e) => e !== null);
                if (valid.length > 0)
                    entities = valid;
            }
            const result = {
                text: String(f.text).slice(0, 512),
                type,
                source,
                scope,
                reasoning,
                // Non-numeric importance coerces to NaN → falsy → default 5.
                importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
                action,
                existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
                confidence: normalizeConfidence(f.confidence),
            };
            if (entities)
                result.entities = entities;
            return result;
        })
        // Reject illegal type:summary + source:user
        .filter((f) => !(f.type === 'summary' && f.source === 'user'))
        // Compaction: importance >= 5 (not 6); DELETEs always pass through.
        .filter((f) => f.importance >= 5 || f.action === 'DELETE');
    return facts;
}
/**
 * Extract facts using the compaction-aware prompt.
 *
 * This is called from the `before_compaction` hook — the LAST CHANCE to
 * capture knowledge before conversation context is lost. Key differences
 * from `extractFacts`:
 * - Uses `COMPACTION_SYSTEM_PROMPT` (lower threshold, format-agnostic, more aggressive)
 * - Always processes the full conversation (`mode: 'full'`)
 * - Importance filter is >= 5 instead of >= 6
 * - Lexical importance bumps still apply
 *
 * Never throws: every failure path logs and returns [].
 *
 * @param rawMessages - The messages array from the hook event (unknown[])
 * @param existingMemories - Optional list of existing memories for dedup context
 * @param logger - Optional logger for observability
 * @returns Array of extracted facts, or empty array on failure.
 */
export async function extractFactsForCompaction(rawMessages, existingMemories, logger) {
    // No resolvable LLM config means extraction is impossible — bail quietly.
    const config = resolveLLMConfig();
    if (!config) {
        logger?.info?.('extractFactsForCompaction: no LLM config resolved (skipping extraction)');
        return [];
    }
    // Parse messages down to user/assistant text turns (see messageToText).
    const parsed = rawMessages
        .map(messageToText)
        .filter((m) => m !== null);
    if (parsed.length === 0) {
        logger?.info?.(`extractFactsForCompaction: no parseable messages (raw count=${rawMessages.length})`);
        return [];
    }
    // Always full mode — process entire conversation for compaction.
    // 12k chars ≈ 3k tokens under the 4-chars/token heuristic.
    const conversationText = truncateMessages(parsed, 12_000);
    if (conversationText.length < 20) {
        logger?.info?.(`extractFactsForCompaction: conversation too short (${conversationText.length} chars < 20)`);
        return [];
    }
    // Build existing memories context if available, so the LLM can emit
    // UPDATE/DELETE/NOOP actions against known IDs instead of duplicating.
    let memoriesContext = '';
    if (existingMemories && existingMemories.length > 0) {
        const memoriesStr = existingMemories
            .map((m) => `[ID: ${m.id}] ${m.text}`)
            .join('\n');
        memoriesContext = `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoriesStr}`;
    }
    const userPrompt = `Extract ALL valuable long-term memories from this conversation before it is compacted and lost:\n\n${conversationText}${memoriesContext}`;
    let response;
    try {
        response = await chatCompletion(config, [
            { role: 'system', content: COMPACTION_SYSTEM_PROMPT },
            { role: 'user', content: userPrompt },
        ], {
            // 3.3.1-rc.2: retry transient 429 / timeout (same policy as extractFacts).
            retry: { attempts: 3, baseDelayMs: 1000 },
            timeoutMs: 30_000,
            logger,
        });
    }
    catch (err) {
        // Extraction is best-effort; never let an LLM failure break compaction.
        const msg = err instanceof Error ? err.message : String(err);
        logger?.warn?.(`extractFactsForCompaction: chatCompletion threw: ${msg}`);
        return [];
    }
    if (!response) {
        logger?.info?.('extractFactsForCompaction: chatCompletion returned null/empty response');
        return [];
    }
    logger?.info?.(`extractFactsForCompaction: LLM returned ${response.length} chars; handing to parseFactsResponseForCompaction`);
    let facts = parseFactsResponseForCompaction(response, logger);
    // v1 provenance filter (tag-don't-drop). Uses importance >= 5 floor because
    // the filter's own floor is 5 in lax mode, matching compaction semantics.
    // NOTE(review): applyProvenanceFilterLax is defined elsewhere in this
    // module — behavior assumed from its name/comment; confirm at its definition.
    facts = applyProvenanceFilterLax(facts, conversationText);
    // Comparative rescore if >= 5 facts (same as default pipeline), else
    // assign defaultVolatility so v1 write path has a value.
    // NOTE(review): comparativeRescoreV1 / defaultVolatility are also defined
    // elsewhere in this module.
    facts = await comparativeRescoreV1(facts, conversationText, logger);
    facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
    // Lexical importance bumps (same as regular extraction). Facts already
    // at >= 8 only ever gain +1 to avoid saturating everything at 10.
    for (const f of facts) {
        const bump = computeLexicalImportanceBump(f.text, conversationText);
        if (bump > 0) {
            const oldImportance = f.importance;
            const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
            f.importance = Math.min(10, f.importance + effectiveBump);
            logger?.info?.(`extractFactsForCompaction: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`);
        }
    }
    return facts;
}
// ---------------------------------------------------------------------------
// Debrief Extraction
// ---------------------------------------------------------------------------
/**
 * Canonical debrief system prompt — must be identical across all clients.
 *
 * The `{already_stored_facts}` placeholder is substituted by the caller
 * before the prompt is sent.
 */
export const DEBRIEF_SYSTEM_PROMPT = `You are reviewing a conversation that just ended. The following facts were
already extracted and stored during this conversation:

{already_stored_facts}

Your job is to capture what turn-by-turn extraction MISSED. Focus on:

1. **Broader context** — What was the conversation about overall? What project,
problem, or topic tied the discussion together?
2. **Outcomes & conclusions** — What was decided, agreed upon, or resolved?
3. **What was attempted** — What approaches were tried? What worked, what didn't, and why?
4. **Relationships** — How do topics discussed relate to each other or to things
from previous conversations?
5. **Open threads** — What was left unfinished or needs follow-up?

Do NOT repeat facts already stored. Only add genuinely new information that provides
broader context a future conversation would benefit from.

Return a JSON array (no markdown, no code fences):
[{"text": "...", "type": "summary|context", "importance": N}]

- Use type "summary" for conclusions, outcomes, and decisions-of-the-session
- Use type "context" for broader project context, open threads, and what-was-tried
- Importance 7-8 for most debrief items (they are high-value by definition)
- Maximum 5 items (debriefs should be concise, not exhaustive)
- Each item should be 1-3 sentences, self-contained

If the conversation was too short or trivial to warrant a debrief, return: []`;
/**
 * Parse a debrief response into validated DebriefItems.
 *
 * Accepts raw LLM output (optionally wrapped in a markdown code fence),
 * validates each entry, clamps importance to [1, 10], drops items below
 * importance 6, and caps the result at 5 items. Returns [] on any parse
 * failure or when the payload is not a JSON array.
 */
export function parseDebriefResponse(response) {
    let text = response.trim();
    // Tolerate a surrounding ``` / ```json fence even though the prompt forbids it.
    if (text.startsWith('```')) {
        text = text.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
    }
    let items;
    try {
        items = JSON.parse(text);
    }
    catch {
        return [];
    }
    if (!Array.isArray(items)) {
        return [];
    }
    const debrief = [];
    for (const raw of items) {
        // Debriefs are capped at 5 items by contract.
        if (debrief.length === 5) {
            break;
        }
        const isUsable = raw &&
            typeof raw === 'object' &&
            typeof raw.text === 'string' &&
            raw.text.length >= 5;
        if (!isUsable) {
            continue;
        }
        const type = raw.type === 'summary' ? 'summary' : 'context';
        const scored = typeof raw.importance === 'number' ? raw.importance : 7;
        const importance = Math.max(1, Math.min(10, scored));
        // Only high-value items (importance >= 6) survive.
        if (importance < 6) {
            continue;
        }
        debrief.push({ text: String(raw.text).slice(0, 512), type, importance });
    }
    return debrief;
}
/**
 * Extract a session debrief using LLM.
 *
 * @param rawMessages - All messages from the session
 * @param storedFactTexts - Texts of facts already stored in this session (for dedup)
 * @returns Array of debrief items, or empty array on failure
 */
export async function extractDebrief(rawMessages, storedFactTexts) {
    const llmConfig = resolveLLMConfig();
    if (!llmConfig)
        return [];
    const turns = rawMessages
        .map(messageToText)
        .filter((m) => m !== null);
    // Minimum 4 turns (8 messages) to warrant a debrief
    if (turns.length < 8)
        return [];
    const conversationText = truncateMessages(turns, 12_000);
    if (conversationText.length < 20)
        return [];
    // Render the dedup list for the prompt placeholder.
    let alreadyStored = '(none)';
    if (storedFactTexts.length > 0) {
        alreadyStored = storedFactTexts.map((t) => `- ${t}`).join('\n');
    }
    const systemPrompt = DEBRIEF_SYSTEM_PROMPT.replace('{already_stored_facts}', alreadyStored);
    const messages = [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: `Review this conversation and provide a debrief:\n\n${conversationText}` },
    ];
    try {
        // 3.3.1-rc.2: retry transient 429 / timeout.
        const response = await chatCompletion(llmConfig, messages, {
            retry: { attempts: 3, baseDelayMs: 1000 },
            timeoutMs: 30_000,
        });
        return response ? parseDebriefResponse(response) : [];
    }
    catch {
        // Debriefs are best-effort; any LLM failure yields an empty debrief.
        return [];
    }
}
+ // ---------------------------------------------------------------------------
732
+ // v1 Taxonomy Extraction Pipeline (default as of plugin v3.0.0)
733
+ //
734
+ // Produces facts conforming to Memory Taxonomy v1 (6 types: claim,
735
+ // preference, directive, commitment, episode, summary; 5 sources; 8 scopes).
736
+ //
737
+ // The G-pipeline uses a single merged-topic prompt that returns both the
738
+ // 2-3 main topics the user engaged with AND the extracted facts, so topic
739
+ // anchoring is preserved within one call. After extraction we apply:
740
+ //
741
+ // 1. `applyProvenanceFilterLax` — tag-don't-drop. Assistant-sourced facts
742
+ // get their importance capped at 7 rather than being filtered out; the
743
+ // reranker later uses the source field to deprioritize them.
744
+ // 2. `comparativeRescoreV1` — spread importance across the 1-10 range
745
+ // and assign volatility. Forced when the batch has >= 5 facts.
746
+ // 3. `defaultVolatility` — heuristic fallback.
747
+ //
748
+ // This matches the winning G pipeline from the 200-conv benchmark.
749
+ // ---------------------------------------------------------------------------
750
/**
 * The main extraction system prompt (v1 merged-topic pipeline).
 *
 * A single LLM call both identifies 2-3 conversation topics AND extracts
 * facts anchored to them; `parseMergedResponseV1` consumes the resulting
 * `{ topics, facts }` JSON object.
 *
 * Exported as both `EXTRACTION_SYSTEM_PROMPT` (canonical) and
 * `EXTRACTION_SYSTEM_PROMPT_V1_MERGED` (deprecated alias) for back-compat.
 */
export const EXTRACTION_SYSTEM_PROMPT = `You are a memory extraction engine using Memory Taxonomy v1. Work in TWO explicit phases within one response:

PHASE 1 — Topic identification.
Before extracting any fact, identify the 2-3 main topics the user was engaging with. Topics should be short phrases (2-5 words each). If the conversation has no clear user-focused topic, use an empty topics array.

PHASE 2 — Fact extraction anchored to those topics.
Extract valuable memories. Prefer facts that directly relate to the identified topics (importance 7-9 range). Tangential facts may still be extracted but score lower (6-7 range).

Rules:
1. Each memory = single self-contained piece of information
2. Focus on user-specific info useful in future conversations
3. Skip generic knowledge, greetings, small talk, ephemeral task coordination
4. Score importance 1-10 (6+ = worth storing)
5. Every memory MUST attribute a source (provenance critical)

Importance rubric (use FULL 1-10 range):
- 10: Critical, core identity, never-forget content
- 9: Affects many future decisions
- 8: High-value preference/decision/rule
- 7: Specific durable fact
- 6: Borderline
- 5 or below: NOT worth storing — drop

DO NOT cluster everything at 7-8-9.

═══════════════════════════════════════════════════════════════
TYPE (6 values)
═══════════════════════════════════════════════════════════════
- claim: factual assertion (absorbs fact/context/decision; decisions populate reasoning field)
- preference: likes/dislikes/tastes
- directive: imperative rule ("always X", "never Y")
- commitment: future intent ("will do X")
- episode: notable event
- summary: derived synthesis (source must be derived|assistant) — do NOT emit for turn-extraction

═══════════════════════════════════════════════════════════════
SOURCE (provenance, CRITICAL)
═══════════════════════════════════════════════════════════════
- user: user explicitly stated it (in [user]: turns)
- user-inferred: extractor inferred from user signals
- assistant: assistant authored content — DOWNGRADE unless user affirmed/quoted/used it
- external, derived: rare

IF fact substance appears ONLY in [assistant]: turns without user affirmation → source:assistant

═══════════════════════════════════════════════════════════════
SCOPE (life domain)
═══════════════════════════════════════════════════════════════
work | personal | health | family | creative | finance | misc | unspecified

═══════════════════════════════════════════════════════════════
ENTITIES
═══════════════════════════════════════════════════════════════
- type ∈ {person, project, tool, company, concept, place}
- prefer specific names ("PostgreSQL" not "database")
- omit umbrella categories when specific name is present

═══════════════════════════════════════════════════════════════
REASONING (only for claims that are decisions)
═══════════════════════════════════════════════════════════════
For type=claim where the user expressed a decision-with-reasoning, populate "reasoning" with the WHY clause.

═══════════════════════════════════════════════════════════════
OUTPUT FORMAT (no markdown, no code fences)
═══════════════════════════════════════════════════════════════
{
"topics": ["topic 1", "topic 2"],
"facts": [
{
"text": "...",
"type": "claim|preference|directive|commitment|episode",
"source": "user|user-inferred|assistant",
"scope": "work|personal|health|...",
"importance": N,
"confidence": 0.9,
"action": "ADD",
"reasoning": "...", // optional, only for claim+decision
"entities": [{"name": "...", "type": "tool"}]
}
]
}

If nothing worth extracting: {"topics": [], "facts": []}`;
/**
 * @deprecated Use `EXTRACTION_SYSTEM_PROMPT` instead. Kept only as a
 * back-compat alias for callers that imported the v1 rollout name.
 * Both names reference the exact same string; no divergence is possible.
 */
export const EXTRACTION_SYSTEM_PROMPT_V1_MERGED = EXTRACTION_SYSTEM_PROMPT;
/**
 * Parse a v1 merged-topic LLM response. Returns both the topic list and the
 * validated/filtered fact list. Illegal combinations (summary+user) are
 * dropped; importance < 6 with action != DELETE is dropped.
 *
 * Exported as both `parseFactsResponse` (canonical, returns facts array) and
 * `parseMergedResponseV1` (returns `{ topics, facts }`). Prefer the former
 * unless the topic list is needed.
 *
 * @param response - Raw LLM output (may include fences / <think> blocks).
 * @param logger - Optional logger with `info`/`warn` methods.
 * @returns `{ topics, facts }` — both empty on unrecoverable parse failure.
 */
export function parseMergedResponseV1(response, logger) {
    // Keep a short preview of the raw output for the parse-failure log below.
    const originalPreview = response.trim().slice(0, 200);
    let cleaned = response.trim();
    // Strip <think>/<thinking> blocks emitted by reasoning models.
    cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
    // Strip a surrounding markdown fence, with or without a "json" tag.
    if (cleaned.startsWith('```')) {
        cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
    }
    // JSON.parse wrapped so recovery attempts below can probe candidates.
    const tryParse = (input) => {
        try {
            return JSON.parse(input);
        }
        catch {
            return undefined;
        }
    };
    let parsed = tryParse(cleaned);
    let recoveryUsed = 'none';
    if (parsed === undefined) {
        // First try an outermost-array greedy match (legacy bare-array format).
        const arrMatch = cleaned.match(/\[[\s\S]*\]/);
        if (arrMatch) {
            parsed = tryParse(arrMatch[0]);
            if (parsed !== undefined)
                recoveryUsed = 'bracket-scan';
        }
        if (parsed === undefined) {
            // Fall back to an outermost-object greedy match (merged-topic format).
            const objMatch = cleaned.match(/\{[\s\S]*\}/);
            if (objMatch) {
                parsed = tryParse(objMatch[0]);
                if (parsed !== undefined)
                    recoveryUsed = 'bracket-scan';
            }
        }
    }
    if (recoveryUsed === 'bracket-scan') {
        logger?.info?.(`parseFactsResponse: recovered JSON via bracket-scan fallback`);
    }
    if (!parsed || typeof parsed !== 'object') {
        logger?.warn?.(`parseFactsResponse: could not parse LLM output as JSON. Preview: ${JSON.stringify(originalPreview)}`);
        return { topics: [], facts: [] };
    }
    // Dual-format acceptance: either the merged object `{ topics, facts }` or
    // a bare JSON array of fact objects (legacy / test fixture shape). The
    // bare array is wrapped as { topics: [], facts: [...] } so the downstream
    // logic stays uniform. A single fact object (no wrapper) is also wrapped.
    let obj;
    if (Array.isArray(parsed)) {
        obj = { topics: [], facts: parsed };
    }
    else if (typeof parsed.facts === 'undefined' &&
        typeof parsed.text === 'string') {
        // Single fact object, not a merged wrapper.
        obj = { topics: [], facts: [parsed] };
    }
    else {
        obj = parsed;
    }
    // Topics: keep up to 3 non-empty strings; anything else is discarded.
    const rawTopics = obj.topics;
    const topics = Array.isArray(rawTopics)
        ? rawTopics
            .filter((t) => typeof t === 'string' && t.length > 0)
            .slice(0, 3)
        : [];
    const rawFacts = obj.facts;
    if (!Array.isArray(rawFacts))
        return { topics, facts: [] };
    const validActions = ['ADD', 'UPDATE', 'DELETE', 'NOOP'];
    const facts = rawFacts
        // Drop entries that are not objects with a usable text field (>= 5 chars).
        .filter((f) => !!f &&
        typeof f === 'object' &&
        typeof f.text === 'string' &&
        f.text.length >= 5)
        .map((f) => {
        const rawType = String(f.type ?? 'claim').toLowerCase();
        // Accept both v1 tokens and legacy v0 tokens — coerce v0 via V0_TO_V1_TYPE.
        let type;
        if (isValidMemoryType(rawType)) {
            type = rawType;
        }
        else if (LEGACY_V0_MEMORY_TYPES.includes(rawType)) {
            type = V0_TO_V1_TYPE[rawType];
        }
        else {
            // Unknown type token → default to the broadest v1 type.
            type = 'claim';
        }
        // Unknown source/scope tokens fall back to safe defaults rather
        // than dropping the fact.
        const rawSource = String(f.source ?? 'user-inferred').toLowerCase();
        const source = VALID_MEMORY_SOURCES.includes(rawSource)
            ? rawSource
            : 'user-inferred';
        const rawScope = String(f.scope ?? 'unspecified').toLowerCase();
        const scope = VALID_MEMORY_SCOPES.includes(rawScope)
            ? rawScope
            : 'unspecified';
        // Reasoning is optional and size-capped (256 chars).
        const reasoning = typeof f.reasoning === 'string' ? f.reasoning.slice(0, 256) : undefined;
        const action = validActions.includes(String(f.action))
            ? String(f.action)
            : 'ADD';
        let entities;
        if (Array.isArray(f.entities)) {
            // parseEntity returns null for malformed entries; keep only valid ones.
            const valid = f.entities
                .map(parseEntity)
                .filter((e) => e !== null);
            if (valid.length > 0)
                entities = valid;
        }
        const fact = {
            // Fact text is size-capped at 512 chars.
            text: String(f.text).slice(0, 512),
            type,
            source,
            scope,
            reasoning,
            // NaN / 0 / missing importance coerces to 5 via `|| 5`, then clamped to [1, 10].
            importance: Math.max(1, Math.min(10, Number(f.importance) || 5)),
            confidence: normalizeConfidence(f.confidence),
            action,
            existingFactId: typeof f.existingFactId === 'string' ? f.existingFactId : undefined,
        };
        if (entities)
            fact.entities = entities;
        return fact;
    })
        // Reject illegal type:summary + source:user
        .filter((f) => !(f.type === 'summary' && f.source === 'user'))
        // Importance threshold (preserves DELETE)
        .filter((f) => f.importance >= 6 || f.action === 'DELETE');
    return { topics, facts };
}
/**
 * Parse an LLM extraction response into structured v1 facts. Canonical
 * parser used by the default `extractFacts()` pipeline.
 *
 * Thin wrapper around `parseMergedResponseV1` that keeps only the fact
 * list, so existing callers that expect a flat `ExtractedFact[]`
 * signature keep working.
 */
export function parseFactsResponse(response, logger) {
    const { facts } = parseMergedResponseV1(response, logger);
    return facts;
}
/**
 * Tag-don't-drop provenance filter (pipeline G / F).
 *
 * For each fact:
 * - If source is already "assistant", cap importance at 7.
 * - Otherwise, keyword-match the fact against user turns. If <30% of
 *   content words (length >= 4) appear in user turns AND source != "user",
 *   tag source as "assistant" and cap importance at 7 (keep the fact).
 * - Drop facts below importance 5 (unless DELETE action).
 */
export function applyProvenanceFilterLax(facts, conversationText) {
    // Collapse all [user]: turns into one lowercase haystack for keyword matching.
    const userHaystack = conversationText
        .split(/\n\n/)
        .filter((turn) => turn.startsWith('[user]:'))
        .join(' ')
        .toLowerCase();
    const kept = [];
    for (const fact of facts) {
        let tagged = fact;
        if (fact.source === 'assistant') {
            // Already assistant-sourced: just enforce the importance cap.
            tagged = { ...fact, importance: Math.min(fact.importance, 7) };
        }
        else {
            // Content words: lowercase, strip punctuation, keep words of >= 4 chars.
            const contentWords = fact.text
                .toLowerCase()
                .replace(/[^a-z0-9\s]/g, ' ')
                .split(/\s+/)
                .filter((word) => word.length >= 4);
            let hits = 0;
            for (const word of contentWords) {
                if (userHaystack.includes(word))
                    hits += 1;
            }
            const matchRatio = contentWords.length > 0 ? hits / contentWords.length : 0;
            // Weak user-turn support + not explicitly user-stated → re-tag as assistant.
            if (matchRatio < 0.3 && fact.source !== 'user') {
                tagged = {
                    ...fact,
                    source: 'assistant',
                    importance: Math.min(fact.importance, 7),
                };
            }
        }
        // Importance floor (DELETE actions always survive).
        if (tagged.importance >= 5 || tagged.action === 'DELETE') {
            kept.push(tagged);
        }
    }
    return kept;
}
/**
 * Heuristic fallback volatility when the LLM doesn't assign one.
 * Type-based rules take precedence over scope-based rules.
 */
export function defaultVolatility(f) {
    switch (f.type) {
        case 'commitment':
            // Future intents get revised as plans evolve.
            return 'updatable';
        case 'episode':
        case 'directive':
            // Past events and standing rules rarely change.
            return 'stable';
        default:
            break;
    }
    // Health and family facts tend to be long-lived; everything else is updatable.
    return f.scope === 'health' || f.scope === 'family' ? 'stable' : 'updatable';
}
// System prompt for the v1 comparative re-ranking pass (see
// `comparativeRescoreV1`): spreads importance across 1-10 and assigns a
// volatility class to each fact, returning one JSON object per input fact
// in the same order.
const COMPARATIVE_PROMPT_V1 = `You are a memory re-ranker for the v1 taxonomy. You receive facts already extracted from one conversation, each with initial importance. Your job is twofold:

1. RE-RANK importance to spread across the 1-10 range (avoid clustering at 7-8-9)
2. ASSIGN volatility to each fact

Re-ranking rules:
- Top 1/3 of facts (most significant for this user): importance 9-10
- Middle 1/3: importance 7-8
- Bottom 1/3: importance 5-6 (borderline, may be dropped)
- A fact may stay at 10 if it's clearly identity-defining (name, birthday) or marked as "never forget"
- Never raise without justification; never lower below 5 unless clearly noise
- You MUST produce a spread

Volatility rules:
- stable: unlikely to change for years (name, allergies, birthplace, fundamental traits)
- updatable: changes occasionally (current job, active project, partner's name, address)
- ephemeral: short-lived state (today's task, this week's plan, current trip itinerary)

Use the FULL conversation context to judge volatility — a single claim may be ambiguous, but in context you can usually tell.

Return JSON array, same order as input, ONLY with importance + volatility fields:
[{"importance": N, "volatility": "stable|updatable|ephemeral"}, ...]
No markdown.`;
/**
 * Comparative re-scoring pass (v1). Forces re-scoring when facts.length >= 5
 * so the importance distribution spreads across the 1-10 range. When
 * facts.length < 5, assigns defaultVolatility and returns.
 *
 * @param facts - Facts from the extraction pass (importance pre-assigned).
 * @param conversationText - Full conversation context given to the re-ranker.
 * @param logger - Optional logger with `info`/`warn` methods.
 * @returns The facts with `importance` and `volatility` (re)assigned. On any
 *   LLM or parse failure the input facts are returned unchanged except for a
 *   heuristic `defaultVolatility` assignment.
 */
export async function comparativeRescoreV1(facts, conversationText, logger) {
    // G-tuned behavior: force rescore only when >= 5 facts. (A previous
    // `facts.length < 2 ||` clause was redundant — it is subsumed by `< 5`.)
    if (facts.length < 5) {
        return facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
    }
    const config = resolveLLMConfig();
    if (!config) {
        // No LLM available: heuristic volatility only, importance untouched.
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    // Number the facts so the model can return results in matching order.
    const factsForPrompt = facts
        .map((f, i) => `${i + 1}. [imp: ${f.importance}] [type: ${f.type}] [scope: ${f.scope ?? 'unspecified'}] ${f.text}`)
        .join('\n');
    const userPrompt = `Conversation context:\n${conversationText}\n\nExtracted facts:\n${factsForPrompt}\n\nReturn ${facts.length} JSON objects, each with "importance" + "volatility". Match input order.`;
    let response;
    try {
        response = await chatCompletion(config, [
            { role: 'system', content: COMPARATIVE_PROMPT_V1 },
            { role: 'user', content: userPrompt },
        ], {
            // 3.3.1-rc.2: retry transient 429 / timeout (rescore is an inner
            // call after extractFacts — if extraction backs off successfully
            // the rescore usually also passes on first try, but keep symmetry).
            retry: { attempts: 3, baseDelayMs: 1000 },
            timeoutMs: 30_000,
            logger,
        });
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logger?.warn?.(`comparativeRescoreV1: chatCompletion threw: ${msg}`);
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    if (!response) {
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    // Clean the response: strip <think> blocks and markdown fences, then
    // isolate the outermost JSON array.
    let cleaned = response.trim();
    cleaned = cleaned.replace(/<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>/gi, '').trim();
    if (cleaned.startsWith('```')) {
        cleaned = cleaned.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
    }
    const match = cleaned.match(/\[[\s\S]*\]/);
    if (match)
        cleaned = match[0];
    let parsed;
    try {
        parsed = JSON.parse(cleaned);
    }
    catch {
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    if (!Array.isArray(parsed)) {
        return facts.map((f) => ({ ...f, volatility: defaultVolatility(f) }));
    }
    // Merge positionally: malformed entries fall back to the fact's original
    // importance / heuristic volatility. Rescored importance is clamped to
    // [5, 10] — the re-ranker may not demote a fact below the storage floor.
    return facts.map((f, i) => {
        const entry = parsed[i];
        const rawImp = entry && typeof entry === 'object' ? Number(entry.importance) : NaN;
        const rawVol = entry && typeof entry === 'object' ? String(entry.volatility ?? '').toLowerCase() : '';
        const newImp = Number.isFinite(rawImp)
            ? Math.max(5, Math.min(10, Math.round(rawImp)))
            : f.importance;
        const newVol = VALID_MEMORY_VOLATILITIES.includes(rawVol)
            ? rawVol
            : defaultVolatility(f);
        return { ...f, importance: newImp, volatility: newVol };
    });
}
/**
 * Main extraction entry point (default pipeline as of plugin v3.0.0).
 *
 * Pipeline: single merged-topic LLM call → `applyProvenanceFilterLax`
 * (tag-don't-drop) → `comparativeRescoreV1` (forces re-rank when >= 5 facts)
 * → `defaultVolatility` fallback → lexical importance bumps.
 *
 * Produces v1-shaped facts with `type`, `source`, `scope`, `volatility`,
 * and optional `reasoning` fields populated. The caller should hand the
 * result to `storeExtractedFacts` which emits a v1 canonical claim blob.
 *
 * @param rawMessages - Raw session messages; each is parsed via `messageToText`.
 * @param mode - 'turn' limits extraction to the last 6 parsed messages;
 *   any other value processes the whole conversation.
 * @param existingMemories - Optional `{ id, text }` list injected into the
 *   prompt so the LLM can emit UPDATE/DELETE/NOOP actions for duplicates.
 * @param profileContext - Optional system-prompt override; falls back to
 *   `EXTRACTION_SYSTEM_PROMPT` when falsy.
 * @param logger - Optional logger with `info`/`warn` methods.
 * @returns Extracted v1 facts, or [] when config/input/LLM is unavailable.
 */
export async function extractFacts(rawMessages, mode, existingMemories, profileContext, logger) {
    const config = resolveLLMConfig();
    if (!config) {
        logger?.info?.('extractFacts: no LLM config resolved (skipping extraction)');
        return [];
    }
    const parsed = rawMessages
        .map(messageToText)
        .filter((m) => m !== null);
    if (parsed.length === 0) {
        logger?.info?.(`extractFacts: no parseable messages (raw count=${rawMessages.length})`);
        return [];
    }
    // Turn mode only looks at the most recent exchange window.
    const relevantMessages = mode === 'turn' ? parsed.slice(-6) : parsed;
    const conversationText = truncateMessages(relevantMessages, 12_000);
    if (conversationText.length < 20) {
        logger?.info?.(`extractFacts: conversation too short (${conversationText.length} chars < 20, parsed=${parsed.length}, mode=${mode})`);
        return [];
    }
    // Inline existing memories so the model can dedup against them.
    let memoriesContext = '';
    if (existingMemories && existingMemories.length > 0) {
        const memoriesStr = existingMemories
            .map((m) => `[ID: ${m.id}] ${m.text}`)
            .join('\n');
        memoriesContext = `\n\nExisting memories (use these for dedup — classify as UPDATE/DELETE/NOOP if they conflict or overlap):\n${memoriesStr}`;
    }
    const userPrompt = mode === 'turn'
        ? `Extract important facts from these recent conversation turns:\n\n${conversationText}${memoriesContext}`
        : `Extract ALL valuable long-term memories from this conversation before it is lost:\n\n${conversationText}${memoriesContext}`;
    const systemPrompt = profileContext || EXTRACTION_SYSTEM_PROMPT;
    let response;
    try {
        response = await chatCompletion(config, [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userPrompt },
        ], {
            // 3.3.1-rc.2: the headline fix for the rc.1 QA NO-GO — 5/6 extraction
            // windows failed on zai 429 + timeouts with no retry. 3 attempts with
            // 1s → 2s → 4s backoff recovers virtually all transient rate-limit
            // hiccups. Graceful timeout: per-attempt 30s, total worst-case 30+1+30+2+30+4≈97s.
            retry: { attempts: 3, baseDelayMs: 1000 },
            timeoutMs: 30_000,
            logger,
        });
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logger?.warn?.(`extractFacts: chatCompletion threw: ${msg}`);
        return [];
    }
    if (!response) {
        logger?.info?.('extractFacts: chatCompletion returned null/empty response');
        return [];
    }
    logger?.info?.(`extractFacts: LLM returned ${response.length} chars; parsing merged response`);
    const { topics, facts: rawFacts } = parseMergedResponseV1(response, logger);
    if (topics.length > 0) {
        logger?.info?.(`extractFacts: topics = ${JSON.stringify(topics)}`);
    }
    // Provenance filter (tag-don't-drop): re-tags weakly user-supported facts
    // as assistant-sourced and caps their importance at 7.
    let facts = applyProvenanceFilterLax(rawFacts, conversationText);
    // Comparative rescore (forces re-rank when >= 5 facts)
    facts = await comparativeRescoreV1(facts, conversationText, logger);
    // Ensure every fact has a volatility (defensive: rescore may have skipped)
    facts = facts.map((f) => ({ ...f, volatility: f.volatility ?? defaultVolatility(f) }));
    // Lexical importance bumps (same as v0 pipeline). Facts already at >= 8
    // receive at most +1 so bumps cannot inflate the top of the scale.
    for (const f of facts) {
        const bump = computeLexicalImportanceBump(f.text, conversationText);
        if (bump > 0) {
            const oldImportance = f.importance;
            const effectiveBump = f.importance >= 8 ? Math.min(bump, 1) : bump;
            f.importance = Math.min(10, f.importance + effectiveBump);
            logger?.info?.(`extractFacts: lexical bump +${bump} for "${f.text.slice(0, 60)}..." (${oldImportance} → ${f.importance})`);
        }
    }
    return facts;
}