@shadowforge0/aquifer-memory 1.0.2 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +29 -20
  2. package/consumers/claude-code.js +117 -0
  3. package/consumers/cli.js +17 -0
  4. package/consumers/default/daily-entries.js +196 -0
  5. package/consumers/default/index.js +282 -0
  6. package/consumers/default/prompts/summary.js +153 -0
  7. package/consumers/mcp.js +3 -23
  8. package/consumers/miranda/context-inject.js +119 -0
  9. package/consumers/miranda/daily-entries.js +224 -0
  10. package/consumers/miranda/index.js +353 -0
  11. package/consumers/miranda/instance.js +55 -0
  12. package/consumers/miranda/llm.js +99 -0
  13. package/consumers/miranda/prompts/summary.js +303 -0
  14. package/consumers/miranda/recall-format.js +74 -0
  15. package/consumers/miranda/workspace-files.js +91 -0
  16. package/consumers/openclaw-ext/index.js +38 -0
  17. package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
  18. package/consumers/openclaw-ext/package.json +10 -0
  19. package/consumers/openclaw-plugin.js +66 -74
  20. package/consumers/opencode.js +21 -24
  21. package/consumers/shared/autodetect.js +64 -0
  22. package/consumers/shared/entity-parser.js +119 -0
  23. package/consumers/shared/ingest.js +148 -0
  24. package/consumers/shared/llm-autodetect.js +137 -0
  25. package/consumers/shared/normalize.js +129 -0
  26. package/consumers/shared/recall-format.js +110 -0
  27. package/core/aquifer.js +200 -82
  28. package/core/entity.js +29 -17
  29. package/core/storage.js +116 -45
  30. package/docs/postprocess-contract.md +132 -0
  31. package/index.js +9 -1
  32. package/package.json +23 -2
  33. package/pipeline/_http.js +1 -1
  34. package/pipeline/consolidation/apply.js +176 -0
  35. package/pipeline/consolidation/index.js +21 -0
  36. package/pipeline/extract-entities.js +2 -2
  37. package/pipeline/rerank.js +1 -1
  38. package/pipeline/summarize.js +4 -1
  39. package/schema/001-base.sql +61 -24
  40. package/schema/002-entities.sql +17 -3
  41. package/schema/004-facts.sql +67 -0
  42. package/scripts/diagnose-fts-zh.js +168 -134
  43. package/scripts/diagnose-vector.js +188 -0
  44. package/scripts/install-openclaw.sh +59 -0
  45. package/scripts/smoke.mjs +2 -2
package/core/storage.js CHANGED
@@ -281,7 +281,10 @@ async function searchSessions(pool, query, {
281
281
  FROM ${qi(schema)}.sessions s
282
282
  LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
283
283
  WHERE ${where.join(' AND ')}
284
- ORDER BY fts_rank DESC, s.last_message_at DESC NULLS LAST
284
+ ORDER BY
285
+ COALESCE(ss.search_text ILIKE '%' || $1 || '%', FALSE) DESC,
286
+ fts_rank DESC,
287
+ s.last_message_at DESC NULLS LAST
285
288
  LIMIT $${params.length}`,
286
289
  params
287
290
  );
@@ -360,32 +363,44 @@ async function upsertTurnEmbeddings(pool, sessionRowId, {
360
363
  throw new Error(`turns.length (${turns.length}) !== vectors.length (${vectors.length})`);
361
364
  }
362
365
 
366
+ // Batch insert: build multi-row VALUES clause
367
+ const valueClauses = [];
368
+ const params = [];
369
+
363
370
  for (let i = 0; i < turns.length; i++) {
364
371
  const t = turns[i];
365
372
  const vec = vectors[i];
366
373
  if (!vec) continue;
367
374
 
368
375
  const contentHash = crypto.createHash('sha256').update(t.text).digest('hex').slice(0, 16);
369
- await pool.query(
370
- `INSERT INTO ${qi(schema)}.turn_embeddings
371
- (session_row_id, tenant_id, session_id, agent_id, source,
372
- turn_index, message_index, role, content_text, content_hash, embedding)
373
- VALUES ($1,$2,$3,$4,$5,$6,$7,'user',$8,$9,$10::vector)
374
- ON CONFLICT (session_row_id, message_index) DO UPDATE SET
375
- content_text = EXCLUDED.content_text,
376
- content_hash = EXCLUDED.content_hash,
377
- embedding = CASE
378
- WHEN ${qi(schema)}.turn_embeddings.content_hash = EXCLUDED.content_hash
379
- THEN ${qi(schema)}.turn_embeddings.embedding
380
- ELSE EXCLUDED.embedding
381
- END`,
382
- [
383
- sessionRowId, tenantId, sessionId, agentId, source || null,
384
- t.turnIndex, t.messageIndex,
385
- t.text, contentHash, vecToStr(vec),
386
- ]
376
+ const off = params.length;
377
+ params.push(
378
+ sessionRowId, tenantId, sessionId, agentId, source || null,
379
+ t.turnIndex, t.messageIndex,
380
+ t.text, contentHash, vecToStr(vec),
381
+ );
382
+ valueClauses.push(
383
+ `($${off+1},$${off+2},$${off+3},$${off+4},$${off+5},$${off+6},$${off+7},'user',$${off+8},$${off+9},$${off+10}::vector)`
387
384
  );
388
385
  }
386
+
387
+ if (valueClauses.length === 0) return;
388
+
389
+ await pool.query(
390
+ `INSERT INTO ${qi(schema)}.turn_embeddings
391
+ (session_row_id, tenant_id, session_id, agent_id, source,
392
+ turn_index, message_index, role, content_text, content_hash, embedding)
393
+ VALUES ${valueClauses.join(',\n')}
394
+ ON CONFLICT (session_row_id, message_index) DO UPDATE SET
395
+ content_text = EXCLUDED.content_text,
396
+ content_hash = EXCLUDED.content_hash,
397
+ embedding = CASE
398
+ WHEN ${qi(schema)}.turn_embeddings.content_hash = EXCLUDED.content_hash
399
+ THEN ${qi(schema)}.turn_embeddings.embedding
400
+ ELSE EXCLUDED.embedding
401
+ END`,
402
+ params
403
+ );
389
404
  }
390
405
 
391
406
  // ---------------------------------------------------------------------------
@@ -403,6 +418,16 @@ async function searchTurnEmbeddings(pool, {
403
418
  source,
404
419
  limit = 15,
405
420
  }) {
421
+ // HNSW index fires only on `ORDER BY embedding <=> $vec LIMIT N` without
422
+ // additional predicates in the same query level. So the CTE does a plain
423
+ // nearest-neighbor scan (uses idx_turn_emb_embedding_hnsw at scale), then
424
+ // the outer SELECT applies tenant/agent/date/source filters and dedups.
425
+ //
426
+ // Filter narrowness may leave fewer than `limit` rows after post-filter;
427
+ // NN_OVERFETCH trades extra vector work for filter survival headroom.
428
+ const NN_OVERFETCH = 10;
429
+ const nnLimit = Math.max(50, limit * NN_OVERFETCH);
430
+
406
431
  const where = ['s.tenant_id = $1'];
407
432
  const params = [tenantId];
408
433
 
@@ -421,40 +446,70 @@ async function searchTurnEmbeddings(pool, {
421
446
  }
422
447
  if (agentIds) {
423
448
  params.push(agentIds);
424
- where.push(`t.agent_id = ANY($${params.length})`);
449
+ where.push(`s.agent_id = ANY($${params.length})`);
425
450
  }
426
451
  if (source) {
427
452
  params.push(source);
428
- where.push(`t.source = $${params.length}`);
453
+ where.push(`s.source = $${params.length}`);
429
454
  }
430
455
 
431
456
  params.push(`[${queryVec.join(',')}]`);
432
457
  const vecPos = params.length;
433
-
434
- // m5: use subquery with LIMIT to avoid scanning all rows
435
- params.push(limit * 3); // fetch more than needed for DISTINCT ON dedup
436
- const innerLimitPos = params.length;
458
+ params.push(nnLimit);
459
+ const nnLimitPos = params.length;
437
460
 
438
461
  const result = await pool.query(
439
- `SELECT * FROM (
440
- SELECT DISTINCT ON (t.session_row_id)
462
+ `WITH nn AS (
463
+ SELECT t.session_row_id, t.content_text, t.turn_index,
464
+ (t.embedding <=> $${vecPos}::vector) AS turn_distance
465
+ FROM ${qi(schema)}.turn_embeddings t
466
+ ORDER BY t.embedding <=> $${vecPos}::vector ASC
467
+ LIMIT $${nnLimitPos}
468
+ )
469
+ SELECT * FROM (
470
+ SELECT DISTINCT ON (nn.session_row_id)
441
471
  s.session_id, s.id AS session_row_id, s.agent_id, s.source, s.started_at,
442
472
  ss.summary_text, ss.structured_summary, ss.access_count, ss.last_accessed_at,
443
473
  COALESCE(ss.trust_score, 0.5) AS trust_score,
444
- t.content_text AS matched_turn_text, t.turn_index AS matched_turn_index,
445
- (t.embedding <=> $${vecPos}::vector) AS turn_distance
446
- FROM ${qi(schema)}.turn_embeddings t
447
- JOIN ${qi(schema)}.sessions s ON s.id = t.session_row_id
474
+ nn.content_text AS matched_turn_text, nn.turn_index AS matched_turn_index,
475
+ nn.turn_distance
476
+ FROM nn
477
+ JOIN ${qi(schema)}.sessions s ON s.id = nn.session_row_id
448
478
  LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
449
479
  WHERE ${where.join(' AND ')}
450
- ORDER BY t.session_row_id, turn_distance ASC
451
- ) sub
452
- ORDER BY turn_distance ASC
453
- LIMIT $${innerLimitPos}`,
480
+ ORDER BY nn.session_row_id, nn.turn_distance ASC
481
+ ) dedup
482
+ ORDER BY turn_distance ASC`,
454
483
  params
455
484
  );
456
485
 
457
- return { rows: result.rows.slice(0, limit) };
486
+ if (result.rows.length > 0) {
487
+ return { rows: result.rows.slice(0, limit) };
488
+ }
489
+
490
+ // Fallback: HNSW-first path filtered out to nothing. This can happen when
491
+ // tenant/agent filters are narrow enough to eliminate every NN candidate.
492
+ // Pay the cost of a filter-first scan to guarantee we don't silently return
493
+ // empty when qualifying rows exist. No HNSW on this path — slower, correct.
494
+ const fallbackParams = params.slice(0, params.length - 1); // drop nnLimit
495
+ fallbackParams.push(limit);
496
+ const fallbackLimitPos = fallbackParams.length;
497
+ const fallback = await pool.query(
498
+ `SELECT DISTINCT ON (t.session_row_id)
499
+ s.session_id, s.id AS session_row_id, s.agent_id, s.source, s.started_at,
500
+ ss.summary_text, ss.structured_summary, ss.access_count, ss.last_accessed_at,
501
+ COALESCE(ss.trust_score, 0.5) AS trust_score,
502
+ t.content_text AS matched_turn_text, t.turn_index AS matched_turn_index,
503
+ (t.embedding <=> $${vecPos}::vector) AS turn_distance
504
+ FROM ${qi(schema)}.turn_embeddings t
505
+ JOIN ${qi(schema)}.sessions s ON s.id = t.session_row_id
506
+ LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
507
+ WHERE ${where.join(' AND ')}
508
+ ORDER BY t.session_row_id, t.embedding <=> $${vecPos}::vector ASC
509
+ LIMIT $${fallbackLimitPos}`,
510
+ fallbackParams
511
+ );
512
+ return { rows: fallback.rows };
458
513
  }
459
514
 
460
515
  // ---------------------------------------------------------------------------
@@ -491,16 +546,32 @@ async function recordFeedback(pool, {
491
546
  }
492
547
 
493
548
  const trustBefore = parseFloat(current.rows[0].trust_score);
494
- const trustAfter = verdict === 'helpful'
495
- ? Math.min(1.0, trustBefore + TRUST_UP)
496
- : Math.max(0.0, trustBefore - TRUST_DOWN);
497
549
 
498
- await client.query(
499
- `UPDATE ${qi(schema)}.session_summaries
500
- SET trust_score = $1, updated_at = now()
501
- WHERE session_row_id = $2`,
502
- [trustAfter, sessionRowId]
550
+ // Dedupe: the same (agent, verdict) applied more than once must not stack.
551
+ // Audit row is still inserted so the sequence of feedback events is
552
+ // preserved; only the trust_score delta is skipped.
553
+ const prior = await client.query(
554
+ `SELECT 1 FROM ${qi(schema)}.session_feedback
555
+ WHERE session_row_id = $1 AND agent_id = $2 AND verdict = $3
556
+ LIMIT 1`,
557
+ [sessionRowId, agentId, verdict]
503
558
  );
559
+ const isDup = prior.rows.length > 0;
560
+
561
+ const trustAfter = isDup
562
+ ? trustBefore
563
+ : (verdict === 'helpful'
564
+ ? Math.min(1.0, trustBefore + TRUST_UP)
565
+ : Math.max(0.0, trustBefore - TRUST_DOWN));
566
+
567
+ if (!isDup) {
568
+ await client.query(
569
+ `UPDATE ${qi(schema)}.session_summaries
570
+ SET trust_score = $1, updated_at = now()
571
+ WHERE session_row_id = $2`,
572
+ [trustAfter, sessionRowId]
573
+ );
574
+ }
504
575
 
505
576
  await client.query(
506
577
  `INSERT INTO ${qi(schema)}.session_feedback
@@ -510,7 +581,7 @@ async function recordFeedback(pool, {
510
581
  );
511
582
 
512
583
  await client.query('COMMIT');
513
- return { trustBefore, trustAfter, verdict };
584
+ return { trustBefore, trustAfter, verdict, duplicate: isDup };
514
585
  } catch (err) {
515
586
  await client.query('ROLLBACK').catch(() => {});
516
587
  throw err;
@@ -0,0 +1,132 @@
1
+ # `enrich({ postProcess })` Contract
2
+
3
+ `aquifer.enrich(sessionId, opts)` runs commit → summarize → embed → entity-extract → mark-status inside a single DB transaction. After the transaction commits and the client is released, if `opts.postProcess` was supplied, Aquifer invokes it once with a context object. This is how consumers hook persona-specific side-effects (daily logs, workspace files, consolidation, narrative regen, metrics) without mutating core.
4
+
5
+ **Stability**: stable in 1.x. Additive changes only (new ctx fields). No removals or breaking renames without a major bump.
6
+
7
+ ## Signature
8
+
9
+ ```ts
10
+ postProcess?: (ctx: PostProcessContext) => Promise<void>
11
+ ```
12
+
13
+ ## When it runs
14
+
15
+ - **After** transaction commit and client release. The session row is already at its final status (`succeeded` or `partial`); nothing in postProcess can affect that.
16
+ - **At most once per enrich call**. No retry. If `postProcess` throws, the error is captured on the returned result as `postProcessError` (not re-thrown).
17
+ - Best-effort. The enrich call's return value resolves regardless of postProcess outcome.
18
+
19
+ ## `ctx` shape
20
+
21
+ ```ts
22
+ interface PostProcessContext {
23
+ session: {
24
+ id: number; // DB primary key (sessions.id in the configured schema, e.g. miranda.sessions.id)
25
+ sessionId: string; // caller-provided session key
26
+ agentId: string;
27
+ model: string | null;
28
+ source: string | null;
29
+ startedAt: string | null; // ISO-8601
30
+ endedAt: string | null; // ISO-8601
31
+ };
32
+
33
+ // opts.model override, falling back to session.model. Handy for consumers
34
+ // that want to pass the runtime model into downstream consolidation prompts.
35
+ effectiveModel: string | null;
36
+
37
+ // Summary result, if summarize ran. Null when skipSummary or summary failed.
38
+ summary: {
39
+ summaryText: string;
40
+ structuredSummary: object | null; // custom summaryFn payload
41
+ } | null;
42
+
43
+ // Summary-level embedding vector (size = embed.dim). Null if embed skipped/failed.
44
+ embedding: number[] | null;
45
+
46
+ // Per-turn embedding vectors (one per user turn). Null if skipped/failed.
47
+ turnVectors: number[][] | null;
48
+
49
+ // Passthrough from customSummaryFn return { extra }. Consumers use this to
50
+ // smuggle intermediate results (recap/sections/workingFacts) from summaryFn
51
+ // into postProcess without recomputing.
52
+ extra: any;
53
+
54
+ // Messages used for embedding/entity extraction. Same array commit() saw.
55
+ normalized: Array<{ role: string; content: string; timestamp?: string }>;
56
+
57
+ // Parsed entities from entityParseFn (or built-in parser).
58
+ parsedEntities: Array<{ name: string; normalizedName: string; aliases: string[]; type: string }>;
59
+
60
+ // Which pipeline steps ran.
61
+ skipped: { summary: boolean; entities: boolean; turns: boolean };
62
+
63
+ // Counts from the tx.
64
+ turnsEmbedded: number;
65
+ entitiesFound: number;
66
+
67
+ // Non-fatal failures collected inside enrich. Defensive copy — mutating this
68
+ // array does NOT affect enrich's own warnings list.
69
+ warnings: string[];
70
+ }
71
+ ```
72
+
73
+ ## Typical usage
74
+
75
+ ```js
76
+ const result = await aquifer.enrich(sessionId, {
77
+ agentId: 'main',
78
+ summaryFn: async (msgs) => {
79
+ const output = await callLlm(buildPrompt({ msgs }));
80
+ const sections = parseSummaryOutput(output);
81
+ const recap = parseRecapLines(sections.recap);
82
+ return {
83
+ summaryText: recap.overview || '',
84
+ structuredSummary: recap,
85
+ entityRaw: sections.entities || null,
86
+ extra: { sections, recap, workingFacts: parseWorkingFacts(sections.working_facts) },
87
+ };
88
+ },
89
+ entityParseFn: (text) => parseEntitySection(text).entities,
90
+ postProcess: async (ctx) => {
91
+ const recap = ctx.extra?.recap;
92
+ const sections = ctx.extra?.sections;
93
+ const workingFacts = ctx.extra?.workingFacts || [];
94
+
95
+ // Daily log
96
+ if (recap || sections) {
97
+ await writeDailyEntries({ recap, sections, sessionId: ctx.session.sessionId, agentId: ctx.session.agentId });
98
+ }
99
+
100
+ // Write fact candidates (consumer-specific table, not in Aquifer schema)
101
+ if (workingFacts.length > 0) {
102
+ await writeFactCandidates({ facts: workingFacts, sessionId: ctx.session.sessionId });
103
+ }
104
+
105
+ // Consolidation (optional — requires enableFacts())
106
+ if (recap) {
107
+ const prompt = buildConsolidationPrompt({ recap, activeFacts, candidates, currentNarrative });
108
+ const output = await callLlm(prompt);
109
+ const { actions } = parseConsolidationOutput(output);
110
+ if (actions.length > 0) {
111
+ await aquifer.consolidate(ctx.session.sessionId, { actions, agentId: ctx.session.agentId });
112
+ }
113
+ }
114
+ },
115
+ });
116
+
117
+ if (result.postProcessError) {
118
+ logger.warn(`postProcess failed: ${result.postProcessError.message}`);
119
+ }
120
+ ```
121
+
122
+ ## What NOT to do in postProcess
123
+
124
+ - Don't throw as a signal of "enrich should have failed" — enrich is already committed. Use warnings or a separate audit table.
125
+ - Don't mutate `ctx.normalized` or `ctx.parsedEntities`: they are shared by reference with the enrich return value, so make a defensive copy if you need to modify them. (`ctx.warnings` is already a defensive copy, as noted in the `ctx` shape above.)
126
+ - Don't rely on postProcess running quickly — it's outside the tx. Long-running work should be fire-and-forget (see Miranda's `setImmediate` consolidation) or queued.
127
+
128
+ ## What Aquifer guarantees
129
+
130
+ - `postProcess` receives the same `session` row the tx wrote. No stale reads.
131
+ - If enrich's tx rolls back, postProcess is NOT called.
132
+ - If postProcess throws, the error is on `result.postProcessError`. The session status is unaffected.
package/index.js CHANGED
@@ -3,5 +3,13 @@
3
3
  const { createAquifer } = require('./core/aquifer');
4
4
  const { createEmbedder } = require('./pipeline/embed');
5
5
  const { createReranker } = require('./pipeline/rerank');
6
+ const { normalizeEntityName } = require('./core/entity');
7
+ const { parseEntitySection } = require('./consumers/shared/entity-parser');
6
8
 
7
- module.exports = { createAquifer, createEmbedder, createReranker };
9
+ module.exports = {
10
+ createAquifer,
11
+ createEmbedder,
12
+ createReranker,
13
+ normalizeEntityName,
14
+ parseEntitySection,
15
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shadowforge0/aquifer-memory",
3
- "version": "1.0.2",
3
+ "version": "1.2.1",
4
4
  "description": "PG-native long-term memory for AI agents. Turn-level embedding, hybrid RRF ranking, optional knowledge graph. MCP server, CLI, and library API.",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -9,6 +9,9 @@
9
9
  "pipeline/",
10
10
  "schema/",
11
11
  "consumers/",
12
+ "consumers/miranda/",
13
+ "consumers/default/",
14
+ "consumers/openclaw-ext/",
12
15
  "docs/",
13
16
  "scripts/"
14
17
  ],
@@ -20,8 +23,17 @@
20
23
  "./consumers/mcp": "./consumers/mcp.js",
21
24
  "./consumers/openclaw-plugin": "./consumers/openclaw-plugin.js",
22
25
  "./consumers/opencode": "./consumers/opencode.js",
26
+ "./consumers/claude-code": "./consumers/claude-code.js",
27
+ "./consumers/miranda": "./consumers/miranda/index.js",
28
+ "./consumers/default": "./consumers/default/index.js",
29
+ "./consumers/openclaw-ext": "./consumers/openclaw-ext/index.js",
23
30
  "./consumers/shared/config": "./consumers/shared/config.js",
24
- "./consumers/shared/factory": "./consumers/shared/factory.js"
31
+ "./consumers/shared/factory": "./consumers/shared/factory.js",
32
+ "./consumers/shared/entity-parser": "./consumers/shared/entity-parser.js",
33
+ "./consumers/shared/normalize": "./consumers/shared/normalize.js",
34
+ "./consumers/shared/ingest": "./consumers/shared/ingest.js",
35
+ "./consumers/shared/recall-format": "./consumers/shared/recall-format.js",
36
+ "./consumers/shared/llm-autodetect": "./consumers/shared/llm-autodetect.js"
25
37
  },
26
38
  "repository": {
27
39
  "type": "git",
@@ -32,11 +44,20 @@
32
44
  "url": "https://github.com/shadowforge0/aquifer/issues"
33
45
  },
34
46
  "author": "shadowforge0",
47
+ "scripts": {
48
+ "test": "node --test test/*.test.js",
49
+ "test:integration": "node --test test/integration.test.js",
50
+ "lint": "eslint index.js core/*.js consumers/*.js consumers/shared/*.js consumers/miranda/*.js consumers/miranda/prompts/*.js consumers/default/*.js consumers/default/prompts/*.js consumers/openclaw-ext/*.js pipeline/*.js pipeline/consolidation/*.js scripts/*.js test/*.js",
51
+ "prepare": "git config core.hooksPath .githooks 2>/dev/null || true"
52
+ },
35
53
  "dependencies": {
36
54
  "@modelcontextprotocol/sdk": "^1.29.0",
37
55
  "pg": "^8.13.0",
38
56
  "zod": "^3.25.76"
39
57
  },
58
+ "devDependencies": {
59
+ "eslint": "^9.0.0"
60
+ },
40
61
  "engines": {
41
62
  "node": ">=18.0.0"
42
63
  },
package/pipeline/_http.js CHANGED
@@ -28,7 +28,7 @@ function httpRequest(url, options, body) {
28
28
  }
29
29
  try {
30
30
  finish(resolve, JSON.parse(raw));
31
- } catch (e) {
31
+ } catch {
32
32
  finish(reject, new Error(`Invalid JSON response: ${raw.slice(0, 200)}`));
33
33
  }
34
34
  });
@@ -0,0 +1,176 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Consolidation apply — executes a batch of fact-lifecycle actions in one tx.
5
+ //
6
+ // Actions (each object in the array):
7
+ // { action: 'promote', factId } candidate → active
8
+ // { action: 'create', subject, statement, importance? } new active fact
9
+ // { action: 'update', factId, statement } refresh active statement
10
+ // { action: 'confirm', factId } bump last_confirmed_at
11
+ // { action: 'stale', factId } active → stale
12
+ // { action: 'discard', factId } candidate → archived
13
+ // { action: 'merge', factId, targetId } candidate archived, target confirmed
14
+ // { action: 'supersede', factId, targetId } active → superseded by target
15
+ //
16
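+ // Mutations are scoped to tenantId, and all actions except merge and supersede
17
+ // also filter on agentId (NOTE: merge/supersede match on tenant_id only). The caller may supply a normalizer for subject_key; without one, the trimmed, lowercased subject is used.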
18
+ // ---------------------------------------------------------------------------
19
+
20
+ function qi(identifier) { return `"${identifier}"`; }
21
+
22
+ async function applyConsolidation(pool, {
23
+ actions,
24
+ agentId,
25
+ sessionId,
26
+ schema,
27
+ tenantId = 'default',
28
+ normalizeSubject = null,
29
+ recapOverview = '',
30
+ } = {}) {
31
+ if (!pool) throw new Error('pool is required');
32
+ if (!schema) throw new Error('schema is required');
33
+ if (!agentId) throw new Error('agentId is required');
34
+ if (!Array.isArray(actions)) throw new Error('actions must be an array');
35
+
36
+ const tbl = `${qi(schema)}.facts`;
37
+ const summary = {
38
+ promote: 0, create: 0, update: 0, confirm: 0,
39
+ stale: 0, discard: 0, merge: 0, supersede: 0,
40
+ skipped: 0,
41
+ };
42
+
43
+ if (actions.length === 0) return summary;
44
+
45
+ const client = await pool.connect();
46
+ try {
47
+ await client.query('BEGIN');
48
+
49
+ for (const act of actions) {
50
+ switch (act.action) {
51
+ case 'promote': {
52
+ const r = await client.query(
53
+ `UPDATE ${tbl} SET status = 'active', last_confirmed_at = now()
54
+ WHERE id = $1 AND status = 'candidate' AND agent_id = $2 AND tenant_id = $3`,
55
+ [act.factId, agentId, tenantId],
56
+ );
57
+ summary.promote += r.rowCount;
58
+ if (r.rowCount === 0) summary.skipped++;
59
+ break;
60
+ }
61
+
62
+ case 'create': {
63
+ const subjectLabel = act.subject ? String(act.subject).slice(0, 200) : '';
64
+ const subjectKey = normalizeSubject ? normalizeSubject(subjectLabel) : subjectLabel.trim().toLowerCase();
65
+ if (!subjectKey) { summary.skipped++; break; }
66
+ const statement = act.statement ? String(act.statement).slice(0, 2000) : '';
67
+ if (!statement) { summary.skipped++; break; }
68
+ const importance = Number.isFinite(act.importance) ? act.importance : 7;
69
+ const evidence = JSON.stringify([{
70
+ type: 'session_ref',
71
+ session_id: sessionId || null,
72
+ excerpt: (recapOverview || '').slice(0, 200),
73
+ }]);
74
+ const r = await client.query(
75
+ `INSERT INTO ${tbl}
76
+ (tenant_id, subject_key, subject_label, statement, status, importance,
77
+ source_session_id, agent_id, evidence)
78
+ VALUES ($1, $2, $3, $4, 'active', $5, $6, $7, $8::jsonb)
79
+ ON CONFLICT DO NOTHING`,
80
+ [tenantId, subjectKey, subjectLabel, statement, importance, sessionId || null, agentId, evidence],
81
+ );
82
+ summary.create += r.rowCount;
83
+ if (r.rowCount === 0) summary.skipped++;
84
+ break;
85
+ }
86
+
87
+ case 'update': {
88
+ const statement = act.statement ? String(act.statement).slice(0, 2000) : '';
89
+ if (!statement) { summary.skipped++; break; }
90
+ const r = await client.query(
91
+ `UPDATE ${tbl} SET statement = $1, last_confirmed_at = now()
92
+ WHERE id = $2 AND status = 'active' AND agent_id = $3 AND tenant_id = $4`,
93
+ [statement, act.factId, agentId, tenantId],
94
+ );
95
+ summary.update += r.rowCount;
96
+ if (r.rowCount === 0) summary.skipped++;
97
+ break;
98
+ }
99
+
100
+ case 'confirm': {
101
+ const r = await client.query(
102
+ `UPDATE ${tbl} SET last_confirmed_at = now()
103
+ WHERE id = $1 AND status = 'active' AND agent_id = $2 AND tenant_id = $3`,
104
+ [act.factId, agentId, tenantId],
105
+ );
106
+ summary.confirm += r.rowCount;
107
+ if (r.rowCount === 0) summary.skipped++;
108
+ break;
109
+ }
110
+
111
+ case 'stale': {
112
+ const r = await client.query(
113
+ `UPDATE ${tbl} SET status = 'stale'
114
+ WHERE id = $1 AND status = 'active' AND agent_id = $2 AND tenant_id = $3`,
115
+ [act.factId, agentId, tenantId],
116
+ );
117
+ summary.stale += r.rowCount;
118
+ if (r.rowCount === 0) summary.skipped++;
119
+ break;
120
+ }
121
+
122
+ case 'discard': {
123
+ const r = await client.query(
124
+ `UPDATE ${tbl} SET status = 'archived'
125
+ WHERE id = $1 AND status = 'candidate' AND agent_id = $2 AND tenant_id = $3`,
126
+ [act.factId, agentId, tenantId],
127
+ );
128
+ summary.discard += r.rowCount;
129
+ if (r.rowCount === 0) summary.skipped++;
130
+ break;
131
+ }
132
+
133
+ case 'merge': {
134
+ const r1 = await client.query(
135
+ `UPDATE ${tbl} SET last_confirmed_at = now()
136
+ WHERE id = $1 AND status = 'active' AND tenant_id = $2`,
137
+ [act.targetId, tenantId],
138
+ );
139
+ const r2 = await client.query(
140
+ `UPDATE ${tbl} SET status = 'archived'
141
+ WHERE id = $1 AND status = 'candidate' AND tenant_id = $2`,
142
+ [act.factId, tenantId],
143
+ );
144
+ summary.merge += Math.min(r1.rowCount, r2.rowCount);
145
+ if (r1.rowCount === 0 || r2.rowCount === 0) summary.skipped++;
146
+ break;
147
+ }
148
+
149
+ case 'supersede': {
150
+ const r = await client.query(
151
+ `UPDATE ${tbl} SET status = 'superseded', superseded_by = $1
152
+ WHERE id = $2 AND status = 'active' AND tenant_id = $3`,
153
+ [act.targetId, act.factId, tenantId],
154
+ );
155
+ summary.supersede += r.rowCount;
156
+ if (r.rowCount === 0) summary.skipped++;
157
+ break;
158
+ }
159
+
160
+ default:
161
+ summary.skipped++;
162
+ }
163
+ }
164
+
165
+ await client.query('COMMIT');
166
+ } catch (err) {
167
+ await client.query('ROLLBACK').catch(() => {});
168
+ throw err;
169
+ } finally {
170
+ client.release();
171
+ }
172
+
173
+ return summary;
174
+ }
175
+
176
+ module.exports = { applyConsolidation };
@@ -0,0 +1,21 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Consolidation pipeline
5
+ //
6
+ // Mechanics only — Aquifer ships the 8-action apply + schema. The prompt and
7
+ // output parser stay in consumers (they're persona-specific: different agents
8
+ // want different wording, language, and action vocabulary extensions).
9
+ //
10
+ // Typical flow in a consumer:
11
+ //
12
+ // const output = await llmFn(consumerBuildPrompt({...}));
13
+ // const { actions } = consumerParse(output);
14
+ // await aquifer.consolidate(sessionId, { actions, agentId });
15
+ //
16
+ // aquifer.consolidate() is defined in core/aquifer.js and delegates here.
17
+ // ---------------------------------------------------------------------------
18
+
19
+ const { applyConsolidation } = require('./apply');
20
+
21
+ module.exports = { applyConsolidation };
@@ -6,7 +6,7 @@ const { parseEntityOutput } = require('../core/entity');
6
6
  // defaultEntityPrompt
7
7
  // ---------------------------------------------------------------------------
8
8
 
9
- function defaultEntityPrompt(messages, opts = {}) {
9
+ function defaultEntityPrompt(messages) {
10
10
  const conversation = messages
11
11
  .map(m => `[${m.role}] ${typeof m.content === 'string' ? m.content : JSON.stringify(m.content)}`)
12
12
  .join('\n');
@@ -60,7 +60,7 @@ async function extractEntities(messages, {
60
60
  const prompt = buildPrompt(messages, {});
61
61
  const response = await llmFn(prompt);
62
62
  return parseEntityOutput(response);
63
- } catch (err) {
63
+ } catch {
64
64
  // LLM failure: return empty, never throw
65
65
  return [];
66
66
  }
@@ -39,7 +39,7 @@ function createTEIReranker(config) {
39
39
  const initialBackoffMs = config.initialBackoffMs || 250;
40
40
 
41
41
  return {
42
- async rerank(query, documents, opts = {}) {
42
+ async rerank(query, documents, _opts = {}) {
43
43
  if (!query || !documents || documents.length === 0) return [];
44
44
 
45
45
  const result = await withRetry(