@shadowforge0/aquifer-memory 1.5.8 → 1.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/insights.js CHANGED
@@ -26,65 +26,20 @@ const DEFAULT_RECALL_WEIGHTS = Object.freeze({
26
26
  recency: 0.10,
27
27
  });
28
28
 
29
+ const DEFAULT_DEDUP = Object.freeze({
30
+ mode: 'off',
31
+ cosineThreshold: 0.88,
32
+ closeBandFrom: 0.85,
33
+ });
34
+
35
+ const VALID_DEDUP_MODES = new Set(['off', 'shadow', 'enforce']);
36
+
29
37
  // Recency linear decay horizon — an insight is treated as "fully recent" at
30
38
  // creation (age=0) and "zero recency" at age >= recencyWindowDays. Beyond,
31
39
  // recency contribution is clamped to 0 rather than going negative. Configurable
32
40
  // via createAquifer({ insights: { recencyWindowDays } }).
33
41
  const DEFAULT_RECENCY_WINDOW_DAYS = 90;
34
42
 
35
- const LEADING_PUNCT_RE = /^[\s\-_.,;:!?'"()\[\]{}@#]+/;
36
- const TRAILING_PUNCT_RE = /[\s\-_.,;:!?'"()\[\]{}@#]+$/;
37
-
38
- function _normalizeText(input) {
39
- if (typeof input !== 'string' || !input) return '';
40
- let s = input.normalize('NFKC');
41
- s = s.toLowerCase();
42
- s = s.replace(/\s+/g, ' ');
43
- s = s.replace(LEADING_PUNCT_RE, '');
44
- s = s.replace(TRAILING_PUNCT_RE, '');
45
- return s;
46
- }
47
-
48
- function normalizeCanonicalClaim(text) {
49
- return _normalizeText(text);
50
- }
51
-
52
- function normalizeBody(text) {
53
- return _normalizeText(text);
54
- }
55
-
56
- function normalizeEntitySet(entities) {
57
- if (!entities || !Array.isArray(entities)) return '';
58
- const { normalizeEntityName } = require('./entity');
59
- const normalized = entities
60
- .map(e => normalizeEntityName(e))
61
- .filter(Boolean);
62
- const deduped = [...new Set(normalized)];
63
- deduped.sort();
64
- return deduped.join('|');
65
- }
66
-
67
- function defaultCanonicalKey({ tenantId, agentId, type, canonicalClaim, entities }) {
68
- const normClaim = normalizeCanonicalClaim(canonicalClaim);
69
- const normEntities = normalizeEntitySet(entities);
70
- const input = `${tenantId || ''}|${agentId || ''}|${type || ''}|${normClaim}|${normEntities}`;
71
- return crypto.createHash('sha256').update(input).digest('hex');
72
- }
73
-
74
- function defaultIdempotencyKey({
75
- tenantId, agentId, type, title, body, sourceSessionIds, evidenceWindow,
76
- }) {
77
- const sorted = (sourceSessionIds || []).slice().sort().join('|');
78
- const winFrom = evidenceWindow && evidenceWindow.from ? new Date(evidenceWindow.from).toISOString() : '';
79
- const winTo = evidenceWindow && evidenceWindow.to ? new Date(evidenceWindow.to).toISOString() : '';
80
- // Hash must include body + window so legitimate revisions (same sessions but
81
- // tightened body, or extended window) get a new key and replace the old row
82
- // via supersede, not get swallowed as a duplicate.
83
- return crypto.createHash('sha256')
84
- .update(`${tenantId}|${agentId}|${type}|${title}|${body || ''}|${sorted}|${winFrom}|${winTo}`)
85
- .digest('hex');
86
- }
87
-
88
43
  // ---------------------------------------------------------------------------
89
44
  // Canonical identity helpers (Phase 2 C1)
90
45
  //
@@ -160,6 +115,94 @@ function vecToPgLiteral(v) {
160
115
  return `[${v.join(',')}]`;
161
116
  }
162
117
 
118
/**
 * Cap a string at `limit` UTF-16 code units without leaving a broken
 * surrogate pair at the end.
 *
 * @param {*} input - value to truncate; anything that is not a string yields ''.
 * @param {number} limit - maximum length in UTF-16 code units; a non-finite
 *   or negative limit yields ''.
 * @returns {string} `input` unchanged when it already fits, otherwise a prefix
 *   of at most `limit` code units.
 */
function truncate(input, limit) {
  if (typeof input !== 'string') return '';
  if (!Number.isFinite(limit) || limit < 0) return '';
  if (input.length <= limit) return input;

  // slice() floors fractional indexes; do it explicitly so the surrogate
  // check below looks at the same boundary slice() would use.
  const cut = Math.floor(limit);

  // If the cut falls between a high and a low surrogate, keeping the high
  // half alone would produce ill-formed Unicode (a lone surrogate) that
  // downstream serializers or stores may reject. Drop the dangling half.
  const lastKept = input.charCodeAt(cut - 1);
  const firstDropped = input.charCodeAt(cut);
  const splitsPair = cut > 0
    && lastKept >= 0xD800 && lastKept <= 0xDBFF
    && firstDropped >= 0xDC00 && firstDropped <= 0xDFFF;

  return input.slice(0, splitsPair ? cut - 1 : cut);
}
123
+
124
/**
 * Run `input` through normalizeBody (defined elsewhere in this module) and
 * cap the normalized text at `limit` via truncate().
 *
 * @param {*} input - raw text handed to normalizeBody.
 * @param {number} limit - maximum length forwarded to truncate().
 * @returns {string} whatever truncate() returns for the normalized text.
 */
function truncateNormalized(input, limit) {
  const normalized = normalizeBody(input);
  return truncate(normalized, limit);
}
127
+
128
/**
 * Resolve the raw `dedup` option into a validated, frozen config object.
 *
 * Accepted shapes for `dedup`:
 *   - `true`         → defaults with mode 'enforce'
 *   - an object      → defaults overlaid with the object's keys
 *   - anything else  → defaults (mode 'off')
 *
 * When the AQUIFER_INSIGHTS_DEDUP_MODE environment variable holds a valid
 * mode it overrides the mode resolved from `dedup`.
 *
 * @param {boolean|object|undefined} dedup - raw option value.
 * @param {Function} [embedFn] - only checked for presence, to warn when
 *   dedup is enabled without an embedder.
 * @returns {Readonly<object>} frozen config carrying `mode`,
 *   `cosineThreshold` and `closeBandFrom` (plus any extra keys from `dedup`).
 */
function resolveDedupConfig(dedup, embedFn) {
  // Strict numeric parse. Number() maps '', whitespace-only strings and []
  // to 0, which would sneak a "merge everything" threshold past the
  // finiteness check below; only real numbers and non-blank strings are
  // allowed to reach Number().
  const toNumber = (value) => {
    if (typeof value === 'number') return value;
    if (typeof value === 'string' && value.trim() !== '') return Number(value);
    return Number.NaN;
  };

  let resolved;
  if (dedup === true) {
    resolved = { ...DEFAULT_DEDUP, mode: 'enforce' };
  } else if (dedup && typeof dedup === 'object') {
    resolved = { ...DEFAULT_DEDUP, ...dedup };
  } else {
    resolved = { ...DEFAULT_DEDUP };
  }

  const rawMode = typeof resolved.mode === 'string' ? resolved.mode.trim().toLowerCase() : resolved.mode;
  if (!VALID_DEDUP_MODES.has(rawMode)) {
    console.warn(`[aquifer] insights dedup: invalid mode ${JSON.stringify(resolved.mode)}; coercing to 'off'`);
    resolved.mode = 'off';
  } else {
    resolved.mode = rawMode;
  }

  // Environment override wins over config. An invalid non-empty value is
  // reported instead of being dropped silently, so a typo is visible.
  const envMode = process.env.AQUIFER_INSIGHTS_DEDUP_MODE;
  if (typeof envMode === 'string') {
    const normalizedEnvMode = envMode.trim().toLowerCase();
    if (VALID_DEDUP_MODES.has(normalizedEnvMode)) {
      resolved.mode = normalizedEnvMode;
    } else if (normalizedEnvMode !== '') {
      console.warn(`[aquifer] insights dedup: ignoring invalid AQUIFER_INSIGHTS_DEDUP_MODE ${JSON.stringify(envMode)}`);
    }
  }

  // Reject non-numeric sentinels before any numeric coercion — a 0 would
  // silently become a "merge everything" threshold in enforce mode.
  let cosineThreshold;
  if (resolved.cosineThreshold === null || resolved.cosineThreshold === undefined
      || typeof resolved.cosineThreshold === 'boolean') {
    console.warn(`[aquifer] insights dedup: invalid cosineThreshold ${JSON.stringify(resolved.cosineThreshold)}; defaulting to 0.88`);
    cosineThreshold = DEFAULT_DEDUP.cosineThreshold;
  } else {
    cosineThreshold = toNumber(resolved.cosineThreshold);
    if (!Number.isFinite(cosineThreshold)) {
      console.warn('[aquifer] insights dedup: invalid cosineThreshold; defaulting to 0.88');
      cosineThreshold = DEFAULT_DEDUP.cosineThreshold;
    } else if (cosineThreshold < 0.75 || cosineThreshold > 0.95) {
      // Outside the recommended band is allowed but loud; outside [0,1] is clamped.
      const clamped = Math.max(0, Math.min(1, cosineThreshold));
      console.warn(`[aquifer] insights dedup: cosineThreshold ${cosineThreshold} outside recommended [0.75,0.95]; using ${clamped}`);
      cosineThreshold = clamped;
    }
  }
  resolved.cosineThreshold = cosineThreshold;

  let closeBandFrom;
  if (resolved.closeBandFrom === null || resolved.closeBandFrom === undefined
      || typeof resolved.closeBandFrom === 'boolean') {
    console.warn(`[aquifer] insights dedup: invalid closeBandFrom ${JSON.stringify(resolved.closeBandFrom)}; defaulting to 0.85`);
    closeBandFrom = DEFAULT_DEDUP.closeBandFrom;
  } else {
    closeBandFrom = toNumber(resolved.closeBandFrom);
    if (!Number.isFinite(closeBandFrom)) {
      console.warn('[aquifer] insights dedup: invalid closeBandFrom; defaulting to 0.85');
      closeBandFrom = DEFAULT_DEDUP.closeBandFrom;
    }
  }
  // The close band must sit strictly below the threshold; otherwise it is
  // pulled to 0.03 under the threshold (floored at 0).
  if (closeBandFrom >= resolved.cosineThreshold) {
    const adjusted = Math.max(0, resolved.cosineThreshold - 0.03);
    console.warn(`[aquifer] insights dedup: closeBandFrom ${closeBandFrom} must be below cosineThreshold ${resolved.cosineThreshold}; using ${adjusted}`);
    closeBandFrom = adjusted;
  }
  resolved.closeBandFrom = closeBandFrom;

  if (resolved.mode !== 'off') {
    console.log(`[aquifer] insights dedup: mode=${resolved.mode} threshold=${resolved.cosineThreshold} close_band_from=${resolved.closeBandFrom}`);
    if (!embedFn) {
      console.warn('[aquifer] insights dedup: embedFn unavailable; semantic dedup disabled at runtime');
    }
  }

  return Object.freeze(resolved);
}
205
+
163
206
  function mapRow(row) {
164
207
  if (!row) return null;
165
208
  return {
@@ -184,7 +227,7 @@ function mapRow(row) {
184
227
  };
185
228
  }
186
229
 
187
- function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights, recencyWindowDays }) {
230
+ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights, recencyWindowDays, dedup }) {
188
231
  if (!pool) throw new Error('createInsights: pool is required');
189
232
  if (!schema) throw new Error('createInsights: schema is required');
190
233
 
@@ -192,6 +235,24 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
192
235
  const recencyWindow = Number.isFinite(recencyWindowDays) && recencyWindowDays > 0
193
236
  ? recencyWindowDays : DEFAULT_RECENCY_WINDOW_DAYS;
194
237
  const tbl = `${schema}.insights`;
238
+ const dedupConfig = resolveDedupConfig(dedup, embedFn);
239
+
240
+ if (dedupConfig.mode !== 'off') {
241
+ pool.query(
242
+ `SELECT count(*)::int AS n FROM ${tbl}
243
+ WHERE canonical_key_v2 IS NULL AND status = 'active'`
244
+ ).then(r => {
245
+ const n = r && r.rows && r.rows[0] ? Number(r.rows[0].n) : 0;
246
+ if (n > 0) {
247
+ console.warn(
248
+ `[aquifer] insights: ${n} active rows with canonical_key_v2 IS NULL. `
249
+ + 'Run scripts/backfill-canonical-key.js to include them in canonical dedup.'
250
+ );
251
+ }
252
+ }).catch(() => {
253
+ // non-fatal
254
+ });
255
+ }
195
256
 
196
257
  // -------------------------------------------------------------------------
197
258
  // commitInsight
@@ -283,9 +344,101 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
283
344
  toSupersede = Number(activeRow.id);
284
345
  }
285
346
 
286
- // Optional embedding.
287
347
  let embedding = null;
288
- if (embedFn) {
348
+ let embeddingReady = false;
349
+
350
+ if (dedupConfig.mode !== 'off' && !toSupersede && embedFn) {
351
+ // Embed the incoming title+body once. If this throws, the label
352
+ // is genuinely 'embed_failed' — the candidate SELECT never ran.
353
+ let embedFailed = false;
354
+ try {
355
+ const v = await embedFn([`${title}\n\n${body}`]);
356
+ if (Array.isArray(v) && Array.isArray(v[0])) {
357
+ embedding = vecToPgLiteral(v[0]);
358
+ }
359
+ embeddingReady = true;
360
+ } catch {
361
+ embedFailed = true;
362
+ embeddingReady = true;
363
+ metadata = { ...metadata, dedupSkipped: 'embed_failed' };
364
+ }
365
+
366
+ if (!embedFailed && embedding) {
367
+ // Candidate lookup. If this throws (DB error), let it bubble
368
+ // to the outer commitInsight try/catch → AQ_INTERNAL. Do NOT
369
+ // mislabel it as embed_failed.
370
+ const semanticLookup = await pool.query(
371
+ `SELECT *, 1.0 - (embedding <=> $4::vector) AS cos_sim
372
+ FROM ${tbl}
373
+ WHERE tenant_id = $1
374
+ AND agent_id = $2
375
+ AND insight_type = $3
376
+ AND status = 'active'
377
+ AND embedding IS NOT NULL
378
+ ORDER BY embedding <=> $4::vector
379
+ LIMIT 1`,
380
+ [tenantId, agentId, type, embedding]
381
+ );
382
+
383
+ if (semanticLookup.rowCount > 0) {
384
+ const candidate = semanticLookup.rows[0];
385
+ const cosine = Number(candidate.cos_sim);
386
+
387
+ if (cosine >= dedupConfig.cosineThreshold) {
388
+ const candidateUpper = parseUpperFromRange(candidate.evidence_window);
389
+ const isStaleReplay = candidateUpper
390
+ && new Date(toIso).getTime() < candidateUpper.getTime();
391
+
392
+ if (dedupConfig.mode === 'enforce') {
393
+ // Enforce path: stale-replay returns the candidate as
394
+ // duplicate; otherwise supersede.
395
+ if (isStaleReplay) {
396
+ return ok({ insight: mapRow(candidate), duplicate: true });
397
+ }
398
+ toSupersede = Number(candidate.id);
399
+ metadata = {
400
+ ...metadata,
401
+ dedupVia: 'semantic',
402
+ dedupCandidate: { id: Number(candidate.id), cosine },
403
+ };
404
+ } else {
405
+ // Shadow path: always insert the new row, always record
406
+ // shadowMatch metadata. staleReplay flag tells reviewers
407
+ // the enforce-mode twin would have returned duplicate
408
+ // instead of superseding.
409
+ metadata = {
410
+ ...metadata,
411
+ shadowMatch: {
412
+ candidateId: Number(candidate.id),
413
+ cosine,
414
+ threshold: dedupConfig.cosineThreshold,
415
+ candidateTitle: truncate(candidate.title, 200),
416
+ candidateBody: truncateNormalized(candidate.body, 200),
417
+ wouldSupersede: !isStaleReplay,
418
+ staleReplay: Boolean(isStaleReplay),
419
+ ranAt: new Date().toISOString(),
420
+ },
421
+ };
422
+ }
423
+ } else if (cosine >= dedupConfig.closeBandFrom) {
424
+ metadata = {
425
+ ...metadata,
426
+ dedupNear: {
427
+ candidateId: Number(candidate.id),
428
+ cosine,
429
+ threshold: dedupConfig.cosineThreshold,
430
+ closeBandFrom: dedupConfig.closeBandFrom,
431
+ candidateTitle: truncate(candidate.title, 200),
432
+ candidateBody: truncateNormalized(candidate.body, 200),
433
+ },
434
+ };
435
+ }
436
+ }
437
+ }
438
+ }
439
+
440
+ // Optional embedding.
441
+ if (embedFn && !embeddingReady) {
289
442
  try {
290
443
  const v = await embedFn([`${title}\n\n${body}`]);
291
444
  if (Array.isArray(v) && Array.isArray(v[0])) embedding = vecToPgLiteral(v[0]);
@@ -485,13 +638,12 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
485
638
  recallInsights,
486
639
  markStale,
487
640
  supersede,
488
- _internal: { defaultIdempotencyKey, vecToPgLiteral, mapRow, weights },
641
+ _internal: { vecToPgLiteral, mapRow, weights, dedup: dedupConfig },
489
642
  };
490
643
  }
491
644
 
492
645
  module.exports = {
493
646
  createInsights,
494
- defaultIdempotencyKey,
495
647
  defaultCanonicalKey,
496
648
  normalizeCanonicalClaim,
497
649
  normalizeBody,
@@ -46,13 +46,17 @@ const MCP_TOOL_MANIFEST = Object.freeze([
46
46
  enum: ['fts', 'hybrid', 'vector'],
47
47
  description: 'Recall mode: "fts" (keyword only, no embed needed), "hybrid" (default, FTS + vector), "vector" (vector only)',
48
48
  },
49
+ explain: {
50
+ type: 'boolean',
51
+ description: 'Include per-result score breakdown (rrf, timeDecay, entity, trust, rerank). Diagnostic use only.',
52
+ },
49
53
  },
50
54
  required: ['query'],
51
55
  },
52
56
  },
53
57
  {
54
58
  name: 'session_feedback',
55
- description: 'Record trust feedback on a recalled session. Helpful sessions rank higher in future recalls.',
59
+ description: 'After using session_recall, mark the result helpful if it directly informed your answer, or unhelpful if it was irrelevant/outdated. Include a short note. Sessions with more helpful feedback rank higher in future recalls.',
56
60
  inputSchema: {
57
61
  type: 'object',
58
62
  additionalProperties: false,
@@ -85,6 +89,19 @@ const MCP_TOOL_MANIFEST = Object.freeze([
85
89
  },
86
90
  },
87
91
  },
92
+ {
93
+ name: 'feedback_stats',
94
+ description: 'Return trust feedback statistics: total feedback count, helpful/unhelpful breakdown, trust score distribution, and coverage (how many sessions have been rated).',
95
+ inputSchema: {
96
+ type: 'object',
97
+ additionalProperties: false,
98
+ properties: {
99
+ agentId: { type: 'string', description: 'Filter by agent ID' },
100
+ dateFrom: { type: 'string', description: 'Start date YYYY-MM-DD for feedback window' },
101
+ dateTo: { type: 'string', description: 'End date YYYY-MM-DD for feedback window' },
102
+ },
103
+ },
104
+ },
88
105
  {
89
106
  name: 'session_bootstrap',
90
107
  description: 'Load recent session context for a new conversation. Returns summaries, open items, and decisions from recent sessions. Call this at the start of a conversation for continuity; use session_recall for keyword search.',
package/core/storage.js CHANGED
@@ -666,6 +666,76 @@ async function recordFeedback(pool, {
666
666
  }
667
667
  }
668
668
 
669
+ // ---------------------------------------------------------------------------
670
+ // getFeedbackStats — aggregate feedback and trust score metrics
671
+ // ---------------------------------------------------------------------------
672
+
673
/**
 * Aggregate feedback counts and trust-score figures for one tenant's sessions.
 *
 * The session scope is tenant-wide, optionally narrowed by agent and by the
 * session's `started_at` date (both bounds inclusive by calendar day). Two
 * aggregate queries are issued concurrently against the same scope.
 *
 * @param {object} pool - object exposing `query(text, params)`.
 * @param {object} opts
 * @param {string} opts.schema - schema name, passed through `qi` before interpolation.
 * @param {*} opts.tenantId - bound as `$1`.
 * @param {string} [opts.agentId] - optional agent filter.
 * @param {string} [opts.dateFrom] - optional lower bound, cast to `date`.
 * @param {string} [opts.dateTo] - optional upper bound, cast to `date`.
 * @returns {Promise<object>} counts plus avg/min/max trust score; each
 *   trust-score figure falls back to 0.5 when the query yields null.
 */
async function getFeedbackStats(pool, { schema, tenantId, agentId, dateFrom, dateTo }) {
  // Bind values and their matching SQL fragments grow in lock-step so each
  // placeholder index always equals the current bind-array length.
  const values = [tenantId];
  const filters = [];
  if (agentId) {
    values.push(agentId);
    filters.push(` AND s.agent_id = $${values.length}`);
  }
  if (dateFrom) {
    values.push(dateFrom);
    filters.push(` AND s.started_at >= $${values.length}::date`);
  }
  if (dateTo) {
    values.push(dateTo);
    filters.push(` AND s.started_at < ($${values.length}::date + interval '1 day')`);
  }
  const sessionClause = filters.join('');

  const feedbackSql = `
    WITH scoped_sessions AS (
      SELECT s.id
      FROM ${qi(schema)}.sessions s
      WHERE s.tenant_id = $1${sessionClause}
    )
    SELECT
      COUNT(sf.*)::int AS total,
      COUNT(*) FILTER (WHERE sf.verdict = 'helpful')::int AS helpful,
      COUNT(*) FILTER (WHERE sf.verdict = 'unhelpful')::int AS unhelpful,
      COUNT(DISTINCT sf.session_row_id)::int AS rated_sessions
    FROM scoped_sessions ss
    LEFT JOIN ${qi(schema)}.session_feedback sf
      ON sf.session_row_id = ss.id`;

  const trustSql = `
    WITH scoped_sessions AS (
      SELECT s.id
      FROM ${qi(schema)}.sessions s
      WHERE s.tenant_id = $1${sessionClause}
    )
    SELECT
      COUNT(scoped_sessions.id)::int AS total_sessions,
      ROUND(AVG(summary.trust_score)::numeric, 3) AS avg_ts,
      MIN(summary.trust_score) AS min_ts,
      MAX(summary.trust_score) AS max_ts
    FROM scoped_sessions
    LEFT JOIN ${qi(schema)}.session_summaries summary
      ON summary.session_row_id = scoped_sessions.id`;

  const [feedbackResult, trustResult] = await Promise.all([
    pool.query(feedbackSql, values),
    pool.query(trustSql, values),
  ]);
  const feedbackRow = feedbackResult.rows[0];
  const trustRow = trustResult.rows[0];

  // A null aggregate (no summaries in scope) reports the neutral 0.5.
  const scoreOrNeutral = (raw) => (
    raw !== null && raw !== undefined ? parseFloat(raw) : 0.5
  );

  return {
    totalFeedback: feedbackRow.total,
    helpfulCount: feedbackRow.helpful,
    unhelpfulCount: feedbackRow.unhelpful,
    feedbackSessions: feedbackRow.rated_sessions,
    totalSessions: trustRow.total_sessions,
    trustScoreAvg: scoreOrNeutral(trustRow.avg_ts),
    trustScoreMin: scoreOrNeutral(trustRow.min_ts),
    trustScoreMax: scoreOrNeutral(trustRow.max_ts),
  };
}
738
+
669
739
  // ---------------------------------------------------------------------------
670
740
  // Exports
671
741
  // ---------------------------------------------------------------------------
@@ -683,4 +753,5 @@ module.exports = {
683
753
  searchTurnEmbeddings,
684
754
  searchSummaryEmbeddings,
685
755
  recordFeedback,
756
+ getFeedbackStats,
686
757
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@shadowforge0/aquifer-memory",
3
- "version": "1.5.8",
3
+ "version": "1.5.12",
4
4
  "description": "PG-native long-term memory for AI agents. Turn-level embedding, hybrid RRF ranking, optional knowledge graph. MCP server, CLI, and library API.",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -13,7 +13,15 @@
13
13
  "consumers/default/",
14
14
  "consumers/openclaw-ext/",
15
15
  "docs/",
16
- "scripts/"
16
+ "scripts/backfill-canonical-key.js",
17
+ "scripts/diagnose-fts-zh.js",
18
+ "scripts/diagnose-vector.js",
19
+ "scripts/drop-entity-state-history.sql",
20
+ "scripts/drop-insights.sql",
21
+ "scripts/extract-insights-from-recent-sessions.js",
22
+ "scripts/find-dburl-hints.js",
23
+ "scripts/install-openclaw.sh",
24
+ "scripts/smoke.mjs"
17
25
  ],
18
26
  "bin": {
19
27
  "aquifer": "./consumers/cli.js"