@shadowforge0/aquifer-memory 1.5.9 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.env.example +23 -0
  2. package/README.md +96 -73
  3. package/README_CN.md +659 -0
  4. package/README_TW.md +680 -0
  5. package/aquifer.config.example.json +34 -0
  6. package/consumers/claude-code.js +11 -11
  7. package/consumers/cli.js +374 -39
  8. package/consumers/codex-handoff.js +152 -0
  9. package/consumers/codex.js +1549 -0
  10. package/consumers/default/daily-entries.js +23 -4
  11. package/consumers/default/index.js +2 -2
  12. package/consumers/default/prompts/summary.js +6 -6
  13. package/consumers/mcp.js +131 -7
  14. package/consumers/openclaw-ext/index.js +0 -1
  15. package/consumers/openclaw-plugin.js +44 -4
  16. package/consumers/shared/config.js +28 -0
  17. package/consumers/shared/factory.js +2 -0
  18. package/consumers/shared/ingest.js +1 -1
  19. package/consumers/shared/normalize.js +14 -3
  20. package/consumers/shared/recall-format.js +53 -0
  21. package/consumers/shared/summary-parser.js +151 -0
  22. package/core/aquifer.js +384 -18
  23. package/core/finalization-review.js +319 -0
  24. package/core/insights.js +210 -58
  25. package/core/mcp-manifest.js +69 -2
  26. package/core/memory-bootstrap.js +188 -0
  27. package/core/memory-consolidation.js +1236 -0
  28. package/core/memory-promotion.js +544 -0
  29. package/core/memory-recall.js +247 -0
  30. package/core/memory-records.js +581 -0
  31. package/core/memory-safety-gate.js +224 -0
  32. package/core/session-finalization.js +350 -0
  33. package/core/storage.js +456 -2
  34. package/docs/getting-started.md +99 -0
  35. package/docs/postprocess-contract.md +2 -2
  36. package/docs/setup.md +51 -2
  37. package/package.json +31 -9
  38. package/pipeline/normalize/adapters/codex.js +106 -0
  39. package/pipeline/normalize/detect.js +3 -2
  40. package/schema/001-base.sql +3 -0
  41. package/schema/007-v1-foundation.sql +273 -0
  42. package/schema/008-session-finalizations.sql +50 -0
  43. package/schema/009-v1-assertion-plane.sql +193 -0
  44. package/schema/010-v1-finalization-review.sql +160 -0
  45. package/schema/011-v1-compaction-claim.sql +46 -0
  46. package/schema/012-v1-compaction-lease.sql +39 -0
  47. package/schema/013-v1-compaction-lineage.sql +193 -0
  48. package/scripts/backfill-canonical-key.js +250 -0
  49. package/scripts/codex-recovery.js +532 -0
  50. package/consumers/miranda/context-inject.js +0 -119
  51. package/consumers/miranda/daily-entries.js +0 -224
  52. package/consumers/miranda/index.js +0 -364
  53. package/consumers/miranda/instance.js +0 -55
  54. package/consumers/miranda/llm.js +0 -99
  55. package/consumers/miranda/profile.json +0 -145
  56. package/consumers/miranda/prompts/summary.js +0 -303
  57. package/consumers/miranda/recall-format.js +0 -76
  58. package/consumers/miranda/render-daily-md.js +0 -186
  59. package/consumers/miranda/workspace-files.js +0 -91
  60. package/scripts/drop-entity-state-history.sql +0 -17
  61. package/scripts/drop-insights.sql +0 -12
  62. package/scripts/install-openclaw.sh +0 -59
  63. package/scripts/queries.json +0 -45
  64. package/scripts/retro-recall-bench.js +0 -409
  65. package/scripts/sample-bench-queries.sql +0 -75
@@ -0,0 +1,319 @@
1
+ 'use strict';
2
+
3
// Human-readable label for each known memory type; looked up by labelFor().
const TYPE_LABELS = {
  state: '狀態',
  decision: '決策',
  fact: '事實',
  preference: '偏好',
  constraint: '限制',
  entity_note: '註記',
  open_loop: '未完成',
  conclusion: '判斷',
};

// Sort rank per memory type for the session-start context (lower sorts first).
const SESSION_START_TYPE_PRIORITY = {
  state: 0,
  open_loop: 1,
  constraint: 2,
  preference: 3,
  decision: 4,
  fact: 5,
  conclusion: 6,
  entity_note: 7,
};

// Sort rank per record authority (lower sorts first); used as the second
// sort key in buildSessionStartContext().
const AUTHORITY_PRIORITY = {
  user_explicit: 0,
  executable_evidence: 1,
  manual: 2,
  system: 3,
  verified_summary: 4,
  llm_inference: 5,
  raw_transcript: 6,
};

// Property names probed, in this order, by firstText() when extracting the
// display text of a memory-like object.
const MEMORY_KEYS = [
  'summary', 'title', 'decision', 'item', 'conclusion', 'statement', 'fact',
  'preference', 'constraint', 'state', 'note', 'text', 'value',
];

// [structured-summary field, memory type] pairs walked by addStructuredItems().
const STRUCTURED_FIELDS = [
  ['states', 'state'],
  ['state', 'state'],
  ['decisions', 'decision'],
  ['important_facts', 'fact'],
  ['facts', 'fact'],
  ['preferences', 'preference'],
  ['constraints', 'constraint'],
  ['conclusions', 'conclusion'],
  ['entity_notes', 'entity_note'],
  ['open_loops', 'open_loop'],
];

// Default "do not carry forward" bullet list used by buildFinalizationReview()
// when neither opts.omit nor input.omit is supplied.
const DEFAULT_OMIT = [
  '整段逐字稿、工具輸出、debug 訊息',
  'DB row id、hash、message count 這類 audit 欄位',
  '已作廢、隔離、錯誤或 superseded 的記憶',
];
69
+
70
/**
 * Coerce a value to a trimmed single-line string.
 *
 * Falsy inputs (undefined, null, '', 0, false, NaN) become ''. Every run of
 * whitespace in the remaining text is collapsed to one space.
 *
 * @param {*} value - Anything stringifiable.
 * @returns {string} The normalized text.
 */
function normalizeText(value) {
  const raw = value ? String(value) : '';
  const trimmed = raw.trim();
  return trimmed.replace(/\s+/g, ' ');
}
73
+
74
/**
 * Normalize text and rewrite a fixed set of internal phrases into the wording
 * used in the human-facing review.
 *
 * @param {*} value - Raw text (anything normalizeText accepts).
 * @returns {string} Normalized text with the phrase substitutions applied.
 */
function sanitizeHumanText(value) {
  // Applied in order; each pattern is global so every occurrence is replaced.
  const substitutions = [
    [/\bDB Write Plan\b/g, 'DB 寫入計畫'],
    [/\bLegacy Continuity Text\b/g, '舊 handoff 包裝文字'],
    [/\bStructured Summary\b/g, 'structured summary 原始欄位'],
    [/\braw JSON\b/gi, '原始 JSON'],
  ];
  let text = normalizeText(value);
  for (const [pattern, replacement] of substitutions) {
    text = text.replace(pattern, replacement);
  }
  return text;
}
81
+
82
/**
 * Normalize text and drop any run of sentence-ending punctuation at its end.
 *
 * Handles the ASCII marks `.`, `!`, `?`, the ideographic full stop `。`, and
 * the full-width `!` (U+FF01) / `?` (U+FF1F), so CJK text ending in
 * full-width marks compares equal to the same text without them.
 *
 * @param {*} value - Raw text (anything normalizeText accepts).
 * @returns {string} Normalized text without trailing terminal punctuation.
 */
function stripTerminalPunctuation(value) {
  // Full-width marks are written as escapes so they survive re-encoding.
  return normalizeText(value).replace(/[。.!?\uFF01\uFF1F]+$/, '');
}
85
+
86
/**
 * Build the key used to decide whether two lines are duplicates: the text
 * with trailing terminal punctuation removed, lower-cased.
 *
 * @param {*} value - Raw text.
 * @returns {string} Comparison key.
 */
function comparable(value) {
  const withoutEnding = stripTerminalPunctuation(value);
  return withoutEnding.toLowerCase();
}
89
+
90
/**
 * Extract the display text of a memory-like value.
 *
 * A string is normalized and returned. For an object, the keys in MEMORY_KEYS
 * are probed in order and the first non-empty text wins; if none match, the
 * object's `payload` (when it is an object) is searched the same way.
 *
 * A plain object stored under a memory key is searched recursively instead of
 * being stringified, which would otherwise yield "[object Object]". Recursion
 * is depth-limited so a self-referencing `payload` cannot overflow the stack.
 *
 * @param {*} value - String or memory-like object.
 * @param {number} [depth=0] - Internal recursion depth; callers omit it.
 * @returns {string} Normalized text, or '' when nothing usable is found.
 */
function firstText(value, depth = 0) {
  const MAX_DEPTH = 8;
  if (typeof value === 'string') return normalizeText(value);
  if (!value || typeof value !== 'object' || depth > MAX_DEPTH) return '';

  for (const key of MEMORY_KEYS) {
    const candidate = value[key];
    let text;
    if (candidate !== null && typeof candidate === 'object') {
      const proto = Object.getPrototypeOf(candidate);
      const isPlainObject = proto === Object.prototype || proto === null;
      // Arrays, Dates, etc. keep their existing String() rendering.
      text = isPlainObject ? firstText(candidate, depth + 1) : normalizeText(candidate);
    } else {
      text = normalizeText(candidate);
    }
    if (text) return text;
  }

  const payload = value.payload && typeof value.payload === 'object' ? value.payload : null;
  return payload ? firstText(payload, depth + 1) : '';
}
101
+
102
/**
 * Read the memory type of a record.
 *
 * Checks `memoryType`, then `memory_type`, then `type`; the first truthy one
 * wins. Non-objects and records without any of those yield 'memory'.
 *
 * @param {*} value - Memory-like record.
 * @returns {*} The type found, or 'memory'.
 */
function memoryTypeOf(value) {
  const isRecord = Boolean(value) && typeof value === 'object';
  if (!isRecord) return 'memory';
  if (value.memoryType) return value.memoryType;
  if (value.memory_type) return value.memory_type;
  return value.type || 'memory';
}
106
+
107
/**
 * Map a memory type to its human-readable label.
 *
 * Tries the type as given, then its lower-cased string form, and falls back
 * to the generic label '記憶'. Only TYPE_LABELS' own properties are consulted,
 * so type names that collide with Object.prototype members ('toString',
 * 'constructor', ...) get the generic label rather than an inherited function.
 *
 * @param {*} type - Memory type identifier.
 * @returns {string} Display label.
 */
function labelFor(type) {
  const ownLabel = (key) => (
    Object.prototype.hasOwnProperty.call(TYPE_LABELS, key) ? TYPE_LABELS[key] : undefined
  );
  return ownLabel(type) || ownLabel(String(type || '').toLowerCase()) || '記憶';
}
110
+
111
/**
 * Append sanitized text to `out` unless it is empty or an equivalent line
 * (per comparable()) is already present.
 *
 * @param {string[]} out - Accumulator, mutated in place.
 * @param {*} text - Candidate line.
 * @returns {void}
 */
function pushUnique(out, text) {
  const cleaned = sanitizeHumanText(text);
  if (!cleaned) return;

  const key = comparable(cleaned);
  if (!key) return;

  for (const existing of out) {
    if (comparable(existing) === key) return;
  }
  out.push(cleaned);
}
118
+
119
/**
 * Format one review line as "<label>:<text><suffix>".
 *
 * @param {*} type - Memory type, turned into a label via labelFor().
 * @param {*} text - Line body; normalized first.
 * @param {string} [suffix=''] - Text appended verbatim after the body.
 * @returns {string} The formatted line, or '' when the body is empty.
 */
function asLine(type, text, suffix = '') {
  const body = normalizeText(text);
  return body ? `${labelFor(type)}:${body}${suffix}` : '';
}
124
+
125
/**
 * Sanitize text and shorten it to at most `max` characters.
 *
 * Text that already fits is returned unchanged. Longer text is cut and ends
 * with '...'; the ellipsis counts toward `max`, so the result never exceeds
 * it. When `max` is too small to hold the ellipsis, the text is cut without
 * one.
 *
 * @param {*} text - Raw text.
 * @param {number} [max=220] - Maximum length of the returned string.
 * @returns {string} Sanitized, possibly shortened text.
 */
function truncate(text, max = 220) {
  const ELLIPSIS = '...';
  const normalized = sanitizeHumanText(text);
  if (normalized.length <= max) return normalized;
  if (max <= ELLIPSIS.length) return normalized.slice(0, Math.max(0, max));
  return `${normalized.slice(0, max - ELLIPSIS.length)}${ELLIPSIS}`;
}
130
+
131
/**
 * Append lines for the items of a structured summary to `out`.
 *
 * Walks STRUCTURED_FIELDS in order; fields that are not arrays are ignored.
 * Open-loop items that are objects with a non-empty `owner` get an
 * "(owner: ...)" suffix.
 *
 * @param {string[]} out - Accumulator, mutated in place via pushUnique().
 * @param {object} [structuredSummary={}] - Summary keyed by field name.
 * @param {?function(string): boolean} [filter=null] - When given, only types
 *   for which it returns truthy are emitted.
 * @returns {void}
 */
function addStructuredItems(out, structuredSummary = {}, filter = null) {
  for (const [field, type] of STRUCTURED_FIELDS) {
    const wanted = !filter || filter(type);
    if (!wanted) continue;

    const entries = structuredSummary[field];
    if (!Array.isArray(entries)) continue;

    for (const entry of entries) {
      const text = firstText(entry);
      if (!text) continue;

      let suffix = '';
      if (type === 'open_loop' && entry && typeof entry === 'object') {
        const owner = normalizeText(entry.owner);
        if (owner) suffix = `(owner: ${owner})`;
      }
      pushUnique(out, asLine(type, text, suffix));
    }
  }
}
145
+
146
/**
 * Build the lines for promoted memories, excluding open loops.
 *
 * Only results whose `action` is 'promote' are used. The memory is read from
 * `memory`, `record`, or `candidate`, in that order.
 *
 * @param {Array<object>} [memoryResults=[]] - Promotion results.
 * @returns {string[]} De-duplicated lines.
 */
function promotedMemoryLines(memoryResults = []) {
  const collected = [];
  for (const entry of memoryResults || []) {
    if (!entry) continue;
    if (entry.action !== 'promote') continue;

    const source = entry.memory || entry.record || entry.candidate || {};
    const kind = memoryTypeOf(source);
    if (kind !== 'open_loop') {
      pushUnique(collected, asLine(kind, firstText(source)));
    }
  }
  return collected;
}
157
+
158
/**
 * Build the lines for unfinished items (open loops).
 *
 * Promoted memories of type 'open_loop' are preferred. When there are none,
 * the structured summary's open-loop fields are used instead.
 *
 * @param {Array<object>} [memoryResults=[]] - Promotion results.
 * @param {object} [structuredSummary={}] - Fallback structured summary.
 * @returns {string[]} De-duplicated lines.
 */
function openLoopLines(memoryResults = [], structuredSummary = {}) {
  const loops = [];
  for (const entry of memoryResults || []) {
    if (!entry) continue;
    if (entry.action !== 'promote') continue;

    const source = entry.memory || entry.record || entry.candidate || {};
    const kind = memoryTypeOf(source);
    if (kind !== 'open_loop') continue;

    // The owner may sit on the memory itself or inside its payload.
    const owner = normalizeText(source.owner || source.payload?.owner);
    const suffix = owner ? `(owner: ${owner})` : '';
    pushUnique(loops, asLine(kind, firstText(source), suffix));
  }

  if (loops.length > 0) return loops;
  addStructuredItems(loops, structuredSummary, (kind) => kind === 'open_loop');
  return loops;
}
173
+
174
/**
 * Build the lines for memories that were not promoted or are no longer active.
 *
 * Each non-'promote' result becomes "<action>:<text>(<reason>)", and each
 * entry of `extraInactive` becomes "<status>:<text>(<reason>)". An entry is
 * skipped when it has neither text nor a reason.
 *
 * `extraInactive` entries may be strings, objects, or null/undefined. Only
 * objects are inspected for status and reason fields, so a null entry is
 * skipped instead of throwing.
 *
 * @param {Array<object>} [memoryResults=[]] - Promotion results.
 * @param {Array<object|string>} [extraInactive=[]] - Additional inactive items.
 * @returns {string[]} De-duplicated lines.
 */
function inactiveLines(memoryResults = [], extraInactive = []) {
  const lines = [];
  const addLine = (label, text, reason, placeholder) => {
    if (!text && !reason) return;
    pushUnique(lines, `${label}:${text || placeholder}${reason ? `(${reason})` : ''}`);
  };

  for (const result of memoryResults || []) {
    if (!result || result.action === 'promote') continue;
    const candidate = result.candidate || result.memory || result.record || {};
    addLine(
      normalizeText(result.action || 'skipped'),
      firstText(candidate),
      normalizeText(result.reason),
      '未命名候選',
    );
  }

  for (const item of extraInactive || []) {
    // Strings and null/undefined carry no status or reason fields.
    const meta = item && typeof item === 'object' ? item : {};
    addLine(
      normalizeText(meta.status || meta.action || 'inactive'),
      firstText(item),
      normalizeText(meta.reason || meta.obsoleteReason || meta.obsolete_reason),
      '未命名記憶',
    );
  }
  return lines;
}
196
+
197
/**
 * Render lines as a "- " bullet list, or '無' when there are none.
 *
 * @param {?string[]} lines - Lines to render.
 * @returns {string} Newline-joined bullets, or '無'.
 */
function linesOrNone(lines) {
  const isEmpty = !lines || lines.length === 0;
  return isEmpty ? '無' : lines.map((line) => `- ${line}`).join('\n');
}
201
+
202
/**
 * Build the "key: value" audit lines for a finalization review.
 *
 * Identifier and version fields prefer `input.audit` and fall back to the
 * matching top-level field (or `finalization.id`). Counters come from
 * `input.memoryResult`. Fields that are undefined, null, or '' are left out;
 * a counter of 0 is kept.
 *
 * @param {object} [input={}] - Finalization review input.
 * @returns {string[]} Audit lines in a fixed order.
 */
function buildAuditLines(input = {}) {
  const finalization = input.finalization || {};
  const memoryResult = input.memoryResult || {};
  const audit = input.audit || {};

  const fields = [
    ['sessionId', audit.sessionId || input.sessionId],
    ['finalizationId', audit.finalizationId || finalization.id],
    ['handoffId', audit.handoffId || input.handoffId],
    ['transcriptHash', audit.transcriptHash || input.transcriptHash],
    ['promoted', memoryResult.promoted],
    ['quarantined', memoryResult.quarantined],
    ['skipped', memoryResult.skipped],
    ['policyVersion', audit.policyVersion || input.policyVersion],
    ['schemaVersion', audit.schemaVersion || input.schemaVersion],
  ];

  const rendered = [];
  for (const [label, value] of fields) {
    if (value === undefined || value === null || value === '') continue;
    rendered.push(`${label}: ${value}`);
  }
  return rendered;
}
219
+
220
/**
 * Collect the "remembered" lines of a review.
 *
 * Promoted memories are preferred. When none produce a line, the
 * non-open-loop items of the structured summary are used instead.
 *
 * @param {object} [input={}] - Review input (`memoryResults`,
 *   `structuredSummary`, or `summary.structuredSummary`).
 * @returns {string[]} De-duplicated lines.
 */
function collectRemembered(input = {}) {
  const structured = input.structuredSummary || input.summary?.structuredSummary || {};
  const remembered = promotedMemoryLines(input.memoryResults || []);
  if (remembered.length > 0) return remembered;

  addStructuredItems(remembered, structured, (kind) => kind !== 'open_loop');
  return remembered;
}
229
+
230
/**
 * Build the lines the next session should carry forward: the open loops,
 * followed by the "next step" when one is given.
 *
 * Uses `input.openLoops` when provided, otherwise derives them with
 * openLoopLines(). The next step comes from `input.next` or
 * `input.metadata.handoff.next`; a value of '無' is ignored.
 *
 * @param {object} [input={}] - Review input.
 * @returns {string[]} De-duplicated lines.
 */
function buildCarryForwardLines(input = {}) {
  const carried = [];
  const pending = input.openLoops
    || openLoopLines(input.memoryResults, input.structuredSummary || input.summary?.structuredSummary || {});
  for (const line of pending) {
    pushUnique(carried, line);
  }

  const nextStep = normalizeText(input.next || input.metadata?.handoff?.next);
  const hasNextStep = Boolean(nextStep) && nextStep !== '無';
  if (hasNextStep) pushUnique(carried, `下一步:${nextStep}`);
  return carried;
}
239
+
240
/**
 * Render the human-readable finalization review.
 *
 * The output is a heading followed by sections separated by blank lines:
 * current status, remembered items, open loops, inactive items, what to carry
 * forward, what to omit, and optionally an audit section.
 *
 * @param {object} [input={}] - Finalization data (summary, structuredSummary,
 *   memoryResults, inactive, omit, ...).
 * @param {object} [opts={}] - Rendering options.
 * @param {boolean} [opts.preview] - Truthy selects the "about to write" heading.
 * @param {Iterable<string>} [opts.omit] - Overrides the omit list
 *   (falls back to input.omit, then DEFAULT_OMIT).
 * @param {boolean} [opts.includeAudit] - Exactly `true` appends the audit section.
 * @returns {string} The review text, ending with a newline.
 */
function buildFinalizationReview(input = {}, opts = {}) {
  const summary = input.summary || {};
  const structuredSummary = input.structuredSummary || summary.structuredSummary || {};
  const summaryText = input.summaryText || summary.summaryText || input.overview || '';
  const memoryResults = input.memoryResults || [];

  const statusLine = truncate(input.currentStatus || summaryText || input.title || '已完成本段 finalization。');
  const remembered = collectRemembered({ ...input, structuredSummary });
  const openLoops = openLoopLines(memoryResults, structuredSummary);
  const inactive = inactiveLines(memoryResults, input.inactive || []);
  const carryForward = buildCarryForwardLines({ ...input, structuredSummary, openLoops });

  const omit = [];
  for (const entry of opts.omit || input.omit || DEFAULT_OMIT) {
    pushUnique(omit, entry);
  }

  const sections = [
    ['目前狀態', [statusLine]],
    ['已記住', remembered],
    ['未完成', openLoops],
    ['已作廢或隔離', inactive],
    ['下一段只需要帶', carryForward],
    ['不要帶', omit],
  ];
  if (opts.includeAudit === true) {
    sections.push(['Audit', buildAuditLines(input)]);
  }

  const heading = opts.preview ? '準備整理進 DB:' : '已整理進 DB:';
  const blocks = sections.map(([title, lines]) => `${title}:\n${linesOrNone(lines)}`);
  return `${[heading, ...blocks].join('\n\n')}\n`;
}
266
+
267
/**
 * Render the session-start context: a bullet list of the most relevant
 * active memory records.
 *
 * Selection: only records with status 'active' and a bootstrap-visibility
 * flag of exactly `true` are kept. When `opts.asOf` parses to a timestamp,
 * records that are not yet valid, expired, or stale at that time are dropped.
 *
 * Ordering: memory type priority, then authority priority, then most recently
 * accepted first, then input order.
 *
 * The limit counts emitted lines: duplicates and records without text do not
 * use up a slot, so up to `limit` unique lines appear whenever that many
 * exist. Non-object records are skipped, and a non-numeric `limit` or
 * `maxChars` falls back to its default.
 *
 * @param {Array<object>} [records=[]] - Memory records.
 * @param {object} [opts={}] - Options.
 * @param {string} [opts.asOf] - Reference time accepted by Date.parse.
 * @param {number} [opts.limit=12] - Maximum number of lines, clamped to [1, 50].
 * @param {number} [opts.maxChars=1800] - Character budget, at least 120. The
 *   list is shortened from the end until it fits, but one line always remains.
 * @returns {string} The context text, ending with a newline.
 */
function buildSessionStartContext(records = [], opts = {}) {
  // Mirrors `value || fallback`, and also rejects values that are not finite numbers.
  const numberOr = (value, fallback) => {
    const parsed = Number(value);
    return Number.isFinite(parsed) && parsed !== 0 ? parsed : fallback;
  };
  // Own-property lookup so a type named like an Object.prototype member
  // ranks last instead of resolving to an inherited function.
  const rank = (table, key) => (
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : 99
  );
  const render = (selected) => `下一段只需要帶:\n${linesOrNone(selected)}\n`;

  const asOf = opts.asOf ? Date.parse(opts.asOf) : null;
  const limit = Math.max(1, Math.min(50, Math.floor(numberOr(opts.limit, 12))));
  const maxChars = Math.max(120, numberOr(opts.maxChars, 1800));

  const active = [];
  for (const [index, record] of (records || []).entries()) {
    if (!record || typeof record !== 'object') continue;
    const status = record.status || 'candidate';
    const visible = record.visibleInBootstrap ?? record.visible_in_bootstrap;
    if (status !== 'active' || visible !== true) continue;
    if (Number.isFinite(asOf)) {
      const validFrom = Date.parse(record.validFrom || record.valid_from || '');
      const validTo = Date.parse(record.validTo || record.valid_to || '');
      const staleAfter = Date.parse(record.staleAfter || record.stale_after || '');
      if (Number.isFinite(validFrom) && validFrom > asOf) continue;
      if (Number.isFinite(validTo) && validTo <= asOf) continue;
      if (Number.isFinite(staleAfter) && staleAfter <= asOf) continue;
    }
    active.push({ record, index });
  }

  active.sort((a, b) => {
    const aType = rank(SESSION_START_TYPE_PRIORITY, memoryTypeOf(a.record));
    const bType = rank(SESSION_START_TYPE_PRIORITY, memoryTypeOf(b.record));
    if (aType !== bType) return aType - bType;

    const aAuth = rank(AUTHORITY_PRIORITY, a.record.authority);
    const bAuth = rank(AUTHORITY_PRIORITY, b.record.authority);
    if (aAuth !== bAuth) return aAuth - bAuth;

    const aAccepted = Date.parse(a.record.acceptedAt || a.record.accepted_at || '') || 0;
    const bAccepted = Date.parse(b.record.acceptedAt || b.record.accepted_at || '') || 0;
    if (aAccepted !== bAccepted) return bAccepted - aAccepted;
    return a.index - b.index;
  });

  // Stop once `limit` unique lines exist, rather than slicing first and
  // losing slots to duplicates or empty records.
  const lines = [];
  for (const { record } of active) {
    if (lines.length >= limit) break;
    pushUnique(lines, asLine(memoryTypeOf(record), firstText(record)));
  }

  let selected = lines;
  let text = render(selected);
  while (text.length > maxChars && selected.length > 1) {
    selected = selected.slice(0, -1);
    text = render(selected);
  }
  return text;
}
315
+
316
// Public API of this module: the two text renderers.
module.exports = { buildFinalizationReview, buildSessionStartContext };
package/core/insights.js CHANGED
@@ -26,65 +26,20 @@ const DEFAULT_RECALL_WEIGHTS = Object.freeze({
26
26
  recency: 0.10,
27
27
  });
28
28
 
29
// Baseline dedup settings merged under any caller-supplied config by
// resolveDedupConfig(). Dedup is off unless explicitly enabled.
const DEFAULT_DEDUP = Object.freeze({
  mode: 'off',
  cosineThreshold: 0.88,
  closeBandFrom: 0.85,
});

// The only dedup modes resolveDedupConfig() accepts.
const VALID_DEDUP_MODES = new Set(['off', 'shadow', 'enforce']);
36
+
29
37
  // Recency linear decay horizon — an insight is treated as "fully recent" at
30
38
  // creation (age=0) and "zero recency" at age >= recencyWindowDays. Beyond,
31
39
  // recency contribution is clamped to 0 rather than going negative. Configurable
32
40
  // via createAquifer({ insights: { recencyWindowDays } }).
33
41
  const DEFAULT_RECENCY_WINDOW_DAYS = 90;
34
42
 
35
- const LEADING_PUNCT_RE = /^[\s\-_.,;:!?'"()\[\]{}@#]+/;
36
- const TRAILING_PUNCT_RE = /[\s\-_.,;:!?'"()\[\]{}@#]+$/;
37
-
38
- function _normalizeText(input) {
39
- if (typeof input !== 'string' || !input) return '';
40
- let s = input.normalize('NFKC');
41
- s = s.toLowerCase();
42
- s = s.replace(/\s+/g, ' ');
43
- s = s.replace(LEADING_PUNCT_RE, '');
44
- s = s.replace(TRAILING_PUNCT_RE, '');
45
- return s;
46
- }
47
-
48
- function normalizeCanonicalClaim(text) {
49
- return _normalizeText(text);
50
- }
51
-
52
- function normalizeBody(text) {
53
- return _normalizeText(text);
54
- }
55
-
56
- function normalizeEntitySet(entities) {
57
- if (!entities || !Array.isArray(entities)) return '';
58
- const { normalizeEntityName } = require('./entity');
59
- const normalized = entities
60
- .map(e => normalizeEntityName(e))
61
- .filter(Boolean);
62
- const deduped = [...new Set(normalized)];
63
- deduped.sort();
64
- return deduped.join('|');
65
- }
66
-
67
- function defaultCanonicalKey({ tenantId, agentId, type, canonicalClaim, entities }) {
68
- const normClaim = normalizeCanonicalClaim(canonicalClaim);
69
- const normEntities = normalizeEntitySet(entities);
70
- const input = `${tenantId || ''}|${agentId || ''}|${type || ''}|${normClaim}|${normEntities}`;
71
- return crypto.createHash('sha256').update(input).digest('hex');
72
- }
73
-
74
- function defaultIdempotencyKey({
75
- tenantId, agentId, type, title, body, sourceSessionIds, evidenceWindow,
76
- }) {
77
- const sorted = (sourceSessionIds || []).slice().sort().join('|');
78
- const winFrom = evidenceWindow && evidenceWindow.from ? new Date(evidenceWindow.from).toISOString() : '';
79
- const winTo = evidenceWindow && evidenceWindow.to ? new Date(evidenceWindow.to).toISOString() : '';
80
- // Hash must include body + window so legitimate revisions (same sessions but
81
- // tightened body, or extended window) get a new key and replace the old row
82
- // via supersede, not get swallowed as a duplicate.
83
- return crypto.createHash('sha256')
84
- .update(`${tenantId}|${agentId}|${type}|${title}|${body || ''}|${sorted}|${winFrom}|${winTo}`)
85
- .digest('hex');
86
- }
87
-
88
43
  // ---------------------------------------------------------------------------
89
44
  // Canonical identity helpers (Phase 2 C1)
90
45
  //
@@ -160,6 +115,94 @@ function vecToPgLiteral(v) {
160
115
  return `[${v.join(',')}]`;
161
116
  }
162
117
 
118
/**
 * Cut a string to at most `limit` characters, with no ellipsis.
 *
 * @param {*} input - Text to cut; non-strings yield ''.
 * @param {number} limit - Maximum length; a negative or non-finite value yields ''.
 * @returns {string} The input, shortened when necessary.
 */
function truncate(input, limit) {
  const usable = typeof input === 'string' && Number.isFinite(limit) && limit >= 0;
  if (!usable) return '';
  return input.length > limit ? input.slice(0, limit) : input;
}
123
+
124
/**
 * Run the input through normalizeBody(), then cut it to `limit` characters.
 *
 * @param {*} input - Text to normalize and cut.
 * @param {number} limit - Maximum length of the result.
 * @returns {string} The normalized, shortened text.
 */
function truncateNormalized(input, limit) {
  const normalized = normalizeBody(input);
  return truncate(normalized, limit);
}
127
+
128
/**
 * Convert a raw threshold setting to a number without the implicit
 * coercions of Number() that turn '' / whitespace / [] into 0.
 *
 * @param {*} raw - Configured value.
 * @returns {number} The numeric value, or NaN when `raw` is not number-like.
 */
function parseDedupNumber(raw) {
  if (typeof raw === 'number') return raw;
  if (typeof raw === 'bigint') return Number(raw);
  if (typeof raw === 'string' && raw.trim() !== '') return Number(raw);
  return NaN;
}

/**
 * Resolve the insights dedup configuration into a frozen, validated object.
 *
 * Accepted `dedup` shapes: `true` (enforce with defaults), `false`/`undefined`
 * (defaults, i.e. off), or an object merged over DEFAULT_DEDUP. Anything else
 * resolves to the defaults.
 *
 * Validation:
 * - `mode` is trimmed and lower-cased; an invalid mode becomes 'off'.
 * - A valid AQUIFER_INSIGHTS_DEDUP_MODE environment value overrides the mode;
 *   an invalid one is ignored.
 * - `cosineThreshold` must be number-like, otherwise it defaults to 0.88.
 *   Values outside the recommended [0.75, 0.95] are kept but clamped to
 *   [0, 1], with a warning.
 * - `closeBandFrom` must be number-like, otherwise it defaults to 0.85. If it
 *   is not below the threshold it is moved to `threshold - 0.03` (min 0).
 *
 * Every correction is reported through console.warn. When the mode is not
 * 'off', the effective settings are logged and a missing `embedFn` is warned
 * about.
 *
 * @param {boolean|object|undefined} dedup - Caller-supplied dedup setting.
 * @param {?Function} embedFn - Embedding function; only checked for presence.
 * @returns {Readonly<{mode: string, cosineThreshold: number, closeBandFrom: number}>}
 *   The frozen config (plus any extra keys from an object `dedup`).
 */
function resolveDedupConfig(dedup, embedFn) {
  const isSentinel = (value) => (
    value === null || value === undefined || typeof value === 'boolean'
  );

  let resolved;
  if (dedup === true) {
    resolved = { ...DEFAULT_DEDUP, mode: 'enforce' };
  } else if (dedup && typeof dedup === 'object') {
    resolved = { ...DEFAULT_DEDUP, ...dedup };
  } else {
    resolved = { ...DEFAULT_DEDUP };
  }

  const rawMode = typeof resolved.mode === 'string' ? resolved.mode.trim().toLowerCase() : resolved.mode;
  if (!VALID_DEDUP_MODES.has(rawMode)) {
    console.warn(`[aquifer] insights dedup: invalid mode ${JSON.stringify(resolved.mode)}; coercing to 'off'`);
    resolved.mode = 'off';
  } else {
    resolved.mode = rawMode;
  }

  const envMode = process.env.AQUIFER_INSIGHTS_DEDUP_MODE;
  if (typeof envMode === 'string') {
    const normalizedEnvMode = envMode.trim().toLowerCase();
    if (VALID_DEDUP_MODES.has(normalizedEnvMode)) {
      resolved.mode = normalizedEnvMode;
    }
  }

  // Reject anything that is not number-like BEFORE numeric coercion: Number()
  // maps null, false, '', ' ' and [] to 0, and 0 would silently become a
  // "merge everything" threshold in enforce mode.
  let cosineThreshold;
  if (isSentinel(resolved.cosineThreshold)) {
    console.warn(`[aquifer] insights dedup: invalid cosineThreshold ${JSON.stringify(resolved.cosineThreshold)}; defaulting to 0.88`);
    cosineThreshold = DEFAULT_DEDUP.cosineThreshold;
  } else {
    cosineThreshold = parseDedupNumber(resolved.cosineThreshold);
    if (!Number.isFinite(cosineThreshold)) {
      console.warn('[aquifer] insights dedup: invalid cosineThreshold; defaulting to 0.88');
      cosineThreshold = DEFAULT_DEDUP.cosineThreshold;
    } else if (cosineThreshold < 0.75 || cosineThreshold > 0.95) {
      // Outside the recommended band is allowed; outside [0, 1] is clamped.
      const clamped = Math.max(0, Math.min(1, cosineThreshold));
      console.warn(`[aquifer] insights dedup: cosineThreshold ${cosineThreshold} outside recommended [0.75,0.95]; using ${clamped}`);
      cosineThreshold = clamped;
    }
  }
  resolved.cosineThreshold = cosineThreshold;

  let closeBandFrom;
  if (isSentinel(resolved.closeBandFrom)) {
    console.warn(`[aquifer] insights dedup: invalid closeBandFrom ${JSON.stringify(resolved.closeBandFrom)}; defaulting to 0.85`);
    closeBandFrom = DEFAULT_DEDUP.closeBandFrom;
  } else {
    closeBandFrom = parseDedupNumber(resolved.closeBandFrom);
    if (!Number.isFinite(closeBandFrom)) {
      console.warn('[aquifer] insights dedup: invalid closeBandFrom; defaulting to 0.85');
      closeBandFrom = DEFAULT_DEDUP.closeBandFrom;
    }
  }
  if (closeBandFrom >= resolved.cosineThreshold) {
    const adjusted = Math.max(0, resolved.cosineThreshold - 0.03);
    console.warn(`[aquifer] insights dedup: closeBandFrom ${closeBandFrom} must be below cosineThreshold ${resolved.cosineThreshold}; using ${adjusted}`);
    closeBandFrom = adjusted;
  }
  resolved.closeBandFrom = closeBandFrom;

  if (resolved.mode !== 'off') {
    console.log(`[aquifer] insights dedup: mode=${resolved.mode} threshold=${resolved.cosineThreshold} close_band_from=${resolved.closeBandFrom}`);
    if (!embedFn) {
      console.warn('[aquifer] insights dedup: embedFn unavailable; semantic dedup disabled at runtime');
    }
  }

  return Object.freeze(resolved);
}
205
+
163
206
  function mapRow(row) {
164
207
  if (!row) return null;
165
208
  return {
@@ -184,7 +227,7 @@ function mapRow(row) {
184
227
  };
185
228
  }
186
229
 
187
- function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights, recencyWindowDays }) {
230
+ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights, recencyWindowDays, dedup }) {
188
231
  if (!pool) throw new Error('createInsights: pool is required');
189
232
  if (!schema) throw new Error('createInsights: schema is required');
190
233
 
@@ -192,6 +235,24 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
192
235
  const recencyWindow = Number.isFinite(recencyWindowDays) && recencyWindowDays > 0
193
236
  ? recencyWindowDays : DEFAULT_RECENCY_WINDOW_DAYS;
194
237
  const tbl = `${schema}.insights`;
238
+ const dedupConfig = resolveDedupConfig(dedup, embedFn);
239
+
240
+ if (dedupConfig.mode !== 'off') {
241
+ pool.query(
242
+ `SELECT count(*)::int AS n FROM ${tbl}
243
+ WHERE canonical_key_v2 IS NULL AND status = 'active'`
244
+ ).then(r => {
245
+ const n = r && r.rows && r.rows[0] ? Number(r.rows[0].n) : 0;
246
+ if (n > 0) {
247
+ console.warn(
248
+ `[aquifer] insights: ${n} active rows with canonical_key_v2 IS NULL. `
249
+ + 'Run scripts/backfill-canonical-key.js to include them in canonical dedup.'
250
+ );
251
+ }
252
+ }).catch(() => {
253
+ // non-fatal
254
+ });
255
+ }
195
256
 
196
257
  // -------------------------------------------------------------------------
197
258
  // commitInsight
@@ -283,9 +344,101 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
283
344
  toSupersede = Number(activeRow.id);
284
345
  }
285
346
 
286
- // Optional embedding.
287
347
  let embedding = null;
288
- if (embedFn) {
348
+ let embeddingReady = false;
349
+
350
+ if (dedupConfig.mode !== 'off' && !toSupersede && embedFn) {
351
+ // Embed the incoming title+body once. If this throws, the label
352
+ // is genuinely 'embed_failed' — the candidate SELECT never ran.
353
+ let embedFailed = false;
354
+ try {
355
+ const v = await embedFn([`${title}\n\n${body}`]);
356
+ if (Array.isArray(v) && Array.isArray(v[0])) {
357
+ embedding = vecToPgLiteral(v[0]);
358
+ }
359
+ embeddingReady = true;
360
+ } catch {
361
+ embedFailed = true;
362
+ embeddingReady = true;
363
+ metadata = { ...metadata, dedupSkipped: 'embed_failed' };
364
+ }
365
+
366
+ if (!embedFailed && embedding) {
367
+ // Candidate lookup. If this throws (DB error), let it bubble
368
+ // to the outer commitInsight try/catch → AQ_INTERNAL. Do NOT
369
+ // mislabel it as embed_failed.
370
+ const semanticLookup = await pool.query(
371
+ `SELECT *, 1.0 - (embedding <=> $4::vector) AS cos_sim
372
+ FROM ${tbl}
373
+ WHERE tenant_id = $1
374
+ AND agent_id = $2
375
+ AND insight_type = $3
376
+ AND status = 'active'
377
+ AND embedding IS NOT NULL
378
+ ORDER BY embedding <=> $4::vector
379
+ LIMIT 1`,
380
+ [tenantId, agentId, type, embedding]
381
+ );
382
+
383
+ if (semanticLookup.rowCount > 0) {
384
+ const candidate = semanticLookup.rows[0];
385
+ const cosine = Number(candidate.cos_sim);
386
+
387
+ if (cosine >= dedupConfig.cosineThreshold) {
388
+ const candidateUpper = parseUpperFromRange(candidate.evidence_window);
389
+ const isStaleReplay = candidateUpper
390
+ && new Date(toIso).getTime() < candidateUpper.getTime();
391
+
392
+ if (dedupConfig.mode === 'enforce') {
393
+ // Enforce path: stale-replay returns the candidate as
394
+ // duplicate; otherwise supersede.
395
+ if (isStaleReplay) {
396
+ return ok({ insight: mapRow(candidate), duplicate: true });
397
+ }
398
+ toSupersede = Number(candidate.id);
399
+ metadata = {
400
+ ...metadata,
401
+ dedupVia: 'semantic',
402
+ dedupCandidate: { id: Number(candidate.id), cosine },
403
+ };
404
+ } else {
405
+ // Shadow path: always insert the new row, always record
406
+ // shadowMatch metadata. staleReplay flag tells reviewers
407
+ // the enforce-mode twin would have returned duplicate
408
+ // instead of superseding.
409
+ metadata = {
410
+ ...metadata,
411
+ shadowMatch: {
412
+ candidateId: Number(candidate.id),
413
+ cosine,
414
+ threshold: dedupConfig.cosineThreshold,
415
+ candidateTitle: truncate(candidate.title, 200),
416
+ candidateBody: truncateNormalized(candidate.body, 200),
417
+ wouldSupersede: !isStaleReplay,
418
+ staleReplay: Boolean(isStaleReplay),
419
+ ranAt: new Date().toISOString(),
420
+ },
421
+ };
422
+ }
423
+ } else if (cosine >= dedupConfig.closeBandFrom) {
424
+ metadata = {
425
+ ...metadata,
426
+ dedupNear: {
427
+ candidateId: Number(candidate.id),
428
+ cosine,
429
+ threshold: dedupConfig.cosineThreshold,
430
+ closeBandFrom: dedupConfig.closeBandFrom,
431
+ candidateTitle: truncate(candidate.title, 200),
432
+ candidateBody: truncateNormalized(candidate.body, 200),
433
+ },
434
+ };
435
+ }
436
+ }
437
+ }
438
+ }
439
+
440
+ // Optional embedding.
441
+ if (embedFn && !embeddingReady) {
289
442
  try {
290
443
  const v = await embedFn([`${title}\n\n${body}`]);
291
444
  if (Array.isArray(v) && Array.isArray(v[0])) embedding = vecToPgLiteral(v[0]);
@@ -485,13 +638,12 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
485
638
  recallInsights,
486
639
  markStale,
487
640
  supersede,
488
- _internal: { defaultIdempotencyKey, vecToPgLiteral, mapRow, weights },
641
+ _internal: { vecToPgLiteral, mapRow, weights, dedup: dedupConfig },
489
642
  };
490
643
  }
491
644
 
492
645
  module.exports = {
493
646
  createInsights,
494
- defaultIdempotencyKey,
495
647
  defaultCanonicalKey,
496
648
  normalizeCanonicalClaim,
497
649
  normalizeBody,