@shadowforge0/aquifer-memory 1.5.9 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +23 -0
- package/README.md +96 -73
- package/README_CN.md +659 -0
- package/README_TW.md +680 -0
- package/aquifer.config.example.json +34 -0
- package/consumers/claude-code.js +11 -11
- package/consumers/cli.js +374 -39
- package/consumers/codex-handoff.js +152 -0
- package/consumers/codex.js +1549 -0
- package/consumers/default/daily-entries.js +23 -4
- package/consumers/default/index.js +2 -2
- package/consumers/default/prompts/summary.js +6 -6
- package/consumers/mcp.js +131 -7
- package/consumers/openclaw-ext/index.js +0 -1
- package/consumers/openclaw-plugin.js +44 -4
- package/consumers/shared/config.js +28 -0
- package/consumers/shared/factory.js +2 -0
- package/consumers/shared/ingest.js +1 -1
- package/consumers/shared/normalize.js +14 -3
- package/consumers/shared/recall-format.js +53 -0
- package/consumers/shared/summary-parser.js +151 -0
- package/core/aquifer.js +384 -18
- package/core/finalization-review.js +319 -0
- package/core/insights.js +210 -58
- package/core/mcp-manifest.js +69 -2
- package/core/memory-bootstrap.js +188 -0
- package/core/memory-consolidation.js +1236 -0
- package/core/memory-promotion.js +544 -0
- package/core/memory-recall.js +247 -0
- package/core/memory-records.js +581 -0
- package/core/memory-safety-gate.js +224 -0
- package/core/session-finalization.js +350 -0
- package/core/storage.js +456 -2
- package/docs/getting-started.md +99 -0
- package/docs/postprocess-contract.md +2 -2
- package/docs/setup.md +51 -2
- package/package.json +31 -9
- package/pipeline/normalize/adapters/codex.js +106 -0
- package/pipeline/normalize/detect.js +3 -2
- package/schema/001-base.sql +3 -0
- package/schema/007-v1-foundation.sql +273 -0
- package/schema/008-session-finalizations.sql +50 -0
- package/schema/009-v1-assertion-plane.sql +193 -0
- package/schema/010-v1-finalization-review.sql +160 -0
- package/schema/011-v1-compaction-claim.sql +46 -0
- package/schema/012-v1-compaction-lease.sql +39 -0
- package/schema/013-v1-compaction-lineage.sql +193 -0
- package/scripts/backfill-canonical-key.js +250 -0
- package/scripts/codex-recovery.js +532 -0
- package/consumers/miranda/context-inject.js +0 -119
- package/consumers/miranda/daily-entries.js +0 -224
- package/consumers/miranda/index.js +0 -364
- package/consumers/miranda/instance.js +0 -55
- package/consumers/miranda/llm.js +0 -99
- package/consumers/miranda/profile.json +0 -145
- package/consumers/miranda/prompts/summary.js +0 -303
- package/consumers/miranda/recall-format.js +0 -76
- package/consumers/miranda/render-daily-md.js +0 -186
- package/consumers/miranda/workspace-files.js +0 -91
- package/scripts/drop-entity-state-history.sql +0 -17
- package/scripts/drop-insights.sql +0 -12
- package/scripts/install-openclaw.sh +0 -59
- package/scripts/queries.json +0 -45
- package/scripts/retro-recall-bench.js +0 -409
- package/scripts/sample-bench-queries.sql +0 -75
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Display label (Traditional Chinese) for each known memory type. labelFor()
// looks types up here and falls back to a generic label for unknown types.
const TYPE_LABELS = {
  state: '狀態',
  decision: '決策',
  fact: '事實',
  preference: '偏好',
  constraint: '限制',
  entity_note: '註記',
  open_loop: '未完成',
  conclusion: '判斷',
};

// Sort rank per memory type for the session-start context: lower numbers sort
// first. Types missing from this table are ranked 99 by the comparator.
const SESSION_START_TYPE_PRIORITY = {
  state: 0,
  open_loop: 1,
  constraint: 2,
  preference: 3,
  decision: 4,
  fact: 5,
  conclusion: 6,
  entity_note: 7,
};

// Sort rank per record authority, used as the tie-breaker after memory type.
// Lower numbers sort first; unknown authorities are ranked 99.
const AUTHORITY_PRIORITY = {
  user_explicit: 0,
  executable_evidence: 1,
  manual: 2,
  system: 3,
  verified_summary: 4,
  llm_inference: 5,
  raw_transcript: 6,
};

// Property names probed, in this order, when extracting display text from a
// memory-like object; the first non-empty one wins.
const MEMORY_KEYS = [
  'summary',
  'title',
  'decision',
  'item',
  'conclusion',
  'statement',
  'fact',
  'preference',
  'constraint',
  'state',
  'note',
  'text',
  'value',
];

// [structured-summary field name, memory type] pairs. Several field names may
// map to the same type (e.g. both 'states' and 'state').
const STRUCTURED_FIELDS = [
  ['states', 'state'],
  ['state', 'state'],
  ['decisions', 'decision'],
  ['important_facts', 'fact'],
  ['facts', 'fact'],
  ['preferences', 'preference'],
  ['constraints', 'constraint'],
  ['conclusions', 'conclusion'],
  ['entity_notes', 'entity_note'],
  ['open_loops', 'open_loop'],
];

// Default entries for the "do not carry" section of the review, used when
// neither opts.omit nor input.omit is supplied.
const DEFAULT_OMIT = [
  '整段逐字稿、工具輸出、debug 訊息',
  'DB row id、hash、message count 這類 audit 欄位',
  '已作廢、隔離、錯誤或 superseded 的記憶',
];
|
|
69
|
+
|
|
70
|
+
/**
 * Coerce a value to single-line text.
 *
 * Falsy values (null, undefined, '', 0, false, NaN) become ''. Anything else
 * is stringified, trimmed, and has every whitespace run collapsed to one space.
 *
 * @param {*} value - Value to normalize.
 * @returns {string} Normalized text, possibly empty.
 */
function normalizeText(value) {
  const raw = value ? String(value) : '';
  const trimmed = raw.trim();
  return trimmed.replace(/\s+/g, ' ');
}
|
|
73
|
+
|
|
74
|
+
/**
 * Normalize text and rewrite a few internal section names into the wording
 * used in human-facing output.
 *
 * @param {*} value - Value to sanitize; it is passed through normalizeText first.
 * @returns {string} Sanitized single-line text.
 */
function sanitizeHumanText(value) {
  // Applied in order; each pattern is global so every occurrence is replaced.
  const rewrites = [
    [/\bDB Write Plan\b/g, 'DB 寫入計畫'],
    [/\bLegacy Continuity Text\b/g, '舊 handoff 包裝文字'],
    [/\bStructured Summary\b/g, 'structured summary 原始欄位'],
    [/\braw JSON\b/gi, '原始 JSON'],
  ];
  let text = normalizeText(value);
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Normalize text and remove sentence-ending punctuation from its end.
 *
 * Handles the ideographic full stop, ASCII `. ! ?`, and the full-width
 * `!` / `?` forms (written as \uFF01 / \uFF1F so the pattern survives
 * Unicode normalization of the source file). Whitespace mixed into the
 * trailing run is removed too, so "done ." and "done" reduce to the same text.
 *
 * @param {*} value - Value to process; it is passed through normalizeText first.
 * @returns {string} Text without trailing terminal punctuation.
 */
function stripTerminalPunctuation(value) {
  // `$`-anchored, so a single match suffices and no `g` flag is needed.
  return normalizeText(value).replace(/[\s\u3002.!?\uFF01\uFF1F]+$/, '');
}
|
|
85
|
+
|
|
86
|
+
/**
 * Build the key used to compare two texts for equality: terminal punctuation
 * is stripped and the result is lower-cased.
 *
 * @param {*} value - Text to convert.
 * @returns {string} Comparison key.
 */
function comparable(value) {
  const stripped = stripTerminalPunctuation(value);
  return stripped.toLowerCase();
}
|
|
89
|
+
|
|
90
|
+
/**
 * Extract display text from a memory-like value.
 *
 * Strings are normalized and returned. For objects, the properties named in
 * MEMORY_KEYS are probed in order and the first non-empty primitive wins; if
 * none match and the object has an object-valued `payload`, the payload is
 * searched the same way.
 *
 * @param {*} value - String or memory-like object.
 * @returns {string} Normalized text, or '' when nothing usable is found.
 */
function firstText(value) {
  if (typeof value === 'string') return normalizeText(value);
  if (!value || typeof value !== 'object') return '';
  for (const key of MEMORY_KEYS) {
    const field = value[key];
    const kind = typeof field;
    // Only primitives are text. Stringifying an object or array here would
    // leak "[object Object]" into human-facing output, so skip those and let
    // a later key or the payload supply the text.
    if (kind !== 'string' && kind !== 'number' && kind !== 'boolean' && kind !== 'bigint') continue;
    const text = normalizeText(field);
    if (text) return text;
  }
  const payload = value.payload && typeof value.payload === 'object' ? value.payload : null;
  if (payload) return firstText(payload);
  return '';
}
|
|
101
|
+
|
|
102
|
+
/**
 * Read the memory type from a record, accepting the `memoryType`,
 * `memory_type` and `type` spellings (first truthy one wins).
 *
 * @param {*} value - Memory-like object.
 * @returns {*} The type found, or 'memory' when value is not an object or
 *   carries no truthy type.
 */
function memoryTypeOf(value) {
  const fallback = 'memory';
  if (value && typeof value === 'object') {
    return value.memoryType || value.memory_type || value.type || fallback;
  }
  return fallback;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Resolve the display label for a memory type.
 *
 * Tries the type as given, then its lower-cased string form, and finally
 * falls back to the generic label.
 *
 * @param {*} type - Memory type identifier.
 * @returns {string} Label from TYPE_LABELS, or '記憶' for unknown types.
 */
function labelFor(type) {
  // Own-property lookup only: a plain `TYPE_LABELS[type]` would return
  // inherited members (e.g. the Object constructor for type "constructor"),
  // which would then be interpolated into output as function source.
  const ownLabel = (key) => (
    Object.prototype.hasOwnProperty.call(TYPE_LABELS, key) ? TYPE_LABELS[key] : undefined
  );
  return ownLabel(type) || ownLabel(String(type || '').toLowerCase()) || '記憶';
}
|
|
110
|
+
|
|
111
|
+
/**
 * Append sanitized text to `out` unless it is empty or an equivalent entry
 * (same comparable() key) is already present. Mutates `out`.
 *
 * @param {string[]} out - Destination list.
 * @param {*} text - Candidate text.
 * @returns {void}
 */
function pushUnique(out, text) {
  const cleaned = sanitizeHumanText(text);
  const key = cleaned ? comparable(cleaned) : '';
  if (!key) return;
  for (const existing of out) {
    if (comparable(existing) === key) return;
  }
  out.push(cleaned);
}
|
|
118
|
+
|
|
119
|
+
/**
 * Format one output line as "<label>:<text><suffix>".
 *
 * @param {*} type - Memory type, resolved to a label via labelFor().
 * @param {*} text - Line content; normalized before use.
 * @param {string} [suffix=''] - Text appended verbatim after the content.
 * @returns {string} The formatted line, or '' when the content is empty.
 */
function asLine(type, text, suffix = '') {
  const content = normalizeText(text);
  return content ? `${labelFor(type)}:${content}${suffix}` : '';
}
|
|
124
|
+
|
|
125
|
+
/**
 * Sanitize text and shorten it to at most `max` characters (UTF-16 code
 * units), ending with '...' when something was cut.
 *
 * @param {*} text - Text to shorten; it is passed through sanitizeHumanText first.
 * @param {number} [max=220] - Maximum length of the result, ellipsis included.
 * @returns {string} The sanitized text, shortened if necessary.
 */
function truncate(text, max = 220) {
  const ellipsis = '...';
  const normalized = sanitizeHumanText(text);
  if (normalized.length <= max) return normalized;
  // Too little room for an ellipsis: hard-cut so the limit still holds.
  if (max <= ellipsis.length) return normalized.slice(0, Math.max(0, max));
  // Reserve room for the whole ellipsis; slicing to `max - 1` and appending
  // three characters would overshoot the limit by two.
  let keep = max - ellipsis.length;
  // Do not end the kept part on a lone high surrogate (half of an astral char).
  const lastUnit = normalized.charCodeAt(keep - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) keep -= 1;
  return `${normalized.slice(0, keep)}${ellipsis}`;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Append formatted lines for the items of a structured summary to `out`.
 *
 * Walks STRUCTURED_FIELDS in order; array-valued fields contribute one line
 * per item that yields text. Open-loop items that are objects with an `owner`
 * get an "(owner: ...)" suffix. Duplicates are dropped by pushUnique.
 *
 * @param {string[]} out - Destination list (mutated).
 * @param {object} [structuredSummary={}] - Summary keyed by structured field name.
 * @param {?function(string): boolean} [filter=null] - When given, only memory
 *   types for which it returns truthy are included.
 * @returns {void}
 */
function addStructuredItems(out, structuredSummary = {}, filter = null) {
  for (const [field, type] of STRUCTURED_FIELDS) {
    if (filter && !filter(type)) continue;
    const items = structuredSummary[field];
    if (!Array.isArray(items)) continue;
    for (const item of items) {
      const text = firstText(item);
      if (!text) continue;
      let ownerSuffix = '';
      if (type === 'open_loop' && item && typeof item === 'object') {
        const ownerName = normalizeText(item.owner);
        if (ownerName) ownerSuffix = `(owner: ${ownerName})`;
      }
      pushUnique(out, asLine(type, text, ownerSuffix));
    }
  }
}
|
|
145
|
+
|
|
146
|
+
/**
 * Build display lines for promoted memories, excluding open loops.
 *
 * Only results whose `action` is 'promote' are used; the memory is read from
 * `memory`, `record` or `candidate` (first truthy one).
 *
 * @param {Iterable<object>} [memoryResults=[]] - Promotion results.
 * @returns {string[]} De-duplicated lines.
 */
function promotedMemoryLines(memoryResults = []) {
  const collected = [];
  for (const entry of memoryResults || []) {
    if (entry && entry.action === 'promote') {
      const memory = entry.memory || entry.record || entry.candidate || {};
      const kind = memoryTypeOf(memory);
      if (kind !== 'open_loop') {
        pushUnique(collected, asLine(kind, firstText(memory)));
      }
    }
  }
  return collected;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Build display lines for open loops.
 *
 * Promoted results of type 'open_loop' are used first, with an
 * "(owner: ...)" suffix when the memory or its payload names an owner. If
 * they produce no lines, the open-loop items of the structured summary are
 * used instead.
 *
 * @param {Iterable<object>} [memoryResults=[]] - Promotion results.
 * @param {object} [structuredSummary={}] - Fallback structured summary.
 * @returns {string[]} De-duplicated lines.
 */
function openLoopLines(memoryResults = [], structuredSummary = {}) {
  const collected = [];
  for (const entry of memoryResults || []) {
    const isPromoted = Boolean(entry) && entry.action === 'promote';
    if (!isPromoted) continue;
    const memory = entry.memory || entry.record || entry.candidate || {};
    const kind = memoryTypeOf(memory);
    if (kind !== 'open_loop') continue;
    const owner = normalizeText(memory.owner || memory.payload?.owner);
    const suffix = owner ? `(owner: ${owner})` : '';
    pushUnique(collected, asLine(kind, firstText(memory), suffix));
  }
  if (collected.length > 0) return collected;
  addStructuredItems(collected, structuredSummary, (kind) => kind === 'open_loop');
  return collected;
}
|
|
173
|
+
|
|
174
|
+
/**
 * Build display lines for memories that were not promoted or are inactive.
 *
 * Every non-'promote' result that has text or a reason yields
 * "<action>:<text>(<reason>)". Each entry of `extraInactive` yields
 * "<status>:<text>(<reason>)" in the same way. Missing text is replaced by a
 * placeholder and the parenthesized reason is omitted when empty.
 *
 * @param {Iterable<object>} [memoryResults=[]] - Promotion results.
 * @param {Iterable<object|string>} [extraInactive=[]] - Additional inactive
 *   memories; plain strings are used as the text with status 'inactive'.
 * @returns {string[]} De-duplicated lines.
 */
function inactiveLines(memoryResults = [], extraInactive = []) {
  const lines = [];
  for (const result of memoryResults || []) {
    if (!result || result.action === 'promote') continue;
    const candidate = result.candidate || result.memory || result.record || {};
    const text = firstText(candidate);
    const reason = normalizeText(result.reason);
    const action = normalizeText(result.action || 'skipped');
    if (text || reason) {
      pushUnique(lines, `${action}:${text || '未命名候選'}${reason ? `(${reason})` : ''}`);
    }
  }
  for (const item of extraInactive || []) {
    // Read status/reason only from objects, so a null or undefined entry is
    // skipped (no text, no reason) instead of throwing on property access.
    const fields = item && typeof item === 'object' ? item : {};
    const text = firstText(item);
    const status = normalizeText(fields.status || fields.action || 'inactive');
    const reason = normalizeText(fields.reason || fields.obsoleteReason || fields.obsolete_reason);
    if (text || reason) {
      pushUnique(lines, `${status}:${text || '未命名記憶'}${reason ? `(${reason})` : ''}`);
    }
  }
  return lines;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Render lines as a "- item" bullet list joined by newlines.
 *
 * @param {?string[]} lines - Lines to render.
 * @returns {string} The bullet list, or '無' when there are no lines.
 */
function linesOrNone(lines) {
  const hasLines = Boolean(lines) && lines.length !== 0;
  return hasLines ? lines.map((line) => `- ${line}`).join('\n') : '無';
}
|
|
201
|
+
|
|
202
|
+
/**
 * Build "key: value" audit lines from the finalization input.
 *
 * Identifier and version fields prefer `input.audit` and fall back to the
 * top-level input (or `input.finalization.id` for the finalization id);
 * counters come from `input.memoryResult`. Entries whose value is undefined,
 * null or '' are left out; 0 is kept.
 *
 * @param {object} [input={}] - Finalization review input.
 * @returns {string[]} Audit lines in a fixed order.
 */
function buildAuditLines(input = {}) {
  const audit = input.audit || {};
  const counts = input.memoryResult || {};
  const finalization = input.finalization || {};
  const entries = [
    ['sessionId', audit.sessionId || input.sessionId],
    ['finalizationId', audit.finalizationId || finalization.id],
    ['handoffId', audit.handoffId || input.handoffId],
    ['transcriptHash', audit.transcriptHash || input.transcriptHash],
    ['promoted', counts.promoted],
    ['quarantined', counts.quarantined],
    ['skipped', counts.skipped],
    ['policyVersion', audit.policyVersion || input.policyVersion],
    ['schemaVersion', audit.schemaVersion || input.schemaVersion],
  ];
  const rendered = [];
  for (const [key, value] of entries) {
    const isBlank = value === undefined || value === null || value === '';
    if (!isBlank) rendered.push(`${key}: ${value}`);
  }
  return rendered;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Collect the "remembered" lines for a review.
 *
 * Promoted memories are used first; when they yield no lines, the
 * non-open-loop items of the structured summary are used instead.
 *
 * @param {object} [input={}] - Finalization review input.
 * @returns {string[]} De-duplicated lines.
 */
function collectRemembered(input = {}) {
  const remembered = promotedMemoryLines(input.memoryResults || []);
  if (remembered.length > 0) return remembered;
  const summary = input.structuredSummary || input.summary?.structuredSummary || {};
  addStructuredItems(remembered, summary, (kind) => kind !== 'open_loop');
  return remembered;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Build the lines to carry into the next session: the open loops plus an
 * optional "next step" line.
 *
 * Open loops come from `input.openLoops` when provided, otherwise they are
 * computed from the memory results and structured summary. The next step is
 * read from `input.next` or `input.metadata.handoff.next` and is skipped when
 * empty or equal to '無'.
 *
 * @param {object} [input={}] - Finalization review input.
 * @returns {string[]} De-duplicated lines.
 */
function buildCarryForwardLines(input = {}) {
  const carried = [];
  const loops = input.openLoops
    || openLoopLines(input.memoryResults, input.structuredSummary || input.summary?.structuredSummary || {});
  for (const loop of loops) {
    pushUnique(carried, loop);
  }
  const nextStep = normalizeText(input.next || input.metadata?.handoff?.next);
  if (nextStep && nextStep !== '無') {
    pushUnique(carried, `下一步:${nextStep}`);
  }
  return carried;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Render the human-readable finalization review.
 *
 * The output is a heading followed by sections for current status,
 * remembered items, open loops, inactive items, carry-forward items and items
 * to omit, separated by blank lines and ending with a newline. An audit
 * section is appended only when `opts.includeAudit === true`.
 *
 * @param {object} [input={}] - Finalization data (summary, memoryResults, ...).
 * @param {object} [opts={}] - Rendering options.
 * @param {boolean} [opts.preview] - Use the "about to write" heading instead
 *   of the "written" heading.
 * @param {boolean} [opts.includeAudit] - Append the audit section.
 * @param {string[]} [opts.omit] - Overrides `input.omit` and the default omit list.
 * @returns {string} The rendered review text.
 */
function buildFinalizationReview(input = {}, opts = {}) {
  const summary = input.summary || {};
  const structuredSummary = input.structuredSummary || summary.structuredSummary || {};
  const summaryText = input.summaryText || summary.summaryText || input.overview || '';
  const statusLine = truncate(input.currentStatus || summaryText || input.title || '已完成本段 finalization。');

  const memoryResults = input.memoryResults || [];
  const remembered = collectRemembered({ ...input, structuredSummary });
  const openLoops = openLoopLines(memoryResults, structuredSummary);
  const inactive = inactiveLines(memoryResults, input.inactive || []);
  const carryForward = buildCarryForwardLines({ ...input, structuredSummary, openLoops });

  const omit = [];
  const omitSource = opts.omit || input.omit || DEFAULT_OMIT;
  for (const entry of omitSource) {
    pushUnique(omit, entry);
  }

  const section = (title, items) => `${title}\n${linesOrNone(items)}`;
  const sections = [
    opts.preview ? '準備整理進 DB:' : '已整理進 DB:',
    section('目前狀態:', [statusLine]),
    section('已記住:', remembered),
    section('未完成:', openLoops),
    section('已作廢或隔離:', inactive),
    section('下一段只需要帶:', carryForward),
    section('不要帶:', omit),
  ];
  if (opts.includeAudit === true) {
    sections.push(section('Audit:', buildAuditLines(input)));
  }
  return `${sections.join('\n\n')}\n`;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Render the session-start context: the memory lines worth carrying into a
 * new session.
 *
 * Only records with status 'active' and `visibleInBootstrap` (or
 * `visible_in_bootstrap`) strictly true are considered. When `opts.asOf`
 * parses to a timestamp, records that are not yet valid, already expired, or
 * stale at that instant are dropped. The rest are ordered by memory type
 * priority, then authority priority, then most recently accepted first, then
 * original position. The first `limit` records are rendered and de-duplicated;
 * trailing lines are then dropped while the text exceeds `maxChars` (at least
 * one line is always kept).
 *
 * @param {object[]} [records=[]] - Memory records; entries that are not
 *   objects are ignored.
 * @param {object} [opts={}] - Options.
 * @param {string} [opts.asOf] - Timestamp (anything Date.parse accepts) used
 *   for the validity checks.
 * @param {number} [opts.limit=12] - Maximum records rendered, clamped to 1..50.
 * @param {number} [opts.maxChars=1800] - Soft cap on output length, minimum 120.
 * @returns {string} Heading plus bullet list (or '無'), ending with a newline.
 */
function buildSessionStartContext(records = [], opts = {}) {
  const asOf = opts.asOf ? Date.parse(opts.asOf) : null;
  const limit = Math.max(1, Math.min(50, opts.limit || 12));
  const maxChars = Math.max(120, opts.maxChars || 1800);
  const UNKNOWN_RANK = 99;
  // Own-property lookup so a type/authority such as "constructor" cannot pick
  // up an inherited member and turn the comparator result into NaN.
  const rankOf = (table, key) => {
    if (!Object.prototype.hasOwnProperty.call(table, key)) return UNKNOWN_RANK;
    return table[key] ?? UNKNOWN_RANK;
  };

  const active = [];
  for (const [index, record] of (records || []).entries()) {
    // Tolerate holes/nulls in the input instead of throwing on record.status.
    if (!record || typeof record !== 'object') continue;
    const status = record.status || 'candidate';
    const visible = record.visibleInBootstrap ?? record.visible_in_bootstrap;
    if (status !== 'active' || visible !== true) continue;
    if (Number.isFinite(asOf)) {
      const validFrom = Date.parse(record.validFrom || record.valid_from || '');
      const validTo = Date.parse(record.validTo || record.valid_to || '');
      const staleAfter = Date.parse(record.staleAfter || record.stale_after || '');
      // Unparseable or missing bounds yield NaN and are treated as "no bound".
      if (Number.isFinite(validFrom) && validFrom > asOf) continue;
      if (Number.isFinite(validTo) && validTo <= asOf) continue;
      if (Number.isFinite(staleAfter) && staleAfter <= asOf) continue;
    }
    active.push({ record, index });
  }

  active.sort((a, b) => {
    const aType = rankOf(SESSION_START_TYPE_PRIORITY, memoryTypeOf(a.record));
    const bType = rankOf(SESSION_START_TYPE_PRIORITY, memoryTypeOf(b.record));
    if (aType !== bType) return aType - bType;

    const aAuth = rankOf(AUTHORITY_PRIORITY, a.record.authority);
    const bAuth = rankOf(AUTHORITY_PRIORITY, b.record.authority);
    if (aAuth !== bAuth) return aAuth - bAuth;

    // Newest acceptance first; missing/unparseable timestamps count as 0.
    const aAccepted = Date.parse(a.record.acceptedAt || a.record.accepted_at || '') || 0;
    const bAccepted = Date.parse(b.record.acceptedAt || b.record.accepted_at || '') || 0;
    if (aAccepted !== bAccepted) return bAccepted - aAccepted;
    // Final tie-breaker keeps the caller's original order.
    return a.index - b.index;
  });

  const lines = [];
  for (const { record } of active.slice(0, limit)) {
    const type = memoryTypeOf(record);
    pushUnique(lines, asLine(type, firstText(record)));
  }

  const render = (selection) => `下一段只需要帶:\n${linesOrNone(selection)}\n`;
  let selected = lines;
  let text = render(selected);
  // Drop lowest-priority lines until the text fits; never drop the last line.
  while (text.length > maxChars && selected.length > 1) {
    selected = selected.slice(0, -1);
    text = render(selected);
  }
  return text;
}
|
|
315
|
+
|
|
316
|
+
// Public API of this module: the two renderers. All other functions in the
// file are internal helpers.
module.exports = {
  buildFinalizationReview,
  buildSessionStartContext,
};
|
package/core/insights.js
CHANGED
|
@@ -26,65 +26,20 @@ const DEFAULT_RECALL_WEIGHTS = Object.freeze({
|
|
|
26
26
|
recency: 0.10,
|
|
27
27
|
});
|
|
28
28
|
|
|
29
|
+
// Defaults for insight dedup configuration, merged under any caller-supplied
// options. With mode 'off' the semantic dedup path is not taken.
const DEFAULT_DEDUP = Object.freeze({
  mode: 'off',
  // Cosine similarity at or above this value counts as a semantic match.
  cosineThreshold: 0.88,
  // Lower bound of the "near miss" band below the threshold.
  closeBandFrom: 0.85,
});

// Accepted values for the dedup mode; anything else is coerced to 'off'.
const VALID_DEDUP_MODES = new Set(['off', 'shadow', 'enforce']);
|
|
36
|
+
|
|
29
37
|
// Recency linear decay horizon — an insight is treated as "fully recent" at
|
|
30
38
|
// creation (age=0) and "zero recency" at age >= recencyWindowDays. Beyond,
|
|
31
39
|
// recency contribution is clamped to 0 rather than going negative. Configurable
|
|
32
40
|
// via createAquifer({ insights: { recencyWindowDays } }).
|
|
33
41
|
const DEFAULT_RECENCY_WINDOW_DAYS = 90;
|
|
34
42
|
|
|
35
|
-
const LEADING_PUNCT_RE = /^[\s\-_.,;:!?'"()\[\]{}@#]+/;
|
|
36
|
-
const TRAILING_PUNCT_RE = /[\s\-_.,;:!?'"()\[\]{}@#]+$/;
|
|
37
|
-
|
|
38
|
-
function _normalizeText(input) {
|
|
39
|
-
if (typeof input !== 'string' || !input) return '';
|
|
40
|
-
let s = input.normalize('NFKC');
|
|
41
|
-
s = s.toLowerCase();
|
|
42
|
-
s = s.replace(/\s+/g, ' ');
|
|
43
|
-
s = s.replace(LEADING_PUNCT_RE, '');
|
|
44
|
-
s = s.replace(TRAILING_PUNCT_RE, '');
|
|
45
|
-
return s;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
function normalizeCanonicalClaim(text) {
|
|
49
|
-
return _normalizeText(text);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
function normalizeBody(text) {
|
|
53
|
-
return _normalizeText(text);
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
function normalizeEntitySet(entities) {
|
|
57
|
-
if (!entities || !Array.isArray(entities)) return '';
|
|
58
|
-
const { normalizeEntityName } = require('./entity');
|
|
59
|
-
const normalized = entities
|
|
60
|
-
.map(e => normalizeEntityName(e))
|
|
61
|
-
.filter(Boolean);
|
|
62
|
-
const deduped = [...new Set(normalized)];
|
|
63
|
-
deduped.sort();
|
|
64
|
-
return deduped.join('|');
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
function defaultCanonicalKey({ tenantId, agentId, type, canonicalClaim, entities }) {
|
|
68
|
-
const normClaim = normalizeCanonicalClaim(canonicalClaim);
|
|
69
|
-
const normEntities = normalizeEntitySet(entities);
|
|
70
|
-
const input = `${tenantId || ''}|${agentId || ''}|${type || ''}|${normClaim}|${normEntities}`;
|
|
71
|
-
return crypto.createHash('sha256').update(input).digest('hex');
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
function defaultIdempotencyKey({
|
|
75
|
-
tenantId, agentId, type, title, body, sourceSessionIds, evidenceWindow,
|
|
76
|
-
}) {
|
|
77
|
-
const sorted = (sourceSessionIds || []).slice().sort().join('|');
|
|
78
|
-
const winFrom = evidenceWindow && evidenceWindow.from ? new Date(evidenceWindow.from).toISOString() : '';
|
|
79
|
-
const winTo = evidenceWindow && evidenceWindow.to ? new Date(evidenceWindow.to).toISOString() : '';
|
|
80
|
-
// Hash must include body + window so legitimate revisions (same sessions but
|
|
81
|
-
// tightened body, or extended window) get a new key and replace the old row
|
|
82
|
-
// via supersede, not get swallowed as a duplicate.
|
|
83
|
-
return crypto.createHash('sha256')
|
|
84
|
-
.update(`${tenantId}|${agentId}|${type}|${title}|${body || ''}|${sorted}|${winFrom}|${winTo}`)
|
|
85
|
-
.digest('hex');
|
|
86
|
-
}
|
|
87
|
-
|
|
88
43
|
// ---------------------------------------------------------------------------
|
|
89
44
|
// Canonical identity helpers (Phase 2 C1)
|
|
90
45
|
//
|
|
@@ -160,6 +115,94 @@ function vecToPgLiteral(v) {
|
|
|
160
115
|
return `[${v.join(',')}]`;
|
|
161
116
|
}
|
|
162
117
|
|
|
118
|
+
// Cut a string to at most `limit` UTF-16 code units. Returns '' when `input`
// is not a string or `limit` is not a finite, non-negative number.
function truncate(input, limit) {
  const usable = typeof input === 'string' && Number.isFinite(limit) && limit >= 0;
  if (!usable) return '';
  return input.length > limit ? input.slice(0, limit) : input;
}
|
|
123
|
+
|
|
124
|
+
// Normalize `input` with normalizeBody(), then cut the result to `limit`
// characters with truncate().
function truncateNormalized(input, limit) {
  const normalized = normalizeBody(input);
  return truncate(normalized, limit);
}
|
|
127
|
+
|
|
128
|
+
// Resolve the caller's `dedup` option into a frozen
// { mode, cosineThreshold, closeBandFrom, ...extra } config.
//
//   dedup === true            -> defaults with mode 'enforce'
//   dedup is an object        -> defaults overlaid with its properties
//   anything else             -> defaults (mode 'off')
//
// The AQUIFER_INSIGHTS_DEDUP_MODE environment variable, when set to a valid
// mode, overrides the configured mode. Invalid values never throw: they are
// reported with console.warn and replaced by a default. `embedFn` is only
// inspected to warn that semantic dedup cannot run without it.
function resolveDedupConfig(dedup, embedFn) {
  let resolved;
  if (dedup === true) {
    resolved = { ...DEFAULT_DEDUP, mode: 'enforce' };
  } else if (dedup && typeof dedup === 'object') {
    resolved = { ...DEFAULT_DEDUP, ...dedup };
  } else {
    resolved = { ...DEFAULT_DEDUP };
  }

  const rawMode = typeof resolved.mode === 'string' ? resolved.mode.trim().toLowerCase() : resolved.mode;
  if (!VALID_DEDUP_MODES.has(rawMode)) {
    console.warn(`[aquifer] insights dedup: invalid mode ${JSON.stringify(resolved.mode)}; coercing to 'off'`);
    resolved.mode = 'off';
  } else {
    resolved.mode = rawMode;
  }

  // Environment override wins over the configured mode; invalid env values
  // are ignored without a warning.
  const envMode = process.env.AQUIFER_INSIGHTS_DEDUP_MODE;
  if (typeof envMode === 'string') {
    const normalizedEnvMode = envMode.trim().toLowerCase();
    if (VALID_DEDUP_MODES.has(normalizedEnvMode)) {
      resolved.mode = normalizedEnvMode;
    }
  }

  // Accept only real numbers and non-blank numeric strings. Number() maps
  // '', whitespace-only strings and [] to 0, and 0 would silently become a
  // "merge everything" threshold in enforce mode, so those must be rejected
  // rather than coerced. Returns null for anything unusable.
  const toFiniteNumber = (raw) => {
    if (typeof raw === 'number') return Number.isFinite(raw) ? raw : null;
    if (typeof raw === 'string' && raw.trim() !== '') {
      const parsed = Number(raw);
      return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
  };
  const isSentinel = (raw) => raw === null || raw === undefined || typeof raw === 'boolean';

  let cosineThreshold = toFiniteNumber(resolved.cosineThreshold);
  if (cosineThreshold === null) {
    if (isSentinel(resolved.cosineThreshold)) {
      console.warn(`[aquifer] insights dedup: invalid cosineThreshold ${JSON.stringify(resolved.cosineThreshold)}; defaulting to 0.88`);
    } else {
      console.warn('[aquifer] insights dedup: invalid cosineThreshold; defaulting to 0.88');
    }
    cosineThreshold = DEFAULT_DEDUP.cosineThreshold;
  } else if (cosineThreshold < 0.75 || cosineThreshold > 0.95) {
    // Outside the recommended band is allowed (with a warning) but the value
    // is kept within [0, 1].
    const clamped = Math.max(0, Math.min(1, cosineThreshold));
    console.warn(`[aquifer] insights dedup: cosineThreshold ${cosineThreshold} outside recommended [0.75,0.95]; using ${clamped}`);
    cosineThreshold = clamped;
  }
  resolved.cosineThreshold = cosineThreshold;

  let closeBandFrom = toFiniteNumber(resolved.closeBandFrom);
  if (closeBandFrom === null) {
    if (isSentinel(resolved.closeBandFrom)) {
      console.warn(`[aquifer] insights dedup: invalid closeBandFrom ${JSON.stringify(resolved.closeBandFrom)}; defaulting to 0.85`);
    } else {
      console.warn('[aquifer] insights dedup: invalid closeBandFrom; defaulting to 0.85');
    }
    closeBandFrom = DEFAULT_DEDUP.closeBandFrom;
  }
  // The near-miss band must start below the match threshold.
  if (closeBandFrom >= resolved.cosineThreshold) {
    const adjusted = Math.max(0, resolved.cosineThreshold - 0.03);
    console.warn(`[aquifer] insights dedup: closeBandFrom ${closeBandFrom} must be below cosineThreshold ${resolved.cosineThreshold}; using ${adjusted}`);
    closeBandFrom = adjusted;
  }
  resolved.closeBandFrom = closeBandFrom;

  if (resolved.mode !== 'off') {
    console.log(`[aquifer] insights dedup: mode=${resolved.mode} threshold=${resolved.cosineThreshold} close_band_from=${resolved.closeBandFrom}`);
    if (!embedFn) {
      console.warn('[aquifer] insights dedup: embedFn unavailable; semantic dedup disabled at runtime');
    }
  }

  return Object.freeze(resolved);
}
|
|
205
|
+
|
|
163
206
|
function mapRow(row) {
|
|
164
207
|
if (!row) return null;
|
|
165
208
|
return {
|
|
@@ -184,7 +227,7 @@ function mapRow(row) {
|
|
|
184
227
|
};
|
|
185
228
|
}
|
|
186
229
|
|
|
187
|
-
function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights, recencyWindowDays }) {
|
|
230
|
+
function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights, recencyWindowDays, dedup }) {
|
|
188
231
|
if (!pool) throw new Error('createInsights: pool is required');
|
|
189
232
|
if (!schema) throw new Error('createInsights: schema is required');
|
|
190
233
|
|
|
@@ -192,6 +235,24 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
|
|
|
192
235
|
const recencyWindow = Number.isFinite(recencyWindowDays) && recencyWindowDays > 0
|
|
193
236
|
? recencyWindowDays : DEFAULT_RECENCY_WINDOW_DAYS;
|
|
194
237
|
const tbl = `${schema}.insights`;
|
|
238
|
+
const dedupConfig = resolveDedupConfig(dedup, embedFn);
|
|
239
|
+
|
|
240
|
+
if (dedupConfig.mode !== 'off') {
|
|
241
|
+
pool.query(
|
|
242
|
+
`SELECT count(*)::int AS n FROM ${tbl}
|
|
243
|
+
WHERE canonical_key_v2 IS NULL AND status = 'active'`
|
|
244
|
+
).then(r => {
|
|
245
|
+
const n = r && r.rows && r.rows[0] ? Number(r.rows[0].n) : 0;
|
|
246
|
+
if (n > 0) {
|
|
247
|
+
console.warn(
|
|
248
|
+
`[aquifer] insights: ${n} active rows with canonical_key_v2 IS NULL. `
|
|
249
|
+
+ 'Run scripts/backfill-canonical-key.js to include them in canonical dedup.'
|
|
250
|
+
);
|
|
251
|
+
}
|
|
252
|
+
}).catch(() => {
|
|
253
|
+
// non-fatal
|
|
254
|
+
});
|
|
255
|
+
}
|
|
195
256
|
|
|
196
257
|
// -------------------------------------------------------------------------
|
|
197
258
|
// commitInsight
|
|
@@ -283,9 +344,101 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
|
|
|
283
344
|
toSupersede = Number(activeRow.id);
|
|
284
345
|
}
|
|
285
346
|
|
|
286
|
-
// Optional embedding.
|
|
287
347
|
let embedding = null;
|
|
288
|
-
|
|
348
|
+
let embeddingReady = false;
|
|
349
|
+
|
|
350
|
+
if (dedupConfig.mode !== 'off' && !toSupersede && embedFn) {
|
|
351
|
+
// Embed the incoming title+body once. If this throws, the label
|
|
352
|
+
// is genuinely 'embed_failed' — the candidate SELECT never ran.
|
|
353
|
+
let embedFailed = false;
|
|
354
|
+
try {
|
|
355
|
+
const v = await embedFn([`${title}\n\n${body}`]);
|
|
356
|
+
if (Array.isArray(v) && Array.isArray(v[0])) {
|
|
357
|
+
embedding = vecToPgLiteral(v[0]);
|
|
358
|
+
}
|
|
359
|
+
embeddingReady = true;
|
|
360
|
+
} catch {
|
|
361
|
+
embedFailed = true;
|
|
362
|
+
embeddingReady = true;
|
|
363
|
+
metadata = { ...metadata, dedupSkipped: 'embed_failed' };
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
if (!embedFailed && embedding) {
|
|
367
|
+
// Candidate lookup. If this throws (DB error), let it bubble
|
|
368
|
+
// to the outer commitInsight try/catch → AQ_INTERNAL. Do NOT
|
|
369
|
+
// mislabel it as embed_failed.
|
|
370
|
+
const semanticLookup = await pool.query(
|
|
371
|
+
`SELECT *, 1.0 - (embedding <=> $4::vector) AS cos_sim
|
|
372
|
+
FROM ${tbl}
|
|
373
|
+
WHERE tenant_id = $1
|
|
374
|
+
AND agent_id = $2
|
|
375
|
+
AND insight_type = $3
|
|
376
|
+
AND status = 'active'
|
|
377
|
+
AND embedding IS NOT NULL
|
|
378
|
+
ORDER BY embedding <=> $4::vector
|
|
379
|
+
LIMIT 1`,
|
|
380
|
+
[tenantId, agentId, type, embedding]
|
|
381
|
+
);
|
|
382
|
+
|
|
383
|
+
if (semanticLookup.rowCount > 0) {
|
|
384
|
+
const candidate = semanticLookup.rows[0];
|
|
385
|
+
const cosine = Number(candidate.cos_sim);
|
|
386
|
+
|
|
387
|
+
if (cosine >= dedupConfig.cosineThreshold) {
|
|
388
|
+
const candidateUpper = parseUpperFromRange(candidate.evidence_window);
|
|
389
|
+
const isStaleReplay = candidateUpper
|
|
390
|
+
&& new Date(toIso).getTime() < candidateUpper.getTime();
|
|
391
|
+
|
|
392
|
+
if (dedupConfig.mode === 'enforce') {
|
|
393
|
+
// Enforce path: stale-replay returns the candidate as
|
|
394
|
+
// duplicate; otherwise supersede.
|
|
395
|
+
if (isStaleReplay) {
|
|
396
|
+
return ok({ insight: mapRow(candidate), duplicate: true });
|
|
397
|
+
}
|
|
398
|
+
toSupersede = Number(candidate.id);
|
|
399
|
+
metadata = {
|
|
400
|
+
...metadata,
|
|
401
|
+
dedupVia: 'semantic',
|
|
402
|
+
dedupCandidate: { id: Number(candidate.id), cosine },
|
|
403
|
+
};
|
|
404
|
+
} else {
|
|
405
|
+
// Shadow path: always insert the new row, always record
|
|
406
|
+
// shadowMatch metadata. staleReplay flag tells reviewers
|
|
407
|
+
// the enforce-mode twin would have returned duplicate
|
|
408
|
+
// instead of superseding.
|
|
409
|
+
metadata = {
|
|
410
|
+
...metadata,
|
|
411
|
+
shadowMatch: {
|
|
412
|
+
candidateId: Number(candidate.id),
|
|
413
|
+
cosine,
|
|
414
|
+
threshold: dedupConfig.cosineThreshold,
|
|
415
|
+
candidateTitle: truncate(candidate.title, 200),
|
|
416
|
+
candidateBody: truncateNormalized(candidate.body, 200),
|
|
417
|
+
wouldSupersede: !isStaleReplay,
|
|
418
|
+
staleReplay: Boolean(isStaleReplay),
|
|
419
|
+
ranAt: new Date().toISOString(),
|
|
420
|
+
},
|
|
421
|
+
};
|
|
422
|
+
}
|
|
423
|
+
} else if (cosine >= dedupConfig.closeBandFrom) {
|
|
424
|
+
metadata = {
|
|
425
|
+
...metadata,
|
|
426
|
+
dedupNear: {
|
|
427
|
+
candidateId: Number(candidate.id),
|
|
428
|
+
cosine,
|
|
429
|
+
threshold: dedupConfig.cosineThreshold,
|
|
430
|
+
closeBandFrom: dedupConfig.closeBandFrom,
|
|
431
|
+
candidateTitle: truncate(candidate.title, 200),
|
|
432
|
+
candidateBody: truncateNormalized(candidate.body, 200),
|
|
433
|
+
},
|
|
434
|
+
};
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// Optional embedding.
|
|
441
|
+
if (embedFn && !embeddingReady) {
|
|
289
442
|
try {
|
|
290
443
|
const v = await embedFn([`${title}\n\n${body}`]);
|
|
291
444
|
if (Array.isArray(v) && Array.isArray(v[0])) embedding = vecToPgLiteral(v[0]);
|
|
@@ -485,13 +638,12 @@ function createInsights({ pool, schema, defaultTenantId, embedFn, recallWeights,
|
|
|
485
638
|
recallInsights,
|
|
486
639
|
markStale,
|
|
487
640
|
supersede,
|
|
488
|
-
_internal: {
|
|
641
|
+
_internal: { vecToPgLiteral, mapRow, weights, dedup: dedupConfig },
|
|
489
642
|
};
|
|
490
643
|
}
|
|
491
644
|
|
|
492
645
|
module.exports = {
|
|
493
646
|
createInsights,
|
|
494
|
-
defaultIdempotencyKey,
|
|
495
647
|
defaultCanonicalKey,
|
|
496
648
|
normalizeCanonicalClaim,
|
|
497
649
|
normalizeBody,
|