@shadowforge0/aquifer-memory 1.3.0 → 1.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -1
- package/consumers/default/index.js +17 -4
- package/consumers/mcp.js +21 -0
- package/consumers/miranda/index.js +15 -4
- package/consumers/miranda/recall-format.js +5 -3
- package/consumers/shared/config.js +8 -0
- package/consumers/shared/factory.js +2 -1
- package/consumers/shared/llm.js +1 -1
- package/consumers/shared/recall-format.js +21 -1
- package/core/aquifer.js +669 -92
- package/core/entity-state.js +483 -0
- package/core/insights.js +499 -0
- package/core/mcp-manifest.js +1 -1
- package/core/storage.js +82 -5
- package/package.json +1 -1
- package/pipeline/extract-state-changes.js +205 -0
- package/schema/001-base.sql +186 -16
- package/schema/002-entities.sql +35 -1
- package/schema/004-completion.sql +23 -7
- package/schema/005-entity-state-history.sql +87 -0
- package/schema/006-insights.sql +138 -0
- package/scripts/diagnose-fts-zh.js +37 -4
- package/scripts/drop-entity-state-history.sql +17 -0
- package/scripts/drop-insights.sql +12 -0
- package/scripts/extract-insights-from-recent-sessions.js +315 -0
- package/scripts/find-dburl-hints.js +29 -0
- package/scripts/queries.json +45 -0
- package/scripts/retro-recall-bench.js +409 -0
- package/scripts/sample-bench-queries.sql +75 -0
package/core/aquifer.js
CHANGED
|
@@ -42,15 +42,45 @@ function loadSql(filename, schema) {
|
|
|
42
42
|
// ---------------------------------------------------------------------------
|
|
43
43
|
|
|
44
44
|
/**
 * Build the plain-text document handed to the reranker for one recall row.
 *
 * Structured-summary fields (title, overview, topics, decisions, open loops)
 * are preferred when present; otherwise the bare summary text is used. The
 * matched turn is appended unless one of the parts already contains it.
 *
 * @param {object} row - Recall row. Reads `structured_summary`,
 *   `summary_text`, `summary_snippet` and `matched_turn_text`.
 * @param {number} maxChars - Maximum length of the returned string, counted in
 *   UTF-16 code units. No truncation happens when this is not a number.
 * @returns {string} The document text; empty when the row carries no text.
 */
function buildRerankDocument(row, maxChars) {
  const ss = row.structured_summary || null;
  const parts = [];

  // Only keep non-empty text. A whitespace-only field must not count as
  // content, otherwise it would suppress the summary_text fallback below.
  const pushText = (value, prefix = '') => {
    const text = String(value).trim();
    if (text) parts.push(`${prefix}${text}`);
  };

  // Entries may be plain strings or objects; `pick` extracts the text of an
  // object entry. Blank entries are dropped before joining.
  const joinItems = (list, pick) => list
    .map((item) => (typeof item === 'string' ? item : pick(item || {}) || ''))
    .map((text) => String(text).trim())
    .filter(Boolean)
    .join(' / ');

  if (ss) {
    if (ss.title) pushText(ss.title);
    if (ss.overview) pushText(ss.overview);
    if (Array.isArray(ss.topics)) {
      const topics = joinItems(
        ss.topics,
        (t) => (t.name ? `${t.name}${t.summary ? ': ' + t.summary : ''}` : '')
      );
      if (topics) parts.push(topics);
    }
    if (Array.isArray(ss.decisions)) {
      const decisions = joinItems(ss.decisions, (d) => d.decision);
      if (decisions) parts.push(`Decisions: ${decisions}`);
    }
    if (Array.isArray(ss.open_loops)) {
      const loops = joinItems(ss.open_loops, (l) => l.item);
      if (loops) parts.push(`Open loops: ${loops}`);
    }
  }

  // Fall back to the bare summary when the structured summary gave nothing.
  if (parts.length === 0) {
    pushText(row.summary_text || row.summary_snippet || '');
  }

  const turn = String(row.matched_turn_text || '').replace(/\s+/g, ' ').trim();
  if (turn) {
    // `turn` is whitespace-collapsed, so collapse each part the same way
    // before checking whether the turn is already included.
    const alreadyPresent = parts.some((part) => part.replace(/\s+/g, ' ').includes(turn));
    if (!alreadyPresent) parts.push(`Matched turn: ${turn}`);
  }

  let text = parts.join('\n\n').replace(/[ \t]+/g, ' ').trim();
  if (text.length > maxChars) {
    text = text.slice(0, maxChars);
    // Do not leave a dangling high surrogate when the cut lands inside an
    // astral character (emoji, rare CJK).
    const last = text.charCodeAt(text.length - 1);
    if (last >= 0xd800 && last <= 0xdbff) text = text.slice(0, -1);
  }
  return text;
}
|
|
@@ -92,6 +122,48 @@ function resolveEmbedFn(embedConfig, env) {
|
|
|
92
122
|
// createAquifer
|
|
93
123
|
// ---------------------------------------------------------------------------
|
|
94
124
|
|
|
125
|
+
// Decide whether to invoke the optional reranker on this recall call.
|
|
126
|
+
// Returns `{ apply: boolean, reason: string }`. Pure function — no side effects.
|
|
127
|
+
/**
 * Decide whether the optional reranker should run for this recall call.
 * Pure function: it only inspects its arguments.
 *
 * Checks run in this order: auto-trigger disabled, entity shortcut,
 * shortlist size bounds, query length, FTS-only rule, allowed modes, and
 * finally the gap between the top two scores.
 *
 * @param {object} args
 * @param {string} args.query - Raw query text.
 * @param {string} args.mode - Recall mode; `'fts'` has its own rule.
 * @param {Array<{_score?: number}>} args.ranked - Current shortlist. A
 *   missing or non-array value is treated as an empty shortlist.
 * @param {boolean} args.hasEntities - Whether the call carries entities.
 * @param {object} args.autoTrigger - Resolved auto-trigger settings. A
 *   missing value is treated as disabled.
 * @returns {{apply: boolean, reason: string}} Decision plus a reason code.
 */
function shouldAutoRerank({ query, mode, ranked, hasEntities, autoTrigger }) {
  if (!autoTrigger || !autoTrigger.enabled) {
    return { apply: false, reason: 'auto_disabled' };
  }

  if (hasEntities && autoTrigger.alwaysWhenEntities) {
    return { apply: true, reason: 'entities_present' };
  }

  // Tolerate a missing shortlist instead of throwing on `.length`.
  const shortlist = Array.isArray(ranked) ? ranked : [];
  const len = shortlist.length;
  if (len < autoTrigger.minResults) return { apply: false, reason: 'too_few_results' };
  if (len > autoTrigger.maxResults) return { apply: false, reason: 'too_many_results' };

  // A query is "too short" only when it fails BOTH the character and the
  // whitespace-token threshold; passing either one is enough.
  const q = String(query || '').trim();
  const tokenCount = q.split(/\s+/).filter(Boolean).length;
  if (q.length < autoTrigger.minQueryChars && tokenCount < autoTrigger.minQueryTokens) {
    return { apply: false, reason: 'query_too_short' };
  }

  // FTS-only path is decided purely by shortlist width and is evaluated
  // before the `modes` allow-list.
  if (mode === 'fts') {
    return len > autoTrigger.ftsMinResults
      ? { apply: true, reason: 'fts_wide_shortlist' }
      : { apply: false, reason: 'fts_shortlist_too_narrow' };
  }

  // Accept either an array of modes or a single mode string.
  const allowedModes = [].concat(autoTrigger.modes ?? []);
  if (!allowedModes.includes(mode)) {
    return { apply: false, reason: 'mode_not_in_autotrigger_modes' };
  }

  // Hybrid: a small gap between the two best scores means the ranking is
  // uncertain enough for the reranker to help.
  if (len >= 2) {
    const s0 = shortlist[0]?._score ?? 0;
    const s1 = shortlist[1]?._score ?? 0;
    if (s0 - s1 <= autoTrigger.maxTopScoreGap) {
      return { apply: true, reason: 'top_score_gap_close' };
    }
  }

  return { apply: false, reason: 'top_score_gap_wide' };
}
|
|
166
|
+
|
|
95
167
|
function createAquifer(config = {}) {
|
|
96
168
|
// v1.2.0: db falls back to DATABASE_URL / AQUIFER_DB_URL env so hosts can
|
|
97
169
|
// call createAquifer() with zero args for install-and-go.
|
|
@@ -176,6 +248,24 @@ function createAquifer(config = {}) {
|
|
|
176
248
|
const defaultRerankTopK = rerankConfig ? Math.max(1, rerankConfig.topK || 20) : 0;
|
|
177
249
|
const rerankMaxChars = rerankConfig ? Math.max(200, rerankConfig.maxChars || 1600) : 0;
|
|
178
250
|
|
|
251
|
+
// Auto-trigger gate for rerank: when reranker is configured but caller didn't
|
|
252
|
+
// explicitly pass opts.rerank, decide per-call whether the cost is worth it.
|
|
253
|
+
// Defaults aim for "rerank when shortlist is dense enough to benefit, query
|
|
254
|
+
// is non-trivial, and either signals are mixed (hybrid) or FTS returned a
|
|
255
|
+
// wide candidate set worth narrowing semantically."
|
|
256
|
+
const autoTriggerCfg = (rerankConfig && rerankConfig.autoTrigger) || {};
|
|
257
|
+
const autoTrigger = {
|
|
258
|
+
enabled: autoTriggerCfg.enabled !== false, // default true when reranker exists
|
|
259
|
+
modes: autoTriggerCfg.modes || ['hybrid'],
|
|
260
|
+
minQueryChars: autoTriggerCfg.minQueryChars ?? 6,
|
|
261
|
+
minQueryTokens: autoTriggerCfg.minQueryTokens ?? 2,
|
|
262
|
+
minResults: autoTriggerCfg.minResults ?? 2,
|
|
263
|
+
maxResults: autoTriggerCfg.maxResults ?? 12,
|
|
264
|
+
maxTopScoreGap: autoTriggerCfg.maxTopScoreGap ?? 0.08,
|
|
265
|
+
alwaysWhenEntities: autoTriggerCfg.alwaysWhenEntities !== false, // default true
|
|
266
|
+
ftsMinResults: autoTriggerCfg.ftsMinResults ?? 5, // FTS-only mode triggers when results > this
|
|
267
|
+
};
|
|
268
|
+
|
|
179
269
|
// Source registry (in-memory)
|
|
180
270
|
const sources = new Map();
|
|
181
271
|
|
|
@@ -183,57 +273,104 @@ function createAquifer(config = {}) {
|
|
|
183
273
|
let migrated = false;
|
|
184
274
|
let migratePromise = null;
|
|
185
275
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
276
|
+
// FTS tsconfig — auto-detected during migrate(). 'zhcfg' if zhparser is
|
|
277
|
+
// installed (better Chinese segmentation), otherwise 'simple' (legacy).
|
|
278
|
+
// Override via config.ftsConfig if you need to force one or the other.
|
|
279
|
+
let ftsConfig = config.ftsConfig || null;
|
|
280
|
+
|
|
281
|
+
// State-change extraction (Q3): off by default. When enabled, enrich() runs
|
|
282
|
+
// an extra LLM call to capture temporal state transitions on whitelisted
|
|
283
|
+
// entities. See pipeline/extract-state-changes.js + core/entity-state.js.
|
|
284
|
+
const stateChangesCfg = config.stateChanges || {};
|
|
285
|
+
const stateChangesEnabled = stateChangesCfg.enabled === true;
|
|
286
|
+
const stateChangesWhitelist = new Set(
|
|
287
|
+
(Array.isArray(stateChangesCfg.whitelist) ? stateChangesCfg.whitelist : [])
|
|
288
|
+
.map(s => String(s).toLowerCase())
|
|
289
|
+
);
|
|
290
|
+
const stateChangesPromptFn = stateChangesCfg.promptFn || null;
|
|
291
|
+
const stateChangesConfThreshold = Number.isFinite(stateChangesCfg.confidenceThreshold)
|
|
292
|
+
? stateChangesCfg.confidenceThreshold : 0.7;
|
|
293
|
+
const stateChangesTimeoutMs = Number.isFinite(stateChangesCfg.timeoutMs)
|
|
294
|
+
? stateChangesCfg.timeoutMs : 10000;
|
|
295
|
+
const stateChangesMaxOutputTokens = Number.isFinite(stateChangesCfg.maxOutputTokens)
|
|
296
|
+
? stateChangesCfg.maxOutputTokens : 600;
|
|
297
|
+
|
|
298
|
+
const migrationsCfg = config.migrations || {};
|
|
299
|
+
const migrationsMode = (() => {
|
|
300
|
+
const raw = migrationsCfg.mode;
|
|
301
|
+
if (raw === 'apply' || raw === 'check' || raw === 'off') return raw;
|
|
302
|
+
if (raw === undefined || raw === null) return 'apply';
|
|
303
|
+
throw new Error(`config.migrations.mode must be 'apply' | 'check' | 'off' (got ${JSON.stringify(raw)})`);
|
|
304
|
+
})();
|
|
305
|
+
const migrationLockTimeoutMs = Number.isFinite(migrationsCfg.lockTimeoutMs)
|
|
306
|
+
? Math.max(0, migrationsCfg.lockTimeoutMs) : 30000;
|
|
307
|
+
const migrationStartupTimeoutMs = Number.isFinite(migrationsCfg.startupTimeoutMs)
|
|
308
|
+
? Math.max(0, migrationsCfg.startupTimeoutMs) : 60000;
|
|
309
|
+
const migrationOnEvent = typeof migrationsCfg.onEvent === 'function' ? migrationsCfg.onEvent : null;
|
|
310
|
+
|
|
311
|
+
/**
 * Deliver a migration lifecycle event to the optional `migrations.onEvent`
 * handler. A failing handler must never break a migration, so both
 * synchronous throws and rejected promises from the handler are reduced to a
 * console warning.
 *
 * @param {string} name - Event name (e.g. 'apply_started').
 * @param {object} [payload] - Extra fields merged into the event object after
 *   `name` and `schema`.
 * @returns {void}
 */
function emitMigrationEvent(name, payload) {
  if (!migrationOnEvent) return;
  // Handlers may throw non-Error values; fall back to their string form.
  const describe = (err) => (err && err.message ? err.message : String(err));
  try {
    const outcome = migrationOnEvent({ name, schema, ...payload });
    // An async handler fails by rejecting, which try/catch cannot see.
    // Attach a rejection handler so it does not surface as an unhandled
    // rejection.
    if (outcome && typeof outcome.then === 'function') {
      outcome.then(undefined, (err) => {
        console.warn(`[aquifer] migrations.onEvent handler rejected: ${describe(err)}`);
      });
    }
  } catch (err) {
    console.warn(`[aquifer] migrations.onEvent handler threw: ${describe(err)}`);
  }
}
|
|
192
317
|
|
|
193
|
-
//
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
}
|
|
214
|
-
if (source) {
|
|
215
|
-
params.push(source);
|
|
216
|
-
where.push(`s.source = $${params.length}`);
|
|
217
|
-
}
|
|
318
|
+
// Expected migration set — used for lazy plan introspection. `always: true`
|
|
319
|
+
// runs every migrate(); others are gated by feature flags. Signature tables
|
|
320
|
+
// let listPendingMigrations() probe pg_tables without executing DDL.
|
|
321
|
+
const MIGRATION_PLAN = [
|
|
322
|
+
{ id: '001-base', file: '001-base.sql', always: true, signature: 'sessions' },
|
|
323
|
+
{ id: '002-entities', file: '002-entities.sql', gate: 'entities', signature: 'entities' },
|
|
324
|
+
{ id: '003-trust-feedback', file: '003-trust-feedback.sql', always: true, signature: 'session_feedback' },
|
|
325
|
+
{ id: '004-facts', file: '004-facts.sql', gate: 'facts', signature: 'facts' },
|
|
326
|
+
{ id: '004-completion', file: '004-completion.sql', always: true, signature: 'narratives' },
|
|
327
|
+
{ id: '005-entity-state-history',file: '005-entity-state-history.sql',gate: 'entities', signature: 'entity_state_history' },
|
|
328
|
+
{ id: '006-insights', file: '006-insights.sql', always: true, signature: 'insights' },
|
|
329
|
+
];
|
|
330
|
+
|
|
331
|
+
/**
 * List the ids of the migrations this instance expects, in plan order.
 * An entry is included when it is marked `always`, or when its feature gate
 * ('entities' / 'facts') is switched on for this instance.
 *
 * @returns {string[]} Migration ids.
 */
function requiredMigrations() {
  const ids = [];
  for (const entry of MIGRATION_PLAN) {
    const gateOpen = (entry.gate === 'entities' && entitiesEnabled)
      || (entry.gate === 'facts' && factsEnabled);
    if (entry.always || gateOpen) ids.push(entry.id);
  }
  return ids;
}
|
|
218
338
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
WHERE ss.embedding IS NOT NULL
|
|
230
|
-
AND ${where.join(' AND ')}
|
|
231
|
-
ORDER BY distance ASC
|
|
232
|
-
LIMIT $${params.length}`,
|
|
233
|
-
params
|
|
339
|
+
/**
 * Work out which of the required migrations look applied, by checking whether
 * each migration's signature table is listed in `pg_tables` for this schema.
 * Only table presence is checked; no DDL is executed.
 *
 * @param {{query: Function}} queryRunner - Pool or client exposing `query()`.
 * @returns {Promise<string[]>} Ids of required migrations whose signature
 *   table exists, in plan order.
 */
async function readAppliedMigrations(queryRunner) {
  const isRequired = (entry) => entry.always
    || (entry.gate === 'entities' && entitiesEnabled)
    || (entry.gate === 'facts' && factsEnabled);

  const required = MIGRATION_PLAN.filter(isRequired);
  if (required.length === 0) return [];

  const signatures = required.map((entry) => entry.signature);
  const r = await queryRunner.query(
    `SELECT tablename FROM pg_tables
      WHERE schemaname = $1 AND tablename = ANY($2::text[])`,
    [schema, signatures]
  );

  const present = new Set();
  for (const row of r.rows) present.add(row.tablename);

  const appliedIds = [];
  for (const entry of required) {
    if (present.has(entry.signature)) appliedIds.push(entry.id);
  }
  return appliedIds;
}
|
|
235
353
|
|
|
236
|
-
|
|
354
|
+
/**
 * Compare the required migration set against what the database reports.
 *
 * @param {{query: Function}} queryRunner - Pool or client exposing `query()`.
 * @returns {Promise<{required: string[], applied: string[], pending: string[]}>}
 *   `pending` holds the required ids that are not in `applied`.
 */
async function buildMigrationPlan(queryRunner) {
  const required = requiredMigrations();
  const applied = await readAppliedMigrations(queryRunner);
  const pending = [];
  for (const id of required) {
    if (!applied.includes(id)) pending.push(id);
  }
  return { required, applied, pending };
}
|
|
361
|
+
|
|
362
|
+
/**
 * Lazily bring the schema up to date according to `migrations.mode`.
 *
 * - 'off':   marks the instance as migrated without touching the database.
 * - 'check': probes the plan and marks the instance as migrated only when
 *            nothing is pending; never runs DDL. A failed probe is ignored.
 * - 'apply': runs `aquifer.migrate()`, sharing one in-flight promise between
 *            concurrent callers.
 *
 * @returns {Promise<*>} Resolves with the `migrate()` result when a migration
 *   ran or was already in flight, otherwise with `undefined`.
 */
async function ensureMigrated() {
  if (migrated) return;
  if (migratePromise) return migratePromise;

  switch (migrationsMode) {
    case 'off':
      migrated = true;
      return;
    case 'check': {
      // Lazy compare only — don't execute DDL implicitly.
      let plan = null;
      try {
        plan = await buildMigrationPlan(pool);
      } catch {
        plan = null;
      }
      if (plan && plan.pending.length === 0) migrated = true;
      return;
    }
    default:
      break;
  }

  // Clear the shared promise once settled so a failed run can be retried.
  migratePromise = aquifer.migrate().finally(() => { migratePromise = null; });
  return migratePromise;
}
|
|
238
375
|
|
|
239
376
|
// =========================================================================
|
|
@@ -243,44 +380,329 @@ function createAquifer(config = {}) {
|
|
|
243
380
|
const aquifer = {
|
|
244
381
|
// --- lifecycle ---
|
|
245
382
|
|
|
383
|
+
async ensureMigrated() {
|
|
384
|
+
return ensureMigrated();
|
|
385
|
+
},
|
|
386
|
+
|
|
246
387
|
async migrate() {
|
|
388
|
+
const t0 = Date.now();
|
|
247
389
|
// Advisory lock prevents concurrent migrations across processes.
|
|
248
390
|
// Lock key is derived from schema name to allow parallel migration
|
|
249
391
|
// of different schemas in the same database.
|
|
250
392
|
const lockKey = Buffer.from(`aquifer:${schema}`).reduce((h, b) => (h * 31 + b) & 0x7fffffff, 0);
|
|
251
|
-
|
|
393
|
+
|
|
394
|
+
emitMigrationEvent('init_started', { mode: migrationsMode });
|
|
395
|
+
|
|
396
|
+
// Run all migration DDL on a single checked-out client so we can
|
|
397
|
+
// capture RAISE NOTICE/WARNING emitted by the DO blocks. node-postgres
|
|
398
|
+
// swallows notices on pool.query(); attaching a 'notice' listener to a
|
|
399
|
+
// held client surfaces them. Fall back to pool.query() when the caller
|
|
400
|
+
// passed a bare mock (no connect/release) — tests using minimal pool
|
|
401
|
+
// stubs still exercise the migration shape, just without notice capture.
|
|
402
|
+
const supportsCheckout = typeof pool.connect === 'function';
|
|
403
|
+
const client = supportsCheckout ? await pool.connect() : pool;
|
|
404
|
+
const releasesClient = supportsCheckout && typeof client.release === 'function';
|
|
405
|
+
const notices = [];
|
|
406
|
+
const onNotice = (n) => {
|
|
407
|
+
notices.push({ severity: n.severity || 'NOTICE', message: n.message || String(n) });
|
|
408
|
+
};
|
|
409
|
+
const hasEvents = typeof client.on === 'function' && typeof client.off === 'function';
|
|
410
|
+
if (hasEvents) client.on('notice', onNotice);
|
|
411
|
+
|
|
412
|
+
const ddlExecuted = [];
|
|
413
|
+
let lockAcquired = false;
|
|
414
|
+
|
|
252
415
|
try {
|
|
253
|
-
//
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
416
|
+
// Plan probe before lock: lets consumers see pending list and lets
|
|
417
|
+
// us emit an accurate check_completed event even when the DDL is a
|
|
418
|
+
// no-op on an already-migrated schema.
|
|
419
|
+
const planBefore = await buildMigrationPlan(client).catch(() => null);
|
|
420
|
+
emitMigrationEvent('check_completed', {
|
|
421
|
+
required: planBefore ? planBefore.required : requiredMigrations(),
|
|
422
|
+
applied: planBefore ? planBefore.applied : [],
|
|
423
|
+
pending: planBefore ? planBefore.pending : requiredMigrations(),
|
|
424
|
+
});
|
|
425
|
+
|
|
426
|
+
// Try-lock with poll + timeout. Replaces the old blocking
|
|
427
|
+
// pg_advisory_lock() which could hang indefinitely if another
|
|
428
|
+
// process crashed holding the lock. Defensive against mock pools:
|
|
429
|
+
// only poll when PG explicitly returns ok=false; a missing/empty
|
|
430
|
+
// response (test mocks that don't model pg_try_advisory_lock) is
|
|
431
|
+
// treated as acquired so suite doesn't hang on the deadline.
|
|
432
|
+
const lockDeadline = Date.now() + migrationLockTimeoutMs;
|
|
433
|
+
const pollMs = 250;
|
|
434
|
+
while (true) {
|
|
435
|
+
const r = await client.query('SELECT pg_try_advisory_lock($1) AS ok', [lockKey]);
|
|
436
|
+
const row = r && r.rows ? r.rows[0] : null;
|
|
437
|
+
if (row && row.ok === false) {
|
|
438
|
+
if (Date.now() >= lockDeadline) break;
|
|
439
|
+
await new Promise(res => setTimeout(res, pollMs));
|
|
440
|
+
continue;
|
|
441
|
+
}
|
|
442
|
+
lockAcquired = true;
|
|
443
|
+
break;
|
|
444
|
+
}
|
|
445
|
+
if (!lockAcquired) {
|
|
446
|
+
const err = new Error(`aquifer: failed to acquire migration advisory lock within ${migrationLockTimeoutMs}ms for schema "${schema}"`);
|
|
447
|
+
err.code = 'AQ_MIGRATION_LOCK_TIMEOUT';
|
|
448
|
+
err.failedAt = 'acquire_lock';
|
|
449
|
+
throw err;
|
|
261
450
|
}
|
|
262
451
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
452
|
+
emitMigrationEvent('apply_started', {
|
|
453
|
+
pending: planBefore ? planBefore.pending : requiredMigrations(),
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
try {
|
|
457
|
+
// 1. Run base DDL
|
|
458
|
+
const baseSql = loadSql('001-base.sql', schema);
|
|
459
|
+
await client.query(baseSql); ddlExecuted.push('001-base');
|
|
460
|
+
|
|
461
|
+
// 2. If entities enabled, run entity DDL
|
|
462
|
+
if (entitiesEnabled) {
|
|
463
|
+
const entitySql = loadSql('002-entities.sql', schema);
|
|
464
|
+
await client.query(entitySql); ddlExecuted.push('002-entities');
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
// 3. Trust + feedback (always, not gated by entities)
|
|
468
|
+
const trustSql = loadSql('003-trust-feedback.sql', schema);
|
|
469
|
+
await client.query(trustSql); ddlExecuted.push('003-trust-feedback');
|
|
470
|
+
|
|
471
|
+
// 4. Facts / consolidation (opt-in)
|
|
472
|
+
if (factsEnabled) {
|
|
473
|
+
const factsSql = loadSql('004-facts.sql', schema);
|
|
474
|
+
await client.query(factsSql); ddlExecuted.push('004-facts');
|
|
475
|
+
}
|
|
266
476
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
477
|
+
// 5. Completion foundation (always, additive): narratives,
|
|
478
|
+
// consumer_profiles, sessions.consolidation_phases. Pure additive DDL
|
|
479
|
+
// with IF NOT EXISTS guards — safe on every migrate() call.
|
|
480
|
+
const completionSql = loadSql('004-completion.sql', schema);
|
|
481
|
+
await client.query(completionSql); ddlExecuted.push('004-completion');
|
|
482
|
+
|
|
483
|
+
// 6. Entity state history (always, gated by entitiesEnabled because
|
|
484
|
+
// it FK-references entities). Drop-clean — see scripts/drop-entity-state-history.sql.
|
|
485
|
+
if (entitiesEnabled) {
|
|
486
|
+
const stateHistorySql = loadSql('005-entity-state-history.sql', schema);
|
|
487
|
+
await client.query(stateHistorySql); ddlExecuted.push('005-entity-state-history');
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// 7. Insights (always, additive). No FK from anywhere into this table —
|
|
491
|
+
// safe to DROP CASCADE. See scripts/drop-insights.sql.
|
|
492
|
+
const insightsSql = loadSql('006-insights.sql', schema);
|
|
493
|
+
await client.query(insightsSql); ddlExecuted.push('006-insights');
|
|
494
|
+
|
|
495
|
+
migrated = true;
|
|
496
|
+
} finally {
|
|
497
|
+
await client.query('SELECT pg_advisory_unlock($1)', [lockKey]).catch((err) => {
|
|
498
|
+
console.warn(`[aquifer] failed to release migration advisory lock for schema "${schema}": ${err.message}`);
|
|
499
|
+
});
|
|
500
|
+
}
|
|
501
|
+
} catch (err) {
|
|
502
|
+
err.notices = Array.isArray(err.notices) ? err.notices : notices.slice();
|
|
503
|
+
err.failedAt = err.failedAt || 'apply_ddl';
|
|
504
|
+
emitMigrationEvent('apply_failed', {
|
|
505
|
+
error: { code: err.code || null, message: err.message },
|
|
506
|
+
failedAt: err.failedAt,
|
|
507
|
+
notices: err.notices,
|
|
508
|
+
durationMs: Date.now() - t0,
|
|
509
|
+
});
|
|
510
|
+
throw err;
|
|
511
|
+
} finally {
|
|
512
|
+
if (hasEvents) client.off('notice', onNotice);
|
|
513
|
+
if (releasesClient) client.release();
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
// Surface captured migration notices that operators need to see:
|
|
517
|
+
// - any WARNING/ERROR (zhcfg rebuild warnings, HNSW OOM, etc.)
|
|
518
|
+
// - aquifer-authored NOTICE messages ('[aquifer] ...' prefix in the
|
|
519
|
+
// migration DO blocks; these announce extension-install fallback,
|
|
520
|
+
// HNSW deferral, and other operational decisions)
|
|
521
|
+
// Filtered out: PG's own "relation already exists, skipping" and
|
|
522
|
+
// similar idempotent-DDL chatter that floods a re-run.
|
|
523
|
+
for (const n of notices) {
|
|
524
|
+
const sev = (n.severity || 'NOTICE').toUpperCase();
|
|
525
|
+
const msg = n.message || '';
|
|
526
|
+
const line = `[aquifer] migration ${sev.toLowerCase()}: ${msg}`;
|
|
527
|
+
if (sev === 'WARNING' || sev === 'ERROR') {
|
|
528
|
+
console.warn(line);
|
|
529
|
+
} else if (sev === 'NOTICE' && msg.startsWith('[aquifer]')) {
|
|
530
|
+
process.stderr.write(line + '\n');
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
// Auto-detect FTS tsconfig if not forced by config. Restrict to the
|
|
535
|
+
// public namespace — same restriction the trigger function uses — so a
|
|
536
|
+
// same-named config in another schema doesn't fool the detection.
|
|
537
|
+
if (!ftsConfig) {
|
|
538
|
+
try {
|
|
539
|
+
const r = await pool.query(
|
|
540
|
+
`SELECT 1 FROM pg_ts_config
|
|
541
|
+
WHERE cfgname = 'zhcfg' AND cfgnamespace = 'public'::regnamespace
|
|
542
|
+
LIMIT 1`);
|
|
543
|
+
ftsConfig = r.rowCount > 0 ? 'zhcfg' : 'simple';
|
|
544
|
+
} catch {
|
|
545
|
+
ftsConfig = 'simple';
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
// Post-flight: surface which Chinese FTS backend the migration actually
|
|
550
|
+
// landed on, and warm the backend's tokenizer so the first live query
|
|
551
|
+
// doesn't pay cold-start cost unpredictably. RAISE NOTICE/WARNING from
|
|
552
|
+
// the migration DO blocks are swallowed by node-postgres unless a
|
|
553
|
+
// notice handler is attached, so without this operators can't tell if
|
|
554
|
+
// pg_jieba silently failed to install and FTS is degraded to 'simple'.
|
|
555
|
+
//
|
|
556
|
+
// pg_jieba first-backend load is ~60MB RAM + 0.5-1s to mmap the dict.
|
|
557
|
+
// Warming once inside migrate() amortizes that on the backend that runs
|
|
558
|
+
// migration; other pool backends still pay it on first use, but the
|
|
559
|
+
// timing surfaces the cost so operators who see unexpected latency
|
|
560
|
+
// know where to look.
|
|
561
|
+
try {
|
|
562
|
+
const f = await pool.query(`
|
|
563
|
+
SELECT
|
|
564
|
+
EXISTS(SELECT 1 FROM pg_extension WHERE extname='pg_jieba') AS have_jieba,
|
|
565
|
+
EXISTS(SELECT 1 FROM pg_extension WHERE extname='zhparser') AS have_zhparser,
|
|
566
|
+
(SELECT p.prsname FROM pg_ts_config c
|
|
567
|
+
JOIN pg_ts_parser p ON c.cfgparser = p.oid
|
|
568
|
+
WHERE c.cfgname='zhcfg' AND c.cfgnamespace='public'::regnamespace
|
|
569
|
+
LIMIT 1) AS zhcfg_parser
|
|
570
|
+
`);
|
|
571
|
+
const row = f.rows[0] || {};
|
|
572
|
+
const backend = row.zhcfg_parser
|
|
573
|
+
? `zhcfg(parser=${row.zhcfg_parser})`
|
|
574
|
+
: `simple (no zhcfg in public namespace)`;
|
|
575
|
+
|
|
576
|
+
let warmupMs = null;
|
|
577
|
+
if (row.zhcfg_parser) {
|
|
578
|
+
const t0 = Date.now();
|
|
579
|
+
await pool.query(`SELECT to_tsvector('zhcfg', $1)`, ['warmup 記憶系統 aquifer'])
|
|
580
|
+
.catch(() => {});
|
|
581
|
+
warmupMs = Date.now() - t0;
|
|
271
582
|
}
|
|
272
583
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
584
|
+
const warmupNote = warmupMs !== null ? ` warmup=${warmupMs}ms` : '';
|
|
585
|
+
process.stderr.write(
|
|
586
|
+
`[aquifer] FTS post-flight: backend=${backend} ` +
|
|
587
|
+
`jieba=${row.have_jieba} zhparser=${row.have_zhparser} ` +
|
|
588
|
+
`selected=${ftsConfig}${warmupNote}\n`
|
|
589
|
+
);
|
|
590
|
+
if (warmupMs !== null && warmupMs > 500) {
|
|
591
|
+
process.stderr.write(
|
|
592
|
+
`[aquifer] Note: first FTS call paid ~${warmupMs}ms for tokenizer init ` +
|
|
593
|
+
`(dictionary mmap). Subsequent calls on the same backend are cached.\n`
|
|
594
|
+
);
|
|
595
|
+
}
|
|
596
|
+
} catch (err) {
|
|
597
|
+
console.warn(`[aquifer] FTS post-flight check failed: ${err.message}`);
|
|
598
|
+
}
|
|
278
599
|
|
|
279
|
-
|
|
600
|
+
const durationMs = Date.now() - t0;
|
|
601
|
+
emitMigrationEvent('apply_succeeded', {
|
|
602
|
+
ddlExecuted,
|
|
603
|
+
durationMs,
|
|
604
|
+
notices: notices.slice(),
|
|
605
|
+
});
|
|
606
|
+
return { ok: true, durationMs, notices: notices.slice(), ddlExecuted };
|
|
607
|
+
},
|
|
608
|
+
|
|
609
|
+
async listPendingMigrations() {
|
|
610
|
+
const plan = await buildMigrationPlan(pool);
|
|
611
|
+
return { ...plan, lastRunAt: null };
|
|
612
|
+
},
|
|
613
|
+
|
|
614
|
+
async getMigrationStatus() {
|
|
615
|
+
return this.listPendingMigrations();
|
|
616
|
+
},
|
|
617
|
+
|
|
618
|
+
async init() {
|
|
619
|
+
const t0 = Date.now();
|
|
620
|
+
const mode = migrationsMode;
|
|
621
|
+
|
|
622
|
+
let deadlineTimer = null;
|
|
623
|
+
const startupDeadline = migrationStartupTimeoutMs > 0
|
|
624
|
+
? new Promise((_, reject) => {
|
|
625
|
+
deadlineTimer = setTimeout(() => {
|
|
626
|
+
const err = new Error(`aquifer: init() exceeded startupTimeoutMs=${migrationStartupTimeoutMs}ms`);
|
|
627
|
+
err.code = 'AQ_MIGRATION_STARTUP_TIMEOUT';
|
|
628
|
+
reject(err);
|
|
629
|
+
}, migrationStartupTimeoutMs);
|
|
630
|
+
if (typeof deadlineTimer.unref === 'function') deadlineTimer.unref();
|
|
631
|
+
})
|
|
632
|
+
: null;
|
|
633
|
+
const withDeadline = (p) => startupDeadline ? Promise.race([p, startupDeadline]) : p;
|
|
634
|
+
const clearDeadline = () => { if (deadlineTimer) { clearTimeout(deadlineTimer); deadlineTimer = null; } };
|
|
635
|
+
|
|
636
|
+
try {
|
|
637
|
+
let plan;
|
|
638
|
+
try {
|
|
639
|
+
plan = await withDeadline(buildMigrationPlan(pool));
|
|
640
|
+
} catch (err) {
|
|
641
|
+
const durationMs = Date.now() - t0;
|
|
642
|
+
emitMigrationEvent('apply_failed', {
|
|
643
|
+
error: { code: err.code || null, message: err.message },
|
|
644
|
+
failedAt: 'plan_probe',
|
|
645
|
+
notices: [],
|
|
646
|
+
durationMs,
|
|
647
|
+
});
|
|
648
|
+
return {
|
|
649
|
+
ready: false,
|
|
650
|
+
memoryMode: 'off',
|
|
651
|
+
migrationMode: mode,
|
|
652
|
+
pendingMigrations: [],
|
|
653
|
+
appliedMigrations: [],
|
|
654
|
+
error: { code: err.code || 'AQ_MIGRATION_PROBE_FAILED', message: err.message },
|
|
655
|
+
durationMs,
|
|
656
|
+
};
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
if (mode === 'off') {
|
|
660
|
+
return {
|
|
661
|
+
ready: true, memoryMode: 'rw', migrationMode: mode,
|
|
662
|
+
pendingMigrations: plan.pending, appliedMigrations: plan.applied,
|
|
663
|
+
error: null, durationMs: Date.now() - t0,
|
|
664
|
+
};
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
if (mode === 'check') {
|
|
668
|
+
const ready = plan.pending.length === 0;
|
|
669
|
+
if (ready) migrated = true;
|
|
670
|
+
return {
|
|
671
|
+
ready, memoryMode: ready ? 'rw' : 'ro', migrationMode: mode,
|
|
672
|
+
pendingMigrations: plan.pending, appliedMigrations: plan.applied,
|
|
673
|
+
error: null, durationMs: Date.now() - t0,
|
|
674
|
+
};
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
// mode === 'apply'
|
|
678
|
+
if (plan.pending.length === 0) {
|
|
679
|
+
migrated = true;
|
|
680
|
+
return {
|
|
681
|
+
ready: true, memoryMode: 'rw', migrationMode: mode,
|
|
682
|
+
pendingMigrations: [], appliedMigrations: plan.applied,
|
|
683
|
+
error: null, durationMs: Date.now() - t0,
|
|
684
|
+
};
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
try {
|
|
688
|
+
const result = await withDeadline(this.migrate());
|
|
689
|
+
const planAfter = await buildMigrationPlan(pool).catch(() => null);
|
|
690
|
+
return {
|
|
691
|
+
ready: true, memoryMode: 'rw', migrationMode: mode,
|
|
692
|
+
pendingMigrations: planAfter ? planAfter.pending : [],
|
|
693
|
+
appliedMigrations: planAfter ? planAfter.applied : plan.required,
|
|
694
|
+
error: null, durationMs: result.durationMs || (Date.now() - t0),
|
|
695
|
+
};
|
|
696
|
+
} catch (err) {
|
|
697
|
+
return {
|
|
698
|
+
ready: false, memoryMode: 'ro', migrationMode: mode,
|
|
699
|
+
pendingMigrations: plan.pending, appliedMigrations: plan.applied,
|
|
700
|
+
error: { code: err.code || 'AQ_MIGRATION_FAILED', message: err.message },
|
|
701
|
+
durationMs: Date.now() - t0,
|
|
702
|
+
};
|
|
703
|
+
}
|
|
280
704
|
} finally {
|
|
281
|
-
|
|
282
|
-
console.warn(`[aquifer] failed to release migration advisory lock for schema "${schema}": ${err.message}`);
|
|
283
|
-
});
|
|
705
|
+
clearDeadline();
|
|
284
706
|
}
|
|
285
707
|
},
|
|
286
708
|
|
|
@@ -504,6 +926,34 @@ function createAquifer(config = {}) {
|
|
|
504
926
|
} catch (e) { warnings.push(`entity extraction failed: ${e.message}`); }
|
|
505
927
|
}
|
|
506
928
|
|
|
929
|
+
// 4d. State-change extraction (Q3) — only if enabled, entities available,
|
|
930
|
+
// and at least one parsed entity matches whitelist. Returns changes with
|
|
931
|
+
// entity_name (not id); resolution happens in tx after entity upsert.
|
|
932
|
+
let parsedStateChanges = [];
|
|
933
|
+
if (stateChangesEnabled && entitiesEnabled && !skipEntities && parsedEntities.length > 0 && llmFn) {
|
|
934
|
+
const scopedEntities = stateChangesWhitelist.size === 0
|
|
935
|
+
? parsedEntities // empty whitelist == all parsed entities in scope
|
|
936
|
+
: parsedEntities.filter(e => stateChangesWhitelist.has(String(e.name).toLowerCase()));
|
|
937
|
+
if (scopedEntities.length > 0) {
|
|
938
|
+
try {
|
|
939
|
+
const { extractStateChanges } = require('../pipeline/extract-state-changes');
|
|
940
|
+
const result = await extractStateChanges(normalized, {
|
|
941
|
+
llmFn,
|
|
942
|
+
promptFn: stateChangesPromptFn,
|
|
943
|
+
entities: scopedEntities.map(e => ({ name: e.name, aliases: e.aliases || [] })),
|
|
944
|
+
sessionStartedAt: session.started_at ? new Date(session.started_at).toISOString() : null,
|
|
945
|
+
evidenceSessionId: sessionId,
|
|
946
|
+
confidenceThreshold: stateChangesConfThreshold,
|
|
947
|
+
timeoutMs: stateChangesTimeoutMs,
|
|
948
|
+
maxOutputTokens: stateChangesMaxOutputTokens,
|
|
949
|
+
logger: { warn: (m) => warnings.push(`state-change: ${m}`) },
|
|
950
|
+
});
|
|
951
|
+
parsedStateChanges = result.changes || [];
|
|
952
|
+
for (const w of (result.warnings || [])) warnings.push(`state-change: ${w}`);
|
|
953
|
+
} catch (e) { warnings.push(`state-change extraction failed: ${e.message}`); }
|
|
954
|
+
}
|
|
955
|
+
}
|
|
956
|
+
|
|
507
957
|
// 5. Now open transaction — only DB writes, no external calls
|
|
508
958
|
const client = await pool.connect();
|
|
509
959
|
let turnsEmbedded = 0;
|
|
@@ -595,6 +1045,49 @@ function createAquifer(config = {}) {
|
|
|
595
1045
|
}
|
|
596
1046
|
|
|
597
1047
|
entitiesFound = entityIds.length;
|
|
1048
|
+
|
|
1049
|
+
// 5d. Apply state changes (Q3) inside SAVEPOINT so a CONFLICT or
|
|
1050
|
+
// CHECK violation can't poison the parent transaction.
|
|
1051
|
+
if (parsedStateChanges.length > 0) {
|
|
1052
|
+
// Build name→id map from upserted entities (parsedEntities aligned
|
|
1053
|
+
// with entityIds by index).
|
|
1054
|
+
const nameToId = new Map();
|
|
1055
|
+
for (let i = 0; i < parsedEntities.length && i < entityIds.length; i++) {
|
|
1056
|
+
const ent = parsedEntities[i];
|
|
1057
|
+
if (!ent || entityIds[i] === null || entityIds[i] === undefined) continue;
|
|
1058
|
+
nameToId.set(String(ent.name).toLowerCase(), entityIds[i]);
|
|
1059
|
+
for (const a of (ent.aliases || [])) {
|
|
1060
|
+
if (typeof a === 'string') nameToId.set(a.toLowerCase(), entityIds[i]);
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
const resolved = [];
|
|
1064
|
+
for (const ch of parsedStateChanges) {
|
|
1065
|
+
const id = nameToId.get(String(ch.entityName || '').toLowerCase());
|
|
1066
|
+
if (id === null || id === undefined) continue;
|
|
1067
|
+
const { entityName: _drop, ...rest } = ch;
|
|
1068
|
+
void _drop;
|
|
1069
|
+
resolved.push({ ...rest, entityId: id, sessionRowId: session.id });
|
|
1070
|
+
}
|
|
1071
|
+
if (resolved.length > 0) {
|
|
1072
|
+
try {
|
|
1073
|
+
await client.query('SAVEPOINT state_changes');
|
|
1074
|
+
const r = await aquifer.entityState.applyChanges(client, {
|
|
1075
|
+
agentId,
|
|
1076
|
+
sessionRowId: session.id,
|
|
1077
|
+
changes: resolved,
|
|
1078
|
+
});
|
|
1079
|
+
if (!r.ok) {
|
|
1080
|
+
warnings.push(`state-change apply failed: ${r.error.code} ${r.error.message}`);
|
|
1081
|
+
await client.query('ROLLBACK TO SAVEPOINT state_changes');
|
|
1082
|
+
} else {
|
|
1083
|
+
await client.query('RELEASE SAVEPOINT state_changes');
|
|
1084
|
+
}
|
|
1085
|
+
} catch (e) {
|
|
1086
|
+
warnings.push(`state-change savepoint error: ${e.message}`);
|
|
1087
|
+
try { await client.query('ROLLBACK TO SAVEPOINT state_changes'); } catch { /* ignore */ }
|
|
1088
|
+
}
|
|
1089
|
+
}
|
|
1090
|
+
}
|
|
598
1091
|
}
|
|
599
1092
|
|
|
600
1093
|
// 8. Mark status + commit (M5: use 'partial' if warnings)
|
|
@@ -672,7 +1165,13 @@ function createAquifer(config = {}) {
|
|
|
672
1165
|
// --- read path ---
|
|
673
1166
|
|
|
674
1167
|
async recall(query, opts = {}) {
|
|
675
|
-
|
|
1168
|
+
// Contract (aligned across core / manifest / consumer tools): query must
|
|
1169
|
+
// be a non-empty string. Empty strings previously short-circuited to []
|
|
1170
|
+
// silently — that masks caller bugs. Callers wanting "recent sessions"
|
|
1171
|
+
// should use a dedicated API, not pass empty to recall().
|
|
1172
|
+
if (typeof query !== 'string' || query.trim().length === 0) {
|
|
1173
|
+
throw new Error('aquifer.recall(query): query must be a non-empty string');
|
|
1174
|
+
}
|
|
676
1175
|
|
|
677
1176
|
const VALID_MODES = ['fts', 'hybrid', 'vector'];
|
|
678
1177
|
const mode = opts.mode !== undefined ? opts.mode : 'hybrid';
|
|
@@ -724,8 +1223,12 @@ function createAquifer(config = {}) {
|
|
|
724
1223
|
|
|
725
1224
|
await ensureMigrated();
|
|
726
1225
|
|
|
727
|
-
|
|
728
|
-
|
|
1226
|
+
// rerank gating: provider must be configured + caller didn't disable.
|
|
1227
|
+
// Whether to actually invoke is decided after hybridRank, since the
|
|
1228
|
+
// shortlist is needed for the auto-trigger heuristics.
|
|
1229
|
+
const rerankProviderReady = !!reranker && opts.rerank !== false;
|
|
1230
|
+
const rerankForced = opts.rerank === true;
|
|
1231
|
+
const rerankTopK = rerankProviderReady ? Math.max(limit, opts.rerankTopK || defaultRerankTopK) : limit;
|
|
729
1232
|
const fetchLimit = rerankTopK * 4;
|
|
730
1233
|
|
|
731
1234
|
// 1. Embed query (only needed for hybrid/vector modes)
|
|
@@ -769,14 +1272,24 @@ function createAquifer(config = {}) {
|
|
|
769
1272
|
entityScoreBySession.set(row.session_id, 1.0);
|
|
770
1273
|
}
|
|
771
1274
|
} else {
|
|
772
|
-
// 'any' mode with explicit entities: use resolved IDs for boost
|
|
1275
|
+
// 'any' mode with explicit entities: use resolved IDs for boost.
|
|
1276
|
+
// Filter by tenant_id + agentIds to prevent cross-tenant / cross-agent
|
|
1277
|
+
// boost pollution (session_id is caller-supplied and not globally unique).
|
|
1278
|
+
const esParams = [entityIds, tenantId];
|
|
1279
|
+
let esAgentClause = '';
|
|
1280
|
+
if (resolvedAgentIds && resolvedAgentIds.length > 0) {
|
|
1281
|
+
esParams.push(resolvedAgentIds);
|
|
1282
|
+
esAgentClause = `AND s.agent_id = ANY($${esParams.length})`;
|
|
1283
|
+
}
|
|
773
1284
|
const esResult = await pool.query(
|
|
774
1285
|
`SELECT es.session_row_id, s.session_id, COUNT(*) AS entity_count
|
|
775
1286
|
FROM ${qi(schema)}.entity_sessions es
|
|
776
1287
|
JOIN ${qi(schema)}.sessions s ON s.id = es.session_row_id
|
|
777
1288
|
WHERE es.entity_id = ANY($1)
|
|
1289
|
+
AND s.tenant_id = $2
|
|
1290
|
+
${esAgentClause}
|
|
778
1291
|
GROUP BY es.session_row_id, s.session_id`,
|
|
779
|
-
|
|
1292
|
+
esParams
|
|
780
1293
|
);
|
|
781
1294
|
|
|
782
1295
|
const maxCount = Math.max(1, ...esResult.rows.map(r => parseInt(r.entity_count)));
|
|
@@ -793,13 +1306,21 @@ function createAquifer(config = {}) {
|
|
|
793
1306
|
|
|
794
1307
|
if (matchedEntities.length > 0) {
|
|
795
1308
|
const entityIds = matchedEntities.map(e => e.id);
|
|
1309
|
+
const esParams = [entityIds, tenantId];
|
|
1310
|
+
let esAgentClause = '';
|
|
1311
|
+
if (resolvedAgentIds && resolvedAgentIds.length > 0) {
|
|
1312
|
+
esParams.push(resolvedAgentIds);
|
|
1313
|
+
esAgentClause = `AND s.agent_id = ANY($${esParams.length})`;
|
|
1314
|
+
}
|
|
796
1315
|
const esResult = await pool.query(
|
|
797
1316
|
`SELECT es.session_row_id, s.session_id, COUNT(*) AS entity_count
|
|
798
1317
|
FROM ${qi(schema)}.entity_sessions es
|
|
799
1318
|
JOIN ${qi(schema)}.sessions s ON s.id = es.session_row_id
|
|
800
1319
|
WHERE es.entity_id = ANY($1)
|
|
1320
|
+
AND s.tenant_id = $2
|
|
1321
|
+
${esAgentClause}
|
|
801
1322
|
GROUP BY es.session_row_id, s.session_id`,
|
|
802
|
-
|
|
1323
|
+
esParams
|
|
803
1324
|
);
|
|
804
1325
|
|
|
805
1326
|
const maxCount = Math.max(1, ...esResult.rows.map(r => parseInt(r.entity_count)));
|
|
@@ -814,23 +1335,25 @@ function createAquifer(config = {}) {
|
|
|
814
1335
|
const runFts = mode === 'fts' || mode === 'hybrid';
|
|
815
1336
|
const runVector = mode === 'vector' || mode === 'hybrid';
|
|
816
1337
|
|
|
817
|
-
const [ftsRows,
|
|
1338
|
+
const [ftsRows, embResult, turnResult] = await Promise.all([
|
|
818
1339
|
runFts
|
|
819
1340
|
? storage.searchSessions(pool, query, {
|
|
820
1341
|
schema, tenantId, agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit,
|
|
1342
|
+
ftsConfig,
|
|
821
1343
|
}).catch((err) => {
|
|
822
1344
|
recordSearchError('fts', err);
|
|
823
1345
|
return [];
|
|
824
1346
|
})
|
|
825
1347
|
: Promise.resolve([]),
|
|
826
1348
|
runVector
|
|
827
|
-
?
|
|
1349
|
+
? storage.searchSummaryEmbeddings(pool, {
|
|
1350
|
+
schema, tenantId, queryVec,
|
|
828
1351
|
agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit,
|
|
829
1352
|
}).catch((err) => {
|
|
830
1353
|
recordSearchError('summary-vector', err);
|
|
831
|
-
return [];
|
|
1354
|
+
return { rows: [] };
|
|
832
1355
|
})
|
|
833
|
-
: Promise.resolve([]),
|
|
1356
|
+
: Promise.resolve({ rows: [] }),
|
|
834
1357
|
runVector
|
|
835
1358
|
? storage.searchTurnEmbeddings(pool, {
|
|
836
1359
|
schema, tenantId, queryVec, dateFrom, dateTo, agentIds: resolvedAgentIds, source, limit: fetchLimit,
|
|
@@ -841,6 +1364,7 @@ function createAquifer(config = {}) {
|
|
|
841
1364
|
: Promise.resolve({ rows: [] }),
|
|
842
1365
|
]);
|
|
843
1366
|
|
|
1367
|
+
const embRows = embResult.rows || [];
|
|
844
1368
|
const turnRows = turnResult.rows || [];
|
|
845
1369
|
|
|
846
1370
|
// 3b. Apply candidate filter (entityMode 'all')
|
|
@@ -908,9 +1432,35 @@ function createAquifer(config = {}) {
|
|
|
908
1432
|
},
|
|
909
1433
|
);
|
|
910
1434
|
|
|
911
|
-
// 6b. Rerank (optional)
|
|
1435
|
+
// 6b. Rerank (optional, with auto-trigger gate)
|
|
912
1436
|
let finalRanked = ranked;
|
|
913
|
-
|
|
1437
|
+
let rerankDecision = { apply: false, reason: 'provider_not_ready' };
|
|
1438
|
+
if (rerankProviderReady && ranked.length > 1) {
|
|
1439
|
+
if (rerankForced) {
|
|
1440
|
+
rerankDecision = { apply: true, reason: 'forced' };
|
|
1441
|
+
} else {
|
|
1442
|
+
// hasEntities = either caller passed entities explicitly OR the
|
|
1443
|
+
// query-derived path found matching entities (non-empty boost map).
|
|
1444
|
+
// shouldAutoRerank names the condition "entities present"; honour both.
|
|
1445
|
+
rerankDecision = shouldAutoRerank({
|
|
1446
|
+
query,
|
|
1447
|
+
mode,
|
|
1448
|
+
ranked,
|
|
1449
|
+
hasEntities: (explicitEntities && explicitEntities.length > 0)
|
|
1450
|
+
|| entityScoreBySession.size > 0,
|
|
1451
|
+
autoTrigger,
|
|
1452
|
+
});
|
|
1453
|
+
}
|
|
1454
|
+
} else if (!rerankProviderReady) {
|
|
1455
|
+
rerankDecision = {
|
|
1456
|
+
apply: false,
|
|
1457
|
+
reason: !reranker ? 'no_provider_configured' : 'caller_disabled',
|
|
1458
|
+
};
|
|
1459
|
+
} else {
|
|
1460
|
+
rerankDecision = { apply: false, reason: 'shortlist_too_short' };
|
|
1461
|
+
}
|
|
1462
|
+
|
|
1463
|
+
if (rerankDecision.apply) {
|
|
914
1464
|
try {
|
|
915
1465
|
const docs = ranked.map(r => buildRerankDocument(r, rerankMaxChars));
|
|
916
1466
|
const rerankResult = await reranker.rerank(query, docs, { topN: ranked.length });
|
|
@@ -920,6 +1470,7 @@ function createAquifer(config = {}) {
|
|
|
920
1470
|
...r,
|
|
921
1471
|
_hybridScore: r._score,
|
|
922
1472
|
_rerankScore: scoreMap.has(i) ? scoreMap.get(i) : null,
|
|
1473
|
+
_rerankReason: rerankDecision.reason,
|
|
923
1474
|
}));
|
|
924
1475
|
|
|
925
1476
|
finalRanked.sort((a, b) => {
|
|
@@ -932,10 +1483,15 @@ function createAquifer(config = {}) {
|
|
|
932
1483
|
} catch (rerankErr) {
|
|
933
1484
|
// Fallback: use original hybrid-rank order, flag in debug
|
|
934
1485
|
if (process.env.AQUIFER_DEBUG) console.error('[aquifer] rerank error:', rerankErr.message);
|
|
935
|
-
finalRanked = ranked.slice(0, limit).map(r => ({
|
|
1486
|
+
finalRanked = ranked.slice(0, limit).map(r => ({
|
|
1487
|
+
...r,
|
|
1488
|
+
_rerankFallback: true,
|
|
1489
|
+
_rerankReason: rerankDecision.reason,
|
|
1490
|
+
_rerankErrorMessage: rerankErr.message,
|
|
1491
|
+
}));
|
|
936
1492
|
}
|
|
937
1493
|
} else {
|
|
938
|
-
finalRanked = ranked.slice(0, limit);
|
|
1494
|
+
finalRanked = ranked.slice(0, limit).map(r => ({ ...r, _rerankReason: rerankDecision.reason }));
|
|
939
1495
|
}
|
|
940
1496
|
|
|
941
1497
|
// 7. Record access
|
|
@@ -972,6 +1528,9 @@ function createAquifer(config = {}) {
|
|
|
972
1528
|
hybridScore: r._hybridScore ?? r._score,
|
|
973
1529
|
rerankScore: r._rerankScore ?? null,
|
|
974
1530
|
rerankFallback: r._rerankFallback || false,
|
|
1531
|
+
rerankApplied: rerankDecision.apply,
|
|
1532
|
+
rerankReason: r._rerankReason || rerankDecision.reason,
|
|
1533
|
+
rerankErrorMessage: r._rerankErrorMessage || null,
|
|
975
1534
|
searchErrors: searchErrors.slice(),
|
|
976
1535
|
},
|
|
977
1536
|
}));
|
|
@@ -1251,6 +1810,8 @@ function createAquifer(config = {}) {
|
|
|
1251
1810
|
const { createArtifacts } = require('./artifacts');
|
|
1252
1811
|
const { createConsolidation } = require('./consolidation');
|
|
1253
1812
|
const { createBundles } = require('./bundles');
|
|
1813
|
+
const { createEntityState } = require('./entity-state');
|
|
1814
|
+
const { createInsights } = require('./insights');
|
|
1254
1815
|
const qSchema = qi(schema);
|
|
1255
1816
|
aquifer.narratives = createNarratives({ pool, schema: qSchema, defaultTenantId: tenantId });
|
|
1256
1817
|
aquifer.timeline = createTimeline({ pool, schema: qSchema, defaultTenantId: tenantId });
|
|
@@ -1261,6 +1822,22 @@ function createAquifer(config = {}) {
|
|
|
1261
1822
|
aquifer.artifacts = createArtifacts({ pool, schema: qSchema, defaultTenantId: tenantId });
|
|
1262
1823
|
aquifer.consolidation = createConsolidation({ pool, schema: qSchema, defaultTenantId: tenantId });
|
|
1263
1824
|
aquifer.bundles = createBundles({ pool, schema: qSchema, defaultTenantId: tenantId });
|
|
1825
|
+
// entityState materialises in schema/005-entity-state-history.sql, gated on
|
|
1826
|
+
// entitiesEnabled (it FK-references entities). Drop-clean — see
|
|
1827
|
+
// scripts/drop-entity-state-history.sql.
|
|
1828
|
+
aquifer.entityState = createEntityState({ pool, schema: qSchema, defaultTenantId: tenantId });
|
|
1829
|
+
// insights materialises in schema/006-insights.sql. No FK from elsewhere
|
|
1830
|
+
// into this table; DROP CASCADE is clean. See scripts/drop-insights.sql.
|
|
1831
|
+
// Recall ranking weights configurable via config.insights.recallWeights.
|
|
1832
|
+
aquifer.insights = createInsights({
|
|
1833
|
+
pool,
|
|
1834
|
+
schema: qSchema,
|
|
1835
|
+
defaultTenantId: tenantId,
|
|
1836
|
+
embedFn,
|
|
1837
|
+
recallWeights: (config.insights && config.insights.recallWeights) || null,
|
|
1838
|
+
recencyWindowDays: config.insights && Number.isFinite(config.insights.recencyWindowDays)
|
|
1839
|
+
? config.insights.recencyWindowDays : undefined,
|
|
1840
|
+
});
|
|
1264
1841
|
|
|
1265
1842
|
return aquifer;
|
|
1266
1843
|
}
|
|
@@ -1320,4 +1897,4 @@ function formatBootstrapText(data, maxChars) {
|
|
|
1320
1897
|
// Exports
|
|
1321
1898
|
// ---------------------------------------------------------------------------
|
|
1322
1899
|
|
|
1323
|
-
module.exports = { createAquifer, formatBootstrapText };
|
|
1900
|
+
module.exports = { createAquifer, formatBootstrapText, shouldAutoRerank };
|