@shadowforge0/aquifer-memory 1.0.3 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -20
- package/consumers/claude-code.js +117 -0
- package/consumers/cli.js +17 -0
- package/consumers/default/daily-entries.js +196 -0
- package/consumers/default/index.js +282 -0
- package/consumers/default/prompts/summary.js +153 -0
- package/consumers/mcp.js +3 -23
- package/consumers/miranda/context-inject.js +119 -0
- package/consumers/miranda/daily-entries.js +224 -0
- package/consumers/miranda/index.js +353 -0
- package/consumers/miranda/instance.js +55 -0
- package/consumers/miranda/llm.js +99 -0
- package/consumers/miranda/prompts/summary.js +303 -0
- package/consumers/miranda/recall-format.js +74 -0
- package/consumers/miranda/workspace-files.js +91 -0
- package/consumers/openclaw-ext/index.js +38 -0
- package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
- package/consumers/openclaw-ext/package.json +10 -0
- package/consumers/openclaw-plugin.js +66 -74
- package/consumers/opencode.js +21 -24
- package/consumers/shared/autodetect.js +64 -0
- package/consumers/shared/entity-parser.js +119 -0
- package/consumers/shared/ingest.js +148 -0
- package/consumers/shared/llm-autodetect.js +137 -0
- package/consumers/shared/normalize.js +129 -0
- package/consumers/shared/recall-format.js +110 -0
- package/core/aquifer.js +180 -71
- package/core/entity.js +1 -3
- package/core/storage.js +86 -28
- package/docs/postprocess-contract.md +132 -0
- package/index.js +9 -1
- package/package.json +23 -2
- package/pipeline/_http.js +1 -1
- package/pipeline/consolidation/apply.js +176 -0
- package/pipeline/consolidation/index.js +21 -0
- package/pipeline/extract-entities.js +2 -2
- package/pipeline/rerank.js +1 -1
- package/pipeline/summarize.js +4 -1
- package/schema/001-base.sql +61 -24
- package/schema/002-entities.sql +17 -3
- package/schema/004-facts.sql +67 -0
- package/scripts/diagnose-fts-zh.js +168 -134
- package/scripts/diagnose-vector.js +188 -0
- package/scripts/install-openclaw.sh +59 -0
- package/scripts/smoke.mjs +2 -2
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# `enrich({ postProcess })` Contract
|
|
2
|
+
|
|
3
|
+
`aquifer.enrich(sessionId, opts)` runs commit → summarize → embed → entity-extract → mark-status inside a single DB transaction. After the transaction commits and the client is released, if `opts.postProcess` was supplied, Aquifer invokes it once with a context object. This is how consumers hook persona-specific side-effects (daily logs, workspace files, consolidation, narrative regen, metrics) without mutating core.
|
|
4
|
+
|
|
5
|
+
**Stability**: stable in 1.x. Additive changes only (new ctx fields). No removals or breaking renames without a major bump.
|
|
6
|
+
|
|
7
|
+
## Signature
|
|
8
|
+
|
|
9
|
+
```ts
|
|
10
|
+
postProcess?: (ctx: PostProcessContext) => Promise<void>
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## When it runs
|
|
14
|
+
|
|
15
|
+
- **After** transaction commit and client release. The session row is already at its final status (`succeeded` or `partial`); nothing in postProcess can affect that.
|
|
16
|
+
- **At most once per enrich call**. No retry. If `postProcess` throws, the error is captured on the returned result as `postProcessError` (not re-thrown).
|
|
17
|
+
- Best-effort. The enrich call's return value resolves regardless of postProcess outcome.
|
|
18
|
+
|
|
19
|
+
## `ctx` shape
|
|
20
|
+
|
|
21
|
+
```ts
|
|
22
|
+
interface PostProcessContext {
|
|
23
|
+
session: {
|
|
24
|
+
id: number; // DB primary key (miranda.sessions.id)
|
|
25
|
+
sessionId: string; // caller-provided session key
|
|
26
|
+
agentId: string;
|
|
27
|
+
model: string | null;
|
|
28
|
+
source: string | null;
|
|
29
|
+
startedAt: string | null; // ISO-8601
|
|
30
|
+
endedAt: string | null; // ISO-8601
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
// opts.model override, falling back to session.model. Handy for consumers
|
|
34
|
+
// that want to pass the runtime model into downstream consolidation prompts.
|
|
35
|
+
effectiveModel: string | null;
|
|
36
|
+
|
|
37
|
+
// Summary result, if summarize ran. Null when skipSummary or summary failed.
|
|
38
|
+
summary: {
|
|
39
|
+
summaryText: string;
|
|
40
|
+
structuredSummary: object | null; // custom summaryFn payload
|
|
41
|
+
} | null;
|
|
42
|
+
|
|
43
|
+
// Summary-level embedding vector (size = embed.dim). Null if embed skipped/failed.
|
|
44
|
+
embedding: number[] | null;
|
|
45
|
+
|
|
46
|
+
// Per-turn embedding vectors (one per user turn). Null if skipped/failed.
|
|
47
|
+
turnVectors: number[][] | null;
|
|
48
|
+
|
|
49
|
+
// Passthrough from customSummaryFn return { extra }. Consumers use this to
|
|
50
|
+
// smuggle intermediate results (recap/sections/workingFacts) from summaryFn
|
|
51
|
+
// into postProcess without recomputing.
|
|
52
|
+
extra: any;
|
|
53
|
+
|
|
54
|
+
// Messages used for embedding/entity extraction. Same array commit() saw.
|
|
55
|
+
normalized: Array<{ role: string; content: string; timestamp?: string }>;
|
|
56
|
+
|
|
57
|
+
// Parsed entities from entityParseFn (or built-in parser).
|
|
58
|
+
parsedEntities: Array<{ name: string; normalizedName: string; aliases: string[]; type: string }>;
|
|
59
|
+
|
|
60
|
+
// Which pipeline steps ran.
|
|
61
|
+
skipped: { summary: boolean; entities: boolean; turns: boolean };
|
|
62
|
+
|
|
63
|
+
// Counts from the tx.
|
|
64
|
+
turnsEmbedded: number;
|
|
65
|
+
entitiesFound: number;
|
|
66
|
+
|
|
67
|
+
// Non-fatal failures collected inside enrich. Defensive copy — mutating this
|
|
68
|
+
// array does NOT affect enrich's own warnings list.
|
|
69
|
+
warnings: string[];
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Typical usage
|
|
74
|
+
|
|
75
|
+
```js
|
|
76
|
+
const result = await aquifer.enrich(sessionId, {
|
|
77
|
+
agentId: 'main',
|
|
78
|
+
summaryFn: async (msgs) => {
|
|
79
|
+
const output = await callLlm(buildPrompt({ msgs }));
|
|
80
|
+
const sections = parseSummaryOutput(output);
|
|
81
|
+
const recap = parseRecapLines(sections.recap);
|
|
82
|
+
return {
|
|
83
|
+
summaryText: recap.overview || '',
|
|
84
|
+
structuredSummary: recap,
|
|
85
|
+
entityRaw: sections.entities || null,
|
|
86
|
+
extra: { sections, recap, workingFacts: parseWorkingFacts(sections.working_facts) },
|
|
87
|
+
};
|
|
88
|
+
},
|
|
89
|
+
entityParseFn: (text) => parseEntitySection(text).entities,
|
|
90
|
+
postProcess: async (ctx) => {
|
|
91
|
+
const recap = ctx.extra?.recap;
|
|
92
|
+
const sections = ctx.extra?.sections;
|
|
93
|
+
const workingFacts = ctx.extra?.workingFacts || [];
|
|
94
|
+
|
|
95
|
+
// Daily log
|
|
96
|
+
if (recap || sections) {
|
|
97
|
+
await writeDailyEntries({ recap, sections, sessionId: ctx.session.sessionId, agentId: ctx.session.agentId });
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Write fact candidates (consumer-specific table, not in Aquifer schema)
|
|
101
|
+
if (workingFacts.length > 0) {
|
|
102
|
+
await writeFactCandidates({ facts: workingFacts, sessionId: ctx.session.sessionId });
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Consolidation (optional — requires enableFacts())
|
|
106
|
+
if (recap) {
|
|
107
|
+
const prompt = buildConsolidationPrompt({ recap, activeFacts, candidates, currentNarrative });
|
|
108
|
+
const output = await callLlm(prompt);
|
|
109
|
+
const { actions } = parseConsolidationOutput(output);
|
|
110
|
+
if (actions.length > 0) {
|
|
111
|
+
await aquifer.consolidate(ctx.session.sessionId, { actions, agentId: ctx.session.agentId });
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
if (result.postProcessError) {
|
|
118
|
+
logger.warn(`postProcess failed: ${result.postProcessError.message}`);
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## What NOT to do in postProcess
|
|
123
|
+
|
|
124
|
+
- Don't throw as a signal of "enrich should have failed" — enrich is already committed. Use warnings or a separate audit table.
|
|
125
|
+
- Don't mutate `ctx.normalized` or `ctx.parsedEntities` — they're shared-reference with the enrich return; defensive copy if you need to modify. (`ctx.warnings` is already a defensive copy; mutating it has no effect on enrich's own warnings list.)
|
|
126
|
+
- Don't rely on postProcess running quickly — it's outside the tx. Long-running work should be fire-and-forget (see Miranda's `setImmediate` consolidation) or queued.
|
|
127
|
+
|
|
128
|
+
## What Aquifer guarantees
|
|
129
|
+
|
|
130
|
+
- `postProcess` receives the same `session` row the tx wrote. No stale reads.
|
|
131
|
+
- If enrich's tx rolls back, postProcess is NOT called.
|
|
132
|
+
- If postProcess throws, the error is on `result.postProcessError`. The session status is unaffected.
|
package/index.js
CHANGED
|
@@ -3,5 +3,13 @@
|
|
|
3
3
|
const { createAquifer } = require('./core/aquifer');
|
|
4
4
|
const { createEmbedder } = require('./pipeline/embed');
|
|
5
5
|
const { createReranker } = require('./pipeline/rerank');
|
|
6
|
+
const { normalizeEntityName } = require('./core/entity');
|
|
7
|
+
const { parseEntitySection } = require('./consumers/shared/entity-parser');
|
|
6
8
|
|
|
7
|
-
module.exports = {
|
|
9
|
+
module.exports = {
|
|
10
|
+
createAquifer,
|
|
11
|
+
createEmbedder,
|
|
12
|
+
createReranker,
|
|
13
|
+
normalizeEntityName,
|
|
14
|
+
parseEntitySection,
|
|
15
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shadowforge0/aquifer-memory",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "PG-native long-term memory for AI agents. Turn-level embedding, hybrid RRF ranking, optional knowledge graph. MCP server, CLI, and library API.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -9,6 +9,9 @@
|
|
|
9
9
|
"pipeline/",
|
|
10
10
|
"schema/",
|
|
11
11
|
"consumers/",
|
|
12
|
+
"consumers/miranda/",
|
|
13
|
+
"consumers/default/",
|
|
14
|
+
"consumers/openclaw-ext/",
|
|
12
15
|
"docs/",
|
|
13
16
|
"scripts/"
|
|
14
17
|
],
|
|
@@ -20,8 +23,17 @@
|
|
|
20
23
|
"./consumers/mcp": "./consumers/mcp.js",
|
|
21
24
|
"./consumers/openclaw-plugin": "./consumers/openclaw-plugin.js",
|
|
22
25
|
"./consumers/opencode": "./consumers/opencode.js",
|
|
26
|
+
"./consumers/claude-code": "./consumers/claude-code.js",
|
|
27
|
+
"./consumers/miranda": "./consumers/miranda/index.js",
|
|
28
|
+
"./consumers/default": "./consumers/default/index.js",
|
|
29
|
+
"./consumers/openclaw-ext": "./consumers/openclaw-ext/index.js",
|
|
23
30
|
"./consumers/shared/config": "./consumers/shared/config.js",
|
|
24
|
-
"./consumers/shared/factory": "./consumers/shared/factory.js"
|
|
31
|
+
"./consumers/shared/factory": "./consumers/shared/factory.js",
|
|
32
|
+
"./consumers/shared/entity-parser": "./consumers/shared/entity-parser.js",
|
|
33
|
+
"./consumers/shared/normalize": "./consumers/shared/normalize.js",
|
|
34
|
+
"./consumers/shared/ingest": "./consumers/shared/ingest.js",
|
|
35
|
+
"./consumers/shared/recall-format": "./consumers/shared/recall-format.js",
|
|
36
|
+
"./consumers/shared/llm-autodetect": "./consumers/shared/llm-autodetect.js"
|
|
25
37
|
},
|
|
26
38
|
"repository": {
|
|
27
39
|
"type": "git",
|
|
@@ -32,11 +44,20 @@
|
|
|
32
44
|
"url": "https://github.com/shadowforge0/aquifer/issues"
|
|
33
45
|
},
|
|
34
46
|
"author": "shadowforge0",
|
|
47
|
+
"scripts": {
|
|
48
|
+
"test": "node --test test/*.test.js",
|
|
49
|
+
"test:integration": "node --test test/integration.test.js",
|
|
50
|
+
"lint": "eslint index.js core/*.js consumers/*.js consumers/shared/*.js consumers/miranda/*.js consumers/miranda/prompts/*.js consumers/default/*.js consumers/default/prompts/*.js consumers/openclaw-ext/*.js pipeline/*.js pipeline/consolidation/*.js scripts/*.js test/*.js",
|
|
51
|
+
"prepare": "git config core.hooksPath .githooks 2>/dev/null || true"
|
|
52
|
+
},
|
|
35
53
|
"dependencies": {
|
|
36
54
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
37
55
|
"pg": "^8.13.0",
|
|
38
56
|
"zod": "^3.25.76"
|
|
39
57
|
},
|
|
58
|
+
"devDependencies": {
|
|
59
|
+
"eslint": "^9.0.0"
|
|
60
|
+
},
|
|
40
61
|
"engines": {
|
|
41
62
|
"node": ">=18.0.0"
|
|
42
63
|
},
|
package/pipeline/consolidation/apply.js
CHANGED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Consolidation apply — executes a batch of fact-lifecycle actions in one tx.
|
|
5
|
+
//
|
|
6
|
+
// Actions (each object in the array):
|
|
7
|
+
// { action: 'promote', factId } candidate → active
|
|
8
|
+
// { action: 'create', subject, statement, importance? } new active fact
|
|
9
|
+
// { action: 'update', factId, statement } refresh active statement
|
|
10
|
+
// { action: 'confirm', factId } bump last_confirmed_at
|
|
11
|
+
// { action: 'stale', factId } active → stale
|
|
12
|
+
// { action: 'discard', factId } candidate → archived
|
|
13
|
+
// { action: 'merge', factId, targetId } candidate archived, target confirmed
|
|
14
|
+
// { action: 'supersede', factId, targetId } active → superseded by target
|
|
15
|
+
//
|
|
16
|
+
// All mutations scoped to (tenantId, agentId). The caller is responsible for
|
|
17
|
+
// providing a normalizer for subject_key (fall back to raw subject if absent).
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
function qi(identifier) { return `"${identifier}"`; }
|
|
21
|
+
|
|
22
|
+
async function applyConsolidation(pool, {
|
|
23
|
+
actions,
|
|
24
|
+
agentId,
|
|
25
|
+
sessionId,
|
|
26
|
+
schema,
|
|
27
|
+
tenantId = 'default',
|
|
28
|
+
normalizeSubject = null,
|
|
29
|
+
recapOverview = '',
|
|
30
|
+
} = {}) {
|
|
31
|
+
if (!pool) throw new Error('pool is required');
|
|
32
|
+
if (!schema) throw new Error('schema is required');
|
|
33
|
+
if (!agentId) throw new Error('agentId is required');
|
|
34
|
+
if (!Array.isArray(actions)) throw new Error('actions must be an array');
|
|
35
|
+
|
|
36
|
+
const tbl = `${qi(schema)}.facts`;
|
|
37
|
+
const summary = {
|
|
38
|
+
promote: 0, create: 0, update: 0, confirm: 0,
|
|
39
|
+
stale: 0, discard: 0, merge: 0, supersede: 0,
|
|
40
|
+
skipped: 0,
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
if (actions.length === 0) return summary;
|
|
44
|
+
|
|
45
|
+
const client = await pool.connect();
|
|
46
|
+
try {
|
|
47
|
+
await client.query('BEGIN');
|
|
48
|
+
|
|
49
|
+
for (const act of actions) {
|
|
50
|
+
switch (act.action) {
|
|
51
|
+
case 'promote': {
|
|
52
|
+
const r = await client.query(
|
|
53
|
+
`UPDATE ${tbl} SET status = 'active', last_confirmed_at = now()
|
|
54
|
+
WHERE id = $1 AND status = 'candidate' AND agent_id = $2 AND tenant_id = $3`,
|
|
55
|
+
[act.factId, agentId, tenantId],
|
|
56
|
+
);
|
|
57
|
+
summary.promote += r.rowCount;
|
|
58
|
+
if (r.rowCount === 0) summary.skipped++;
|
|
59
|
+
break;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
case 'create': {
|
|
63
|
+
const subjectLabel = act.subject ? String(act.subject).slice(0, 200) : '';
|
|
64
|
+
const subjectKey = normalizeSubject ? normalizeSubject(subjectLabel) : subjectLabel.trim().toLowerCase();
|
|
65
|
+
if (!subjectKey) { summary.skipped++; break; }
|
|
66
|
+
const statement = act.statement ? String(act.statement).slice(0, 2000) : '';
|
|
67
|
+
if (!statement) { summary.skipped++; break; }
|
|
68
|
+
const importance = Number.isFinite(act.importance) ? act.importance : 7;
|
|
69
|
+
const evidence = JSON.stringify([{
|
|
70
|
+
type: 'session_ref',
|
|
71
|
+
session_id: sessionId || null,
|
|
72
|
+
excerpt: (recapOverview || '').slice(0, 200),
|
|
73
|
+
}]);
|
|
74
|
+
const r = await client.query(
|
|
75
|
+
`INSERT INTO ${tbl}
|
|
76
|
+
(tenant_id, subject_key, subject_label, statement, status, importance,
|
|
77
|
+
source_session_id, agent_id, evidence)
|
|
78
|
+
VALUES ($1, $2, $3, $4, 'active', $5, $6, $7, $8::jsonb)
|
|
79
|
+
ON CONFLICT DO NOTHING`,
|
|
80
|
+
[tenantId, subjectKey, subjectLabel, statement, importance, sessionId || null, agentId, evidence],
|
|
81
|
+
);
|
|
82
|
+
summary.create += r.rowCount;
|
|
83
|
+
if (r.rowCount === 0) summary.skipped++;
|
|
84
|
+
break;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
case 'update': {
|
|
88
|
+
const statement = act.statement ? String(act.statement).slice(0, 2000) : '';
|
|
89
|
+
if (!statement) { summary.skipped++; break; }
|
|
90
|
+
const r = await client.query(
|
|
91
|
+
`UPDATE ${tbl} SET statement = $1, last_confirmed_at = now()
|
|
92
|
+
WHERE id = $2 AND status = 'active' AND agent_id = $3 AND tenant_id = $4`,
|
|
93
|
+
[statement, act.factId, agentId, tenantId],
|
|
94
|
+
);
|
|
95
|
+
summary.update += r.rowCount;
|
|
96
|
+
if (r.rowCount === 0) summary.skipped++;
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
case 'confirm': {
|
|
101
|
+
const r = await client.query(
|
|
102
|
+
`UPDATE ${tbl} SET last_confirmed_at = now()
|
|
103
|
+
WHERE id = $1 AND status = 'active' AND agent_id = $2 AND tenant_id = $3`,
|
|
104
|
+
[act.factId, agentId, tenantId],
|
|
105
|
+
);
|
|
106
|
+
summary.confirm += r.rowCount;
|
|
107
|
+
if (r.rowCount === 0) summary.skipped++;
|
|
108
|
+
break;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
case 'stale': {
|
|
112
|
+
const r = await client.query(
|
|
113
|
+
`UPDATE ${tbl} SET status = 'stale'
|
|
114
|
+
WHERE id = $1 AND status = 'active' AND agent_id = $2 AND tenant_id = $3`,
|
|
115
|
+
[act.factId, agentId, tenantId],
|
|
116
|
+
);
|
|
117
|
+
summary.stale += r.rowCount;
|
|
118
|
+
if (r.rowCount === 0) summary.skipped++;
|
|
119
|
+
break;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
case 'discard': {
|
|
123
|
+
const r = await client.query(
|
|
124
|
+
`UPDATE ${tbl} SET status = 'archived'
|
|
125
|
+
WHERE id = $1 AND status = 'candidate' AND agent_id = $2 AND tenant_id = $3`,
|
|
126
|
+
[act.factId, agentId, tenantId],
|
|
127
|
+
);
|
|
128
|
+
summary.discard += r.rowCount;
|
|
129
|
+
if (r.rowCount === 0) summary.skipped++;
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
case 'merge': {
|
|
134
|
+
const r1 = await client.query(
|
|
135
|
+
`UPDATE ${tbl} SET last_confirmed_at = now()
|
|
136
|
+
WHERE id = $1 AND status = 'active' AND tenant_id = $2`,
|
|
137
|
+
[act.targetId, tenantId],
|
|
138
|
+
);
|
|
139
|
+
const r2 = await client.query(
|
|
140
|
+
`UPDATE ${tbl} SET status = 'archived'
|
|
141
|
+
WHERE id = $1 AND status = 'candidate' AND tenant_id = $2`,
|
|
142
|
+
[act.factId, tenantId],
|
|
143
|
+
);
|
|
144
|
+
summary.merge += Math.min(r1.rowCount, r2.rowCount);
|
|
145
|
+
if (r1.rowCount === 0 || r2.rowCount === 0) summary.skipped++;
|
|
146
|
+
break;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
case 'supersede': {
|
|
150
|
+
const r = await client.query(
|
|
151
|
+
`UPDATE ${tbl} SET status = 'superseded', superseded_by = $1
|
|
152
|
+
WHERE id = $2 AND status = 'active' AND tenant_id = $3`,
|
|
153
|
+
[act.targetId, act.factId, tenantId],
|
|
154
|
+
);
|
|
155
|
+
summary.supersede += r.rowCount;
|
|
156
|
+
if (r.rowCount === 0) summary.skipped++;
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
default:
|
|
161
|
+
summary.skipped++;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
await client.query('COMMIT');
|
|
166
|
+
} catch (err) {
|
|
167
|
+
await client.query('ROLLBACK').catch(() => {});
|
|
168
|
+
throw err;
|
|
169
|
+
} finally {
|
|
170
|
+
client.release();
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
return summary;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
module.exports = { applyConsolidation };
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Consolidation pipeline
|
|
5
|
+
//
|
|
6
|
+
// Mechanics only — Aquifer ships the 8-action apply + schema. The prompt and
|
|
7
|
+
// output parser stay in consumers (they're persona-specific: different agents
|
|
8
|
+
// want different wording, language, and action vocabulary extensions).
|
|
9
|
+
//
|
|
10
|
+
// Typical flow in a consumer:
|
|
11
|
+
//
|
|
12
|
+
// const output = await llmFn(consumerBuildPrompt({...}));
|
|
13
|
+
// const { actions } = consumerParse(output);
|
|
14
|
+
// await aquifer.consolidate(sessionId, { actions, agentId });
|
|
15
|
+
//
|
|
16
|
+
// aquifer.consolidate() is defined in core/aquifer.js and delegates here.
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
const { applyConsolidation } = require('./apply');
|
|
20
|
+
|
|
21
|
+
module.exports = { applyConsolidation };
|
|
@@ -6,7 +6,7 @@ const { parseEntityOutput } = require('../core/entity');
|
|
|
6
6
|
// defaultEntityPrompt
|
|
7
7
|
// ---------------------------------------------------------------------------
|
|
8
8
|
|
|
9
|
-
function defaultEntityPrompt(messages
|
|
9
|
+
function defaultEntityPrompt(messages) {
|
|
10
10
|
const conversation = messages
|
|
11
11
|
.map(m => `[${m.role}] ${typeof m.content === 'string' ? m.content : JSON.stringify(m.content)}`)
|
|
12
12
|
.join('\n');
|
|
@@ -60,7 +60,7 @@ async function extractEntities(messages, {
|
|
|
60
60
|
const prompt = buildPrompt(messages, {});
|
|
61
61
|
const response = await llmFn(prompt);
|
|
62
62
|
return parseEntityOutput(response);
|
|
63
|
-
} catch
|
|
63
|
+
} catch {
|
|
64
64
|
// LLM failure: return empty, never throw
|
|
65
65
|
return [];
|
|
66
66
|
}
|
package/pipeline/rerank.js
CHANGED
|
@@ -39,7 +39,7 @@ function createTEIReranker(config) {
|
|
|
39
39
|
const initialBackoffMs = config.initialBackoffMs || 250;
|
|
40
40
|
|
|
41
41
|
return {
|
|
42
|
-
async rerank(query, documents,
|
|
42
|
+
async rerank(query, documents, _opts = {}) {
|
|
43
43
|
if (!query || !documents || documents.length === 0) return [];
|
|
44
44
|
|
|
45
45
|
const result = await withRetry(
|
package/pipeline/summarize.js
CHANGED
|
@@ -206,6 +206,9 @@ async function summarize(messages, {
|
|
|
206
206
|
try {
|
|
207
207
|
const prompt = buildPrompt(messages, { mergeEntities });
|
|
208
208
|
const response = await llmFn(prompt);
|
|
209
|
+
if (typeof response !== 'string' || response.trim() === '') {
|
|
210
|
+
return extractiveFallback(messages);
|
|
211
|
+
}
|
|
209
212
|
|
|
210
213
|
// Parse structured fields
|
|
211
214
|
const structuredSummary = _parseStructuredSummary(response);
|
|
@@ -232,7 +235,7 @@ async function summarize(messages, {
|
|
|
232
235
|
entityRaw,
|
|
233
236
|
isExtractive: false,
|
|
234
237
|
};
|
|
235
|
-
} catch
|
|
238
|
+
} catch {
|
|
236
239
|
// LLM failure: fall back to extractive
|
|
237
240
|
return extractiveFallback(messages);
|
|
238
241
|
}
|
package/schema/001-base.sql
CHANGED
|
@@ -43,27 +43,6 @@ CREATE INDEX IF NOT EXISTS idx_sessions_processing_status
|
|
|
43
43
|
ON ${schema}.sessions (processing_status)
|
|
44
44
|
WHERE processing_status IN ('pending', 'processing');
|
|
45
45
|
|
|
46
|
-
-- =========================================================================
|
|
47
|
-
-- Session segments: conversation boundary metadata
|
|
48
|
-
-- =========================================================================
|
|
49
|
-
CREATE TABLE IF NOT EXISTS ${schema}.session_segments (
|
|
50
|
-
id BIGSERIAL PRIMARY KEY,
|
|
51
|
-
session_row_id BIGINT NOT NULL REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
|
|
52
|
-
segment_no INT NOT NULL,
|
|
53
|
-
start_msg_idx INT,
|
|
54
|
-
end_msg_idx INT,
|
|
55
|
-
started_at TIMESTAMPTZ,
|
|
56
|
-
ended_at TIMESTAMPTZ,
|
|
57
|
-
raw_msg_count INT NOT NULL DEFAULT 0,
|
|
58
|
-
effective_msg_count INT NOT NULL DEFAULT 0,
|
|
59
|
-
boundary_type TEXT,
|
|
60
|
-
boundary_meta JSONB NOT NULL DEFAULT '{}',
|
|
61
|
-
UNIQUE (session_row_id, segment_no)
|
|
62
|
-
);
|
|
63
|
-
|
|
64
|
-
CREATE INDEX IF NOT EXISTS idx_session_segments_row
|
|
65
|
-
ON ${schema}.session_segments (session_row_id);
|
|
66
|
-
|
|
67
46
|
-- =========================================================================
|
|
68
47
|
-- Session summaries: LLM-generated or extractive summaries
|
|
69
48
|
-- =========================================================================
|
|
@@ -78,8 +57,6 @@ CREATE TABLE IF NOT EXISTS ${schema}.session_summaries (
|
|
|
78
57
|
message_count INT NOT NULL DEFAULT 0,
|
|
79
58
|
user_message_count INT NOT NULL DEFAULT 0,
|
|
80
59
|
assistant_message_count INT NOT NULL DEFAULT 0,
|
|
81
|
-
boundary_count INT NOT NULL DEFAULT 0,
|
|
82
|
-
fresh_tail_count INT NOT NULL DEFAULT 0,
|
|
83
60
|
started_at TIMESTAMPTZ,
|
|
84
61
|
ended_at TIMESTAMPTZ,
|
|
85
62
|
summary_text TEXT,
|
|
@@ -92,6 +69,23 @@ CREATE TABLE IF NOT EXISTS ${schema}.session_summaries (
|
|
|
92
69
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
93
70
|
);
|
|
94
71
|
|
|
72
|
+
-- Cleanup legacy segment-era schema artifacts so migrate() converges old installs.
|
|
73
|
+
-- Wrapped because the implicit sequence on session_segments can be referenced from
|
|
74
|
+
-- other schemas (e.g. bench/staging created via CREATE TABLE LIKE), which would
|
|
75
|
+
-- otherwise hard-fail the migration. Operators get a NOTICE and must decouple
|
|
76
|
+
-- dependents themselves before the table will actually drop.
|
|
77
|
+
DO $$
|
|
78
|
+
BEGIN
|
|
79
|
+
BEGIN
|
|
80
|
+
DROP TABLE IF EXISTS ${schema}.session_segments;
|
|
81
|
+
EXCEPTION
|
|
82
|
+
WHEN dependent_objects_still_exist THEN
|
|
83
|
+
RAISE NOTICE '[aquifer] skipped session_segments drop: %; decouple cross-schema dependents and re-run migrate to complete cleanup', SQLERRM;
|
|
84
|
+
END;
|
|
85
|
+
END$$;
|
|
86
|
+
ALTER TABLE ${schema}.session_summaries DROP COLUMN IF EXISTS boundary_count;
|
|
87
|
+
ALTER TABLE ${schema}.session_summaries DROP COLUMN IF EXISTS fresh_tail_count;
|
|
88
|
+
|
|
95
89
|
CREATE INDEX IF NOT EXISTS idx_summaries_tenant
|
|
96
90
|
ON ${schema}.session_summaries (tenant_id);
|
|
97
91
|
|
|
@@ -105,6 +99,27 @@ CREATE INDEX IF NOT EXISTS idx_summaries_embedding
|
|
|
105
99
|
ON ${schema}.session_summaries (session_row_id)
|
|
106
100
|
WHERE embedding IS NOT NULL;
|
|
107
101
|
|
|
102
|
+
-- HNSW approximate nearest-neighbor index for cosine-distance vector search.
|
|
103
|
+
-- Without this, ORDER BY embedding <=> $vec degrades to seq scan at scale.
|
|
104
|
+
-- Requires pgvector >= 0.5.0. HNSW cannot build on an empty unsized `vector`
|
|
105
|
+
-- column (can't infer dim), so we defer on failure — re-running migrate()
|
|
106
|
+
-- after the first insert will finish the job.
|
|
107
|
+
DO $$
|
|
108
|
+
BEGIN
|
|
109
|
+
BEGIN
|
|
110
|
+
EXECUTE 'CREATE INDEX IF NOT EXISTS idx_summaries_embedding_hnsw ON ${schema}.session_summaries USING hnsw (embedding vector_cosine_ops)';
|
|
111
|
+
EXCEPTION
|
|
112
|
+
WHEN invalid_parameter_value THEN
|
|
113
|
+
RAISE NOTICE '[aquifer] HNSW index on session_summaries.embedding deferred; re-run migrate() after the first embedded row';
|
|
114
|
+
WHEN feature_not_supported THEN
|
|
115
|
+
RAISE NOTICE '[aquifer] HNSW not available on this pgvector; upgrade to >= 0.5.0 for index-accelerated vector search';
|
|
116
|
+
WHEN out_of_memory THEN
|
|
117
|
+
RAISE WARNING '[aquifer] HNSW build on session_summaries.embedding ran out of memory; raise maintenance_work_mem and re-run migrate()';
|
|
118
|
+
WHEN program_limit_exceeded THEN
|
|
119
|
+
RAISE WARNING '[aquifer] HNSW build on session_summaries.embedding exceeded an internal limit; inspect pgvector logs';
|
|
120
|
+
END;
|
|
121
|
+
END$$;
|
|
122
|
+
|
|
108
123
|
-- FTS trigger: auto-update search_tsv on INSERT/UPDATE
|
|
109
124
|
CREATE OR REPLACE FUNCTION ${schema}.session_summaries_search_tsv_update()
|
|
110
125
|
RETURNS trigger
|
|
@@ -158,8 +173,12 @@ $$;
|
|
|
158
173
|
DROP TRIGGER IF EXISTS trg_session_summaries_search_tsv
|
|
159
174
|
ON ${schema}.session_summaries;
|
|
160
175
|
|
|
176
|
+
-- Trigger fires on input-column changes only. search_text is a trigger output
|
|
177
|
+
-- (derived from structured_summary + summary_text) and listing it here was
|
|
178
|
+
-- redundant — PG's BEFORE semantics already prevent the assignment inside the
|
|
179
|
+
-- trigger body from re-firing the trigger.
|
|
161
180
|
CREATE TRIGGER trg_session_summaries_search_tsv
|
|
162
|
-
BEFORE INSERT OR UPDATE OF summary_text, structured_summary
|
|
181
|
+
BEFORE INSERT OR UPDATE OF summary_text, structured_summary
|
|
163
182
|
ON ${schema}.session_summaries
|
|
164
183
|
FOR EACH ROW
|
|
165
184
|
EXECUTE FUNCTION ${schema}.session_summaries_search_tsv_update();
|
|
@@ -189,3 +208,21 @@ CREATE INDEX IF NOT EXISTS idx_turn_emb_session_row
|
|
|
189
208
|
|
|
190
209
|
CREATE INDEX IF NOT EXISTS idx_turn_emb_tenant_agent
|
|
191
210
|
ON ${schema}.turn_embeddings (tenant_id, agent_id, source);
|
|
211
|
+
|
|
212
|
+
-- HNSW approximate nearest-neighbor index for turn-level vector search.
|
|
213
|
+
-- See notes on session_summaries.embedding HNSW above.
|
|
214
|
+
DO $$
|
|
215
|
+
BEGIN
|
|
216
|
+
BEGIN
|
|
217
|
+
EXECUTE 'CREATE INDEX IF NOT EXISTS idx_turn_emb_embedding_hnsw ON ${schema}.turn_embeddings USING hnsw (embedding vector_cosine_ops)';
|
|
218
|
+
EXCEPTION
|
|
219
|
+
WHEN invalid_parameter_value THEN
|
|
220
|
+
RAISE NOTICE '[aquifer] HNSW index on turn_embeddings.embedding deferred; re-run migrate() after the first embedded row';
|
|
221
|
+
WHEN feature_not_supported THEN
|
|
222
|
+
RAISE NOTICE '[aquifer] HNSW not available on this pgvector; upgrade to >= 0.5.0 for index-accelerated vector search';
|
|
223
|
+
WHEN out_of_memory THEN
|
|
224
|
+
RAISE WARNING '[aquifer] HNSW build on turn_embeddings.embedding ran out of memory; raise maintenance_work_mem and re-run migrate()';
|
|
225
|
+
WHEN program_limit_exceeded THEN
|
|
226
|
+
RAISE WARNING '[aquifer] HNSW build on turn_embeddings.embedding exceeded an internal limit; inspect pgvector logs';
|
|
227
|
+
END;
|
|
228
|
+
END$$;
|
package/schema/002-entities.sql
CHANGED
|
@@ -28,10 +28,24 @@ CREATE TABLE IF NOT EXISTS ${schema}.entities (
|
|
|
28
28
|
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
29
29
|
);
|
|
30
30
|
|
|
31
|
-
-- Migration: add entity_scope if missing (idempotent)
|
|
32
|
-
-- For upgrades: backfill from agent_id
|
|
31
|
+
-- Migration: add entity_scope if missing (idempotent, scope-corruption-safe).
|
|
32
|
+
-- For upgrades: backfill from agent_id ONLY on the first run of this migration,
|
|
33
|
+
-- detected via the column still being NULL-able. Once SET NOT NULL below fires,
|
|
34
|
+
-- subsequent runs skip the backfill so operator-assigned 'default' values are
|
|
35
|
+
-- never clobbered.
|
|
33
36
|
ALTER TABLE ${schema}.entities ADD COLUMN IF NOT EXISTS entity_scope TEXT DEFAULT 'default';
|
|
34
|
-
|
|
37
|
+
DO $$
|
|
38
|
+
BEGIN
|
|
39
|
+
IF EXISTS (
|
|
40
|
+
SELECT 1 FROM information_schema.columns
|
|
41
|
+
WHERE table_schema = '${schema}' AND table_name = 'entities'
|
|
42
|
+
AND column_name = 'entity_scope' AND is_nullable = 'YES'
|
|
43
|
+
) THEN
|
|
44
|
+
UPDATE ${schema}.entities
|
|
45
|
+
SET entity_scope = agent_id
|
|
46
|
+
WHERE entity_scope IS NULL OR entity_scope = 'default';
|
|
47
|
+
END IF;
|
|
48
|
+
END$$;
|
|
35
49
|
ALTER TABLE ${schema}.entities ALTER COLUMN entity_scope SET NOT NULL;
|
|
36
50
|
|
|
37
51
|
-- Unique constraint: entity identity is (tenant, name, scope)
|