@shadowforge0/aquifer-memory 1.0.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -20
- package/consumers/claude-code.js +117 -0
- package/consumers/cli.js +17 -0
- package/consumers/default/daily-entries.js +196 -0
- package/consumers/default/index.js +282 -0
- package/consumers/default/prompts/summary.js +153 -0
- package/consumers/mcp.js +3 -23
- package/consumers/miranda/context-inject.js +119 -0
- package/consumers/miranda/daily-entries.js +224 -0
- package/consumers/miranda/index.js +353 -0
- package/consumers/miranda/instance.js +55 -0
- package/consumers/miranda/llm.js +99 -0
- package/consumers/miranda/prompts/summary.js +303 -0
- package/consumers/miranda/recall-format.js +74 -0
- package/consumers/miranda/workspace-files.js +91 -0
- package/consumers/openclaw-ext/index.js +38 -0
- package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
- package/consumers/openclaw-ext/package.json +10 -0
- package/consumers/openclaw-plugin.js +66 -74
- package/consumers/opencode.js +21 -24
- package/consumers/shared/autodetect.js +64 -0
- package/consumers/shared/entity-parser.js +119 -0
- package/consumers/shared/ingest.js +148 -0
- package/consumers/shared/llm-autodetect.js +137 -0
- package/consumers/shared/normalize.js +129 -0
- package/consumers/shared/recall-format.js +110 -0
- package/core/aquifer.js +200 -82
- package/core/entity.js +29 -17
- package/core/storage.js +116 -45
- package/docs/postprocess-contract.md +132 -0
- package/index.js +9 -1
- package/package.json +23 -2
- package/pipeline/_http.js +1 -1
- package/pipeline/consolidation/apply.js +176 -0
- package/pipeline/consolidation/index.js +21 -0
- package/pipeline/extract-entities.js +2 -2
- package/pipeline/rerank.js +1 -1
- package/pipeline/summarize.js +4 -1
- package/schema/001-base.sql +61 -24
- package/schema/002-entities.sql +17 -3
- package/schema/004-facts.sql +67 -0
- package/scripts/diagnose-fts-zh.js +168 -134
- package/scripts/diagnose-vector.js +188 -0
- package/scripts/install-openclaw.sh +59 -0
- package/scripts/smoke.mjs +2 -2
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
*/
|
|
17
17
|
|
|
18
18
|
const { createAquiferFromConfig } = require('./shared/factory');
|
|
19
|
+
const { runIngest } = require('./shared/ingest');
|
|
20
|
+
const { formatRecallResults: sharedFormatRecallResults } = require('./shared/recall-format');
|
|
19
21
|
|
|
20
22
|
// ---------------------------------------------------------------------------
|
|
21
23
|
// Helpers
|
|
@@ -38,6 +40,7 @@ function normalizeEntries(rawEntries) {
|
|
|
38
40
|
let startedAt = null, lastMessageAt = null;
|
|
39
41
|
|
|
40
42
|
for (const entry of rawEntries) {
|
|
43
|
+
if (!entry) continue;
|
|
41
44
|
const msg = entry.message || entry;
|
|
42
45
|
if (!msg || !msg.role) continue;
|
|
43
46
|
if (!['user', 'assistant', 'system'].includes(msg.role)) continue;
|
|
@@ -79,43 +82,63 @@ function normalizeEntries(rawEntries) {
|
|
|
79
82
|
};
|
|
80
83
|
}
|
|
81
84
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
}
|
|
100
|
-
if (r.matchedTurnText) {
|
|
101
|
-
lines.push(`Matched: ${r.matchedTurnText.slice(0, 200)}`);
|
|
102
|
-
}
|
|
103
|
-
return lines.join('\n');
|
|
104
|
-
}).join('\n\n');
|
|
105
|
-
}
|
|
85
|
+
// Thin adapter over the shared formatter. OpenClaw's tool output historically
|
|
86
|
+
// used "Matched:" instead of "Matched turn:" and joined with blank lines, so
|
|
87
|
+
// we supply a pair of renderer overrides to preserve that shape.
|
|
88
|
+
const formatRecallResults = (function () {
|
|
89
|
+
const { createRecallFormatter } = require('./shared/recall-format');
|
|
90
|
+
const _fmt = createRecallFormatter({
|
|
91
|
+
header: () => null,
|
|
92
|
+
matched: (r) => r.matchedTurnText ? `Matched: ${String(r.matchedTurnText).slice(0, 200)}` : null,
|
|
93
|
+
separator: () => '',
|
|
94
|
+
});
|
|
95
|
+
return (results) => {
|
|
96
|
+
if (!results || results.length === 0) return 'No matching sessions found.';
|
|
97
|
+
return _fmt(results);
|
|
98
|
+
};
|
|
99
|
+
})();
|
|
100
|
+
// Re-export the shared formatter too for callers that want the default shape.
|
|
101
|
+
formatRecallResults.shared = sharedFormatRecallResults;
|
|
106
102
|
|
|
107
103
|
// ---------------------------------------------------------------------------
|
|
108
104
|
// Plugin
|
|
109
105
|
// ---------------------------------------------------------------------------
|
|
110
106
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
107
|
+
/**
 * Assemble the plugin descriptor that is exported to the host.
 *
 * @returns {{id: string, name: string, register: function}} descriptor with
 *   the plugin id, its display name, and the `register` entry point.
 */
function buildPlugin() {
  const descriptor = {};
  descriptor.id = 'aquifer-memory';
  descriptor.name = 'Aquifer Memory';
  descriptor.register = register;
  return descriptor;
}
|
|
114
|
+
|
|
115
|
+
module.exports = buildPlugin();
|
|
116
|
+
// Expose helpers for unit testing. Not part of the plugin's OpenClaw-visible
|
|
117
|
+
// contract; OpenClaw reads { id, name, register } only.
|
|
118
|
+
module.exports.normalizeEntries = normalizeEntries;
|
|
119
|
+
module.exports.coerceRawEntries = coerceRawEntries;
|
|
114
120
|
|
|
115
|
-
|
|
121
|
+
function register(api) {
|
|
116
122
|
const pluginConfig = api.pluginConfig || {};
|
|
117
|
-
let aquifer;
|
|
118
123
|
|
|
124
|
+
// v1.2.0: delegate to a persona layer if one is configured, otherwise
|
|
125
|
+
// run the generic default path (before_reset + session_recall + feedback).
|
|
126
|
+
const personaPath = pluginConfig.persona || process.env.AQUIFER_PERSONA;
|
|
127
|
+
if (personaPath) {
|
|
128
|
+
try {
|
|
129
|
+
const persona = require(personaPath);
|
|
130
|
+
if (persona && typeof persona.mountOnOpenClaw === 'function') {
|
|
131
|
+
persona.mountOnOpenClaw(api, pluginConfig);
|
|
132
|
+
api.logger.info(`[aquifer-memory] registered via persona: ${personaPath}`);
|
|
133
|
+
return;
|
|
134
|
+
}
|
|
135
|
+
api.logger.warn(`[aquifer-memory] persona at ${personaPath} lacks mountOnOpenClaw; falling back to default`);
|
|
136
|
+
} catch (err) {
|
|
137
|
+
api.logger.warn(`[aquifer-memory] failed to load persona ${personaPath}: ${err.message}; falling back to default`);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
let aquifer;
|
|
119
142
|
try {
|
|
120
143
|
aquifer = createAquiferFromConfig(pluginConfig);
|
|
121
144
|
} catch (err) {
|
|
@@ -138,65 +161,35 @@ module.exports = {
|
|
|
138
161
|
if ((sessionKey || '').includes('subagent')) return;
|
|
139
162
|
if ((sessionKey || '').includes(':cron:')) return;
|
|
140
163
|
|
|
141
|
-
const dedupKey = `${agentId}:${sessionId}`;
|
|
142
|
-
if (recentlyProcessed.has(dedupKey) || inFlight.has(dedupKey)) return;
|
|
143
|
-
|
|
144
164
|
const rawEntries = coerceRawEntries(event?.messages || []);
|
|
145
165
|
if (rawEntries.length < 3) {
|
|
146
166
|
api.logger.info(`[aquifer-memory] skip: ${sessionId} only ${rawEntries.length} msgs`);
|
|
147
167
|
return;
|
|
148
168
|
}
|
|
149
169
|
|
|
150
|
-
inFlight.add(dedupKey);
|
|
151
170
|
api.logger.info(`[aquifer-memory] capturing ${sessionId} (${rawEntries.length} entries)`);
|
|
152
171
|
|
|
153
172
|
(async () => {
|
|
154
173
|
try {
|
|
174
|
+
// OpenClaw hands us flat {role, content} entries; normalizeEntries
|
|
175
|
+
// produces the commit-ready shape, which we feed to shared runIngest
|
|
176
|
+
// as 'preNormalized' so commit+enrich+dedup stays host-agnostic.
|
|
155
177
|
const norm = normalizeEntries(rawEntries);
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
// Commit
|
|
162
|
-
await aquifer.commit(sessionId, norm.messages, {
|
|
178
|
+
await runIngest({
|
|
179
|
+
aquifer,
|
|
180
|
+
sessionId,
|
|
163
181
|
agentId,
|
|
164
182
|
source: 'openclaw',
|
|
165
183
|
sessionKey,
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
184
|
+
adapter: 'preNormalized',
|
|
185
|
+
preNormalized: norm,
|
|
186
|
+
minUserMessages,
|
|
187
|
+
dedupMap: recentlyProcessed,
|
|
188
|
+
inFlight,
|
|
189
|
+
logger: api.logger,
|
|
171
190
|
});
|
|
172
|
-
api.logger.info(`[aquifer-memory] committed ${sessionId}`);
|
|
173
|
-
|
|
174
|
-
// Enrich (if enough messages)
|
|
175
|
-
if (norm.userCount >= minUserMessages) {
|
|
176
|
-
try {
|
|
177
|
-
const result = await aquifer.enrich(sessionId, { agentId });
|
|
178
|
-
api.logger.info(`[aquifer-memory] enriched ${sessionId} (${result.turnsEmbedded} turns, ${result.entitiesFound} entities)`);
|
|
179
|
-
} catch (enrichErr) {
|
|
180
|
-
api.logger.warn(`[aquifer-memory] enrich failed for ${sessionId}: ${enrichErr.message}`);
|
|
181
|
-
}
|
|
182
|
-
} else {
|
|
183
|
-
try {
|
|
184
|
-
await aquifer.skip(sessionId, { agentId, reason: `user_count=${norm.userCount} < min=${minUserMessages}` });
|
|
185
|
-
} catch (e) { api.logger.warn(`[aquifer-memory] skip failed for ${sessionId}: ${e.message}`); }
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
recentlyProcessed.set(dedupKey, Date.now());
|
|
189
191
|
} catch (err) {
|
|
190
192
|
api.logger.warn(`[aquifer-memory] capture failed for ${sessionId}: ${err.message}`);
|
|
191
|
-
} finally {
|
|
192
|
-
inFlight.delete(dedupKey);
|
|
193
|
-
// Evict old entries
|
|
194
|
-
if (recentlyProcessed.size > 200) {
|
|
195
|
-
const cutoff = Date.now() - 30 * 60 * 1000;
|
|
196
|
-
for (const [k, ts] of recentlyProcessed) {
|
|
197
|
-
if (ts < cutoff) recentlyProcessed.delete(k);
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
193
|
}
|
|
201
194
|
})();
|
|
202
195
|
});
|
|
@@ -292,6 +285,5 @@ module.exports = {
|
|
|
292
285
|
};
|
|
293
286
|
}, { name: 'session_feedback' });
|
|
294
287
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
};
|
|
288
|
+
api.logger.info('[aquifer-memory] registered (before_reset + session_recall + session_feedback)');
|
|
289
|
+
}
|
package/consumers/opencode.js
CHANGED
|
@@ -26,8 +26,7 @@
|
|
|
26
26
|
|
|
27
27
|
const path = require('path');
|
|
28
28
|
const os = require('os');
|
|
29
|
-
const {
|
|
30
|
-
|
|
29
|
+
const { runIngest } = require('./shared/ingest');
|
|
31
30
|
// ---------------------------------------------------------------------------
|
|
32
31
|
// SQLite access — use Node 22+ built-in or fall back to better-sqlite3
|
|
33
32
|
// ---------------------------------------------------------------------------
|
|
@@ -37,7 +36,7 @@ function openSqlite(dbPath) {
|
|
|
37
36
|
try {
|
|
38
37
|
const { DatabaseSync } = require('node:sqlite');
|
|
39
38
|
return new DatabaseSync(dbPath, { open: true, readOnly: true });
|
|
40
|
-
} catch
|
|
39
|
+
} catch {
|
|
41
40
|
// not available
|
|
42
41
|
}
|
|
43
42
|
|
|
@@ -45,7 +44,7 @@ function openSqlite(dbPath) {
|
|
|
45
44
|
try {
|
|
46
45
|
const Database = require('better-sqlite3');
|
|
47
46
|
return new Database(dbPath, { readonly: true });
|
|
48
|
-
} catch
|
|
47
|
+
} catch {
|
|
49
48
|
// not available
|
|
50
49
|
}
|
|
51
50
|
|
|
@@ -220,7 +219,7 @@ async function ingestOpenCode(aquifer, args) {
|
|
|
220
219
|
try {
|
|
221
220
|
const existing = await aquifer.exportSessions({ source: 'opencode', limit: 10000 });
|
|
222
221
|
for (const row of existing) existingSet.add(row.session_id);
|
|
223
|
-
} catch
|
|
222
|
+
} catch {
|
|
224
223
|
// exportSessions may not exist in all versions
|
|
225
224
|
}
|
|
226
225
|
|
|
@@ -271,36 +270,34 @@ async function ingestOpenCode(aquifer, args) {
|
|
|
271
270
|
continue;
|
|
272
271
|
}
|
|
273
272
|
|
|
274
|
-
// Commit
|
|
273
|
+
// Commit + optional enrich via shared ingest pipeline
|
|
275
274
|
try {
|
|
276
|
-
await
|
|
275
|
+
const ingestResult = await runIngest({
|
|
276
|
+
aquifer,
|
|
277
|
+
sessionId: sid,
|
|
277
278
|
agentId,
|
|
278
279
|
source: 'opencode',
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
280
|
+
adapter: 'preNormalized',
|
|
281
|
+
preNormalized: norm,
|
|
282
|
+
enrich: doEnrich,
|
|
283
|
+
minUserMessages,
|
|
284
|
+
logger: { info() {}, warn(m) { info.enrichError = m; } },
|
|
284
285
|
});
|
|
286
|
+
|
|
285
287
|
committed++;
|
|
286
288
|
info.status = 'committed';
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
const enrichResult = await aquifer.enrich(sid, { agentId });
|
|
292
|
-
info.status = 'enriched';
|
|
293
|
-
info.turnsEmbedded = enrichResult.turnsEmbedded;
|
|
294
|
-
info.entitiesFound = enrichResult.entitiesFound;
|
|
295
|
-
} catch (enrichErr) {
|
|
296
|
-
info.enrichError = enrichErr.message;
|
|
297
|
-
}
|
|
289
|
+
if (ingestResult.enrichResult) {
|
|
290
|
+
info.status = 'enriched';
|
|
291
|
+
info.turnsEmbedded = ingestResult.enrichResult.turnsEmbedded;
|
|
292
|
+
info.entitiesFound = ingestResult.enrichResult.entitiesFound;
|
|
298
293
|
}
|
|
299
294
|
|
|
300
295
|
if (jsonOutput) {
|
|
301
296
|
results.push(info);
|
|
302
297
|
} else {
|
|
303
|
-
const enrichNote = info.turnsEmbedded
|
|
298
|
+
const enrichNote = info.turnsEmbedded !== null && info.turnsEmbedded !== undefined
|
|
299
|
+
? ` (${info.turnsEmbedded} turns, ${info.entitiesFound} entities)`
|
|
300
|
+
: '';
|
|
304
301
|
console.log(` [${committed}] ${sid} "${session.title}"${enrichNote}`);
|
|
305
302
|
}
|
|
306
303
|
} catch (err) {
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { Pool } = require('pg');
|
|
4
|
+
|
|
5
|
+
const DEFAULT_PG_URL = 'postgresql://aquifer:aquifer@localhost:5432/aquifer';
|
|
6
|
+
const DEFAULT_OLLAMA_URL = 'http://localhost:11434';
|
|
7
|
+
|
|
8
|
+
/**
 * Check whether a Postgres server answers at `url`.
 *
 * Creates a throwaway single-connection pool, runs `SELECT 1`, and always
 * closes the pool afterwards (errors while closing are ignored).
 *
 * @param {string} url - connection string handed to the pool
 * @param {object} [options]
 * @param {number} [options.timeoutMs=1500] - connection timeout in milliseconds
 * @returns {Promise<boolean>} true when the query succeeded, false on any query error
 */
async function probePostgres(url, { timeoutMs = 1500 } = {}) {
  const poolConfig = {
    connectionString: url,
    connectionTimeoutMillis: timeoutMs,
    max: 1,
  };
  const probePool = new Pool(poolConfig);

  let reachable;
  try {
    await probePool.query('SELECT 1');
    reachable = true;
  } catch {
    reachable = false;
  }

  // Best-effort teardown: a failing end() must not change the probe result.
  try {
    await probePool.end();
  } catch {
    /* ignore */
  }
  return reachable;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Check whether an Ollama server answers at `baseUrl`.
 *
 * Issues `GET <baseUrl>/api/tags` and aborts the request after `timeoutMs`.
 * Any failure (network error, abort, non-string `baseUrl`) yields `false`
 * rather than throwing.
 *
 * @param {string} baseUrl - server base URL; trailing slashes are ignored
 * @param {object} [options]
 * @param {number} [options.timeoutMs=1500] - abort the request after this many milliseconds
 * @returns {Promise<boolean>} the response's `ok` flag, or false on any error
 */
async function probeOllama(baseUrl, { timeoutMs = 1500 } = {}) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    // Strip every trailing slash so "http://host//" does not produce "//api/tags".
    const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/tags`;
    const res = await fetch(endpoint, { signal: controller.signal });
    const reachable = res.ok;
    // Only the status matters. Cancel the unread body so the underlying
    // connection is released promptly instead of waiting for GC.
    try {
      await res.body?.cancel();
    } catch {
      /* best effort: a failed cancel must not flip the probe result */
    }
    return reachable;
  } catch {
    return false;
  } finally {
    clearTimeout(timer);
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Detect locally running services for settings the environment leaves unset.
 *
 * - When neither `DATABASE_URL` nor `AQUIFER_DB_URL` is set, the default
 *   Postgres URL is probed and, if reachable, returned as `DATABASE_URL`.
 * - When neither `EMBED_PROVIDER` nor the `AQUIFER_EMBED_BASE_URL` +
 *   `AQUIFER_EMBED_MODEL` pair is set, the default Ollama URL is probed and,
 *   if reachable, `EMBED_PROVIDER` is returned as `'ollama'`.
 *
 * The two probes are independent, so they run concurrently. The worst case
 * is one probe timeout instead of the sum of both.
 *
 * @param {object} env - environment-style map of settings (e.g. process.env)
 * @param {object} [probes] - optional overrides, mainly for tests
 * @param {function} [probes.probePostgres] - async (url) => boolean
 * @param {function} [probes.probeOllama] - async (baseUrl) => boolean
 * @returns {Promise<object>} detected settings; contains only the keys that
 *   were missing from `env` and whose probe succeeded
 */
async function autodetectForQuickstart(env, probes = {}) {
  const probePg = probes.probePostgres || probePostgres;
  const probeOll = probes.probeOllama || probeOllama;

  const hasDb = env.DATABASE_URL || env.AQUIFER_DB_URL;
  const hasEmbed = env.EMBED_PROVIDER
    || (env.AQUIFER_EMBED_BASE_URL && env.AQUIFER_EMBED_MODEL);

  // A probe only starts when its setting is missing; otherwise the slot is false.
  const [pgReachable, ollamaReachable] = await Promise.all([
    hasDb ? false : probePg(DEFAULT_PG_URL),
    hasEmbed ? false : probeOll(DEFAULT_OLLAMA_URL),
  ]);

  const detected = {};
  if (pgReachable) detected.DATABASE_URL = DEFAULT_PG_URL;
  if (ollamaReachable) detected.EMBED_PROVIDER = 'ollama';
  return detected;
}
|
|
57
|
+
|
|
58
|
+
module.exports = {
|
|
59
|
+
autodetectForQuickstart,
|
|
60
|
+
probePostgres,
|
|
61
|
+
probeOllama,
|
|
62
|
+
DEFAULT_PG_URL,
|
|
63
|
+
DEFAULT_OLLAMA_URL,
|
|
64
|
+
};
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Entity section parser — shared across consumers.
|
|
5
|
+
//
|
|
6
|
+
// Parses LLM output lines of the form:
|
|
7
|
+
// ENTITY: <name> | <type> | <alias1, alias2, ...>
|
|
8
|
+
// RELATION: <src> | <dst>
|
|
9
|
+
//
|
|
10
|
+
// Returns { entities, relations } ready for Aquifer entityParseFn.
|
|
11
|
+
// Dedups, normalizes names via Aquifer's normalizeEntityName, and drops noise
|
|
12
|
+
// entities (generic roles, pure-numeric, file paths, CLI flags, etc.).
|
|
13
|
+
//
|
|
14
|
+
// Consumers that use a different ENTITIES prompt format should write their own
|
|
15
|
+
// parser — this one is for the ENTITY:/RELATION: line protocol.
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
// Import directly from core/entity to avoid a circular dep with top-level
|
|
19
|
+
// index.js, which itself re-exports parseEntitySection from here.
|
|
20
|
+
const { normalizeEntityName } = require('../../core/entity');
|
|
21
|
+
|
|
22
|
+
const VALID_ENTITY_TYPES = new Set([
|
|
23
|
+
'person', 'project', 'concept', 'tool', 'metric',
|
|
24
|
+
'org', 'place', 'event', 'doc', 'task', 'topic', 'other',
|
|
25
|
+
]);
|
|
26
|
+
|
|
27
|
+
const ENTITY_STOPLIST = new Set([
|
|
28
|
+
// Role generics
|
|
29
|
+
'助理', '使用者', '用戶', 'assistant', 'user', 'agent', 'agents', '我',
|
|
30
|
+
// Too broad
|
|
31
|
+
'api', 'db', 'llm', 'cli', 'bash', 'diff', 'bug', 'config',
|
|
32
|
+
'extensions', 'hooks', 'cron', 'manifest', 'index.js', 'node.js',
|
|
33
|
+
// Common noise
|
|
34
|
+
'ok', 'timeout', 'error', 'test', 'cache', 'token',
|
|
35
|
+
'登入狀態', '授權提示', 'chat_id', 'promise.race',
|
|
36
|
+
]);
|
|
37
|
+
|
|
38
|
+
const CODE_EXT_RE = /\.(js|ts|jsx|tsx|mjs|cjs|sh|py|sql|md|json|yml|yaml|css|html|vue|svelte|go|rs|rb|php|java|kt|c|cpp|h|toml|ini|cfg|conf|lock|env|proto)$/i;
|
|
39
|
+
const PATH_RE = /^[.\/~].*\//;
|
|
40
|
+
const DOTFILE_RE = /^\.[a-z][a-z0-9._-]*$/i;
|
|
41
|
+
|
|
42
|
+
/**
 * Decide whether a candidate entity is noise that should be dropped.
 *
 * @param {string} normalizedName - normalized form, checked against the stoplist and for minimum length
 * @param {string} rawName - name as written, checked against the shape patterns
 * @returns {boolean} true when the entity is on the stoplist, starts with a
 *   number-plus-unit quantity, is a long digit run, is shorter than two
 *   characters once normalized, or looks like a path, dotfile, code file
 *   name, or CLI flag
 */
function isNoiseEntity(normalizedName, rawName) {
  if (ENTITY_STOPLIST.has(normalizedName)) return true;

  // Quantities, durations, error counts, and long numeric ids.
  const numericPatterns = [
    /^\d+[秒分時天日月年kKgG%]/,
    /^\d{2,}[mM]/,
    /^\d+錯誤/,
    /^\d{10,}$/,
  ];
  if (numericPatterns.some((pattern) => pattern.test(rawName))) return true;

  if (normalizedName.length < 2) return true;

  // Paths, dotfiles, source file names, and command-line flags.
  const shapePatterns = [PATH_RE, DOTFILE_RE, CODE_EXT_RE, /^--?\w/];
  return shapePatterns.some((pattern) => pattern.test(rawName));
}
|
|
55
|
+
|
|
56
|
+
/**
 * Split one protocol line into trimmed fields.
 *
 * A pipe takes precedence over a tab as the delimiter. A line with neither
 * yields a single trimmed field.
 *
 * @param {string} line - line content with the ENTITY:/RELATION: prefix already removed
 * @returns {string[]} trimmed fields in order
 */
function splitFields(line) {
  const delimiter = ['|', '\t'].find((candidate) => line.includes(candidate));
  if (delimiter === undefined) {
    return [line.trim()];
  }
  const parts = line.split(delimiter);
  return parts.map((part) => part.trim());
}
|
|
61
|
+
|
|
62
|
+
/**
 * Parse the ENTITY:/RELATION: line protocol out of LLM output.
 *
 * Entities are de-duplicated by normalized name, noise entities are dropped,
 * and unknown types fall back to `'other'`. Relations are treated as
 * undirected for de-duplication, and a relation is kept only when both of
 * its endpoints were accepted as entities.
 *
 * `maxRelations` is applied to relations that survive that endpoint check,
 * so relations naming unknown or rejected entities do not use up the budget.
 *
 * @param {string} text - raw section text, one ENTITY:/RELATION: item per line
 * @param {object} [opts]
 * @param {number} [opts.maxEntities=10] - cap on accepted entities
 * @param {number} [opts.maxRelations=15] - cap on returned relations
 * @returns {{entities: Array<{name: string, normalizedName: string, type: string, aliases: string[]}>,
 *            relations: Array<{src: string, dst: string}>}}
 *   empty lists when `text` is empty or not a string
 */
function parseEntitySection(text, opts = {}) {
  if (!text || typeof text !== 'string') return { entities: [], relations: [] };

  const maxEntities = Number.isFinite(opts.maxEntities) ? opts.maxEntities : 10;
  const maxRelations = Number.isFinite(opts.maxRelations) ? opts.maxRelations : 15;

  const entityMap = new Map();
  const seenPairs = new Set();
  // Relation candidates keep their normalized endpoints so the final
  // endpoint check does not have to normalize again.
  const candidates = [];

  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    if (/^ENTITY:/i.test(line)) {
      if (entityMap.size >= maxEntities) continue;
      const fields = splitFields(line.replace(/^ENTITY:\s*/i, ''));
      const rawName = (fields[0] || '').trim().slice(0, 200);
      if (!rawName) continue;
      const normalizedName = normalizeEntityName(rawName);
      if (!normalizedName || entityMap.has(normalizedName)) continue;
      if (isNoiseEntity(normalizedName, rawName)) continue;
      const rawType = (fields[1] || '').toLowerCase().trim();
      const type = VALID_ENTITY_TYPES.has(rawType) ? rawType : 'other';
      const rawAliases = fields[2] || '';
      // A lone "-" is the protocol's placeholder for "no aliases".
      const aliases = (rawAliases && rawAliases !== '-')
        ? rawAliases.split(',').map(a => a.trim().slice(0, 200)).filter(a => a && a !== '-')
        : [];
      entityMap.set(normalizedName, { name: rawName, normalizedName, type, aliases });
    } else if (/^RELATION:/i.test(line)) {
      const fields = splitFields(line.replace(/^RELATION:\s*/i, ''));
      const src = (fields[0] || '').trim();
      const dst = (fields[1] || '').trim();
      if (!src || !dst) continue;
      const ns = normalizeEntityName(src);
      const nd = normalizeEntityName(dst);
      if (!ns || !nd || ns === nd) continue;
      // Order-independent key: A|B and B|A count as the same relation.
      const pairKey = ns < nd ? `${ns}|||${nd}` : `${nd}|||${ns}`;
      if (seenPairs.has(pairKey)) continue;
      seenPairs.add(pairKey);
      candidates.push({ src, dst, ns, nd });
    }
  }

  // Entities may appear after the relations that mention them, so endpoints
  // are checked only once the whole text has been read. The cap counts
  // surviving relations only.
  const relations = [];
  for (const { src, dst, ns, nd } of candidates) {
    if (relations.length >= maxRelations) break;
    if (entityMap.has(ns) && entityMap.has(nd)) relations.push({ src, dst });
  }

  return { entities: [...entityMap.values()], relations };
}
|
|
113
|
+
|
|
114
|
+
module.exports = {
|
|
115
|
+
parseEntitySection,
|
|
116
|
+
isNoiseEntity,
|
|
117
|
+
VALID_ENTITY_TYPES,
|
|
118
|
+
ENTITY_STOPLIST,
|
|
119
|
+
};
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Shared ingest flow — the standard "received session → Aquifer" pipeline.
|
|
5
|
+
//
|
|
6
|
+
// All three host adapters (OpenClaw before_reset, Claude Code afterburn,
|
|
7
|
+
// OpenCode backfill) do the same three things:
|
|
8
|
+
// 1. Normalize raw entries to commit-ready shape
|
|
9
|
+
// 2. commit() the messages + metadata
|
|
10
|
+
// 3. enrich() if enough user turns, else skip()
|
|
11
|
+
// With dedup on (agentId, sessionId) so the same hook firing twice is safe.
|
|
12
|
+
//
|
|
13
|
+
// runIngest() centralizes this. Host adapters pass in their raw entries, the
|
|
14
|
+
// adapter name, and an optional postProcess callback for persona side effects.
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
const { normalizeMessages } = require('./normalize');
|
|
18
|
+
|
|
19
|
+
const RECENT_CAP = 200;
|
|
20
|
+
const RECENT_TTL_MS = 30 * 60 * 1000;
|
|
21
|
+
|
|
22
|
+
/**
 * Prune expired entries from a dedup map once it has grown past the cap.
 *
 * Nothing happens while the map holds RECENT_CAP entries or fewer. Above
 * that, every entry whose timestamp is older than RECENT_TTL_MS relative to
 * `now` is removed. Entries younger than the TTL are kept even above the cap.
 *
 * @param {Map<string, number>} dedupMap - key -> timestamp in ms; may be null/undefined
 * @param {number} [now=Date.now()] - reference time in milliseconds
 * @returns {void}
 */
function evictStale(dedupMap, now = Date.now()) {
  const needsPruning = Boolean(dedupMap) && dedupMap.size > RECENT_CAP;
  if (!needsPruning) return;

  const oldestAllowed = now - RECENT_TTL_MS;
  dedupMap.forEach((stamp, key) => {
    if (stamp < oldestAllowed) dedupMap.delete(key);
  });
}
|
|
29
|
+
|
|
30
|
+
/**
 * Run the standard commit-then-enrich flow for a single session.
 *
 * Steps: normalize (or accept pre-normalized input), commit(), then either
 * enrich() when the session has at least `minUserMessages` user messages or
 * skip() otherwise. Enrich and skip failures are logged, not rethrown,
 * because the commit has already succeeded.
 *
 * Every outcome that committed the session ('ok', 'committed_only',
 * 'skipped_short') records the session in `dedupMap`, so a hook firing twice
 * for the same session does not commit it again. 'skipped_empty' commits
 * nothing and is not recorded.
 *
 * @param {object} opts
 * @param {object} opts.aquifer — Aquifer instance
 * @param {string} opts.sessionId
 * @param {string} opts.agentId
 * @param {string} [opts.source] — caller-provided source tag (e.g. 'openclaw', 'cc', 'opencode');
 *   defaults to the adapter name, then 'api'
 * @param {string} [opts.sessionKey] — passed through to commit()
 * @param {any[]} [opts.rawEntries] — host-native session entries, normalized via
 *   normalizeMessages; ignored when adapter === 'preNormalized'
 * @param {'gateway'|'cc'|'claude-code'|'preNormalized'} [opts.adapter]
 *   'preNormalized' means the caller already produced the commit-ready shape
 *   and passes it as opts.preNormalized.
 * @param {object} [opts.preNormalized] — { messages, userCount, ... } ready to commit,
 *   required when adapter === 'preNormalized'
 * @param {number} [opts.minUserMessages=3] — enrich threshold
 * @param {boolean} [opts.enrich=true] — when false, commit only; don't enrich or skip.
 * @param {Map} [opts.dedupMap] — Map<key, timestamp> of recently processed sessions
 * @param {Set} [opts.inFlight] — Set<key>; concurrent firings are guarded
 * @param {function} [opts.postProcess] — forwarded to enrich()
 * @param {function} [opts.summaryFn] — forwarded to enrich()
 * @param {function} [opts.entityParseFn] — forwarded to enrich()
 * @param {object} [opts.logger] — { info, warn }; defaults to console
 * @returns {Promise<{status:string, normalized:any[]|null, counts:object|null, enrichResult:object|null, skipReason?:string}>}
 *   status is one of 'dedup', 'skipped_empty', 'committed_only', 'skipped_short', 'ok'
 * @throws {Error} when aquifer, sessionId or agentId is missing, when the
 *   'preNormalized' adapter is used without opts.preNormalized, or when commit() fails
 */
async function runIngest(opts = {}) {
  const {
    aquifer, sessionId, agentId, source, sessionKey,
    rawEntries, adapter, preNormalized,
    minUserMessages = 3,
    enrich = true,
    dedupMap = null, inFlight = null,
    postProcess = null, summaryFn = null, entityParseFn = null,
    logger = console,
  } = opts;

  if (!aquifer) throw new Error('aquifer is required');
  if (!sessionId) throw new Error('sessionId is required');
  if (!agentId) throw new Error('agentId is required');

  const dedupKey = `${agentId}:${sessionId}`;
  if (dedupMap && dedupMap.has(dedupKey)) {
    return { status: 'dedup', normalized: null, counts: null, enrichResult: null, skipReason: 'recent' };
  }
  if (inFlight && inFlight.has(dedupKey)) {
    return { status: 'dedup', normalized: null, counts: null, enrichResult: null, skipReason: 'in_flight' };
  }
  if (inFlight) inFlight.add(dedupKey);

  // Record the session as processed and prune old dedup entries.
  const markProcessed = () => {
    if (!dedupMap) return;
    dedupMap.set(dedupKey, Date.now());
    evictStale(dedupMap);
  };

  try {
    // 1. Normalize
    let norm;
    if (adapter === 'preNormalized') {
      if (!preNormalized) throw new Error('preNormalized adapter requires opts.preNormalized');
      norm = preNormalized;
    } else {
      norm = normalizeMessages(rawEntries, { adapter });
    }

    if (norm.userCount === 0) {
      return { status: 'skipped_empty', normalized: norm.messages, counts: norm, enrichResult: null, skipReason: 'no_user_messages' };
    }

    // 2. Commit
    await aquifer.commit(sessionId, norm.messages, {
      agentId,
      source: source || adapter || 'api',
      sessionKey: sessionKey || null,
      model: norm.model,
      tokensIn: norm.tokensIn,
      tokensOut: norm.tokensOut,
      startedAt: norm.startedAt,
      lastMessageAt: norm.lastMessageAt,
    });
    if (logger && logger.info) logger.info(`[aquifer-ingest] committed ${sessionId} (${norm.messages.length} msgs, user=${norm.userCount})`);

    // 3. Enrich or skip (unless caller opts out — then commit only)
    let enrichResult = null;
    if (!enrich) {
      markProcessed();
      return { status: 'committed_only', normalized: norm.messages, counts: norm, enrichResult: null };
    }
    if (norm.userCount >= minUserMessages) {
      try {
        enrichResult = await aquifer.enrich(sessionId, {
          agentId,
          summaryFn: summaryFn || undefined,
          entityParseFn: entityParseFn || undefined,
          postProcess: postProcess || undefined,
        });
        if (logger && logger.info) {
          logger.info(`[aquifer-ingest] enriched ${sessionId} (turns=${enrichResult.turnsEmbedded}, entities=${enrichResult.entitiesFound})`);
        }
      } catch (enrichErr) {
        if (logger && logger.warn) logger.warn(`[aquifer-ingest] enrich failed for ${sessionId}: ${enrichErr.message}`);
        // Commit already succeeded — don't rethrow
      }
    } else {
      try {
        await aquifer.skip(sessionId, { agentId, reason: `user_count=${norm.userCount} < min=${minUserMessages}` });
      } catch (skipErr) {
        if (logger && logger.warn) logger.warn(`[aquifer-ingest] skip failed for ${sessionId}: ${skipErr.message}`);
      }
      // The session was committed, so a repeat firing must dedup like the
      // enriched path does instead of committing and skipping again.
      markProcessed();
      return { status: 'skipped_short', normalized: norm.messages, counts: norm, enrichResult: null, skipReason: `user_count=${norm.userCount}` };
    }

    markProcessed();

    return { status: 'ok', normalized: norm.messages, counts: norm, enrichResult };
  } finally {
    if (inFlight) inFlight.delete(dedupKey);
  }
}
|
|
147
|
+
|
|
148
|
+
module.exports = { runIngest };
|