@shadowforge0/aquifer-memory 1.0.2 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +29 -20
  2. package/consumers/claude-code.js +117 -0
  3. package/consumers/cli.js +17 -0
  4. package/consumers/default/daily-entries.js +196 -0
  5. package/consumers/default/index.js +282 -0
  6. package/consumers/default/prompts/summary.js +153 -0
  7. package/consumers/mcp.js +3 -23
  8. package/consumers/miranda/context-inject.js +119 -0
  9. package/consumers/miranda/daily-entries.js +224 -0
  10. package/consumers/miranda/index.js +353 -0
  11. package/consumers/miranda/instance.js +55 -0
  12. package/consumers/miranda/llm.js +99 -0
  13. package/consumers/miranda/prompts/summary.js +303 -0
  14. package/consumers/miranda/recall-format.js +74 -0
  15. package/consumers/miranda/workspace-files.js +91 -0
  16. package/consumers/openclaw-ext/index.js +38 -0
  17. package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
  18. package/consumers/openclaw-ext/package.json +10 -0
  19. package/consumers/openclaw-plugin.js +66 -74
  20. package/consumers/opencode.js +21 -24
  21. package/consumers/shared/autodetect.js +64 -0
  22. package/consumers/shared/entity-parser.js +119 -0
  23. package/consumers/shared/ingest.js +148 -0
  24. package/consumers/shared/llm-autodetect.js +137 -0
  25. package/consumers/shared/normalize.js +129 -0
  26. package/consumers/shared/recall-format.js +110 -0
  27. package/core/aquifer.js +200 -82
  28. package/core/entity.js +29 -17
  29. package/core/storage.js +116 -45
  30. package/docs/postprocess-contract.md +132 -0
  31. package/index.js +9 -1
  32. package/package.json +23 -2
  33. package/pipeline/_http.js +1 -1
  34. package/pipeline/consolidation/apply.js +176 -0
  35. package/pipeline/consolidation/index.js +21 -0
  36. package/pipeline/extract-entities.js +2 -2
  37. package/pipeline/rerank.js +1 -1
  38. package/pipeline/summarize.js +4 -1
  39. package/schema/001-base.sql +61 -24
  40. package/schema/002-entities.sql +17 -3
  41. package/schema/004-facts.sql +67 -0
  42. package/scripts/diagnose-fts-zh.js +168 -134
  43. package/scripts/diagnose-vector.js +188 -0
  44. package/scripts/install-openclaw.sh +59 -0
  45. package/scripts/smoke.mjs +2 -2
@@ -16,6 +16,8 @@
16
16
  */
17
17
 
18
18
  const { createAquiferFromConfig } = require('./shared/factory');
19
+ const { runIngest } = require('./shared/ingest');
20
+ const { formatRecallResults: sharedFormatRecallResults } = require('./shared/recall-format');
19
21
 
20
22
  // ---------------------------------------------------------------------------
21
23
  // Helpers
@@ -38,6 +40,7 @@ function normalizeEntries(rawEntries) {
38
40
  let startedAt = null, lastMessageAt = null;
39
41
 
40
42
  for (const entry of rawEntries) {
43
+ if (!entry) continue;
41
44
  const msg = entry.message || entry;
42
45
  if (!msg || !msg.role) continue;
43
46
  if (!['user', 'assistant', 'system'].includes(msg.role)) continue;
@@ -79,43 +82,63 @@ function normalizeEntries(rawEntries) {
79
82
  };
80
83
  }
81
84
 
82
- function formatDate(value) {
83
- if (!value) return 'unknown';
84
- const parsed = new Date(value);
85
- return isNaN(parsed.getTime()) ? 'unknown' : parsed.toISOString().slice(0, 10);
86
- }
87
-
88
- function formatRecallResults(results) {
89
- if (results.length === 0) return 'No matching sessions found.';
90
-
91
- return results.map((r, i) => {
92
- const ss = r.structuredSummary || {};
93
- const title = ss.title || r.summaryText?.slice(0, 60) || '(untitled)';
94
- const date = formatDate(r.startedAt);
95
-
96
- const lines = [`### ${i + 1}. ${title} (${date}, ${r.agentId || 'default'})`];
97
- if (ss.overview || r.summaryText) {
98
- lines.push((ss.overview || r.summaryText).slice(0, 300));
99
- }
100
- if (r.matchedTurnText) {
101
- lines.push(`Matched: ${r.matchedTurnText.slice(0, 200)}`);
102
- }
103
- return lines.join('\n');
104
- }).join('\n\n');
105
- }
85
+ // Thin adapter over the shared formatter. OpenClaw's tool output historically
86
+ // used "Matched:" instead of "Matched turn:" and joined with blank lines, so
87
+ // we supply a pair of renderer overrides to preserve that shape.
88
+ const formatRecallResults = (function () {
89
+ const { createRecallFormatter } = require('./shared/recall-format');
90
+ const _fmt = createRecallFormatter({
91
+ header: () => null,
92
+ matched: (r) => r.matchedTurnText ? `Matched: ${String(r.matchedTurnText).slice(0, 200)}` : null,
93
+ separator: () => '',
94
+ });
95
+ return (results) => {
96
+ if (!results || results.length === 0) return 'No matching sessions found.';
97
+ return _fmt(results);
98
+ };
99
+ })();
100
+ // Re-export the shared formatter too for callers that want the default shape.
101
+ formatRecallResults.shared = sharedFormatRecallResults;
106
102
 
107
103
  // ---------------------------------------------------------------------------
108
104
  // Plugin
109
105
  // ---------------------------------------------------------------------------
110
106
 
111
- module.exports = {
112
- id: 'aquifer-memory',
113
- name: 'Aquifer Memory',
107
+ function buildPlugin() {
108
+ return {
109
+ id: 'aquifer-memory',
110
+ name: 'Aquifer Memory',
111
+ register,
112
+ };
113
+ }
114
+
115
+ module.exports = buildPlugin();
116
+ // Expose helpers for unit testing. Not part of the plugin's OpenClaw-visible
117
+ // contract; OpenClaw reads { id, name, register } only.
118
+ module.exports.normalizeEntries = normalizeEntries;
119
+ module.exports.coerceRawEntries = coerceRawEntries;
114
120
 
115
- register(api) {
121
+ function register(api) {
116
122
  const pluginConfig = api.pluginConfig || {};
117
- let aquifer;
118
123
 
124
+ // v1.2.0: delegate to a persona layer if one is configured, otherwise
125
+ // run the generic default path (before_reset + session_recall + feedback).
126
+ const personaPath = pluginConfig.persona || process.env.AQUIFER_PERSONA;
127
+ if (personaPath) {
128
+ try {
129
+ const persona = require(personaPath);
130
+ if (persona && typeof persona.mountOnOpenClaw === 'function') {
131
+ persona.mountOnOpenClaw(api, pluginConfig);
132
+ api.logger.info(`[aquifer-memory] registered via persona: ${personaPath}`);
133
+ return;
134
+ }
135
+ api.logger.warn(`[aquifer-memory] persona at ${personaPath} lacks mountOnOpenClaw; falling back to default`);
136
+ } catch (err) {
137
+ api.logger.warn(`[aquifer-memory] failed to load persona ${personaPath}: ${err.message}; falling back to default`);
138
+ }
139
+ }
140
+
141
+ let aquifer;
119
142
  try {
120
143
  aquifer = createAquiferFromConfig(pluginConfig);
121
144
  } catch (err) {
@@ -138,65 +161,35 @@ module.exports = {
138
161
  if ((sessionKey || '').includes('subagent')) return;
139
162
  if ((sessionKey || '').includes(':cron:')) return;
140
163
 
141
- const dedupKey = `${agentId}:${sessionId}`;
142
- if (recentlyProcessed.has(dedupKey) || inFlight.has(dedupKey)) return;
143
-
144
164
  const rawEntries = coerceRawEntries(event?.messages || []);
145
165
  if (rawEntries.length < 3) {
146
166
  api.logger.info(`[aquifer-memory] skip: ${sessionId} only ${rawEntries.length} msgs`);
147
167
  return;
148
168
  }
149
169
 
150
- inFlight.add(dedupKey);
151
170
  api.logger.info(`[aquifer-memory] capturing ${sessionId} (${rawEntries.length} entries)`);
152
171
 
153
172
  (async () => {
154
173
  try {
174
+ // OpenClaw hands us flat {role, content} entries; normalizeEntries
175
+ // produces the commit-ready shape, which we feed to shared runIngest
176
+ // as 'preNormalized' so commit+enrich+dedup stays host-agnostic.
155
177
  const norm = normalizeEntries(rawEntries);
156
- if (norm.userCount === 0) {
157
- api.logger.info(`[aquifer-memory] skip: no user messages in ${sessionId}`);
158
- return;
159
- }
160
-
161
- // Commit
162
- await aquifer.commit(sessionId, norm.messages, {
178
+ await runIngest({
179
+ aquifer,
180
+ sessionId,
163
181
  agentId,
164
182
  source: 'openclaw',
165
183
  sessionKey,
166
- model: norm.model,
167
- tokensIn: norm.tokensIn,
168
- tokensOut: norm.tokensOut,
169
- startedAt: norm.startedAt,
170
- lastMessageAt: norm.lastMessageAt,
184
+ adapter: 'preNormalized',
185
+ preNormalized: norm,
186
+ minUserMessages,
187
+ dedupMap: recentlyProcessed,
188
+ inFlight,
189
+ logger: api.logger,
171
190
  });
172
- api.logger.info(`[aquifer-memory] committed ${sessionId}`);
173
-
174
- // Enrich (if enough messages)
175
- if (norm.userCount >= minUserMessages) {
176
- try {
177
- const result = await aquifer.enrich(sessionId, { agentId });
178
- api.logger.info(`[aquifer-memory] enriched ${sessionId} (${result.turnsEmbedded} turns, ${result.entitiesFound} entities)`);
179
- } catch (enrichErr) {
180
- api.logger.warn(`[aquifer-memory] enrich failed for ${sessionId}: ${enrichErr.message}`);
181
- }
182
- } else {
183
- try {
184
- await aquifer.skip(sessionId, { agentId, reason: `user_count=${norm.userCount} < min=${minUserMessages}` });
185
- } catch (e) { api.logger.warn(`[aquifer-memory] skip failed for ${sessionId}: ${e.message}`); }
186
- }
187
-
188
- recentlyProcessed.set(dedupKey, Date.now());
189
191
  } catch (err) {
190
192
  api.logger.warn(`[aquifer-memory] capture failed for ${sessionId}: ${err.message}`);
191
- } finally {
192
- inFlight.delete(dedupKey);
193
- // Evict old entries
194
- if (recentlyProcessed.size > 200) {
195
- const cutoff = Date.now() - 30 * 60 * 1000;
196
- for (const [k, ts] of recentlyProcessed) {
197
- if (ts < cutoff) recentlyProcessed.delete(k);
198
- }
199
- }
200
193
  }
201
194
  })();
202
195
  });
@@ -292,6 +285,5 @@ module.exports = {
292
285
  };
293
286
  }, { name: 'session_feedback' });
294
287
 
295
- api.logger.info('[aquifer-memory] registered (before_reset + session_recall + session_feedback)');
296
- },
297
- };
288
+ api.logger.info('[aquifer-memory] registered (before_reset + session_recall + session_feedback)');
289
+ }
@@ -26,8 +26,7 @@
26
26
 
27
27
  const path = require('path');
28
28
  const os = require('os');
29
- const { createAquiferFromConfig } = require('./shared/factory');
30
-
29
+ const { runIngest } = require('./shared/ingest');
31
30
  // ---------------------------------------------------------------------------
32
31
  // SQLite access — use Node 22+ built-in or fall back to better-sqlite3
33
32
  // ---------------------------------------------------------------------------
@@ -37,7 +36,7 @@ function openSqlite(dbPath) {
37
36
  try {
38
37
  const { DatabaseSync } = require('node:sqlite');
39
38
  return new DatabaseSync(dbPath, { open: true, readOnly: true });
40
- } catch (_) {
39
+ } catch {
41
40
  // not available
42
41
  }
43
42
 
@@ -45,7 +44,7 @@ function openSqlite(dbPath) {
45
44
  try {
46
45
  const Database = require('better-sqlite3');
47
46
  return new Database(dbPath, { readonly: true });
48
- } catch (_) {
47
+ } catch {
49
48
  // not available
50
49
  }
51
50
 
@@ -220,7 +219,7 @@ async function ingestOpenCode(aquifer, args) {
220
219
  try {
221
220
  const existing = await aquifer.exportSessions({ source: 'opencode', limit: 10000 });
222
221
  for (const row of existing) existingSet.add(row.session_id);
223
- } catch (_) {
222
+ } catch {
224
223
  // exportSessions may not exist in all versions
225
224
  }
226
225
 
@@ -271,36 +270,34 @@ async function ingestOpenCode(aquifer, args) {
271
270
  continue;
272
271
  }
273
272
 
274
- // Commit to Aquifer
273
+ // Commit + optional enrich via shared ingest pipeline
275
274
  try {
276
- await aquifer.commit(sid, norm.messages, {
275
+ const ingestResult = await runIngest({
276
+ aquifer,
277
+ sessionId: sid,
277
278
  agentId,
278
279
  source: 'opencode',
279
- model: norm.model,
280
- tokensIn: norm.tokensIn,
281
- tokensOut: norm.tokensOut,
282
- startedAt: norm.startedAt,
283
- lastMessageAt: norm.lastMessageAt,
280
+ adapter: 'preNormalized',
281
+ preNormalized: norm,
282
+ enrich: doEnrich,
283
+ minUserMessages,
284
+ logger: { info() {}, warn(m) { info.enrichError = m; } },
284
285
  });
286
+
285
287
  committed++;
286
288
  info.status = 'committed';
287
-
288
- // Enrich if requested
289
- if (doEnrich) {
290
- try {
291
- const enrichResult = await aquifer.enrich(sid, { agentId });
292
- info.status = 'enriched';
293
- info.turnsEmbedded = enrichResult.turnsEmbedded;
294
- info.entitiesFound = enrichResult.entitiesFound;
295
- } catch (enrichErr) {
296
- info.enrichError = enrichErr.message;
297
- }
289
+ if (ingestResult.enrichResult) {
290
+ info.status = 'enriched';
291
+ info.turnsEmbedded = ingestResult.enrichResult.turnsEmbedded;
292
+ info.entitiesFound = ingestResult.enrichResult.entitiesFound;
298
293
  }
299
294
 
300
295
  if (jsonOutput) {
301
296
  results.push(info);
302
297
  } else {
303
- const enrichNote = info.turnsEmbedded != null ? ` (${info.turnsEmbedded} turns, ${info.entitiesFound} entities)` : '';
298
+ const enrichNote = info.turnsEmbedded !== null && info.turnsEmbedded !== undefined
299
+ ? ` (${info.turnsEmbedded} turns, ${info.entitiesFound} entities)`
300
+ : '';
304
301
  console.log(` [${committed}] ${sid} "${session.title}"${enrichNote}`);
305
302
  }
306
303
  } catch (err) {
@@ -0,0 +1,64 @@
1
+ 'use strict';
2
+
3
+ const { Pool } = require('pg');
4
+
5
+ const DEFAULT_PG_URL = 'postgresql://aquifer:aquifer@localhost:5432/aquifer';
6
+ const DEFAULT_OLLAMA_URL = 'http://localhost:11434';
7
+
8
/**
 * Check whether a PostgreSQL server at `url` answers a trivial query.
 *
 * A throwaway single-connection pool is created for the probe and always
 * shut down afterwards; a failure while shutting it down is ignored so it
 * cannot change the probe's answer.
 *
 * @param {string} url - connection string handed to the pool
 * @param {object} [options]
 * @param {number} [options.timeoutMs=1500] - passed to the pool as `connectionTimeoutMillis`
 * @returns {Promise<boolean>} true when `SELECT 1` succeeded, false on any query error
 */
async function probePostgres(url, { timeoutMs = 1500 } = {}) {
  const probePool = new Pool({
    max: 1,
    connectionString: url,
    connectionTimeoutMillis: timeoutMs,
  });

  let reachable;
  try {
    await probePool.query('SELECT 1');
    reachable = true;
  } catch {
    reachable = false;
  }

  // Teardown is best effort: the probe result is already decided.
  try {
    await probePool.end();
  } catch {
    /* ignore */
  }

  return reachable;
}
23
+
24
/**
 * Check whether an Ollama server is reachable by requesting `<baseUrl>/api/tags`.
 *
 * The request is aborted after `timeoutMs`. Any failure (network error,
 * abort, a `baseUrl` that is not a string) is reported as `false` rather
 * than thrown.
 *
 * @param {string} baseUrl - server base URL; trailing slashes are ignored
 * @param {object} [options]
 * @param {number} [options.timeoutMs=1500] - abort the request after this many milliseconds
 * @returns {Promise<boolean>} true when the server answered with an OK status
 */
async function probeOllama(baseUrl, { timeoutMs = 1500 } = {}) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    // Strip every trailing slash so "http://host//" does not yield "//api/tags".
    const res = await fetch(`${baseUrl.replace(/\/+$/, '')}/api/tags`, {
      signal: controller.signal,
    });
    const reachable = res.ok;
    // Only the status matters. Cancel the unread body so the connection is
    // released right away instead of lingering until garbage collection.
    try {
      await res.body?.cancel();
    } catch {
      /* best effort — the probe result is already known */
    }
    return reachable;
  } catch {
    return false;
  } finally {
    clearTimeout(timer);
  }
}
38
+
39
/**
 * Work out which quickstart settings can be filled in automatically.
 *
 * A default is only offered when the corresponding setting is absent from
 * `env` AND the matching local service answers its probe:
 *   - DATABASE_URL   → DEFAULT_PG_URL, when neither DATABASE_URL nor
 *                      AQUIFER_DB_URL is set
 *   - EMBED_PROVIDER → 'ollama', when EMBED_PROVIDER is unset and
 *                      AQUIFER_EMBED_BASE_URL / AQUIFER_EMBED_MODEL are not both set
 *
 * @param {object} env - environment-style key/value object to inspect
 * @param {object} [probes] - optional probe overrides
 * @param {function} [probes.probePostgres] - async (url) => boolean
 * @param {function} [probes.probeOllama] - async (baseUrl) => boolean
 * @returns {Promise<object>} the detected settings (possibly empty); `env` is not modified
 */
async function autodetectForQuickstart(env, probes = {}) {
  const checkPostgres = probes.probePostgres || probePostgres;
  const checkOllama = probes.probeOllama || probeOllama;
  const found = {};

  // Database: only probe when the caller has not configured one.
  const dbConfigured = env.DATABASE_URL || env.AQUIFER_DB_URL;
  if (!dbConfigured) {
    const postgresUp = await checkPostgres(DEFAULT_PG_URL);
    if (postgresUp) {
      found.DATABASE_URL = DEFAULT_PG_URL;
    }
  }

  // Embeddings: an explicit provider, or a full base-URL + model pair, counts as configured.
  let embedConfigured = env.EMBED_PROVIDER;
  if (!embedConfigured) {
    embedConfigured = env.AQUIFER_EMBED_BASE_URL && env.AQUIFER_EMBED_MODEL;
  }
  if (!embedConfigured) {
    const ollamaUp = await checkOllama(DEFAULT_OLLAMA_URL);
    if (ollamaUp) {
      found.EMBED_PROVIDER = 'ollama';
    }
  }

  return found;
}
57
+
58
+ module.exports = {
59
+ autodetectForQuickstart,
60
+ probePostgres,
61
+ probeOllama,
62
+ DEFAULT_PG_URL,
63
+ DEFAULT_OLLAMA_URL,
64
+ };
@@ -0,0 +1,119 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Entity section parser — shared across consumers.
5
+ //
6
+ // Parses LLM output lines of the form:
7
+ // ENTITY: <name> | <type> | <alias1, alias2, ...>
8
+ // RELATION: <src> | <dst>
9
+ //
10
+ // Returns { entities, relations } ready for Aquifer entityParseFn.
11
+ // Dedups, normalizes names via Aquifer's normalizeEntityName, and drops noise
12
+ // entities (generic roles, pure-numeric, file paths, CLI flags, etc.).
13
+ //
14
+ // Consumers that use a different ENTITIES prompt format should write their own
15
+ // parser — this one is for the ENTITY:/RELATION: line protocol.
16
+ // ---------------------------------------------------------------------------
17
+
18
+ // Import directly from core/entity to avoid a circular dep with top-level
19
+ // index.js, which itself re-exports parseEntitySection from here.
20
+ const { normalizeEntityName } = require('../../core/entity');
21
+
22
+ const VALID_ENTITY_TYPES = new Set([
23
+ 'person', 'project', 'concept', 'tool', 'metric',
24
+ 'org', 'place', 'event', 'doc', 'task', 'topic', 'other',
25
+ ]);
26
+
27
+ const ENTITY_STOPLIST = new Set([
28
+ // Role generics
29
+ '助理', '使用者', '用戶', 'assistant', 'user', 'agent', 'agents', '我',
30
+ // Too broad
31
+ 'api', 'db', 'llm', 'cli', 'bash', 'diff', 'bug', 'config',
32
+ 'extensions', 'hooks', 'cron', 'manifest', 'index.js', 'node.js',
33
+ // Common noise
34
+ 'ok', 'timeout', 'error', 'test', 'cache', 'token',
35
+ '登入狀態', '授權提示', 'chat_id', 'promise.race',
36
+ ]);
37
+
38
+ const CODE_EXT_RE = /\.(js|ts|jsx|tsx|mjs|cjs|sh|py|sql|md|json|yml|yaml|css|html|vue|svelte|go|rs|rb|php|java|kt|c|cpp|h|toml|ini|cfg|conf|lock|env|proto)$/i;
39
+ const PATH_RE = /^[.\/~].*\//;
40
+ const DOTFILE_RE = /^\.[a-z][a-z0-9._-]*$/i;
41
+
42
/**
 * Decide whether a candidate entity should be discarded as noise.
 *
 * An entity is noise when its normalized name is on the stoplist or is
 * shorter than two characters, or when its raw name looks like a quantity
 * (digits followed by a unit, digits + "錯誤", ten or more bare digits),
 * a path, a dotfile, a file name with a code extension, or a CLI flag.
 *
 * @param {string} normalizedName - normalized form, used for the stoplist and length checks
 * @param {string} rawName - name as written, used for the pattern checks
 * @returns {boolean} true when the entity should be dropped
 */
function isNoiseEntity(normalizedName, rawName) {
  if (ENTITY_STOPLIST.has(normalizedName)) return true;

  // Quantities and long numeric identifiers.
  const quantityPatterns = [
    /^\d+[秒分時天日月年kKgG%]/,
    /^\d{2,}[mM]/,
    /^\d+錯誤/,
    /^\d{10,}$/,
  ];
  if (quantityPatterns.some((pattern) => pattern.test(rawName))) return true;

  if (normalizedName.length < 2) return true;

  // Paths, dotfiles, source-file names and command-line flags.
  const codePatterns = [PATH_RE, DOTFILE_RE, CODE_EXT_RE, /^--?\w/];
  return codePatterns.some((pattern) => pattern.test(rawName));
}
55
+
56
/**
 * Split one protocol line into trimmed fields.
 *
 * The pipe character is the preferred delimiter; a tab is used only when
 * the line contains no pipe. A line with neither delimiter yields a single
 * trimmed field.
 *
 * @param {string} line
 * @returns {string[]} trimmed fields (empty fields are kept)
 */
function splitFields(line) {
  const delimiter = ['|', '\t'].find((candidate) => line.includes(candidate));
  if (delimiter === undefined) {
    return [line.trim()];
  }
  return line.split(delimiter).map((field) => field.trim());
}
61
+
62
/**
 * Parse LLM output written in the ENTITY:/RELATION: line protocol.
 *
 *   ENTITY: <name> | <type> | <alias1, alias2, ...>
 *   RELATION: <src> | <dst>
 *
 * Entities are deduplicated by normalized name, filtered through
 * isNoiseEntity, and capped at `maxEntities`; an unrecognised type becomes
 * 'other'. Relations are deduplicated as unordered pairs, self-relations
 * are dropped, and only relations whose two endpoints are both accepted
 * entities are returned.
 *
 * The `maxRelations` cap is applied AFTER the endpoint check, so relations
 * that point at unknown or rejected entities do not use up the budget and
 * push valid relations out of the result.
 *
 * @param {string} text - raw section text; a non-string or empty value yields empty results
 * @param {object} [opts]
 * @param {number} [opts.maxEntities=10] - maximum entities kept (non-finite values fall back to the default)
 * @param {number} [opts.maxRelations=15] - maximum relations returned (non-finite values fall back to the default)
 * @returns {{entities: Array<{name: string, normalizedName: string, type: string, aliases: string[]}>,
 *            relations: Array<{src: string, dst: string}>}}
 */
function parseEntitySection(text, opts = {}) {
  if (!text || typeof text !== 'string') return { entities: [], relations: [] };

  const maxEntities = Number.isFinite(opts.maxEntities) ? opts.maxEntities : 10;
  const maxRelations = Number.isFinite(opts.maxRelations) ? opts.maxRelations : 15;

  const entityMap = new Map();
  const seenPairs = new Set();
  // Deduplicated relation candidates, kept with their normalized endpoints so
  // the endpoint check below does not have to normalize a second time.
  const candidates = [];

  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    if (/^ENTITY:/i.test(line)) {
      if (entityMap.size >= maxEntities) continue;
      const fields = splitFields(line.replace(/^ENTITY:\s*/i, ''));
      const rawName = (fields[0] || '').trim().slice(0, 200);
      if (!rawName) continue;
      const normalizedName = normalizeEntityName(rawName);
      if (!normalizedName || entityMap.has(normalizedName)) continue;
      if (isNoiseEntity(normalizedName, rawName)) continue;
      const rawType = (fields[1] || '').toLowerCase().trim();
      const type = VALID_ENTITY_TYPES.has(rawType) ? rawType : 'other';
      const rawAliases = fields[2] || '';
      // A lone "-" (or a "-" list item) is the protocol's "no alias" placeholder.
      const aliases = (rawAliases && rawAliases !== '-')
        ? rawAliases.split(',').map(a => a.trim().slice(0, 200)).filter(a => a && a !== '-')
        : [];
      entityMap.set(normalizedName, { name: rawName, normalizedName, type, aliases });
    } else if (/^RELATION:/i.test(line)) {
      const fields = splitFields(line.replace(/^RELATION:\s*/i, ''));
      const src = (fields[0] || '').trim();
      const dst = (fields[1] || '').trim();
      if (!src || !dst) continue;
      const ns = normalizeEntityName(src);
      const nd = normalizeEntityName(dst);
      if (!ns || !nd || ns === nd) continue;
      // Order the pair so A→B and B→A count as the same relation.
      const pairKey = ns < nd ? `${ns}|||${nd}` : `${nd}|||${ns}`;
      if (seenPairs.has(pairKey)) continue;
      seenPairs.add(pairKey);
      candidates.push({ src, dst, ns, nd });
    }
  }

  // Endpoint check runs after the whole text is read because a RELATION line
  // may appear before the ENTITY lines it refers to. The cap is enforced here,
  // on relations that actually survive.
  const relations = [];
  for (const { src, dst, ns, nd } of candidates) {
    if (relations.length >= maxRelations) break;
    if (entityMap.has(ns) && entityMap.has(nd)) relations.push({ src, dst });
  }

  return { entities: [...entityMap.values()], relations };
}
113
+
114
+ module.exports = {
115
+ parseEntitySection,
116
+ isNoiseEntity,
117
+ VALID_ENTITY_TYPES,
118
+ ENTITY_STOPLIST,
119
+ };
@@ -0,0 +1,148 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Shared ingest flow — the standard "received session → Aquifer" pipeline.
5
+ //
6
+ // All three host adapters (OpenClaw before_reset, Claude Code afterburn,
7
+ // OpenCode backfill) do the same three things:
8
+ // 1. Normalize raw entries to commit-ready shape
9
+ // 2. commit() the messages + metadata
10
+ // 3. enrich() if enough user turns, else skip()
11
+ // With dedup on (agentId, sessionId) so the same hook firing twice is safe.
12
+ //
13
+ // runIngest() centralizes this. Host adapters pass in their raw entries, the
14
+ // adapter name, and an optional postProcess callback for persona side effects.
15
+ // ---------------------------------------------------------------------------
16
+
17
+ const { normalizeMessages } = require('./normalize');
18
+
19
+ const RECENT_CAP = 200;
20
+ const RECENT_TTL_MS = 30 * 60 * 1000;
21
+
22
/**
 * Prune expired entries from a dedup map once it has outgrown RECENT_CAP.
 *
 * Nothing happens while the map holds RECENT_CAP entries or fewer. Above
 * that, every entry whose timestamp is older than RECENT_TTL_MS relative to
 * `now` is deleted; entries still inside the window are kept even if the map
 * stays above the cap.
 *
 * @param {Map<string, number>} dedupMap - key → timestamp (ms); a falsy value is a no-op
 * @param {number} [now=Date.now()] - reference time in milliseconds
 * @returns {void}
 */
function evictStale(dedupMap, now = Date.now()) {
  if (!dedupMap) return;
  if (dedupMap.size <= RECENT_CAP) return;

  const oldestAllowed = now - RECENT_TTL_MS;
  const expiredKeys = [];
  for (const entry of dedupMap) {
    if (entry[1] < oldestAllowed) expiredKeys.push(entry[0]);
  }
  expiredKeys.forEach((key) => dedupMap.delete(key));
}
29
+
30
/**
 * Run the standard commit-then-enrich flow for a single session.
 *
 * Steps:
 *   1. Dedup on `${agentId}:${sessionId}` using `dedupMap` (recently processed)
 *      and `inFlight` (currently running).
 *   2. Normalize `rawEntries` via normalizeMessages(), or take `preNormalized` as-is.
 *   3. commit() the messages and metadata.
 *   4. enrich() when there are at least `minUserMessages` user turns, else skip().
 *
 * A session is recorded in `dedupMap` once it has been committed, whichever
 * of the 'ok' / 'committed_only' / 'skipped_short' outcomes follows, so a
 * duplicate hook firing does not commit it again. Sessions with no user
 * messages are never committed and therefore never recorded.
 *
 * @param {object} opts
 * @param {object} opts.aquifer — Aquifer instance (commit / enrich / skip are called on it)
 * @param {string} opts.sessionId
 * @param {string} opts.agentId
 * @param {string} [opts.source] — caller-provided source tag (e.g. 'openclaw', 'cc', 'opencode');
 *   falls back to `adapter`, then 'api'
 * @param {string} [opts.sessionKey] — passed through to commit() (null when absent)
 * @param {any[]} [opts.rawEntries] — host-native session entries; not read when
 *   adapter === 'preNormalized'
 * @param {'gateway'|'cc'|'claude-code'|'preNormalized'} [opts.adapter]
 *   'preNormalized' means the caller already produced the normalizeMessages output shape.
 * @param {object} [opts.preNormalized] — { messages, userCount, ... } ready to commit,
 *   required when adapter === 'preNormalized'
 * @param {number} [opts.minUserMessages=3] — enrich threshold
 * @param {boolean} [opts.enrich=true] — when false, commit only; don't enrich or skip.
 *   Useful for pull-style ingest (OpenCode) where enrichment runs later.
 * @param {Map} [opts.dedupMap] — Map<key, timestamp>; the same session is not processed
 *   again while its timestamp is within RECENT_TTL_MS
 * @param {Set} [opts.inFlight] — Set<key>; concurrent firings are guarded
 * @param {function} [opts.postProcess] — forwarded to enrich()
 * @param {function} [opts.summaryFn] — forwarded to enrich()
 * @param {function} [opts.entityParseFn] — forwarded to enrich()
 * @param {object} [opts.logger=console] — { info, warn }; pass null to silence
 * @returns {Promise<{status:string, normalized:any[]|null, counts:object|null, enrichResult:object|null, skipReason?:string}>}
 *   status is one of 'dedup', 'skipped_empty', 'committed_only', 'skipped_short', 'ok'.
 *   'ok' is also returned when enrich() failed; enrichResult is null in that case.
 * @throws {Error} when aquifer, sessionId or agentId is missing, when the
 *   'preNormalized' adapter is used without opts.preNormalized, or when
 *   commit() rejects. enrich() and skip() failures are logged, not thrown.
 */
async function runIngest(opts = {}) {
  const {
    aquifer, sessionId, agentId, source, sessionKey,
    rawEntries, adapter, preNormalized,
    minUserMessages = 3,
    enrich = true,
    dedupMap = null, inFlight = null,
    postProcess = null, summaryFn = null, entityParseFn = null,
    logger = console,
  } = opts;

  if (!aquifer) throw new Error('aquifer is required');
  if (!sessionId) throw new Error('sessionId is required');
  if (!agentId) throw new Error('agentId is required');

  const dedupKey = `${agentId}:${sessionId}`;
  if (dedupMap && dedupMap.has(dedupKey)) {
    // Honour the TTL on lookup: evictStale() only prunes once the map exceeds
    // its cap, so without this check a small map would dedup a key forever.
    // Non-numeric values are treated as "still recent" to stay conservative.
    const seenAt = dedupMap.get(dedupKey);
    const expired = typeof seenAt === 'number' && Date.now() - seenAt > RECENT_TTL_MS;
    if (!expired) {
      return { status: 'dedup', normalized: null, counts: null, enrichResult: null, skipReason: 'recent' };
    }
    dedupMap.delete(dedupKey);
  }
  if (inFlight && inFlight.has(dedupKey)) {
    return { status: 'dedup', normalized: null, counts: null, enrichResult: null, skipReason: 'in_flight' };
  }
  if (inFlight) inFlight.add(dedupKey);

  // Record the session as processed; called on every path that committed.
  const markProcessed = () => {
    if (!dedupMap) return;
    dedupMap.set(dedupKey, Date.now());
    evictStale(dedupMap);
  };

  try {
    // 1. Normalize
    let norm;
    if (adapter === 'preNormalized') {
      if (!preNormalized) throw new Error('preNormalized adapter requires opts.preNormalized');
      norm = preNormalized;
    } else {
      norm = normalizeMessages(rawEntries, { adapter });
    }

    if (norm.userCount === 0) {
      return { status: 'skipped_empty', normalized: norm.messages, counts: norm, enrichResult: null, skipReason: 'no_user_messages' };
    }

    // 2. Commit
    await aquifer.commit(sessionId, norm.messages, {
      agentId,
      source: source || adapter || 'api',
      sessionKey: sessionKey || null,
      model: norm.model,
      tokensIn: norm.tokensIn,
      tokensOut: norm.tokensOut,
      startedAt: norm.startedAt,
      lastMessageAt: norm.lastMessageAt,
    });
    if (logger && logger.info) logger.info(`[aquifer-ingest] committed ${sessionId} (${norm.messages.length} msgs, user=${norm.userCount})`);

    // 3. Enrich or skip (unless caller opts out — then commit only)
    if (!enrich) {
      markProcessed();
      return { status: 'committed_only', normalized: norm.messages, counts: norm, enrichResult: null };
    }

    if (norm.userCount < minUserMessages) {
      try {
        await aquifer.skip(sessionId, { agentId, reason: `user_count=${norm.userCount} < min=${minUserMessages}` });
      } catch (skipErr) {
        if (logger && logger.warn) logger.warn(`[aquifer-ingest] skip failed for ${sessionId}: ${skipErr.message}`);
      }
      // The commit already happened, so this session must be remembered too;
      // otherwise a repeated hook firing would commit and skip it again.
      markProcessed();
      return { status: 'skipped_short', normalized: norm.messages, counts: norm, enrichResult: null, skipReason: `user_count=${norm.userCount}` };
    }

    let enrichResult = null;
    try {
      enrichResult = await aquifer.enrich(sessionId, {
        agentId,
        summaryFn: summaryFn || undefined,
        entityParseFn: entityParseFn || undefined,
        postProcess: postProcess || undefined,
      });
      if (logger && logger.info) {
        logger.info(`[aquifer-ingest] enriched ${sessionId} (turns=${enrichResult.turnsEmbedded}, entities=${enrichResult.entitiesFound})`);
      }
    } catch (enrichErr) {
      if (logger && logger.warn) logger.warn(`[aquifer-ingest] enrich failed for ${sessionId}: ${enrichErr.message}`);
      // Commit already succeeded — don't rethrow
    }

    markProcessed();
    return { status: 'ok', normalized: norm.messages, counts: norm, enrichResult };
  } finally {
    if (inFlight) inFlight.delete(dedupKey);
  }
}
147
+
148
+ module.exports = { runIngest };