@shadowforge0/aquifer-memory 1.0.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -29
- package/consumers/claude-code.js +117 -0
- package/consumers/cli.js +28 -1
- package/consumers/default/daily-entries.js +196 -0
- package/consumers/default/index.js +282 -0
- package/consumers/default/prompts/summary.js +153 -0
- package/consumers/mcp.js +3 -23
- package/consumers/miranda/context-inject.js +119 -0
- package/consumers/miranda/daily-entries.js +224 -0
- package/consumers/miranda/index.js +353 -0
- package/consumers/miranda/instance.js +55 -0
- package/consumers/miranda/llm.js +99 -0
- package/consumers/miranda/profile.json +145 -0
- package/consumers/miranda/prompts/summary.js +303 -0
- package/consumers/miranda/recall-format.js +74 -0
- package/consumers/miranda/render-daily-md.js +186 -0
- package/consumers/miranda/workspace-files.js +91 -0
- package/consumers/openclaw-ext/index.js +38 -0
- package/consumers/openclaw-ext/openclaw.plugin.json +9 -0
- package/consumers/openclaw-ext/package.json +10 -0
- package/consumers/openclaw-plugin.js +66 -74
- package/consumers/opencode.js +21 -24
- package/consumers/shared/autodetect.js +64 -0
- package/consumers/shared/entity-parser.js +119 -0
- package/consumers/shared/ingest.js +148 -0
- package/consumers/shared/llm-autodetect.js +137 -0
- package/consumers/shared/normalize.js +129 -0
- package/consumers/shared/recall-format.js +110 -0
- package/core/aquifer.js +209 -71
- package/core/artifacts.js +174 -0
- package/core/bundles.js +400 -0
- package/core/consolidation.js +340 -0
- package/core/decisions.js +164 -0
- package/core/entity.js +1 -3
- package/core/errors.js +97 -0
- package/core/handoff.js +153 -0
- package/core/mcp-manifest.js +131 -0
- package/core/narratives.js +212 -0
- package/core/profiles.js +171 -0
- package/core/state.js +163 -0
- package/core/storage.js +86 -28
- package/core/timeline.js +152 -0
- package/docs/postprocess-contract.md +132 -0
- package/index.js +23 -1
- package/package.json +23 -2
- package/pipeline/_http.js +1 -1
- package/pipeline/consolidation/apply.js +176 -0
- package/pipeline/consolidation/index.js +21 -0
- package/pipeline/extract-entities.js +2 -2
- package/pipeline/rerank.js +1 -1
- package/pipeline/summarize.js +4 -1
- package/schema/001-base.sql +61 -24
- package/schema/002-entities.sql +17 -3
- package/schema/004-completion.sql +375 -0
- package/schema/004-facts.sql +67 -0
- package/scripts/diagnose-fts-zh.js +168 -134
- package/scripts/diagnose-vector.js +188 -0
- package/scripts/install-openclaw.sh +59 -0
- package/scripts/smoke.mjs +2 -2
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { Pool } = require('pg');
|
|
4
|
+
|
|
5
|
+
// Defaults probed when the host has not configured anything explicitly.
const DEFAULT_PG_URL = 'postgresql://aquifer:aquifer@localhost:5432/aquifer';
const DEFAULT_OLLAMA_URL = 'http://localhost:11434';

/**
 * Probe a Postgres server by opening one connection and running SELECT 1.
 *
 * @param {string} url - Postgres connection string.
 * @param {object} [opts]
 * @param {number} [opts.timeoutMs=1500] - budget applied to both the connect
 *   phase and the probe query itself.
 * @returns {Promise<boolean>} true when the server answered; false on any error.
 */
async function probePostgres(url, { timeoutMs = 1500 } = {}) {
  const pool = new Pool({
    connectionString: url,
    connectionTimeoutMillis: timeoutMs,
    // connectionTimeoutMillis only bounds the connect phase; without a
    // query_timeout a server that accepts the socket but never answers
    // SELECT 1 would stall the probe indefinitely.
    query_timeout: timeoutMs,
    max: 1,
  });
  try {
    await pool.query('SELECT 1');
    return true;
  } catch {
    return false;
  } finally {
    try { await pool.end(); } catch { /* ignore */ }
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Check whether an Ollama server is reachable at baseUrl.
 *
 * Issues GET /api/tags with an abort-based deadline; every failure mode
 * (connection refused, timeout, non-2xx) is reported as `false` rather
 * than thrown.
 *
 * @param {string} baseUrl - server root, with or without a trailing slash.
 * @param {object} [opts]
 * @param {number} [opts.timeoutMs=1500]
 * @returns {Promise<boolean>}
 */
async function probeOllama(baseUrl, { timeoutMs = 1500 } = {}) {
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), timeoutMs);
  const endpoint = `${baseUrl.replace(/\/$/, '')}/api/tags`;
  try {
    const response = await fetch(endpoint, { signal: abort.signal });
    return response.ok;
  } catch {
    return false;
  } finally {
    clearTimeout(deadline);
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Best-effort env autodetect for quickstart installs.
 *
 * Probes the default local Postgres and Ollama endpoints, but only for the
 * pieces the environment does not already configure. Probes are injectable
 * for testing via the `probes` parameter.
 *
 * @param {object} env - environment map (usually process.env)
 * @param {object} [probes] - optional { probePostgres, probeOllama } overrides
 * @returns {Promise<object>} subset of { DATABASE_URL, EMBED_PROVIDER } that
 *   was detected; empty object when nothing new was found.
 */
async function autodetectForQuickstart(env, probes = {}) {
  const pgProbe = probes.probePostgres || probePostgres;
  const ollamaProbe = probes.probeOllama || probeOllama;
  const detected = {};

  // Database: only probe when neither DB env var is set.
  const dbConfigured = env.DATABASE_URL || env.AQUIFER_DB_URL;
  if (!dbConfigured) {
    if (await pgProbe(DEFAULT_PG_URL)) {
      detected.DATABASE_URL = DEFAULT_PG_URL;
    }
  }

  // Embeddings: explicit provider, or a complete base-url + model pair,
  // counts as already configured.
  const embedConfigured = env.EMBED_PROVIDER
    || (env.AQUIFER_EMBED_BASE_URL && env.AQUIFER_EMBED_MODEL);
  if (!embedConfigured) {
    if (await ollamaProbe(DEFAULT_OLLAMA_URL)) {
      detected.EMBED_PROVIDER = 'ollama';
    }
  }

  return detected;
}
|
|
57
|
+
|
|
58
|
+
// Public CommonJS surface: the quickstart helper plus the individual probes
// and default endpoints (exported for tests and for consumers that reuse the
// probes directly).
module.exports = {
  autodetectForQuickstart,
  probePostgres,
  probeOllama,
  DEFAULT_PG_URL,
  DEFAULT_OLLAMA_URL,
};
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Entity section parser — shared across consumers.
|
|
5
|
+
//
|
|
6
|
+
// Parses LLM output lines of the form:
|
|
7
|
+
// ENTITY: <name> | <type> | <alias1, alias2, ...>
|
|
8
|
+
// RELATION: <src> | <dst>
|
|
9
|
+
//
|
|
10
|
+
// Returns { entities, relations } ready for Aquifer entityParseFn.
|
|
11
|
+
// Dedups, normalizes names via Aquifer's normalizeEntityName, and drops noise
|
|
12
|
+
// entities (generic roles, pure-numeric, file paths, CLI flags, etc.).
|
|
13
|
+
//
|
|
14
|
+
// Consumers that use a different ENTITIES prompt format should write their own
|
|
15
|
+
// parser — this one is for the ENTITY:/RELATION: line protocol.
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
// Import directly from core/entity to avoid a circular dep with top-level
|
|
19
|
+
// index.js, which itself re-exports parseEntitySection from here.
|
|
20
|
+
const { normalizeEntityName } = require('../../core/entity');
|
|
21
|
+
|
|
22
|
+
// Entity types the ENTITIES prompt may emit; anything else maps to 'other'.
const VALID_ENTITY_TYPES = new Set([
  'person',
  'project',
  'concept',
  'tool',
  'metric',
  'org',
  'place',
  'event',
  'doc',
  'task',
  'topic',
  'other',
]);

// Normalized names too generic to be useful memory entities; matched after
// normalizeEntityName, so entries are lowercase.
const ENTITY_STOPLIST = new Set([
  // Role generics (mixed zh/en, matching what models actually emit)
  '助理', '使用者', '用戶', 'assistant', 'user', 'agent', 'agents', '我',
  // Too broad to identify anything specific
  'api', 'db', 'llm', 'cli', 'bash', 'diff', 'bug', 'config',
  'extensions', 'hooks', 'cron', 'manifest', 'index.js', 'node.js',
  // Common noise observed in summaries
  'ok', 'timeout', 'error', 'test', 'cache', 'token',
  '登入狀態', '授權提示', 'chat_id', 'promise.race',
]);
|
|
37
|
+
|
|
38
|
+
// Filenames carrying a recognized source/config extension.
const CODE_EXT_RE = /\.(js|ts|jsx|tsx|mjs|cjs|sh|py|sql|md|json|yml|yaml|css|html|vue|svelte|go|rs|rb|php|java|kt|c|cpp|h|toml|ini|cfg|conf|lock|env|proto)$/i;
// Path-like strings (./foo/bar, /usr/…, ~/…).
const PATH_RE = /^[.\/~].*\//;
// Dotfiles such as .env or .gitignore.
const DOTFILE_RE = /^\.[a-z][a-z0-9._-]*$/i;

// Raw-name patterns that mark a candidate as noise: durations/sizes ("30秒",
// "15m"), error-count phrases, long bare numbers, paths, dotfiles, code
// filenames, and CLI flags. All are stateless (no /g), so order is irrelevant.
const NOISE_RAW_PATTERNS = [
  /^\d+[秒分時天日月年kKgG%]/,
  /^\d{2,}[mM]/,
  /^\d+錯誤/,
  /^\d{10,}$/,
  PATH_RE,
  DOTFILE_RE,
  CODE_EXT_RE,
  /^--?\w/,
];

/**
 * Decide whether a candidate entity should be dropped as noise.
 *
 * @param {string} normalizedName - output of normalizeEntityName
 * @param {string} rawName - the name exactly as the LLM emitted it
 * @returns {boolean} true when the entity is noise
 */
function isNoiseEntity(normalizedName, rawName) {
  if (ENTITY_STOPLIST.has(normalizedName)) return true;
  if (normalizedName.length < 2) return true;
  return NOISE_RAW_PATTERNS.some((pattern) => pattern.test(rawName));
}
|
|
55
|
+
|
|
56
|
+
/**
 * Split a protocol line into trimmed fields.
 * Pipe takes precedence over tab; a line containing neither delimiter
 * yields a single trimmed field.
 *
 * @param {string} line
 * @returns {string[]}
 */
function splitFields(line) {
  for (const delimiter of ['|', '\t']) {
    if (line.includes(delimiter)) {
      return line.split(delimiter).map((field) => field.trim());
    }
  }
  return [line.trim()];
}
|
|
61
|
+
|
|
62
|
+
/**
 * Parse an ENTITY:/RELATION: section emitted by the summarizer LLM.
 *
 * Recognized line forms:
 *   ENTITY: <name> | <type> | <alias1, alias2, ...>
 *   RELATION: <src> | <dst>
 *
 * Entities are deduped on their normalized name, noise-filtered, and capped
 * at maxEntities. Relations are deduped as unordered pairs, capped at
 * maxRelations, and finally restricted to pairs whose endpoints both
 * survived entity filtering.
 *
 * @param {string} text - raw LLM output section
 * @param {object} [opts]
 * @param {number} [opts.maxEntities=10]
 * @param {number} [opts.maxRelations=15]
 * @returns {{entities: object[], relations: {src:string,dst:string}[]}}
 */
function parseEntitySection(text, opts = {}) {
  if (!text || typeof text !== 'string') return { entities: [], relations: [] };

  const maxEntities = Number.isFinite(opts.maxEntities) ? opts.maxEntities : 10;
  const maxRelations = Number.isFinite(opts.maxRelations) ? opts.maxRelations : 15;

  const entityMap = new Map();   // normalized name -> entity record
  const seenPairs = new Set();   // canonical unordered pair keys
  const relations = [];

  for (const sourceLine of text.split('\n')) {
    const line = sourceLine.trim();
    if (!line) continue;

    if (/^ENTITY:/i.test(line)) {
      if (entityMap.size >= maxEntities) continue;
      const parts = splitFields(line.replace(/^ENTITY:\s*/i, ''));
      const rawName = (parts[0] || '').trim().slice(0, 200);
      if (!rawName) continue;
      const key = normalizeEntityName(rawName);
      if (!key || entityMap.has(key) || isNoiseEntity(key, rawName)) continue;

      const typeField = (parts[1] || '').toLowerCase().trim();
      const type = VALID_ENTITY_TYPES.has(typeField) ? typeField : 'other';

      // '-' is the prompt's "no aliases" placeholder, both as the whole field
      // and as an individual list item.
      const aliasField = parts[2] || '';
      let aliases = [];
      if (aliasField && aliasField !== '-') {
        aliases = aliasField
          .split(',')
          .map((alias) => alias.trim().slice(0, 200))
          .filter((alias) => alias && alias !== '-');
      }

      entityMap.set(key, { name: rawName, normalizedName: key, type, aliases });
      continue;
    }

    if (/^RELATION:/i.test(line)) {
      if (relations.length >= maxRelations) continue;
      const parts = splitFields(line.replace(/^RELATION:\s*/i, ''));
      const src = (parts[0] || '').trim();
      const dst = (parts[1] || '').trim();
      if (!src || !dst) continue;
      const ns = normalizeEntityName(src);
      const nd = normalizeEntityName(dst);
      if (!ns || !nd || ns === nd) continue;
      // Canonicalize so A->B and B->A dedupe to the same pair.
      const pairKey = ns < nd ? `${ns}|||${nd}` : `${nd}|||${ns}`;
      if (seenPairs.has(pairKey)) continue;
      seenPairs.add(pairKey);
      relations.push({ src, dst });
    }
  }

  // Only keep relations whose endpoints both made it through entity filtering.
  const relationsBetweenKnown = relations.filter(
    (r) => entityMap.has(normalizeEntityName(r.src)) && entityMap.has(normalizeEntityName(r.dst)),
  );

  return { entities: [...entityMap.values()], relations: relationsBetweenKnown };
}
|
|
113
|
+
|
|
114
|
+
// CommonJS surface. parseEntitySection is the primary API; the predicate,
// type set, and stoplist are exported for tests and for consumers that want
// the same noise filtering.
module.exports = {
  parseEntitySection,
  isNoiseEntity,
  VALID_ENTITY_TYPES,
  ENTITY_STOPLIST,
};
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Shared ingest flow — the standard "received session → Aquifer" pipeline.
|
|
5
|
+
//
|
|
6
|
+
// All three host adapters (OpenClaw before_reset, Claude Code afterburn,
|
|
7
|
+
// OpenCode backfill) do the same three things:
|
|
8
|
+
// 1. Normalize raw entries to commit-ready shape
|
|
9
|
+
// 2. commit() the messages + metadata
|
|
10
|
+
// 3. enrich() if enough user turns, else skip()
|
|
11
|
+
// With dedup on (agentId, sessionId) so the same hook firing twice is safe.
|
|
12
|
+
//
|
|
13
|
+
// runIngest() centralizes this. Host adapters pass in their raw entries, the
|
|
14
|
+
// adapter name, and an optional postProcess callback for persona side effects.
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
const { normalizeMessages } = require('./normalize');
|
|
18
|
+
|
|
19
|
+
// Dedup-map bounds: never track more than RECENT_CAP sessions, and forget a
// session after RECENT_TTL_MS so a genuinely re-fired hook can run again.
const RECENT_CAP = 200;
const RECENT_TTL_MS = 30 * 60 * 1000;

/**
 * Keep the dedup map bounded.
 *
 * First drops entries older than RECENT_TTL_MS; then, if the map is still
 * above RECENT_CAP, evicts the oldest remaining entries until it fits.
 * (Previously only the TTL pass ran, so a burst of fresh sessions could grow
 * the map past RECENT_CAP unbounded for up to 30 minutes.)
 *
 * @param {Map<string, number>} dedupMap - key -> last-processed timestamp (ms)
 * @param {number} [now=Date.now()]
 */
function evictStale(dedupMap, now = Date.now()) {
  if (!dedupMap || dedupMap.size <= RECENT_CAP) return;

  const cutoff = now - RECENT_TTL_MS;
  for (const [key, ts] of dedupMap) {
    if (ts < cutoff) dedupMap.delete(key);
  }

  // TTL alone may not be enough: enforce the hard cap, dropping oldest first.
  if (dedupMap.size > RECENT_CAP) {
    const oldestFirst = [...dedupMap.entries()].sort((a, b) => a[1] - b[1]);
    for (const [key] of oldestFirst.slice(0, dedupMap.size - RECENT_CAP)) {
      dedupMap.delete(key);
    }
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Run the standard commit-then-enrich flow for a single session.
 *
 * Steps: (1) normalize raw host entries to commit-ready shape, (2) commit the
 * messages plus session metadata, (3) enrich when the session has enough user
 * turns, otherwise record a skip. Dedup on (agentId, sessionId) makes a hook
 * that fires twice harmless.
 *
 * @param {object} opts
 * @param {object} opts.aquifer - Aquifer instance
 * @param {string} opts.sessionId
 * @param {string} opts.agentId
 * @param {string} [opts.source] - source tag (e.g. 'openclaw', 'cc', 'opencode')
 * @param {string} [opts.sessionKey] - passed through to commit()
 * @param {any[]} opts.rawEntries - host-native session entries
 * @param {'gateway'|'cc'|'claude-code'|'preNormalized'} [opts.adapter]
 *   'preNormalized' means opts.preNormalized already matches normalizeMessages
 *   output (used by OpenCode, which reads SQLite directly).
 * @param {object} [opts.preNormalized] - { messages, userCount, ... } ready to
 *   commit; required when adapter === 'preNormalized'
 * @param {number} [opts.minUserMessages=3] - enrich threshold
 * @param {boolean} [opts.enrich=true] - false = commit only; don't enrich or
 *   skip (pull-style ingest where enrichment runs later)
 * @param {Map} [opts.dedupMap] - Map<key, timestamp>; a recently processed
 *   session is not processed twice within the TTL
 * @param {Set} [opts.inFlight] - Set<key>; guards concurrent firings
 * @param {function} [opts.postProcess] - forwarded to enrich()
 * @param {function} [opts.summaryFn] - forwarded to enrich()
 * @param {function} [opts.entityParseFn] - forwarded to enrich()
 * @param {object} [opts.logger] - { info, warn }
 * @returns {Promise<{status:string, normalized:any[]|null, counts:object|null, enrichResult:object|null, skipReason?:string}>}
 */
async function runIngest(opts = {}) {
  const {
    aquifer, sessionId, agentId, source, sessionKey,
    rawEntries, adapter, preNormalized,
    minUserMessages = 3,
    enrich = true,
    dedupMap = null, inFlight = null,
    postProcess = null, summaryFn = null, entityParseFn = null,
    logger = console,
  } = opts;

  if (!aquifer) throw new Error('aquifer is required');
  if (!sessionId) throw new Error('sessionId is required');
  if (!agentId) throw new Error('agentId is required');

  const key = `${agentId}:${sessionId}`;
  const dedupResult = (skipReason) =>
    ({ status: 'dedup', normalized: null, counts: null, enrichResult: null, skipReason });

  if (dedupMap && dedupMap.has(key)) return dedupResult('recent');
  if (inFlight && inFlight.has(key)) return dedupResult('in_flight');
  if (inFlight) inFlight.add(key);

  // Record the session as processed in the dedup map (and keep it bounded).
  const markProcessed = () => {
    if (!dedupMap) return;
    dedupMap.set(key, Date.now());
    evictStale(dedupMap);
  };

  try {
    // Step 1: bring host entries into commit-ready shape.
    let session;
    if (adapter === 'preNormalized') {
      if (!preNormalized) throw new Error('preNormalized adapter requires opts.preNormalized');
      session = preNormalized;
    } else {
      session = normalizeMessages(rawEntries, { adapter });
    }

    if (session.userCount === 0) {
      return { status: 'skipped_empty', normalized: session.messages, counts: session, enrichResult: null, skipReason: 'no_user_messages' };
    }

    // Step 2: persist messages + session metadata.
    await aquifer.commit(sessionId, session.messages, {
      agentId,
      source: source || adapter || 'api',
      sessionKey: sessionKey || null,
      model: session.model,
      tokensIn: session.tokensIn,
      tokensOut: session.tokensOut,
      startedAt: session.startedAt,
      lastMessageAt: session.lastMessageAt,
    });
    if (logger && logger.info) {
      logger.info(`[aquifer-ingest] committed ${sessionId} (${session.messages.length} msgs, user=${session.userCount})`);
    }

    // Step 3a: caller opted out of enrichment — commit only.
    if (!enrich) {
      markProcessed();
      return { status: 'committed_only', normalized: session.messages, counts: session, enrichResult: null };
    }

    // Step 3b: too few user turns — record a skip and return. Note: the
    // dedup map is intentionally NOT updated here (pre-existing behavior).
    if (session.userCount < minUserMessages) {
      try {
        await aquifer.skip(sessionId, { agentId, reason: `user_count=${session.userCount} < min=${minUserMessages}` });
      } catch (skipErr) {
        if (logger && logger.warn) logger.warn(`[aquifer-ingest] skip failed for ${sessionId}: ${skipErr.message}`);
      }
      return { status: 'skipped_short', normalized: session.messages, counts: session, enrichResult: null, skipReason: `user_count=${session.userCount}` };
    }

    // Step 3c: enrich. Commit already succeeded, so enrich failures are
    // logged but never rethrown.
    let enrichResult = null;
    try {
      enrichResult = await aquifer.enrich(sessionId, {
        agentId,
        summaryFn: summaryFn || undefined,
        entityParseFn: entityParseFn || undefined,
        postProcess: postProcess || undefined,
      });
      if (logger && logger.info) {
        logger.info(`[aquifer-ingest] enriched ${sessionId} (turns=${enrichResult.turnsEmbedded}, entities=${enrichResult.entitiesFound})`);
      }
    } catch (enrichErr) {
      if (logger && logger.warn) logger.warn(`[aquifer-ingest] enrich failed for ${sessionId}: ${enrichErr.message}`);
    }

    markProcessed();
    return { status: 'ok', normalized: session.messages, counts: session, enrichResult };
  } finally {
    if (inFlight) inFlight.delete(key);
  }
}
|
|
147
|
+
|
|
148
|
+
// Sole export: the shared commit-then-enrich ingest entry point.
module.exports = { runIngest };
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Aquifer v1.2.0: LLM provider autodetect from env for install-and-go.
|
|
4
|
+
//
|
|
5
|
+
// Precedence:
|
|
6
|
+
// 1. config.llm.fn (explicit function — host supplies)
|
|
7
|
+
// 2. AQUIFER_LLM_PROVIDER env + provider-specific api key + optional model
|
|
8
|
+
//
|
|
9
|
+
// We do NOT silently pick a provider from multiple keys (ambiguous). Hosts
|
|
10
|
+
// must opt in by setting AQUIFER_LLM_PROVIDER explicitly when they want env
|
|
11
|
+
// autowiring.
|
|
12
|
+
//
|
|
13
|
+
// Two response shapes in flight:
|
|
14
|
+
// - Anthropic-shape: { content: [{ type:'text', text:'...' }] }
|
|
15
|
+
// Used by: minimax, opencode
|
|
16
|
+
// - OpenAI-shape: { choices:[{ message:{ content:'...' } }] }
|
|
17
|
+
// Used by: openai, openrouter
|
|
18
|
+
|
|
19
|
+
const { createLlmFn } = require('./llm');
|
|
20
|
+
|
|
21
|
+
// Providers speaking the Anthropic /v1/messages wire shape. Per entry:
// envKey = API-key env var, baseUrl = full messages endpoint,
// extraHeaders = sent verbatim on every request.
const ANTHROPIC_PROVIDERS = {
  minimax: {
    baseUrl: 'https://api.minimax.io/anthropic/v1/messages',
    envKey: 'MINIMAX_API_KEY',
    defaultModel: 'MiniMax-M2.7',
    extraHeaders: { 'anthropic-version': '2023-06-01' },
  },
  opencode: {
    baseUrl: 'https://opencode.ai/zen/go/v1/messages',
    envKey: 'OPENCODE_API_KEY',
    defaultModel: 'minimax-m2.5',
    extraHeaders: { 'anthropic-version': '2023-06-01' },
  },
};

// Providers speaking the OpenAI chat-completions shape; baseUrl is the API
// root handed to createLlmFn.
const OPENAI_PROVIDERS = {
  openai: {
    baseUrl: 'https://api.openai.com/v1',
    envKey: 'OPENAI_API_KEY',
    defaultModel: 'gpt-4o-mini',
  },
  openrouter: {
    baseUrl: 'https://openrouter.ai/api/v1',
    envKey: 'OPENROUTER_API_KEY',
    defaultModel: 'openai/gpt-4o-mini',
  },
};
|
|
48
|
+
|
|
49
|
+
/**
 * Build an llmFn(prompt) -> Promise<string> for Anthropic-wire-shape APIs.
 *
 * Sends a single user message to baseUrl, accepts either response shape
 * (Anthropic `content[]` or OpenAI `choices[]`), strips <think> blocks from
 * the answer, and aborts the request after the timeout.
 *
 * @param {object} cfg
 * @param {string} cfg.baseUrl - full messages endpoint
 * @param {string} cfg.apiKey - sent as a Bearer token
 * @param {string} cfg.model
 * @param {object} [cfg.extraHeaders] - merged over the default headers
 * @param {number} [cfg.timeoutMs=120000]
 * @param {number} [cfg.maxTokens=4096]
 * @returns {(prompt: string) => Promise<string>}
 */
function createAnthropicShapeFn({ baseUrl, apiKey, model, extraHeaders, timeoutMs, maxTokens }) {
  const requestTimeout = timeoutMs || 120000;
  const tokenBudget = maxTokens || 4096;

  return async function llmFn(prompt) {
    const aborter = new AbortController();
    const watchdog = setTimeout(() => aborter.abort(), requestTimeout);
    try {
      const res = await fetch(baseUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${apiKey}`,
          ...(extraHeaders || {}),
        },
        body: JSON.stringify({
          model,
          messages: [{ role: 'user', content: prompt }],
          max_tokens: tokenBudget,
        }),
        signal: aborter.signal,
      });

      if (!res.ok) {
        // Surface a compact, single-line error carrying the HTTP status.
        const body = await res.text().catch(() => '');
        const err = new Error(`LLM ${res.status}: ${body.slice(0, 200).replace(/[\n\r]/g, ' ')}`);
        err.statusCode = res.status;
        throw err;
      }

      const data = await res.json();
      let raw = '';
      if (Array.isArray(data.content)) {
        raw = data.content.map((part) => part.text || '').join('');
      } else if (Array.isArray(data.choices)) {
        raw = data.choices[0]?.message?.content || '';
      }
      // Drop chain-of-thought blocks some models emit before the answer.
      return raw.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
    } finally {
      clearTimeout(watchdog);
    }
  };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Resolve an llmFn from explicit config or environment.
 *
 * Precedence: llmConfig.fn wins; otherwise AQUIFER_LLM_PROVIDER selects an
 * entry from the provider tables (which must have its API key set in env).
 *
 * @param {object|null} llmConfig - may carry an explicit { fn }
 * @param {object} env - environment map (usually process.env)
 * @returns {Function|null} an llmFn, or null when no provider is configured
 * @throws {Error} on an unknown provider or a missing API key
 */
function resolveLlmFn(llmConfig, env) {
  if (llmConfig && typeof llmConfig.fn === 'function') return llmConfig.fn;

  const provider = env.AQUIFER_LLM_PROVIDER;
  if (!provider) return null;

  const model = env.AQUIFER_LLM_MODEL || null;
  const timeoutMs = env.AQUIFER_LLM_TIMEOUT ? Number(env.AQUIFER_LLM_TIMEOUT) : undefined;

  // Pull the provider's API key from env, failing loudly when absent.
  const requireKey = (entry) => {
    const apiKey = env[entry.envKey];
    if (!apiKey) throw new Error(`AQUIFER_LLM_PROVIDER=${provider} requires ${entry.envKey}`);
    return apiKey;
  };

  const anthropicEntry = ANTHROPIC_PROVIDERS[provider];
  if (anthropicEntry) {
    return createAnthropicShapeFn({
      baseUrl: anthropicEntry.baseUrl,
      apiKey: requireKey(anthropicEntry),
      model: model || anthropicEntry.defaultModel,
      extraHeaders: anthropicEntry.extraHeaders,
      timeoutMs,
    });
  }

  const openaiEntry = OPENAI_PROVIDERS[provider];
  if (openaiEntry) {
    return createLlmFn({
      baseUrl: openaiEntry.baseUrl,
      model: model || openaiEntry.defaultModel,
      apiKey: requireKey(openaiEntry),
      timeoutMs,
    });
  }

  const supported = [
    ...Object.keys(ANTHROPIC_PROVIDERS),
    ...Object.keys(OPENAI_PROVIDERS),
  ].join(', ');
  throw new Error(`AQUIFER_LLM_PROVIDER=${provider} not supported. Valid: ${supported}`);
}
|
|
136
|
+
|
|
137
|
+
// Sole export: env-driven llmFn resolution.
module.exports = { resolveLlmFn };
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Shared normalize — turns raw host entries into commit-ready messages plus
|
|
5
|
+
// session-level metadata. Wraps pipeline/normalize so consumers don't each
|
|
6
|
+
// reinvent their own role/content extraction.
|
|
7
|
+
//
|
|
8
|
+
// Supported adapters: 'gateway' | 'cc' (alias of 'claude-code'). The OpenCode
|
|
9
|
+
// consumer reads from SQLite and constructs the output shape directly; it is
|
|
10
|
+
// not expected to route through here.
|
|
11
|
+
//
|
|
12
|
+
// Output shape is the one commit() + enrich() expect:
|
|
13
|
+
// { messages:[{role,content,timestamp}], userCount, assistantCount,
|
|
14
|
+
// model, tokensIn, tokensOut, startedAt, lastMessageAt,
|
|
15
|
+
// skipStats, boundaries, toolsUsed }
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
const { normalizeSession } = require('../../pipeline/normalize');
|
|
19
|
+
|
|
20
|
+
// Accepted adapter names mapped to the canonical client id that
// pipeline/normalize understands.
const ADAPTER_ALIASES = {
  'cc': 'claude-code',
  'claude-code': 'claude-code',
  'gateway': 'gateway',
};

/**
 * Map a caller-supplied adapter name to its canonical client id.
 *
 * @param {string|null|undefined} adapter - falsy means auto-detect downstream
 * @returns {string|null} canonical id, or null for auto-detect
 * @throws {Error} when the adapter name is not recognized
 */
function resolveAdapter(adapter) {
  if (!adapter) return null; // auto-detect
  // Object.hasOwn guards against inherited keys ('toString', 'constructor')
  // resolving to Object.prototype members instead of throwing.
  if (!Object.hasOwn(ADAPTER_ALIASES, adapter)) {
    throw new Error(`Unknown adapter: "${adapter}". Supported: gateway, cc (alias claude-code).`);
  }
  return ADAPTER_ALIASES[adapter];
}
|
|
34
|
+
|
|
35
|
+
/**
 * Pull session-level metadata (model name, token totals) out of raw entries.
 *
 * Tolerates null/non-object entries. An entry may carry its payload either at
 * the top level or nested under `message`. The first model seen wins; token
 * usage is summed across all entries (accepting both input_tokens/output_tokens
 * and input/output field names).
 *
 * @param {any[]} rawEntries
 * @returns {{model: string|null, tokensIn: number, tokensOut: number}}
 */
function extractRawMeta(rawEntries) {
  let model = null;
  let tokensIn = 0;
  let tokensOut = 0;

  for (const entry of rawEntries || []) {
    if (!entry || typeof entry !== 'object') continue;
    const payload = entry.message || entry;
    if (!payload || typeof payload !== 'object') continue;

    if (!model && payload.model) model = payload.model;
    const usage = payload.usage;
    if (usage) {
      tokensIn += usage.input_tokens || usage.input || 0;
      tokensOut += usage.output_tokens || usage.output || 0;
    }
  }

  return { model, tokensIn, tokensOut };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Normalize raw host entries to Aquifer-commit shape + session metadata.
 *
 * @param {any[]} rawEntries
 * @param {object} [opts]
 * @param {'gateway'|'cc'|'claude-code'} [opts.adapter] - host adapter;
 *   auto-detected by normalizeSession when omitted
 * @returns {{
 *   messages: {role:string,content:string,timestamp:string|null}[],
 *   userCount: number, assistantCount: number,
 *   model: string|null, tokensIn: number, tokensOut: number,
 *   startedAt: string|null, lastMessageAt: string|null,
 *   skipStats: object, boundaries: object[], toolsUsed: string[]
 * }}
 */
function normalizeMessages(rawEntries, opts = {}) {
  const entries = Array.isArray(rawEntries) ? rawEntries : [];
  if (entries.length === 0) return emptyNormalizeResult();

  const client = resolveAdapter(opts.adapter);
  const { normalized, skipStats, boundaries, toolsUsed } = normalizeSession(
    entries,
    client ? { client } : {},
  );

  const messages = normalized.map((m) => ({
    role: m.role,
    content: m.text || '',
    timestamp: m.timestamp || null,
  }));

  // Role counts and the first/last timestamps in message order.
  let userCount = 0;
  let assistantCount = 0;
  let startedAt = null;
  let lastMessageAt = null;
  for (const { role, timestamp } of messages) {
    if (role === 'user') userCount += 1;
    else if (role === 'assistant') assistantCount += 1;
    if (timestamp) {
      startedAt = startedAt || timestamp;
      lastMessageAt = timestamp;
    }
  }

  const { model, tokensIn, tokensOut } = extractRawMeta(entries);

  return {
    messages,
    userCount,
    assistantCount,
    model,
    tokensIn,
    tokensOut,
    startedAt,
    lastMessageAt,
    skipStats,
    boundaries,
    toolsUsed,
  };
}

// Shape returned for an empty/absent session: zero counts, zeroed skipStats.
function emptyNormalizeResult() {
  return {
    messages: [],
    userCount: 0,
    assistantCount: 0,
    model: null,
    tokensIn: 0,
    tokensOut: 0,
    startedAt: null,
    lastMessageAt: null,
    skipStats: {
      total: 0, nonMessage: 0, noRole: 0, meta: 0, caveat: 0,
      empty: 0, toolOnly: 0, narration: 0, toolResult: 0, routine: 0, command: 0,
    },
    boundaries: [],
    toolsUsed: [],
  };
}
|
|
128
|
+
|
|
129
|
+
// Public surface: the normalizer plus the metadata extractor (for tests).
module.exports = { normalizeMessages, extractRawMeta };
|