@shadowforge0/aquifer-memory 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +354 -0
- package/consumers/cli.js +314 -0
- package/consumers/mcp.js +135 -0
- package/consumers/openclaw-plugin.js +235 -0
- package/consumers/shared/config.js +143 -0
- package/consumers/shared/factory.js +77 -0
- package/consumers/shared/llm.js +119 -0
- package/core/aquifer.js +634 -0
- package/core/entity.js +360 -0
- package/core/hybrid-rank.js +166 -0
- package/core/storage.js +550 -0
- package/index.js +6 -0
- package/package.json +57 -0
- package/pipeline/embed.js +230 -0
- package/pipeline/extract-entities.js +73 -0
- package/pipeline/summarize.js +245 -0
- package/schema/001-base.sql +180 -0
- package/schema/002-entities.sql +120 -0
package/consumers/mcp.js
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Aquifer MCP Server — session_recall tool via Model Context Protocol.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* npx aquifer mcp
|
|
9
|
+
* node consumers/mcp.js
|
|
10
|
+
*
|
|
11
|
+
* Config via environment variables (see consumers/shared/config.js).
|
|
12
|
+
* Requires: DATABASE_URL + AQUIFER_EMBED_BASE_URL + AQUIFER_EMBED_MODEL
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const { createAquiferFromConfig } = require('./shared/factory');
|
|
16
|
+
|
|
17
|
+
// Module-level cache for the Aquifer instance; stays null until first use.
let _aquifer = null;

/**
 * Return the shared Aquifer instance, building it from configuration the
 * first time it is requested and reusing it afterwards.
 *
 * @returns {object} Whatever createAquiferFromConfig() produced.
 */
function getAquifer() {
  if (_aquifer) {
    return _aquifer;
  }
  _aquifer = createAquiferFromConfig();
  return _aquifer;
}
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Format recall results as readable text
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
/**
 * Render recall results as Markdown-style text for a tool response.
 *
 * Fields read from each result: structuredSummary ({ title, overview }),
 * summaryText, startedAt, agentId, matchedTurnText, score.
 *
 * A malformed row never aborts the whole listing: an unparseable `startedAt`
 * is shown as "unknown" and a non-numeric `score` as "?".
 *
 * @param {Array<object>} results - Recall hits, in display order.
 * @param {string} query - Search query, echoed in the header line.
 * @returns {string} Formatted text, or a "No results" message when empty.
 */
function formatResults(results, query) {
  if (!Array.isArray(results) || results.length === 0) {
    return `No results found for "${query}".`;
  }

  const lines = [`Found ${results.length} result(s) for "${query}":\n`];

  results.forEach((r, i) => {
    const ss = r.structuredSummary || {};
    const title = ss.title || r.summaryText?.slice(0, 60) || '(untitled)';

    // toISOString() throws RangeError on an invalid Date, so validate first.
    let date = 'unknown';
    if (r.startedAt) {
      const parsed = new Date(r.startedAt);
      if (!Number.isNaN(parsed.getTime())) {
        date = parsed.toISOString().slice(0, 10);
      }
    }

    lines.push(`### ${i + 1}. ${title} (${date}, ${r.agentId || 'default'})`);

    const summary = ss.overview || r.summaryText;
    if (summary) {
      lines.push(String(summary).slice(0, 300));
    }
    if (r.matchedTurnText) {
      lines.push(`Matched turn: ${String(r.matchedTurnText).slice(0, 200)}`);
    }

    // Accept numeric strings as well as numbers; anything else prints "?".
    const numericScore = r.score == null || r.score === '' ? NaN : Number(r.score);
    const score = Number.isFinite(numericScore) ? numericScore.toFixed(3) : '?';
    lines.push(`Score: ${score}\n`);
  });

  return lines.join('\n');
}
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Start MCP server
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
/**
 * Start the Aquifer MCP server on stdio and expose the `session_recall` tool.
 *
 * The MCP SDK and zod are loaded lazily so that merely requiring this module
 * does not need them; when they cannot be loaded, an install hint is written
 * to stderr and the process exits with code 1.
 *
 * The database pool (attached to the Aquifer instance as `_pool`) is closed
 * at most once: on SIGINT/SIGTERM (followed by exit code 0) or when the
 * server connection closes.
 *
 * @returns {Promise<void>} Resolves once the server is connected to stdio.
 */
async function main() {
  let McpServer, StdioServerTransport, z;
  try {
    ({ McpServer } = require('@modelcontextprotocol/sdk/server/mcp.js'));
    ({ StdioServerTransport } = require('@modelcontextprotocol/sdk/server/stdio.js'));
    ({ z } = require('zod'));
  } catch (e) {
    process.stderr.write(
      'aquifer mcp requires @modelcontextprotocol/sdk and zod.\n' +
      'Install: npm install @modelcontextprotocol/sdk zod\n'
    );
    process.exit(1);
  }

  const server = new McpServer({
    name: 'aquifer-memory',
    version: '0.2.0',
  });

  server.tool(
    'session_recall',
    'Search stored sessions by keyword, returning ranked summaries and matched conversation turns.',
    {
      query: z.string().min(1).describe('Search query (keyword or natural language)'),
      limit: z.number().int().min(1).max(20).optional().describe('Max results (default 5)'),
      agentId: z.string().optional().describe('Filter by agent ID'),
      source: z.string().optional().describe('Filter by source (e.g., gateway, cc)'),
      dateFrom: z.string().optional().describe('Start date YYYY-MM-DD'),
      dateTo: z.string().optional().describe('End date YYYY-MM-DD'),
    },
    async (params) => {
      try {
        // Aquifer is created on the first tool call, not at server start.
        const aquifer = getAquifer();
        const limit = params.limit ?? 5;

        const results = await aquifer.recall(params.query, {
          limit,
          agentId: params.agentId || undefined,
          source: params.source || undefined,
          dateFrom: params.dateFrom || undefined,
          dateTo: params.dateTo || undefined,
        });

        const text = formatResults(results, params.query);
        return { content: [{ type: 'text', text }] };
      } catch (err) {
        // Report failures as a tool-level error instead of crashing the server.
        return {
          content: [{ type: 'text', text: `session_recall error: ${err.message}` }],
          isError: true,
        };
      }
    }
  );

  // Close the pool at most once, whichever shutdown path fires first.
  let poolClosed = false;
  const closePool = async () => {
    if (poolClosed || !_aquifer?._pool) return;
    poolClosed = true;
    // Best-effort: a failure to end the pool must not block shutdown.
    await _aquifer._pool.end().catch(() => {});
  };

  // Graceful shutdown
  const cleanup = async () => {
    await closePool();
    process.exit(0);
  };
  process.on('SIGINT', cleanup);
  process.on('SIGTERM', cleanup);

  const transport = new StdioServerTransport();

  // Register the close hook on the protocol server BEFORE connecting.
  // Assigning transport.onclose after connect() replaces whatever handler
  // connect() installed on the transport.
  // NOTE(review): relies on McpServer exposing `.server` with an `onclose`
  // callback; confirm against the installed SDK version.
  server.server.onclose = closePool;

  await server.connect(transport);
}
|
|
126
|
+
|
|
127
|
+
// Start the server only when this file is the script being run; requiring it
// as a module just exposes `main` without side effects.
if (require.main === module) {
  const reportFatal = (err) => {
    process.stderr.write(`aquifer-mcp error: ${err.message}\n`);
    process.exit(1);
  };
  main().catch(reportFatal);
}

module.exports = { main };
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Aquifer Memory — OpenClaw Plugin
|
|
5
|
+
*
|
|
6
|
+
* Auto-captures sessions on before_reset and provides session_recall tool.
|
|
7
|
+
* Install: add to openclaw.json plugins or extensions directory.
|
|
8
|
+
*
|
|
9
|
+
* Config via plugin config, environment variables, or aquifer.config.json.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const { createAquiferFromConfig } = require('./shared/factory');
|
|
13
|
+
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Helpers
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
/**
 * Flatten a host-provided message list into bare message objects.
 *
 * Accepts items that are either a message themselves (truthy `role`) or a
 * wrapper carrying one under `message`. Everything else is dropped.
 *
 * @param {*} messages - Candidate list; non-arrays yield an empty result.
 * @returns {Array<object>} Message objects, in input order.
 */
function coerceRawEntries(messages) {
  const entries = [];
  if (!Array.isArray(messages)) {
    return entries;
  }

  for (const item of messages) {
    if (!item || typeof item !== 'object') {
      continue;
    }
    if (item.role) {
      entries.push(item);
    } else if (item.message?.role) {
      // Unwrap { message: { role, ... } } envelopes.
      entries.push(item.message);
    }
  }

  return entries;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Reduce raw session entries to `{ role, content, timestamp }` messages and
 * collect session-level statistics along the way.
 *
 * Only `user`, `assistant` and `system` roles are kept. Array-style content
 * is collapsed to the newline-joined `text` of its `type: 'text'` parts.
 * Null entries and null content parts are skipped rather than throwing.
 *
 * @param {Iterable<object>} rawEntries - Messages, or wrappers with `.message`.
 * @returns {{
 *   messages: Array<{role: string, content: string, timestamp: *}>,
 *   userCount: number,
 *   assistantCount: number,
 *   model: (string|null),
 *   tokensIn: number,
 *   tokensOut: number,
 *   startedAt: *,
 *   lastMessageAt: *
 * }} `startedAt` is the first timestamp encountered, `lastMessageAt` the
 *   last; `model` is the first truthy `msg.model` seen.
 */
function normalizeEntries(rawEntries) {
  const KEPT_ROLES = ['user', 'assistant', 'system'];

  const normalized = [];
  let userCount = 0;
  let assistantCount = 0;
  let model = null;
  let tokensIn = 0;
  let tokensOut = 0;
  let startedAt = null;
  let lastMessageAt = null;

  for (const entry of rawEntries) {
    // `?.` keeps a null/undefined entry from throwing before the guard below.
    const msg = entry?.message || entry;
    if (!msg || !msg.role) continue;
    if (!KEPT_ROLES.includes(msg.role)) continue;

    let content = '';
    if (typeof msg.content === 'string') {
      content = msg.content;
    } else if (Array.isArray(msg.content)) {
      content = msg.content
        .filter((part) => part && part.type === 'text') // tolerate null parts
        .map((part) => part.text || '')
        .join('\n');
    }

    // The wrapper's timestamp takes precedence over the message's own.
    const ts = entry.timestamp || msg.timestamp || null;
    if (ts && !startedAt) startedAt = ts;
    if (ts) lastMessageAt = ts;

    if (msg.role === 'user') userCount++;
    if (msg.role === 'assistant') assistantCount++;
    if (msg.model && !model) model = msg.model;
    if (msg.usage) {
      // Two usage key spellings are accepted: *_tokens and the short form.
      tokensIn += msg.usage.input_tokens || msg.usage.input || 0;
      tokensOut += msg.usage.output_tokens || msg.usage.output || 0;
    }

    normalized.push({ role: msg.role, content, timestamp: ts });
  }

  return {
    messages: normalized,
    userCount,
    assistantCount,
    model,
    tokensIn,
    tokensOut,
    startedAt,
    lastMessageAt,
  };
}
|
|
75
|
+
|
|
76
|
+
/**
 * Render recall results as Markdown-style text for the plugin tool response.
 *
 * Fields read from each result: structuredSummary ({ title, overview }),
 * summaryText, startedAt, agentId, matchedTurnText. An unparseable
 * `startedAt` is shown as "unknown" instead of throwing.
 *
 * @param {Array<object>} results - Recall hits, in display order.
 * @returns {string} One block per hit separated by blank lines, or a
 *   "No matching sessions" message when there is nothing to show.
 */
function formatRecallResults(results) {
  if (!Array.isArray(results) || results.length === 0) {
    return 'No matching sessions found.';
  }

  return results.map((r, i) => {
    const ss = r.structuredSummary || {};
    const title = ss.title || r.summaryText?.slice(0, 60) || '(untitled)';

    // toISOString() throws RangeError on an invalid Date, so validate first.
    let date = 'unknown';
    if (r.startedAt) {
      const parsed = new Date(r.startedAt);
      if (!Number.isNaN(parsed.getTime())) {
        date = parsed.toISOString().slice(0, 10);
      }
    }

    const lines = [`### ${i + 1}. ${title} (${date}, ${r.agentId || 'default'})`];

    const summary = ss.overview || r.summaryText;
    if (summary) {
      lines.push(String(summary).slice(0, 300));
    }
    if (r.matchedTurnText) {
      lines.push(`Matched: ${String(r.matchedTurnText).slice(0, 200)}`);
    }
    return lines.join('\n');
  }).join('\n\n');
}
|
|
96
|
+
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
// Plugin
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
module.exports = {
|
|
102
|
+
id: 'aquifer-memory',
|
|
103
|
+
name: 'Aquifer Memory',
|
|
104
|
+
|
|
105
|
+
register(api) {
|
|
106
|
+
const pluginConfig = api.pluginConfig || {};
|
|
107
|
+
let aquifer;
|
|
108
|
+
|
|
109
|
+
try {
|
|
110
|
+
aquifer = createAquiferFromConfig(pluginConfig);
|
|
111
|
+
} catch (err) {
|
|
112
|
+
api.logger.warn(`[aquifer-memory] disabled: ${err.message}`);
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const minUserMessages = pluginConfig.minUserMessages || 3;
|
|
117
|
+
const recentlyProcessed = new Map();
|
|
118
|
+
const inFlight = new Set();
|
|
119
|
+
|
|
120
|
+
// --- before_reset: auto-capture sessions ---
|
|
121
|
+
|
|
122
|
+
api.on('before_reset', (event, ctx) => {
|
|
123
|
+
const sessionId = ctx?.sessionId || event?.sessionId;
|
|
124
|
+
const agentId = ctx?.agentId || pluginConfig.agentId || 'main';
|
|
125
|
+
const sessionKey = ctx?.sessionKey || null;
|
|
126
|
+
|
|
127
|
+
if (!sessionId) return;
|
|
128
|
+
if ((sessionKey || '').includes('subagent')) return;
|
|
129
|
+
if ((sessionKey || '').includes(':cron:')) return;
|
|
130
|
+
|
|
131
|
+
const dedupKey = `${agentId}:${sessionId}`;
|
|
132
|
+
if (recentlyProcessed.has(dedupKey) || inFlight.has(dedupKey)) return;
|
|
133
|
+
|
|
134
|
+
const rawEntries = coerceRawEntries(event?.messages || []);
|
|
135
|
+
if (rawEntries.length < 3) {
|
|
136
|
+
api.logger.info(`[aquifer-memory] skip: ${sessionId} only ${rawEntries.length} msgs`);
|
|
137
|
+
return;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
inFlight.add(dedupKey);
|
|
141
|
+
api.logger.info(`[aquifer-memory] capturing ${sessionId} (${rawEntries.length} entries)`);
|
|
142
|
+
|
|
143
|
+
(async () => {
|
|
144
|
+
try {
|
|
145
|
+
const norm = normalizeEntries(rawEntries);
|
|
146
|
+
if (norm.userCount === 0) {
|
|
147
|
+
api.logger.info(`[aquifer-memory] skip: no user messages in ${sessionId}`);
|
|
148
|
+
return;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// Commit
|
|
152
|
+
await aquifer.commit(sessionId, norm.messages, {
|
|
153
|
+
agentId,
|
|
154
|
+
source: 'openclaw',
|
|
155
|
+
sessionKey,
|
|
156
|
+
model: norm.model,
|
|
157
|
+
tokensIn: norm.tokensIn,
|
|
158
|
+
tokensOut: norm.tokensOut,
|
|
159
|
+
startedAt: norm.startedAt,
|
|
160
|
+
lastMessageAt: norm.lastMessageAt,
|
|
161
|
+
});
|
|
162
|
+
api.logger.info(`[aquifer-memory] committed ${sessionId}`);
|
|
163
|
+
|
|
164
|
+
// Enrich (if enough messages)
|
|
165
|
+
if (norm.userCount >= minUserMessages) {
|
|
166
|
+
try {
|
|
167
|
+
const result = await aquifer.enrich(sessionId, { agentId });
|
|
168
|
+
api.logger.info(`[aquifer-memory] enriched ${sessionId} (${result.turnsEmbedded} turns, ${result.entitiesFound} entities)`);
|
|
169
|
+
} catch (enrichErr) {
|
|
170
|
+
api.logger.warn(`[aquifer-memory] enrich failed for ${sessionId}: ${enrichErr.message}`);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
recentlyProcessed.set(dedupKey, Date.now());
|
|
175
|
+
} catch (err) {
|
|
176
|
+
api.logger.warn(`[aquifer-memory] capture failed for ${sessionId}: ${err.message}`);
|
|
177
|
+
} finally {
|
|
178
|
+
inFlight.delete(dedupKey);
|
|
179
|
+
// Evict old entries
|
|
180
|
+
if (recentlyProcessed.size > 200) {
|
|
181
|
+
const cutoff = Date.now() - 30 * 60 * 1000;
|
|
182
|
+
for (const [k, ts] of recentlyProcessed) {
|
|
183
|
+
if (ts < cutoff) recentlyProcessed.delete(k);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
})();
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
// --- session_recall tool ---
|
|
191
|
+
|
|
192
|
+
api.registerTool((ctx) => {
|
|
193
|
+
if ((ctx?.sessionKey || '').includes('subagent')) return null;
|
|
194
|
+
|
|
195
|
+
return {
|
|
196
|
+
name: 'session_recall',
|
|
197
|
+
description: 'Search stored sessions by keyword, returning ranked summaries and matched conversation turns.',
|
|
198
|
+
parameters: {
|
|
199
|
+
type: 'object',
|
|
200
|
+
properties: {
|
|
201
|
+
query: { type: 'string', description: 'Search query' },
|
|
202
|
+
limit: { type: 'number', description: 'Max results (default 5, max 20)' },
|
|
203
|
+
agentId: { type: 'string', description: 'Filter by agent ID' },
|
|
204
|
+
source: { type: 'string', description: 'Filter by source' },
|
|
205
|
+
dateFrom: { type: 'string', description: 'Start date YYYY-MM-DD' },
|
|
206
|
+
dateTo: { type: 'string', description: 'End date YYYY-MM-DD' },
|
|
207
|
+
},
|
|
208
|
+
required: ['query'],
|
|
209
|
+
},
|
|
210
|
+
async execute(_toolCallId, params) {
|
|
211
|
+
try {
|
|
212
|
+
const limit = Math.max(1, Math.min(20, parseInt(params?.limit ?? 5, 10) || 5));
|
|
213
|
+
const results = await aquifer.recall(params.query, {
|
|
214
|
+
limit,
|
|
215
|
+
agentId: params.agentId || undefined,
|
|
216
|
+
source: params.source || undefined,
|
|
217
|
+
dateFrom: params.dateFrom || undefined,
|
|
218
|
+
dateTo: params.dateTo || undefined,
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
const text = formatRecallResults(results);
|
|
222
|
+
return { content: [{ type: 'text', text }] };
|
|
223
|
+
} catch (err) {
|
|
224
|
+
return {
|
|
225
|
+
content: [{ type: 'text', text: `session_recall error: ${err.message}` }],
|
|
226
|
+
isError: true,
|
|
227
|
+
};
|
|
228
|
+
}
|
|
229
|
+
},
|
|
230
|
+
};
|
|
231
|
+
}, { name: 'session_recall' });
|
|
232
|
+
|
|
233
|
+
api.logger.info('[aquifer-memory] registered (before_reset + session_recall)');
|
|
234
|
+
},
|
|
235
|
+
};
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Defaults
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
// Baseline configuration. Every value here can be overridden by a config
// file, an environment variable, or programmatic overrides.
const DEFAULTS = {
  // Database connection
  db: {
    url: null,
    max: 10,
    idleTimeoutMs: 30000,
  },

  schema: 'aquifer',
  tenantId: 'default',

  defaults: {
    agentId: null,
    source: 'api',
  },

  // Embedding endpoint
  embed: {
    baseUrl: null,
    model: null,
    apiKey: null,
    dim: null,
    timeoutMs: 120000,
    maxRetries: 3,
    chunkSize: 32,
  },

  // LLM endpoint
  llm: {
    baseUrl: null,
    model: null,
    apiKey: null,
    timeoutMs: 60000,
    maxRetries: 3,
    temperature: 0,
  },

  entities: {
    enabled: false,
    mergeCall: true,
  },

  // Ranking weights
  rank: {
    rrf: 0.65,
    timeDecay: 0.25,
    access: 0.1,
    entityBoost: 0.18,
  },
};
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Env var mapping: ENV_NAME → config path
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
// Each row is [ENV_NAME, dotted config path, optional coercion type].
// Rows are applied in order, so when two variables target the same path the
// later row wins (AQUIFER_DB_URL over DATABASE_URL).
const ENV_MAP = [
  // Database
  ['DATABASE_URL', 'db.url'],
  ['AQUIFER_DB_URL', 'db.url'],
  ['AQUIFER_DB_MAX', 'db.max', Number],

  // Identity / scoping
  ['AQUIFER_SCHEMA', 'schema'],
  ['AQUIFER_TENANT_ID', 'tenantId'],
  ['AQUIFER_AGENT_ID', 'defaults.agentId'],
  ['AQUIFER_SOURCE', 'defaults.source'],

  // Embeddings
  ['AQUIFER_EMBED_BASE_URL', 'embed.baseUrl'],
  ['AQUIFER_EMBED_MODEL', 'embed.model'],
  ['AQUIFER_EMBED_API_KEY', 'embed.apiKey'],
  ['AQUIFER_EMBED_DIM', 'embed.dim', Number],
  ['AQUIFER_EMBED_TIMEOUT_MS', 'embed.timeoutMs', Number],
  ['AQUIFER_EMBED_CHUNK_SIZE', 'embed.chunkSize', Number],

  // LLM
  ['AQUIFER_LLM_BASE_URL', 'llm.baseUrl'],
  ['AQUIFER_LLM_MODEL', 'llm.model'],
  ['AQUIFER_LLM_API_KEY', 'llm.apiKey'],
  ['AQUIFER_LLM_TIMEOUT_MS', 'llm.timeoutMs', Number],
  ['AQUIFER_LLM_TEMPERATURE', 'llm.temperature', Number],

  // Entities
  ['AQUIFER_ENTITIES_ENABLED', 'entities.enabled', Boolean],
];
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// Helpers
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
/**
 * Recursively merge `source` into a shallow copy of `target`.
 *
 * - Plain-object values present on both sides are merged key by key.
 * - Any other defined value in `source` (including `null` and arrays)
 *   replaces the value in the result.
 * - `undefined` values in `source` are ignored.
 * - `__proto__`, `constructor` and `prototype` keys are skipped, so a parsed
 *   JSON document cannot swap the prototype of the merged object.
 * - A `source` that is not an object yields a plain copy of `target`.
 *
 * Neither argument is mutated. Replaced values are shared by reference with
 * `source`, not cloned.
 *
 * @param {object} target - Base object.
 * @param {object} source - Object whose values take precedence.
 * @returns {object} New merged object.
 */
function deepMerge(target, source) {
  const isMergeable = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  const result = { ...target };
  if (source === null || typeof source !== 'object') {
    return result;
  }

  for (const key of Object.keys(source)) {
    // JSON.parse creates "__proto__" as an own key; assigning it would
    // replace the result's prototype instead of adding a property.
    if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue;

    const incoming = source[key];
    if (incoming === undefined) continue;

    result[key] = isMergeable(incoming) && isMergeable(result[key])
      ? deepMerge(result[key], incoming)
      : incoming;
  }
  return result;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Assign `value` at a dotted path inside `obj`, mutating `obj` in place.
 *
 * Intermediate segments that are missing, falsy, or not objects are replaced
 * with fresh empty objects on the way down.
 *
 * @param {object} obj - Object to mutate.
 * @param {string} dotPath - Path such as "embed.baseUrl".
 * @param {*} value - Value to store at the final segment.
 * @returns {void}
 */
function setPath(obj, dotPath, value) {
  const segments = dotPath.split('.');
  const leaf = segments.pop();

  let node = obj;
  for (const segment of segments) {
    const child = node[segment];
    if (!child || typeof child !== 'object') {
      node[segment] = {};
    }
    node = node[segment];
  }

  node[leaf] = value;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Convert a raw environment-variable string to the requested type.
 *
 * - `Number`: `Number(raw)`; non-numeric text yields `NaN`.
 * - `Boolean`: true for "true", "1" or "yes", ignoring case and surrounding
 *   whitespace (so "TRUE" and " Yes " are accepted); false otherwise.
 * - Any other type: `raw` is returned unchanged.
 *
 * @param {string} raw - Raw environment value.
 * @param {Function} [type] - `Number`, `Boolean`, or anything else for
 *   pass-through.
 * @returns {number|boolean|string} Coerced value.
 */
function coerceEnvValue(raw, type) {
  if (type === Number) return Number(raw);
  if (type === Boolean) {
    const normalized = String(raw).trim().toLowerCase();
    return normalized === 'true' || normalized === '1' || normalized === 'yes';
  }
  return raw;
}
|
|
95
|
+
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// loadConfig
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
/**
 * Build the effective configuration by layering, lowest to highest priority:
 * DEFAULTS, a config file, environment variables, programmatic overrides.
 *
 * Config file lookup: `opts.configPath`, else `AQUIFER_CONFIG`, else
 * `aquifer.config.json` then `aquifer.config.js` in the working directory.
 * The first candidate that loads wins. A candidate that fails with ENOENT or
 * MODULE_NOT_FOUND is skipped; any other load error is rethrown.
 *
 * @param {object} [opts]
 * @param {object} [opts.env] - Environment map (defaults to process.env).
 * @param {string} [opts.configPath] - Explicit config file path.
 * @param {string} [opts.cwd] - Directory searched for default config files.
 * @param {object} [opts.overrides] - Values merged last.
 * @returns {object} Merged configuration object.
 */
function loadConfig(opts = {}) {
  const env = opts.env || process.env;
  // Deep copy so callers can never mutate the shared DEFAULTS.
  let config = JSON.parse(JSON.stringify(DEFAULTS));

  // 1. Config file
  const explicitPath = opts.configPath || env.AQUIFER_CONFIG || null;
  let candidates;
  if (explicitPath) {
    candidates = [explicitPath];
  } else {
    const baseDir = opts.cwd || process.cwd();
    candidates = ['aquifer.config.json', 'aquifer.config.js']
      .map((fileName) => path.join(baseDir, fileName));
  }

  for (const file of candidates) {
    try {
      if (file.endsWith('.json')) {
        const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
        config = deepMerge(config, parsed);
      } else if (file.endsWith('.js') || file.endsWith('.cjs')) {
        config = deepMerge(config, require(file));
      }
      // Stop at the first candidate that did not throw.
      break;
    } catch (e) {
      const isMissing = e.code === 'ENOENT' || e.code === 'MODULE_NOT_FOUND';
      if (!isMissing) throw e;
    }
  }

  // 2. Environment variables (unset and empty-string values are ignored)
  for (const [envName, dotPath, type] of ENV_MAP) {
    const raw = env[envName];
    if (raw === undefined || raw === '') continue;
    setPath(config, dotPath, type ? coerceEnvValue(raw, type) : raw);
  }

  // 3. Programmatic overrides
  if (opts.overrides) {
    config = deepMerge(config, opts.overrides);
  }

  return config;
}

module.exports = { loadConfig, DEFAULTS };
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { Pool } = require('pg');
|
|
4
|
+
const { createAquifer, createEmbedder } = require('../../index');
|
|
5
|
+
const { loadConfig } = require('./config');
|
|
6
|
+
const { createLlmFn } = require('./llm');
|
|
7
|
+
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// createAquiferFromConfig
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
/**
 * Build an Aquifer instance from layered configuration (see loadConfig).
 *
 * Creates a pg Pool, plus an embedder and an LLM function when their
 * `baseUrl` and `model` are both configured. The pool and the resolved
 * config are attached to the returned instance as `_pool` and `_config`.
 *
 * @param {object} [overrides] - Programmatic config overrides.
 * @returns {object} Aquifer instance from createAquifer().
 * @throws {Error} When no database URL is configured.
 */
function createAquiferFromConfig(overrides) {
  const config = loadConfig({ overrides });
  const { db, embed, llm } = config;

  if (!db.url) {
    throw new Error('Database URL is required (set DATABASE_URL or AQUIFER_DB_URL)');
  }

  // Pool
  const pool = new Pool({
    connectionString: db.url,
    max: db.max || 10,
    idleTimeoutMillis: db.idleTimeoutMs || 30000,
  });

  // Embed function (optional)
  let embedFn = null;
  if (embed && embed.baseUrl && embed.model) {
    // Provider is guessed from the URL: an "11434" or "ollama" marker selects
    // Ollama; everything else is treated as OpenAI.
    const looksLikeOllama = ['11434', 'ollama'].some((marker) => embed.baseUrl.includes(marker));

    // NOTE(review): the openai branch never forwards embed.baseUrl to
    // createEmbedder — confirm whether custom endpoints are meant to work.
    const providerOptions = looksLikeOllama
      ? {
        provider: 'ollama',
        ollamaUrl: embed.baseUrl.replace(/\/v1\/?$/, ''), // strip trailing /v1
        model: embed.model,
        chunkSize: embed.chunkSize || 32,
      }
      : {
        provider: 'openai',
        openaiApiKey: embed.apiKey || '',
        openaiModel: embed.model,
        openaiDimensions: embed.dim || undefined,
        chunkSize: embed.chunkSize || 100,
      };

    const embedder = createEmbedder({
      ...providerOptions,
      timeout: embed.timeoutMs || 120000,
      maxRetries: embed.maxRetries || 3,
      initialBackoffMs: 2000,
    });
    embedFn = (texts) => embedder.embedBatch(texts);
  }

  // LLM function (optional)
  const llmFn = llm && llm.baseUrl && llm.model ? createLlmFn(llm) : null;

  const aquifer = createAquifer({
    db: pool,
    schema: config.schema,
    tenantId: config.tenantId,
    embed: embedFn ? { fn: embedFn, dim: embed.dim || null } : null,
    llm: llmFn ? { fn: llmFn } : null,
    entities: config.entities,
    rank: config.rank,
  });

  // Attach pool and config so consumers can manage the pool's lifecycle.
  aquifer._pool = pool;
  aquifer._config = config;

  return aquifer;
}

module.exports = { createAquiferFromConfig };
|