@shadowforge0/aquifer-memory 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,354 @@
1
+ <div align="center">
2
+
3
+ # 🌊 Aquifer
4
+
5
+ **PG-native long-term memory for AI agents**
6
+
7
+ *Turn-level embedding, hybrid RRF ranking, optional knowledge graph — all on PostgreSQL + pgvector.*
8
+
9
+ [![npm version](https://img.shields.io/npm/v/aquifer-memory)](https://www.npmjs.com/package/aquifer-memory)
10
+ [![PostgreSQL 15+](https://img.shields.io/badge/PostgreSQL-15%2B-336791)](https://www.postgresql.org/)
11
+ [![pgvector](https://img.shields.io/badge/pgvector-0.7%2B-blue)](https://github.com/pgvector/pgvector)
12
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
13
+
14
+ [English](README.md) | [繁體中文](README_TW.md) | [简体中文](README_CN.md)
15
+
16
+ </div>
17
+
18
+ ---
19
+
20
+ ## Why Aquifer?
21
+
22
+ Most AI memory systems bolt a vector DB on the side. Aquifer takes a different approach: **PostgreSQL is the memory**.
23
+
24
+ Sessions, summaries, turn-level embeddings, entity graph — all live in one database, queried with one connection. No sync layer, no eventual consistency, no extra infrastructure.
25
+
26
+ ### What makes it different
27
+
28
+ | | Aquifer | Typical vector-DB approach |
29
+ |---|---|---|
30
+ | **Storage** | PostgreSQL + pgvector | Separate vector DB + app DB |
31
+ | **Granularity** | Turn-level embeddings (not just session summaries) | Session or document chunks |
32
+ | **Ranking** | 3-way RRF: FTS + session embedding + turn embedding | Single vector similarity |
33
+ | **Knowledge graph** | Built-in entity extraction & co-occurrence | Usually separate system |
34
+ | **Multi-tenant** | `tenant_id` on every table, day-1 | Often an afterthought |
35
+ | **Dependencies** | Just `pg` | Multiple SDKs |
36
+
37
+ ### Before and after
38
+
39
+ **Without turn-level memory — search misses precise moments:**
40
+
41
+ > Query: "What did we decide about the auth middleware?"
42
+ > → Returns a 2000-word session summary that mentions auth somewhere
43
+
44
+ **With Aquifer — search finds the exact turn:**
45
+
46
+ > Query: "What did we decide about the auth middleware?"
47
+ > → Returns the specific user turn: "Let's rip out the old auth middleware — legal flagged it for session token compliance"
48
+
49
+ ---
50
+
51
+ ## Quick Start
52
+
53
+ ### Prerequisites
54
+
55
+ - Node.js >= 18
56
+ - PostgreSQL 15+ with [pgvector](https://github.com/pgvector/pgvector) extension
57
+ - An embedding API (OpenAI, Ollama, or any OpenAI-compatible endpoint)
58
+
59
+ ### Install
60
+
61
+ ```bash
62
+ npm install aquifer-memory
63
+ ```
64
+
65
+ ### Initialize
66
+
67
+ ```javascript
68
+ const { createAquifer } = require('aquifer-memory');
69
+
70
+ const aquifer = createAquifer({
71
+ schema: 'memory', // PG schema name (default: 'aquifer')
72
+ pg: {
73
+ connectionString: 'postgresql://user:pass@localhost:5432/mydb',
74
+ },
75
+ embedder: {
76
+ baseURL: 'http://localhost:11434/v1', // Ollama
77
+ model: 'bge-m3',
78
+ apiKey: 'ollama',
79
+ },
80
+ llm: {
81
+ baseURL: 'https://api.openai.com/v1',
82
+ model: 'gpt-4o-mini',
83
+ apiKey: process.env.OPENAI_API_KEY,
84
+ },
85
+ });
86
+
87
+ // Run migrations (safe to call multiple times)
88
+ await aquifer.migrate();
89
+ ```
90
+
91
+ ### Ingest a session
92
+
93
+ ```javascript
94
+ await aquifer.ingest({
95
+ sessionId: 'conv-001',
96
+ agentId: 'main',
97
+ messages: [
98
+ { role: 'user', content: 'Let me tell you about our new auth approach...' },
99
+ { role: 'assistant', content: 'Got it. So the plan is...' },
100
+ ],
101
+ });
102
+ // Stores session → generates summary → creates turn embeddings → extracts entities
103
+ ```
104
+
105
+ ### Recall
106
+
107
+ ```javascript
108
+ const results = await aquifer.recall('auth middleware decision', {
109
+ agentId: 'main',
110
+ limit: 5,
111
+ });
112
+ // Returns ranked sessions with scores, using 3-way RRF fusion
113
+ ```
114
+
115
+ ---
116
+
117
+ ## Architecture
118
+
119
+ ```
120
+ ┌─────────────────────────────────────────────────────────────┐
121
+ │ createAquifer (entry) │
122
+ │ Config · Migration · Ingest · Recall · Enrich │
123
+ └────────┬──────────┬──────────┬──────────┬───────────────────┘
124
+ │ │ │ │
125
+ ┌────▼───┐ ┌────▼────┐ ┌──▼───┐ ┌───▼──────────┐
126
+ │storage │ │hybrid- │ │entity│ │ pipeline/ │
127
+ │ .js │ │rank.js │ │ .js │ │summarize.js │
128
+ └────────┘ └─────────┘ └──────┘ │embed.js │
129
+ │ │ │extract-ent.js │
130
+ ┌────▼───────────┐ ┌───▼──┐ └───────────────┘
131
+ │ PostgreSQL │ │ LLM │
132
+ │ + pgvector │ │ API │
133
+ └────────────────┘ └──────┘
134
+
135
+ ┌─────────────────────────────┐
136
+ │ schema/ │
137
+ │ 001-base.sql (sessions, │
138
+ │ summaries, turns, FTS) │
139
+ │ 002-entities.sql (KG) │
140
+ └─────────────────────────────┘
141
+ ```
142
+
143
+ ### File Reference
144
+
145
+ | File | Purpose |
146
+ |------|---------|
147
+ | `index.js` | Entry point — exports `createAquifer`, `createEmbedder` |
148
+ | `core/aquifer.js` | Main facade: `migrate()`, `ingest()`, `recall()`, `enrich()` |
149
+ | `core/storage.js` | Session/summary/turn CRUD, FTS search, embedding search |
150
+ | `core/entity.js` | Entity upsert, mention tracking, relation graph, normalization |
151
+ | `core/hybrid-rank.js` | 3-way RRF fusion, time decay, entity boost scoring |
152
+ | `pipeline/summarize.js` | LLM-powered session summarization with structured output |
153
+ | `pipeline/embed.js` | Embedding client (any OpenAI-compatible API) |
154
+ | `pipeline/extract-entities.js` | LLM-powered entity extraction (12 types) |
155
+ | `schema/001-base.sql` | DDL: sessions, summaries, turn_embeddings, FTS indexes |
156
+ | `schema/002-entities.sql` | DDL: entities, mentions, relations, entity_sessions |
157
+
158
+ ---
159
+
160
+ ## Core Features
161
+
162
+ ### 3-Way Hybrid Retrieval (RRF)
163
+
164
+ ```
165
+ Query ──┬── FTS (BM25) ──┐
166
+ ├── Session embedding search ──├── RRF Fusion → Time Decay → Entity Boost → Results
167
+ └── Turn embedding search ──┘
168
+ ```
169
+
170
+ - **Full-text search** — PostgreSQL `tsvector` with language-aware ranking
171
+ - **Session embedding** — cosine similarity on session summaries
172
+ - **Turn embedding** — cosine similarity on individual user turns
173
+ - **Reciprocal Rank Fusion** — merges all three ranked lists (K=60)
174
+ - **Time decay** — sigmoid decay with configurable midpoint and steepness
175
+ - **Entity boost** — sessions mentioning query-relevant entities get a score boost
176
+
177
+ ### Turn-Level Embeddings
178
+
179
+ Not just session summaries — Aquifer embeds each meaningful user turn individually.
180
+
181
+ - Filters noise: short messages, slash commands, confirmations ("ok", "got it")
182
+ - Truncates at 2000 chars, skips turns under 5 chars
183
+ - Stores turn text + embedding + position for precise retrieval
184
+
185
+ ### Knowledge Graph
186
+
187
+ Built-in entity extraction and relationship tracking:
188
+
189
+ - **12 entity types**: person, project, concept, tool, metric, org, place, event, doc, task, topic, other
190
+ - **Entity normalization**: NFKC + homoglyph mapping + case folding
191
+ - **Co-occurrence relations**: undirected edges with frequency tracking
192
+ - **Entity-session mapping**: which entities appear in which sessions
193
+ - **Entity boost in ranking**: sessions with relevant entities score higher
194
+
195
+ ---
196
+
197
+ ## Benchmark: LongMemEval
198
+
199
+ We tested Aquifer's retrieval pipeline on [LongMemEval_S](https://github.com/xiaowu0162/LongMemEval) — 470 questions across 19,195 sessions (98,845 turn embeddings).
200
+
201
+ **Setup:** Per-question haystack scoping (matching official methodology), bge-m3 embeddings via OpenRouter, turn-level user-only embedding.
202
+
203
+ | Metric | Aquifer (bge-m3) |
204
+ |--------|-----------------|
205
+ | R@1 | 89.6% |
206
+ | R@3 | 96.6% |
207
+ | R@5 | 98.1% |
208
+ | R@10 | 98.9% |
209
+
210
+ **Key finding:** Turn-level embedding is the main driver — going from session-level (R@1=26.8%) to turn-level (R@1=89.6%) is roughly a 3.3x improvement.
211
+
212
+ ### Multi-Tenant
213
+
214
+ Every table includes `tenant_id` (default: `'default'`). Isolation is enforced at the query level — no cross-tenant data leakage by design.
215
+
216
+ ### Schema-per-deployment
217
+
218
+ Pass `schema: 'my_app'` to `createAquifer()` and all tables live under that PostgreSQL schema. Run multiple Aquifer instances in the same database without conflicts.
219
+
220
+ ---
221
+
222
+ ## API Reference
223
+
224
+ ### `createAquifer(config)`
225
+
226
+ Returns an Aquifer instance with the following methods:
227
+
228
+ #### `aquifer.migrate()`
229
+
230
+ Runs SQL migrations (idempotent). Creates tables, indexes, and extensions.
231
+
232
+ #### `aquifer.ingest(options)`
233
+
234
+ Ingests a session: stores messages, generates summary, creates turn embeddings, extracts entities.
235
+
236
+ ```javascript
237
+ await aquifer.ingest({
238
+ sessionId: 'unique-id',
239
+ agentId: 'main',
240
+ source: 'api', // optional, default 'api'
241
+ messages: [{ role, content }],
242
+ tenantId: 'default', // optional
243
+ model: 'gpt-4o', // optional metadata
244
+ tokensIn: 1500, // optional
245
+ tokensOut: 800, // optional
246
+ });
247
+ ```
248
+
249
+ #### `aquifer.recall(query, options)`
250
+
251
+ Hybrid search across sessions.
252
+
253
+ ```javascript
254
+ const results = await aquifer.recall('search query', {
255
+ agentId: 'main',
256
+ tenantId: 'default',
257
+ limit: 10, // max results
258
+ ftsLimit: 20, // FTS candidate pool
259
+ embLimit: 20, // embedding candidate pool
260
+ turnLimit: 20, // turn embedding candidate pool
261
+ midpointDays: 45, // time decay midpoint
262
+ entityBoostWeight: 0.18, // entity boost factor
263
+ });
264
+ // Returns: [{ session_id, score, title, overview, started_at, ... }]
265
+ ```
266
+
267
+ #### `aquifer.enrich(sessionId, options)`
268
+
269
+ Re-processes an existing session: regenerate summary, embeddings, and entities.
270
+
271
+ #### `aquifer.close()`
272
+
273
+ Closes the PostgreSQL connection pool.
274
+
275
+ ---
276
+
277
+ ## Configuration
278
+
279
+ ```javascript
280
+ createAquifer({
281
+ // PostgreSQL schema name (all tables created under this schema)
282
+ schema: 'aquifer',
283
+
284
+ // PostgreSQL connection
285
+ pg: {
286
+ connectionString: 'postgresql://...',
287
+ // or individual: host, port, database, user, password
288
+ max: 10, // pool size
289
+ },
290
+
291
+ // Embedding provider (any OpenAI-compatible API)
292
+ embedder: {
293
+ baseURL: 'http://localhost:11434/v1',
294
+ model: 'bge-m3',
295
+ apiKey: 'ollama',
296
+ dimensions: 1024, // optional
297
+ timeout: 30000, // ms, default 30s
298
+ },
299
+
300
+ // LLM for summarization & entity extraction
301
+ llm: {
302
+ baseURL: 'https://api.openai.com/v1',
303
+ model: 'gpt-4o-mini',
304
+ apiKey: process.env.OPENAI_API_KEY,
305
+ timeout: 60000, // ms, default 60s
306
+ },
307
+
308
+ // Tenant isolation
309
+ tenantId: 'default',
310
+ });
311
+ ```
312
+
313
+ ---
314
+
315
+ ## Database Schema
316
+
317
+ ### 001-base.sql
318
+
319
+ | Table | Purpose |
320
+ |-------|---------|
321
+ | `sessions` | Raw conversation data with messages (JSONB), token counts, timestamps |
322
+ | `session_summaries` | LLM-generated structured summaries with embeddings |
323
+ | `turn_embeddings` | Per-turn user message embeddings for precise retrieval |
324
+
325
+ Key indexes: GIN on messages, GiST on `tsvector`, ivfflat on embeddings, B-tree on tenant/agent/timestamps.
326
+
327
+ ### 002-entities.sql
328
+
329
+ | Table | Purpose |
330
+ |-------|---------|
331
+ | `entities` | Normalized named entities with type, aliases, frequency, optional embedding |
332
+ | `entity_mentions` | Entity × session join with mention count and context |
333
+ | `entity_relations` | Co-occurrence edges (undirected, `CHECK src < dst`) |
334
+ | `entity_sessions` | Entity-session association for boost scoring |
335
+
336
+ Key indexes: trigram on entity names, GiST on embeddings, composite on tenant/agent.
337
+
338
+ ---
339
+
340
+ ## Dependencies
341
+
342
+ | Package | Purpose |
343
+ |---------|---------|
344
+ | `pg` ≥ 8.13 | PostgreSQL client |
345
+
346
+ That's it. Aquifer has **one runtime dependency**.
347
+
348
+ LLM and embedding calls use raw HTTP — no SDK required.
349
+
350
+ ---
351
+
352
+ ## License
353
+
354
+ MIT
@@ -0,0 +1,314 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Aquifer CLI
6
+ *
7
+ * Usage:
8
+ * aquifer migrate Run database migrations
9
+ * aquifer recall <query> [options] Search sessions
10
+ * aquifer backfill [options] Enrich pending sessions
11
+ * aquifer stats [options] Show database statistics
12
+ * aquifer export [options] Export sessions
13
+ * aquifer mcp Start MCP server
14
+ */
15
+
16
+ const { createAquiferFromConfig } = require('./shared/factory');
17
+ const { loadConfig } = require('./shared/config');
18
+
19
+ // ---------------------------------------------------------------------------
20
+ // Argument parser (minimal, no deps)
21
+ // ---------------------------------------------------------------------------
22
+
23
/**
 * Parse a raw argument vector into positionals and `--flags`.
 *
 * Rules:
 *  - A bare `--` ends flag parsing; everything after it is positional.
 *  - `--name value` stores `value` when `name` is a value-taking flag and the
 *    next token does not itself start with `--`.
 *  - `--name=value` stores `value` when `name` is a value-taking flag.
 *  - Any other `--name` is stored as boolean `true` (including a value-taking
 *    flag that was given no value).
 *  - Everything else is appended to the positional list `_`.
 *
 * @param {string[]} argv - Arguments, e.g. `process.argv.slice(2)`.
 * @returns {{ _: string[], flags: Object<string, string|boolean> }}
 */
function parseArgs(argv) {
  const args = { _: [], flags: {} };
  // Flags that take a value (not boolean)
  const VALUE_FLAGS = new Set(['limit', 'agent-id', 'source', 'date-from', 'date-to', 'output', 'format', 'config', 'status', 'concurrency']);

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];

    if (token === '--') {
      args._.push(...argv.slice(i + 1));
      break;
    }

    if (!token.startsWith('--')) {
      args._.push(token);
      continue;
    }

    const key = token.slice(2);

    // `--limit=5` form. Only honoured for value-taking flags so that any other
    // token containing '=' keeps its previous meaning (a boolean keyed by the
    // whole token).
    const eq = key.indexOf('=');
    if (eq > 0 && VALUE_FLAGS.has(key.slice(0, eq))) {
      args.flags[key.slice(0, eq)] = key.slice(eq + 1);
      continue;
    }

    const hasValue = VALUE_FLAGS.has(key) && i + 1 < argv.length && !argv[i + 1].startsWith('--');
    if (hasValue) {
      i += 1;
      args.flags[key] = argv[i];
    } else {
      args.flags[key] = true;
    }
  }

  return args;
}
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // Commands
45
+ // ---------------------------------------------------------------------------
46
+
47
/**
 * `migrate` command: run the instance's migrations, then report success.
 *
 * @param {{ migrate: function(): Promise<*> }} aquifer - Aquifer instance.
 * @returns {Promise<void>} Rejects with whatever `aquifer.migrate()` rejects with.
 */
async function cmdMigrate(aquifer) {
  const pendingMigration = aquifer.migrate();
  await pendingMigration;

  const successMessage = 'Migrations applied successfully.';
  console.log(successMessage);
}
51
+
52
/**
 * `recall` command: search sessions and print the ranked results.
 *
 * The query is every positional argument after the command name, joined by
 * spaces. With `--json` the raw result array is printed as pretty JSON;
 * otherwise one numbered entry per result is printed (score, title, date,
 * agent, plus overview and matched turn text when present).
 *
 * @param {object} aquifer - Aquifer instance exposing `recall(query, options)`.
 * @param {{ _: string[], flags: Object<string, string|boolean> }} args - Parsed CLI arguments.
 * @returns {Promise<void>}
 * @throws {Error} When `--limit` is not a non-negative integer (for example the
 *   flag was given without a value), instead of passing `NaN` on to `recall`.
 */
async function cmdRecall(aquifer, args) {
  const query = args._.slice(1).join(' ');
  if (!query) {
    console.error('Usage: aquifer recall <query> [--limit N] [--agent-id ID] [--json]');
    process.exit(1);
  }

  // A value-less `--limit` is parsed as boolean `true`; parseInt would turn it
  // into NaN. Fail early with a readable message.
  const rawLimit = args.flags.limit || '5';
  const limit = Number.parseInt(rawLimit, 10);
  if (!Number.isInteger(limit) || limit < 0) {
    throw new Error(`Invalid --limit value: ${rawLimit} (expected a non-negative integer)`);
  }

  const results = await aquifer.recall(query, {
    limit,
    agentId: args.flags['agent-id'] || undefined,
    source: args.flags.source || undefined,
    dateFrom: args.flags['date-from'] || undefined,
    dateTo: args.flags['date-to'] || undefined,
  });

  if (args.flags.json) {
    console.log(JSON.stringify(results, null, 2));
    return;
  }

  if (results.length === 0) {
    console.log('No results found.');
    return;
  }

  for (let i = 0; i < results.length; i++) {
    const r = results[i];
    const ss = r.structuredSummary || {};
    // Prefer the structured title, then a prefix of the plain summary.
    const title = ss.title || r.summaryText?.slice(0, 60) || '(untitled)';
    const date = r.startedAt ? new Date(r.startedAt).toISOString().slice(0, 10) : '?';
    console.log(`${i + 1}. [${r.score?.toFixed(3)}] ${title} (${date}, ${r.agentId})`);
    if (ss.overview) console.log(`   ${ss.overview.slice(0, 200)}`);
    if (r.matchedTurnText) console.log(`   > ${r.matchedTurnText.slice(0, 150)}`);
    console.log();
  }
}
88
+
89
/**
 * `backfill` command: enrich sessions whose processing is pending or failed.
 *
 * Selects up to `--limit` (default 100) sessions of the configured tenant with
 * `processing_status` in ('pending', 'failed'), newest first, and calls
 * `aquifer.enrich()` on each one sequentially. With `--dry-run` the candidates
 * are only listed. Per-session failures are logged and counted; if any session
 * failed, `process.exitCode` is set to 2.
 *
 * Reads `aquifer._config` (schema, tenantId) and `aquifer._pool` directly.
 *
 * @param {object} aquifer - Aquifer instance exposing `enrich`, `_config`, `_pool`.
 * @param {{ _: string[], flags: Object<string, string|boolean> }} args - Parsed CLI arguments.
 * @returns {Promise<void>}
 * @throws {Error} When `--limit` is not a non-negative integer.
 */
async function cmdBackfill(aquifer, args) {
  // A value-less `--limit` parses to boolean `true` -> NaN; reject it up front
  // rather than sending NaN as the SQL LIMIT parameter.
  const rawLimit = args.flags.limit || '100';
  const limit = Number.parseInt(rawLimit, 10);
  if (!Number.isInteger(limit) || limit < 0) {
    throw new Error(`Invalid --limit value: ${rawLimit} (expected a non-negative integer)`);
  }

  const dryRun = !!args.flags['dry-run'];
  const skipSummary = !!args.flags['skip-summary'];
  const skipTurnEmbed = !!args.flags['skip-turn-embed'];
  const skipEntities = !!args.flags['skip-entities'];

  const config = aquifer._config || {};
  const schema = config.schema || 'aquifer';
  const tenantId = config.tenantId || 'default';
  const pool = aquifer._pool;

  if (!pool) {
    console.error('Backfill requires direct pool access.');
    process.exit(1);
  }

  // Quote the schema as a SQL identifier; embedded double quotes must be
  // doubled or the identifier would terminate early.
  const qi = (id) => `"${String(id).replace(/"/g, '""')}"`;
  const { rows } = await pool.query(`
    SELECT session_id, agent_id, processing_status
    FROM ${qi(schema)}.sessions
    WHERE tenant_id = $1
      AND processing_status IN ('pending', 'failed')
    ORDER BY started_at DESC
    LIMIT $2
  `, [tenantId, limit]);

  console.log(`Found ${rows.length} sessions to backfill${dryRun ? ' (dry-run)' : ''}`);

  let enriched = 0;
  let failed = 0;
  for (const row of rows) {
    if (dryRun) {
      console.log(`  [dry-run] ${row.session_id} (${row.agent_id}) status=${row.processing_status}`);
      continue;
    }

    try {
      const result = await aquifer.enrich(row.session_id, {
        agentId: row.agent_id,
        skipSummary,
        skipTurnEmbed,
        skipEntities,
      });
      enriched++;
      console.log(`  [${enriched}] ${row.session_id}: ${result.turnsEmbedded} turns, ${result.entitiesFound} entities`);
    } catch (err) {
      // Keep going: one bad session should not stop the rest of the batch.
      failed++;
      console.error(`  [error] ${row.session_id}: ${err.message}`);
    }
  }

  console.log(`\nDone. enriched=${enriched} failed=${failed} total=${rows.length}`);
  if (failed > 0) process.exitCode = 2;
}
143
+
144
/**
 * `stats` command: print row counts and the session date range for the
 * configured tenant.
 *
 * Queries session counts grouped by `processing_status`, the number of
 * summaries, turn embeddings and entities, and the earliest/latest
 * `started_at`. A failing entities query is reported as 0 entities rather than
 * failing the command (presumably because the entities table is optional —
 * confirm against the schema migrations). Output is JSON with `--json`,
 * otherwise a short human-readable summary.
 *
 * Reads `aquifer._config` (schema, tenantId) and `aquifer._pool` directly.
 *
 * @param {object} aquifer - Aquifer instance exposing `_config` and `_pool`.
 * @param {{ _: string[], flags: Object<string, string|boolean> }} args - Parsed CLI arguments.
 * @returns {Promise<void>}
 */
async function cmdStats(aquifer, args) {
  const config = aquifer._config || {};
  const schema = config.schema || 'aquifer';
  const tenantId = config.tenantId || 'default';
  const pool = aquifer._pool;

  if (!pool) {
    console.error('Stats requires direct pool access.');
    process.exit(1);
  }

  // Quote the schema as a SQL identifier; embedded double quotes must be
  // doubled or the identifier would terminate early.
  const qi = (id) => `"${String(id).replace(/"/g, '""')}"`;
  const schemaSql = qi(schema);

  // All five queries are independent, so run them together.
  const [sessions, summaries, turns, entities, timeRange] = await Promise.all([
    pool.query(`SELECT processing_status, COUNT(*)::int as count FROM ${schemaSql}.sessions WHERE tenant_id = $1 GROUP BY processing_status`, [tenantId]),
    pool.query(`SELECT COUNT(*)::int as count FROM ${schemaSql}.session_summaries WHERE tenant_id = $1`, [tenantId]),
    pool.query(`SELECT COUNT(*)::int as count FROM ${schemaSql}.turn_embeddings WHERE tenant_id = $1`, [tenantId]),
    // Best-effort: an error here is treated as "no entities".
    pool.query(`SELECT COUNT(*)::int as count FROM ${schemaSql}.entities WHERE tenant_id = $1`, [tenantId]).catch(() => ({ rows: [{ count: 0 }] })),
    pool.query(`SELECT MIN(started_at) as earliest, MAX(started_at) as latest FROM ${schemaSql}.sessions WHERE tenant_id = $1`, [tenantId]),
  ]);

  const stats = {
    sessions: Object.fromEntries(sessions.rows.map((r) => [r.processing_status, r.count])),
    sessionTotal: sessions.rows.reduce((sum, r) => sum + r.count, 0),
    summaries: summaries.rows[0]?.count || 0,
    turnEmbeddings: turns.rows[0]?.count || 0,
    entities: entities.rows[0]?.count || 0,
    earliest: timeRange.rows[0]?.earliest || null,
    latest: timeRange.rows[0]?.latest || null,
  };

  if (args.flags.json) {
    console.log(JSON.stringify(stats, null, 2));
  } else {
    console.log(`Sessions: ${stats.sessionTotal} (${Object.entries(stats.sessions).map(([k, v]) => `${k}: ${v}`).join(', ')})`);
    console.log(`Summaries: ${stats.summaries}`);
    console.log(`Turn embeddings: ${stats.turnEmbeddings}`);
    console.log(`Entities: ${stats.entities}`);
    if (stats.earliest) console.log(`Range: ${new Date(stats.earliest).toISOString().slice(0, 10)} — ${new Date(stats.latest).toISOString().slice(0, 10)}`);
  }
}
185
+
186
/**
 * `export` command: write sessions (with their summary, if any) as JSONL.
 *
 * Selects up to `--limit` (default 1000) sessions of the configured tenant,
 * newest first, optionally filtered by `--agent-id` and `--source`, and writes
 * one JSON object per line to `--output PATH` or to stdout. When writing to a
 * file, the function resolves only after the file stream has finished, and a
 * stream error (e.g. unwritable path) rejects instead of surfacing as an
 * unhandled 'error' event.
 *
 * Reads `aquifer._config` (schema, tenantId) and `aquifer._pool` directly.
 *
 * @param {object} aquifer - Aquifer instance exposing `_config` and `_pool`.
 * @param {{ _: string[], flags: Object<string, string|boolean> }} args - Parsed CLI arguments.
 * @returns {Promise<void>}
 * @throws {Error} When `--limit` is not a non-negative integer, or when the
 *   output file cannot be written.
 */
async function cmdExport(aquifer, args) {
  const config = aquifer._config || {};
  const schema = config.schema || 'aquifer';
  const tenantId = config.tenantId || 'default';
  const pool = aquifer._pool;
  const output = args.flags.output || null;

  // A value-less `--limit` parses to boolean `true` -> NaN; reject it up front
  // rather than sending NaN as the SQL LIMIT parameter.
  const rawLimit = args.flags.limit || '1000';
  const limit = Number.parseInt(rawLimit, 10);
  if (!Number.isInteger(limit) || limit < 0) {
    throw new Error(`Invalid --limit value: ${rawLimit} (expected a non-negative integer)`);
  }

  if (!pool) {
    console.error('Export requires direct pool access.');
    process.exit(1);
  }

  // Quote the schema as a SQL identifier; embedded double quotes must be
  // doubled or the identifier would terminate early.
  const qi = (id) => `"${String(id).replace(/"/g, '""')}"`;
  const where = [`s.tenant_id = $1`];
  const params = [tenantId];

  // Each optional filter appends its value and references it by position.
  if (args.flags['agent-id']) { params.push(args.flags['agent-id']); where.push(`s.agent_id = $${params.length}`); }
  if (args.flags.source) { params.push(args.flags.source); where.push(`s.source = $${params.length}`); }
  params.push(limit);

  const { rows } = await pool.query(`
    SELECT s.*, ss.summary_text, ss.structured_summary
    FROM ${qi(schema)}.sessions s
    LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
    WHERE ${where.join(' AND ')}
    ORDER BY s.started_at DESC
    LIMIT $${params.length}
  `, params);

  const toLine = (row) => JSON.stringify({
    session_id: row.session_id,
    agent_id: row.agent_id,
    source: row.source,
    started_at: row.started_at,
    msg_count: row.msg_count,
    processing_status: row.processing_status,
    summary: row.structured_summary || row.summary_text || null,
  }) + '\n';

  if (!output) {
    for (const row of rows) process.stdout.write(toLine(row));
    return;
  }

  const stream = require('fs').createWriteStream(output);
  await new Promise((resolve, reject) => {
    stream.on('error', reject);
    stream.on('finish', resolve);
    for (const row of rows) stream.write(toLine(row));
    stream.end();
  });
  console.error(`Exported ${rows.length} sessions to ${output}`);
}
233
+
234
+ // ---------------------------------------------------------------------------
235
+ // Main
236
+ // ---------------------------------------------------------------------------
237
+
238
/**
 * CLI entry point: parse `process.argv`, dispatch to the matching command and
 * close the connection pool afterwards.
 *
 * The command is the first positional argument, so flags may appear before it
 * (e.g. `aquifer --config ./aquifer.json stats`). Unknown or missing commands
 * are rejected before an Aquifer instance is created. `mcp` is delegated to
 * `./mcp` and does not create an Aquifer instance here.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const argv = process.argv.slice(2);
  if (argv.length === 0 || argv[0] === '--help' || argv[0] === '-h') {
    console.log(`Usage: aquifer <command> [options]

Commands:
migrate Run database migrations
recall <query> Search sessions (requires embed config)
backfill Enrich pending sessions
stats Show database statistics
export Export sessions as JSONL
mcp Start MCP server

Options:
--limit N Limit results
--agent-id ID Filter by agent
--source NAME Filter by source
--date-from YYYY-MM-DD Start date
--date-to YYYY-MM-DD End date
--json JSON output
--dry-run Preview only (backfill)
--output PATH Output file (export)
--config PATH Config file path`);
    process.exit(0);
  }

  const args = parseArgs(argv);
  // First positional, not argv[0]: leading flags must not be mistaken for the command.
  const command = args._[0];

  // MCP: delegate to mcp.js
  if (command === 'mcp') {
    require('./mcp').main().catch(err => {
      console.error(`aquifer mcp: ${err.message}`);
      process.exit(1);
    });
    return;
  }

  // A Map avoids matching inherited object keys such as "constructor".
  const handlers = new Map([
    ['migrate', cmdMigrate],
    ['recall', cmdRecall],
    ['backfill', cmdBackfill],
    ['stats', cmdStats],
    ['export', cmdExport],
  ]);

  const handler = handlers.get(command);
  if (!handler) {
    // Reject before creating the Aquifer instance so no pool is opened for a typo.
    console.error(command === undefined
      ? `Missing command. Run 'aquifer --help' for usage.`
      : `Unknown command: ${command}. Run 'aquifer --help' for usage.`);
    process.exit(1);
  }

  // All other commands need an Aquifer instance
  if (args.flags.config) {
    // Will be picked up by loadConfig
    process.env.AQUIFER_CONFIG = args.flags.config;
  }

  const aquifer = createAquiferFromConfig({});

  try {
    await handler(aquifer, args);
  } finally {
    if (aquifer._pool) await aquifer._pool.end();
  }
}
310
+
311
// Run the CLI. Any rejection from main() is reported as a single line on
// stderr and mapped to exit status 1.
main().catch((failure) => {
  const { message } = failure;
  console.error(`aquifer: ${message}`);
  process.exit(1);
});