@shadowforge0/aquifer-memory 1.3.0 → 1.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -132,6 +132,8 @@ Need LLM summarization, the knowledge graph, OpenAI embeddings, or the reranker?
132
132
  | `AQUIFER_RERANK_PROVIDER` | No | Reranker provider: `tei`, `jina`, `openrouter` | `tei` |
133
133
  | `AQUIFER_RERANK_BASE_URL` | No | Reranker endpoint | `http://localhost:8080` |
134
134
  | `AQUIFER_AGENT_ID` | No | Default agent ID | `main` |
135
+ | `AQUIFER_MIGRATIONS_MODE` | No | Startup handshake mode: `apply` (default), `check`, `off` | `apply` |
136
+ | `AQUIFER_MIGRATION_LOCK_TIMEOUT_MS` | No | Milliseconds to wait for the migration advisory lock before failing with `AQ_MIGRATION_LOCK_TIMEOUT` (default 30000) | `30000` |
135
137
 
136
138
  Full env-to-config mapping is in [consumers/shared/config.js](consumers/shared/config.js).
137
139
 
@@ -377,9 +379,36 @@ Returns an Aquifer instance. Config:
377
379
  }
378
380
  ```
379
381
 
382
+ #### `aquifer.init()`
383
+
384
+ Startup handshake — resolves pending migrations and returns a StartupEnvelope. Hosts should `await` this before accepting traffic. In `apply` mode a `ready=false` envelope is the signal to abort startup.
385
+
386
+ ```javascript
387
+ const envelope = await aquifer.init();
388
+ // {
389
+ // ready: true,
390
+ // memoryMode: 'rw', // 'rw' | 'ro' | 'off'
391
+ // migrationMode: 'apply', // 'apply' | 'check' | 'off'
392
+ // pendingMigrations: [], // migration ids still outstanding
393
+ // appliedMigrations: ['001-base', '003-trust-feedback', '004-completion', '006-insights'],
394
+ // error: null, // { code, message } on failure
395
+ // durationMs: 1035,
396
+ // }
397
+ ```
398
+
399
+ The MCP consumer (`consumers/mcp.js`) already wires `aquifer.init()` before `server.connect()` and exits non-zero if `ready=false` under `apply` mode.
400
+
401
+ #### `aquifer.listPendingMigrations()` / `aquifer.getMigrationStatus()`
402
+
403
+ Returns `{ required, applied, pending, lastRunAt }` via a `pg_tables` signature probe. No DDL runs. Use it from a health check or from a consumer that wants to surface drift before calling `init()`.
404
+
380
405
  #### `aquifer.migrate()`
381
406
 
382
- Runs SQL migrations (idempotent). Creates tables, indexes, triggers, and extensions.
407
+ Runs SQL migrations (idempotent). Creates tables, indexes, triggers, and extensions. Uses `pg_try_advisory_lock` with a 250 ms poll and a `lockTimeoutMs` deadline (30 s default); on exhaustion throws with `code: 'AQ_MIGRATION_LOCK_TIMEOUT'`. On success returns `{ ok: true, durationMs, notices, ddlExecuted }`; on failure throws an error whose `err.notices` / `err.failedAt` describe the stage that blew up. Most callers should go through `aquifer.init()` instead.
408
+
409
+ #### `aquifer.ensureMigrated()`
410
+
411
+ Lazy idempotent wrapper — fires `migrate()` once on first call, no-ops afterwards. Honors `migrations.mode`: `check` only probes, `off` marks the instance migrated without touching the DB.
383
412
 
384
413
  #### `aquifer.commit(sessionId, messages, opts)`
385
414
 
@@ -463,6 +492,26 @@ const result = await aquifer.bootstrap({
463
492
 
464
493
  Cross-session dedup on open loops and decisions, sentinel filtering (removes 無/none/n/a), and maxChars truncation.
465
494
 
495
+ #### `aquifer.insights.commitInsight(opts)` / `recallInsights(query, opts)` / `markStale(id)` / `supersede(oldId, newId)`
496
+
497
+ Higher-order reflections distilled from session windows (preferences, patterns, frustrations, workflows). Split into two identities: a **canonical key** that describes what the insight is *about* (stable across rewordings), and an **idempotency key** that describes which revision of that claim was written.
498
+
499
+ ```javascript
500
+ await aquifer.insights.commitInsight({
501
+ agentId: 'main',
502
+ type: 'preference',
503
+ canonicalClaim: 'mk prefers checking context before coding', // required — short declarative claim
504
+ title: 'Context-first discipline', // best-effort display
505
+ body: '…',
506
+ entities: ['mk', 'claude code'],
507
+ sourceSessionIds: ['sess-a', 'sess-b'],
508
+ evidenceWindow: { from: isoString, to: isoString },
509
+ importance: 0.9,
510
+ });
511
+ ```
512
+
513
+ Write rules: **duplicate** (same idempotency key → return existing), **revision** (same canonical key + newer evidence → INSERT + inline supersede of prior active), **back-fill revision** (same canonical key + older evidence → INSERT without supersede), **stale replay** (same canonical key + same body → return existing). Rows written before 1.5.6 are not retrofitted; their `canonical_key_v2` stays `NULL` and they age out naturally.
514
+
466
515
  #### `aquifer.close()`
467
516
 
468
517
  Closes the PostgreSQL connection pool (only if Aquifer created it).
@@ -498,9 +547,19 @@ createAquifer({
498
547
  access: 0.10, // access frequency weight
499
548
  entityBoost: 0.18, // entity match boost
500
549
  },
550
+ migrations: {
551
+ mode: 'apply', // 'apply' | 'check' | 'off'
552
+ lockTimeoutMs: 30000, // abort init() if advisory lock held this long
553
+ startupTimeoutMs: 60000, // overall init() deadline (plan probe + DDL combined)
554
+ onEvent: null, // (e) => void — lifecycle hook, see below
555
+ },
501
556
  });
502
557
  ```
503
558
 
559
+ ### Startup observability
560
+
561
+ Set `migrations.onEvent` to observe the lifecycle without parsing logs. Event names: `init_started`, `check_completed`, `apply_started`, `apply_succeeded`, `apply_failed`. Each payload carries `schema`, `mode`, the plan, `ddlExecuted`, `durationMs`, and on failure the `error` / `failedAt` / `notices`. No listener → zero cost.
562
+
504
563
  ### Entity Scope
505
564
 
506
565
  `entities.scope` defines the namespace for entity identity. The unique constraint is `(tenant_id, normalized_name, entity_scope)` — the same entity name in different scopes creates separate entities. This decouples entity identity from `agentId`, allowing multiple agents to share an entity namespace.
@@ -542,6 +601,22 @@ Key indexes: trigram on entity names, GiST on embeddings, unique on `(tenant_id,
542
601
 
543
602
  Also adds `trust_score` column to `session_summaries` (default 0.5, range 0–1).
544
603
 
604
+ ### 005-entity-state-history.sql *(entities enabled)*
605
+
606
+ | Table | Purpose |
607
+ |-------|---------|
608
+ | `entity_state_history` | Temporal state-change log with partial `UNIQUE (tenant, agent, entity, attribute) WHERE valid_to IS NULL` to enforce at-most-one-current. Out-of-order backfill is supported via predecessor/successor overlap checks |
609
+
610
+ Opt-in pipeline (`createAquifer({stateChanges: {enabled, whitelist, confidenceThreshold, timeoutMs, ...}})`) extracts temporal state transitions from session text during `enrich()`; off by default to control LLM cost.
611
+
612
+ ### 006-insights.sql
613
+
614
+ | Table | Purpose |
615
+ |-------|---------|
616
+ | `insights` | Higher-order reflections with TSTZRANGE evidence window, importance, GIN on source_session_ids, HNSW on 1024-dim embedding, and a non-unique partial index on `canonical_key_v2` for the canonical/revision dedup contract |
617
+
618
+ Key indexes: `idx_insights_canonical_v2_active` (partial on active rows with canonical key set), `idx_insights_idempotency_key` (unique on revision key).
619
+
545
620
  ---
546
621
 
547
622
  ## Troubleshooting
@@ -556,6 +631,10 @@ Also adds `trust_score` column to `session_summaries` (default 0.5, range 0–1)
556
631
 
557
632
  **Embedding provider connection refused** — Verify your `AQUIFER_EMBED_BASE_URL` is reachable. For local Ollama, make sure the server is running and the model is pulled (`ollama pull bge-m3`).
558
633
 
634
+ **`AQ_MIGRATION_LOCK_TIMEOUT` on startup** — another process holds the migration advisory lock for `aquifer:<schema>`. Either it is a concurrent `aquifer.init()` racing yours (expected; one will win, the other re-runs and finds `pending=[]`) or a crashed worker left the lock held. Raise `migrations.lockTimeoutMs`, or list the lock holders with `SELECT pid FROM pg_locks WHERE locktype='advisory'`, confirm which pid is dead, and terminate only that backend with `SELECT pg_terminate_backend(<pid>)`. Do not run `pg_terminate_backend(pid)` over every row of `pg_locks` — that kills every session holding any advisory lock, not just the stale one.
635
+
636
+ **MCP process exits non-zero at startup** — expected when `migrations.mode=apply` and `aquifer.init()` returns `ready=false`. Read the `[aquifer-mcp] startup aborted` line on stderr for the `error.code` / `failedAt`. If you need the old lazy-migrate-on-first-tool-call behaviour instead, set `AQUIFER_MIGRATIONS_MODE=check` (and run `migrate()` out of band) or `=off`.
637
+
559
638
  ---
560
639
 
561
640
  ## Dependencies
@@ -222,26 +222,39 @@ function createPersona(personaOpts = {}) {
222
222
  if ((ctx?.sessionKey || '').includes('subagent')) return null;
223
223
  return {
224
224
  name: 'session_recall',
225
- description: 'Search stored sessions by keyword.',
225
+ description: 'Search stored sessions by keyword or natural language. Use entities when the user names specific people, projects, files, tools, or concepts; use entity_mode="all" when every named entity must co-occur (default "any" boosts). Use mode to force fts/vector/hybrid (default hybrid).',
226
226
  parameters: {
227
227
  type: 'object',
228
228
  properties: {
229
- query: { type: 'string' },
229
+ query: { type: 'string', minLength: 1, description: 'Non-empty keyword or natural-language query' },
230
230
  limit: { type: 'number' },
231
231
  agent_id: { type: 'string' },
232
+ source: { type: 'string' },
232
233
  date_from: { type: 'string' },
233
234
  date_to: { type: 'string' },
235
+ entities: { type: 'array', items: { type: 'string' }, description: 'Named entities (person/project/tool/file)' },
236
+ entity_mode: { type: 'string', enum: ['any', 'all'], description: '"any" boosts; "all" hard-filters to sessions containing every entity' },
237
+ mode: { type: 'string', enum: ['fts', 'hybrid', 'vector'], description: 'Recall strategy, default hybrid' },
234
238
  },
235
239
  },
236
240
  async execute(_toolCallId, params) {
237
241
  try {
238
242
  const limit = Math.max(1, Math.min(20, parseInt(params?.limit ?? 5, 10) || 5));
239
- const results = await aquifer.recall(String(params?.query || ''), {
243
+ const recallOpts = {
240
244
  agentId: params?.agent_id || ctx?.agentId || undefined,
245
+ source: params?.source || undefined,
241
246
  dateFrom: params?.date_from || undefined,
242
247
  dateTo: params?.date_to || undefined,
243
248
  limit,
244
- });
249
+ };
250
+ if (Array.isArray(params?.entities) && params.entities.length > 0) {
251
+ recallOpts.entities = params.entities;
252
+ recallOpts.entityMode = params?.entity_mode || 'any';
253
+ }
254
+ if (params?.mode === 'fts' || params?.mode === 'hybrid' || params?.mode === 'vector') {
255
+ recallOpts.mode = params.mode;
256
+ }
257
+ const results = await aquifer.recall(String(params?.query || ''), recallOpts);
245
258
  const lines = results.map((r, i) =>
246
259
  `${i+1}. ${r.structuredSummary?.title || r.summaryText?.slice(0, 80) || '(untitled)'}`
247
260
  );
package/consumers/mcp.js CHANGED
@@ -225,6 +225,27 @@ async function main() {
225
225
  process.on('SIGINT', cleanup);
226
226
  process.on('SIGTERM', cleanup);
227
227
 
228
+ // Startup handshake: instantiate aquifer + drive init() before MCP transport
229
+ // so schema state is resolved before the first tool call. apply-mode failure
230
+ // is fatal (exit non-zero) — an MCP instance with pending DDL would serve
231
+ // tool traffic against a stale schema and surface confusing errors later.
232
+ const aquifer = getAquifer();
233
+ const envelope = await aquifer.init();
234
+ if (!envelope.ready) {
235
+ const err = envelope.error || { code: 'AQ_MIGRATION_NOT_READY', message: 'aquifer.init() did not reach ready state' };
236
+ process.stderr.write(
237
+ `[aquifer-mcp] startup aborted: migrationMode=${envelope.migrationMode} ` +
238
+ `memoryMode=${envelope.memoryMode} pending=${envelope.pendingMigrations.length} ` +
239
+ `error=${err.code || 'unknown'}: ${err.message}\n`
240
+ );
241
+ await aquifer.close().catch(() => {});
242
+ process.exit(1);
243
+ }
244
+ process.stderr.write(
245
+ `[aquifer-mcp] init ok: mode=${envelope.migrationMode} applied=${envelope.appliedMigrations.length} ` +
246
+ `pending=${envelope.pendingMigrations.length} durationMs=${envelope.durationMs}\n`
247
+ );
248
+
228
249
  const transport = new StdioServerTransport();
229
250
  await server.connect(transport);
230
251
 
@@ -275,13 +275,16 @@ function registerRecallTool(api, opts = {}) {
275
275
  if ((ctx?.sessionKey || '').includes('subagent')) return null;
276
276
  return {
277
277
  name: 'session_recall',
278
- description: '搜尋歷史 session 的摘要和對話記錄。可按關鍵字、日期範圍、agent 搜尋。',
278
+ description: '搜尋歷史 session 的摘要和對話記錄。當問題明確提到具體人名、專案、工具、檔名時,傳 entities;只想保留全部命中的 session 用 entity_mode="all",否則 "any" 是 boost。mode 可選 "fts"/"vector"/"hybrid",default hybrid。',
279
279
  parameters: {
280
280
  type: 'object',
281
281
  properties: {
282
- query: { type: 'string', description: '搜尋關鍵字(可空,空時按時間排序)' },
282
+ query: { type: 'string', description: '搜尋關鍵字或自然語言描述(必填非空)', minLength: 1 },
283
283
  date_from: { type: 'string' }, date_to: { type: 'string' },
284
284
  agent_id: { type: 'string' }, source: { type: 'string' },
285
+ entities: { type: 'array', items: { type: 'string' }, description: '具名 entity 清單(人/專案/工具/檔名)' },
286
+ entity_mode: { type: 'string', enum: ['any', 'all'], description: '"any" boost / "all" 硬過濾必含全部 entity' },
287
+ mode: { type: 'string', enum: ['fts', 'hybrid', 'vector'], description: 'recall 模式,default hybrid' },
285
288
  detail: { type: 'string' },
286
289
  limit: { type: 'number' },
287
290
  },
@@ -289,13 +292,21 @@ function registerRecallTool(api, opts = {}) {
289
292
  async execute(_toolCallId, params) {
290
293
  try {
291
294
  const limit = Math.max(1, Math.min(20, parseInt(params?.limit ?? 5, 10) || 5));
292
- const results = await aquifer.recall(String(params?.query || ''), {
295
+ const recallOpts = {
293
296
  agentId: params?.agent_id || ctx?.agentId || undefined,
294
297
  source: params?.source || undefined,
295
298
  dateFrom: params?.date_from || undefined,
296
299
  dateTo: params?.date_to || undefined,
297
300
  limit,
298
- });
301
+ };
302
+ if (Array.isArray(params?.entities) && params.entities.length > 0) {
303
+ recallOpts.entities = params.entities;
304
+ recallOpts.entityMode = params?.entity_mode || 'any';
305
+ }
306
+ if (params?.mode === 'fts' || params?.mode === 'hybrid' || params?.mode === 'vector') {
307
+ recallOpts.mode = params.mode;
308
+ }
309
+ const results = await aquifer.recall(String(params?.query || ''), recallOpts);
299
310
  const text = mirandaRecallFormat.formatRecallResults(results.map(r => ({
300
311
  sessionId: r.sessionId, agentId: r.agentId, source: r.source,
301
312
  startedAt: r.startedAt, summaryText: r.summaryText,
@@ -3,7 +3,7 @@
3
3
  // Miranda zh-TW recall formatter — overrides the shared default renderers
4
4
  // to produce narrative-style output instead of score-flavored markdown.
5
5
 
6
- const { createRecallFormatter, truncate, formatDateIso } = require('../shared/recall-format');
6
+ const { createRecallFormatter, truncate, formatDateIso, formatRelativeZhTw } = require('../shared/recall-format');
7
7
 
8
8
  function formatTopicLines(topics) {
9
9
  if (!Array.isArray(topics) || topics.length === 0) return '- 無';
@@ -32,10 +32,12 @@ function coalesceTitle(structuredSummary, summaryText) {
32
32
  const mirandaRenderers = {
33
33
  empty: () => '找不到符合條件的 session。',
34
34
  header: () => null,
35
- title: (r, i) => {
35
+ title: (r, i, ctx) => {
36
36
  const ss = r.structuredSummary || {};
37
37
  const title = coalesceTitle(ss, r.summaryText);
38
- const date = formatDateIso(r.startedAt);
38
+ const iso = formatDateIso(r.startedAt);
39
+ const rel = formatRelativeZhTw(r.startedAt, ctx?.now);
40
+ const date = rel ? `${rel}(${iso})` : iso;
39
41
  const agent = r.agentId || r.agent_id || 'main';
40
42
  return `### ${i + 1}. ${title}\n**Agent**: ${agent} | **Date**: ${date}`;
41
43
  },
@@ -42,6 +42,12 @@ const DEFAULTS = {
42
42
  timeoutMs: 2000,
43
43
  maxRetries: 1,
44
44
  },
45
+ migrations: {
46
+ mode: 'apply', // 'apply' | 'check' | 'off'
47
+ lockTimeoutMs: 30000,
48
+ startupTimeoutMs: 60000,
49
+ onEvent: null, // (event) => void, optional observability hook
50
+ },
45
51
  };
46
52
 
47
53
  // ---------------------------------------------------------------------------
@@ -77,6 +83,8 @@ const ENV_MAP = [
77
83
  ['AQUIFER_RERANK_TOP_K', 'rerank.topK', Number],
78
84
  ['AQUIFER_RERANK_MAX_CHARS', 'rerank.maxChars', Number],
79
85
  ['AQUIFER_RERANK_TIMEOUT_MS','rerank.timeoutMs', Number],
86
+ ['AQUIFER_MIGRATIONS_MODE', 'migrations.mode'],
87
+ ['AQUIFER_MIGRATION_LOCK_TIMEOUT_MS', 'migrations.lockTimeoutMs', Number],
80
88
  ];
81
89
 
82
90
  // ---------------------------------------------------------------------------
@@ -1,7 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  const { Pool } = require('pg');
4
- const { createAquifer, createEmbedder, createReranker } = require('../../index');
4
+ const { createAquifer, createEmbedder } = require('../../index');
5
5
  const { loadConfig } = require('./config');
6
6
  const { createLlmFn } = require('./llm');
7
7
 
@@ -90,6 +90,7 @@ function createAquiferFromConfig(overrides) {
90
90
  entities: config.entities,
91
91
  rank: config.rank,
92
92
  rerank: rerankOpts,
93
+ migrations: config.migrations,
93
94
  });
94
95
 
95
96
  return aquifer;
@@ -31,7 +31,7 @@ function httpRequest(url, options, body) {
31
31
  }
32
32
  try {
33
33
  finish(resolve, JSON.parse(raw));
34
- } catch (e) {
34
+ } catch {
35
35
  finish(reject, new Error(`Invalid JSON from LLM (${raw.length} bytes)`));
36
36
  }
37
37
  });
@@ -18,6 +18,25 @@ function formatDateIso(value) {
18
18
  return Number.isNaN(d.getTime()) ? 'unknown' : d.toISOString().slice(0, 10);
19
19
  }
20
20
 
21
// Humanize a past timestamp into zh-TW relative form (e.g. "3 天前", "昨天").
// Bucketed on raw ms-diff — good enough for model intuition, not calendar-precise.
// Returns null for missing / invalid / future timestamps so callers can fall back.
//
// value: anything `new Date(value)` accepts (ISO string, epoch ms, Date).
//        Falsy values (including 0) are treated as "no timestamp".
// now:   optional reference instant, as epoch ms or a Date. Anything that does
//        not resolve to a finite number (undefined, NaN, ±Infinity, invalid
//        Date, strings) falls back to Date.now(), so the output can never
//        contain "NaN" or "Infinity".
function formatRelativeZhTw(value, now) {
  if (!value) return null;
  const t = new Date(value).getTime();
  if (Number.isNaN(t)) return null;

  // Accept a Date as the reference instant; getTime() yields NaN for an
  // invalid Date, which the finite check below rejects.
  const candidate = now instanceof Date ? now.getTime() : now;
  const nowMs = typeof candidate === 'number' && Number.isFinite(candidate)
    ? candidate
    : Date.now();

  const diffMs = nowMs - t;
  if (diffMs < 0) return null; // future timestamp — let the caller fall back

  const day = 86400000;
  if (diffMs < day) return '今天';
  if (diffMs < 2 * day) return '昨天';
  if (diffMs < 7 * day) return `${Math.floor(diffMs / day)} 天前`;
  if (diffMs < 30 * day) return `${Math.floor(diffMs / (7 * day))} 週前`;
  if (diffMs < 365 * day) return `${Math.floor(diffMs / (30 * day))} 個月前`;
  return `${Math.floor(diffMs / (365 * day))} 年前`;
}
39
+
21
40
  // Default English renderers --------------------------------------------------
22
41
 
23
42
  const defaultRenderers = {
@@ -63,7 +82,7 @@ function createRecallFormatter(overrides = {}) {
63
82
 
64
83
  return function format(results, opts = {}) {
65
84
  const safeResults = Array.isArray(results) ? results : [];
66
- const ctx = { query: opts.query || null, results: safeResults };
85
+ const ctx = { query: opts.query || null, results: safeResults, now: opts.now };
67
86
 
68
87
  if (safeResults.length === 0) {
69
88
  return r.empty(ctx);
@@ -106,5 +125,6 @@ module.exports = {
106
125
  formatRecallResults,
107
126
  truncate,
108
127
  formatDateIso,
128
+ formatRelativeZhTw,
109
129
  defaultRenderers,
110
130
  };