npm - @shadowforge0/aquifer-memory - Versions diffs - 1.5.8 → 1.5.12 - Mend

@shadowforge0/aquifer-memory 1.5.8 → 1.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +100 -3
package/consumers/cli.js +34 -0
package/consumers/mcp.js +38 -5
package/consumers/miranda/context-inject.js +1 -0
package/consumers/openclaw-plugin.js +43 -3
package/consumers/shared/config.js +20 -0
package/consumers/shared/factory.js +1 -0
package/consumers/shared/recall-format.js +26 -0
package/core/aquifer.js +12 -0
package/core/insights.js +210 -58
package/core/mcp-manifest.js +18 -1
package/core/storage.js +71 -0
package/package.json +10 -2
package/scripts/backfill-canonical-key.js +250 -0
package/scripts/queries.json +0 -45
package/scripts/retro-recall-bench.js +0 -409
package/scripts/sample-bench-queries.sql +0 -75

package/README.md CHANGED Viewed

@@ -132,14 +132,34 @@ Need LLM summarization, the knowledge graph, OpenAI embeddings, or the reranker?
 | `AQUIFER_RERANK_PROVIDER` | No | Reranker provider: `tei`, `jina`, `openrouter` | `tei` |
 | `AQUIFER_RERANK_BASE_URL` | No | Reranker endpoint | `http://localhost:8080` |
 | `AQUIFER_AGENT_ID` | No | Default agent ID | `main` |
+| `AQUIFER_MIGRATIONS_MODE` | No | Startup handshake mode: `apply` (default), `check`, `off` | `apply` |
+| `AQUIFER_MIGRATION_LOCK_TIMEOUT_MS` | No | Advisory-lock wait before `AQ_MIGRATION_LOCK_TIMEOUT` (default 30000) | `30000` |
+| `AQUIFER_INSIGHTS_DEDUP_MODE` | No | Insights semantic dedup mode: `off` (default), `shadow`, `enforce` — env wins over code for this field only, so operators can kill-switch without redeploy | `shadow` |
+| `AQUIFER_INSIGHTS_DEDUP_COSINE` | No | Cosine threshold for semantic merge (default `0.88`; warn outside `[0.75, 0.95]`) | `0.90` |
+| `AQUIFER_INSIGHTS_DEDUP_CLOSE_BAND_FROM` | No | Lower bound for close-band logging (`dedupNear`); must be below threshold (default `0.85`) | `0.82` |
 Full env-to-config mapping is in [consumers/shared/config.js](consumers/shared/config.js).
+### Insights semantic dedup (1.5.10)
+When a cron extractor (`scripts/extract-insights-from-recent-sessions.js`) or any other caller writes insights via `commitInsight`, the canonical-key layer (1.5.3+) dedupes rows whose `canonicalClaim + entities` hash to the same value. But LLMs don't always produce the same `canonicalClaim` across runs, so 1.5.10 adds a second tier: `title + body` are embedded, matched against `(tenant, agent, type)`-scoped active rows, and a top cosine above `AQUIFER_INSIGHTS_DEDUP_COSINE` triggers supersede (enforce) or metadata-only would-merge logging (shadow). Close-band hits (`closeBandFrom ≤ cos < threshold`) write `metadata.dedupNear` without supersede so operators can tune thresholds without committing.
+Recommended rollout: `shadow` for one weekly cycle, inspect `SELECT metadata->>'shadowMatch' FROM insights WHERE metadata ? 'shadowMatch'`, then flip to `enforce`. Kill-switch: `AQUIFER_INSIGHTS_DEDUP_MODE=off` and restart.
+Pre-1.5.3 rows with `canonical_key_v2 IS NULL` are caught by the semantic tier but skip the canonical path; a startup warning points to the one-shot backfill:
+```bash
+DATABASE_URL=... \
+  node scripts/backfill-canonical-key.js --schema <schema> --agent <id>
+```
+The script is idempotent (`WHERE canonical_key_v2 IS NULL` guard) and race-safe with live writers.
 ---
 ## Host Integration
-MCP is the primary integration surface. Agent hosts connect to the Aquifer MCP server, which exposes five tools: `session_recall`, `session_feedback`, `session_bootstrap`, `memory_stats`, `memory_pending`.
+MCP is the primary integration surface. Agent hosts connect to the Aquifer MCP server, which exposes six tools: `session_recall`, `session_feedback`, `feedback_stats`, `session_bootstrap`, `memory_stats`, `memory_pending`.
 | Integration | Route | Status | When to use |
 |-------------|-------|--------|-------------|
@@ -194,7 +214,7 @@ Add to `openclaw.json` under `mcp.servers`:
 }
 ```
-Tools materialize as `aquifer__session_recall`, `aquifer__session_feedback`, `aquifer__session_bootstrap`, `aquifer__memory_stats`, `aquifer__memory_pending` (server name prefix added by the host).
+Tools materialize as `aquifer__session_recall`, `aquifer__session_feedback`, `aquifer__feedback_stats`, `aquifer__session_bootstrap`, `aquifer__memory_stats`, `aquifer__memory_pending` (server name prefix added by the host).
 The OpenClaw plugin (`consumers/openclaw-plugin.js`) is retained for session capture via `before_reset` but is **not** the recommended tool delivery path. Use MCP.
@@ -377,9 +397,36 @@ Returns an Aquifer instance. Config:
 }
 ```
+#### `aquifer.init()`
+Startup handshake — resolves pending migrations and returns a StartupEnvelope. Hosts should `await` this before accepting traffic. In `apply` mode a `ready=false` envelope is the signal to abort startup.
+```javascript
+const envelope = await aquifer.init();
+// {
+//   ready:             true,
+//   memoryMode:        'rw',        // 'rw' | 'ro' | 'off'
+//   migrationMode:     'apply',     // 'apply' | 'check' | 'off'
+//   pendingMigrations: [],          // migration ids still outstanding
+//   appliedMigrations: ['001-base', '003-trust-feedback', '004-completion', '006-insights'],
+//   error:             null,        // { code, message } on failure
+//   durationMs:        1035,
+// }
+```
+The MCP consumer (`consumers/mcp.js`) already wires `aquifer.init()` before `server.connect()` and exits non-zero if `ready=false` under `apply` mode.
+#### `aquifer.listPendingMigrations()` / `aquifer.getMigrationStatus()`
+Returns `{ required, applied, pending, lastRunAt }` via a `pg_tables` signature probe. No DDL runs. Use it from a health check or from a consumer that wants to surface drift before calling `init()`.
 #### `aquifer.migrate()`
-Runs SQL migrations (idempotent). Creates tables, indexes, triggers, and extensions.
+Runs SQL migrations (idempotent). Creates tables, indexes, triggers, and extensions. Uses `pg_try_advisory_lock` with a 250 ms poll and a `lockTimeoutMs` deadline (30 s default); on exhaustion throws with `code: 'AQ_MIGRATION_LOCK_TIMEOUT'`. On success returns `{ ok: true, durationMs, notices, ddlExecuted }`; on failure throws an error whose `err.notices` / `err.failedAt` describe the stage that blew up. Most callers should go through `aquifer.init()` instead.
+#### `aquifer.ensureMigrated()`
+Lazy idempotent wrapper — fires `migrate()` once on first call, no-ops afterwards. Honors `migrations.mode`: `check` only probes, `off` marks the instance migrated without touching the DB.
 #### `aquifer.commit(sessionId, messages, opts)`
@@ -463,6 +510,26 @@ const result = await aquifer.bootstrap({
 Cross-session dedup on open loops and decisions, sentinel filtering (removes 無/none/n/a), and maxChars truncation.
+#### `aquifer.insights.commitInsight(opts)` / `recallInsights(query, opts)` / `markStale(id)` / `supersede(oldId, newId)`
+Higher-order reflections distilled from session windows (preferences, patterns, frustrations, workflows). Split into two identities: a **canonical key** that describes what the insight is *about* (stable across rewordings), and an **idempotency key** that describes which revision of that claim was written.
+```javascript
+await aquifer.insights.commitInsight({
+  agentId:        'main',
+  type:           'preference',
+  canonicalClaim: 'mk prefers checking context before coding',  // required — short declarative claim
+  title:          'Context-first discipline',                    // best-effort display
+  body:           '…',
+  entities:       ['mk', 'claude code'],
+  sourceSessionIds: ['sess-a', 'sess-b'],
+  evidenceWindow:  { from: isoString, to: isoString },
+  importance:     0.9,
+});
+```
+Write rules: **duplicate** (same idempotency key → return existing), **revision** (same canonical key + newer evidence → INSERT + inline supersede of prior active), **back-fill revision** (same canonical key + older evidence → INSERT without supersede), **stale replay** (same canonical key + same body → return existing). Old pre-1.5.6 rows are not retrofitted; their `canonical_key_v2` stays `NULL` and they age out naturally.
 #### `aquifer.close()`
 Closes the PostgreSQL connection pool (only if Aquifer created it).
@@ -498,9 +565,19 @@ createAquifer({
     access: 0.10,              // access frequency weight
     entityBoost: 0.18,         // entity match boost
   },
+  migrations: {
+    mode: 'apply',             // 'apply' | 'check' | 'off'
+    lockTimeoutMs: 30000,      // abort init() if advisory lock held this long
+    startupTimeoutMs: 60000,   // overall init() deadline (plan probe + DDL combined)
+    onEvent: null,             // (e) => void — lifecycle hook, see below
+  },
 });
 ```
+### Startup observability
+Set `migrations.onEvent` to observe the lifecycle without parsing logs. Event names: `init_started`, `check_completed`, `apply_started`, `apply_succeeded`, `apply_failed`. Each payload carries `schema`, `mode`, the plan, `ddlExecuted`, `durationMs`, and on failure the `error` / `failedAt` / `notices`. No listener → zero cost.
 ### Entity Scope
 `entities.scope` defines the namespace for entity identity. The unique constraint is `(tenant_id, normalized_name, entity_scope)` — the same entity name in different scopes creates separate entities. This decouples entity identity from `agentId`, allowing multiple agents to share an entity namespace.
@@ -542,6 +619,22 @@ Key indexes: trigram on entity names, GiST on embeddings, unique on `(tenant_id,
 Also adds `trust_score` column to `session_summaries` (default 0.5, range 0–1).
+### 005-entity-state-history.sql *(entities enabled)*
+| Table | Purpose |
+|-------|---------|
+| `entity_state_history` | Temporal state-change log with partial `UNIQUE (tenant, agent, entity, attribute) WHERE valid_to IS NULL` to enforce at-most-one-current. Out-of-order backfill is supported via predecessor/successor overlap checks |
+Opt-in pipeline (`createAquifer({stateChanges: {enabled, whitelist, confidenceThreshold, timeoutMs, ...}})`) extracts temporal state transitions from session text during `enrich()`; off by default to control LLM cost.
+### 006-insights.sql
+| Table | Purpose |
+|-------|---------|
+| `insights` | Higher-order reflections with TSTZRANGE evidence window, importance, GIN on source_session_ids, HNSW on 1024-dim embedding, and a non-unique partial index on `canonical_key_v2` for the canonical/revision dedup contract |
+Key indexes: `idx_insights_canonical_v2_active` (partial on active rows with canonical key set), `idx_insights_idempotency_key` (unique on revision key).
 ---
 ## Troubleshooting
@@ -556,6 +649,10 @@ Also adds `trust_score` column to `session_summaries` (default 0.5, range 0–1)
 **Embedding provider connection refused** — Verify your `AQUIFER_EMBED_BASE_URL` is reachable. For local Ollama, make sure the server is running and the model is pulled (`ollama pull bge-m3`).
+**`AQ_MIGRATION_LOCK_TIMEOUT` on startup** — another process holds the migration advisory lock for `aquifer:<schema>`. Either it is a concurrent `aquifer.init()` racing yours (expected; one will win, the other re-runs and finds `pending=[]`) or a crashed worker left the lock held. Raise `migrations.lockTimeoutMs`, or drop the stale backend via `SELECT pg_terminate_backend(pid) FROM pg_locks WHERE locktype='advisory'` after you have confirmed which pid is dead. Note that this query, as written, terminates every backend that holds any advisory lock; to target only the dead worker, add `AND pid = <pid>` to the `WHERE` clause.
+**MCP process exits non-zero at startup** — expected when `migrations.mode=apply` and `aquifer.init()` returns `ready=false`. Read the `[aquifer-mcp] startup aborted` line on stderr for the `error.code` / `failedAt`. If you need the old lazy-migrate-on-first-tool-call behaviour instead, set `AQUIFER_MIGRATIONS_MODE=check` (and run `migrate()` out of band) or `AQUIFER_MIGRATIONS_MODE=off`.
 ---
 ## Dependencies

package/consumers/cli.js CHANGED Viewed

@@ -99,6 +99,7 @@ async function cmdRecall(aquifer, args) {
     return;
   }
+  const showExplain = !!args.flags.explain;
   for (let i = 0; i < results.length; i++) {
     const r = results[i];
     const ss = r.structuredSummary || {};
@@ -107,6 +108,18 @@ async function cmdRecall(aquifer, args) {
     console.log(`${i + 1}. [${r.score?.toFixed(3)}] ${title} (${date}, ${r.agentId})`);
     if (ss.overview) console.log(`   ${ss.overview.slice(0, 200)}`);
     if (r.matchedTurnText) console.log(`   > ${r.matchedTurnText.slice(0, 150)}`);
+    if (showExplain && r._debug) {
+      const d = r._debug;
+      const f = (v) => typeof v === 'number' ? v.toFixed(3) : '?';
+      const parts = [
+        `rrf=${f(d.rrf)}`, `td=${f(d.timeDecay)}`, `access=${f(d.access)}`,
+        `entity=${f(d.entityScore)}`, `trust=${f(d.trustScore)}(\u00d7${f(d.trustMultiplier)})`,
+        `ol=${f(d.openLoopBoost)}`, `\u2192 hybrid=${f(d.hybridScore)}`,
+      ];
+      if (d.rerankApplied) parts.push(`rerank=${f(d.rerankScore)}(${d.rerankReason || '?'})`);
+      else parts.push(`[rerank: off (${d.rerankReason || '?'})]`);
+      console.log(`   ${parts.join(' ')}`);
+    }
     console.log();
   }
 }
@@ -133,6 +146,22 @@ async function cmdFeedback(aquifer, args) {
   }
 }
+async function cmdFeedbackStats(aquifer, args) {
+  const stats = await aquifer.feedbackStats({
+    agentId: args.flags['agent-id'] || undefined,
+    dateFrom: args.flags['date-from'] || undefined,
+    dateTo: args.flags['date-to'] || undefined,
+  });
+  if (args.flags.json) {
+    console.log(JSON.stringify(stats, null, 2));
+  } else {
+    console.log(`Feedback: ${stats.totalFeedback} total (${stats.helpfulCount} helpful, ${stats.unhelpfulCount} unhelpful)`);
+    console.log(`Coverage: ${stats.feedbackSessions}/${stats.totalSessions} sessions rated`);
+    console.log(`Trust score: avg=${stats.trustScoreAvg} min=${stats.trustScoreMin} max=${stats.trustScoreMax}`);
+  }
+}
 async function cmdBackfill(aquifer, args) {
   const limit = parsePositiveInt(args.flags.limit, 100);
   const dryRun = !!args.flags['dry-run'];
@@ -318,6 +347,7 @@ Commands:
   migrate                     Run database migrations
   recall <query>              Search sessions (requires embed config)
   feedback                    Record trust feedback on a session
+  feedback-stats              Show trust feedback statistics and coverage
   backfill                    Enrich pending sessions
   stats                       Show database statistics
   export                      Export sessions as JSONL
@@ -336,6 +366,7 @@ Options:
   --session-id ID             Session ID (feedback)
   --verdict helpful|unhelpful Feedback verdict (feedback)
   --note TEXT                 Feedback note (feedback)
+  --explain                    Show score breakdown per result (recall)
   --json                      JSON output
   --dry-run                   Preview only (backfill)
   --output PATH               Output file (export)
@@ -410,6 +441,9 @@ Options:
       case 'feedback':
         await cmdFeedback(aquifer, args);
         break;
+      case 'feedback-stats':
+        await cmdFeedbackStats(aquifer, args);
+        break;
       case 'backfill':
         await cmdBackfill(aquifer, args);
         break;

package/consumers/mcp.js CHANGED Viewed

@@ -7,7 +7,8 @@
  * This is the primary integration surface for Aquifer. Agent hosts (Claude Code,
  * Codex, OpenCode, etc.) should integrate through this MCP server.
  *
- * Tools: session_recall, session_feedback, memory_stats, memory_pending
+ * Tools: session_recall, session_feedback, feedback_stats,
+ * session_bootstrap, memory_stats, memory_pending
  *
  * Usage:
  *   npx aquifer mcp
@@ -32,8 +33,8 @@ function getAquifer() {
 const { formatRecallResults } = require('./shared/recall-format');
-function formatResults(results, query) {
-  return formatRecallResults(results, { query, showScore: true });
+function formatResults(results, query, explain) {
+  return formatRecallResults(results, { query, showScore: true, showExplain: !!explain });
 }
 // ---------------------------------------------------------------------------
@@ -74,6 +75,7 @@ async function main() {
       entities: z.array(z.string()).optional().describe('Entity names to match'),
       entityMode: z.enum(['any', 'all']).optional().describe('"any" (default, boost) or "all" (only sessions with every entity)'),
       mode: z.enum(['fts', 'hybrid', 'vector']).optional().describe('Recall mode: "fts" (keyword only, no embed needed), "hybrid" (default, FTS + vector), "vector" (vector only)'),
+      explain: z.boolean().optional().describe('Include per-result score breakdown (diagnostic)'),
     },
     async (params) => {
       try {
@@ -93,7 +95,7 @@ async function main() {
         if (params.mode) recallOpts.mode = params.mode;
         const results = await aquifer.recall(params.query, recallOpts);
-        const text = formatResults(results, params.query);
+        const text = formatResults(results, params.query, params.explain);
         return { content: [{ type: 'text', text }] };
       } catch (err) {
         return {
@@ -106,7 +108,7 @@ async function main() {
   server.tool(
     'session_feedback',
-    'Record trust feedback on a recalled session. Helpful sessions rank higher in future recalls.',
+    'After using session_recall, mark the result helpful if it directly informed your answer, or unhelpful if it was irrelevant/outdated. Include a short note. Sessions with more helpful feedback rank higher in future recalls.',
     {
       sessionId: z.string().min(1).describe('Session ID to give feedback on'),
       verdict: z.enum(['helpful', 'unhelpful']).describe('Was the recalled session useful?'),
@@ -133,6 +135,37 @@ async function main() {
     }
   );
+  server.tool(
+    'feedback_stats',
+    'Return trust feedback statistics: total feedback count, helpful/unhelpful breakdown, trust score distribution, and coverage.',
+    {
+      agentId: z.string().optional().describe('Filter by agent ID'),
+      dateFrom: z.string().optional().describe('Start date YYYY-MM-DD'),
+      dateTo: z.string().optional().describe('End date YYYY-MM-DD'),
+    },
+    async (params) => {
+      try {
+        const aquifer = getAquifer();
+        const stats = await aquifer.feedbackStats({
+          agentId: params.agentId || undefined,
+          dateFrom: params.dateFrom || undefined,
+          dateTo: params.dateTo || undefined,
+        });
+        const lines = [
+          `Feedback: ${stats.totalFeedback} total (${stats.helpfulCount} helpful, ${stats.unhelpfulCount} unhelpful)`,
+          `Coverage: ${stats.feedbackSessions}/${stats.totalSessions} sessions rated`,
+          `Trust score: avg=${stats.trustScoreAvg} min=${stats.trustScoreMin} max=${stats.trustScoreMax}`,
+        ];
+        return { content: [{ type: 'text', text: lines.join('\n') }] };
+      } catch (err) {
+        return {
+          content: [{ type: 'text', text: `feedback_stats error: ${err.message}` }],
+          isError: true,
+        };
+      }
+    }
+  );
   server.tool(
     'memory_stats',
     'Return storage statistics for the Aquifer memory store (session counts by status, summaries, turn embeddings, entities, date range).',

package/consumers/miranda/context-inject.js CHANGED Viewed

@@ -14,6 +14,7 @@ function buildSessionContext({ today, agentId, focusText, todoItems, moodLine, h
     const parts = [];
     parts.push('你是 Miranda。以下是你已經知道的現況，直接用來回應，不需要讀檔或搜尋。像做 briefing——帶現況也帶判斷和建議。用散文段落，最後一句必須是結論或建議，不能是問句。若草稿有 bullet、標題、表格或問句收尾，改寫再送出。');
     parts.push('回答任何關於過去做過什麼、討論過什麼、決策過什麼的問題時，第一步用 session_recall MCP tool 查，不要用 grep、讀 log、翻檔案。工具在手上就用。');
+    parts.push('用完 session_recall 後，如果某筆結果實際幫助了你的回答，呼叫 session_feedback(sessionId, verdict="helpful")；如果結果明顯過時或錯誤，呼叫 session_feedback(sessionId, verdict="unhelpful")，帶簡短 note 說明原因。只對實際影響回答的結果回饋，不要每次 recall 都打分。');
     if (focusText) parts.push(`現在的焦點是${focusText}。`);
     if (handoffText) parts.push(`上一段的交接：${handoffText}`);

package/consumers/openclaw-plugin.js CHANGED Viewed

@@ -214,6 +214,7 @@ function register(api) {
             entities: { type: 'array', items: { type: 'string' }, description: 'Entity names to match' },
             entityMode: { type: 'string', enum: ['any', 'all'], description: '"any" (default, boost) or "all" (only sessions with every entity)' },
             mode: { type: 'string', enum: ['fts', 'hybrid', 'vector'], description: 'Recall mode: "fts" (keyword only), "hybrid" (default), "vector" (vector only)' },
+            explain: { type: 'boolean', description: 'Include per-result score breakdown (diagnostic)' },
           },
           required: ['query'],
         },
@@ -234,7 +235,7 @@ function register(api) {
             if (params.mode) recallOpts.mode = params.mode;
             const results = await aquifer.recall(params.query, recallOpts);
-            const text = formatRecallResults(results);
+            const text = formatRecallResults(results, { showScore: true, showExplain: !!params.explain });
             return { content: [{ type: 'text', text }] };
           } catch (err) {
             return {
@@ -253,7 +254,7 @@ function register(api) {
       return {
         name: 'session_feedback',
-        description: 'Record trust feedback on a recalled session. Helpful sessions rank higher in future recalls.',
+        description: 'After using session_recall, mark the result helpful if it directly informed your answer, or unhelpful if it was irrelevant/outdated. Include a short note. Sessions with more helpful feedback rank higher in future recalls.',
         parameters: {
           type: 'object',
           properties: {
@@ -285,5 +286,44 @@ function register(api) {
       };
     }, { name: 'session_feedback' });
-  api.logger.info('[aquifer-memory] registered (before_reset + session_recall + session_feedback)');
+    // --- feedback_stats tool ---
+    api.registerTool((ctx) => {
+      if ((ctx?.sessionKey || '').includes('subagent')) return null;
+      return {
+        name: 'feedback_stats',
+        description: 'Return trust feedback statistics: total feedback count, helpful/unhelpful breakdown, trust score distribution, and coverage.',
+        parameters: {
+          type: 'object',
+          properties: {
+            agentId: { type: 'string', description: 'Filter by agent ID' },
+            dateFrom: { type: 'string', description: 'Start date YYYY-MM-DD' },
+            dateTo: { type: 'string', description: 'End date YYYY-MM-DD' },
+          },
+        },
+        async execute(_toolCallId, params) {
+          try {
+            const stats = await aquifer.feedbackStats({
+              agentId: params.agentId || undefined,
+              dateFrom: params.dateFrom || undefined,
+              dateTo: params.dateTo || undefined,
+            });
+            const lines = [
+              `Feedback: ${stats.totalFeedback} total (${stats.helpfulCount} helpful, ${stats.unhelpfulCount} unhelpful)`,
+              `Coverage: ${stats.feedbackSessions}/${stats.totalSessions} sessions rated`,
+              `Trust score: avg=${stats.trustScoreAvg} min=${stats.trustScoreMin} max=${stats.trustScoreMax}`,
+            ];
+            return { content: [{ type: 'text', text: lines.join('\n') }] };
+          } catch (err) {
+            return {
+              content: [{ type: 'text', text: `feedback_stats error: ${err.message}` }],
+              isError: true,
+            };
+          }
+        },
+      };
+    }, { name: 'feedback_stats' });
+  api.logger.info('[aquifer-memory] registered (before_reset + session_recall + session_feedback + feedback_stats)');
 }

package/consumers/shared/config.js CHANGED Viewed

@@ -30,6 +30,15 @@ const DEFAULTS = {
     temperature: 0,
   },
   entities: { enabled: false, mergeCall: true, scope: 'default' },
+  insights: {
+    recallWeights: null,
+    recencyWindowDays: null,
+    dedup: {
+      mode: 'off',
+      cosineThreshold: 0.88,
+      closeBandFrom: 0.85,
+    },
+  },
   rank: { rrf: 0.65, timeDecay: 0.25, access: 0.10, entityBoost: 0.18 },
   rerank: {
     enabled: false,
@@ -75,6 +84,9 @@ const ENV_MAP = [
   ['AQUIFER_LLM_TEMPERATURE',   'llm.temperature',   Number],
   ['AQUIFER_ENTITIES_ENABLED',  'entities.enabled',  Boolean],
   ['AQUIFER_ENTITY_SCOPE',     'entities.scope'],
+  ['AQUIFER_INSIGHTS_DEDUP_MODE',             'insights.dedup.mode'],
+  ['AQUIFER_INSIGHTS_DEDUP_COSINE',           'insights.dedup.cosineThreshold', Number],
+  ['AQUIFER_INSIGHTS_DEDUP_CLOSE_BAND_FROM',  'insights.dedup.closeBandFrom',   Number],
   ['AQUIFER_RERANK_ENABLED',   'rerank.enabled',    Boolean],
   ['AQUIFER_RERANK_PROVIDER',  'rerank.provider'],
   ['AQUIFER_RERANK_BASE_URL',  'rerank.baseUrl'],
@@ -165,6 +177,14 @@ function loadConfig(opts = {}) {
     config = deepMerge(config, opts.overrides);
   }
+  // insights.dedup shorthand: true → enforce, false → off
+  if (config.insights && typeof config.insights.dedup === 'boolean') {
+    config.insights.dedup = {
+      ...DEFAULTS.insights.dedup,
+      mode: config.insights.dedup ? 'enforce' : 'off',
+    };
+  }
   return config;
 }

package/consumers/shared/factory.js CHANGED Viewed

@@ -91,6 +91,7 @@ function createAquiferFromConfig(overrides) {
     rank: config.rank,
     rerank: rerankOpts,
     migrations: config.migrations,
+    insights: config.insights,
   });
   return aquifer;

package/consumers/shared/recall-format.js CHANGED Viewed

@@ -66,6 +66,30 @@ const defaultRenderers = {
         if (!showScore) return null;
         return `Score: ${typeof result.score === 'number' ? result.score.toFixed(3) : '?'}`;
     },
+    explain(result, { showExplain }) {
+        if (!showExplain) return null;
+        const d = result._debug;
+        if (!d) return null;
+        const f = (v) => typeof v === 'number' ? v.toFixed(3) : '?';
+        const parts = [
+            `rrf=${f(d.rrf)}`,
+            `td=${f(d.timeDecay)}`,
+            `access=${f(d.access)}`,
+            `entity=${f(d.entityScore)}`,
+            `trust=${f(d.trustScore)}(\u00d7${f(d.trustMultiplier)})`,
+            `ol=${f(d.openLoopBoost)}`,
+            `\u2192 hybrid=${f(d.hybridScore)}`,
+        ];
+        if (d.rerankApplied) {
+            parts.push(`rerank=${f(d.rerankScore)}(${d.rerankReason || '?'})`);
+        } else {
+            parts.push(`[rerank: off (${d.rerankReason || '?'})]`);
+        }
+        if (Array.isArray(d.searchErrors) && d.searchErrors.length > 0) {
+            parts.push(`errors: ${d.searchErrors.map(e => (e && e.path) || '?').join(',')}`);
+        }
+        return `  ${parts.join(' ')}`;
+    },
     separator() {
         return '';
     },
@@ -102,6 +126,8 @@ function createRecallFormatter(overrides = {}) {
             if (matched) lines.push(matched);
             const score = r.score(res, { showScore: !!opts.showScore, ...ctx });
             if (score) lines.push(score);
+            const explain = r.explain(res, { showExplain: !!opts.showExplain, ...ctx });
+            if (explain) lines.push(explain);
             const sep = r.separator(i, ctx);
             if (sep !== null && sep !== undefined) lines.push(sep);
         }

package/core/aquifer.js CHANGED Viewed

@@ -1558,6 +1558,17 @@ function createAquifer(config = {}) {
       });
     },
+    async feedbackStats(opts = {}) {
+      await ensureMigrated();
+      return storage.getFeedbackStats(pool, {
+        schema,
+        tenantId,
+        agentId: opts.agentId || undefined,
+        dateFrom: opts.dateFrom || undefined,
+        dateTo: opts.dateTo || undefined,
+      });
+    },
     // --- admin ---
     async getSession(sessionId, opts = {}) {
@@ -1837,6 +1848,7 @@ function createAquifer(config = {}) {
     recallWeights: (config.insights && config.insights.recallWeights) || null,
     recencyWindowDays: config.insights && Number.isFinite(config.insights.recencyWindowDays)
       ? config.insights.recencyWindowDays : undefined,
+    dedup: config.insights && config.insights.dedup ? config.insights.dedup : undefined,
   });
   return aquifer;