npm - @shadowforge0/aquifer-memory - Versions diffs - 0.7.0 → 0.8.0 - Mend

@shadowforge0/aquifer-memory 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +62 -9
package/consumers/cli.js +11 -78
package/consumers/mcp.js +68 -4
package/consumers/openclaw-plugin.js +18 -5
package/consumers/shared/config.js +3 -3
package/consumers/shared/factory.js +6 -4
package/core/aquifer.js +157 -17
package/core/storage.js +12 -3
package/index.js +2 -1
package/package.json +3 -3
package/pipeline/normalize/adapters/claude-code.js +90 -0
package/pipeline/normalize/adapters/gateway.js +67 -0
package/pipeline/normalize/constants.js +12 -0
package/pipeline/normalize/detect.js +52 -0
package/pipeline/normalize/extract.js +49 -0
package/pipeline/normalize/index.js +129 -0
package/pipeline/normalize/timestamp.js +33 -0

package/README.md CHANGED Viewed

@@ -127,18 +127,28 @@ const results = await aquifer.recall('auth middleware decision', {
 ## Architecture
 ```
-┌─────────────────────────────────────────────────────────────┐
-│                    createAquifer (entry)                     │
-│         Config · Migration · Ingest · Recall · Enrich       │
-└────────┬──────────┬──────────┬──────────┬───────────────────┘
+┌──────────────────────────────────────────────────────────────┐
+│                     Agent Hosts                              │
+│   Claude Code · OpenClaw · Codex · OpenCode · ...            │
+└──────────────────────┬───────────────────────────────────────┘
+                       │ MCP (stdio or HTTP)
+┌──────────────────────▼───────────────────────────────────────┐
+│              Aquifer MCP Server (canonical API)               │
+│   session_recall · session_feedback · memory_stats · ...     │
+└──────────────────────┬───────────────────────────────────────┘
+                       │
+┌──────────────────────▼───────────────────────────────────────┐
+│                    createAquifer (engine)                     │
+│         Config · Migration · Ingest · Recall · Enrich        │
+└────────┬──────────┬──────────┬──────────┬────────────────────┘
          │          │          │          │
     ┌────▼───┐ ┌────▼────┐ ┌──▼───┐ ┌───▼──────────┐
     │storage │ │hybrid-  │ │entity│ │   pipeline/   │
     │  .js   │ │rank.js  │ │ .js  │ │summarize.js   │
     └────────┘ └─────────┘ └──────┘ │embed.js       │
          │                     │    │extract-ent.js │
-    ┌────▼───────────┐    ┌───▼──┐  └───────────────┘
-    │  PostgreSQL     │    │ LLM  │
+    ┌────▼───────────┐    ┌───▼──┐  │rerank.js      │
+    │  PostgreSQL     │    │ LLM  │  └───────────────┘
     │  + pgvector     │    │ API  │
     └────────────────┘    └──────┘
@@ -151,11 +161,13 @@ const results = await aquifer.recall('auth middleware decision', {
     └──────────────────────────────────┘
 ```
+**Integration model:** MCP is the primary integration surface. Agent hosts connect to Aquifer through the MCP server (`consumers/mcp.js`), which exposes `session_recall`, `session_feedback`, `memory_stats`, and `memory_pending`. The CLI wraps the same engine for command-line use. The OpenClaw plugin (`consumers/openclaw-plugin.js`) is retained as a compatibility adapter for session capture but is not the primary tool delivery path.
 ### File Reference
 | File | Purpose |
 |------|---------|
-| `index.js` | Entry point — exports `createAquifer`, `createEmbedder` |
+| `index.js` | Entry point — exports `createAquifer`, `createEmbedder`, `createReranker`, `normalizeSession` |
 | `core/aquifer.js` | Main facade: `migrate()`, `ingest()`, `recall()`, `enrich()` |
 | `core/storage.js` | Session/summary/turn CRUD, FTS search, embedding search |
 | `core/entity.js` | Entity upsert, mention tracking, relation graph, normalization |
@@ -163,6 +175,8 @@ const results = await aquifer.recall('auth middleware decision', {
 | `pipeline/summarize.js` | LLM-powered session summarization with structured output |
 | `pipeline/embed.js` | Embedding client (any OpenAI-compatible API) |
 | `pipeline/extract-entities.js` | LLM-powered entity extraction (12 types) |
+| `pipeline/rerank.js` | Cross-encoder reranking (TEI, Jina, OpenRouter) |
+| `pipeline/normalize/` | Session normalization for Claude Code / gateway noise |
 | `schema/001-base.sql` | DDL: sessions, summaries, turn_embeddings, FTS indexes |
 | `schema/002-entities.sql` | DDL: entities, mentions, relations, entity_sessions |
 | `schema/003-trust-feedback.sql` | DDL: trust_score column, session_feedback audit trail |
@@ -395,9 +409,48 @@ createAquifer({
 Fallback chain: `config.entities.scope` → `'default'`.
-### Consumers (CLI, MCP, OpenClaw plugin)
+### MCP Server (primary integration)
+Agent hosts should connect through the Aquifer MCP server. For OpenClaw, add to `openclaw.json`:
+```json
+{
+  "mcp": {
+    "servers": {
+      "aquifer": {
+        "command": "node",
+        "args": ["/path/to/aquifer/consumers/mcp.js"],
+        "env": {
+          "DATABASE_URL": "postgresql://...",
+          "AQUIFER_SCHEMA": "aquifer",
+          "AQUIFER_EMBED_BASE_URL": "http://localhost:11434/v1",
+          "AQUIFER_EMBED_MODEL": "bge-m3"
+        }
+      }
+    }
+  }
+}
+```
+Tools are exposed as `aquifer__session_recall`, `aquifer__session_feedback`, `aquifer__memory_stats`, `aquifer__memory_pending` (server name prefix is added by the host).
+For Claude Code, add to `.claude.json`:
+```json
+{
+  "mcpServers": {
+    "aquifer": {
+      "type": "stdio",
+      "command": "node",
+      "args": ["/path/to/aquifer/consumers/mcp.js"]
+    }
+  }
+}
+```
+### CLI (secondary)
-For consumer-based setup using environment variables instead of code:
+For command-line use with environment variables:
 ```bash
 export DATABASE_URL="postgresql://..."

package/consumers/cli.js CHANGED Viewed

@@ -14,7 +14,6 @@
  */
 const { createAquiferFromConfig } = require('./shared/factory');
-const { loadConfig } = require('./shared/config');
 // ---------------------------------------------------------------------------
 // Argument parser (minimal, no deps)
@@ -120,30 +119,12 @@ async function cmdBackfill(aquifer, args) {
   const skipTurnEmbed = !!args.flags['skip-turn-embed'];
   const skipEntities = !!args.flags['skip-entities'];
-  const config = aquifer._config || {};
-  const schema = config.schema || 'aquifer';
-  const tenantId = config.tenantId || 'default';
-  const pool = aquifer._pool;
+  const pending = await aquifer.getPendingSessions({ limit });
-  if (!pool) {
-    console.error('Backfill requires direct pool access.');
-    process.exit(1);
-  }
-  const qi = (id) => `"${id}"`;
-  const { rows } = await pool.query(`
-    SELECT session_id, agent_id, processing_status
-    FROM ${qi(schema)}.sessions
-    WHERE tenant_id = $1
-      AND processing_status IN ('pending', 'failed')
-    ORDER BY started_at DESC
-    LIMIT $2
-  `, [tenantId, limit]);
-  console.log(`Found ${rows.length} sessions to backfill${dryRun ? ' (dry-run)' : ''}`);
+  console.log(`Found ${pending.length} sessions to backfill${dryRun ? ' (dry-run)' : ''}`);
   let enriched = 0, failed = 0;
-  for (const row of rows) {
+  for (const row of pending) {
     if (dryRun) {
       console.log(`  [dry-run] ${row.session_id} (${row.agent_id}) status=${row.processing_status}`);
       continue;
@@ -164,40 +145,12 @@ async function cmdBackfill(aquifer, args) {
     }
   }
-  console.log(`\nDone. enriched=${enriched} failed=${failed} total=${rows.length}`);
+  console.log(`\nDone. enriched=${enriched} failed=${failed} total=${pending.length}`);
   if (failed > 0) process.exitCode = 2;
 }
 async function cmdStats(aquifer, args) {
-  const config = aquifer._config || {};
-  const schema = config.schema || 'aquifer';
-  const tenantId = config.tenantId || 'default';
-  const pool = aquifer._pool;
-  if (!pool) {
-    console.error('Stats requires direct pool access.');
-    process.exit(1);
-  }
-  const qi = (id) => `"${id}"`;
-  const [sessions, summaries, turns, entities] = await Promise.all([
-    pool.query(`SELECT processing_status, COUNT(*)::int as count FROM ${qi(schema)}.sessions WHERE tenant_id = $1 GROUP BY processing_status`, [tenantId]),
-    pool.query(`SELECT COUNT(*)::int as count FROM ${qi(schema)}.session_summaries WHERE tenant_id = $1`, [tenantId]),
-    pool.query(`SELECT COUNT(*)::int as count FROM ${qi(schema)}.turn_embeddings WHERE tenant_id = $1`, [tenantId]),
-    pool.query(`SELECT COUNT(*)::int as count FROM ${qi(schema)}.entities WHERE tenant_id = $1`, [tenantId]).catch(() => ({ rows: [{ count: 0 }] })),
-  ]);
-  const timeRange = await pool.query(`SELECT MIN(started_at) as earliest, MAX(started_at) as latest FROM ${qi(schema)}.sessions WHERE tenant_id = $1`, [tenantId]);
-  const stats = {
-    sessions: Object.fromEntries(sessions.rows.map(r => [r.processing_status, r.count])),
-    sessionTotal: sessions.rows.reduce((s, r) => s + r.count, 0),
-    summaries: summaries.rows[0]?.count || 0,
-    turnEmbeddings: turns.rows[0]?.count || 0,
-    entities: entities.rows[0]?.count || 0,
-    earliest: timeRange.rows[0]?.earliest || null,
-    latest: timeRange.rows[0]?.latest || null,
-  };
+  const stats = await aquifer.getStats();
   if (args.flags.json) {
     console.log(JSON.stringify(stats, null, 2));
@@ -211,34 +164,14 @@ async function cmdStats(aquifer, args) {
 }
 async function cmdExport(aquifer, args) {
-  const config = aquifer._config || {};
-  const schema = config.schema || 'aquifer';
-  const tenantId = config.tenantId || 'default';
-  const pool = aquifer._pool;
   const output = args.flags.output || null;
   const limit = parseInt(args.flags.limit || '1000', 10);
-  if (!pool) {
-    console.error('Export requires direct pool access.');
-    process.exit(1);
-  }
-  const qi = (id) => `"${id}"`;
-  const where = [`s.tenant_id = $1`];
-  const params = [tenantId];
-  if (args.flags['agent-id']) { params.push(args.flags['agent-id']); where.push(`s.agent_id = $${params.length}`); }
-  if (args.flags.source) { params.push(args.flags.source); where.push(`s.source = $${params.length}`); }
-  params.push(limit);
-  const { rows } = await pool.query(`
-    SELECT s.*, ss.summary_text, ss.structured_summary
-    FROM ${qi(schema)}.sessions s
-    LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
-    WHERE ${where.join(' AND ')}
-    ORDER BY s.started_at DESC
-    LIMIT $${params.length}
-  `, params);
+  const rows = await aquifer.exportSessions({
+    agentId: args.flags['agent-id'],
+    source: args.flags.source,
+    limit,
+  });
   const stream = output ? require('fs').createWriteStream(output) : process.stdout;
   for (const row of rows) {
@@ -340,7 +273,7 @@ Options:
         process.exit(1);
     }
   } finally {
-    if (aquifer._pool) await aquifer._pool.end();
+    await aquifer.close();
   }
 }

package/consumers/mcp.js CHANGED Viewed

@@ -2,7 +2,12 @@
 'use strict';
 /**
- * Aquifer MCP Server — session_recall tool via Model Context Protocol.
+ * Aquifer MCP Server — canonical external contract for agent host integration.
+ *
+ * This is the primary integration surface for Aquifer. Agent hosts (Claude Code,
+ * Codex, OpenCode, etc.) should integrate through this MCP server.
+ *
+ * Tools: session_recall, session_feedback, memory_stats, memory_pending
  *
  * Usage:
  *   npx aquifer mcp
@@ -69,7 +74,7 @@ async function main() {
   const server = new McpServer({
     name: 'aquifer-memory',
-    version: '0.6.0',
+    version: '0.8.0',
   });
   server.tool(
@@ -84,6 +89,7 @@ async function main() {
       dateTo: z.string().optional().describe('End date YYYY-MM-DD'),
       entities: z.array(z.string()).optional().describe('Entity names to match'),
       entityMode: z.enum(['any', 'all']).optional().describe('"any" (default, boost) or "all" (only sessions with every entity)'),
+      mode: z.enum(['fts', 'hybrid', 'vector']).optional().describe('Recall mode: "fts" (keyword only, no embed needed), "hybrid" (default, FTS + vector), "vector" (vector only)'),
     },
     async (params) => {
       try {
@@ -100,6 +106,7 @@ async function main() {
           recallOpts.entities = params.entities;
           recallOpts.entityMode = params.entityMode || 'any';
         }
+        if (params.mode) recallOpts.mode = params.mode;
         const results = await aquifer.recall(params.query, recallOpts);
         const text = formatResults(results, params.query);
@@ -120,6 +127,7 @@ async function main() {
       sessionId: z.string().min(1).describe('Session ID to give feedback on'),
       verdict: z.enum(['helpful', 'unhelpful']).describe('Was the recalled session useful?'),
       note: z.string().optional().describe('Optional reason'),
+      agentId: z.string().optional().describe('Agent ID the session was stored under (e.g. "main"). Defaults to "agent" if omitted.'),
     },
     async (params) => {
       try {
@@ -127,6 +135,7 @@ async function main() {
         const result = await aquifer.feedback(params.sessionId, {
           verdict: params.verdict,
           note: params.note || undefined,
+          agentId: params.agentId || undefined,
         });
         return {
           content: [{ type: 'text', text: `Feedback: ${result.verdict} (trust ${result.trustBefore.toFixed(2)} → ${result.trustAfter.toFixed(2)})` }],
@@ -140,9 +149,64 @@ async function main() {
     }
   );
+  server.tool(
+    'memory_stats',
+    'Return storage statistics for the Aquifer memory store (session counts by status, summaries, turn embeddings, entities, date range).',
+    {},
+    async () => {
+      try {
+        const aquifer = getAquifer();
+        const stats = await aquifer.getStats();
+        const lines = [
+          `Sessions: ${stats.sessionTotal} total`,
+        ];
+        for (const [status, count] of Object.entries(stats.sessions)) {
+          lines.push(`  ${status}: ${count}`);
+        }
+        lines.push(`Summaries: ${stats.summaries}`);
+        lines.push(`Turn embeddings: ${stats.turnEmbeddings}`);
+        lines.push(`Entities: ${stats.entities}`);
+        if (stats.earliest) lines.push(`Date range: ${new Date(stats.earliest).toISOString().slice(0, 10)} → ${new Date(stats.latest).toISOString().slice(0, 10)}`);
+        return { content: [{ type: 'text', text: lines.join('\n') }] };
+      } catch (err) {
+        return {
+          content: [{ type: 'text', text: `memory_stats error: ${err.message}` }],
+          isError: true,
+        };
+      }
+    }
+  );
+  server.tool(
+    'memory_pending',
+    'List sessions with pending or failed processing status.',
+    {
+      limit: z.number().int().min(1).max(200).optional().describe('Max results (default 20)'),
+    },
+    async (params) => {
+      try {
+        const aquifer = getAquifer();
+        const rows = await aquifer.getPendingSessions({ limit: params.limit ?? 20 });
+        if (rows.length === 0) {
+          return { content: [{ type: 'text', text: 'No pending or failed sessions.' }] };
+        }
+        const lines = [`${rows.length} pending/failed session(s):\n`];
+        for (const row of rows) {
+          lines.push(`${row.session_id}  [${row.processing_status}]  agent=${row.agent_id}`);
+        }
+        return { content: [{ type: 'text', text: lines.join('\n') }] };
+      } catch (err) {
+        return {
+          content: [{ type: 'text', text: `memory_pending error: ${err.message}` }],
+          isError: true,
+        };
+      }
+    }
+  );
   // Graceful shutdown
   const cleanup = async () => {
-    if (_aquifer?._pool) await _aquifer._pool.end().catch(() => {});
+    if (_aquifer) await _aquifer.close().catch(() => {});
     process.exit(0);
   };
   process.on('SIGINT', cleanup);
@@ -153,7 +217,7 @@ async function main() {
   // Clean up pool when transport closes (stdin EOF)
   transport.onclose = async () => {
-    if (_aquifer?._pool) await _aquifer._pool.end().catch(() => {});
+    if (_aquifer) await _aquifer.close().catch(() => {});
   };
 }

package/consumers/openclaw-plugin.js CHANGED Viewed

@@ -1,11 +1,17 @@
 'use strict';
 /**
- * Aquifer Memory — OpenClaw Plugin
+ * Aquifer Memory — OpenClaw Host Adapter
  *
- * Auto-captures sessions on before_reset and provides session_recall tool.
- * Install: add to openclaw.json plugins or extensions directory.
+ * Ingest adapter: auto-captures sessions on before_reset.
+ * Tool adapter: exposes session_recall/session_feedback via OpenClaw registerTool().
+ *
+ * Status: COMPATIBILITY ONLY. The official tool delivery path is mcp.servers.aquifer
+ * (see consumers/mcp.js). registerTool() exposure has OpenClaw upstream limitations
+ * that prevent reliable tool visibility. This plugin is retained for before_reset
+ * session capture; tool registration code is kept for future upstream fixes.
  *
+ * Install: add to openclaw.json plugins or extensions directory.
  * Config via plugin config, environment variables, or aquifer.config.json.
  */
@@ -169,6 +175,10 @@ module.exports = {
             } catch (enrichErr) {
               api.logger.warn(`[aquifer-memory] enrich failed for ${sessionId}: ${enrichErr.message}`);
             }
+          } else {
+            try {
+              await aquifer.skip(sessionId, { agentId, reason: `user_count=${norm.userCount} < min=${minUserMessages}` });
+            } catch (e) { api.logger.warn(`[aquifer-memory] skip failed for ${sessionId}: ${e.message}`); }
           }
           recentlyProcessed.set(dedupKey, Date.now());
@@ -189,8 +199,6 @@ module.exports = {
     // --- session_recall tool ---
-    // --- session_recall tool ---
     api.registerTool((ctx) => {
       if ((ctx?.sessionKey || '').includes('subagent')) return null;
@@ -208,6 +216,7 @@ module.exports = {
             dateTo: { type: 'string', description: 'End date YYYY-MM-DD' },
             entities: { type: 'array', items: { type: 'string' }, description: 'Entity names to match' },
             entityMode: { type: 'string', enum: ['any', 'all'], description: '"any" (default, boost) or "all" (only sessions with every entity)' },
+            mode: { type: 'string', enum: ['fts', 'hybrid', 'vector'], description: 'Recall mode: "fts" (keyword only), "hybrid" (default), "vector" (vector only)' },
           },
           required: ['query'],
         },
@@ -225,6 +234,7 @@ module.exports = {
               recallOpts.entities = params.entities;
               recallOpts.entityMode = params.entityMode || 'any';
             }
+            if (params.mode) recallOpts.mode = params.mode;
             const results = await aquifer.recall(params.query, recallOpts);
             const text = formatRecallResults(results);
@@ -253,14 +263,17 @@ module.exports = {
             sessionId: { type: 'string', description: 'Session ID to give feedback on' },
             verdict: { type: 'string', enum: ['helpful', 'unhelpful'], description: 'Was the recalled session useful?' },
             note: { type: 'string', description: 'Optional reason' },
+            agentId: { type: 'string', description: 'Agent ID the session was stored under (e.g. "main"). Defaults to context agent or "agent" if omitted.' },
           },
           required: ['sessionId', 'verdict'],
         },
         async execute(_toolCallId, params) {
           try {
+            const resolvedAgentId = params.agentId || ctx?.agentId || undefined;
             const result = await aquifer.feedback(params.sessionId, {
               verdict: params.verdict,
               note: params.note || undefined,
+              agentId: resolvedAgentId,
             });
             return {
               content: [{ type: 'text', text: `Feedback: ${result.verdict} (trust ${result.trustBefore.toFixed(2)} → ${result.trustAfter.toFixed(2)})` }],

package/consumers/shared/config.js CHANGED Viewed

@@ -33,10 +33,10 @@ const DEFAULTS = {
   rank: { rrf: 0.65, timeDecay: 0.25, access: 0.10, entityBoost: 0.18 },
   rerank: {
     enabled: false,
-    provider: null,    // 'tei' | 'jina' | 'custom'
+    provider: null,    // 'tei' | 'jina' | 'openrouter' | 'custom'
     baseUrl: null,     // TEI base URL
-    apiKey: null,      // Jina API key
-    model: null,       // Jina model override
+    apiKey: null,      // Jina / OpenRouter API key
+    model: null,       // model override (Jina / OpenRouter)
     topK: 20,
     maxChars: 1600,
     timeoutMs: 2000,

package/consumers/shared/factory.js CHANGED Viewed

@@ -71,12 +71,18 @@ function createAquiferFromConfig(overrides) {
       if (rc.model) rerankConfig.jinaModel = rc.model;
       rerankConfig.timeout = rc.timeoutMs || 2000;
       rerankConfig.maxRetries = rc.maxRetries ?? 1;
+    } else if (rc.provider === 'openrouter') {
+      rerankConfig.openrouterApiKey = rc.apiKey;
+      if (rc.model) rerankConfig.model = rc.model;
+      rerankConfig.timeout = rc.timeoutMs || 5000;
+      rerankConfig.maxRetries = rc.maxRetries ?? 1;
     }
     rerankOpts = rerankConfig;
   }
   const aquifer = createAquifer({
     db: pool,
+    ownsPool: true,
     schema: config.schema,
     tenantId: config.tenantId,
     embed: embedFn ? { fn: embedFn, dim: config.embed.dim || null } : null,
@@ -86,10 +92,6 @@ function createAquiferFromConfig(overrides) {
     rerank: rerankOpts,
   });
-  // Attach pool for lifecycle management
-  aquifer._pool = pool;
-  aquifer._config = config;
   return aquifer;
 }

package/core/aquifer.js CHANGED Viewed

@@ -77,6 +77,7 @@ function createAquifer(config) {
     ownsPool = true;
   } else {
     pool = config.db;
+    ownsPool = !!config.ownsPool;  // allow factory to claim ownership
   }
   // Embed config (lazy — only required for recall/enrich)
@@ -99,8 +100,18 @@ function createAquifer(config) {
   const entityPromptFn = config.entities && config.entities.prompt ? config.entities.prompt : null;
   const entityScope = (config.entities && config.entities.scope) || 'default';
-  // FTS config (default: 'simple'; set to 'zhcfg' for Chinese tokenization)
-  const ftsConfig = config.ftsConfig || 'simple';
+  // FTS config — locked to 'simple'.
+  // The search_tsv trigger always uses to_tsvector('simple', ...), so query-time
+  // config must match.  Warn and override if someone passes anything else.
+  const _rawFtsConfig = config.ftsConfig || 'simple';
+  if (_rawFtsConfig !== 'simple') {
+    console.warn(
+      `[aquifer] ftsConfig '${_rawFtsConfig}' is not currently supported. ` +
+      `The search_tsv index is built with 'simple'; only 'simple' is valid at query time. ` +
+      `Overriding to 'simple'.`
+    );
+  }
+  const ftsConfig = 'simple';
   // Rank weights
   const rankWeights = {
@@ -551,7 +562,16 @@ function createAquifer(config) {
     async recall(query, opts = {}) {
       if (!query) return [];
-      requireEmbed('recall');
+      const VALID_MODES = ['fts', 'hybrid', 'vector'];
+      const mode = opts.mode !== undefined ? opts.mode : 'hybrid';
+      if (!VALID_MODES.includes(mode)) {
+        throw new Error(`Invalid recall mode: "${mode}". Must be one of: ${VALID_MODES.join(', ')}`);
+      }
+      if (mode === 'hybrid' || mode === 'vector') {
+        requireEmbed('recall');
+      }
       const {
         agentId,
@@ -582,10 +602,13 @@ function createAquifer(config) {
       const rerankTopK = rerankEnabled ? Math.max(limit, opts.rerankTopK || defaultRerankTopK) : limit;
       const fetchLimit = rerankTopK * 4;
-      // 1. Embed query
-      const queryVecResult = await embedFn([query]);
-      const queryVec = queryVecResult[0];
-      if (!queryVec || !queryVec.length) return []; // m3: guard empty array too
+      // 1. Embed query (only needed for hybrid/vector modes)
+      let queryVec = null;
+      if (mode === 'hybrid' || mode === 'vector') {
+        const queryVecResult = await embedFn([query]);
+        queryVec = queryVecResult[0];
+        if (!queryVec || !queryVec.length) return []; // m3: guard empty array too
+      }
       // 2. Entity intersection pre-filter (when entityMode === 'all')
       let candidateSessionIds = null; // null = no filter
@@ -661,17 +684,26 @@ function createAquifer(config) {
         } catch (_) { /* entity search failure non-fatal */ }
       }
-      // 3. Run 3 search paths in parallel
+      // 3. Run search paths in parallel (conditioned on mode)
+      const runFts = mode === 'fts' || mode === 'hybrid';
+      const runVector = mode === 'vector' || mode === 'hybrid';
       const [ftsRows, embRows, turnResult] = await Promise.all([
-        storage.searchSessions(pool, query, {
-          schema, tenantId, agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit, ftsConfig,
-        }).catch(() => []),
-        embeddingSearchSummaries(queryVec, {
-          agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit,
-        }).catch(() => []),
-        storage.searchTurnEmbeddings(pool, {
-          schema, tenantId, queryVec, dateFrom, dateTo, agentIds: resolvedAgentIds, source, limit: fetchLimit,
-        }).catch(() => ({ rows: [] })),
+        runFts
+          ? storage.searchSessions(pool, query, {
+              schema, tenantId, agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit, ftsConfig,
+            }).catch(() => [])
+          : Promise.resolve([]),
+        runVector
+          ? embeddingSearchSummaries(queryVec, {
+              agentIds: resolvedAgentIds, source, dateFrom, dateTo, limit: fetchLimit,
+            }).catch(() => [])
+          : Promise.resolve([]),
+        runVector
+          ? storage.searchTurnEmbeddings(pool, {
+              schema, tenantId, queryVec, dateFrom, dateTo, agentIds: resolvedAgentIds, source, limit: fetchLimit,
+            }).catch(() => ({ rows: [] }))
+          : Promise.resolve({ rows: [] }),
       ]);
       const turnRows = turnResult.rows || [];
@@ -836,6 +868,27 @@ function createAquifer(config) {
       return storage.getSession(pool, sessionId, agentId, opts, { schema, tenantId });
     },
+    async skip(sessionId, opts = {}) {
+      const agentId = opts.agentId || 'agent';
+      const reason = opts.reason || null;
+      // Atomic CAS: only skip if still pending (avoids race with concurrent enrich)
+      const result = await pool.query(
+        `UPDATE ${qi(schema)}.sessions
+        SET processing_status = 'skipped', processing_error = $1
+        WHERE session_id = $2 AND agent_id = $3 AND tenant_id = $4
+          AND processing_status = 'pending'
+        RETURNING id`,
+        [reason, sessionId, agentId, tenantId]
+      );
+      if (result.rows.length === 0) {
+        // Check if session exists at all
+        const existing = await storage.getSession(pool, sessionId, agentId, {}, { schema, tenantId });
+        if (!existing) throw new Error(`Session not found: ${sessionId} (agentId=${agentId})`);
+        return null; // exists but not pending — no-op
+      }
+      return { id: result.rows[0].id, sessionId, agentId, status: 'skipped' };
+    },
     async getSessionFull(sessionId) {
       // Try to find the session across agents by querying directly
       const result = await pool.query(
@@ -868,6 +921,93 @@ function createAquifer(config) {
         summary: sumResult.rows[0] || null,
       };
     },
+    // --- public config accessor ---
+    getConfig() {
+      return { schema, tenantId };
+    },
+    // --- admin query helpers ---
+    async getStats() {
+      const [sessions, summaries, turns, timeRange] = await Promise.all([
+        pool.query(
+          `SELECT processing_status, COUNT(*)::int as count
+          FROM ${qi(schema)}.sessions WHERE tenant_id = $1
+          GROUP BY processing_status`,
+          [tenantId]
+        ),
+        pool.query(
+          `SELECT COUNT(*)::int as count FROM ${qi(schema)}.session_summaries WHERE tenant_id = $1`,
+          [tenantId]
+        ),
+        pool.query(
+          `SELECT COUNT(*)::int as count FROM ${qi(schema)}.turn_embeddings WHERE tenant_id = $1`,
+          [tenantId]
+        ),
+        pool.query(
+          `SELECT MIN(started_at) as earliest, MAX(started_at) as latest
+          FROM ${qi(schema)}.sessions WHERE tenant_id = $1`,
+          [tenantId]
+        ),
+      ]);
+      let entityCount = 0;
+      try {
+        const entResult = await pool.query(
+          `SELECT COUNT(*)::int as count FROM ${qi(schema)}.entities WHERE tenant_id = $1`,
+          [tenantId]
+        );
+        entityCount = entResult.rows[0]?.count || 0;
+      } catch (_) { /* entities table may not exist */ }
+      return {
+        sessions: Object.fromEntries(sessions.rows.map(r => [r.processing_status, r.count])),
+        sessionTotal: sessions.rows.reduce((s, r) => s + r.count, 0),
+        summaries: summaries.rows[0]?.count || 0,
+        turnEmbeddings: turns.rows[0]?.count || 0,
+        entities: entityCount,
+        earliest: timeRange.rows[0]?.earliest || null,
+        latest: timeRange.rows[0]?.latest || null,
+      };
+    },
+    async getPendingSessions(opts = {}) {
+      const limit = opts.limit !== undefined ? opts.limit : 100;
+      const result = await pool.query(
+        `SELECT session_id, agent_id, processing_status
+        FROM ${qi(schema)}.sessions
+        WHERE tenant_id = $1
+          AND processing_status IN ('pending', 'failed')
+        ORDER BY started_at DESC
+        LIMIT $2`,
+        [tenantId, limit]
+      );
+      return result.rows;
+    },
+    async exportSessions(opts = {}) {
+      const { agentId, source, limit = 1000 } = opts;
+      const where = [`s.tenant_id = $1`];
+      const params = [tenantId];
+      if (agentId) { params.push(agentId); where.push(`s.agent_id = $${params.length}`); }
+      if (source) { params.push(source); where.push(`s.source = $${params.length}`); }
+      params.push(limit);
+      const result = await pool.query(
+        `SELECT s.session_id, s.agent_id, s.source, s.started_at, s.msg_count,
+                s.processing_status, ss.summary_text, ss.structured_summary
+        FROM ${qi(schema)}.sessions s
+        LEFT JOIN ${qi(schema)}.session_summaries ss ON ss.session_row_id = s.id
+        WHERE ${where.join(' AND ')}
+        ORDER BY s.started_at DESC
+        LIMIT $${params.length}`,
+        params
+      );
+      return result.rows;
+    },
   };
   return aquifer;

package/core/storage.js CHANGED Viewed

@@ -31,7 +31,7 @@ const TURN_NOISE_RE = [
   /^A new session was started via \/new/,
 ];
-const VALID_STATUSES = new Set(['pending', 'processing', 'succeeded', 'partial', 'failed']);
+const VALID_STATUSES = new Set(['pending', 'processing', 'succeeded', 'partial', 'failed', 'skipped']);
 // ---------------------------------------------------------------------------
 // upsertSession
@@ -339,8 +339,17 @@ async function searchSessions(pool, query, {
   ftsConfig = 'simple',
 } = {}) {
   const clampedLimit = Math.max(1, Math.min(100, limit));
-  // Sanitize ftsConfig to prevent SQL injection (must be a valid regconfig name)
-  const safeFts = /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(ftsConfig) ? ftsConfig : 'simple';
+  // FTS config is locked to 'simple' — the search_tsv trigger always uses
+  // to_tsvector('simple', ...) so query semantics must match.  Warn callers
+  // that pass a different value rather than silently honouring it.
+  if (ftsConfig !== 'simple') {
+    console.warn(
+      `[aquifer/storage] searchSessions: ftsConfig '${ftsConfig}' ignored. ` +
+      `Only 'simple' is supported (index is built with simple tokenizer). ` +
+      `Using 'simple'.`
+    );
+  }
+  const safeFts = 'simple';
   // Normalize agentId/agentIds
   const agentIds = rawAgentIds && rawAgentIds.length > 0

package/index.js CHANGED Viewed

@@ -3,5 +3,6 @@
 const { createAquifer } = require('./core/aquifer');
 const { createEmbedder } = require('./pipeline/embed');
 const { createReranker } = require('./pipeline/rerank');
+const { normalizeSession, detectClient } = require('./pipeline/normalize');
-module.exports = { createAquifer, createEmbedder, createReranker };
+module.exports = { createAquifer, createEmbedder, createReranker, normalizeSession, detectClient };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@shadowforge0/aquifer-memory",
-  "version": "0.7.0",
+  "version": "0.8.0",
   "description": "PG-native long-term memory for AI agents. Turn-level embedding, hybrid RRF ranking, optional knowledge graph. Includes CLI, MCP server, and OpenClaw plugin.",
   "main": "index.js",
   "files": [
@@ -35,8 +35,8 @@
     "pg": "^8.13.0"
   },
   "optionalDependencies": {
-    "@modelcontextprotocol/sdk": "^1.12.0",
-    "zod": "^3.24.0"
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "zod": "^3.25.76"
   },
   "engines": {
     "node": ">=18.0.0"

package/pipeline/normalize/adapters/claude-code.js ADDED Viewed

@@ -0,0 +1,90 @@
+'use strict';
+/**
+ * Claude Code adapter — for Claude Code CLI sessions.
+ * Entry types are 'user'/'assistant' (split format: one content type per entry).
+ * Text and tool_use are separate entries, enabling narration detection via look-ahead.
+ */
+const { extractContent } = require('../extract');
+const { parseTimestamp } = require('../timestamp');
+const { MAX_NARRATION_CHARS } = require('../constants');
+// Tags that wrap CLI-internal command output; any entry containing one is skipped.
+const LOCAL_COMMAND_TAGS = ['<local-command-caveat>', '<local-command-stdout>', '<local-command-stderr>'];
+module.exports = {
+    name: 'claude-code',
+    /**
+     * Report whether a raw entry has a type this adapter normalizes.
+     * Null or non-object entries simply do not match (no TypeError).
+     * @param {any} entry - Raw session entry
+     * @returns {boolean}
+     */
+    detect(entry) {
+        // Only count entry types that participate in normalize
+        return entry?.type === 'user' || entry?.type === 'assistant';
+    },
+    /**
+     * Convert one raw entry into an intermediate record.
+     * Skipped entries come back as `{ idx, toolNames, adapterSkip }` where
+     * adapterSkip is one of 'nonMessage', 'toolResult', 'noRole', 'meta',
+     * 'caveat', 'toolOnly' or 'narration'. Kept entries carry role, text,
+     * timestamp, toolNames, commandName, isInterrupt and `adapterSkip: null`.
+     * @param {any} entry - Raw session entry
+     * @param {{ idx: number, rawEntries: any[] }} ctx - Position of the entry and the full entry list (used for look-ahead)
+     * @returns {object} Intermediate record
+     */
+    toIntermediate(entry, ctx) {
+        const { idx, rawEntries } = ctx;
+        // Optional chaining: a null/primitive entry is treated as a non-message instead of throwing.
+        const entryType = entry?.type;
+        if (entryType !== 'user' && entryType !== 'assistant') {
+            return { idx, toolNames: [], adapterSkip: 'nonMessage' };
+        }
+        const role = entry.message?.role || entryType;
+        if (role === 'toolResult') {
+            return { idx, toolNames: [], adapterSkip: 'toolResult' };
+        }
+        if (role !== 'user' && role !== 'assistant') {
+            return { idx, role: null, toolNames: [], adapterSkip: 'noRole' };
+        }
+        if (entry.isMeta) {
+            return { idx, toolNames: [], adapterSkip: 'meta' };
+        }
+        const { text, commandName, toolNames } = extractContent(entry.message);
+        // CLI internal command output tags
+        if (LOCAL_COMMAND_TAGS.some(tag => text.includes(tag))) {
+            return { idx, toolNames, adapterSkip: 'caveat' };
+        }
+        const isInterrupt = text.startsWith('[Request interrupted by user');
+        // Tool-use-only assistant entry (no visible text, only tool calls)
+        if (!text && toolNames.length > 0 && role === 'assistant') {
+            return { idx, toolNames, adapterSkip: 'toolOnly' };
+        }
+        // Narration detection: short text entry immediately followed by a tool_use entry.
+        // Claude Code splits text and tool_use into separate JSONL entries.
+        // A short text before a tool call is narration ("Now reading X...", "Let me check...").
+        if (role === 'assistant' && text && text.length < MAX_NARRATION_CHARS) {
+            // Look at the next two entries only; the first assistant entry among them decides.
+            const nextAssistant = rawEntries
+                .slice(idx + 1, idx + 3)
+                .find(candidate => candidate?.type === 'assistant');
+            const nextContent = nextAssistant?.message?.content;
+            const nextIsTool = Array.isArray(nextContent)
+                && nextContent.some(block => block?.type === 'tool_use');
+            if (nextIsTool) {
+                return { idx, toolNames, adapterSkip: 'narration' };
+            }
+        }
+        return {
+            idx, role, text,
+            timestamp: parseTimestamp(entry),
+            toolNames, commandName, isInterrupt,
+            adapterSkip: null,
+        };
+    },
+    // Messages whose text matches one of these are treated as routine noise.
+    routinePatterns: [
+        /^<task-notification>/,
+    ],
+    // Slash commands that are skipped in addition to the shared skip list.
+    skipCommands: [
+        '/model', '/cost', '/memory', '/permissions', '/diff', '/review',
+        '/doctor', '/login', '/logout', '/mcp', '/context', '/fast',
+        '/think', '/vim', '/exit',
+    ],
+};

package/pipeline/normalize/adapters/gateway.js ADDED Viewed

@@ -0,0 +1,67 @@
+'use strict';
+/**
+ * Gateway adapter — for AI gateway servers that produce type='message' entries.
+ * Content blocks combine text + thinking + toolCall in a single entry.
+ * Supports channel metadata stripping (Discord, Telegram, etc.).
+ */
+const { extractContent } = require('../extract');
+const { parseTimestamp } = require('../timestamp');
+// Leading "Conversation info" / "Sender" metadata blocks (each running up to a
+// ``` fence followed by a newline) that gateway routing layers put before user text.
+const METADATA_PREFIX_RE = /^(?:Conversation info \(untrusted metadata\):[\s\S]*?```\s*\n\s*)?(?:Sender \(untrusted metadata\):[\s\S]*?```\s*\n\s*)?/;
+/**
+ * Drop the leading channel-metadata blocks from a message and trim the rest.
+ * If nothing is left after stripping, the input is returned untouched.
+ * @param {string} text - Message text, possibly prefixed with metadata
+ * @returns {string} Trimmed remainder, or the original text when the remainder is empty
+ */
+function stripChannelMetadata(text) {
+    const withoutPrefix = text.replace(METADATA_PREFIX_RE, '');
+    const remainder = withoutPrefix.trim();
+    if (remainder === '') {
+        return text;
+    }
+    return remainder;
+}
+module.exports = {
+    name: 'gateway',
+    /**
+     * Report whether a raw entry is a gateway message entry.
+     * Null or non-object entries simply do not match (no TypeError).
+     * @param {any} entry - Raw session entry
+     * @returns {boolean}
+     */
+    detect(entry) {
+        return entry?.type === 'message';
+    },
+    /**
+     * Convert one raw entry into an intermediate record.
+     * Skipped entries come back with adapterSkip set to 'nonMessage',
+     * 'toolResult' or 'noRole'; kept entries carry role, text, timestamp,
+     * toolNames, commandName, isInterrupt and `adapterSkip: null`.
+     * @param {any} entry - Raw session entry
+     * @param {{ idx: number }} ctx - Position of the entry in the session
+     * @returns {object} Intermediate record
+     */
+    toIntermediate(entry, ctx) {
+        const { idx } = ctx;
+        // Optional chaining: a null/primitive entry is treated as a non-message instead of throwing.
+        if (entry?.type !== 'message') {
+            return { idx, toolNames: [], adapterSkip: 'nonMessage' };
+        }
+        const msg = entry.message;
+        const role = msg?.role;
+        if (role === 'toolResult') {
+            return { idx, toolNames: [], adapterSkip: 'toolResult' };
+        }
+        if (role !== 'user' && role !== 'assistant') {
+            return { idx, role: null, toolNames: [], adapterSkip: 'noRole' };
+        }
+        const { text, commandName, toolNames } = extractContent(msg);
+        const isInterrupt = text.startsWith('[Request interrupted by user');
+        // Only user text gets the channel metadata prefix stripped; interrupt markers are left as-is.
+        const shouldStrip = role === 'user' && text && !isInterrupt;
+        const finalText = shouldStrip ? stripChannelMetadata(text) : text;
+        return {
+            idx, role, text: finalText,
+            timestamp: parseTimestamp(entry),
+            toolNames, commandName, isInterrupt,
+            adapterSkip: null,
+        };
+    },
+    // Messages whose text matches one of these are treated as routine noise.
+    routinePatterns: [
+        /^HEARTBEAT_OK$/,
+        /^THINK_OK$/,
+        /^\[Queued messages while agent was busy\]/,
+    ],
+    // No adapter-specific skip commands; only the shared list applies.
+    skipCommands: [],
+};

package/pipeline/normalize/constants.js ADDED Viewed

@@ -0,0 +1,12 @@
+'use strict';
+// Upper bound on the characters kept from one message's text
+const MAX_MSG_CHARS = 8000;
+// Assistant text shorter than this counts as "short" for narration detection
+const MAX_NARRATION_CHARS = 200;
+// Slash commands with no conversational value; such entries are dropped
+const SKIP_COMMANDS = new Set([
+    '/clear',
+    '/compact',
+    '/help',
+    '/status',
+    '/config',
+]);
+// Slash commands that start a fresh session; kept as boundary markers
+const RESET_COMMANDS = new Set([
+    '/new',
+    '/reset',
+]);
+module.exports = { SKIP_COMMANDS, RESET_COMMANDS, MAX_MSG_CHARS, MAX_NARRATION_CHARS };

package/pipeline/normalize/detect.js ADDED Viewed

@@ -0,0 +1,52 @@
+'use strict';
+const gatewayAdapter = require('./adapters/gateway');
+const claudeCodeAdapter = require('./adapters/claude-code');
+const ADAPTERS = [gatewayAdapter, claudeCodeAdapter];
+/**
+ * Auto-detect the client type from raw session entries.
+ * Samples the first 5 entries and picks the adapter with the most matches.
+ * Sampled entries that are null or not objects count as matching no adapter.
+ * @param {any[]} rawEntries
+ * @returns {string} Client name ('gateway' | 'claude-code')
+ * @throws {Error} If entries are empty, no adapter matches, or detection is ambiguous
+ */
+function detectClient(rawEntries) {
+    if (!rawEntries || rawEntries.length === 0) {
+        throw new Error('Cannot detect client: empty entries');
+    }
+    // Drop null/primitive entries before scoring so adapter.detect never
+    // dereferences them; they stay part of the 5-entry window, just unmatched.
+    const sample = rawEntries
+        .slice(0, 5)
+        .filter(entry => entry !== null && typeof entry === 'object');
+    const scores = ADAPTERS.map(adapter => ({
+        name: adapter.name,
+        count: sample.filter(entry => adapter.detect(entry)).length,
+    }));
+    // Highest match count first; ties keep ADAPTERS order (sort is stable).
+    scores.sort((a, b) => b.count - a.count);
+    if (scores[0].count === 0) {
+        throw new Error('Cannot detect session client type. Pass opts.client explicitly.');
+    }
+    if (scores.length > 1 && scores[0].count === scores[1].count) {
+        throw new Error(`Ambiguous client detection (${scores[0].name}=${scores[0].count}, ${scores[1].name}=${scores[1].count}). Pass opts.client explicitly.`);
+    }
+    return scores[0].name;
+}
+/**
+ * Look up a registered adapter by its client name.
+ * @param {string} clientType - Adapter name to find
+ * @returns {object} The first adapter whose name equals clientType
+ * @throws {Error} If no registered adapter has that name
+ */
+function getAdapter(clientType) {
+    const found = ADAPTERS.find(candidate => candidate.name === clientType);
+    if (found) {
+        return found;
+    }
+    const knownNames = ADAPTERS.map(a => a.name).join(', ');
+    throw new Error(`Unknown client type: "${clientType}". Known: ${knownNames}`);
+}
+module.exports = { detectClient, getAdapter, ADAPTERS };

package/pipeline/normalize/extract.js ADDED Viewed

@@ -0,0 +1,49 @@
+'use strict';
+// Content extraction utilities shared across adapters
+/**
+ * Pull the slash command out of a `<command-name>/cmd</command-name>` tag.
+ * @param {any} content - Candidate text; anything that is not a string yields null
+ * @returns {string|null} The command including its leading slash, or null when absent
+ */
+function extractCommandName(content) {
+    if (typeof content !== 'string') {
+        return null;
+    }
+    const found = /<command-name>(\/\w+)<\/command-name>/.exec(content);
+    return found === null ? null : found[1];
+}
+/**
+ * Extract text, command name, and tool names from a message object.
+ * Handles both string content and content block arrays. Array items that
+ * are null or not objects are ignored rather than dereferenced.
+ * @param {object} msg - Message object with .content field
+ * @returns {{ text: string, commandName: string|null, toolNames: string[] }}
+ */
+function extractContent(msg) {
+    if (!msg) return { text: '', commandName: null, toolNames: [] };
+    const content = msg.content;
+    let commandName = null;
+    const toolNames = [];
+    if (typeof content === 'string') {
+        commandName = extractCommandName(content);
+        return { text: content.trim(), commandName, toolNames };
+    }
+    if (Array.isArray(content)) {
+        const texts = [];
+        for (const item of content) {
+            // A null/undefined block would throw on `.type`; primitives never matched anyway.
+            if (item === null || typeof item !== 'object') continue;
+            if (item.type === 'text' && item.text) {
+                // The last text block that carries a command tag wins.
+                const cmd = extractCommandName(item.text);
+                if (cmd) commandName = cmd;
+                texts.push(item.text);
+            }
+            // tool_use: Claude Code / Anthropic API format
+            // toolCall: gateway / OpenAI-style format
+            if ((item.type === 'tool_use' || item.type === 'toolCall') && item.name) {
+                toolNames.push(item.name);
+            }
+        }
+        return { text: texts.join('\n').trim(), commandName, toolNames };
+    }
+    // Content is neither a string nor an array: nothing to extract.
+    return { text: '', commandName, toolNames };
+}
+module.exports = { extractContent, extractCommandName };

package/pipeline/normalize/index.js ADDED Viewed

@@ -0,0 +1,129 @@
+'use strict';
+const { SKIP_COMMANDS, RESET_COMMANDS, MAX_MSG_CHARS } = require('./constants');
+const { detectClient, getAdapter } = require('./detect');
+// Build a zeroed counter object: `total` plus one counter per skip reason.
+function createSkipStats() {
+    return { total: 0, nonMessage: 0, noRole: 0, meta: 0, caveat: 0,
+        empty: 0, toolOnly: 0, narration: 0, toolResult: 0, routine: 0, command: 0 };
+}
+// Scan normalized messages for reset-command and idle-gap boundaries.
+function findBoundaries(normalized, idleGapMs) {
+    const boundaries = [];
+    normalized.forEach((cur, i) => {
+        const prev = i > 0 ? normalized[i - 1] : null;
+        if (cur.isResetCommand) {
+            boundaries.push({ type: 'command', at_index: i, reason: cur.commandName });
+        }
+        // Messages without a timestamp on either side cannot produce an idle gap.
+        if (prev?.timestamp && cur.timestamp) {
+            const gapMs = new Date(cur.timestamp).getTime() - new Date(prev.timestamp).getTime();
+            if (gapMs > idleGapMs) {
+                boundaries.push({ type: 'idle_gap', at_index: i, gap_minutes: Math.round(gapMs / 60000) });
+            }
+        }
+    });
+    return boundaries;
+}
+/**
+ * Normalize raw session entries into effective messages.
+ *
+ * Accepts raw JSONL entries from any supported client (gateway, Claude Code, etc.)
+ * and produces a clean, uniform array of conversational messages suitable for
+ * summarization, embedding, and recall.
+ *
+ * @param {any[]} rawEntries - Raw JSONL entries from a session file
+ * @param {object} [opts]
+ * @param {string} [opts.client] - Client type: 'gateway' | 'claude-code'. Auto-detected if omitted.
+ * @param {number} [opts.idleGapMs] - Idle gap threshold for boundary detection (default: 2 hours; 0 also selects the default)
+ * @returns {{ normalized: object[], skipStats: object, boundaries: object[], toolsUsed: string[] }}
+ * @throws {Error} If the client cannot be detected or is unknown, or an adapter reports an unknown skip reason
+ */
+function normalizeSession(rawEntries, opts = {}) {
+    if (!rawEntries || rawEntries.length === 0) {
+        return {
+            normalized: [],
+            skipStats: createSkipStats(),
+            boundaries: [],
+            toolsUsed: [],
+        };
+    }
+    const idleGapMs = opts.idleGapMs || 2 * 60 * 60 * 1000;
+    // 1. Select adapter
+    const clientType = opts.client || detectClient(rawEntries);
+    const adapter = getAdapter(clientType);
+    // 2. Merge adapter-specific constants with shared constants
+    const allSkipCommands = new Set([...SKIP_COMMANDS, ...(adapter.skipCommands || [])]);
+    const allRoutinePatterns = adapter.routinePatterns || [];
+    // 3. Main loop: adapter.toIntermediate → shared filter → collect
+    const normalized = [];
+    const skipStats = createSkipStats();
+    const toolsUsed = new Set();
+    for (let idx = 0; idx < rawEntries.length; idx++) {
+        skipStats.total++;
+        const parsed = adapter.toIntermediate(rawEntries[idx], { idx, rawEntries });
+        // Collect tool names even from skipped entries
+        if (parsed.toolNames?.length) {
+            for (const tn of parsed.toolNames) toolsUsed.add(tn);
+        }
+        // Adapter-determined skip
+        if (parsed.adapterSkip) {
+            // Own-key check: `in` would also accept inherited names such as
+            // 'toString', and 'total' is a running count, not a skip reason.
+            const isKnownReason = parsed.adapterSkip !== 'total'
+                && Object.hasOwn(skipStats, parsed.adapterSkip);
+            if (!isKnownReason) {
+                throw new Error(`Unknown adapterSkip reason: "${parsed.adapterSkip}" from ${clientType} adapter`);
+            }
+            skipStats[parsed.adapterSkip]++;
+            continue;
+        }
+        // Shared: invalid role
+        if (parsed.role !== 'user' && parsed.role !== 'assistant') {
+            skipStats.noRole++;
+            continue;
+        }
+        // Shared: empty text (but keep interrupts)
+        if (!parsed.text && !parsed.isInterrupt) {
+            skipStats.empty++;
+            continue;
+        }
+        // Shared: routine patterns
+        if (!parsed.isInterrupt && parsed.text && allRoutinePatterns.some(re => re.test(parsed.text.trim()))) {
+            skipStats.routine++;
+            continue;
+        }
+        // Shared: skip commands
+        if (parsed.commandName && allSkipCommands.has(parsed.commandName)) {
+            skipStats.command++;
+            continue;
+        }
+        // Shared: truncate + reset command handling
+        const isResetCommand = !!(parsed.commandName && RESET_COMMANDS.has(parsed.commandName));
+        // Reset commands are kept as boundary markers only, so their text is blanked.
+        let finalText = isResetCommand ? '' : (parsed.text || '');
+        if (finalText.length > MAX_MSG_CHARS) {
+            finalText = finalText.slice(0, MAX_MSG_CHARS) + '\n[truncated]';
+        }
+        const msg = {
+            idx: parsed.idx,
+            role: parsed.role,
+            timestamp: parsed.timestamp,
+            text: finalText,
+            commandName: parsed.commandName || null,
+            isResetCommand,
+        };
+        if (parsed.isInterrupt) msg.isInterrupt = true;
+        normalized.push(msg);
+    }
+    // 4. Boundary detection
+    const boundaries = findBoundaries(normalized, idleGapMs);
+    return { normalized, skipStats, boundaries, toolsUsed: [...toolsUsed] };
+}
+module.exports = { normalizeSession, detectClient };

package/pipeline/normalize/timestamp.js ADDED Viewed

@@ -0,0 +1,33 @@
+'use strict';
+// Convert a Date-constructor input to an ISO8601 string, or null when it is not a valid date.
+function toIsoOrNull(value) {
+    const d = new Date(value);
+    // toISOString() throws RangeError on an invalid Date, so check first.
+    return Number.isNaN(d.getTime()) ? null : d.toISOString();
+}
+/**
+ * Parse timestamp from a raw session entry.
+ * Handles multiple formats: ISO string (outer), epoch ms number (inner).
+ * Unified across all adapters to ensure consistent boundary detection.
+ * Values that do not form a valid date (unparseable strings, NaN, Infinity,
+ * out-of-range numbers) are treated as absent instead of throwing.
+ * @param {object} entry - Raw session entry
+ * @returns {string|null} ISO8601 string or null
+ */
+function parseTimestamp(entry) {
+    // Outer timestamp (ISO string) — common in CLI-based clients
+    const outerTs = entry.timestamp;
+    if (typeof outerTs === 'string') {
+        const iso = toIsoOrNull(outerTs);
+        if (iso !== null) return iso;
+    }
+    // Inner timestamp: epoch ms number (common in gateway/server-side clients)
+    // or an ISO string
+    const innerTs = entry.message?.timestamp;
+    if (typeof innerTs === 'number' || typeof innerTs === 'string') {
+        return toIsoOrNull(innerTs);
+    }
+    return null;
+}
+module.exports = { parseTimestamp };