@onenomad/engram-mcp 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -32
- package/dist/auth/login.d.ts +107 -68
- package/dist/auth/login.js +227 -216
- package/dist/auth/login.js.map +1 -1
- package/dist/consolidator.js +519 -519
- package/dist/context-pressure.js +91 -91
- package/dist/handoff.d.ts +53 -53
- package/dist/handoff.js +156 -156
- package/dist/server.js +204 -49
- package/dist/server.js.map +1 -1
- package/dist/source-dedup.d.ts +86 -86
- package/dist/source-dedup.js +147 -147
- package/dist/update-metadata.d.ts +29 -29
- package/dist/update-metadata.js +51 -51
- package/dist/wal.d.ts +95 -95
- package/dist/wal.js +295 -295
- package/package.json +1 -1
package/dist/source-dedup.js
CHANGED
@@ -1,148 +1,148 @@
-/**
- * Session-scoped same-source ingest dedup.
- *
- * Agents in long sessions repeatedly re-read stable files, re-poll
- * unchanged endpoints, and re-list the same directories. Each re-ingest
- * goes through the full chunk → embed → save pipeline even though the
- * content hasn't moved. On CPU embeddings (Engram's default backend),
- * a 20K-token re-read can cost 5–15 seconds; multiplied across a
- * 50-step agent run that's significant wall-clock burn.
- *
- * The existing 0.75-similarity dedup (in `server.ts`'s
- * tool handler) catches semantic duplicates, but does so against the
- * ENTIRE memory store — and at write-time it actually trips on
- * incidentally-similar memories (a fact about Pyre at 0.78 similarity
- * to a fact about Engram). It also requires the new content to be
- * embedded first, so it doesn't save the embedding cost.
- *
- * This module is the cheaper, more conservative path:
- * - Scoped to a single source identifier (file path, URL, etc.).
- * - Hash-based equality (SHA-256 of trimmed content) — exact match
- *   only, no false positives.
- * - In-memory LRU keyed by `source` → list of recent content hashes.
- * - Bounded: max 64 sources tracked, max 8 hashes per source.
- *
- * When an ingest hits the dedup cache, the caller can skip embedding
- * AND skip the disk write — return the cached chunk id. Agent's
- * conversation history stays internally consistent (same id for same
- * content), and the wall-clock cost drops from "embed + save" to a
- * map lookup.
- *
- * Process-scoped intentionally: the persistence layer doesn't need
- * to know about this. Engram restart resets the cache — first ingest
- * after restart goes through the full pipeline, which is fine.
- */
-import { createHash } from 'node:crypto';
-const MAX_SOURCES = 64;
-const MAX_PER_SOURCE = 8;
-export class SourceDedupCache {
-    /** sourceKey → list of recent (hash, chunkId) entries, MRU first. */
-    bySource = new Map();
-    /** Cache hit count since boot. Useful for telemetry. */
-    hits = 0;
-    /** Cache miss count since boot. */
-    misses = 0;
-    /**
-     * Hash trimmed content. Stable across ingest calls for the same
-     * payload — that's the whole point.
-     */
-    static hashContent(content) {
-        return createHash('sha256').update(content.trim()).digest('hex');
-    }
-    /**
-     * Look up a (source, content) pair. Returns the cached entry on hit
-     * or null on miss. Does NOT promote the entry on read — promote on
-     * write only, so a hit doesn't reset its LRU position.
-     */
-    lookup(source, content) {
-        if (!source) {
-            // No source key → no scoping → don't dedup. The caller's
-            // existing semantic-similarity dedup is the right tool for
-            // unscoped ingests.
-            this.misses++;
-            return null;
-        }
-        const list = this.bySource.get(source);
-        if (!list || list.length === 0) {
-            this.misses++;
-            return null;
-        }
-        const hash = SourceDedupCache.hashContent(content);
-        const found = list.find((e) => e.hash === hash);
-        if (found) {
-            this.hits++;
-            return found;
-        }
-        this.misses++;
-        return null;
-    }
-    /**
-     * Record a new (source, content, chunkId) entry after a successful
-     * ingest. LRU-evicts the oldest entry per source when the per-source
-     * cap is hit, and the oldest source when the overall cap is hit.
-     */
-    remember(source, content, chunkId) {
-        if (!source)
-            return;
-        const hash = SourceDedupCache.hashContent(content);
-        const entry = { hash, chunkId, ts: Date.now() };
-        let list = this.bySource.get(source);
-        if (!list) {
-            list = [];
-            // Evict the oldest source if we're at the global cap.
-            if (this.bySource.size >= MAX_SOURCES) {
-                let oldestKey = null;
-                let oldestTs = Infinity;
-                for (const [k, entries] of this.bySource.entries()) {
-                    const recent = entries[0]?.ts ?? 0;
-                    if (recent < oldestTs) {
-                        oldestTs = recent;
-                        oldestKey = k;
-                    }
-                }
-                if (oldestKey)
-                    this.bySource.delete(oldestKey);
-            }
-            this.bySource.set(source, list);
-        }
-        // De-dupe by hash within the per-source list — if the same hash
-        // is already there, replace it (fresh chunkId), otherwise prepend.
-        const existing = list.findIndex((e) => e.hash === hash);
-        if (existing >= 0) {
-            list[existing] = entry;
-        }
-        else {
-            list.unshift(entry);
-            if (list.length > MAX_PER_SOURCE)
-                list.length = MAX_PER_SOURCE;
-        }
-    }
-    /** Drop the entire cache. Useful for tests and explicit resets. */
-    clear() {
-        this.bySource.clear();
-        this.hits = 0;
-        this.misses = 0;
-    }
-    /** Snapshot stats for telemetry / Settings UI. */
-    stats() {
-        let entries = 0;
-        for (const list of this.bySource.values())
-            entries += list.length;
-        const total = this.hits + this.misses;
-        return {
-            sources: this.bySource.size,
-            entries,
-            hits: this.hits,
-            misses: this.misses,
-            hitRate: total === 0 ? 0 : this.hits / total,
-        };
-    }
-}
-/**
- * Module-level singleton. Engram is process-singleton anyway (one
- * server instance per data dir), so a single cache covers the whole
- * lifetime. Tests construct fresh `SourceDedupCache` instances; prod
- * uses this default.
- */
-export const sourceDedup = new SourceDedupCache();
+/**
+ * Session-scoped same-source ingest dedup.
+ *
+ * Agents in long sessions repeatedly re-read stable files, re-poll
+ * unchanged endpoints, and re-list the same directories. Each re-ingest
+ * goes through the full chunk → embed → save pipeline even though the
+ * content hasn't moved. On CPU embeddings (Engram's default backend),
+ * a 20K-token re-read can cost 5–15 seconds; multiplied across a
+ * 50-step agent run that's significant wall-clock burn.
+ *
+ * The existing 0.75-similarity dedup (in `server.ts`'s engram-ingest
+ * tool handler) catches semantic duplicates, but does so against the
+ * ENTIRE memory store — and at write-time it actually trips on
+ * incidentally-similar memories (a fact about Pyre at 0.78 similarity
+ * to a fact about Engram). It also requires the new content to be
+ * embedded first, so it doesn't save the embedding cost.
+ *
+ * This module is the cheaper, more conservative path:
+ * - Scoped to a single source identifier (file path, URL, etc.).
+ * - Hash-based equality (SHA-256 of trimmed content) — exact match
+ *   only, no false positives.
+ * - In-memory LRU keyed by `source` → list of recent content hashes.
+ * - Bounded: max 64 sources tracked, max 8 hashes per source.
+ *
+ * When an ingest hits the dedup cache, the caller can skip embedding
+ * AND skip the disk write — return the cached chunk id. Agent's
+ * conversation history stays internally consistent (same id for same
+ * content), and the wall-clock cost drops from "embed + save" to a
+ * map lookup.
+ *
+ * Process-scoped intentionally: the persistence layer doesn't need
+ * to know about this. Engram restart resets the cache — first ingest
+ * after restart goes through the full pipeline, which is fine.
+ */
+import { createHash } from 'node:crypto';
+const MAX_SOURCES = 64;
+const MAX_PER_SOURCE = 8;
+export class SourceDedupCache {
+    /** sourceKey → list of recent (hash, chunkId) entries, MRU first. */
+    bySource = new Map();
+    /** Cache hit count since boot. Useful for telemetry. */
+    hits = 0;
+    /** Cache miss count since boot. */
+    misses = 0;
+    /**
+     * Hash trimmed content. Stable across ingest calls for the same
+     * payload — that's the whole point.
+     */
+    static hashContent(content) {
+        return createHash('sha256').update(content.trim()).digest('hex');
+    }
+    /**
+     * Look up a (source, content) pair. Returns the cached entry on hit
+     * or null on miss. Does NOT promote the entry on read — promote on
+     * write only, so a hit doesn't reset its LRU position.
+     */
+    lookup(source, content) {
+        if (!source) {
+            // No source key → no scoping → don't dedup. The caller's
+            // existing semantic-similarity dedup is the right tool for
+            // unscoped ingests.
+            this.misses++;
+            return null;
+        }
+        const list = this.bySource.get(source);
+        if (!list || list.length === 0) {
+            this.misses++;
+            return null;
+        }
+        const hash = SourceDedupCache.hashContent(content);
+        const found = list.find((e) => e.hash === hash);
+        if (found) {
+            this.hits++;
+            return found;
+        }
+        this.misses++;
+        return null;
+    }
+    /**
+     * Record a new (source, content, chunkId) entry after a successful
+     * ingest. LRU-evicts the oldest entry per source when the per-source
+     * cap is hit, and the oldest source when the overall cap is hit.
+     */
+    remember(source, content, chunkId) {
+        if (!source)
+            return;
+        const hash = SourceDedupCache.hashContent(content);
+        const entry = { hash, chunkId, ts: Date.now() };
+        let list = this.bySource.get(source);
+        if (!list) {
+            list = [];
+            // Evict the oldest source if we're at the global cap.
+            if (this.bySource.size >= MAX_SOURCES) {
+                let oldestKey = null;
+                let oldestTs = Infinity;
+                for (const [k, entries] of this.bySource.entries()) {
+                    const recent = entries[0]?.ts ?? 0;
+                    if (recent < oldestTs) {
+                        oldestTs = recent;
+                        oldestKey = k;
+                    }
+                }
+                if (oldestKey)
+                    this.bySource.delete(oldestKey);
+            }
+            this.bySource.set(source, list);
+        }
+        // De-dupe by hash within the per-source list — if the same hash
+        // is already there, replace it (fresh chunkId), otherwise prepend.
+        const existing = list.findIndex((e) => e.hash === hash);
+        if (existing >= 0) {
+            list[existing] = entry;
+        }
+        else {
+            list.unshift(entry);
+            if (list.length > MAX_PER_SOURCE)
+                list.length = MAX_PER_SOURCE;
+        }
+    }
+    /** Drop the entire cache. Useful for tests and explicit resets. */
+    clear() {
+        this.bySource.clear();
+        this.hits = 0;
+        this.misses = 0;
+    }
+    /** Snapshot stats for telemetry / Settings UI. */
+    stats() {
+        let entries = 0;
+        for (const list of this.bySource.values())
+            entries += list.length;
+        const total = this.hits + this.misses;
+        return {
+            sources: this.bySource.size,
+            entries,
+            hits: this.hits,
+            misses: this.misses,
+            hitRate: total === 0 ? 0 : this.hits / total,
+        };
+    }
+}
+/**
+ * Module-level singleton. Engram is process-singleton anyway (one
+ * server instance per data dir), so a single cache covers the whole
+ * lifetime. Tests construct fresh `SourceDedupCache` instances; prod
+ * uses this default.
+ */
+export const sourceDedup = new SourceDedupCache();
 //# sourceMappingURL=source-dedup.js.map
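
The only substantive change in this file is the doc comment naming the engram-ingest tool handler; the cache logic itself is identical across versions. For orientation, here is a minimal sketch of how a caller would put this cache in front of the chunk → embed → save pipeline described above; `ingestFromSource` and `embedAndSave` are hypothetical stand-ins, not exports of this package.

    import { sourceDedup } from './source-dedup.js';

    // Hypothetical stand-in for the real chunk → embed → save pipeline.
    declare function embedAndSave(source: string, content: string): Promise<string>;

    async function ingestFromSource(source: string, content: string): Promise<string> {
        // Exact-match, source-scoped check: one SHA-256 plus a Map lookup.
        const cached = sourceDedup.lookup(source, content);
        if (cached) {
            // Hit: skip embedding and the disk write, and reuse the prior id
            // so the agent's history stays consistent (same id, same content).
            return cached.chunkId;
        }
        const chunkId = await embedAndSave(source, content);
        sourceDedup.remember(source, content, chunkId);
        return chunkId;
    }

On a hit, the wall-clock cost is the hash plus a map lookup; the 5–15 second CPU-embedding cost cited in the header comment is skipped entirely.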
package/dist/update-metadata.d.ts
CHANGED
@@ -1,29 +1,29 @@
-import type { CognitiveLayer, MemoryChunk, MemoryType, Sentiment } from './types.js';
-export interface UpdateMetadataInput {
-    tags?: string[];
-    source?: string;
-    domain?: string;
-    topic?: string;
-    type?: MemoryType;
-    sentiment?: Sentiment;
-    importance?: number;
-    cognitiveLayer?: CognitiveLayer;
-}
-export type UpdateMetadataMode = 'merge' | 'replace';
-/**
- * Pure helper: build the storage patch for a
- * call. Separated from server.ts so importing it (e.g. from tests)
- * doesn't pull in the MCP stdio server bootstrap.
- *
- * - `merge`: only fields the caller specified land in the patch.
- *   Untouched fields are absent → Storage.updateChunk leaves them alone.
- * - `replace`: every metadata-shape field is set, with caller values
- *   where present and engram defaults otherwise. Existing untouched
- *   fields get overwritten with the default. Footgun-y; the tool
- *   layer logs a warning when this mode fires.
- *
- * Immutable fields (id, createdAt, embedding, embeddingVersion) are
- * never produced by this helper; the tool layer doesn't accept them
- * in its input schema either.
- */
-export declare function buildUpdateMetadataPatch(metadata: UpdateMetadataInput, mode: UpdateMetadataMode): Partial<MemoryChunk>;
+import type { CognitiveLayer, MemoryChunk, MemoryType, Sentiment } from './types.js';
+export interface UpdateMetadataInput {
+    tags?: string[];
+    source?: string;
+    domain?: string;
+    topic?: string;
+    type?: MemoryType;
+    sentiment?: Sentiment;
+    importance?: number;
+    cognitiveLayer?: CognitiveLayer;
+}
+export type UpdateMetadataMode = 'merge' | 'replace';
+/**
+ * Pure helper: build the storage patch for a engram-update-metadata
+ * call. Separated from server.ts so importing it (e.g. from tests)
+ * doesn't pull in the MCP stdio server bootstrap.
+ *
+ * - `merge`: only fields the caller specified land in the patch.
+ *   Untouched fields are absent → Storage.updateChunk leaves them alone.
+ * - `replace`: every metadata-shape field is set, with caller values
+ *   where present and engram defaults otherwise. Existing untouched
+ *   fields get overwritten with the default. Footgun-y; the tool
+ *   layer logs a warning when this mode fires.
+ *
+ * Immutable fields (id, createdAt, embedding, embeddingVersion) are
+ * never produced by this helper; the tool layer doesn't accept them
+ * in its input schema either.
+ */
+export declare function buildUpdateMetadataPatch(metadata: UpdateMetadataInput, mode: UpdateMetadataMode): Partial<MemoryChunk>;
package/dist/update-metadata.js
CHANGED
@@ -1,52 +1,52 @@
-/**
- * Pure helper: build the storage patch for a
- * call. Separated from server.ts so importing it (e.g. from tests)
- * doesn't pull in the MCP stdio server bootstrap.
- *
- * - `merge`: only fields the caller specified land in the patch.
- *   Untouched fields are absent → Storage.updateChunk leaves them alone.
- * - `replace`: every metadata-shape field is set, with caller values
- *   where present and engram defaults otherwise. Existing untouched
- *   fields get overwritten with the default. Footgun-y; the tool
- *   layer logs a warning when this mode fires.
- *
- * Immutable fields (id, createdAt, embedding, embeddingVersion) are
- * never produced by this helper; the tool layer doesn't accept them
- * in its input schema either.
- */
-export function buildUpdateMetadataPatch(metadata, mode) {
-    const patch = {};
-    if (mode === 'replace') {
-        patch.tags = metadata.tags ?? [];
-        patch.source = metadata.source ?? '';
-        patch.domain = metadata.domain ?? '';
-        patch.topic = metadata.topic ?? '';
-        patch.type = metadata.type ?? 'context';
-        patch.sentiment = metadata.sentiment ?? 'neutral';
-        patch.importance = metadata.importance ?? 0.5;
-        if (metadata.cognitiveLayer !== undefined) {
-            patch.cognitiveLayer = metadata.cognitiveLayer;
-        }
-    }
-    else {
-        if (metadata.tags !== undefined)
-            patch.tags = metadata.tags;
-        if (metadata.source !== undefined)
-            patch.source = metadata.source;
-        if (metadata.domain !== undefined)
-            patch.domain = metadata.domain;
-        if (metadata.topic !== undefined)
-            patch.topic = metadata.topic;
-        if (metadata.type !== undefined)
-            patch.type = metadata.type;
-        if (metadata.sentiment !== undefined)
-            patch.sentiment = metadata.sentiment;
-        if (metadata.importance !== undefined)
-            patch.importance = metadata.importance;
-        if (metadata.cognitiveLayer !== undefined) {
-            patch.cognitiveLayer = metadata.cognitiveLayer;
-        }
-    }
-    return patch;
-}
+/**
+ * Pure helper: build the storage patch for a engram-update-metadata
+ * call. Separated from server.ts so importing it (e.g. from tests)
+ * doesn't pull in the MCP stdio server bootstrap.
+ *
+ * - `merge`: only fields the caller specified land in the patch.
+ *   Untouched fields are absent → Storage.updateChunk leaves them alone.
+ * - `replace`: every metadata-shape field is set, with caller values
+ *   where present and engram defaults otherwise. Existing untouched
+ *   fields get overwritten with the default. Footgun-y; the tool
+ *   layer logs a warning when this mode fires.
+ *
+ * Immutable fields (id, createdAt, embedding, embeddingVersion) are
+ * never produced by this helper; the tool layer doesn't accept them
+ * in its input schema either.
+ */
+export function buildUpdateMetadataPatch(metadata, mode) {
+    const patch = {};
+    if (mode === 'replace') {
+        patch.tags = metadata.tags ?? [];
+        patch.source = metadata.source ?? '';
+        patch.domain = metadata.domain ?? '';
+        patch.topic = metadata.topic ?? '';
+        patch.type = metadata.type ?? 'context';
+        patch.sentiment = metadata.sentiment ?? 'neutral';
+        patch.importance = metadata.importance ?? 0.5;
+        if (metadata.cognitiveLayer !== undefined) {
+            patch.cognitiveLayer = metadata.cognitiveLayer;
+        }
+    }
+    else {
+        if (metadata.tags !== undefined)
+            patch.tags = metadata.tags;
+        if (metadata.source !== undefined)
+            patch.source = metadata.source;
+        if (metadata.domain !== undefined)
+            patch.domain = metadata.domain;
+        if (metadata.topic !== undefined)
+            patch.topic = metadata.topic;
+        if (metadata.type !== undefined)
+            patch.type = metadata.type;
+        if (metadata.sentiment !== undefined)
+            patch.sentiment = metadata.sentiment;
+        if (metadata.importance !== undefined)
+            patch.importance = metadata.importance;
+        if (metadata.cognitiveLayer !== undefined) {
+            patch.cognitiveLayer = metadata.cognitiveLayer;
+        }
+    }
+    return patch;
+}
 //# sourceMappingURL=update-metadata.js.map
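
To make the merge/replace semantics concrete, a short sketch of what the two modes return for the same input; the expected values follow directly from the defaults visible in the code above.

    import { buildUpdateMetadataPatch } from './update-metadata.js';

    // merge: only the caller-specified field lands in the patch; everything
    // else stays absent, so Storage.updateChunk leaves it untouched.
    const mergePatch = buildUpdateMetadataPatch({ importance: 0.9 }, 'merge');
    // → { importance: 0.9 }

    // replace: every metadata-shape field is set; unspecified fields fall
    // back to the engram defaults and overwrite whatever the chunk had.
    const replacePatch = buildUpdateMetadataPatch({ importance: 0.9 }, 'replace');
    // → { tags: [], source: '', domain: '', topic: '',
    //     type: 'context', sentiment: 'neutral', importance: 0.9 }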
package/dist/wal.d.ts
CHANGED
@@ -1,95 +1,95 @@
-import type { SmartMemoryConfig, MemoryType, CognitiveLayer, Sentiment, MemoryOrigin, MemoryTier } from './types.js';
-import type { StoredChunk } from './storage.js';
-import { Storage } from './storage.js';
-/**
- * Write-Ahead Log (WAL) — real-time memory capture during conversations.
- *
- * The WAL principle: write state BEFORE responding, not after.
- * This ensures no memory is lost if the agent crashes, compacts, or restarts.
- *
- * Use `ingest` for immediate capture of facts/decisions/preferences
- * as they happen, rather than waiting for post-conversation extraction.
- */
-export interface IngestEntry {
-    content: string;
-    type?: MemoryType;
-    layer?: CognitiveLayer;
-    importance?: number;
-    tags?: string[];
-    source?: string;
-    domain?: string;
-    topic?: string;
-    sentiment?: Sentiment;
-    emotionalValence?: number;
-    emotionalArousal?: number;
-    origin?: MemoryOrigin;
-    tier?: MemoryTier;
-    /**
-     * ISO 8601 timestamp override. Default: ingest time (Date.now()).
-     *
-     * Critical when the content represents an event that originally
-     * happened at a different time — meeting notes from yesterday,
-     * dated documents, imported chat history, benchmark fixtures.
-     *
-     * The createdAt timestamp flows into `buildContextPrefix()` which
-     * is included in the embedded text. The retrieval pipeline uses
-     * this as a temporal signal — both via similarity match against
-     * the prefix in queries, and via downstream temporal-boost logic
-     * in `search.ts`.
-     *
-     * Without an override, every ingested memory shares the ingest-
-     * time prefix (which is the same for everything ingested in the
-     * same hour), losing all temporal differentiation.
-     */
-    createdAt?: string;
-    /**
-     * When true, skip the per-chunk KG triple extraction. The standalone
-     * locomo bench bypasses this (calls saveChunk directly, never enters
-     * wal.ts), which is why its wall-clock is ~50× faster than Pyre's
-     * MCP-boundary bench on the same dataset.
-     *
-     * Real users keep KG extraction (it powers
-     *
-     * apples-to-apples vs the standalone bench should pass this flag
-     * so they're measuring the same code path.
-     */
-    skipKgExtraction?: boolean;
-    /**
-     * When true, skip the post-batch appendDailyEntry write. Same
-     * rationale as skipKgExtraction — the standalone bench doesn't
-     * touch the daily-entries store; bench harnesses matching it
-     * should skip the write to compare on equal footing.
-     */
-    skipDailyEntry?: boolean;
-    /**
-     * When false, KG extraction + daily-entry append run in the
-     * BACKGROUND after ingest() returns. The caller gets its chunks
-     * back as soon as the saveChunk loop finishes; the side effects
-     * complete on their own pace.
-     *
-     * Default true (backwards compatible — caller awaits everything).
-     * Production callers where the agent doesn't immediately query
-     * the just-written content (chat WAL, tool-vault bridge) should
-     * pass false for ~5-30× faster perceived ingest latency.
-     *
-     * To wait for background work to drain (tests, shutdown), call
-     * `flushPendingSideEffects()` from this module.
-     */
-    awaitSideEffects?: boolean;
-}
-/**
- * Wait for all in-flight background side-effects (KG extraction +
- * daily-entry append fired with `awaitSideEffects: false`) to
- * complete. No-op when nothing is pending.
- *
- * Tests should call this between ingest and assert; shutdown code
- * should call before process exit to avoid losing KG writes.
- */
-export declare function flushPendingSideEffects(): Promise<void>;
-/** Pending count — for tests + telemetry. */
-export declare function pendingSideEffectCount(): number;
-/**
- * Immediately persist one or more memory entries.
- * Designed to be called mid-conversation, before the agent responds.
- */
-export declare function ingest(config: SmartMemoryConfig, storage: Storage, entries: IngestEntry[]): Promise<StoredChunk[]>;
+import type { SmartMemoryConfig, MemoryType, CognitiveLayer, Sentiment, MemoryOrigin, MemoryTier } from './types.js';
+import type { StoredChunk } from './storage.js';
+import { Storage } from './storage.js';
+/**
+ * Write-Ahead Log (WAL) — real-time memory capture during conversations.
+ *
+ * The WAL principle: write state BEFORE responding, not after.
+ * This ensures no memory is lost if the agent crashes, compacts, or restarts.
+ *
+ * Use `ingest` for immediate capture of facts/decisions/preferences
+ * as they happen, rather than waiting for post-conversation extraction.
+ */
+export interface IngestEntry {
+    content: string;
+    type?: MemoryType;
+    layer?: CognitiveLayer;
+    importance?: number;
+    tags?: string[];
+    source?: string;
+    domain?: string;
+    topic?: string;
+    sentiment?: Sentiment;
+    emotionalValence?: number;
+    emotionalArousal?: number;
+    origin?: MemoryOrigin;
+    tier?: MemoryTier;
+    /**
+     * ISO 8601 timestamp override. Default: ingest time (Date.now()).
+     *
+     * Critical when the content represents an event that originally
+     * happened at a different time — meeting notes from yesterday,
+     * dated documents, imported chat history, benchmark fixtures.
+     *
+     * The createdAt timestamp flows into `buildContextPrefix()` which
+     * is included in the embedded text. The retrieval pipeline uses
+     * this as a temporal signal — both via similarity match against
+     * the prefix in queries, and via downstream temporal-boost logic
+     * in `search.ts`.
+     *
+     * Without an override, every ingested memory shares the ingest-
+     * time prefix (which is the same for everything ingested in the
+     * same hour), losing all temporal differentiation.
+     */
+    createdAt?: string;
+    /**
+     * When true, skip the per-chunk KG triple extraction. The standalone
+     * locomo bench bypasses this (calls saveChunk directly, never enters
+     * wal.ts), which is why its wall-clock is ~50× faster than Pyre's
+     * MCP-boundary bench on the same dataset.
+     *
+     * Real users keep KG extraction (it powers engram-dossier,
+     * engram-kg-query, graph rerank). Benchmark harnesses comparing
+     * apples-to-apples vs the standalone bench should pass this flag
+     * so they're measuring the same code path.
+     */
+    skipKgExtraction?: boolean;
+    /**
+     * When true, skip the post-batch appendDailyEntry write. Same
+     * rationale as skipKgExtraction — the standalone bench doesn't
+     * touch the daily-entries store; bench harnesses matching it
+     * should skip the write to compare on equal footing.
+     */
+    skipDailyEntry?: boolean;
+    /**
+     * When false, KG extraction + daily-entry append run in the
+     * BACKGROUND after ingest() returns. The caller gets its chunks
+     * back as soon as the saveChunk loop finishes; the side effects
+     * complete on their own pace.
+     *
+     * Default true (backwards compatible — caller awaits everything).
+     * Production callers where the agent doesn't immediately query
+     * the just-written content (chat WAL, tool-vault bridge) should
+     * pass false for ~5-30× faster perceived ingest latency.
+     *
+     * To wait for background work to drain (tests, shutdown), call
+     * `flushPendingSideEffects()` from this module.
+     */
+    awaitSideEffects?: boolean;
+}
+/**
+ * Wait for all in-flight background side-effects (KG extraction +
+ * daily-entry append fired with `awaitSideEffects: false`) to
+ * complete. No-op when nothing is pending.
+ *
+ * Tests should call this between ingest and assert; shutdown code
+ * should call before process exit to avoid losing KG writes.
+ */
+export declare function flushPendingSideEffects(): Promise<void>;
+/** Pending count — for tests + telemetry. */
+export declare function pendingSideEffectCount(): number;
+/**
+ * Immediately persist one or more memory entries.
+ * Designed to be called mid-conversation, before the agent responds.
+ */
+export declare function ingest(config: SmartMemoryConfig, storage: Storage, entries: IngestEntry[]): Promise<StoredChunk[]>;
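
A sketch of a low-latency WAL write using the flags declared above, assuming `config` and `storage` are constructed elsewhere in the package; the literal field values are illustrative only.

    import { ingest, flushPendingSideEffects } from './wal.js';
    import type { SmartMemoryConfig } from './types.js';
    import { Storage } from './storage.js';

    declare const config: SmartMemoryConfig; // assumed: built elsewhere
    declare const storage: Storage;          // assumed: built elsewhere

    // Returns as soon as the saveChunk loop finishes; KG extraction and
    // the daily-entry append complete in the background.
    const chunks = await ingest(config, storage, [{
            content: 'User prefers merge mode for metadata updates.',
            importance: 0.8,
            source: 'chat:session',            // illustrative source key
            createdAt: '2025-01-15T09:30:00Z', // event time, not ingest time
            awaitSideEffects: false,
        }]);

    // Tests and shutdown paths drain the background work explicitly
    // before asserting or exiting.
    await flushPendingSideEffects();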