npm - pi-vault-mind - Versions diffs - 0.7.1 → 0.7.3 - Mend

pi-vault-mind 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/README.md +24 -2
package/dist/src/autosync.d.ts +16 -0
package/dist/src/autosync.js +43 -0
package/dist/src/commands.d.ts +18 -0
package/dist/src/commands.js +545 -12
package/dist/src/embed-queue.d.ts +80 -0
package/dist/src/embed-queue.js +163 -0
package/dist/src/index.js +9 -0
package/dist/src/lance.d.ts +7 -0
package/dist/src/lance.js +432 -0
package/dist/src/modal-client.d.ts +176 -0
package/dist/src/modal-client.js +174 -0
package/dist/src/modal-config.d.ts +54 -0
package/dist/src/modal-config.js +113 -0
package/dist/src/settings-ui.d.ts +9 -0
package/dist/src/settings-ui.js +131 -5
package/dist/src/sync.d.ts +71 -0
package/dist/src/sync.js +211 -0
package/dist/src/types.d.ts +107 -1
package/dist/src/utils.d.ts +4 -0
package/dist/src/utils.js +27 -0
package/dist/src/watcher.js +2 -1
package/dist/test/embed-queue.test.js +105 -0
package/dist/test/index.test.js +35 -0
package/dist/test/lance-modal.test.js +95 -0
package/dist/test/modal-client.test.js +294 -0
package/dist/test/modal-config.test.js +139 -0
package/dist/test/sync.test.js +132 -0
package/package.json +3 -2
package/dist/test/index.test.d.ts +0 -1

package/dist/src/embed-queue.d.ts ADDED

@@ -0,0 +1,80 @@
+/**
+ * Local embedding request coalescer — debounce + batch.
+ *
+ * Collects individual embedding requests arriving within a short window and
+ * flushes them as a single batched call to the backend (`/embed` for the modal
+ * provider, `/api/embed` for ollama, etc.), so ingest/append never fires N tiny
+ * requests. Modeled on the watcher's coalescing in `src/watcher.ts`: a debounce
+ * window (≈1000ms), a max-batch early flush, and a `maxConcurrentFlushes` cap
+ * (the analog of `pendingQueue` + `maxConcurrent`).
+ *
+ * ──────────────────────────────────────────────────────────────────────────
+ * NOTE FOR AGENT B (extension integration — see docs/plans/agent-B-*.md #11):
+ *
+ *   This is a complete, dependency-free, unit-tested building block (see
+ *   test/embed-queue.test.ts). It is intentionally NOT wired into anything yet.
+ *   Wire it in `src/lance.ts` roughly like:
+ *
+ *     const coalescer = new EmbeddingCoalescer({
+ *       embedFn: (texts, task) => modalClient.embed(texts, { task }).then(r => r.vectors),
+ *       debounceMs: cfg.embedding.coalesce?.debounceMs ?? 1000,
+ *       maxBatchSize: cfg.embedding.coalesce?.maxBatchSize ?? 64,
+ *       maxConcurrentFlushes: cfg.embedding.coalesce?.maxConcurrent ?? 2,
+ *     });
+ *
+ *   ROUTING POLICY (yours to own, not baked in here):
+ *     - append / ingest / bulk → coalescer.embed(text, "document")   [debounced]
+ *     - interactive wiki_search → coalescer.embedImmediate(query, "query")
+ *       (latency-sensitive; bypasses the debounce)
+ *
+ *   It is provider-agnostic: `embedFn` can wrap modal, ollama, or transformers —
+ *   they all take a batch and return one vector per input. Batches are
+ *   homogeneous per `task` (queries and documents flush separately) because the
+ *   embedding endpoints take a single task per call.
+ *
+ *   Make the knobs configurable (Q6 in the decision log): debounceMs,
+ *   maxBatchSize, maxConcurrentFlushes, dedupe, and whether search bypasses.
+ *   Adapt the interface freely — this is a reference, not a contract.
+ * ──────────────────────────────────────────────────────────────────────────
+ */
+/**
+ * Kind of text being embedded. Requests are buffered per task and every
+ * backend call carries exactly one task, so "query" and "document" requests
+ * are never mixed in the same batch.
+ */
+export type EmbedTask = "query" | "document";
+/**
+ * Batch embed backend: same texts in, one vector per text out, in order.
+ *
+ * @param texts - Texts to embed in a single backend call.
+ * @param task - Task shared by every text in the batch.
+ * @returns One vector per input text, at the same index as its text.
+ */
+export type EmbedFn = (texts: string[], task: EmbedTask) => Promise<number[][]>;
+/** Construction options for `EmbeddingCoalescer`; only `embedFn` is required. */
+export interface CoalescerOptions {
+    /** Backend invoked once per flushed batch and once per `embedImmediate` call. */
+    embedFn: EmbedFn;
+    /** Window to gather requests before flushing (ms). Default 1000 (matches watcher). */
+    debounceMs?: number;
+    /** Flush immediately once a task's buffer reaches this size. Default 64; values below 1 are raised to 1. */
+    maxBatchSize?: number;
+    /** Max batched embedFn calls in flight at once. Default 2; values below 1 are raised to 1. */
+    maxConcurrentFlushes?: number;
+    /** Coalesce identical texts within a batch to a single embed. Default true. */
+    dedupe?: boolean;
+}
+/**
+ * Debouncing, batching front end for an `EmbedFn`.
+ *
+ * Requests are buffered per task. A task's buffer is flushed as one `embedFn`
+ * call when its debounce timer fires, when it reaches `maxBatchSize`, or when
+ * `drain()` is called. At most `maxConcurrentFlushes` batches run at once;
+ * further batches wait in a first-in, first-out queue.
+ */
+export declare class EmbeddingCoalescer {
+    private readonly embedFn;
+    private readonly debounceMs;
+    private readonly maxBatchSize;
+    private readonly maxConcurrentFlushes;
+    private readonly dedupe;
+    /** Buffered requests not yet handed to a batch, keyed by task. */
+    private readonly buffers;
+    /** Pending debounce timer per task. */
+    private readonly timers;
+    /** Number of batches currently running. */
+    private active;
+    /** Batches waiting for a free concurrency slot, oldest first. */
+    private readonly pendingFlushes;
+    /** Promises of the batches currently running; awaited by `drain()`. */
+    private readonly inFlight;
+    constructor(opts: CoalescerOptions);
+    /**
+     * Queue a text for embedding; resolves with its vector once a batch flushes.
+     * Rejects with the backend's error if the batch's `embedFn` call fails.
+     * `task` defaults to "document".
+     */
+    embed(text: string, task?: EmbedTask): Promise<number[]>;
+    /**
+     * Bypass the debounce — embed a single text right now (for latency-sensitive search).
+     * Calls `embedFn` directly: the request is neither buffered nor counted
+     * against `maxConcurrentFlushes`. `task` defaults to "query".
+     */
+    embedImmediate(text: string, task?: EmbedTask): Promise<number[]>;
+    /** Number of requests currently buffered (not yet flushed). */
+    size(): number;
+    /** Flush all buffered tasks now, then await every in-flight batch to settle. */
+    drain(): Promise<void>;
+    /**
+     * Cancel pending debounce timers. Does not reject already-buffered waiters;
+     * they stay pending until a later flush (`drain()`, or a new `embed()` for
+     * the same task).
+     */
+    dispose(): void;
+    /** (Re)start the debounce timer for a task. */
+    private arm;
+    /** Cancel the task's timer and hand its buffered requests to the scheduler. */
+    private flushTask;
+    /** Run a batch now if a concurrency slot is free, otherwise queue it. */
+    private schedule;
+    /** Call `embedFn` for one batch and settle every waiter in it. */
+    private runBatch;
+}

package/dist/src/embed-queue.js ADDED

@@ -0,0 +1,163 @@
+/**
+ * Local embedding request coalescer — debounce + batch.
+ *
+ * Collects individual embedding requests arriving within a short window and
+ * flushes them as a single batched call to the backend (`/embed` for the modal
+ * provider, `/api/embed` for ollama, etc.), so ingest/append never fires N tiny
+ * requests. Modeled on the watcher's coalescing in `src/watcher.ts`: a debounce
+ * window (≈1000ms), a max-batch early flush, and a `maxConcurrentFlushes` cap
+ * (the analog of `pendingQueue` + `maxConcurrent`).
+ *
+ * ──────────────────────────────────────────────────────────────────────────
+ * NOTE FOR AGENT B (extension integration — see docs/plans/agent-B-*.md #11):
+ *
+ *   This is a complete, dependency-free, unit-tested building block (see
+ *   test/embed-queue.test.ts). It is intentionally NOT wired into anything yet.
+ *   Wire it in `src/lance.ts` roughly like:
+ *
+ *     const coalescer = new EmbeddingCoalescer({
+ *       embedFn: (texts, task) => modalClient.embed(texts, { task }).then(r => r.vectors),
+ *       debounceMs: cfg.embedding.coalesce?.debounceMs ?? 1000,
+ *       maxBatchSize: cfg.embedding.coalesce?.maxBatchSize ?? 64,
+ *       maxConcurrentFlushes: cfg.embedding.coalesce?.maxConcurrent ?? 2,
+ *     });
+ *
+ *   ROUTING POLICY (yours to own, not baked in here):
+ *     - append / ingest / bulk → coalescer.embed(text, "document")   [debounced]
+ *     - interactive wiki_search → coalescer.embedImmediate(query, "query")
+ *       (latency-sensitive; bypasses the debounce)
+ *
+ *   It is provider-agnostic: `embedFn` can wrap modal, ollama, or transformers —
+ *   they all take a batch and return one vector per input. Batches are
+ *   homogeneous per `task` (queries and documents flush separately) because the
+ *   embedding endpoints take a single task per call.
+ *
+ *   Make the knobs configurable (Q6 in the decision log): debounceMs,
+ *   maxBatchSize, maxConcurrentFlushes, dedupe, and whether search bypasses.
+ *   Adapt the interface freely — this is a reference, not a contract.
+ * ──────────────────────────────────────────────────────────────────────────
+ */
+/**
+ * Throw unless the backend returned exactly one vector per requested text.
+ *
+ * @param vectors - Value the backend resolved with.
+ * @param expected - Number of texts that were sent to the backend.
+ * @throws {Error} If `vectors` is not an array of length `expected`.
+ */
+function assertVectorCount(vectors, expected) {
+    if (Array.isArray(vectors) && vectors.length === expected)
+        return;
+    const got = Array.isArray(vectors) ? `${vectors.length} vector(s)` : `a non-array (${typeof vectors})`;
+    throw new Error(`embedFn returned ${got} for ${expected} text(s)`);
+}
+/**
+ * Debouncing, batching front end for a batch embedding backend.
+ *
+ * Requests are buffered per task. A task's buffer is flushed as one `embedFn`
+ * call when its debounce timer fires, when it reaches `maxBatchSize`, or when
+ * `drain()` is called. At most `maxConcurrentFlushes` batches run at once;
+ * further batches wait in a first-in, first-out queue.
+ */
+export class EmbeddingCoalescer {
+    embedFn;
+    debounceMs;
+    maxBatchSize;
+    maxConcurrentFlushes;
+    dedupe;
+    /** Buffered waiters (`{ text, resolve, reject }`) not yet handed to a batch, keyed by task. */
+    buffers = new Map();
+    /** Pending debounce timer per task. */
+    timers = new Map();
+    /** Number of batches currently running. */
+    active = 0;
+    /** Batches waiting for a free concurrency slot, oldest first. */
+    pendingFlushes = [];
+    /** Promises of the batches currently running; awaited by `drain()`. */
+    inFlight = new Set();
+    /**
+     * @param opts - `embedFn` is required. Defaults: `debounceMs` 1000,
+     *   `maxBatchSize` 64, `maxConcurrentFlushes` 2, `dedupe` true.
+     */
+    constructor(opts) {
+        this.embedFn = opts.embedFn;
+        this.debounceMs = opts.debounceMs ?? 1000;
+        // Raise to at least 1 so a zero/negative setting cannot stall flushing or scheduling.
+        this.maxBatchSize = Math.max(1, opts.maxBatchSize ?? 64);
+        this.maxConcurrentFlushes = Math.max(1, opts.maxConcurrentFlushes ?? 2);
+        this.dedupe = opts.dedupe ?? true;
+    }
+    /**
+     * Queue a text for embedding; resolves with its vector once a batch flushes.
+     *
+     * @param text - Text to embed.
+     * @param task - Buffer the request joins. Defaults to "document".
+     * @returns The vector for `text`. Rejects if the batch's `embedFn` call
+     *   fails or returns the wrong number of vectors.
+     */
+    embed(text, task = "document") {
+        return new Promise((resolve, reject) => {
+            const buf = this.buffers.get(task) ?? [];
+            buf.push({ text, resolve, reject });
+            this.buffers.set(task, buf);
+            if (buf.length >= this.maxBatchSize) {
+                // Buffer is full: flush now instead of waiting out the debounce window.
+                this.flushTask(task);
+            }
+            else {
+                this.arm(task);
+            }
+        });
+    }
+    /**
+     * Bypass the debounce — embed a single text right now (for latency-sensitive search).
+     * Calls `embedFn` directly: the request is neither buffered nor counted
+     * against `maxConcurrentFlushes`.
+     *
+     * @param text - Text to embed.
+     * @param task - Task passed to the backend. Defaults to "query".
+     * @returns The vector for `text`.
+     * @throws {Error} If the backend does not return exactly one vector.
+     */
+    async embedImmediate(text, task = "query") {
+        const vectors = await this.embedFn([text], task);
+        // Fail loudly rather than hand the caller `undefined` as a vector.
+        assertVectorCount(vectors, 1);
+        return vectors[0];
+    }
+    /** Number of requests currently buffered (not yet flushed). */
+    size() {
+        let n = 0;
+        for (const buf of this.buffers.values())
+            n += buf.length;
+        return n;
+    }
+    /** Flush all buffered tasks now, then await every in-flight batch to settle. */
+    async drain() {
+        for (const task of [...this.buffers.keys()])
+            this.flushTask(task);
+        // Loop: a finishing batch starts the next queued one, which joins
+        // `inFlight` only after the snapshot below was taken.
+        while (this.inFlight.size > 0 || this.pendingFlushes.length > 0) {
+            await Promise.allSettled([...this.inFlight]);
+        }
+    }
+    /**
+     * Cancel pending debounce timers. Does not reject already-buffered waiters;
+     * they stay pending until a later flush (`drain()`, or a new `embed()` for
+     * the same task).
+     */
+    dispose() {
+        for (const t of this.timers.values())
+            clearTimeout(t);
+        this.timers.clear();
+    }
+    // ── internals ──────────────────────────────────────────────────────────
+    /** (Re)start the debounce timer for `task`; each new request pushes the flush `debounceMs` into the future. */
+    arm(task) {
+        const existing = this.timers.get(task);
+        if (existing)
+            clearTimeout(existing);
+        this.timers.set(task, setTimeout(() => this.flushTask(task), this.debounceMs));
+    }
+    /** Cancel the task's timer and hand its buffered requests to the scheduler as one batch. */
+    flushTask(task) {
+        const timer = this.timers.get(task);
+        if (timer) {
+            clearTimeout(timer);
+            this.timers.delete(task);
+        }
+        const buf = this.buffers.get(task);
+        if (!buf || buf.length === 0)
+            return;
+        // Swap in a fresh array so requests arriving while this batch runs go into the next one.
+        this.buffers.set(task, []);
+        this.schedule(() => this.runBatch(task, buf));
+    }
+    /** Run `job` now if a concurrency slot is free, otherwise queue it behind earlier batches. */
+    schedule(job) {
+        const run = () => {
+            this.active++;
+            const p = job().finally(() => {
+                this.active--;
+                this.inFlight.delete(p);
+                // Hand the freed slot to the oldest waiting batch, if any.
+                const next = this.pendingFlushes.shift();
+                if (next)
+                    next();
+            });
+            this.inFlight.add(p);
+        };
+        if (this.active < this.maxConcurrentFlushes)
+            run();
+        else
+            this.pendingFlushes.push(run);
+    }
+    /**
+     * Call `embedFn` once for `items` and settle every waiter. Never rejects:
+     * any failure, including a wrong vector count, rejects the waiters instead.
+     *
+     * @param task - Task shared by every item in the batch.
+     * @param items - Waiters (`{ text, resolve, reject }`) taken from the task's buffer.
+     */
+    async runBatch(task, items) {
+        try {
+            let texts;
+            let indexByText = null;
+            if (this.dedupe) {
+                // Send each distinct text once and remember its position in the request.
+                indexByText = new Map();
+                texts = [];
+                for (const it of items) {
+                    if (!indexByText.has(it.text)) {
+                        indexByText.set(it.text, texts.length);
+                        texts.push(it.text);
+                    }
+                }
+            }
+            else {
+                texts = items.map((i) => i.text);
+            }
+            const vectors = await this.embedFn(texts, task);
+            // A short or malformed response would otherwise resolve waiters with `undefined`.
+            assertVectorCount(vectors, texts.length);
+            for (let i = 0; i < items.length; i++) {
+                const it = items[i];
+                // With dedupe on, waiters for the same text receive the same vector array.
+                const idx = indexByText ? indexByText.get(it.text) : i;
+                it.resolve(vectors[idx]);
+            }
+        }
+        catch (err) {
+            for (const it of items)
+                it.reject(err);
+        }
+    }
+}

package/dist/src/index.js CHANGED

@@ -1,5 +1,6 @@
 import * as fs from "node:fs";
 import * as path from "node:path";
+import { startAutoSync } from "./autosync.js";
 import { registerCommands, selectActiveCollection, serverState, watcherState } from "./commands.js";
 import { handleBeforeAgentStart, handleTurnEnd } from "./events.js";
 import { startServer, stopServer } from "./server.js";
@@ -8,6 +9,7 @@ import { EXT_ROOT } from "./utils.js";
 import { loadConfig } from "./utils.js";
 import { startWatcher, stopWatcher } from "./watcher.js";
 import { updateActiveCollectionWidget } from "./widget.js";
+let stopAutoSync = null;
 export default function (pi) {
     /* expose skills directory */
     pi.on("resources_discover", async (_event) => {
@@ -49,6 +51,11 @@ export default function (pi) {
                     startWatcher(pi, vaults, watcherState);
             }, 2000);
         }
+        /* auto-sync: off by default; only starts when modal.sync.autoSync is true */
+        if (cfg.wiki.embedding.provider === "modal" && cfg.wiki.embedding.modal?.sync?.autoSync) {
+            stopAutoSync = startAutoSync(cfg.wiki.embedding.modal?.sync?.autoSyncIntervalMs ?? 300000);
+            console.log(`[pi-vault-mind] Modal auto-sync enabled (every ${cfg.wiki.embedding.modal?.sync?.autoSyncIntervalMs ?? 300000}ms)`);
+        }
     }
     catch {
         /* config may not exist yet; user can start watcher manually via /wiki watcher start */
@@ -57,5 +64,7 @@ export default function (pi) {
     pi.on("session_shutdown", async () => {
         stopWatcher(watcherState);
         stopServer(serverState);
+        if (stopAutoSync)
+            stopAutoSync();
     });
 }

package/dist/src/lance.d.ts CHANGED

@@ -35,6 +35,13 @@ export declare const pullOllamaModel: (model: string, piOrHost?: ExtensionAPI |
     message: string;
 }>;
 export declare const upsertEntry: (dataDir: string, collectionName: string, entry: Record<string, string>, cfg: WikiConfig) => Promise<void>;
+/**
+ * Precomputed-vector insert path — used by sync. Upserts rows that are already
+ * embedded (vectors come from the server) into the namespaced table, keyed by
+ * `id` (merge-insert). Bypasses the auto-embed source field entirely. Carries
+ * text + metadata. Idempotent (re-fetching a boundary row is a no-op).
+ */
+export declare const upsertPrecomputed: (dataDir: string, collectionName: string, model: string, dim: number, rows: Array<Record<string, unknown>>, cfg: WikiConfig) => Promise<void>;
 export declare const searchHybrid: (dataDir: string, collectionName: string, query: string, limit: number, cfg: WikiConfig) => Promise<unknown[]>;
 export declare const searchFts: (dataDir: string, collectionName: string, query: string, limit: number, cfg: WikiConfig) => Promise<unknown[]>;
 export declare const getStatus: (dataDir: string) => Promise<Record<string, unknown>>;