npm - pi-vault-mind - Versions diffs - 0.7.1 → 0.7.2 - Mend

pi-vault-mind 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +24 -2
package/dist/src/autosync.d.ts +16 -0
package/dist/src/autosync.js +43 -0
package/dist/src/commands.d.ts +18 -0
package/dist/src/commands.js +464 -10
package/dist/src/embed-queue.d.ts +80 -0
package/dist/src/embed-queue.js +163 -0
package/dist/src/index.js +9 -0
package/dist/src/lance.d.ts +7 -0
package/dist/src/lance.js +432 -0
package/dist/src/modal-client.d.ts +176 -0
package/dist/src/modal-client.js +174 -0
package/dist/src/modal-config.d.ts +42 -0
package/dist/src/modal-config.js +60 -0
package/dist/src/settings-ui.d.ts +7 -0
package/dist/src/settings-ui.js +109 -1
package/dist/src/sync.d.ts +71 -0
package/dist/src/sync.js +211 -0
package/dist/src/types.d.ts +102 -1
package/dist/test/embed-queue.test.js +105 -0
package/dist/test/index.test.js +35 -0
package/dist/test/lance-modal.test.js +95 -0
package/dist/test/modal-client.test.js +294 -0
package/dist/test/modal-config.test.js +86 -0
package/dist/test/sync.test.js +132 -0
package/package.json +3 -2
package/dist/test/index.test.d.ts +0 -1

package/dist/src/lance.js CHANGED Viewed

@@ -4,6 +4,8 @@ import * as path from "node:path";
 import * as lancedb from "@lancedb/lancedb";
 import { LanceSchema, TextEmbeddingFunction } from "@lancedb/lancedb/embedding";
 import * as arrow from "apache-arrow";
+import { EmbeddingCoalescer } from "./embed-queue.js";
+import { createModalClient, namespacedTableName, resolveDim, resolveModel, } from "./modal-config.js";
 let db = null;
 // ── Connection ──────────────────────────────────────────────────────────────
 export const connect = async (dataDir) => {
@@ -18,6 +20,10 @@ export const connect = async (dataDir) => {
 export const resetConnection = () => {
     db = null;
     tables = {};
+    modalTableCache.clear(); // drop cached namespaced modal table handles
+    modalTableInFlight.clear(); // forget pending open/create promises
+    nativeDimCache.clear(); // forget per-model dims learned from earlier embeds
+    void disposeCoalescers(); // async and not awaited: drain/dispose completes after this function returns
 };
 // ── Embedding Functions ─────────────────────────────────────────────────────
 /**
@@ -108,6 +114,264 @@ class TransformersEmbeddingFunction extends TextEmbeddingFunction {
         return embeddings;
     }
 }
+// ── Modal Provider ──────────────────────────────────────────────────────────
+/**
+ * Embedding function backed by the Modal `/embed` endpoint (see
+ * `src/modal-client.ts`). Used two ways:
+ *
+ *  1. As a `TextEmbeddingFunction` for graph tables that still auto-embed via
+ *     `sourceField` (document task for storage, query task for search).
+ *  2. As the `embedFn` wrapped by the `EmbeddingCoalescer` for the main
+ *     collection path, where append/ingest is debounced + batched and search
+ *     bypasses the debounce via `embedImmediate`.
+ *
+ * The main collection path does NOT auto-embed — it inserts precomputed
+ * vectors into namespaced `col_{collection}__{model}__{dim}` tables (see
+ * `upsertEntry`/`searchHybrid` below and ADR-3). One canonical model owns the
+ * vector space; a single query never mixes `model__dim` spaces.
+ */
+class ModalEmbeddingFunction extends TextEmbeddingFunction {
+    cfg;
+    collection;
+    ndimsValue;
+    constructor(cfg, collection, ndimsValue) {
+        super();
+        this.cfg = cfg;
+        this.collection = collection;
+        this.ndimsValue = ndimsValue;
+    }
+    ndims() {
+        return this.ndimsValue;
+    }
+    embeddingDataType() {
+        return new arrow.Float32();
+    }
+    /** Batched embed against `/embed`. `task` selects query vs document. */
+    async generateEmbeddings(texts, task = "document") {
+        const client = createModalClient(this.cfg);
+        if (!client)
+            throw new Error("Modal provider not configured (missing baseUrl or token).");
+        const res = await client.embed(texts, {
+            model: resolveModel(this.cfg, this.collection),
+            dim: resolveDim(this.cfg, this.collection),
+            task,
+        });
+        // Cache the model's effective output dim for table resolution.
+        rememberNativeDim(resolveModel(this.cfg, this.collection), res.dim);
+        return res.vectors;
+    }
+    // Storage path (LanceDB auto-embed via sourceField) → document task.
+    async computeSourceEmbeddings(texts) {
+        return this.generateEmbeddings(texts, "document");
+    }
+    // Query path (LanceDB nearestTo via embedding function) → query task.
+    async computeQueryEmbeddings(data) {
+        const vecs = await this.generateEmbeddings([data], "query");
+        return vecs[0];
+    }
+}
+/**
+ * Cached native output dim per model, learned from the first successful
+ * `/embed` response. Lets the search path resolve the namespaced table name
+ * before any vector is in hand (e.g. an offline fallback to FTS).
+ */
+const nativeDimCache = new Map();
+const rememberNativeDim = (model, dim) => {
+    if (dim && !nativeDimCache.has(model))
+        nativeDimCache.set(model, dim);
+};
+/** Resolved effective dim: config dim > cached native dim > undefined. */
+const effectiveDim = (cfg, collection) => {
+    const d = resolveDim(cfg, collection);
+    if (d != null)
+        return d;
+    const model = resolveModel(cfg, collection);
+    return nativeDimCache.get(model);
+};
+/**
+ * Coalescer cache, keyed by provider+model+dim signature. Append/ingest embeds
+ * are debounced + batched into a single `/embed` call; identical texts are
+ * deduped; in-flight flushes are capped. Search queries bypass the debounce via
+ * `embedImmediate` (latency-sensitive). Knobs come from
+ * `cfg.embedding.coalesce` (Q6).
+ */
+const coalescerCache = new Map();
+const coalesceKey = (cfg, collection) => `modal:${resolveModel(cfg, collection)}:${resolveDim(cfg, collection) ?? "native"}`;
+const getCoalescer = (cfg, collection) => {
+    const key = coalesceKey(cfg, collection);
+    const existing = coalescerCache.get(key);
+    if (existing)
+        return existing;
+    const fn = new ModalEmbeddingFunction(cfg, collection);
+    const embedFn = (texts, task) => fn.generateEmbeddings(texts, task);
+    const co = cfg.embedding.coalesce ?? {};
+    const coalescer = new EmbeddingCoalescer({
+        embedFn,
+        debounceMs: co.debounceMs ?? 1000,
+        maxBatchSize: co.maxBatchSize ?? 64,
+        maxConcurrentFlushes: co.maxConcurrentFlushes ?? 2,
+        dedupe: co.dedupe ?? true,
+    });
+    coalescerCache.set(key, coalescer);
+    return coalescer;
+};
+/** Whether search queries bypass the coalescer debounce (default true). */
+const searchBypassesCoalescer = (cfg) => cfg.embedding.coalesce?.searchBypass ?? true;
+/**
+ * Embed a single query, immediately when search-bypass is on (default),
+ * otherwise through the debounce. Latency-sensitive interactive search.
+ */
+const embedQuery = (cfg, collection, text) => {
+    const co = getCoalescer(cfg, collection);
+    return searchBypassesCoalescer(cfg) ? co.embedImmediate(text, "query") : co.embed(text, "query");
+};
+/**
+ * Embed one document via the coalescer (debounced + batched). Append/ingest
+ * never fires one network request per entry — requests within the window flush
+ * as a single batched `/embed` call.
+ */
+const embedDocument = (cfg, collection, text) => getCoalescer(cfg, collection).embed(text, "document");
+// ── Modal table management ──────────────────────────────────────────────────
+/** Plain (no auto-embed) schema for a namespaced collection table. */
+function makeModalCollectionSchema(dim) {
+    const vec = new arrow.FixedSizeList(dim, new arrow.Field("item", new arrow.Float32(), true));
+    return new arrow.Schema([
+        new arrow.Field("id", new arrow.Utf8(), true),
+        new arrow.Field("domain", new arrow.Utf8(), true),
+        new arrow.Field("source", new arrow.Utf8(), true),
+        new arrow.Field("fact", new arrow.Utf8(), true),
+        new arrow.Field("tag", new arrow.Utf8(), true),
+        new arrow.Field("artifact", new arrow.Utf8(), true),
+        new arrow.Field("created_at", new arrow.Utf8(), true),
+        new arrow.Field("vector", vec, true),
+    ]);
+}
+const modalTableCache = new Map();
+/** In-flight creation/open promises, to dedupe concurrent getModalCollectionTable calls. */
+const modalTableInFlight = new Map();
+/**
+ * Open (or create) a namespaced modal collection table at a known dim. The dim
+ * is always derived from a real vector (an embedded query/document or a synced
+ * row), so there is no hard-coded dimension. Concurrent calls for the same
+ * table are deduped via `modalTableInFlight` so only one create/open runs.
+ *
+ * Lookup order: `modalTableCache`, then `modalTableInFlight`, then a fresh
+ * open/create. A newly created table gets an FTS index on `fact` unless
+ * `cfg.ftsEnabled` is exactly `false`; a failure to create that index is
+ * swallowed. Both maps are keyed by the namespaced table name.
+ */
+const getModalCollectionTable = async (dataDir, collection, model, dim, cfg) => {
+    const conn = await connect(dataDir);
+    const tableName = namespacedTableName(collection, model, dim);
+    const cached = modalTableCache.get(tableName);
+    if (cached)
+        return cached;
+    const inflight = modalTableInFlight.get(tableName);
+    if (inflight)
+        return inflight;
+    // There is no await between the in-flight lookup above and the
+    // `modalTableInFlight.set` below (the async body only runs up to its first
+    // await), so a concurrent caller cannot slip in and start a second open/create.
+    const p = (async () => {
+        const existing = await conn.tableNames();
+        let t;
+        if (existing.includes(tableName)) {
+            t = await conn.openTable(tableName);
+        }
+        else {
+            const schema = makeModalCollectionSchema(dim);
+            t = await conn.createEmptyTable(tableName, schema);
+            if (cfg.ftsEnabled !== false) {
+                try {
+                    await t.createIndex("fact", { config: lancedb.Index.fts() });
+                }
+                catch {
+                    /* index may already exist */
+                }
+            }
+        }
+        modalTableCache.set(tableName, t);
+        return t;
+    })();
+    modalTableInFlight.set(tableName, p);
+    try {
+        return await p;
+    }
+    finally {
+        // Runs on success and failure, so a failed attempt can be retried later.
+        modalTableInFlight.delete(tableName);
+    }
+};
+/**
+ * Locate an existing namespaced table for a collection+model (any dim) by
+ * listing table names. Returns the table + its parsed dim, or null. Used by the
+ * search path so it can find a table populated purely by sync (before any
+ * local embed has cached the dim).
+ */
+const findModalTable = async (dataDir, collection, model) => {
+    const conn = await connect(dataDir);
+    const prefix = `col_${collection}__${model}__`;
+    for (const name of await conn.tableNames()) {
+        if (name.startsWith(prefix)) {
+            const dim = Number.parseInt(name.slice(prefix.length), 10);
+            if (Number.isFinite(dim)) {
+                const table = await conn.openTable(name);
+                return { table, dim };
+            }
+        }
+    }
+    return null;
+};
+/**
+ * Best-effort FTS fallback. Used when the modal embed path is unavailable
+ * (offline + no same-space local fallback) so a search degrades to keyword
+ * search instead of crashing.
+ */
+const modalFtsFallback = async (dataDir, collection, model, query, limit) => {
+    const found = await findModalTable(dataDir, collection, model);
+    if (!found)
+        return [];
+    try {
+        return await found.table.search(query).limit(limit).toArray();
+    }
+    catch {
+        return [];
+    }
+};
+/**
+ * Offline fallback embed for a query. Only used when Modal is unreachable and a
+ * same-space local provider is configured (same canonical model → same vector
+ * space → can query the same namespaced table). Returns null when no same-space
+ * fallback is available, so the caller degrades to FTS.
+ */
+const fallbackQueryEmbed = async (cfg, collection, text) => {
+    const fb = cfg.embedding.modal?.fallback;
+    if (fb?.enabled === false)
+        return null;
+    if (fb?.provider !== "ollama")
+        return null; // transformers is 384-dim ≠ canonical space
+    const canonical = resolveModel(cfg, collection);
+    // Same-space only: the local Ollama model must match the canonical model.
+    if ((cfg.embedding.ollamaModel || "embeddinggemma") !== canonical)
+        return null;
+    try {
+        const fn = new OllamaEmbeddingFunction({
+            model: cfg.embedding.ollamaModel || "embeddinggemma",
+            host: cfg.embedding.ollamaHost || "http://127.0.0.1:11434",
+        });
+        await fn.init();
+        const vecs = await fn.generateEmbeddings([text]);
+        return vecs[0];
+    }
+    catch (err) {
+        console.warn(`[pi-vault-mind] Modal offline and Ollama fallback failed: ${err.message}`);
+        return null;
+    }
+};
+/** Flush all pending coalesced embeds and clear caches (used by reset). */
+const disposeCoalescers = async () => {
+    for (const co of coalescerCache.values()) {
+        try {
+            await co.drain();
+        }
+        catch {
+            /* best-effort */
+        }
+        co.dispose();
+    }
+    coalescerCache.clear();
+};
 const getModelsJsonPath = () => path.join(homedir(), ".pi", "agent", "models.json"); // <homedir()>/.pi/agent/models.json
 /** Read Pi's models.json for already-registered Ollama models. */
 const loadPiModelsJson = () => {
@@ -255,6 +519,12 @@ export const pullOllamaModel = async (model, piOrHost) => {
 };
 // ── Embedding Provider Factory ──────────────────────────────────────────────
 const getEmbeddingFunction = async (cfg) => {
+    if (cfg.embedding.provider === "modal") {
+        // Used by graph tables (auto-embed via sourceField). The main collection
+        // path bypasses this and inserts precomputed vectors (see upsertEntry).
+        const dim = effectiveDim(cfg);
+        return new ModalEmbeddingFunction(cfg, undefined, dim);
+    }
     if (cfg.embedding.provider === "ollama") {
         const fn = new OllamaEmbeddingFunction({
             model: cfg.embedding.ollamaModel || "embeddinggemma",
@@ -332,12 +602,25 @@ const getCollectionTable = async (dataDir, collectionName, cfg) => {
     }
     return tables[tableName];
 };
+/**
+ * Graph (entity/relation) schemas embed a fixed-size vector field, so they need
+ * a concrete dimension. Under the modal provider the dim may be unresolved until
+ * the first embed/sync caches it — fail with a clear message instead of building
+ * a schema with an undefined dimension. (These tables are not yet wired into the
+ * active graph path; this guards them for when they are.)
+ */
+const assertGraphDim = (cfg, kind) => {
+    if (cfg.embedding.provider === "modal" && effectiveDim(cfg) == null) {
+        throw new Error(`Graph ${kind} table needs a known embedding dimension under the modal provider. Set wiki.embedding.modal.dim, or run an embed/sync first so the native dim is cached.`);
+    }
+};
 const getEntityTable = async (dataDir, cfg) => {
     const conn = await connect(dataDir);
     const tableName = "entities";
     if (tables[tableName])
         return tables[tableName];
     const embeddingFn = await getEmbeddingFunction(cfg);
+    assertGraphDim(cfg, "entity");
     const schema = makeEntitySchema(embeddingFn);
     const existing = await conn.tableNames();
     if (existing.includes(tableName)) {
@@ -353,6 +636,7 @@ const getRelationTable = async (dataDir, cfg) => {
     if (tables[tableName])
         return tables[tableName];
     const embeddingFn = await getEmbeddingFunction(cfg);
+    assertGraphDim(cfg, "relation");
     const schema = makeRelationSchema(embeddingFn);
     const existing = await conn.tableNames();
     if (existing.includes(tableName)) {
@@ -363,7 +647,10 @@ const getRelationTable = async (dataDir, cfg) => {
     return tables[tableName];
 };
 // ── Public API ──────────────────────────────────────────────────────────────
+/** True when the configured embedding provider is `"modal"`; the public API uses it to route to the modal code paths. */
+const isModal = (cfg) => cfg.embedding.provider === "modal";
 export const upsertEntry = async (dataDir, collectionName, entry, cfg) => {
+    if (isModal(cfg))
+        return upsertEntryModal(dataDir, collectionName, entry, cfg);
     const table = await getCollectionTable(dataDir, collectionName, cfg);
     const row = {
         id: entry.id || crypto.randomUUID(),
@@ -376,14 +663,159 @@ export const upsertEntry = async (dataDir, collectionName, entry, cfg) => {
     };
     await table.add([row]);
 };
+/**
+ * Modal append path: embed the document text via the coalescer (debounced +
+ * batched), then merge-insert the row with its precomputed vector into the
+ * namespaced `col_{collection}__{model}__{dim}` table. If Modal is offline and
+ * no same-space local fallback is configured, it warns and skips the vector
+ * index (the JSONL append is the source of truth; reindex recovers) — it does
+ * not crash. Keyed by `id` (upsert, idempotent).
+ */
+const upsertEntryModal = async (dataDir, collectionName, entry, cfg) => {
+    const model = resolveModel(cfg, collectionName);
+    const text = entry.fact || "";
+    let vector;
+    try {
+        vector = await embedDocument(cfg, collectionName, text);
+    }
+    catch (err) {
+        // Offline: try a same-space local fallback; else warn + skip indexing.
+        const fb = await fallbackDocumentEmbed(cfg, collectionName, text);
+        if (fb) {
+            vector = fb;
+        }
+        else {
+            console.warn(`[pi-vault-mind] Modal embed failed for "${collectionName}" and no same-space fallback — skipping vector index: ${err.message}`);
+            return;
+        }
+    }
+    const dim = vector.length;
+    rememberNativeDim(model, dim);
+    const table = await getModalCollectionTable(dataDir, collectionName, model, dim, cfg);
+    const row = {
+        id: entry.id || crypto.randomUUID(),
+        domain: entry.domain || "",
+        source: entry.source || "",
+        fact: text,
+        tag: entry.tag || "",
+        artifact: entry.artifact || "",
+        created_at: entry.created_at || new Date().toISOString(),
+        vector,
+    };
+    await table.mergeInsert(["id"]).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute([row]);
+};
+/** Offline fallback embed for a document (storage). Same-space ollama only. */
+const fallbackDocumentEmbed = async (cfg, collection, text) => {
+    const fb = cfg.embedding.modal?.fallback;
+    if (fb?.enabled === false)
+        return null;
+    if (fb?.provider !== "ollama")
+        return null;
+    const canonical = resolveModel(cfg, collection);
+    if ((cfg.embedding.ollamaModel || "embeddinggemma") !== canonical)
+        return null;
+    try {
+        const fn = new OllamaEmbeddingFunction({
+            model: cfg.embedding.ollamaModel || "embeddinggemma",
+            host: cfg.embedding.ollamaHost || "http://127.0.0.1:11434",
+        });
+        await fn.init();
+        const vecs = await fn.generateEmbeddings([text]);
+        return vecs[0];
+    }
+    catch (err) {
+        console.warn(`[pi-vault-mind] Modal offline and Ollama document fallback failed: ${err.message}`);
+        return null;
+    }
+};
+/**
+ * Precomputed-vector insert path — used by sync. Upserts rows that are already
+ * embedded (vectors come from the server) into the namespaced table, keyed by
+ * `id` (merge-insert). Bypasses the auto-embed source field entirely. Carries
+ * text + metadata. Idempotent (re-fetching a boundary row is a no-op).
+ */
+export const upsertPrecomputed = async (dataDir, collectionName, model, dim, rows, cfg) => {
+    if (rows.length === 0)
+        return;
+    const table = await getModalCollectionTable(dataDir, collectionName, model, dim, cfg);
+    const mapped = rows.map((r) => {
+        const meta = r.metadata || {};
+        const vec = r.vector;
+        if (!vec)
+            throw new Error(`Sync row ${r.id} has no vector — cannot import precomputed`);
+        return {
+            id: String(r.id),
+            domain: String(meta.domain ?? r.domain ?? ""),
+            source: String(meta.source ?? r.source ?? ""),
+            fact: String(r.text ?? r.fact ?? ""),
+            tag: String(meta.tag ?? r.tag ?? ""),
+            artifact: String(meta.artifact ?? r.artifact ?? ""),
+            created_at: String(r.created_at ?? meta.created_at ?? new Date().toISOString()),
+            vector: vec,
+        };
+    });
+    await table.mergeInsert(["id"]).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute(mapped);
+};
 export const searchHybrid = async (dataDir, collectionName, query, limit, cfg) => {
+    if (isModal(cfg)) // modal provider: handled entirely by the namespaced-table path
+        return searchHybridModal(dataDir, collectionName, query, limit, cfg);
     const table = await getCollectionTable(dataDir, collectionName, cfg);
     const embeddingFn = await getEmbeddingFunction(cfg);
     const queryVector = await embeddingFn.generateEmbeddings([query]); // one-element batch; only queryVector[0] is used
     const results = await table.query().nearestTo(queryVector[0]).limit(limit).toArray();
     return results;
 };
+/**
+ * Modal search path: embed the query immediately (bypasses the coalescer
+ * debounce), then nearest-neighbor search the namespaced table. If Modal is
+ * offline, fall back to a same-space local provider when available; otherwise
+ * degrade to FTS/keyword search. Never crashes a search.
+ */
+const searchHybridModal = async (dataDir, collectionName, query, limit, cfg) => {
+    const model = resolveModel(cfg, collectionName);
+    let queryVector;
+    try {
+        queryVector = await embedQuery(cfg, collectionName, query);
+    }
+    catch (err) {
+        // Offline: same-space local fallback, else degrade to FTS.
+        const fb = await fallbackQueryEmbed(cfg, collectionName, query);
+        if (!fb) {
+            console.warn(`[pi-vault-mind] Modal search offline, degrading to FTS for "${collectionName}": ${err.message}`);
+            return modalFtsFallback(dataDir, collectionName, model, query, limit);
+        }
+        queryVector = fb;
+    }
+    const found = await findModalTable(dataDir, collectionName, model);
+    if (!found)
+        return [];
+    // Guard against space mismatch (shouldn't happen with one canonical model).
+    if (found.dim !== queryVector.length) {
+        console.warn(`[pi-vault-mind] Query dim ${queryVector.length} ≠ table dim ${found.dim} for "${collectionName}"; degrading to FTS.`);
+        return modalFtsFallback(dataDir, collectionName, model, query, limit);
+    }
+    try {
+        return await found.table.query().nearestTo(queryVector).limit(limit).toArray();
+    }
+    catch (err) {
+        console.warn(`[pi-vault-mind] Modal vector search failed, degrading to FTS: ${err.message}`);
+        return modalFtsFallback(dataDir, collectionName, model, query, limit);
+    }
+};
 export const searchFts = async (dataDir, collectionName, query, limit, cfg) => {
+    // For modal, FTS targets the namespaced table (the same one sync + query use).
+    if (isModal(cfg)) {
+        const model = resolveModel(cfg, collectionName);
+        const found = await findModalTable(dataDir, collectionName, model);
+        if (!found)
+            return [];
+        try {
+            return await found.table.search(query).limit(limit).toArray();
+        }
+        catch {
+            return [];
+        }
+    }
     const table = await getCollectionTable(dataDir, collectionName, cfg);
     const results = await table.search(query).limit(limit).toArray();
     return results;

package/dist/src/modal-client.d.ts ADDED Viewed

@@ -0,0 +1,176 @@
+/**
+ * Client for the pi-vault-mind Modal embedding service.
+ *
+ * This is the local (extension) side of the Modal app under `modal/`. It mirrors
+ * the HTTP contract documented in `docs/MODAL_EMBEDDING.md`:
+ *   - on-demand embedding         → POST /embed
+ *   - bulk background jobs        → POST /jobs, GET /jobs, GET /jobs/{id},
+ *                                  POST /jobs/{id}/cancel
+ *   - incremental vector sync     → GET /sync/collections, GET /sync/export
+ *                                  (format=json|arrow)
+ *   - model registry + stats      → GET /models, GET /stats
+ *
+ * This client is the typed mirror of the server contract. The server (Agent A)
+ * owns it; additive changes here are mirrored in the server's `modal/web.py`.
+ * The local wiring lives in `src/lance.ts` (provider), `src/sync.ts`, and
+ * `/wiki modal` commands (see docs/MODAL_EMBEDDING.md "Local integration").
+ */
+/** Connection settings accepted by the `ModalEmbeddingClient` constructor. */
+export interface ModalClientConfig {
+    /** Base URL of the deployed ASGI app (no trailing slash needed). */
+    baseUrl: string;
+    /** Bearer token matching the `pi-vault-mind-auth` Modal secret. */
+    apiToken: string;
+    /** Per-request timeout in ms (default 120s — bulk submits can be large). */
+    timeoutMs?: number;
+}
+/** Result of `ModalEmbeddingClient.embed()`. */
+export interface EmbedResult {
+    model: string;
+    dim: number;
+    /** NOTE(review): presumably one vector per input text, in input order — confirm against the server contract. */
+    vectors: number[][];
+}
+/** One input record for `ModalEmbeddingClient.submitJob()`. */
+export interface JobRecord {
+    id: string;
+    text: string;
+    metadata?: Record<string, unknown>;
+    created_at?: string;
+}
+/** Result of `ModalEmbeddingClient.submitJob()`. */
+export interface JobSubmitResult {
+    job_id: string;
+    call_id: string;
+    total: number;
+}
+/**
+ * State of one bulk job, as returned by `jobStatus()` and `waitForJob()` and
+ * listed in `JobListResponse.jobs`.
+ */
+export interface JobStatus {
+    status: "queued" | "running" | "done" | "error" | "cancelled";
+    collection: string;
+    model: string;
+    dim: number;
+    total: number;
+    processed: number;
+    /** Set by POST /jobs/{id}/cancel; the worker stops after the current batch. */
+    cancel_requested?: boolean;
+    error?: string;
+    updated_at: string;
+}
+/** One model in the server's registry (GET /models). */
+export interface ModelInfo {
+    key: string;
+    hf_id: string;
+    backend: "sentence-transformers" | "ollama" | "hf";
+    /** Lets a caller resolve the model's output dim up-front, before any /embed response. */
+    native_dim: number;
+    matryoshka_dims: number[];
+    query_prompt?: string | null;
+    document_prompt?: string | null;
+    gated: boolean;
+    trust_remote_code: boolean;
+    enabled: boolean;
+    notes: string;
+}
+/** Result of `ModalEmbeddingClient.models()`. */
+export interface ModelsResponse {
+    default: string;
+    default_dim: number | null;
+    models: ModelInfo[];
+}
+/** Result of `ModalEmbeddingClient.listJobs()`. */
+export interface JobListResponse {
+    jobs: JobStatus[];
+    count: number;
+}
+/** One entry of the list returned by `ModalEmbeddingClient.syncCollections()`. */
+export interface SyncCollection {
+    collection: string;
+    model: string;
+    dim: number;
+    rows: number;
+    table: string;
+}
+/** One exported row: an element of `ExportPage.rows`, also handed to the `exportAll` page callback. */
+export interface ExportRow {
+    id: string;
+    text: string;
+    vector?: number[];
+    metadata: Record<string, unknown>;
+    model: string;
+    dim: number;
+    /** Ordering key for incremental export: `exportSince` pulls rows with seq > since. */
+    seq: number;
+    created_at: string;
+}
+/** One page returned by `ModalEmbeddingClient.exportSince()`. */
+export interface ExportPage {
+    rows: ExportRow[];
+    /** Value to remember and pass as `since` for the next pull. */
+    next_watermark: number;
+    count: number;
+    /** False when the page was full and more rows likely remain. */
+    done: boolean;
+}
+/**
+ * Typed client for the Modal embedding service: on-demand embedding, bulk
+ * jobs, incremental sync/export, and the model registry + stats.
+ */
+export declare class ModalEmbeddingClient {
+    private baseUrl;
+    private apiToken;
+    private timeoutMs;
+    /** Create a client from the given connection settings. */
+    constructor(cfg: ModalClientConfig);
+    private request;
+    /** Liveness check; also returns the server's default model. */
+    health(): Promise<{
+        ok: boolean;
+        default_model: string;
+    }>;
+    /** Registry of available embedders (public; no auth). Use native_dim to
+     * resolve a model's output dim up-front instead of waiting for the first
+     * /embed response. (Additive — Agent B request #2.) */
+    models(): Promise<ModelsResponse>;
+    /** Server-side store + compute stats (rows per namespace, index state, GPU). */
+    stats(): Promise<Record<string, unknown>>;
+    /** Embed text on demand. Use task="query" for search, "document" for storage. */
+    embed(texts: string[], opts?: {
+        model?: string;
+        dim?: number;
+        task?: "query" | "document";
+    }): Promise<EmbedResult>;
+    /** Submit a bulk embedding job; embeds + stores server-side. */
+    submitJob(collection: string, records: JobRecord[], opts?: {
+        model?: string;
+        dim?: number;
+    }): Promise<JobSubmitResult>;
+    /** Fetch the current status of one job by id (presumably GET /jobs/{id} — confirm in the implementation). */
+    jobStatus(jobId: string): Promise<JobStatus>;
+    /** List recent jobs (newest first). Additive — surfaces GET /jobs so
+     * `/wiki modal jobs` can list, not just poll a known id. (Agent B request #1.) */
+    listJobs(limit?: number): Promise<JobListResponse>;
+    /** Cooperatively cancel a running/queued job. The worker stops after its
+     * current batch and writes status=cancelled. */
+    cancelJob(jobId: string): Promise<{
+        job_id: string;
+        cancel_requested: boolean;
+    }>;
+    /** Poll a job until it reaches a terminal state. */
+    waitForJob(jobId: string, pollMs?: number): Promise<JobStatus>;
+    /** List the collections/tables held in the server-side vector store. */
+    syncCollections(): Promise<SyncCollection[]>;
+    /** Pull one page of rows with seq > since. Remember next_watermark. */
+    exportSince(collection: string, opts?: {
+        model?: string;
+        dim?: number;
+        since?: number;
+        limit?: number;
+    }): Promise<ExportPage>;
+    /** Pull one page of rows with seq > since as an Arrow IPC stream.
+     * Vectors are always included (no include_vectors flag). The watermark /
+     * done / count come back as response headers (X-Next-Watermark, X-Done,
+     * X-Count) since the body is binary. Additive — the local sync path uses
+     * the JSON `exportSince`; this is for clients that want zero-copy rows. */
+    exportSinceArrow(collection: string, opts?: {
+        model?: string;
+        dim?: number;
+        since?: number;
+        limit?: number;
+    }): Promise<{
+        data: ArrayBuffer;
+        nextWatermark: number;
+        done: boolean;
+        count: number;
+    }>;
+    /**
+     * Drain every remaining page for a collection, invoking `onPage` for each.
+     * Returns the final watermark to persist for the next incremental sync.
+     */
+    exportAll(collection: string, onPage: (rows: ExportRow[]) => Promise<void> | void, opts?: {
+        model?: string;
+        dim?: number;
+        since?: number;
+        limit?: number;
+    }): Promise<number>;
+}