pi-vault-mind 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/lance.js CHANGED
@@ -4,6 +4,8 @@ import * as path from "node:path";
4
4
  import * as lancedb from "@lancedb/lancedb";
5
5
  import { LanceSchema, TextEmbeddingFunction } from "@lancedb/lancedb/embedding";
6
6
  import * as arrow from "apache-arrow";
7
+ import { EmbeddingCoalescer } from "./embed-queue.js";
8
+ import { createModalClient, namespacedTableName, resolveDim, resolveModel, } from "./modal-config.js";
7
9
  let db = null;
8
10
  // ── Connection ──────────────────────────────────────────────────────────────
9
11
  export const connect = async (dataDir) => {
@@ -18,6 +20,10 @@ export const connect = async (dataDir) => {
18
20
  export const resetConnection = () => {
19
21
  db = null;
20
22
  tables = {};
23
+ modalTableCache.clear();
24
+ modalTableInFlight.clear();
25
+ nativeDimCache.clear();
26
+ void disposeCoalescers();
21
27
  };
22
28
  // ── Embedding Functions ─────────────────────────────────────────────────────
23
29
  /**
@@ -108,6 +114,264 @@ class TransformersEmbeddingFunction extends TextEmbeddingFunction {
108
114
  return embeddings;
109
115
  }
110
116
  }
117
+ // ── Modal Provider ──────────────────────────────────────────────────────────
118
/**
 * `TextEmbeddingFunction` that delegates to the Modal `/embed` endpoint via
 * the client returned by `createModalClient`.
 *
 * Two callers rely on it:
 *   - tables that auto-embed through `sourceField`, via
 *     `computeSourceEmbeddings` ("document" task) and
 *     `computeQueryEmbeddings` ("query" task);
 *   - the `EmbeddingCoalescer`, which wraps `generateEmbeddings` as its
 *     `embedFn` for the main collection path (debounced + batched writes,
 *     immediate search embeds).
 *
 * The main collection path does not auto-embed: it stores precomputed vectors
 * in namespaced `col_{collection}__{model}__{dim}` tables.
 */
class ModalEmbeddingFunction extends TextEmbeddingFunction {
    cfg;
    collection;
    ndimsValue;
    /**
     * @param cfg Wiki config handed to the modal-config resolvers.
     * @param collection Collection name used for per-collection model/dim resolution.
     * @param ndimsValue Dimension reported by `ndims()`; may be undefined.
     */
    constructor(cfg, collection, ndimsValue) {
        super();
        this.cfg = cfg;
        this.collection = collection;
        this.ndimsValue = ndimsValue;
    }
    /** Vector dimension supplied at construction time. */
    ndims() {
        return this.ndimsValue;
    }
    /** Vectors are stored as 32-bit floats. */
    embeddingDataType() {
        return new arrow.Float32();
    }
    /**
     * Embed a batch of texts with one `/embed` request.
     *
     * @param texts Texts to embed.
     * @param task "document" (default) for storage, "query" for search.
     * @returns The `vectors` field of the embed response.
     * @throws Error when no Modal client can be built from the config.
     */
    async generateEmbeddings(texts, task = "document") {
        const client = createModalClient(this.cfg);
        if (!client) {
            throw new Error("Modal provider not configured (missing baseUrl or token).");
        }
        const request = {
            model: resolveModel(this.cfg, this.collection),
            dim: resolveDim(this.cfg, this.collection),
            task,
        };
        const response = await client.embed(texts, request);
        // Record the dim the server reported so table names can be resolved later.
        rememberNativeDim(resolveModel(this.cfg, this.collection), response.dim);
        return response.vectors;
    }
    /** Storage hook (auto-embed via sourceField): always the "document" task. */
    async computeSourceEmbeddings(texts) {
        return this.generateEmbeddings(texts, "document");
    }
    /** Query hook: embeds the single query with the "query" task and returns its vector. */
    async computeQueryEmbeddings(data) {
        const vectors = await this.generateEmbeddings([data], "query");
        return vectors[0];
    }
}
173
/**
 * Output dimension learned per model. The first truthy dim recorded for a
 * model wins; later reports for the same model are ignored. This lets the
 * search path resolve a namespaced table name before it holds any vector.
 */
const nativeDimCache = new Map();
/** Record `dim` for `model` unless `dim` is falsy or the model is already known. */
const rememberNativeDim = (model, dim) => {
    if (!dim) {
        return;
    }
    if (nativeDimCache.has(model)) {
        return;
    }
    nativeDimCache.set(model, dim);
};
183
+ /** Resolved effective dim: config dim > cached native dim > undefined. */
184
+ const effectiveDim = (cfg, collection) => {
185
+ const d = resolveDim(cfg, collection);
186
+ if (d != null)
187
+ return d;
188
+ const model = resolveModel(cfg, collection);
189
+ return nativeDimCache.get(model);
190
+ };
191
+ /**
192
+ * Coalescer cache, keyed by provider+model+dim signature. Append/ingest embeds
193
+ * are debounced + batched into a single `/embed` call; identical texts are
194
+ * deduped; in-flight flushes are capped. Search queries bypass the debounce via
195
+ * `embedImmediate` (latency-sensitive). Knobs come from
196
+ * `cfg.embedding.coalesce` (Q6).
197
+ */
198
+ const coalescerCache = new Map();
199
+ const coalesceKey = (cfg, collection) => `modal:${resolveModel(cfg, collection)}:${resolveDim(cfg, collection) ?? "native"}`;
200
+ const getCoalescer = (cfg, collection) => {
201
+ const key = coalesceKey(cfg, collection);
202
+ const existing = coalescerCache.get(key);
203
+ if (existing)
204
+ return existing;
205
+ const fn = new ModalEmbeddingFunction(cfg, collection);
206
+ const embedFn = (texts, task) => fn.generateEmbeddings(texts, task);
207
+ const co = cfg.embedding.coalesce ?? {};
208
+ const coalescer = new EmbeddingCoalescer({
209
+ embedFn,
210
+ debounceMs: co.debounceMs ?? 1000,
211
+ maxBatchSize: co.maxBatchSize ?? 64,
212
+ maxConcurrentFlushes: co.maxConcurrentFlushes ?? 2,
213
+ dedupe: co.dedupe ?? true,
214
+ });
215
+ coalescerCache.set(key, coalescer);
216
+ return coalescer;
217
+ };
218
/** True unless `cfg.embedding.coalesce.searchBypass` is explicitly set to another value. */
const searchBypassesCoalescer = (cfg) => {
    const bypass = cfg.embedding.coalesce?.searchBypass;
    return bypass ?? true;
};
220
+ /**
221
+ * Embed a single query, immediately when search-bypass is on (default),
222
+ * otherwise through the debounce. Latency-sensitive interactive search.
223
+ */
224
+ const embedQuery = (cfg, collection, text) => {
225
+ const co = getCoalescer(cfg, collection);
226
+ return searchBypassesCoalescer(cfg) ? co.embedImmediate(text, "query") : co.embed(text, "query");
227
+ };
228
+ /**
229
+ * Embed one document via the coalescer (debounced + batched). Append/ingest
230
+ * never fires one network request per entry — requests within the window flush
231
+ * as a single batched `/embed` call.
232
+ */
233
+ const embedDocument = (cfg, collection, text) => getCoalescer(cfg, collection).embed(text, "document");
234
+ // ── Modal table management ──────────────────────────────────────────────────
235
+ /** Plain (no auto-embed) schema for a namespaced collection table. */
236
+ function makeModalCollectionSchema(dim) {
237
+ const vec = new arrow.FixedSizeList(dim, new arrow.Field("item", new arrow.Float32(), true));
238
+ return new arrow.Schema([
239
+ new arrow.Field("id", new arrow.Utf8(), true),
240
+ new arrow.Field("domain", new arrow.Utf8(), true),
241
+ new arrow.Field("source", new arrow.Utf8(), true),
242
+ new arrow.Field("fact", new arrow.Utf8(), true),
243
+ new arrow.Field("tag", new arrow.Utf8(), true),
244
+ new arrow.Field("artifact", new arrow.Utf8(), true),
245
+ new arrow.Field("created_at", new arrow.Utf8(), true),
246
+ new arrow.Field("vector", vec, true),
247
+ ]);
248
+ }
249
+ const modalTableCache = new Map();
250
+ /** In-flight creation/open promises, to dedupe concurrent getModalCollectionTable calls. */
251
+ const modalTableInFlight = new Map();
252
+ /**
253
+ * Open (or create) a namespaced modal collection table at a known dim. The dim
254
+ * is always derived from a real vector (an embedded query/document or a synced
255
+ * row), so there is no hard-coded dimension. Concurrent calls for the same
256
+ * table are deduped via `modalTableInFlight` so only one create/open runs.
257
+ */
258
+ const getModalCollectionTable = async (dataDir, collection, model, dim, cfg) => {
259
+ const conn = await connect(dataDir);
260
+ const tableName = namespacedTableName(collection, model, dim);
261
+ const cached = modalTableCache.get(tableName);
262
+ if (cached)
263
+ return cached;
264
+ const inflight = modalTableInFlight.get(tableName);
265
+ if (inflight)
266
+ return inflight;
267
+ const p = (async () => {
268
+ const existing = await conn.tableNames();
269
+ let t;
270
+ if (existing.includes(tableName)) {
271
+ t = await conn.openTable(tableName);
272
+ }
273
+ else {
274
+ const schema = makeModalCollectionSchema(dim);
275
+ t = await conn.createEmptyTable(tableName, schema);
276
+ if (cfg.ftsEnabled !== false) {
277
+ try {
278
+ await t.createIndex("fact", { config: lancedb.Index.fts() });
279
+ }
280
+ catch {
281
+ /* index may already exist */
282
+ }
283
+ }
284
+ }
285
+ modalTableCache.set(tableName, t);
286
+ return t;
287
+ })();
288
+ modalTableInFlight.set(tableName, p);
289
+ try {
290
+ return await p;
291
+ }
292
+ finally {
293
+ modalTableInFlight.delete(tableName);
294
+ }
295
+ };
296
+ /**
297
+ * Locate an existing namespaced table for a collection+model (any dim) by
298
+ * listing table names. Returns the table + its parsed dim, or null. Used by the
299
+ * search path so it can find a table populated purely by sync (before any
300
+ * local embed has cached the dim).
301
+ */
302
+ const findModalTable = async (dataDir, collection, model) => {
303
+ const conn = await connect(dataDir);
304
+ const prefix = `col_${collection}__${model}__`;
305
+ for (const name of await conn.tableNames()) {
306
+ if (name.startsWith(prefix)) {
307
+ const dim = Number.parseInt(name.slice(prefix.length), 10);
308
+ if (Number.isFinite(dim)) {
309
+ const table = await conn.openTable(name);
310
+ return { table, dim };
311
+ }
312
+ }
313
+ }
314
+ return null;
315
+ };
316
+ /**
317
+ * Best-effort FTS fallback. Used when the modal embed path is unavailable
318
+ * (offline + no same-space local fallback) so a search degrades to keyword
319
+ * search instead of crashing.
320
+ */
321
+ const modalFtsFallback = async (dataDir, collection, model, query, limit) => {
322
+ const found = await findModalTable(dataDir, collection, model);
323
+ if (!found)
324
+ return [];
325
+ try {
326
+ return await found.table.search(query).limit(limit).toArray();
327
+ }
328
+ catch {
329
+ return [];
330
+ }
331
+ };
332
+ /**
333
+ * Offline fallback embed for a query. Only used when Modal is unreachable and a
334
+ * same-space local provider is configured (same canonical model → same vector
335
+ * space → can query the same namespaced table). Returns null when no same-space
336
+ * fallback is available, so the caller degrades to FTS.
337
+ */
338
+ const fallbackQueryEmbed = async (cfg, collection, text) => {
339
+ const fb = cfg.embedding.modal?.fallback;
340
+ if (fb?.enabled === false)
341
+ return null;
342
+ if (fb?.provider !== "ollama")
343
+ return null; // transformers is 384-dim ≠ canonical space
344
+ const canonical = resolveModel(cfg, collection);
345
+ // Same-space only: the local Ollama model must match the canonical model.
346
+ if ((cfg.embedding.ollamaModel || "embeddinggemma") !== canonical)
347
+ return null;
348
+ try {
349
+ const fn = new OllamaEmbeddingFunction({
350
+ model: cfg.embedding.ollamaModel || "embeddinggemma",
351
+ host: cfg.embedding.ollamaHost || "http://127.0.0.1:11434",
352
+ });
353
+ await fn.init();
354
+ const vecs = await fn.generateEmbeddings([text]);
355
+ return vecs[0];
356
+ }
357
+ catch (err) {
358
+ console.warn(`[pi-vault-mind] Modal offline and Ollama fallback failed: ${err.message}`);
359
+ return null;
360
+ }
361
+ };
362
+ /** Flush all pending coalesced embeds and clear caches (used by reset). */
363
+ const disposeCoalescers = async () => {
364
+ for (const co of coalescerCache.values()) {
365
+ try {
366
+ await co.drain();
367
+ }
368
+ catch {
369
+ /* best-effort */
370
+ }
371
+ co.dispose();
372
+ }
373
+ coalescerCache.clear();
374
+ };
111
375
  const getModelsJsonPath = () => path.join(homedir(), ".pi", "agent", "models.json");
112
376
  /** Read Pi's models.json for already-registered Ollama models. */
113
377
  const loadPiModelsJson = () => {
@@ -255,6 +519,12 @@ export const pullOllamaModel = async (model, piOrHost) => {
255
519
  };
256
520
  // ── Embedding Provider Factory ──────────────────────────────────────────────
257
521
  const getEmbeddingFunction = async (cfg) => {
522
+ if (cfg.embedding.provider === "modal") {
523
+ // Used by graph tables (auto-embed via sourceField). The main collection
524
+ // path bypasses this and inserts precomputed vectors (see upsertEntry).
525
+ const dim = effectiveDim(cfg);
526
+ return new ModalEmbeddingFunction(cfg, undefined, dim);
527
+ }
258
528
  if (cfg.embedding.provider === "ollama") {
259
529
  const fn = new OllamaEmbeddingFunction({
260
530
  model: cfg.embedding.ollamaModel || "embeddinggemma",
@@ -332,12 +602,25 @@ const getCollectionTable = async (dataDir, collectionName, cfg) => {
332
602
  }
333
603
  return tables[tableName];
334
604
  };
605
+ /**
606
+ * Graph (entity/relation) schemas embed a fixed-size vector field, so they need
607
+ * a concrete dimension. Under the modal provider the dim may be unresolved until
608
+ * the first embed/sync caches it — fail with a clear message instead of building
609
+ * a schema with an undefined dimension. (These tables are not yet wired into the
610
+ * active graph path; this guards them for when they are.)
611
+ */
612
+ const assertGraphDim = (cfg, kind) => {
613
+ if (cfg.embedding.provider === "modal" && effectiveDim(cfg) == null) {
614
+ throw new Error(`Graph ${kind} table needs a known embedding dimension under the modal provider. Set wiki.embedding.modal.dim, or run an embed/sync first so the native dim is cached.`);
615
+ }
616
+ };
335
617
  const getEntityTable = async (dataDir, cfg) => {
336
618
  const conn = await connect(dataDir);
337
619
  const tableName = "entities";
338
620
  if (tables[tableName])
339
621
  return tables[tableName];
340
622
  const embeddingFn = await getEmbeddingFunction(cfg);
623
+ assertGraphDim(cfg, "entity");
341
624
  const schema = makeEntitySchema(embeddingFn);
342
625
  const existing = await conn.tableNames();
343
626
  if (existing.includes(tableName)) {
@@ -353,6 +636,7 @@ const getRelationTable = async (dataDir, cfg) => {
353
636
  if (tables[tableName])
354
637
  return tables[tableName];
355
638
  const embeddingFn = await getEmbeddingFunction(cfg);
639
+ assertGraphDim(cfg, "relation");
356
640
  const schema = makeRelationSchema(embeddingFn);
357
641
  const existing = await conn.tableNames();
358
642
  if (existing.includes(tableName)) {
@@ -363,7 +647,10 @@ const getRelationTable = async (dataDir, cfg) => {
363
647
  return tables[tableName];
364
648
  };
365
649
  // ── Public API ──────────────────────────────────────────────────────────────
650
/** True when the configured embedding provider is "modal". */
const isModal = (cfg) => {
    const { provider } = cfg.embedding;
    return provider === "modal";
};
366
651
  export const upsertEntry = async (dataDir, collectionName, entry, cfg) => {
652
+ if (isModal(cfg))
653
+ return upsertEntryModal(dataDir, collectionName, entry, cfg);
367
654
  const table = await getCollectionTable(dataDir, collectionName, cfg);
368
655
  const row = {
369
656
  id: entry.id || crypto.randomUUID(),
@@ -376,14 +663,159 @@ export const upsertEntry = async (dataDir, collectionName, entry, cfg) => {
376
663
  };
377
664
  await table.add([row]);
378
665
  };
666
+ /**
667
+ * Modal append path: embed the document text via the coalescer (debounced +
668
+ * batched), then merge-insert the row with its precomputed vector into the
669
+ * namespaced `col_{collection}__{model}__{dim}` table. If Modal is offline and
670
+ * no same-space local fallback is configured, it warns and skips the vector
671
+ * index (the JSONL append is the source of truth; reindex recovers) — it does
672
+ * not crash. Keyed by `id` (upsert, idempotent).
673
+ */
674
+ const upsertEntryModal = async (dataDir, collectionName, entry, cfg) => {
675
+ const model = resolveModel(cfg, collectionName);
676
+ const text = entry.fact || "";
677
+ let vector;
678
+ try {
679
+ vector = await embedDocument(cfg, collectionName, text);
680
+ }
681
+ catch (err) {
682
+ // Offline: try a same-space local fallback; else warn + skip indexing.
683
+ const fb = await fallbackDocumentEmbed(cfg, collectionName, text);
684
+ if (fb) {
685
+ vector = fb;
686
+ }
687
+ else {
688
+ console.warn(`[pi-vault-mind] Modal embed failed for "${collectionName}" and no same-space fallback — skipping vector index: ${err.message}`);
689
+ return;
690
+ }
691
+ }
692
+ const dim = vector.length;
693
+ rememberNativeDim(model, dim);
694
+ const table = await getModalCollectionTable(dataDir, collectionName, model, dim, cfg);
695
+ const row = {
696
+ id: entry.id || crypto.randomUUID(),
697
+ domain: entry.domain || "",
698
+ source: entry.source || "",
699
+ fact: text,
700
+ tag: entry.tag || "",
701
+ artifact: entry.artifact || "",
702
+ created_at: entry.created_at || new Date().toISOString(),
703
+ vector,
704
+ };
705
+ await table.mergeInsert(["id"]).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute([row]);
706
+ };
707
+ /** Offline fallback embed for a document (storage). Same-space ollama only. */
708
+ const fallbackDocumentEmbed = async (cfg, collection, text) => {
709
+ const fb = cfg.embedding.modal?.fallback;
710
+ if (fb?.enabled === false)
711
+ return null;
712
+ if (fb?.provider !== "ollama")
713
+ return null;
714
+ const canonical = resolveModel(cfg, collection);
715
+ if ((cfg.embedding.ollamaModel || "embeddinggemma") !== canonical)
716
+ return null;
717
+ try {
718
+ const fn = new OllamaEmbeddingFunction({
719
+ model: cfg.embedding.ollamaModel || "embeddinggemma",
720
+ host: cfg.embedding.ollamaHost || "http://127.0.0.1:11434",
721
+ });
722
+ await fn.init();
723
+ const vecs = await fn.generateEmbeddings([text]);
724
+ return vecs[0];
725
+ }
726
+ catch (err) {
727
+ console.warn(`[pi-vault-mind] Modal offline and Ollama document fallback failed: ${err.message}`);
728
+ return null;
729
+ }
730
+ };
731
+ /**
732
+ * Precomputed-vector insert path — used by sync. Upserts rows that are already
733
+ * embedded (vectors come from the server) into the namespaced table, keyed by
734
+ * `id` (merge-insert). Bypasses the auto-embed source field entirely. Carries
735
+ * text + metadata. Idempotent (re-fetching a boundary row is a no-op).
736
+ */
737
+ export const upsertPrecomputed = async (dataDir, collectionName, model, dim, rows, cfg) => {
738
+ if (rows.length === 0)
739
+ return;
740
+ const table = await getModalCollectionTable(dataDir, collectionName, model, dim, cfg);
741
+ const mapped = rows.map((r) => {
742
+ const meta = r.metadata || {};
743
+ const vec = r.vector;
744
+ if (!vec)
745
+ throw new Error(`Sync row ${r.id} has no vector — cannot import precomputed`);
746
+ return {
747
+ id: String(r.id),
748
+ domain: String(meta.domain ?? r.domain ?? ""),
749
+ source: String(meta.source ?? r.source ?? ""),
750
+ fact: String(r.text ?? r.fact ?? ""),
751
+ tag: String(meta.tag ?? r.tag ?? ""),
752
+ artifact: String(meta.artifact ?? r.artifact ?? ""),
753
+ created_at: String(r.created_at ?? meta.created_at ?? new Date().toISOString()),
754
+ vector: vec,
755
+ };
756
+ });
757
+ await table.mergeInsert(["id"]).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute(mapped);
758
+ };
379
759
  export const searchHybrid = async (dataDir, collectionName, query, limit, cfg) => {
760
+ if (isModal(cfg))
761
+ return searchHybridModal(dataDir, collectionName, query, limit, cfg);
380
762
  const table = await getCollectionTable(dataDir, collectionName, cfg);
381
763
  const embeddingFn = await getEmbeddingFunction(cfg);
382
764
  const queryVector = await embeddingFn.generateEmbeddings([query]);
383
765
  const results = await table.query().nearestTo(queryVector[0]).limit(limit).toArray();
384
766
  return results;
385
767
  };
768
+ /**
769
+ * Modal search path: embed the query immediately (bypasses the coalescer
770
+ * debounce), then nearest-neighbor search the namespaced table. If Modal is
771
+ * offline, fall back to a same-space local provider when available; otherwise
772
+ * degrade to FTS/keyword search. Never crashes a search.
773
+ */
774
+ const searchHybridModal = async (dataDir, collectionName, query, limit, cfg) => {
775
+ const model = resolveModel(cfg, collectionName);
776
+ let queryVector;
777
+ try {
778
+ queryVector = await embedQuery(cfg, collectionName, query);
779
+ }
780
+ catch (err) {
781
+ // Offline: same-space local fallback, else degrade to FTS.
782
+ const fb = await fallbackQueryEmbed(cfg, collectionName, query);
783
+ if (!fb) {
784
+ console.warn(`[pi-vault-mind] Modal search offline, degrading to FTS for "${collectionName}": ${err.message}`);
785
+ return modalFtsFallback(dataDir, collectionName, model, query, limit);
786
+ }
787
+ queryVector = fb;
788
+ }
789
+ const found = await findModalTable(dataDir, collectionName, model);
790
+ if (!found)
791
+ return [];
792
+ // Guard against space mismatch (shouldn't happen with one canonical model).
793
+ if (found.dim !== queryVector.length) {
794
+ console.warn(`[pi-vault-mind] Query dim ${queryVector.length} ≠ table dim ${found.dim} for "${collectionName}"; degrading to FTS.`);
795
+ return modalFtsFallback(dataDir, collectionName, model, query, limit);
796
+ }
797
+ try {
798
+ return await found.table.query().nearestTo(queryVector).limit(limit).toArray();
799
+ }
800
+ catch (err) {
801
+ console.warn(`[pi-vault-mind] Modal vector search failed, degrading to FTS: ${err.message}`);
802
+ return modalFtsFallback(dataDir, collectionName, model, query, limit);
803
+ }
804
+ };
386
805
  export const searchFts = async (dataDir, collectionName, query, limit, cfg) => {
806
+ // For modal, FTS targets the namespaced table (the same one sync + query use).
807
+ if (isModal(cfg)) {
808
+ const model = resolveModel(cfg, collectionName);
809
+ const found = await findModalTable(dataDir, collectionName, model);
810
+ if (!found)
811
+ return [];
812
+ try {
813
+ return await found.table.search(query).limit(limit).toArray();
814
+ }
815
+ catch {
816
+ return [];
817
+ }
818
+ }
387
819
  const table = await getCollectionTable(dataDir, collectionName, cfg);
388
820
  const results = await table.search(query).limit(limit).toArray();
389
821
  return results;
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Client for the pi-vault-mind Modal embedding service.
3
+ *
4
+ * This is the local (extension) side of the Modal app under `modal/`. It mirrors
5
+ * the HTTP contract documented in `docs/MODAL_EMBEDDING.md`:
6
+ * - on-demand embedding → POST /embed
7
+ * - bulk background jobs → POST /jobs, GET /jobs, GET /jobs/{id},
8
+ * POST /jobs/{id}/cancel
9
+ * - incremental vector sync → GET /sync/collections, GET /sync/export
10
+ * (format=json|arrow)
11
+ * - model registry + stats → GET /models, GET /stats
12
+ *
13
+ * This client is the typed mirror of the server contract. The server (Agent A)
14
+ * owns it; additive changes here are mirrored in the server's `modal/web.py`.
15
+ * The local wiring lives in `src/lance.ts` (provider), `src/sync.ts`, and
16
+ * `/wiki modal` commands (see docs/MODAL_EMBEDDING.md "Local integration").
17
+ */
18
+ export interface ModalClientConfig { // Settings object accepted by the `ModalEmbeddingClient` constructor.
19
+ /** Base URL of the deployed ASGI app (no trailing slash needed). */
20
+ baseUrl: string;
21
+ /** Bearer token matching the `pi-vault-mind-auth` Modal secret. */
22
+ apiToken: string;
23
+ /** Per-request timeout in ms (default 120s — bulk submits can be large). */
24
+ timeoutMs?: number; // Optional; the documented 120s default applies when omitted.
25
+ }
26
+ export interface EmbedResult { // Resolved value of `ModalEmbeddingClient.embed` (on-demand embedding, POST /embed).
27
+ model: string; // NOTE(review): presumably the model key the server actually used — confirm against the server contract.
28
+ dim: number; // NOTE(review): presumably the length of every entry in `vectors` — confirm.
29
+ vectors: number[][]; // NOTE(review): presumably one vector per input text, in input order — confirm.
30
+ }
31
+ export interface JobRecord { // One input record of a bulk embedding job; `submitJob` takes an array of these.
32
+ id: string;
33
+ text: string; // NOTE(review): presumably the text the server embeds for this record — confirm.
34
+ metadata?: Record<string, unknown>; // Optional free-form key/value data attached to the record.
35
+ created_at?: string; // Optional; NOTE(review): timestamp format is not visible here — confirm (ISO 8601?).
36
+ }
37
+ export interface JobSubmitResult { // Resolved value of `submitJob` (POST /jobs).
38
+ job_id: string; // NOTE(review): presumably the id accepted by `jobStatus`, `cancelJob` and `waitForJob` — confirm.
39
+ call_id: string; // NOTE(review): meaning not visible here; possibly the Modal function-call id — confirm.
40
+ total: number; // NOTE(review): presumably the number of records accepted for the job — confirm.
41
+ }
42
+ export interface JobStatus { // State of one bulk job: resolved by `jobStatus` / `waitForJob`, listed in `JobListResponse.jobs`.
43
+ status: "queued" | "running" | "done" | "error" | "cancelled";
44
+ collection: string;
45
+ model: string;
46
+ dim: number;
47
+ total: number;
48
+ processed: number; // NOTE(review): presumably the count of records handled so far, out of `total` — confirm.
49
+ /** Set by POST /jobs/{id}/cancel; the worker stops after the current batch. */
50
+ cancel_requested?: boolean;
51
+ error?: string; // Optional; NOTE(review): presumably populated when `status` is "error" — confirm.
52
+ updated_at: string;
53
+ }
54
+ /** One model in the server's registry (GET /models). */
55
+ export interface ModelInfo {
56
+ key: string; // NOTE(review): presumably the identifier passed as `model` to embed/job/export calls — confirm.
57
+ hf_id: string;
58
+ backend: "sentence-transformers" | "ollama" | "hf";
59
+ native_dim: number; // The model's output dim; lets a client resolve it before the first /embed response.
60
+ matryoshka_dims: number[]; // NOTE(review): presumably the reduced dims that may be requested via `dim` — confirm.
61
+ query_prompt?: string | null;
62
+ document_prompt?: string | null;
63
+ gated: boolean;
64
+ trust_remote_code: boolean;
65
+ enabled: boolean;
66
+ notes: string;
67
+ }
68
+ export interface ModelsResponse { // Resolved value of `models()` (GET /models).
69
+ default: string; // NOTE(review): presumably the `key` of the server's default model — confirm.
70
+ default_dim: number | null; // Nullable; NOTE(review): null presumably means "use the model's native dim" — confirm.
71
+ models: ModelInfo[];
72
+ }
73
+ export interface JobListResponse { // Resolved value of `listJobs` (GET /jobs); jobs are listed newest first.
74
+ jobs: JobStatus[];
75
+ count: number; // NOTE(review): presumably equals `jobs.length` — confirm.
76
+ }
77
+ export interface SyncCollection { // One collection/table held in the server-side vector store; `syncCollections()` resolves to an array of these.
78
+ collection: string;
79
+ model: string;
80
+ dim: number;
81
+ rows: number; // NOTE(review): presumably the row count of the table — confirm.
82
+ table: string; // NOTE(review): presumably the server-side table name for this collection/model/dim — confirm.
83
+ }
84
+ export interface ExportRow { // One row of an export page (`ExportPage.rows`); also the row type passed to `exportAll`'s `onPage`.
85
+ id: string;
86
+ text: string;
87
+ vector?: number[]; // Optional in the JSON export; the Arrow export is documented as always including vectors.
88
+ metadata: Record<string, unknown>;
89
+ model: string;
90
+ dim: number;
91
+ seq: number; // Sequence number the export is paged on: a page holds rows with seq > `since`.
92
+ created_at: string;
93
+ }
94
+ export interface ExportPage { // Resolved value of `exportSince`: one page of rows with seq > `since`.
95
+ rows: ExportRow[];
96
+ next_watermark: number; // Watermark to remember; NOTE(review): presumably passed back as `since` on the next call — confirm.
97
+ count: number; // NOTE(review): presumably equals `rows.length` — confirm.
98
+ /** False when the page was full and more rows likely remain. */
99
+ done: boolean;
100
+ }
101
+ export declare class ModalEmbeddingClient { // Typed client for the Modal embedding service's HTTP API; declaration only, no implementation here.
102
+ private baseUrl; // NOTE(review): presumably taken from `ModalClientConfig.baseUrl` — confirm in the implementation.
103
+ private apiToken; // NOTE(review): presumably taken from `ModalClientConfig.apiToken` — confirm in the implementation.
104
+ private timeoutMs; // NOTE(review): presumably `ModalClientConfig.timeoutMs` or its default — confirm in the implementation.
105
+ constructor(cfg: ModalClientConfig);
106
+ private request; // Private member with no visible signature; NOTE(review): presumably the shared HTTP helper — confirm.
107
+ /** Liveness check; also returns the server's default model. */
108
+ health(): Promise<{
109
+ ok: boolean;
110
+ default_model: string;
111
+ }>;
112
+ /** Registry of available embedders (public; no auth). Use native_dim to
113
+ * resolve a model's output dim up-front instead of waiting for the first
114
+ * /embed response. (Additive — Agent B request #2.) */
115
+ models(): Promise<ModelsResponse>;
116
+ /** Server-side store + compute stats (rows per namespace, index state, GPU). */
117
+ stats(): Promise<Record<string, unknown>>; // Untyped payload: callers must narrow the values before use.
118
+ /** Embed text on demand. Use task="query" for search, "document" for storage. */
119
+ embed(texts: string[], opts?: {
120
+ model?: string;
121
+ dim?: number;
122
+ task?: "query" | "document";
123
+ }): Promise<EmbedResult>;
124
+ /** Submit a bulk embedding job; embeds + stores server-side. */
125
+ submitJob(collection: string, records: JobRecord[], opts?: {
126
+ model?: string;
127
+ dim?: number;
128
+ }): Promise<JobSubmitResult>;
129
+ jobStatus(jobId: string): Promise<JobStatus>; // Status of one job; the file header lists GET /jobs/{id} for this.
130
+ /** List recent jobs (newest first). Additive — surfaces GET /jobs so
131
+ * `/wiki modal jobs` can list, not just poll a known id. (Agent B request #1.) */
132
+ listJobs(limit?: number): Promise<JobListResponse>;
133
+ /** Cooperatively cancel a running/queued job. The worker stops after its
134
+ * current batch and writes status=cancelled. */
135
+ cancelJob(jobId: string): Promise<{
136
+ job_id: string;
137
+ cancel_requested: boolean;
138
+ }>;
139
+ /** Poll a job until it reaches a terminal state. */
140
+ waitForJob(jobId: string, pollMs?: number): Promise<JobStatus>; // NOTE(review): `pollMs` is presumably the delay between polls in ms; its default is not visible here.
141
+ /** List the collections/tables held in the server-side vector store. */
142
+ syncCollections(): Promise<SyncCollection[]>;
143
+ /** Pull one page of rows with seq > since. Remember next_watermark. */
144
+ exportSince(collection: string, opts?: {
145
+ model?: string;
146
+ dim?: number;
147
+ since?: number;
148
+ limit?: number;
149
+ }): Promise<ExportPage>;
150
+ /** Pull one page of rows with seq > since as an Arrow IPC stream.
151
+ * Vectors are always included (no include_vectors flag). The watermark /
152
+ * done / count come back as response headers (X-Next-Watermark, X-Done,
153
+ * X-Count) since the body is binary. Additive — the local sync path uses
154
+ * the JSON `exportSince`; this is for clients that want zero-copy rows. */
155
+ exportSinceArrow(collection: string, opts?: {
156
+ model?: string;
157
+ dim?: number;
158
+ since?: number;
159
+ limit?: number;
160
+ }): Promise<{
161
+ data: ArrayBuffer;
162
+ nextWatermark: number;
163
+ done: boolean;
164
+ count: number;
165
+ }>;
166
+ /**
167
+ * Drain every remaining page for a collection, invoking `onPage` for each.
168
+ * Returns the final watermark to persist for the next incremental sync.
169
+ */
170
+ exportAll(collection: string, onPage: (rows: ExportRow[]) => Promise<void> | void, opts?: {
171
+ model?: string;
172
+ dim?: number;
173
+ since?: number;
174
+ limit?: number;
175
+ }): Promise<number>;
176
+ }