pi-vault-mind 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Client for the pi-vault-mind Modal embedding service.
3
+ *
4
+ * This is the local (extension) side of the Modal app under `modal/`. It mirrors
5
+ * the HTTP contract documented in `docs/MODAL_EMBEDDING.md`:
6
+ * - on-demand embedding → POST /embed
7
+ * - bulk background jobs → POST /jobs, GET /jobs, GET /jobs/{id},
8
+ * POST /jobs/{id}/cancel
9
+ * - incremental vector sync → GET /sync/collections, GET /sync/export
10
+ * (format=json|arrow)
11
+ * - model registry + stats → GET /models, GET /stats
12
+ *
13
+ * This client is the typed mirror of the server contract. The server (Agent A)
14
+ * owns it; additive changes here are mirrored in the server's `modal/web.py`.
15
+ * The local wiring lives in `src/lance.ts` (provider), `src/sync.ts`, and
16
+ * `/wiki modal` commands (see docs/MODAL_EMBEDDING.md "Local integration").
17
+ */
18
+ export class ModalEmbeddingClient {
19
+ baseUrl;
20
+ apiToken;
21
+ timeoutMs;
22
+ constructor(cfg) {
23
+ this.baseUrl = cfg.baseUrl.replace(/\/$/, "");
24
+ this.apiToken = cfg.apiToken;
25
+ this.timeoutMs = cfg.timeoutMs ?? 120_000;
26
+ }
27
+ async request(method, path, body) {
28
+ const controller = new AbortController();
29
+ const timer = setTimeout(() => controller.abort(), this.timeoutMs);
30
+ try {
31
+ const resp = await fetch(`${this.baseUrl}${path}`, {
32
+ method,
33
+ headers: {
34
+ Authorization: `Bearer ${this.apiToken}`,
35
+ ...(body !== undefined ? { "Content-Type": "application/json" } : {}),
36
+ },
37
+ body: body !== undefined ? JSON.stringify(body) : undefined,
38
+ signal: controller.signal,
39
+ });
40
+ if (!resp.ok) {
41
+ const detail = await resp.text().catch(() => "");
42
+ throw new Error(`Modal ${method} ${path} failed: ${resp.status} ${detail}`);
43
+ }
44
+ return (await resp.json());
45
+ }
46
+ finally {
47
+ clearTimeout(timer);
48
+ }
49
+ }
50
+ /** Liveness check; also returns the server's default model. */
51
+ health() {
52
+ return this.request("GET", "/health");
53
+ }
54
+ /** Registry of available embedders (public; no auth). Use native_dim to
55
+ * resolve a model's output dim up-front instead of waiting for the first
56
+ * /embed response. (Additive — Agent B request #2.) */
57
+ models() {
58
+ return this.request("GET", "/models");
59
+ }
60
+ /** Server-side store + compute stats (rows per namespace, index state, GPU). */
61
+ stats() {
62
+ return this.request("GET", "/stats");
63
+ }
64
+ /** Embed text on demand. Use task="query" for search, "document" for storage. */
65
+ embed(texts, opts = {}) {
66
+ return this.request("POST", "/embed", {
67
+ texts,
68
+ model: opts.model,
69
+ dim: opts.dim,
70
+ task: opts.task ?? "query",
71
+ });
72
+ }
73
+ /** Submit a bulk embedding job; embeds + stores server-side. */
74
+ submitJob(collection, records, opts = {}) {
75
+ return this.request("POST", "/jobs", {
76
+ collection,
77
+ records,
78
+ model: opts.model,
79
+ dim: opts.dim,
80
+ });
81
+ }
82
+ jobStatus(jobId) {
83
+ return this.request("GET", `/jobs/${encodeURIComponent(jobId)}`);
84
+ }
85
+ /** List recent jobs (newest first). Additive — surfaces GET /jobs so
86
+ * `/wiki modal jobs` can list, not just poll a known id. (Agent B request #1.) */
87
+ listJobs(limit) {
88
+ const p = new URLSearchParams();
89
+ if (limit != null)
90
+ p.set("limit", String(limit));
91
+ const qs = p.toString();
92
+ return this.request("GET", `/jobs${qs ? `?${qs}` : ""}`);
93
+ }
94
+ /** Cooperatively cancel a running/queued job. The worker stops after its
95
+ * current batch and writes status=cancelled. */
96
+ cancelJob(jobId) {
97
+ return this.request("POST", `/jobs/${encodeURIComponent(jobId)}/cancel`);
98
+ }
99
+ /** Poll a job until it reaches a terminal state. */
100
+ async waitForJob(jobId, pollMs = 2000) {
101
+ for (;;) {
102
+ const status = await this.jobStatus(jobId);
103
+ if (status.status === "done" || status.status === "error" || status.status === "cancelled")
104
+ return status;
105
+ await new Promise((r) => setTimeout(r, pollMs));
106
+ }
107
+ }
108
+ /** List the collections/tables held in the server-side vector store. */
109
+ async syncCollections() {
110
+ const out = await this.request("GET", "/sync/collections");
111
+ return out.collections;
112
+ }
113
+ /** Pull one page of rows with seq > since. Remember next_watermark. */
114
+ exportSince(collection, opts = {}) {
115
+ const p = new URLSearchParams({ collection });
116
+ if (opts.model)
117
+ p.set("model", opts.model);
118
+ if (opts.dim != null)
119
+ p.set("dim", String(opts.dim));
120
+ p.set("since", String(opts.since ?? 0));
121
+ p.set("limit", String(opts.limit ?? 500));
122
+ return this.request("GET", `/sync/export?${p.toString()}`);
123
+ }
124
+ /** Pull one page of rows with seq > since as an Arrow IPC stream.
125
+ * Vectors are always included (no include_vectors flag). The watermark /
126
+ * done / count come back as response headers (X-Next-Watermark, X-Done,
127
+ * X-Count) since the body is binary. Additive — the local sync path uses
128
+ * the JSON `exportSince`; this is for clients that want zero-copy rows. */
129
+ async exportSinceArrow(collection, opts = {}) {
130
+ const p = new URLSearchParams({ collection });
131
+ p.set("format", "arrow");
132
+ if (opts.model)
133
+ p.set("model", opts.model);
134
+ if (opts.dim != null)
135
+ p.set("dim", String(opts.dim));
136
+ p.set("since", String(opts.since ?? 0));
137
+ p.set("limit", String(opts.limit ?? 500));
138
+ const controller = new AbortController();
139
+ const timer = setTimeout(() => controller.abort(), this.timeoutMs);
140
+ try {
141
+ const resp = await fetch(`${this.baseUrl}/sync/export?${p.toString()}`, {
142
+ method: "GET",
143
+ headers: { Authorization: `Bearer ${this.apiToken}` },
144
+ signal: controller.signal,
145
+ });
146
+ if (!resp.ok) {
147
+ const detail = await resp.text().catch(() => "");
148
+ throw new Error(`Modal GET /sync/export failed: ${resp.status} ${detail}`);
149
+ }
150
+ const nextWatermark = Number(resp.headers.get("X-Next-Watermark") ?? opts.since ?? 0);
151
+ const done = (resp.headers.get("X-Done") ?? "true") === "true";
152
+ const count = Number(resp.headers.get("X-Count") ?? 0);
153
+ return { data: await resp.arrayBuffer(), nextWatermark, done, count };
154
+ }
155
+ finally {
156
+ clearTimeout(timer);
157
+ }
158
+ }
159
+ /**
160
+ * Drain every remaining page for a collection, invoking `onPage` for each.
161
+ * Returns the final watermark to persist for the next incremental sync.
162
+ */
163
+ async exportAll(collection, onPage, opts = {}) {
164
+ let watermark = opts.since ?? 0;
165
+ for (;;) {
166
+ const page = await this.exportSince(collection, { ...opts, since: watermark });
167
+ if (page.rows.length > 0)
168
+ await onPage(page.rows);
169
+ watermark = page.next_watermark;
170
+ if (page.done)
171
+ return watermark;
172
+ }
173
+ }
174
+ }
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Config → Modal client resolution.
3
+ *
4
+ * Keeps `src/modal-client.ts` (the typed HTTP mirror of the contract) pristine
5
+ * and centralizes the pieces every consumer (lance.ts, sync.ts, commands.ts)
6
+ * shares: token resolution (env wins), the canonical `model__dim` pair, and
7
+ * client construction.
8
+ *
9
+ * No HTTP is reimplemented here — `ModalEmbeddingClient` owns that.
10
+ */
11
+ import { ModalEmbeddingClient } from "./modal-client.js";
12
+ import type { WikiConfig } from "./types.js";
13
+ /** Env var name for the Modal bearer token (preferred over config). */
14
+ export declare const MODAL_TOKEN_ENV = "PVM_API_TOKEN";
15
+ /** Dotenv-style fallback path for the Modal token. */
16
+ export declare const MODAL_TOKEN_ENV_PATH: string;
17
+ /** Canonical deployed Modal app name. */
18
+ export declare const MODAL_APP_NAME = "pi-vault-mind-embed";
19
+ /** Derive the Modal service URL from a workspace slug and canonical app name. */
20
+ export declare const modalUrl: (workspace: string) => string;
21
+ /**
22
+ * Resolve the Modal API token. Resolution order:
23
+ * 1. `PVM_API_TOKEN` env var
24
+ * 2. `~/.pi/agent/vault-mind.env`
25
+ * 3. `wiki.embedding.modal.apiToken` in config
26
+ * Never log the resolved token.
27
+ */
28
+ export declare const resolveModalToken: (cfg: WikiConfig) => string | undefined;
29
+ /** Resolve the explicit baseUrl or derive it from workspace. */
30
+ export declare const resolveBaseUrl: (cfg: WikiConfig) => string | undefined;
31
+ /** Resolve the configured workspace slug, if any. */
32
+ export declare const resolveWorkspace: (cfg: WikiConfig) => string | undefined;
33
+ /** True when Modal is usable: the embedding provider is "modal", and both a base URL (explicit or derived) and a resolvable token are present. */
34
+ export declare const isModalConfigured: (cfg: WikiConfig) => boolean;
35
+ /**
36
+ * Build a `ModalEmbeddingClient` from config. Returns null when Modal is not
37
+ * configured (no base URL / token) so callers can degrade gracefully.
38
+ */
39
+ export declare const createModalClient: (cfg: WikiConfig) => ModalEmbeddingClient | null;
40
+ /**
41
+ * The canonical embedder key for a collection. Per-collection overrides win;
42
+ * otherwise the global Modal config model; otherwise "embeddinggemma"
43
+ * (the eval-confirmed baseline — kept as the default *model name*, never as a
44
+ * hard-coded dimension).
45
+ */
46
+ export declare const resolveModel: (cfg: WikiConfig, collection?: string) => string;
47
+ /**
48
+ * The configured output dimension for a collection, if known. Per-collection
49
+ * overrides win; otherwise the global Modal config `dim`. Returns undefined
50
+ * when unset — callers resolve the native dim lazily via `/models`.
51
+ */
52
+ export declare const resolveDim: (cfg: WikiConfig, collection?: string) => number | undefined;
53
+ /** Namespaced local table name mirroring the server's ADR-3 scheme. */
54
+ export declare const namespacedTableName: (collection: string, model: string, dim: number) => string;
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Config → Modal client resolution.
3
+ *
4
+ * Keeps `src/modal-client.ts` (the typed HTTP mirror of the contract) pristine
5
+ * and centralizes the pieces every consumer (lance.ts, sync.ts, commands.ts)
6
+ * shares: token resolution (env wins), the canonical `model__dim` pair, and
7
+ * client construction.
8
+ *
9
+ * No HTTP is reimplemented here — `ModalEmbeddingClient` owns that.
10
+ */
11
+ import * as fs from "node:fs";
12
+ import * as path from "node:path";
13
+ import { ModalEmbeddingClient } from "./modal-client.js";
14
+ /** Env var name for the Modal bearer token (preferred over config). */
15
+ export const MODAL_TOKEN_ENV = "PVM_API_TOKEN";
16
+ /** Dotenv-style fallback path for the Modal token. */
17
+ export const MODAL_TOKEN_ENV_PATH = path.join(process.env.HOME || process.env.USERPROFILE || "", ".pi", "agent", "vault-mind.env");
18
+ /** Canonical deployed Modal app name. */
19
+ export const MODAL_APP_NAME = "pi-vault-mind-embed";
20
+ /**
21
+ * Read a dotenv-style file and return a record of KEY=VALUE pairs.
22
+ * Skips blank lines, `#` comments, and lines without "="; strips one matching pair of surrounding quotes from values.
23
+ */
24
+ const readDotEnv = (filePath) => {
25
+ if (!fs.existsSync(filePath))
26
+ return {};
27
+ const out = {};
28
+ for (const line of fs.readFileSync(filePath, "utf-8").split(/\r?\n/)) {
29
+ const trimmed = line.trim();
30
+ if (!trimmed || trimmed.startsWith("#"))
31
+ continue;
32
+ const eq = trimmed.indexOf("=");
33
+ if (eq === -1)
34
+ continue;
35
+ const key = trimmed.slice(0, eq).trim();
36
+ let value = trimmed.slice(eq + 1).trim();
37
+ if ((value.startsWith('"') && value.endsWith('"')) ||
38
+ (value.startsWith("'") && value.endsWith("'"))) {
39
+ value = value.slice(1, -1);
40
+ }
41
+ out[key] = value;
42
+ }
43
+ return out;
44
+ };
45
+ /** Derive the Modal service URL from a workspace slug and canonical app name. */
46
+ export const modalUrl = (workspace) => `https://${workspace}--${MODAL_APP_NAME}-embeddingservice-fastapi-app.modal.run`;
47
+ /**
48
+ * Resolve the Modal API token. Resolution order:
49
+ * 1. `PVM_API_TOKEN` env var
50
+ * 2. `~/.pi/agent/vault-mind.env`
51
+ * 3. `wiki.embedding.modal.apiToken` in config
52
+ * Never log the resolved token.
53
+ */
54
+ export const resolveModalToken = (cfg) => {
55
+ if (process.env[MODAL_TOKEN_ENV])
56
+ return process.env[MODAL_TOKEN_ENV];
57
+ const dotenv = readDotEnv(MODAL_TOKEN_ENV_PATH);
58
+ if (dotenv[MODAL_TOKEN_ENV])
59
+ return dotenv[MODAL_TOKEN_ENV];
60
+ return cfg.embedding.modal?.apiToken;
61
+ };
62
+ /** Resolve the explicit baseUrl or derive it from workspace. */
63
+ export const resolveBaseUrl = (cfg) => {
64
+ const modal = cfg.embedding.modal;
65
+ if (modal?.baseUrl)
66
+ return modal.baseUrl;
67
+ if (modal?.workspace)
68
+ return modalUrl(modal.workspace);
69
+ return undefined;
70
+ };
71
+ /** Resolve the configured workspace slug, if any. */
72
+ export const resolveWorkspace = (cfg) => cfg.embedding.modal?.workspace;
73
+ /** True when Modal is usable: the embedding provider is "modal", and both a base URL (explicit or derived) and a resolvable token are present. */
74
+ export const isModalConfigured = (cfg) => cfg.embedding.provider === "modal" && !!resolveBaseUrl(cfg) && !!resolveModalToken(cfg);
75
+ /**
76
+ * Build a `ModalEmbeddingClient` from config. Returns null when Modal is not
77
+ * configured (no base URL / token) so callers can degrade gracefully.
78
+ */
79
+ export const createModalClient = (cfg) => {
80
+ const baseUrl = resolveBaseUrl(cfg);
81
+ if (!baseUrl)
82
+ return null;
83
+ const apiToken = resolveModalToken(cfg);
84
+ if (!apiToken)
85
+ return null;
86
+ const clientCfg = { baseUrl, apiToken };
87
+ return new ModalEmbeddingClient(clientCfg);
88
+ };
89
+ /**
90
+ * The canonical embedder key for a collection. Per-collection overrides win;
91
+ * otherwise the global Modal config model; otherwise "embeddinggemma"
92
+ * (the eval-confirmed baseline — kept as the default *model name*, never as a
93
+ * hard-coded dimension).
94
+ */
95
+ export const resolveModel = (cfg, collection) => {
96
+ const override = collection ? cfg.embedding.collectionModels?.[collection] : undefined;
97
+ if (override?.model)
98
+ return override.model;
99
+ return cfg.embedding.modal?.model || "embeddinggemma";
100
+ };
101
+ /**
102
+ * The configured output dimension for a collection, if known. Per-collection
103
+ * overrides win; otherwise the global Modal config `dim`. Returns undefined
104
+ * when unset — callers resolve the native dim lazily via `/models`.
105
+ */
106
+ export const resolveDim = (cfg, collection) => {
107
+ const override = collection ? cfg.embedding.collectionModels?.[collection] : undefined;
108
+ if (override?.dim != null)
109
+ return override.dim;
110
+ return cfg.embedding.modal?.dim;
111
+ };
112
+ /** Namespaced local table name mirroring the server's ADR-3 scheme. */
113
+ export const namespacedTableName = (collection, model, dim) => `col_${collection}__${model}__${dim}`;
@@ -5,5 +5,14 @@ export declare const setupWizard: (ctx: ExtensionContext, cliArgs?: {
5
5
  vault?: string;
6
6
  provider?: string;
7
7
  model?: string;
8
+ workspace?: string;
8
9
  }) => Promise<void>;
10
+ /**
11
+ * Interactive Modal embedding configuration + "Test connection" action.
12
+ * Walks workspace/base URL, canonical model, dim, offline fallback, and auto-sync
13
+ * (off by default). Token is read from `PVM_API_TOKEN` env or
14
+ * `~/.pi/agent/vault-mind.env` (preferred) — it is not collected here; if no token
15
+ * resolves, a warning explains how to set one (config apiToken is a fallback only).
16
+ */
17
+ export declare const configureModalWizard: (ctx: ExtensionContext) => Promise<void>;
9
18
  export declare const openSettingsDashboard: (ctx: ExtensionContext) => Promise<void>;
@@ -1,7 +1,8 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import { testOllamaConnection } from "./lance.js";
4
- import { GLOBAL_CONFIG_PATH, collectionNames, findConfig, loadConfig, } from "./utils.js";
4
+ import { MODAL_TOKEN_ENV, createModalClient, modalUrl, resolveModalToken } from "./modal-config.js";
5
+ import { GLOBAL_CONFIG_PATH, collectionNames, findConfig, loadConfig, shrinkHome, } from "./utils.js";
5
6
  export const createCollectionWizard = async (ctx) => {
6
7
  const { project: cfgPath } = findConfig(ctx.cwd);
7
8
  if (!cfgPath) {
@@ -85,7 +86,7 @@ export const createInjectorWizard = async (ctx) => {
85
86
  export const setupWizard = async (ctx, cliArgs) => {
86
87
  const existingGlobal = fs.existsSync(GLOBAL_CONFIG_PATH);
87
88
  // ── Non-interactive / CLI mode ──────────────────────────────────────────
88
- if (cliArgs && (cliArgs.vault || cliArgs.provider)) {
89
+ if (cliArgs && (cliArgs.vault || cliArgs.provider || cliArgs.workspace)) {
89
90
  const config = existingGlobal
90
91
  ? JSON.parse(fs.readFileSync(GLOBAL_CONFIG_PATH, "utf-8"))
91
92
  : { wiki: { embedding: {}, vaults: {} } };
@@ -93,14 +94,18 @@ export const setupWizard = async (ctx, cliArgs) => {
93
94
  config.wiki.embedding = config.wiki.embedding || {};
94
95
  config.wiki.vaults = config.wiki.vaults || {};
95
96
  if (cliArgs.vault) {
96
- config.wiki.vaults.default = { path: cliArgs.vault };
97
+ config.wiki.vaults.default = { path: shrinkHome(cliArgs.vault), autoSync: true };
97
98
  }
98
- if (cliArgs.provider && ["ollama", "transformers"].includes(cliArgs.provider)) {
99
+ if (cliArgs.provider && ["ollama", "transformers", "modal"].includes(cliArgs.provider)) {
99
100
  config.wiki.embedding.provider = cliArgs.provider;
100
101
  }
101
102
  if (cliArgs.model) {
102
103
  config.wiki.embedding.ollamaModel = cliArgs.model;
103
104
  }
105
+ if (cliArgs.workspace) {
106
+ config.wiki.embedding.modal = config.wiki.embedding.modal || {};
107
+ config.wiki.embedding.modal.workspace = cliArgs.workspace;
108
+ }
104
109
  const dir = path.dirname(GLOBAL_CONFIG_PATH);
105
110
  if (!fs.existsSync(dir))
106
111
  fs.mkdirSync(dir, { recursive: true });
@@ -112,6 +117,8 @@ export const setupWizard = async (ctx, cliArgs) => {
112
117
  lines.push(` Embedding: ${cliArgs.provider}`);
113
118
  if (cliArgs.model)
114
119
  lines.push(` Model: ${cliArgs.model}`);
120
+ if (cliArgs.workspace)
121
+ lines.push(` Modal workspace: ${cliArgs.workspace}`);
115
122
  ctx.ui.notify(lines.join("\n"), "info");
116
123
  return;
117
124
  }
@@ -155,11 +162,18 @@ export const setupWizard = async (ctx, cliArgs) => {
155
162
  const provider = await ctx.ui.select("Embedding provider:", [
156
163
  "transformers (all-MiniLM-L6-v2, offline, zero config)",
157
164
  "ollama (embeddinggemma, higher quality, requires Ollama)",
165
+ "modal (cloud GPU embeddings + server vector sync, requires Modal deploy)",
158
166
  ]);
159
167
  if (!provider) {
160
168
  ctx.ui.notify("Setup cancelled.", "warning");
161
169
  return;
162
170
  }
171
+ if (provider.startsWith("modal")) {
172
+ // Delegate to the dedicated Modal wizard (workspace/baseUrl, model, dim,
173
+ // fallback, auto-sync, test connection) and return — it writes the project config.
174
+ await configureModalWizard(ctx);
175
+ return;
176
+ }
163
177
  let ollamaModel = "embeddinggemma";
164
178
  if (provider.startsWith("ollama")) {
165
179
  const conn = await testOllamaConnection();
@@ -185,7 +199,7 @@ export const setupWizard = async (ctx, cliArgs) => {
185
199
  config.wiki.embedding.ollamaModel = ollamaModel;
186
200
  config.wiki.embedding.ollamaHost = config.wiki.embedding.ollamaHost || "http://127.0.0.1:11434";
187
201
  }
188
- config.wiki.vaults.default = { path: vaultPath, autoSync: true };
202
+ config.wiki.vaults.default = { path: shrinkHome(vaultPath), autoSync: true };
189
203
  config.wiki.graph = config.wiki.graph || { enabled: true, canvasSync: true };
190
204
  config.wiki.ftsEnabled = config.wiki.ftsEnabled !== false;
191
205
  const dir = path.dirname(GLOBAL_CONFIG_PATH);
@@ -205,6 +219,114 @@ export const setupWizard = async (ctx, cliArgs) => {
205
219
  "Next: /wiki watcher start (or restart pi for auto-start)",
206
220
  ].join("\n"), "info");
207
221
  };
222
+ /**
223
+ * Interactive Modal embedding configuration + "Test connection" action.
224
+ * Walks workspace/base URL, canonical model, dim, offline fallback, and auto-sync
225
+ * (off by default). Token is read from `PVM_API_TOKEN` env or
226
+ * `~/.pi/agent/vault-mind.env` (preferred) — it is not collected here; if no token
227
+ * resolves, a warning explains how to set one (config apiToken is a fallback only).
228
+ */
229
+ export const configureModalWizard = async (ctx) => {
230
+ const { project: cfgPath } = findConfig(ctx.cwd);
231
+ if (!cfgPath) {
232
+ ctx.ui.notify("No config found. Run /wiki init first.", "error");
233
+ return;
234
+ }
235
+ if (!ctx.hasUI) {
236
+ ctx.ui.notify("TUI required for the Modal wizard. Use /wiki modal config <key> <value>.", "error");
237
+ return;
238
+ }
239
+ const cfg = loadConfig(ctx.cwd);
240
+ const modal = cfg.wiki.embedding.modal ?? {};
241
+ // Prefer workspace input; fall back to full URL if user already has one.
242
+ const workspace = await ctx.ui.input("Modal workspace slug (blank if you want to enter the full URL):", modal.workspace || "");
243
+ if (workspace === undefined)
244
+ return;
245
+ let baseUrl;
246
+ if (workspace) {
247
+ baseUrl = modalUrl(workspace);
248
+ }
249
+ else {
250
+ baseUrl = await ctx.ui.input("Modal base URL (deployed ASGI app):", modal.baseUrl ||
251
+ "https://<workspace>--pi-vault-mind-embed-embeddingservice-fastapi-app.modal.run");
252
+ if (!baseUrl)
253
+ return;
254
+ }
255
+ const model = await ctx.ui.input("Canonical embedder model:", modal.model || "embeddinggemma");
256
+ if (model === undefined)
257
+ return;
258
+ const dimStr = await ctx.ui.input("Output dimension (blank = model native, e.g. 768 for embeddinggemma):", modal.dim ? String(modal.dim) : "");
259
+ const dim = dimStr ? Number.parseInt(dimStr, 10) : undefined;
260
+ if (dimStr && !Number.isFinite(dim)) {
261
+ ctx.ui.notify(`Invalid dim: ${dimStr}`, "error");
262
+ return;
263
+ }
264
+ const fallback = await ctx.ui.select("Offline fallback (when Modal is unreachable):", [
265
+ "ollama (same canonical model only)",
266
+ "transformers (different space → degrades to FTS)",
267
+ "none (degrade to FTS, never fall back)",
268
+ ]);
269
+ let fallbackCfg;
270
+ if (fallback?.startsWith("ollama"))
271
+ fallbackCfg = { enabled: true, provider: "ollama" };
272
+ else if (fallback?.startsWith("transformers"))
273
+ fallbackCfg = { enabled: true, provider: "transformers" };
274
+ else if (fallback?.startsWith("none"))
275
+ fallbackCfg = { enabled: false };
276
+ const autoSync = await ctx.ui.confirm("Auto-sync", "Enable background vector sync (off by default)? Pulls new server vectors on an interval.");
277
+ let intervalMs = 300000;
278
+ if (autoSync) {
279
+ const intervalStr = await ctx.ui.input("Auto-sync interval (ms):", "300000");
280
+ intervalMs = Number.parseInt(intervalStr || "300000", 10) || 300000;
281
+ }
282
+ // Token guidance (env/dotenv preferred; config apiToken is a fallback)
283
+ const tokenEnv = resolveModalToken(loadConfig(ctx.cwd).wiki);
284
+ if (!tokenEnv) {
285
+ ctx.ui.notify(`⚠️ No ${MODAL_TOKEN_ENV} env var or ~/.pi/agent/vault-mind.env found. Set it in your shell:\n export ${MODAL_TOKEN_ENV}=<bearer token>\nOr write it to ~/.pi/agent/vault-mind.env. (Env/dotenv preferred; config apiToken is a fallback only.)`, "warning");
286
+ }
287
+ // Persist
288
+ const existing = JSON.parse(fs.readFileSync(cfgPath, "utf-8"));
289
+ existing.wiki = existing.wiki || {};
290
+ existing.wiki.embedding = existing.wiki.embedding || {};
291
+ existing.wiki.embedding.provider = "modal";
292
+ existing.wiki.embedding.modal = {
293
+ ...(existing.wiki.embedding.modal || {}),
294
+ ...(workspace ? { workspace } : {}),
295
+ baseUrl: baseUrl.replace(/\/$/, ""),
296
+ model: model || "embeddinggemma",
297
+ ...(dim != null ? { dim } : {}),
298
+ ...(fallbackCfg ? { fallback: fallbackCfg } : {}),
299
+ sync: {
300
+ ...(existing.wiki.embedding.modal?.sync || {}),
301
+ autoSync,
302
+ autoSyncIntervalMs: intervalMs,
303
+ },
304
+ };
305
+ fs.writeFileSync(cfgPath, `${JSON.stringify(existing, null, 2)}\n`, "utf-8");
306
+ // Test connection against /health
307
+ const test = await ctx.ui.confirm("Test connection", "Call /health now to verify the Modal service?");
308
+ if (test) {
309
+ const client = createModalClient(loadConfig(ctx.cwd).wiki);
310
+ if (!client) {
311
+ ctx.ui.notify("❌ Modal not configured (missing baseUrl or token).", "error");
312
+ return;
313
+ }
314
+ try {
315
+ const health = await client.health();
316
+ ctx.ui.notify(`✅ Connected. Default model: ${health.default_model}`, "info");
317
+ }
318
+ catch (err) {
319
+ ctx.ui.notify(`❌ Health check failed: ${err.message}`, "error");
320
+ }
321
+ }
322
+ ctx.ui.notify([
323
+ "✅ Modal embedding configured.",
324
+ ` ${cfgPath}`,
325
+ ` Provider: modal • Model: ${model || "embeddinggemma"}${dim ? ` • dim ${dim}` : ""}`,
326
+ ` Auto-sync: ${autoSync ? `on (${intervalMs}ms)` : "off"}`,
327
+ " Restart session for changes to take effect.",
328
+ ].join("\n"), "info");
329
+ };
208
330
  export const openSettingsDashboard = async (ctx) => {
209
331
  if (!ctx.hasUI) {
210
332
  ctx.ui.notify("TUI required for settings dashboard.", "error");
@@ -220,6 +342,7 @@ export const openSettingsDashboard = async (ctx) => {
220
342
  const choice = await ctx.ui.select("pi-vault-mind Settings", [
221
343
  "Manage Collections",
222
344
  "Manage Injectors",
345
+ "Modal Embedding",
223
346
  "Extension Integrations",
224
347
  "Exit",
225
348
  ]);
@@ -233,6 +356,9 @@ export const openSettingsDashboard = async (ctx) => {
233
356
  else if (choice === "Manage Injectors") {
234
357
  await manageInjectors(ctx, cfgPath);
235
358
  }
359
+ else if (choice === "Modal Embedding") {
360
+ await configureModalWizard(ctx);
361
+ }
236
362
  else if (choice === "Extension Integrations") {
237
363
  await manageExtensions(ctx, cfgPath);
238
364
  }
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Modal vector sync engine.
3
+ *
4
+ * Drains `ModalEmbeddingClient.exportAll(collection, …)` into the local
5
+ * LanceDB via the precomputed-vector insert path (`upsertPrecomputed`), and
6
+ * persists a per-(collection, model, dim) `seq` watermark (ADR-4) so
7
+ * incremental syncs resume from where the last one left off.
8
+ *
9
+ * Idempotent: rows are keyed by `id` and applied with merge-insert, so
10
+ * re-fetching a boundary row is a no-op and re-running a sync with no new rows
11
+ * changes nothing but the (unchanged) watermark.
12
+ */
13
+ import type { JobStatus } from "./modal-client.js";
14
+ import type { UniversalConfig } from "./types.js";
15
+ export interface SyncState {
16
+ /** `${collection}__${model}__${dim}` → last-seen `seq` watermark. */
17
+ watermarks: Record<string, number>;
18
+ }
19
+ export interface SyncResult {
20
+ collection: string;
21
+ model: string;
22
+ dim: number;
23
+ rows: number;
24
+ watermark: number;
25
+ full: boolean;
26
+ }
27
+ export declare const loadSyncState: (cfg: UniversalConfig) => SyncState;
28
+ export declare const saveSyncState: (cfg: UniversalConfig, state: SyncState) => void;
29
+ /** Current watermark for a (collection, model, dim), default 0. */
30
+ export declare const getWatermark: (cfg: UniversalConfig, collection: string, model: string, dim: number) => number;
31
+ /** List the collections/tables held in the server-side vector store. */
32
+ export declare const listRemoteCollections: (cfg: UniversalConfig) => Promise<import("./modal-client.js").SyncCollection[]>;
33
+ export interface SyncOptions {
34
+ /** Sync from watermark 0 (re-pull everything). Default false (incremental). */
35
+ full?: boolean;
36
+ /** Notified after each page is merged, with the running row count. */
37
+ onProgress?: (count: number) => void;
38
+ }
39
+ /**
40
+ * Sync a single collection from the server into the local LanceDB. Incremental
41
+ * by default (from the persisted watermark); `full` re-pulls from seq 0. Rows
42
+ * are merge-inserted by `id` (idempotent). Returns the row count + new
43
+ * watermark. A re-run with no new rows is a no-op (rows: 0, watermark unchanged).
44
+ */
45
+ export declare const syncCollection: (cfg: UniversalConfig, collection: string, opts?: SyncOptions) => Promise<SyncResult>;
46
+ /**
47
+ * Sync every configured collection (or a caller-supplied list). Collections
48
+ * with no server-side table report `rows: 0` and are skipped harmlessly.
49
+ */
50
+ export declare const syncAll: (cfg: UniversalConfig, collections?: string[], opts?: SyncOptions) => Promise<SyncResult[]>;
51
+ export interface ReindexRemoteOptions {
52
+ /** Notified with the job status on each poll. */
53
+ onStatus?: (status: JobStatus) => void;
54
+ /** Poll interval in ms (default 2000). */
55
+ pollMs?: number;
56
+ }
57
+ /**
58
+ * Read a collection's JSONL (the source of truth), submit a Modal bulk embedding
59
+ * job, poll it to completion, then sync the freshly-embedded vectors down into
60
+ * the local LanceDB. Used by `/wiki reindex --all --reembed --remote`. Local
61
+ * re-embed remains the default; this offloads bulk embedding to cloud GPUs.
62
+ *
63
+ * Records map the JSONL entry's embeddable field (`fact`) to `text` and the
64
+ * remaining fields to `metadata`, so sync-down can reconstruct the local row.
65
+ */
66
+ export declare const reindexRemote: (cfg: UniversalConfig, collections: string[], opts?: ReindexRemoteOptions) => Promise<Array<{
67
+ collection: string;
68
+ job?: JobStatus;
69
+ sync?: SyncResult;
70
+ error?: string;
71
+ }>>;