pi-vault-mind 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/dist/src/autosync.d.ts +16 -0
- package/dist/src/autosync.js +43 -0
- package/dist/src/commands.d.ts +18 -0
- package/dist/src/commands.js +464 -10
- package/dist/src/embed-queue.d.ts +80 -0
- package/dist/src/embed-queue.js +163 -0
- package/dist/src/index.js +9 -0
- package/dist/src/lance.d.ts +7 -0
- package/dist/src/lance.js +432 -0
- package/dist/src/modal-client.d.ts +176 -0
- package/dist/src/modal-client.js +174 -0
- package/dist/src/modal-config.d.ts +42 -0
- package/dist/src/modal-config.js +60 -0
- package/dist/src/settings-ui.d.ts +7 -0
- package/dist/src/settings-ui.js +109 -1
- package/dist/src/sync.d.ts +71 -0
- package/dist/src/sync.js +211 -0
- package/dist/src/types.d.ts +102 -1
- package/dist/test/embed-queue.test.js +105 -0
- package/dist/test/index.test.js +35 -0
- package/dist/test/lance-modal.test.js +95 -0
- package/dist/test/modal-client.test.js +294 -0
- package/dist/test/modal-config.test.js +86 -0
- package/dist/test/sync.test.js +132 -0
- package/package.json +3 -2
- package/dist/test/index.test.d.ts +0 -1
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Client for the pi-vault-mind Modal embedding service.
|
|
3
|
+
*
|
|
4
|
+
* This is the local (extension) side of the Modal app under `modal/`. It mirrors
|
|
5
|
+
* the HTTP contract documented in `docs/MODAL_EMBEDDING.md`:
|
|
6
|
+
* - on-demand embedding → POST /embed
|
|
7
|
+
* - bulk background jobs → POST /jobs, GET /jobs, GET /jobs/{id},
|
|
8
|
+
* POST /jobs/{id}/cancel
|
|
9
|
+
* - incremental vector sync → GET /sync/collections, GET /sync/export
|
|
10
|
+
* (format=json|arrow)
|
|
11
|
+
* - model registry + stats → GET /models, GET /stats
|
|
12
|
+
*
|
|
13
|
+
* This client is the typed mirror of the server contract. The server (Agent A)
|
|
14
|
+
* owns it; additive changes here are mirrored in the server's `modal/web.py`.
|
|
15
|
+
* The local wiring lives in `src/lance.ts` (provider), `src/sync.ts`, and
|
|
16
|
+
* `/wiki modal` commands (see docs/MODAL_EMBEDDING.md "Local integration").
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Typed HTTP client for the Modal embedding service.
 *
 * Every request carries `Authorization: Bearer <apiToken>` and is aborted
 * after `timeoutMs` milliseconds; the timeout window also covers reading the
 * response body.
 */
export class ModalEmbeddingClient {
    /** Service root URL with trailing slashes removed. */
    baseUrl;
    /** Bearer token sent with every request. */
    apiToken;
    /** Per-request timeout in milliseconds (defaults to 120 000). */
    timeoutMs;
    /**
     * @param cfg Connection settings: `baseUrl`, `apiToken` and an optional
     *   `timeoutMs`.
     */
    constructor(cfg) {
        // Strip every trailing slash so `${baseUrl}${path}` never yields "//".
        this.baseUrl = cfg.baseUrl.replace(/\/+$/, "");
        this.apiToken = cfg.apiToken;
        this.timeoutMs = cfg.timeoutMs ?? 120_000;
    }
    /**
     * Shared fetch-with-timeout plumbing used by `request` and
     * `exportSinceArrow`.
     *
     * @param method HTTP method.
     * @param path Path (plus query string) appended to `baseUrl`.
     * @param init Extra fetch options (headers, body).
     * @param consume Reads the successful response; awaited inside the
     *   timeout window so a stalled body is aborted as well.
     * @param label Path shown in error messages (defaults to `path`).
     * @throws Error `Modal <method> <label> failed: <status> <detail>` on a
     *   non-2xx response; an Error named "AbortError" (original error in
     *   `cause`) when the timeout fires.
     */
    async fetchWithTimeout(method, path, init, consume, label = path) {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), this.timeoutMs);
        try {
            const resp = await fetch(`${this.baseUrl}${path}`, {
                ...init,
                method,
                signal: controller.signal,
            });
            if (!resp.ok) {
                const detail = await resp.text().catch(() => "");
                throw new Error(`Modal ${method} ${label} failed: ${resp.status} ${detail}`);
            }
            return await consume(resp);
        }
        catch (err) {
            // Only our own timer aborts this controller, so an aborted signal
            // means the request timed out. Keep the "AbortError" name for
            // callers that check it, but say which request it was.
            if (controller.signal.aborted) {
                throw Object.assign(new Error(`Modal ${method} ${label} timed out after ${this.timeoutMs}ms`), { name: "AbortError", cause: err });
            }
            throw err;
        }
        finally {
            clearTimeout(timer);
        }
    }
    /**
     * Issue a JSON request and return the parsed JSON response.
     *
     * @param method HTTP method.
     * @param path Path (plus query string) appended to `baseUrl`.
     * @param body Optional payload; when defined it is JSON-encoded and sent
     *   with `Content-Type: application/json`.
     */
    async request(method, path, body) {
        const hasBody = body !== undefined;
        return this.fetchWithTimeout(method, path, {
            headers: {
                Authorization: `Bearer ${this.apiToken}`,
                ...(hasBody ? { "Content-Type": "application/json" } : {}),
            },
            body: hasBody ? JSON.stringify(body) : undefined,
        }, (resp) => resp.json());
    }
    /** Liveness check; also returns the server's default model. */
    health() {
        return this.request("GET", "/health");
    }
    /** Registry of available embedders (public; no auth). Use native_dim to
     * resolve a model's output dim up-front instead of waiting for the first
     * /embed response. (Additive — Agent B request #2.) */
    models() {
        return this.request("GET", "/models");
    }
    /** Server-side store + compute stats (rows per namespace, index state, GPU). */
    stats() {
        return this.request("GET", "/stats");
    }
    /** Embed text on demand. Use task="query" for search, "document" for storage.
     * `task` defaults to "query" when omitted. */
    embed(texts, opts = {}) {
        return this.request("POST", "/embed", {
            texts,
            model: opts.model,
            dim: opts.dim,
            task: opts.task ?? "query",
        });
    }
    /** Submit a bulk embedding job; embeds + stores server-side. */
    submitJob(collection, records, opts = {}) {
        return this.request("POST", "/jobs", {
            collection,
            records,
            model: opts.model,
            dim: opts.dim,
        });
    }
    /** Fetch the current status of one job (GET /jobs/{id}). */
    jobStatus(jobId) {
        return this.request("GET", `/jobs/${encodeURIComponent(jobId)}`);
    }
    /** List recent jobs (newest first). Additive — surfaces GET /jobs so
     * `/wiki modal jobs` can list, not just poll a known id. (Agent B request #1.)
     * `limit` is only sent when it is not null/undefined. */
    listJobs(limit) {
        const p = new URLSearchParams();
        if (limit != null)
            p.set("limit", String(limit));
        const qs = p.toString();
        return this.request("GET", `/jobs${qs ? `?${qs}` : ""}`);
    }
    /** Cooperatively cancel a running/queued job. The worker stops after its
     * current batch and writes status=cancelled. */
    cancelJob(jobId) {
        return this.request("POST", `/jobs/${encodeURIComponent(jobId)}/cancel`);
    }
    /**
     * Poll a job until it reaches a terminal state ("done", "error" or
     * "cancelled").
     *
     * @param jobId Job to poll.
     * @param pollMs Delay between polls in milliseconds (default 2000).
     * @param maxWaitMs Optional upper bound on the total wait. The default
     *   (Infinity) keeps the original wait-forever behaviour.
     * @throws Error when the next poll would exceed `maxWaitMs`.
     */
    async waitForJob(jobId, pollMs = 2000, maxWaitMs = Number.POSITIVE_INFINITY) {
        const terminal = new Set(["done", "error", "cancelled"]);
        const startedAt = Date.now();
        for (;;) {
            const status = await this.jobStatus(jobId);
            if (terminal.has(status.status))
                return status;
            if (Date.now() - startedAt + pollMs > maxWaitMs) {
                throw new Error(`Modal job ${jobId} did not finish within ${maxWaitMs}ms (last status: ${status.status})`);
            }
            await new Promise((r) => setTimeout(r, pollMs));
        }
    }
    /** List the collections/tables held in the server-side vector store. */
    async syncCollections() {
        const out = await this.request("GET", "/sync/collections");
        return out.collections;
    }
    /**
     * Build the query string shared by both /sync/export variants.
     * Parameter order (collection, format, model, dim, since, limit) matches
     * what each caller produced before this helper existed.
     */
    buildExportQuery(collection, opts, format) {
        const p = new URLSearchParams({ collection });
        if (format)
            p.set("format", format);
        if (opts.model)
            p.set("model", opts.model);
        if (opts.dim != null)
            p.set("dim", String(opts.dim));
        p.set("since", String(opts.since ?? 0));
        p.set("limit", String(opts.limit ?? 500));
        return p.toString();
    }
    /** Pull one page of rows with seq > since. Remember next_watermark.
     * `since` defaults to 0 and `limit` to 500. */
    exportSince(collection, opts = {}) {
        return this.request("GET", `/sync/export?${this.buildExportQuery(collection, opts)}`);
    }
    /** Pull one page of rows with seq > since as an Arrow IPC stream.
     * Vectors are always included (no include_vectors flag). The watermark /
     * done / count come back as response headers (X-Next-Watermark, X-Done,
     * X-Count) since the body is binary. Additive — the local sync path uses
     * the JSON `exportSince`; this is for clients that want zero-copy rows.
     *
     * Header handling: a missing, blank or non-numeric X-Next-Watermark falls
     * back to `opts.since ?? 0`; X-Done is compared case-insensitively and
     * defaults to true when absent; a missing or non-numeric X-Count is 0. */
    async exportSinceArrow(collection, opts = {}) {
        const query = this.buildExportQuery(collection, opts, "arrow");
        return this.fetchWithTimeout("GET", `/sync/export?${query}`, { headers: { Authorization: `Bearer ${this.apiToken}` } }, async (resp) => {
            const numericHeader = (name, fallback) => {
                const raw = resp.headers.get(name);
                // Number("") is 0, which would silently reset a watermark.
                if (raw == null || raw.trim() === "")
                    return fallback;
                const value = Number(raw);
                return Number.isFinite(value) ? value : fallback;
            };
            const nextWatermark = numericHeader("X-Next-Watermark", opts.since ?? 0);
            // Servers may render booleans as "True"/"False"; accept any casing.
            const done = (resp.headers.get("X-Done") ?? "true").trim().toLowerCase() === "true";
            const count = numericHeader("X-Count", 0);
            return { data: await resp.arrayBuffer(), nextWatermark, done, count };
        }, "/sync/export");
    }
    /**
     * Drain every remaining page for a collection, invoking `onPage` for each.
     * Returns the final watermark to persist for the next incremental sync.
     *
     * @param collection Collection to export.
     * @param onPage Awaited with each non-empty page of rows.
     * @param opts Same options as `exportSince`; `since` is the start point.
     * @throws Error when the server reports more pages but does not advance
     *   the watermark — repeating the identical request would loop forever.
     */
    async exportAll(collection, onPage, opts = {}) {
        let watermark = opts.since ?? 0;
        for (;;) {
            const page = await this.exportSince(collection, { ...opts, since: watermark });
            const rows = page.rows ?? [];
            if (rows.length > 0)
                await onPage(rows);
            const next = page.next_watermark;
            if (page.done)
                return next;
            if (!(next > watermark)) {
                throw new Error(`Modal export for "${collection}" stalled at watermark ${watermark}: server reported more pages without advancing`);
            }
            watermark = next;
        }
    }
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config → Modal client resolution.
|
|
3
|
+
*
|
|
4
|
+
* Keeps `src/modal-client.ts` (the typed HTTP mirror of the contract) pristine
|
|
5
|
+
* and centralizes the pieces every consumer (lance.ts, sync.ts, commands.ts)
|
|
6
|
+
* shares: token resolution (env wins), the canonical `model__dim` pair, and
|
|
7
|
+
* client construction.
|
|
8
|
+
*
|
|
9
|
+
* No HTTP is reimplemented here — `ModalEmbeddingClient` owns that.
|
|
10
|
+
*/
|
|
11
|
+
import { ModalEmbeddingClient } from "./modal-client.js";
|
|
12
|
+
import type { WikiConfig } from "./types.js";
|
|
13
|
+
/** Env var name for the Modal bearer token (preferred over config). */
|
|
14
|
+
// Declared with its literal value, so the type is the exact string "PVM_API_TOKEN".
export declare const MODAL_TOKEN_ENV = "PVM_API_TOKEN";
|
|
15
|
+
/**
|
|
16
|
+
* Resolve the Modal API token. `PVM_API_TOKEN` env always wins and is
|
|
17
|
+
* preferred; config `wiki.embedding.modal.apiToken` is a fallback. Never log
|
|
18
|
+
* the resolved token.
|
|
19
|
+
*/
|
|
20
|
+
// The implementation is `process.env[MODAL_TOKEN_ENV] || cfg.embedding.modal?.apiToken`:
// an unset or empty env value falls through to the config token; undefined when neither is set.
export declare const resolveModalToken: (cfg: WikiConfig) => string | undefined;
|
|
21
|
+
/** True when Modal is usable: a base URL and a resolvable token are present. */
|
|
22
|
+
// The implementation also requires `cfg.embedding.provider === "modal"`; a base URL and
// token alone are not enough for this to return true.
export declare const isModalConfigured: (cfg: WikiConfig) => boolean;
|
|
23
|
+
/**
|
|
24
|
+
* Build a `ModalEmbeddingClient` from config. Returns null when Modal is not
|
|
25
|
+
* configured (no base URL / token) so callers can degrade gracefully.
|
|
26
|
+
*/
|
|
27
|
+
// Checks only the base URL and the resolved token; unlike isModalConfigured it does not
// look at `cfg.embedding.provider`. No timeout is passed, so the client default applies.
export declare const createModalClient: (cfg: WikiConfig) => ModalEmbeddingClient | null;
|
|
28
|
+
/**
|
|
29
|
+
* The canonical embedder key for a collection. Per-collection overrides win;
|
|
30
|
+
* otherwise the global Modal config model; otherwise "embeddinggemma"
|
|
31
|
+
* (the eval-confirmed baseline — kept as the default *model name*, never as a
|
|
32
|
+
* hard-coded dimension).
|
|
33
|
+
*/
|
|
34
|
+
// Lookup order: cfg.embedding.collectionModels[collection].model, then
// cfg.embedding.modal.model, then "embeddinggemma". Empty strings are skipped at each step.
export declare const resolveModel: (cfg: WikiConfig, collection?: string) => string;
|
|
35
|
+
/**
|
|
36
|
+
* The configured output dimension for a collection, if known. Per-collection
|
|
37
|
+
* overrides win; otherwise the global Modal config `dim`. Returns undefined
|
|
38
|
+
* when unset — callers resolve the native dim lazily via `/models`.
|
|
39
|
+
*/
|
|
40
|
+
// Lookup order: cfg.embedding.collectionModels[collection].dim (any non-null value,
// including 0), then cfg.embedding.modal.dim.
export declare const resolveDim: (cfg: WikiConfig, collection?: string) => number | undefined;
|
|
41
|
+
/** Namespaced local table name mirroring the server's ADR-3 scheme. */
|
|
42
|
+
// Produces `col_${collection}__${model}__${dim}`; the parts are interpolated as-is,
// with no escaping or validation.
export declare const namespacedTableName: (collection: string, model: string, dim: number) => string;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config → Modal client resolution.
|
|
3
|
+
*
|
|
4
|
+
* Keeps `src/modal-client.ts` (the typed HTTP mirror of the contract) pristine
|
|
5
|
+
* and centralizes the pieces every consumer (lance.ts, sync.ts, commands.ts)
|
|
6
|
+
* shares: token resolution (env wins), the canonical `model__dim` pair, and
|
|
7
|
+
* client construction.
|
|
8
|
+
*
|
|
9
|
+
* No HTTP is reimplemented here — `ModalEmbeddingClient` owns that.
|
|
10
|
+
*/
|
|
11
|
+
import { ModalEmbeddingClient } from "./modal-client.js";
|
|
12
|
+
/** Name of the environment variable that carries the Modal bearer token. */
export const MODAL_TOKEN_ENV = "PVM_API_TOKEN";
/**
 * Look up the Modal API token.
 *
 * A non-empty `PVM_API_TOKEN` environment variable takes precedence; the
 * config value `embedding.modal.apiToken` is used only when the env var is
 * unset or empty. The result may be undefined. Never log the returned value.
 */
export const resolveModalToken = (cfg) => {
    const fromEnv = process.env[MODAL_TOKEN_ENV];
    if (fromEnv) {
        return fromEnv;
    }
    return cfg.embedding.modal?.apiToken;
};
|
|
20
|
+
/**
 * Report whether Modal can be used with this config: the embedding provider
 * must be "modal", a base URL must be set, and a token must be resolvable
 * (checked in that order).
 */
export const isModalConfigured = (cfg) => {
    if (cfg.embedding.provider !== "modal") {
        return false;
    }
    if (!cfg.embedding.modal?.baseUrl) {
        return false;
    }
    return Boolean(resolveModalToken(cfg));
};
|
|
22
|
+
/**
 * Construct a `ModalEmbeddingClient` from the embedding config.
 *
 * Yields null when either the base URL or the token is missing, letting
 * callers degrade gracefully. The token is only resolved once a base URL is
 * known to be present.
 */
export const createModalClient = (cfg) => {
    const baseUrl = cfg.embedding.modal?.baseUrl;
    const apiToken = baseUrl ? resolveModalToken(cfg) : undefined;
    return baseUrl && apiToken ? new ModalEmbeddingClient({ baseUrl, apiToken }) : null;
};
|
|
36
|
+
/**
 * Pick the embedder model name for a collection.
 *
 * Precedence: the collection's own `collectionModels[collection].model`,
 * then the global `modal.model`, then the literal "embeddinggemma" (a default
 * model *name* only — no dimension is implied). Empty values are skipped.
 */
export const resolveModel = (cfg, collection) => {
    const perCollection = collection
        ? cfg.embedding.collectionModels?.[collection]?.model
        : undefined;
    return perCollection || cfg.embedding.modal?.model || "embeddinggemma";
};
|
|
48
|
+
/**
 * Pick the configured output dimension for a collection, when there is one.
 *
 * A non-null `collectionModels[collection].dim` takes precedence over the
 * global `modal.dim`. The result is undefined when neither is set, in which
 * case callers resolve the model's native dim lazily via `/models`.
 */
export const resolveDim = (cfg, collection) => {
    const pinned = collection
        ? cfg.embedding.collectionModels?.[collection]?.dim
        : undefined;
    return pinned ?? cfg.embedding.modal?.dim;
};
|
|
59
|
+
/**
 * Build the local table name for a (collection, model, dim) triple, mirroring
 * the server's ADR-3 naming scheme: `col_<collection>__<model>__<dim>`.
 */
export const namespacedTableName = (collection, model, dim) => {
    return "col_".concat(collection, "__", model, "__", dim);
};
|
|
@@ -6,4 +6,11 @@ export declare const setupWizard: (ctx: ExtensionContext, cliArgs?: {
|
|
|
6
6
|
provider?: string;
|
|
7
7
|
model?: string;
|
|
8
8
|
}) => Promise<void>;
|
|
9
|
+
/**
|
|
10
|
+
* Interactive Modal embedding configuration + "Test connection" action.
|
|
11
|
+
* Walks base URL, canonical model, dim, offline fallback, and auto-sync
|
|
12
|
+
* (off by default). Token is read from `PVM_API_TOKEN` env (preferred) — not
|
|
13
|
+
* collected here; only an optional config fallback is offered.
|
|
14
|
+
*/
|
|
15
|
+
// Needs an existing project config and an interactive UI; otherwise it notifies an error
// and returns. When the prompts are completed it writes `wiki.embedding.provider = "modal"`
// and the Modal settings to the project config file.
export declare const configureModalWizard: (ctx: ExtensionContext) => Promise<void>;
|
|
9
16
|
export declare const openSettingsDashboard: (ctx: ExtensionContext) => Promise<void>;
|
package/dist/src/settings-ui.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import { testOllamaConnection } from "./lance.js";
|
|
4
|
+
import { MODAL_TOKEN_ENV, createModalClient } from "./modal-config.js";
|
|
4
5
|
import { GLOBAL_CONFIG_PATH, collectionNames, findConfig, loadConfig, } from "./utils.js";
|
|
5
6
|
export const createCollectionWizard = async (ctx) => {
|
|
6
7
|
const { project: cfgPath } = findConfig(ctx.cwd);
|
|
@@ -95,7 +96,7 @@ export const setupWizard = async (ctx, cliArgs) => {
|
|
|
95
96
|
if (cliArgs.vault) {
|
|
96
97
|
config.wiki.vaults.default = { path: cliArgs.vault };
|
|
97
98
|
}
|
|
98
|
-
if (cliArgs.provider && ["ollama", "transformers"].includes(cliArgs.provider)) {
|
|
99
|
+
if (cliArgs.provider && ["ollama", "transformers", "modal"].includes(cliArgs.provider)) {
|
|
99
100
|
config.wiki.embedding.provider = cliArgs.provider;
|
|
100
101
|
}
|
|
101
102
|
if (cliArgs.model) {
|
|
@@ -155,11 +156,18 @@ export const setupWizard = async (ctx, cliArgs) => {
|
|
|
155
156
|
const provider = await ctx.ui.select("Embedding provider:", [
|
|
156
157
|
"transformers (all-MiniLM-L6-v2, offline, zero config)",
|
|
157
158
|
"ollama (embeddinggemma, higher quality, requires Ollama)",
|
|
159
|
+
"modal (cloud GPU embeddings + server vector sync, requires Modal deploy)",
|
|
158
160
|
]);
|
|
159
161
|
if (!provider) {
|
|
160
162
|
ctx.ui.notify("Setup cancelled.", "warning");
|
|
161
163
|
return;
|
|
162
164
|
}
|
|
165
|
+
if (provider.startsWith("modal")) {
|
|
166
|
+
// Delegate to the dedicated Modal wizard (base URL, model, dim, fallback,
|
|
167
|
+
// auto-sync, test connection) and return — it writes the project config.
|
|
168
|
+
await configureModalWizard(ctx);
|
|
169
|
+
return;
|
|
170
|
+
}
|
|
163
171
|
let ollamaModel = "embeddinggemma";
|
|
164
172
|
if (provider.startsWith("ollama")) {
|
|
165
173
|
const conn = await testOllamaConnection();
|
|
@@ -205,6 +213,102 @@ export const setupWizard = async (ctx, cliArgs) => {
|
|
|
205
213
|
"Next: /wiki watcher start (or restart pi for auto-start)",
|
|
206
214
|
].join("\n"), "info");
|
|
207
215
|
};
|
|
216
|
+
/**
 * Interactive Modal embedding configuration + "Test connection" action.
 * Walks base URL, canonical model, dim, offline fallback, and auto-sync
 * (off by default). Token is read from `PVM_API_TOKEN` env (preferred) — not
 * collected here; only an optional config fallback is offered.
 *
 * Flow: prompt for each setting, validate, merge the answers into the project
 * config file (other keys are preserved), warn when the token env var is
 * missing, optionally call `/health`, then show a summary.
 *
 * Returns without writing when there is no project config, no UI, the base
 * URL or model prompt is cancelled, or an answer fails validation.
 *
 * @param ctx Extension context; uses `ctx.cwd`, `ctx.hasUI` and `ctx.ui`.
 */
export const configureModalWizard = async (ctx) => {
    const DEFAULT_MODEL = "embeddinggemma";
    const DEFAULT_INTERVAL_MS = 300000;
    // True only for absolute http(s) URLs; rejects the `<workspace>` placeholder.
    const isHttpUrl = (value) => {
        try {
            const protocol = new URL(value).protocol;
            return protocol === "http:" || protocol === "https:";
        }
        catch {
            return false;
        }
    };
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    const { project: cfgPath } = findConfig(ctx.cwd);
    if (!cfgPath) {
        ctx.ui.notify("No config found. Run /wiki init first.", "error");
        return;
    }
    if (!ctx.hasUI) {
        ctx.ui.notify("TUI required for the Modal wizard. Use /wiki modal config <key> <value>.", "error");
        return;
    }
    const cfg = loadConfig(ctx.cwd);
    const modal = cfg.wiki.embedding.modal ?? {};
    const baseUrlInput = await ctx.ui.input("Modal base URL (deployed ASGI app):", modal.baseUrl ||
        "https://<workspace>--pi-vault-mind-embed-embeddingservice-fastapi-app.modal.run");
    const baseUrl = (baseUrlInput ?? "").trim().replace(/\/+$/, "");
    if (!baseUrl)
        return;
    if (!isHttpUrl(baseUrl)) {
        // Catches the unedited placeholder as well as typos before they are saved.
        ctx.ui.notify(`Invalid Modal base URL: ${baseUrl}`, "error");
        return;
    }
    const modelInput = await ctx.ui.input("Canonical embedder model:", modal.model || DEFAULT_MODEL);
    if (modelInput === undefined)
        return;
    const model = modelInput.trim() || DEFAULT_MODEL;
    const dimInput = await ctx.ui.input("Output dimension (blank = model native, e.g. 768 for embeddinggemma):", modal.dim ? String(modal.dim) : "");
    // undefined = prompt cancelled (keep any saved dim); "" = explicitly blank.
    const dimStr = dimInput === undefined ? undefined : dimInput.trim();
    let dim;
    if (dimStr) {
        // Digits only: parseInt alone would accept "768abc", "12.5" or "-5".
        const parsedDim = /^\d+$/.test(dimStr) ? Number(dimStr) : Number.NaN;
        if (!Number.isSafeInteger(parsedDim) || parsedDim <= 0) {
            ctx.ui.notify(`Invalid dim: ${dimStr}`, "error");
            return;
        }
        dim = parsedDim;
    }
    const fallback = await ctx.ui.select("Offline fallback (when Modal is unreachable):", [
        "ollama (same canonical model only)",
        "transformers (different space → degrades to FTS)",
        "none (degrade to FTS, never fall back)",
    ]);
    let fallbackCfg;
    if (fallback?.startsWith("ollama"))
        fallbackCfg = { enabled: true, provider: "ollama" };
    else if (fallback?.startsWith("transformers"))
        fallbackCfg = { enabled: true, provider: "transformers" };
    else if (fallback?.startsWith("none"))
        fallbackCfg = { enabled: false };
    const autoSync = await ctx.ui.confirm("Auto-sync", "Enable background vector sync (off by default)? Pulls new server vectors on an interval.");
    // Start from the saved interval so a custom value survives a re-run.
    const savedInterval = modal.sync?.autoSyncIntervalMs;
    let intervalMs = Number.isSafeInteger(savedInterval) && savedInterval > 0 ? savedInterval : DEFAULT_INTERVAL_MS;
    if (autoSync) {
        const intervalStr = await ctx.ui.input("Auto-sync interval (ms):", String(intervalMs));
        const parsedInterval = Number.parseInt(intervalStr || "", 10);
        // Ignore blank, non-numeric, zero or negative answers.
        if (Number.isFinite(parsedInterval) && parsedInterval > 0)
            intervalMs = parsedInterval;
    }
    // Persist
    let existing;
    try {
        existing = JSON.parse(fs.readFileSync(cfgPath, "utf-8"));
    }
    catch (err) {
        ctx.ui.notify(`Could not read ${cfgPath}: ${describeError(err)}`, "error");
        return;
    }
    if (existing === null || typeof existing !== "object" || Array.isArray(existing)) {
        ctx.ui.notify(`Could not read ${cfgPath}: expected a JSON object`, "error");
        return;
    }
    existing.wiki = existing.wiki || {};
    existing.wiki.embedding = existing.wiki.embedding || {};
    existing.wiki.embedding.provider = "modal";
    const previousModal = existing.wiki.embedding.modal || {};
    existing.wiki.embedding.modal = {
        ...previousModal,
        baseUrl,
        model,
        ...(dim != null ? { dim } : {}),
        ...(fallbackCfg ? { fallback: fallbackCfg } : {}),
        sync: {
            ...(previousModal.sync || {}),
            autoSync,
            autoSyncIntervalMs: intervalMs,
        },
    };
    // The prompt promises "blank = model native": an explicitly blank answer
    // must drop a previously saved dim instead of inheriting it via the spread.
    if (dimStr === "")
        delete existing.wiki.embedding.modal.dim;
    fs.writeFileSync(cfgPath, `${JSON.stringify(existing, null, 2)}\n`, "utf-8");
    // Token guidance + optional config fallback (env always wins)
    const tokenEnv = process.env[MODAL_TOKEN_ENV];
    if (!tokenEnv) {
        ctx.ui.notify(`⚠️ No ${MODAL_TOKEN_ENV} env var detected. Set it in your shell:\n  export ${MODAL_TOKEN_ENV}=<bearer token>\n(Env is preferred; config apiToken is a fallback only.)`, "warning");
    }
    // Test connection against /health
    const test = await ctx.ui.confirm("Test connection", "Call /health now to verify the Modal service?");
    if (test) {
        // Reload so the client is built from what was just written.
        const client = createModalClient(loadConfig(ctx.cwd).wiki);
        if (!client) {
            ctx.ui.notify("❌ Modal not configured (missing baseUrl or token).", "error");
            return;
        }
        try {
            const health = await client.health();
            ctx.ui.notify(`✅ Connected. Default model: ${health.default_model}`, "info");
        }
        catch (err) {
            ctx.ui.notify(`❌ Health check failed: ${describeError(err)}`, "error");
        }
    }
    ctx.ui.notify([
        "✅ Modal embedding configured.",
        `  ${cfgPath}`,
        `  Provider: modal • Model: ${model}${dim ? ` • dim ${dim}` : ""}`,
        `  Auto-sync: ${autoSync ? `on (${intervalMs}ms)` : "off"}`,
        "  Restart session for changes to take effect.",
    ].join("\n"), "info");
};
|
|
208
312
|
export const openSettingsDashboard = async (ctx) => {
|
|
209
313
|
if (!ctx.hasUI) {
|
|
210
314
|
ctx.ui.notify("TUI required for settings dashboard.", "error");
|
|
@@ -220,6 +324,7 @@ export const openSettingsDashboard = async (ctx) => {
|
|
|
220
324
|
const choice = await ctx.ui.select("pi-vault-mind Settings", [
|
|
221
325
|
"Manage Collections",
|
|
222
326
|
"Manage Injectors",
|
|
327
|
+
"Modal Embedding",
|
|
223
328
|
"Extension Integrations",
|
|
224
329
|
"Exit",
|
|
225
330
|
]);
|
|
@@ -233,6 +338,9 @@ export const openSettingsDashboard = async (ctx) => {
|
|
|
233
338
|
else if (choice === "Manage Injectors") {
|
|
234
339
|
await manageInjectors(ctx, cfgPath);
|
|
235
340
|
}
|
|
341
|
+
else if (choice === "Modal Embedding") {
|
|
342
|
+
await configureModalWizard(ctx);
|
|
343
|
+
}
|
|
236
344
|
else if (choice === "Extension Integrations") {
|
|
237
345
|
await manageExtensions(ctx, cfgPath);
|
|
238
346
|
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Modal vector sync engine.
|
|
3
|
+
*
|
|
4
|
+
* Drains `ModalEmbeddingClient.exportAll(collection, …)` into the local
|
|
5
|
+
* LanceDB via the precomputed-vector insert path (`upsertPrecomputed`), and
|
|
6
|
+
* persists a per-(collection, model, dim) `seq` watermark (ADR-4) so
|
|
7
|
+
* incremental syncs resume from where the last one left off.
|
|
8
|
+
*
|
|
9
|
+
* Idempotent: rows are keyed by `id` and applied with merge-insert, so
|
|
10
|
+
* re-fetching a boundary row is a no-op and re-running a sync with no new rows
|
|
11
|
+
* changes nothing but the (unchanged) watermark.
|
|
12
|
+
*/
|
|
13
|
+
import type { JobStatus } from "./modal-client.js";
|
|
14
|
+
import type { UniversalConfig } from "./types.js";
|
|
15
|
+
/** Persisted sync bookkeeping: one watermark per synced namespace. */
export interface SyncState {
    /** `${collection}__${model}__${dim}` → last-seen `seq` watermark. */
    watermarks: Record<string, number>;
}
|
|
19
|
+
/** Outcome of syncing one collection (the return value of `syncCollection`). */
export interface SyncResult {
    /** Collection that was synced. */
    collection: string;
    /** Embedder model of the synced namespace. */
    model: string;
    /** Vector dimension of the synced namespace. */
    dim: number;
    /** Row count reported for this run (0 when there was nothing new). */
    rows: number;
    /** Watermark after this run. */
    watermark: number;
    /** NOTE(review): presumably echoes `SyncOptions.full` — confirm in sync.js. */
    full: boolean;
}
|
|
27
|
+
// NOTE(review): presumably reads the persisted SyncState for this config; the storage
// location and missing-file behaviour live in sync.js, which is not visible here.
export declare const loadSyncState: (cfg: UniversalConfig) => SyncState;
|
|
28
|
+
// NOTE(review): presumably the write-side counterpart of loadSyncState — confirm in sync.js.
export declare const saveSyncState: (cfg: UniversalConfig, state: SyncState) => void;
|
|
29
|
+
/** Current watermark for a (collection, model, dim), default 0. */
|
|
30
|
+
// NOTE(review): presumably looks up SyncState.watermarks under the
// `${collection}__${model}__${dim}` key — confirm in sync.js.
export declare const getWatermark: (cfg: UniversalConfig, collection: string, model: string, dim: number) => number;
|
|
31
|
+
/** List the collections/tables held in the server-side vector store. */
|
|
32
|
+
// Resolves to the `SyncCollection[]` type exported by modal-client.js.
// NOTE(review): behaviour when Modal is not configured is not visible here — confirm.
export declare const listRemoteCollections: (cfg: UniversalConfig) => Promise<import("./modal-client.js").SyncCollection[]>;
|
|
33
|
+
/** Options accepted by `syncCollection` and `syncAll`. */
export interface SyncOptions {
    /** Sync from watermark 0 (re-pull everything). Default false (incremental). */
    full?: boolean;
    /** Notified after each page is merged, with the running row count. */
    onProgress?: (count: number) => void;
}
|
|
39
|
+
/**
|
|
40
|
+
* Sync a single collection from the server into the local LanceDB. Incremental
|
|
41
|
+
* by default (from the persisted watermark); `full` re-pulls from seq 0. Rows
|
|
42
|
+
* are merge-inserted by `id` (idempotent). Returns the row count + new
|
|
43
|
+
* watermark. A re-run with no new rows is a no-op (rows: 0, watermark unchanged).
|
|
44
|
+
*/
|
|
45
|
+
// `opts` is optional; see SyncOptions for `full` and `onProgress`.
// NOTE(review): failure modes (Modal unreachable or not configured) are not visible here.
export declare const syncCollection: (cfg: UniversalConfig, collection: string, opts?: SyncOptions) => Promise<SyncResult>;
|
|
46
|
+
/**
|
|
47
|
+
* Sync every configured collection (or a caller-supplied list). Collections
|
|
48
|
+
* with no server-side table report `rows: 0` and are skipped harmlessly.
|
|
49
|
+
*/
|
|
50
|
+
// Resolves to one SyncResult per collection.
// NOTE(review): whether collections are synced sequentially or in parallel is not visible here.
export declare const syncAll: (cfg: UniversalConfig, collections?: string[], opts?: SyncOptions) => Promise<SyncResult[]>;
|
|
51
|
+
/** Options accepted by `reindexRemote`. */
export interface ReindexRemoteOptions {
    /** Notified with the job status on each poll. */
    onStatus?: (status: JobStatus) => void;
    /** Poll interval in ms (default 2000). */
    pollMs?: number;
}
|
|
57
|
+
/**
|
|
58
|
+
* Read a collection's JSONL (the source of truth), submit a Modal bulk embedding
|
|
59
|
+
* job, poll it to completion, then sync the freshly-embedded vectors down into
|
|
60
|
+
* the local LanceDB. Used by `/wiki reindex --all --reembed --remote`. Local
|
|
61
|
+
* re-embed remains the default; this offloads bulk embedding to cloud GPUs.
|
|
62
|
+
*
|
|
63
|
+
* Records map the JSONL entry's embeddable field (`fact`) to `text` and the
|
|
64
|
+
* remaining fields to `metadata`, so sync-down can reconstruct the local row.
|
|
65
|
+
*/
|
|
66
|
+
// Resolves to one entry per requested collection; `job`, `sync` and `error` are all optional.
// NOTE(review): presumably `error` is set instead of rejecting when a collection fails — confirm in sync.js.
export declare const reindexRemote: (cfg: UniversalConfig, collections: string[], opts?: ReindexRemoteOptions) => Promise<Array<{
    /** Collection this entry refers to. */
    collection: string;
    /** Job status, when available. */
    job?: JobStatus;
    /** Sync-down result, when available. */
    sync?: SyncResult;
    /** Error description, when present. */
    error?: string;
}>>;
|