pi-vault-mind 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/dist/src/autosync.d.ts +16 -0
- package/dist/src/autosync.js +43 -0
- package/dist/src/commands.d.ts +18 -0
- package/dist/src/commands.js +464 -10
- package/dist/src/embed-queue.d.ts +80 -0
- package/dist/src/embed-queue.js +163 -0
- package/dist/src/index.js +9 -0
- package/dist/src/lance.d.ts +7 -0
- package/dist/src/lance.js +432 -0
- package/dist/src/modal-client.d.ts +176 -0
- package/dist/src/modal-client.js +174 -0
- package/dist/src/modal-config.d.ts +42 -0
- package/dist/src/modal-config.js +60 -0
- package/dist/src/settings-ui.d.ts +7 -0
- package/dist/src/settings-ui.js +109 -1
- package/dist/src/sync.d.ts +71 -0
- package/dist/src/sync.js +211 -0
- package/dist/src/types.d.ts +102 -1
- package/dist/test/embed-queue.test.js +105 -0
- package/dist/test/index.test.js +35 -0
- package/dist/test/lance-modal.test.js +95 -0
- package/dist/test/modal-client.test.js +294 -0
- package/dist/test/modal-config.test.js +86 -0
- package/dist/test/sync.test.js +132 -0
- package/package.json +3 -2
- package/dist/test/index.test.d.ts +0 -1
package/dist/src/commands.js
CHANGED
|
@@ -7,8 +7,10 @@ import { DEFAULT_CONFIG } from "./types.js";
|
|
|
7
7
|
import { CONFIG_FILES, EXT_ROOT, collectionNames, ensureDir, findConfig, getPiContextConfig, hasPiContextTools, loadConfig, } from "./utils.js";
|
|
8
8
|
import { updateActiveCollectionWidget } from "./widget.js";
|
|
9
9
|
import { connect, pullOllamaModel, testOllamaConnection } from "./lance.js";
|
|
10
|
+
import { MODAL_TOKEN_ENV, createModalClient, isModalConfigured } from "./modal-config.js";
|
|
10
11
|
import { createServerState } from "./server.js";
|
|
11
12
|
import { createCollectionWizard, createInjectorWizard, openSettingsDashboard, setupWizard, } from "./settings-ui.js";
|
|
13
|
+
import { reindexRemote, syncAll, syncCollection } from "./sync.js";
|
|
12
14
|
import { createWatcherState, getWatcherStatus, startWatcher, stopWatcher } from "./watcher.js";
|
|
13
15
|
// ── Shared helpers ───────────────────────────────────────────────────────────
|
|
14
16
|
const WIKI_USAGE = [
|
|
@@ -26,10 +28,15 @@ const WIKI_USAGE = [
|
|
|
26
28
|
" /wiki context enable|disable Enable/disable pi-context integration",
|
|
27
29
|
" /wiki context status Show pi-context integration status",
|
|
28
30
|
" /wiki embedding status Show embedding config + Ollama models",
|
|
29
|
-
" /wiki embedding use Switch provider (ollama | transformers)",
|
|
31
|
+
" /wiki embedding use Switch provider (ollama | transformers | modal)",
|
|
30
32
|
" /wiki embedding model Set Ollama embedding model",
|
|
31
33
|
" /wiki embedding models List available Ollama models",
|
|
32
34
|
" /wiki embedding pull Pull a model from Ollama",
|
|
35
|
+
" /wiki modal status Show Modal config + health + remote collections",
|
|
36
|
+
" /wiki modal config Set Modal baseUrl/model/dim/sync/fallback",
|
|
37
|
+
" /wiki modal sync Pull server vectors into local LanceDB [--full]",
|
|
38
|
+
" /wiki modal jobs <id> Poll a Modal bulk job",
|
|
39
|
+
" /wiki modal migrate <model> Change canonical model + re-embed (remote)",
|
|
33
40
|
" /wiki watcher start Start passive file watcher",
|
|
34
41
|
" /wiki watcher stop Stop passive file watcher",
|
|
35
42
|
" /wiki watcher status Show watcher status",
|
|
@@ -139,9 +146,42 @@ export const selectActiveCollection = async (ctx) => {
|
|
|
139
146
|
}
|
|
140
147
|
};
|
|
141
148
|
// ── /wiki init ──────────────────────────────────────────────────────────────
|
|
149
|
+
/**
 * Vault-root `.gitignore` entries that keep pi-vault-mind compatible with
 * `obsidian-git` (and, transitively, Obsidian Sync setups that also back up via
 * git): the LanceDB index is a large, per-device, rebuildable binary and must
 * never be committed; Obsidian's workspace UI state churns constantly. See
 * docs/OBSIDIAN_SETUP.md §6.
 */
export const GITIGNORE_ENTRIES = [".lancedb/", ".obsidian/workspace*.json"];
/**
 * Decide what to do with the vault `.gitignore`, given its current contents
 * (`null` if absent). Pure — does no I/O — so it is unit-testable.
 *
 * Result shapes:
 * - `existing === null`  → `{ action: "create", content }`, where `content` is
 *   the whole commented file body to write.
 * - some entry missing   → `{ action: "append", content }`, where `content` is
 *   only the text to append (including any separating newline(s)).
 * - everything covered   → `{ action: "skip", content: "" }`.
 *
 * Matching is lenient, NOT line-exact: each existing line and each wanted entry
 * is trimmed and stripped of leading/trailing slashes before comparison, so
 * `.lancedb`, `.lancedb/` and `/.lancedb/` all count as the same rule and no
 * duplicate is appended.
 *
 * @param existing Current `.gitignore` text, or `null` when the file is absent.
 * @returns The planned action and the text to write or append.
 */
export const planGitignore = (existing) => {
    if (existing === null) {
        return {
            action: "create",
            content: [
                "# pi-vault-mind: keep the rebuildable binary index out of git",
                ".lancedb/",
                "# Obsidian UI workspace state churns constantly",
                ".obsidian/workspace*.json",
                "", // trailing newline
            ].join("\n"),
        };
    }
    // Canonical form used on both sides of the comparison.
    const canonical = (line) => line.trim().replace(/^\/+|\/+$/g, "");
    const covered = new Set(existing.split(/\r?\n/).map((line) => canonical(line)));
    const missing = GITIGNORE_ENTRIES.filter((entry) => !covered.has(canonical(entry)));
    if (missing.length === 0) {
        return { action: "skip", content: "" };
    }
    // Separate our section from existing content with exactly one blank line:
    // nothing for an empty file, one newline if the file already ends with one,
    // two if the last line is unterminated.
    let separator = "\n\n";
    if (existing.length === 0) {
        separator = "";
    }
    else if (existing.endsWith("\n")) {
        separator = "\n";
    }
    return { action: "append", content: `${separator}# pi-vault-mind\n${missing.join("\n")}\n` };
};
|
|
142
181
|
const handleInit = async (_args, ctx, pi) => {
|
|
143
182
|
const cfg = loadConfig(ctx.cwd);
|
|
144
183
|
const created = [];
|
|
184
|
+
const updated = [];
|
|
145
185
|
const skipped = [];
|
|
146
186
|
const ensureFile = (dest, tmpl) => {
|
|
147
187
|
if (fs.existsSync(dest)) {
|
|
@@ -203,12 +243,30 @@ const handleInit = async (_args, ctx, pi) => {
|
|
|
203
243
|
if (ij.artifactPath)
|
|
204
244
|
ensureFile(ij.artifactPath, "ARTIFACT.md");
|
|
205
245
|
}
|
|
246
|
+
// Keep the rebuildable binary index out of git so obsidian-git (and git-backed
|
|
247
|
+
// Obsidian Sync setups) never commit it. See docs/OBSIDIAN_SETUP.md §6.
|
|
248
|
+
const gitignoreDest = path.join(ctx.cwd, ".gitignore");
|
|
249
|
+
const plan = planGitignore(fs.existsSync(gitignoreDest) ? fs.readFileSync(gitignoreDest, "utf-8") : null);
|
|
250
|
+
if (plan.action === "create") {
|
|
251
|
+
fs.writeFileSync(gitignoreDest, plan.content, "utf-8");
|
|
252
|
+
created.push(gitignoreDest);
|
|
253
|
+
}
|
|
254
|
+
else if (plan.action === "append") {
|
|
255
|
+
fs.appendFileSync(gitignoreDest, plan.content);
|
|
256
|
+
updated.push(gitignoreDest);
|
|
257
|
+
}
|
|
258
|
+
else {
|
|
259
|
+
skipped.push(gitignoreDest);
|
|
260
|
+
}
|
|
206
261
|
const msg = [
|
|
207
262
|
"Wiki scaffolding complete.",
|
|
208
263
|
"",
|
|
209
264
|
"Created:",
|
|
210
265
|
...created.map((c) => ` • ${path.relative(ctx.cwd, c)}`),
|
|
211
266
|
];
|
|
267
|
+
if (updated.length) {
|
|
268
|
+
msg.push("", "Updated:", ...updated.map((u) => ` • ${path.relative(ctx.cwd, u)}`));
|
|
269
|
+
}
|
|
212
270
|
if (skipped.length) {
|
|
213
271
|
msg.push("", "Skipped (already exist):", ...skipped.map((s) => ` • ${path.relative(ctx.cwd, s)}`));
|
|
214
272
|
}
|
|
@@ -328,10 +386,47 @@ const handleApprove = async (args, ctx) => {
|
|
|
328
386
|
// ── /wiki reindex ────────────────────────────────────────────────────────────
|
|
329
387
|
const handleReindex = async (args, ctx, pi) => {
|
|
330
388
|
const cfg = loadConfig(ctx.cwd);
|
|
331
|
-
const
|
|
332
|
-
const
|
|
333
|
-
const
|
|
334
|
-
const
|
|
389
|
+
const tokens = args.trim().split(/\s+/).filter(Boolean);
|
|
390
|
+
const flags = new Set(tokens.map((t) => t.toLowerCase()));
|
|
391
|
+
const rebuildEmbeddings = flags.has("--reembed") || flags.has("--full");
|
|
392
|
+
const reindexAll = flags.has("--all");
|
|
393
|
+
const remote = flags.has("--remote");
|
|
394
|
+
const collectionFilter = reindexAll || rebuildEmbeddings ? null : (tokens.find((t) => !t.startsWith("--")) ?? null);
|
|
395
|
+
// Remote bulk re-index: read JSONL → submit Modal bulk job → poll → sync down.
|
|
396
|
+
if (remote) {
|
|
397
|
+
if (cfg.wiki.embedding.provider !== "modal") {
|
|
398
|
+
ctx.ui.notify("--remote requires the modal provider. Run /wiki embedding use modal.", "error");
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
401
|
+
if (!isModalConfigured(cfg.wiki)) {
|
|
402
|
+
ctx.ui.notify(`Modal not configured. Set baseUrl (/wiki modal config baseUrl) and ${MODAL_TOKEN_ENV}.`, "error");
|
|
403
|
+
return;
|
|
404
|
+
}
|
|
405
|
+
const names = reindexAll
|
|
406
|
+
? Object.keys(cfg.collections)
|
|
407
|
+
: collectionFilter
|
|
408
|
+
? [collectionFilter]
|
|
409
|
+
: Object.keys(cfg.collections);
|
|
410
|
+
ctx.ui.notify(`Remote re-index: submitting Modal bulk job for ${names.join(", ")}...`, "info");
|
|
411
|
+
try {
|
|
412
|
+
const results = await reindexRemote(cfg, names, {
|
|
413
|
+
onStatus: (s) => ctx.ui.notify(` ${s.collection || ""}: ${s.status} ${s.processed}/${s.total}`, "info"),
|
|
414
|
+
});
|
|
415
|
+
const lines = ["**Remote Re-index Report:**", ""];
|
|
416
|
+
for (const r of results) {
|
|
417
|
+
if (r.error)
|
|
418
|
+
lines.push(` ❌ ${r.collection}: ${r.error}`);
|
|
419
|
+
else
|
|
420
|
+
lines.push(` ✅ ${r.collection}: job ${r.job?.status}, synced ${r.sync?.rows ?? 0} rows (wm ${r.sync?.watermark ?? 0})`);
|
|
421
|
+
}
|
|
422
|
+
lines.push("", "Old namespaces are left intact until the new one is verified.");
|
|
423
|
+
ctx.ui.notify(lines.join("\n"), "info");
|
|
424
|
+
}
|
|
425
|
+
catch (err) {
|
|
426
|
+
ctx.ui.notify(`Remote re-index failed: ${err.message}`, "error");
|
|
427
|
+
}
|
|
428
|
+
return;
|
|
429
|
+
}
|
|
335
430
|
ctx.ui.notify(rebuildEmbeddings
|
|
336
431
|
? "Reindexing: regenerating embeddings + rebuilding indexes..."
|
|
337
432
|
: "Reindexing: rebuilding FTS + vector indexes...", "info");
|
|
@@ -432,12 +527,27 @@ const handleEmbedding = async (args, ctx, pi) => {
|
|
|
432
527
|
`Provider: ${cfg.wiki.embedding.provider}`,
|
|
433
528
|
cfg.wiki.embedding.provider === "ollama"
|
|
434
529
|
? `Model: ${cfg.wiki.embedding.ollamaModel || "embeddinggemma"}`
|
|
435
|
-
:
|
|
530
|
+
: cfg.wiki.embedding.provider === "modal"
|
|
531
|
+
? `Model: ${cfg.wiki.embedding.modal?.model || "(default embeddinggemma)"}`
|
|
532
|
+
: "Model: all-MiniLM-L6-v2 (384 dims)",
|
|
436
533
|
`FTS: ${cfg.wiki.ftsEnabled !== false ? "enabled" : "disabled"}`,
|
|
437
534
|
`Graph: ${cfg.wiki.graph?.enabled !== false ? "enabled" : "disabled"}`,
|
|
438
535
|
`Data Dir: ${cfg.wiki.dataDir}`,
|
|
439
536
|
];
|
|
440
|
-
if (cfg.wiki.embedding.provider === "
|
|
537
|
+
if (cfg.wiki.embedding.provider === "modal") {
|
|
538
|
+
const modal = cfg.wiki.embedding.modal;
|
|
539
|
+
const tokenSrc = process.env[MODAL_TOKEN_ENV]
|
|
540
|
+
? `env ${MODAL_TOKEN_ENV} ✅`
|
|
541
|
+
: modal?.apiToken
|
|
542
|
+
? "config (set env PVM_API_TOKEN to override)"
|
|
543
|
+
: "❌ none (set PVM_API_TOKEN env)";
|
|
544
|
+
lines.push("", "**Modal:**", ` Base URL: ${modal?.baseUrl || "❌ not set"}`, ` Model: ${modal?.model || "(default embeddinggemma)"}`, ` Dim: ${modal?.dim ?? "(native)"}`, ` Token: ${tokenSrc}`, ` Fallback: ${modal?.fallback?.enabled === false ? "disabled" : modal?.fallback?.provider || "(none — degrade to FTS)"}`, ` Sync: auto=${modal?.sync?.autoSync ? "on" : "off"}, interval=${modal?.sync?.autoSyncIntervalMs ?? 300000}ms`);
|
|
545
|
+
const co = cfg.wiki.embedding.coalesce;
|
|
546
|
+
if (co) {
|
|
547
|
+
lines.push(` Coalesce: debounce=${co.debounceMs ?? 1000}ms, batch=${co.maxBatchSize ?? 64}, concurrency=${co.maxConcurrentFlushes ?? 2}, dedupe=${co.dedupe ?? true}, searchBypass=${co.searchBypass ?? true}`);
|
|
548
|
+
}
|
|
549
|
+
}
|
|
550
|
+
else if (cfg.wiki.embedding.provider === "ollama" || !cfg.wiki.embedding.provider) {
|
|
441
551
|
const conn = await testOllamaConnection(pi);
|
|
442
552
|
lines.push("", "**Ollama Status:**", ` Reachable: ${conn.reachable ? "✅ Yes" : "❌ No"}`);
|
|
443
553
|
if (conn.error)
|
|
@@ -462,8 +572,8 @@ const handleEmbedding = async (args, ctx, pi) => {
|
|
|
462
572
|
return;
|
|
463
573
|
}
|
|
464
574
|
case "use": {
|
|
465
|
-
if (!value || !["ollama", "transformers"].includes(value)) {
|
|
466
|
-
ctx.ui.notify("/wiki embedding use <ollama|transformers>", "error");
|
|
575
|
+
if (!value || !["ollama", "transformers", "modal"].includes(value)) {
|
|
576
|
+
ctx.ui.notify("/wiki embedding use <ollama|transformers|modal>", "error");
|
|
467
577
|
return;
|
|
468
578
|
}
|
|
469
579
|
if (value === "ollama") {
|
|
@@ -474,6 +584,12 @@ const handleEmbedding = async (args, ctx, pi) => {
|
|
|
474
584
|
return;
|
|
475
585
|
}
|
|
476
586
|
}
|
|
587
|
+
if (value === "modal") {
|
|
588
|
+
const modal = cfg.wiki.embedding.modal;
|
|
589
|
+
if (!modal?.baseUrl) {
|
|
590
|
+
ctx.ui.notify("Modal needs a base URL. Set it with: /wiki modal config baseUrl <url>\n(Token via PVM_API_TOKEN env, preferred.)", "warning");
|
|
591
|
+
}
|
|
592
|
+
}
|
|
477
593
|
const existing = JSON.parse(fs.readFileSync(cfgPath, "utf-8"));
|
|
478
594
|
existing.wiki = existing.wiki || {};
|
|
479
595
|
existing.wiki.embedding = existing.wiki.embedding || {};
|
|
@@ -528,6 +644,336 @@ const handleEmbedding = async (args, ctx, pi) => {
|
|
|
528
644
|
}
|
|
529
645
|
}
|
|
530
646
|
};
|
|
647
|
+
// ── /wiki modal ──────────────────────────────────────────────────────────────
|
|
648
|
+
// Help text for `/wiki modal config`, shown when the key is not recognised.
// Every line mirrors a branch that handleModalConfig actually accepts:
// page size is set via the `sync` sub-command, `transformers` is a valid
// fallback, and `sync collections` / `coalesce concurrency` are handled too.
const MODAL_CONFIG_USAGE = [
    "**/wiki modal config**",
    "",
    " /wiki modal config baseUrl <url> Set the Modal ASGI base URL",
    " /wiki modal config model <name> Set the canonical embedder (default embeddinggemma)",
    " /wiki modal config dim <n> Set output dimension (omit for native)",
    " /wiki modal config fallback ollama|transformers|none Set offline fallback provider",
    " /wiki modal config sync auto on|off Toggle auto-sync",
    " /wiki modal config sync interval <ms> Auto-sync interval",
    " /wiki modal config sync pageSize <n> Sync page size",
    " /wiki modal config sync collections <names...> Set the sync.collections list",
    " /wiki modal config coalesce debounce <ms> Coalescer debounce window",
    " /wiki modal config coalesce batch <n> Coalescer max batch size",
    " /wiki modal config coalesce concurrency <n> Coalescer max concurrent flushes",
    " /wiki modal config token Show token guidance (use PVM_API_TOKEN env)",
    "",
    " (no args) Show current Modal config",
].join("\n");
|
|
664
|
+
/** Read the raw project config object (mutable). */
|
|
665
|
+
/**
 * Load the raw project config as a plain, mutable object.
 *
 * Best-effort: any failure to read or parse the file yields an empty object
 * instead of throwing.
 *
 * NOTE(review): because a malformed file also yields `{}`, a caller that
 * mutates the result and writes it back would replace the malformed file with
 * a near-empty config — confirm this is acceptable.
 *
 * @param cfgPath Path of the JSON config file.
 * @returns The parsed JSON value, or `{}` when reading/parsing fails.
 */
const readProjectConfig = (cfgPath) => {
    let parsed = {};
    try {
        const raw = fs.readFileSync(cfgPath, "utf-8");
        parsed = JSON.parse(raw);
    }
    catch {
        // Unreadable or invalid JSON: keep the empty default.
    }
    return parsed;
};
|
|
673
|
+
/**
 * Persist a config object as pretty-printed JSON (2-space indent) followed by
 * a single trailing newline, overwriting the file at `cfgPath`.
 *
 * @param cfgPath Destination file path.
 * @param obj     Value to serialise.
 */
const writeProjectConfig = (cfgPath, obj) => {
    const serialized = JSON.stringify(obj, null, 2);
    fs.writeFileSync(cfgPath, `${serialized}\n`, "utf-8");
};
|
|
676
|
+
const modalSection = (obj) => {
|
|
677
|
+
obj.wiki = obj.wiki || {};
|
|
678
|
+
const wiki = obj.wiki;
|
|
679
|
+
wiki.embedding = wiki.embedding || {};
|
|
680
|
+
const emb = wiki.embedding;
|
|
681
|
+
emb.modal = emb.modal || {};
|
|
682
|
+
return emb.modal;
|
|
683
|
+
};
|
|
684
|
+
/**
 * `/wiki modal config [key [value...]]` — show or edit the Modal settings in
 * the project config file.
 *
 * With no arguments it prints the effective Modal settings (from `loadConfig`)
 * and where the API token is resolved from. With a key it re-reads the raw
 * project config, updates `wiki.embedding.modal` (or `wiki.embedding.coalesce`
 * for the `coalesce` key), writes the file back and notifies the user.
 * Unknown keys print MODAL_CONFIG_USAGE.
 *
 * Accepted keys: `baseUrl`/`url`, `model`, `dim`, `fallback`, `sync`,
 * `pageSize` (shorthand for `sync pageSize`), `coalesce`, `token`.
 *
 * @param args Everything after `/wiki modal config`.
 * @param ctx  Command context; uses `ctx.cwd` and `ctx.ui.notify`.
 */
const handleModalConfig = async (args, ctx) => {
    const { project: cfgPath } = findConfig(ctx.cwd);
    if (!cfgPath) {
        ctx.ui.notify("No config found. Run /wiki init first.", "error");
        return;
    }
    const cfg = loadConfig(ctx.cwd);
    const parts = args.trim().split(/\s+/).filter(Boolean);
    const key = parts[0]?.toLowerCase();
    const modal = cfg.wiki.embedding.modal ?? {};
    if (!key) {
        // Read-only summary; the env token takes precedence over the config token.
        const tokenSrc = process.env[MODAL_TOKEN_ENV]
            ? `env ${MODAL_TOKEN_ENV} ✅`
            : modal.apiToken
                ? "config (set PVM_API_TOKEN env to override)"
                : "❌ none (set PVM_API_TOKEN env)";
        const lines = [
            "**Modal Config:**",
            "",
            ` baseUrl: ${modal.baseUrl || "❌ not set"}`,
            ` model: ${modal.model || "(default embeddinggemma)"}`,
            ` dim: ${modal.dim ?? "(native)"}`,
            ` token: ${tokenSrc}`,
            ` fallback: ${modal.fallback?.enabled === false ? "disabled" : modal.fallback?.provider || "(none)"}`,
            ` sync: ${JSON.stringify(modal.sync ?? {})}`,
            ` coalesce: ${JSON.stringify(cfg.wiki.embedding.coalesce ?? {})}`,
        ];
        ctx.ui.notify(lines.join("\n"), "info");
        return;
    }
    // Edits go to the raw project file, not to the merged/effective config.
    const obj = readProjectConfig(cfgPath);
    const m = modalSection(obj);
    // Store an integer on target[k]. The whole token must be an integer:
    // parseInt alone would silently accept "12abc" as 12 or "1.5" as 1.
    const setNum = (target, k, v) => {
        if (!/^[+-]?\d+$/.test(v)) {
            ctx.ui.notify(`Invalid number for ${k}: ${v}`, "error");
            return false;
        }
        target[k] = Number.parseInt(v, 10);
        return true;
    };
    switch (key) {
        case "baseurl":
        case "url": {
            const url = parts[1];
            if (!url) {
                ctx.ui.notify("/wiki modal config baseUrl <url>", "error");
                return;
            }
            // Drop every trailing slash, not just one.
            m.baseUrl = url.replace(/\/+$/, "");
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`✅ Modal baseUrl set to ${m.baseUrl}`, "info");
            return;
        }
        case "model": {
            if (!parts[1]) {
                ctx.ui.notify("/wiki modal config model <name>", "error");
                return;
            }
            m.model = parts[1];
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`✅ Modal model set to ${parts[1]}`, "info");
            return;
        }
        case "dim": {
            if (!setNum(m, "dim", parts[1] ?? ""))
                return;
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`✅ Modal dim set to ${m.dim}`, "info");
            return;
        }
        case "fallback": {
            const v = parts[1]?.toLowerCase();
            if (v === "none") {
                m.fallback = { enabled: false };
            }
            else if (v === "ollama" || v === "transformers") {
                m.fallback = { enabled: true, provider: v };
            }
            else {
                ctx.ui.notify("/wiki modal config fallback <ollama|transformers|none>", "error");
                return;
            }
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`✅ Modal fallback set to ${v}`, "info");
            return;
        }
        case "pagesize": {
            // Top-level shorthand for `sync pageSize <n>`.
            const sync = m.sync || {};
            if (!setNum(sync, "pageSize", parts[1] ?? ""))
                return;
            m.sync = sync;
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify("✅ Modal sync.pagesize updated", "info");
            return;
        }
        case "sync": {
            const sub = parts[1]?.toLowerCase();
            const val = parts[2]?.toLowerCase();
            const sync = m.sync || {};
            if (sub === "auto") {
                // Require an explicit value; a typo or a missing argument must not
                // silently switch auto-sync off.
                if (val === "on" || val === "true") {
                    sync.autoSync = true;
                }
                else if (val === "off" || val === "false") {
                    sync.autoSync = false;
                }
                else {
                    ctx.ui.notify("/wiki modal config sync auto <on|off>", "error");
                    return;
                }
            }
            else if (sub === "interval") {
                if (!setNum(sync, "autoSyncIntervalMs", parts[2] ?? ""))
                    return;
            }
            else if (sub === "pagesize") {
                if (!setNum(sync, "pageSize", parts[2] ?? ""))
                    return;
            }
            else if (sub === "collections") {
                // Names keep their original case; only keywords are lower-cased.
                sync.collections = parts.slice(2);
            }
            else {
                ctx.ui.notify("/wiki modal config sync <auto on|off|interval <ms>|pageSize <n>|collections ...>", "error");
                return;
            }
            m.sync = sync;
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`✅ Modal sync.${sub} updated`, "info");
            return;
        }
        case "coalesce": {
            const sub = parts[1]?.toLowerCase();
            // Coalescer settings live beside `modal`, under wiki.embedding.
            const wiki = obj.wiki || {};
            const emb = wiki.embedding || {};
            const co = emb.coalesce || {};
            if (sub === "debounce") {
                if (!setNum(co, "debounceMs", parts[2] ?? ""))
                    return;
            }
            else if (sub === "batch") {
                if (!setNum(co, "maxBatchSize", parts[2] ?? ""))
                    return;
            }
            else if (sub === "concurrency") {
                if (!setNum(co, "maxConcurrentFlushes", parts[2] ?? ""))
                    return;
            }
            else {
                ctx.ui.notify("/wiki modal config coalesce <debounce|batch|concurrency> <n>", "error");
                return;
            }
            emb.coalesce = co;
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`✅ Modal coalesce.${sub} updated`, "info");
            return;
        }
        case "token": {
            // Informational only: this command never writes a token to disk.
            ctx.ui.notify(`Token resolution: env ${MODAL_TOKEN_ENV} is preferred (never committed).\nSet it in your shell: export ${MODAL_TOKEN_ENV}=...\nConfig wiki.embedding.modal.apiToken is a fallback only.`, "info");
            return;
        }
        default:
            ctx.ui.notify(MODAL_CONFIG_USAGE, "info");
    }
};
|
|
832
|
+
/**
 * `/wiki modal <status|config|sync|jobs|migrate> ...` dispatcher.
 *
 * - `status` (default): prints configuration, then — when a client can be
 *   created — the health check result and the remote collection list.
 * - `config`: delegates to handleModalConfig.
 * - `sync [--collection <name>] [--full]`: syncs one or all collections and
 *   reports rows/watermark per collection.
 * - `jobs <id>`: polls one bulk job.
 * - `migrate <model> [dim]`: writes the new model (and dim) to the project
 *   config, then runs a remote re-index of every collection.
 *
 * Failures from the remote calls are reported through `ctx.ui.notify`
 * rather than thrown.
 *
 * @param args Everything after `/wiki modal`.
 * @param ctx  Command context; uses `ctx.cwd` and `ctx.ui.notify`.
 * @param pi   Extension API handle (unused here; kept for a uniform handler signature).
 */
const handleModal = async (args, ctx, pi) => {
    const cfg = loadConfig(ctx.cwd);
    const parts = args.trim().split(/\s+/).filter(Boolean);
    const sub = parts[0]?.toLowerCase() || "status";
    const rest = parts.slice(1).join(" ");
    // Thrown values are not guaranteed to be Error instances.
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    switch (sub) {
        case "status": {
            const modal = cfg.wiki.embedding.modal;
            const tokenSrc = process.env[MODAL_TOKEN_ENV]
                ? `env ${MODAL_TOKEN_ENV} ✅`
                : modal?.apiToken
                    ? "config"
                    : "❌ none";
            const lines = [
                "**Modal Status**",
                "",
                `Configured: ${isModalConfigured(cfg.wiki) ? "✅" : "❌"}`,
                ` baseUrl: ${modal?.baseUrl || "(not set)"}`,
                ` model: ${modal?.model || "(default embeddinggemma)"}`,
                ` dim: ${modal?.dim ?? "(native)"}`,
                ` token: ${tokenSrc}`,
            ];
            const client = createModalClient(cfg.wiki);
            if (client) {
                // The two probes are independent: a failed health check must not
                // hide the collection listing, and vice versa.
                try {
                    const health = await client.health();
                    lines.push("", `Health: ✅ ok, default_model=${health.default_model}`);
                }
                catch (err) {
                    lines.push("", `Health: ❌ ${describe(err)}`);
                }
                try {
                    const cols = await client.syncCollections();
                    lines.push("", `Remote collections (${cols.length}):`);
                    for (const c of cols)
                        lines.push(` • ${c.collection} / ${c.model} / ${c.dim} — ${c.rows} rows (${c.table})`);
                }
                catch (err) {
                    lines.push("", `Remote collections: ❌ ${describe(err)}`);
                }
            }
            ctx.ui.notify(lines.join("\n"), "info");
            return;
        }
        case "config":
            await handleModalConfig(rest, ctx);
            return;
        case "sync": {
            if (!isModalConfigured(cfg.wiki)) {
                ctx.ui.notify(`Modal not configured. Set baseUrl + ${MODAL_TOKEN_ENV} first.`, "error");
                return;
            }
            const syncTokens = rest.split(/\s+/).filter(Boolean);
            const full = syncTokens.includes("--full");
            const colFlagIdx = syncTokens.indexOf("--collection");
            const oneCollection = colFlagIdx >= 0 ? syncTokens[colFlagIdx + 1] : undefined;
            // `--collection` needs a name. Without this check a missing value would
            // silently sync everything, and `--collection --full` would treat
            // "--full" as the collection name.
            if (colFlagIdx >= 0 && (!oneCollection || oneCollection.startsWith("--"))) {
                ctx.ui.notify("/wiki modal sync [--collection <name>] [--full]", "error");
                return;
            }
            ctx.ui.notify(`Syncing ${oneCollection ? `"${oneCollection}"` : "all collections"}${full ? " (full)" : ""}...`, "info");
            try {
                const results = oneCollection
                    ? [await syncCollection(cfg, oneCollection, { full })]
                    : await syncAll(cfg, undefined, { full });
                const lines = ["**Sync Report:**", ""];
                for (const r of results)
                    lines.push(` ${r.rows > 0 ? "✅" : "•"} ${r.collection} / ${r.model} / ${r.dim}: ${r.rows} rows, watermark=${r.watermark}${r.full ? " (full)" : ""}`);
                lines.push("", "Re-running with no new rows is a no-op.");
                ctx.ui.notify(lines.join("\n"), "info");
            }
            catch (err) {
                ctx.ui.notify(`Sync failed: ${describe(err)}`, "error");
            }
            return;
        }
        case "jobs": {
            const jobId = parts[1];
            if (!jobId) {
                ctx.ui.notify("/wiki modal jobs <job_id> — poll a Modal bulk job.\n(Server-side job listing is pending upstream.)", "info");
                return;
            }
            const client = createModalClient(cfg.wiki);
            if (!client) {
                ctx.ui.notify("Modal not configured.", "error");
                return;
            }
            try {
                const status = await client.jobStatus(jobId);
                ctx.ui.notify([
                    `Job ${jobId}:`,
                    ` status: ${status.status}`,
                    ` collection: ${status.collection}`,
                    ` model: ${status.model} / dim ${status.dim}`,
                    ` processed: ${status.processed}/${status.total}`,
                    ...(status.error ? [` error: ${status.error}`] : []),
                ].join("\n"), "info");
            }
            catch (err) {
                ctx.ui.notify(`Job poll failed: ${describe(err)}`, "error");
            }
            return;
        }
        case "migrate": {
            const newModel = parts[1];
            if (!newModel) {
                ctx.ui.notify("/wiki modal migrate <newModel> [dim] — change the canonical model + re-embed (remote).\nOld namespace is left intact until verified.", "info");
                return;
            }
            // The dim must be a whole positive integer; parseInt alone would accept
            // "12abc" as 12.
            let newDim;
            if (parts[2] !== undefined) {
                const parsedDim = /^\d+$/.test(parts[2]) ? Number.parseInt(parts[2], 10) : Number.NaN;
                if (!Number.isFinite(parsedDim) || parsedDim <= 0) {
                    ctx.ui.notify(`Invalid dim: ${parts[2]}`, "error");
                    return;
                }
                newDim = parsedDim;
            }
            // Check before touching the config: otherwise an unconfigured setup
            // would have its canonical model rewritten and then fail the re-embed.
            if (!isModalConfigured(cfg.wiki)) {
                ctx.ui.notify(`Modal not configured. Set baseUrl + ${MODAL_TOKEN_ENV} first.`, "error");
                return;
            }
            const { project: cfgPath } = findConfig(ctx.cwd);
            if (!cfgPath) {
                ctx.ui.notify("No config found.", "error");
                return;
            }
            const obj = readProjectConfig(cfgPath);
            const m = modalSection(obj);
            // Remember the previous values for the user-facing messages below.
            const oldModel = m.model || "embeddinggemma";
            const oldDim = m.dim;
            m.model = newModel;
            if (newDim != null)
                m.dim = newDim;
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`Canonical model → ${newModel}${newDim ? `@${newDim}` : ""} (was ${oldModel}${oldDim ? `@${oldDim}` : ""}). Old namespace kept. Starting remote re-embed...`, "info");
            // Reload so the re-index sees the model that was just written.
            const fresh = loadConfig(ctx.cwd);
            try {
                const results = await reindexRemote(fresh, Object.keys(fresh.collections));
                const lines = [`**Migration → ${newModel}:**`, ""];
                for (const r of results)
                    if (r.error)
                        lines.push(` ❌ ${r.collection}: ${r.error}`);
                    else
                        lines.push(` ✅ ${r.collection}: synced ${r.sync?.rows ?? 0} rows`);
                lines.push("", `Old col_*__${oldModel}__* tables are untouched. Verify the new space, then drop the old table(s) when ready.`);
                ctx.ui.notify(lines.join("\n"), "info");
            }
            catch (err) {
                ctx.ui.notify(`Migration re-embed failed: ${describe(err)}`, "error");
            }
            return;
        }
        default:
            ctx.ui.notify("Unknown /wiki modal subcommand. Try: status, config, sync, jobs, migrate", "error");
    }
};
|
|
531
977
|
// ── /wiki context ────────────────────────────────────────────────────────────
|
|
532
978
|
const handleContext = async (args, ctx, pi) => {
|
|
533
979
|
const parts = args.trim().split(/\s+/g);
|
|
@@ -723,6 +1169,7 @@ export const registerCommands = (pi) => {
|
|
|
723
1169
|
"injector",
|
|
724
1170
|
"context",
|
|
725
1171
|
"embedding",
|
|
1172
|
+
"modal",
|
|
726
1173
|
"watcher",
|
|
727
1174
|
"setup",
|
|
728
1175
|
];
|
|
@@ -756,10 +1203,15 @@ export const registerCommands = (pi) => {
|
|
|
756
1203
|
.map((c) => ({ label: c, value: c, description: `injector ${c}` }));
|
|
757
1204
|
}
|
|
758
1205
|
if (subcommand === "reindex") {
|
|
759
|
-
return ["--all", "--reembed"]
|
|
1206
|
+
return ["--all", "--reembed", "--remote"]
|
|
760
1207
|
.filter((c) => c.startsWith(prefix))
|
|
761
1208
|
.map((c) => ({ label: c, value: c, description: `reindex ${c}` }));
|
|
762
1209
|
}
|
|
1210
|
+
if (subcommand === "modal") {
|
|
1211
|
+
return ["status", "config", "sync", "jobs", "migrate"]
|
|
1212
|
+
.filter((c) => c.startsWith(prefix))
|
|
1213
|
+
.map((c) => ({ label: c, value: c, description: `modal ${c}` }));
|
|
1214
|
+
}
|
|
763
1215
|
if (subcommand === "watcher") {
|
|
764
1216
|
return ["start", "stop", "status"]
|
|
765
1217
|
.filter((c) => c.startsWith(prefix))
|
|
@@ -798,6 +1250,8 @@ export const registerCommands = (pi) => {
|
|
|
798
1250
|
return handleContext(rest, ctx, pi);
|
|
799
1251
|
case "embedding":
|
|
800
1252
|
return handleEmbedding(rest, ctx, pi);
|
|
1253
|
+
case "modal":
|
|
1254
|
+
return handleModal(rest, ctx, pi);
|
|
801
1255
|
case "server":
|
|
802
1256
|
return handleServer(ctx);
|
|
803
1257
|
case "watcher":
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding request coalescer — debounce + batch.
|
|
3
|
+
*
|
|
4
|
+
* Collects individual embedding requests arriving within a short window and
|
|
5
|
+
* flushes them as a single batched call to the backend (`/embed` for the modal
|
|
6
|
+
* provider, `/api/embed` for ollama, etc.), so ingest/append never fires N tiny
|
|
7
|
+
* requests. Modeled on the watcher's coalescing in `src/watcher.ts`: a debounce
|
|
8
|
+
* window (≈1000ms), a max-batch early flush, and a `maxConcurrentFlushes` cap
|
|
9
|
+
* (the analog of `pendingQueue` + `maxConcurrent`).
|
|
10
|
+
*
|
|
11
|
+
* ──────────────────────────────────────────────────────────────────────────
|
|
12
|
+
* NOTE FOR AGENT B (extension integration — see docs/plans/agent-B-*.md #11):
|
|
13
|
+
*
|
|
14
|
+
* This is a complete, dependency-free, unit-tested building block (see
|
|
15
|
+
* test/embed-queue.test.ts). It is intentionally NOT wired into anything yet.
|
|
16
|
+
* Wire it in `src/lance.ts` roughly like:
|
|
17
|
+
*
|
|
18
|
+
* const coalescer = new EmbeddingCoalescer({
|
|
19
|
+
* embedFn: (texts, task) => modalClient.embed(texts, { task }).then(r => r.vectors),
|
|
20
|
+
* debounceMs: cfg.embedding.coalesce?.debounceMs ?? 1000,
|
|
21
|
+
* maxBatchSize: cfg.embedding.coalesce?.maxBatchSize ?? 64,
|
|
22
|
+
* maxConcurrentFlushes: cfg.embedding.coalesce?.maxConcurrentFlushes ?? 2,
|
|
23
|
+
* });
|
|
24
|
+
*
|
|
25
|
+
* ROUTING POLICY (yours to own, not baked in here):
|
|
26
|
+
* - append / ingest / bulk → coalescer.embed(text, "document") [debounced]
|
|
27
|
+
* - interactive wiki_search → coalescer.embedImmediate(query, "query")
|
|
28
|
+
* (latency-sensitive; bypasses the debounce)
|
|
29
|
+
*
|
|
30
|
+
* It is provider-agnostic: `embedFn` can wrap modal, ollama, or transformers —
|
|
31
|
+
* they all take a batch and return one vector per input. Batches are
|
|
32
|
+
* homogeneous per `task` (queries and documents flush separately) because the
|
|
33
|
+
* embedding endpoints take a single task per call.
|
|
34
|
+
*
|
|
35
|
+
* Make the knobs configurable (Q6 in the decision log): debounceMs,
|
|
36
|
+
* maxBatchSize, maxConcurrentFlushes, dedupe, and whether search bypasses.
|
|
37
|
+
* Adapt the interface freely — this is a reference, not a contract.
|
|
38
|
+
* ──────────────────────────────────────────────────────────────────────────
|
|
39
|
+
*/
|
|
40
|
+
/** Kind of text being embedded; each batch sent to the backend carries one task. */
export type EmbedTask = "query" | "document";
/** Batched embedding backend: takes texts, resolves to one vector per text, in input order. */
export type EmbedFn = (texts: string[], task: EmbedTask) => Promise<number[][]>;
/** Construction options for {@link EmbeddingCoalescer}. Only `embedFn` is required. */
export interface CoalescerOptions {
    /** Backend that embeds a whole batch in one call. */
    embedFn: EmbedFn;
    /** How long (ms) to keep collecting requests before a flush. Default 1000 (matches watcher). */
    debounceMs?: number;
    /** Buffer size per task that triggers an immediate flush. Default 64. */
    maxBatchSize?: number;
    /** Upper bound on batched embedFn calls running at the same time. Default 2. */
    maxConcurrentFlushes?: number;
    /** When true, identical texts in one batch are embedded once. Default true. */
    dedupe?: boolean;
}
/**
 * Debounce-and-batch front end for an {@link EmbedFn}: callers request single
 * texts, the coalescer groups them into batched backend calls.
 */
export declare class EmbeddingCoalescer {
    private readonly embedFn;
    private readonly debounceMs;
    private readonly maxBatchSize;
    private readonly maxConcurrentFlushes;
    private readonly dedupe;
    private readonly buffers;
    private readonly timers;
    private active;
    private readonly pendingFlushes;
    private readonly inFlight;
    constructor(opts: CoalescerOptions);
    /** Buffer a text for embedding; the promise resolves with its vector after the batch flushes. */
    embed(text: string, task?: EmbedTask): Promise<number[]>;
    /** Embed one text immediately, skipping the debounce (for latency-sensitive search). */
    embedImmediate(text: string, task?: EmbedTask): Promise<number[]>;
    /** Count of requests that are buffered and not yet flushed. */
    size(): number;
    /** Flush every buffered task now and wait until all in-flight batches settle. */
    drain(): Promise<void>;
    /** Cancel pending debounce timers; waiters that are already buffered are not rejected. */
    dispose(): void;
    private arm;
    private flushTask;
    private schedule;
    private runBatch;
}
|