pi-vault-mind 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,8 +7,10 @@ import { DEFAULT_CONFIG } from "./types.js";
7
7
  import { CONFIG_FILES, EXT_ROOT, collectionNames, ensureDir, findConfig, getPiContextConfig, hasPiContextTools, loadConfig, } from "./utils.js";
8
8
  import { updateActiveCollectionWidget } from "./widget.js";
9
9
  import { connect, pullOllamaModel, testOllamaConnection } from "./lance.js";
10
+ import { MODAL_TOKEN_ENV, createModalClient, isModalConfigured } from "./modal-config.js";
10
11
  import { createServerState } from "./server.js";
11
12
  import { createCollectionWizard, createInjectorWizard, openSettingsDashboard, setupWizard, } from "./settings-ui.js";
13
+ import { reindexRemote, syncAll, syncCollection } from "./sync.js";
12
14
  import { createWatcherState, getWatcherStatus, startWatcher, stopWatcher } from "./watcher.js";
13
15
  // ── Shared helpers ───────────────────────────────────────────────────────────
14
16
  const WIKI_USAGE = [
@@ -26,10 +28,15 @@ const WIKI_USAGE = [
26
28
  " /wiki context enable|disable Enable/disable pi-context integration",
27
29
  " /wiki context status Show pi-context integration status",
28
30
  " /wiki embedding status Show embedding config + Ollama models",
29
- " /wiki embedding use Switch provider (ollama | transformers)",
31
+ " /wiki embedding use Switch provider (ollama | transformers | modal)",
30
32
  " /wiki embedding model Set Ollama embedding model",
31
33
  " /wiki embedding models List available Ollama models",
32
34
  " /wiki embedding pull Pull a model from Ollama",
35
+ " /wiki modal status Show Modal config + health + remote collections",
36
+ " /wiki modal config Set Modal baseUrl/model/dim/sync/fallback",
37
+ " /wiki modal sync Pull server vectors into local LanceDB [--full]",
38
+ " /wiki modal jobs <id> Poll a Modal bulk job",
39
+ " /wiki modal migrate <model> Change canonical model + re-embed (remote)",
33
40
  " /wiki watcher start Start passive file watcher",
34
41
  " /wiki watcher stop Stop passive file watcher",
35
42
  " /wiki watcher status Show watcher status",
@@ -139,9 +146,42 @@ export const selectActiveCollection = async (ctx) => {
139
146
  }
140
147
  };
141
148
  // ── /wiki init ──────────────────────────────────────────────────────────────
/**
 * Vault-root `.gitignore` entries that keep pi-vault-mind compatible with
 * `obsidian-git` (and, transitively, Obsidian Sync setups that also back up via
 * git): the LanceDB index is a large, per-device, rebuildable binary and must
 * never be committed; Obsidian's workspace UI state churns constantly. See
 * docs/OBSIDIAN_SETUP.md §6.
 */
export const GITIGNORE_ENTRIES = [".lancedb/", ".obsidian/workspace*.json"];
/**
 * Decide what to do with the vault `.gitignore`, given its current contents.
 * Pure — does no I/O — so it is unit-testable.
 *
 * Matching is lenient rather than line-exact: every existing line and every
 * required entry is trimmed and stripped of leading/trailing slashes before
 * comparison, so `.lancedb`, `.lancedb/` and `/.lancedb/` all count as the same
 * rule and no duplicate is appended.
 *
 * @param existing Current file contents, or `null`/`undefined` when the file
 *   does not exist.
 * @returns `{ action, content }` where `action` is:
 *   - `"create"` — file is absent; `content` is the full file to write.
 *   - `"append"` — some entries are missing; `content` is the text to append
 *     (it starts with the separator needed to leave one blank line).
 *   - `"skip"`   — every entry is already covered; `content` is `""`.
 */
export const planGitignore = (existing) => {
    const header = "# pi-vault-mind: keep the rebuildable binary index out of git";
    // `== null` also covers `undefined`, which previously crashed on `.split`.
    if (existing == null) {
        // NOTE: this template spells the entries out so each can carry its own
        // comment; keep it in sync with GITIGNORE_ENTRIES.
        return {
            action: "create",
            content: `${header}\n.lancedb/\n# Obsidian UI workspace state churns constantly\n.obsidian/workspace*.json\n`,
        };
    }
    const normalize = (line) => line.trim().replace(/^\/+|\/+$/g, "");
    const present = new Set(existing.split(/\r?\n/).map(normalize));
    const missing = GITIGNORE_ENTRIES.filter((entry) => !present.has(normalize(entry)));
    if (missing.length === 0) {
        return { action: "skip", content: "" };
    }
    // Separator: nothing for an empty file, one newline when the file already
    // ends with one, two when the last line is unterminated — in every case the
    // appended section ends up preceded by exactly one blank line.
    let prefix = "\n\n";
    if (existing.length === 0) {
        prefix = "";
    }
    else if (existing.endsWith("\n")) {
        prefix = "\n";
    }
    return { action: "append", content: `${prefix}# pi-vault-mind\n${missing.join("\n")}\n` };
};
142
181
  const handleInit = async (_args, ctx, pi) => {
143
182
  const cfg = loadConfig(ctx.cwd);
144
183
  const created = [];
184
+ const updated = [];
145
185
  const skipped = [];
146
186
  const ensureFile = (dest, tmpl) => {
147
187
  if (fs.existsSync(dest)) {
@@ -203,12 +243,30 @@ const handleInit = async (_args, ctx, pi) => {
203
243
  if (ij.artifactPath)
204
244
  ensureFile(ij.artifactPath, "ARTIFACT.md");
205
245
  }
246
+ // Keep the rebuildable binary index out of git so obsidian-git (and git-backed
247
+ // Obsidian Sync setups) never commit it. See docs/OBSIDIAN_SETUP.md §6.
248
+ const gitignoreDest = path.join(ctx.cwd, ".gitignore");
249
+ const plan = planGitignore(fs.existsSync(gitignoreDest) ? fs.readFileSync(gitignoreDest, "utf-8") : null);
250
+ if (plan.action === "create") {
251
+ fs.writeFileSync(gitignoreDest, plan.content, "utf-8");
252
+ created.push(gitignoreDest);
253
+ }
254
+ else if (plan.action === "append") {
255
+ fs.appendFileSync(gitignoreDest, plan.content);
256
+ updated.push(gitignoreDest);
257
+ }
258
+ else {
259
+ skipped.push(gitignoreDest);
260
+ }
206
261
  const msg = [
207
262
  "Wiki scaffolding complete.",
208
263
  "",
209
264
  "Created:",
210
265
  ...created.map((c) => ` • ${path.relative(ctx.cwd, c)}`),
211
266
  ];
267
+ if (updated.length) {
268
+ msg.push("", "Updated:", ...updated.map((u) => ` • ${path.relative(ctx.cwd, u)}`));
269
+ }
212
270
  if (skipped.length) {
213
271
  msg.push("", "Skipped (already exist):", ...skipped.map((s) => ` • ${path.relative(ctx.cwd, s)}`));
214
272
  }
@@ -328,10 +386,47 @@ const handleApprove = async (args, ctx) => {
328
386
  // ── /wiki reindex ────────────────────────────────────────────────────────────
329
387
  const handleReindex = async (args, ctx, pi) => {
330
388
  const cfg = loadConfig(ctx.cwd);
331
- const subcommand = args.trim().split(/\s+/)[0]?.toLowerCase() || "";
332
- const rebuildEmbeddings = subcommand === "--reembed" || subcommand === "--full";
333
- const reindexAll = subcommand === "--all";
334
- const collectionFilter = reindexAll || rebuildEmbeddings ? null : subcommand || null;
389
+ const tokens = args.trim().split(/\s+/).filter(Boolean);
390
+ const flags = new Set(tokens.map((t) => t.toLowerCase()));
391
+ const rebuildEmbeddings = flags.has("--reembed") || flags.has("--full");
392
+ const reindexAll = flags.has("--all");
393
+ const remote = flags.has("--remote");
394
+ const collectionFilter = reindexAll || rebuildEmbeddings ? null : (tokens.find((t) => !t.startsWith("--")) ?? null);
395
+ // Remote bulk re-index: read JSONL → submit Modal bulk job → poll → sync down.
396
+ if (remote) {
397
+ if (cfg.wiki.embedding.provider !== "modal") {
398
+ ctx.ui.notify("--remote requires the modal provider. Run /wiki embedding use modal.", "error");
399
+ return;
400
+ }
401
+ if (!isModalConfigured(cfg.wiki)) {
402
+ ctx.ui.notify(`Modal not configured. Set baseUrl (/wiki modal config baseUrl) and ${MODAL_TOKEN_ENV}.`, "error");
403
+ return;
404
+ }
405
+ const names = reindexAll
406
+ ? Object.keys(cfg.collections)
407
+ : collectionFilter
408
+ ? [collectionFilter]
409
+ : Object.keys(cfg.collections);
410
+ ctx.ui.notify(`Remote re-index: submitting Modal bulk job for ${names.join(", ")}...`, "info");
411
+ try {
412
+ const results = await reindexRemote(cfg, names, {
413
+ onStatus: (s) => ctx.ui.notify(` ${s.collection || ""}: ${s.status} ${s.processed}/${s.total}`, "info"),
414
+ });
415
+ const lines = ["**Remote Re-index Report:**", ""];
416
+ for (const r of results) {
417
+ if (r.error)
418
+ lines.push(` ❌ ${r.collection}: ${r.error}`);
419
+ else
420
+ lines.push(` ✅ ${r.collection}: job ${r.job?.status}, synced ${r.sync?.rows ?? 0} rows (wm ${r.sync?.watermark ?? 0})`);
421
+ }
422
+ lines.push("", "Old namespaces are left intact until the new one is verified.");
423
+ ctx.ui.notify(lines.join("\n"), "info");
424
+ }
425
+ catch (err) {
426
+ ctx.ui.notify(`Remote re-index failed: ${err.message}`, "error");
427
+ }
428
+ return;
429
+ }
335
430
  ctx.ui.notify(rebuildEmbeddings
336
431
  ? "Reindexing: regenerating embeddings + rebuilding indexes..."
337
432
  : "Reindexing: rebuilding FTS + vector indexes...", "info");
@@ -432,12 +527,27 @@ const handleEmbedding = async (args, ctx, pi) => {
432
527
  `Provider: ${cfg.wiki.embedding.provider}`,
433
528
  cfg.wiki.embedding.provider === "ollama"
434
529
  ? `Model: ${cfg.wiki.embedding.ollamaModel || "embeddinggemma"}`
435
- : "Model: all-MiniLM-L6-v2 (384 dims)",
530
+ : cfg.wiki.embedding.provider === "modal"
531
+ ? `Model: ${cfg.wiki.embedding.modal?.model || "(default embeddinggemma)"}`
532
+ : "Model: all-MiniLM-L6-v2 (384 dims)",
436
533
  `FTS: ${cfg.wiki.ftsEnabled !== false ? "enabled" : "disabled"}`,
437
534
  `Graph: ${cfg.wiki.graph?.enabled !== false ? "enabled" : "disabled"}`,
438
535
  `Data Dir: ${cfg.wiki.dataDir}`,
439
536
  ];
440
- if (cfg.wiki.embedding.provider === "ollama" || !cfg.wiki.embedding.provider) {
537
+ if (cfg.wiki.embedding.provider === "modal") {
538
+ const modal = cfg.wiki.embedding.modal;
539
+ const tokenSrc = process.env[MODAL_TOKEN_ENV]
540
+ ? `env ${MODAL_TOKEN_ENV} ✅`
541
+ : modal?.apiToken
542
+ ? "config (set env PVM_API_TOKEN to override)"
543
+ : "❌ none (set PVM_API_TOKEN env)";
544
+ lines.push("", "**Modal:**", ` Base URL: ${modal?.baseUrl || "❌ not set"}`, ` Model: ${modal?.model || "(default embeddinggemma)"}`, ` Dim: ${modal?.dim ?? "(native)"}`, ` Token: ${tokenSrc}`, ` Fallback: ${modal?.fallback?.enabled === false ? "disabled" : modal?.fallback?.provider || "(none — degrade to FTS)"}`, ` Sync: auto=${modal?.sync?.autoSync ? "on" : "off"}, interval=${modal?.sync?.autoSyncIntervalMs ?? 300000}ms`);
545
+ const co = cfg.wiki.embedding.coalesce;
546
+ if (co) {
547
+ lines.push(` Coalesce: debounce=${co.debounceMs ?? 1000}ms, batch=${co.maxBatchSize ?? 64}, concurrency=${co.maxConcurrentFlushes ?? 2}, dedupe=${co.dedupe ?? true}, searchBypass=${co.searchBypass ?? true}`);
548
+ }
549
+ }
550
+ else if (cfg.wiki.embedding.provider === "ollama" || !cfg.wiki.embedding.provider) {
441
551
  const conn = await testOllamaConnection(pi);
442
552
  lines.push("", "**Ollama Status:**", ` Reachable: ${conn.reachable ? "✅ Yes" : "❌ No"}`);
443
553
  if (conn.error)
@@ -462,8 +572,8 @@ const handleEmbedding = async (args, ctx, pi) => {
462
572
  return;
463
573
  }
464
574
  case "use": {
465
- if (!value || !["ollama", "transformers"].includes(value)) {
466
- ctx.ui.notify("/wiki embedding use <ollama|transformers>", "error");
575
+ if (!value || !["ollama", "transformers", "modal"].includes(value)) {
576
+ ctx.ui.notify("/wiki embedding use <ollama|transformers|modal>", "error");
467
577
  return;
468
578
  }
469
579
  if (value === "ollama") {
@@ -474,6 +584,12 @@ const handleEmbedding = async (args, ctx, pi) => {
474
584
  return;
475
585
  }
476
586
  }
587
+ if (value === "modal") {
588
+ const modal = cfg.wiki.embedding.modal;
589
+ if (!modal?.baseUrl) {
590
+ ctx.ui.notify("Modal needs a base URL. Set it with: /wiki modal config baseUrl <url>\n(Token via PVM_API_TOKEN env, preferred.)", "warning");
591
+ }
592
+ }
477
593
  const existing = JSON.parse(fs.readFileSync(cfgPath, "utf-8"));
478
594
  existing.wiki = existing.wiki || {};
479
595
  existing.wiki.embedding = existing.wiki.embedding || {};
@@ -528,6 +644,336 @@ const handleEmbedding = async (args, ctx, pi) => {
528
644
  }
529
645
  }
530
646
  };
647
+ // ── /wiki modal ──────────────────────────────────────────────────────────────
/**
 * Help text for `/wiki modal config`. Every line corresponds to a key the
 * config handler actually accepts: page size lives under `sync`, the fallback
 * provider may be `transformers`, and the `sync collections` / `coalesce
 * concurrency` setters are listed.
 */
const MODAL_CONFIG_USAGE = [
    "**/wiki modal config**",
    "",
    " /wiki modal config baseUrl <url> Set the Modal ASGI base URL",
    " /wiki modal config model <name> Set the canonical embedder (default embeddinggemma)",
    " /wiki modal config dim <n> Set output dimension (omit for native)",
    " /wiki modal config fallback ollama|transformers|none Set offline fallback provider",
    " /wiki modal config sync auto on|off Toggle auto-sync",
    " /wiki modal config sync interval <ms> Auto-sync interval",
    " /wiki modal config sync pageSize <n> Sync page size",
    " /wiki modal config sync collections <name...> Set the sync.collections list",
    " /wiki modal config coalesce debounce <ms> Coalescer debounce window",
    " /wiki modal config coalesce batch <n> Coalescer max batch size",
    " /wiki modal config coalesce concurrency <n> Coalescer max concurrent flushes",
    " /wiki modal config token Show token guidance (use PVM_API_TOKEN env)",
    "",
    " (no args) Show current Modal config",
].join("\n");
/**
 * Read the raw project config object (mutable) so a caller can edit it and
 * write it back.
 *
 * A missing or empty file yields a fresh `{}`. A file that exists but is not a
 * JSON object is NOT treated as empty: callers write the returned object back
 * to the same path, so returning `{}` here would silently replace the user's
 * (merely mistyped) configuration with a near-empty one.
 *
 * @param cfgPath Path of the project config file.
 * @returns The parsed top-level config object.
 * @throws Error when the file cannot be read for a reason other than not
 *   existing, is not valid JSON, or does not contain a JSON object.
 */
const readProjectConfig = (cfgPath) => {
    let raw;
    try {
        raw = fs.readFileSync(cfgPath, "utf-8");
    }
    catch (err) {
        // Only "file does not exist" means "start from scratch".
        if (err && err.code === "ENOENT")
            return {};
        throw err;
    }
    if (raw.trim() === "")
        return {};
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Refusing to modify ${cfgPath}: it is not valid JSON (${reason}). Fix the file and retry.`);
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error(`Refusing to modify ${cfgPath}: expected a JSON object at the top level.`);
    }
    return parsed;
};
/**
 * Persist the project config: pretty-printed JSON (2-space indent) followed by
 * a single trailing newline, written synchronously as UTF-8.
 */
const writeProjectConfig = (cfgPath, obj) => {
    const serialized = JSON.stringify(obj, null, 2);
    fs.writeFileSync(cfgPath, `${serialized}\n`, "utf-8");
};
676
+ const modalSection = (obj) => {
677
+ obj.wiki = obj.wiki || {};
678
+ const wiki = obj.wiki;
679
+ wiki.embedding = wiki.embedding || {};
680
+ const emb = wiki.embedding;
681
+ emb.modal = emb.modal || {};
682
+ return emb.modal;
683
+ };
/**
 * `/wiki modal config [...]` — show or edit the Modal section of the project
 * config file.
 *
 * With no arguments it prints the effective Modal settings (from the loaded
 * config). With a key it edits the raw project config object and writes it
 * back; nothing is written when validation fails.
 *
 * Accepted keys: `baseUrl`/`url`, `model`, `dim`, `fallback`, `sync ...`,
 * `pageSize` (alias of `sync pageSize`), `coalesce ...`, `token`. Anything else
 * prints the usage text.
 *
 * @param args Everything after `/wiki modal config`.
 * @param ctx  Command context; uses `ctx.cwd` and `ctx.ui.notify`.
 */
const handleModalConfig = async (args, ctx) => {
    const { project: cfgPath } = findConfig(ctx.cwd);
    if (!cfgPath) {
        ctx.ui.notify("No config found. Run /wiki init first.", "error");
        return;
    }
    const cfg = loadConfig(ctx.cwd);
    const parts = args.trim().split(/\s+/).filter(Boolean);
    const key = parts[0]?.toLowerCase();
    if (!key) {
        const modal = cfg.wiki.embedding.modal ?? {};
        // Use the shared constant so the hint can never name a different
        // variable than the one actually read.
        const tokenSrc = process.env[MODAL_TOKEN_ENV]
            ? `env ${MODAL_TOKEN_ENV} ✅`
            : modal.apiToken
                ? `config (set ${MODAL_TOKEN_ENV} env to override)`
                : `❌ none (set ${MODAL_TOKEN_ENV} env)`;
        const lines = [
            "**Modal Config:**",
            "",
            ` baseUrl: ${modal.baseUrl || "❌ not set"}`,
            ` model: ${modal.model || "(default embeddinggemma)"}`,
            ` dim: ${modal.dim ?? "(native)"}`,
            ` token: ${tokenSrc}`,
            ` fallback: ${modal.fallback?.enabled === false ? "disabled" : modal.fallback?.provider || "(none)"}`,
            ` sync: ${JSON.stringify(modal.sync ?? {})}`,
            ` coalesce: ${JSON.stringify(cfg.wiki.embedding.coalesce ?? {})}`,
        ];
        ctx.ui.notify(lines.join("\n"), "info");
        return;
    }
    const obj = readProjectConfig(cfgPath);
    // Guarantees obj.wiki.embedding.modal exists and returns the live object.
    const m = modalSection(obj);
    /** Write the edited config and report success. */
    const save = (message) => {
        writeProjectConfig(cfgPath, obj);
        ctx.ui.notify(message, "info");
    };
    /**
     * Parse `v` as a whole number ≥ `min` and store it on `target[k]`.
     * Unlike `parseInt`, rejects partially numeric input such as "12abc" and
     * signed values such as "-5". Notifies and returns false on failure.
     */
    const setInt = (target, k, v, min = 1) => {
        const n = /^\d+$/.test(v) ? Number(v) : Number.NaN;
        if (!Number.isSafeInteger(n) || n < min) {
            ctx.ui.notify(`Invalid number for ${k}: ${v || "(missing)"} (expected an integer >= ${min})`, "error");
            return false;
        }
        target[k] = n;
        return true;
    };
    switch (key) {
        case "baseurl":
        case "url": {
            const url = parts[1];
            if (!url) {
                ctx.ui.notify("/wiki modal config baseUrl <url>", "error");
                return;
            }
            let parsed = null;
            try {
                parsed = new URL(url);
            }
            catch {
                parsed = null;
            }
            // A scheme-less value like "localhost:8000" parses with protocol
            // "localhost:", so check the protocol explicitly.
            if (!parsed || (parsed.protocol !== "http:" && parsed.protocol !== "https:")) {
                ctx.ui.notify(`Invalid base URL: ${url} (expected http(s)://host[/path])`, "error");
                return;
            }
            m.baseUrl = url.replace(/\/+$/, "");
            save(`✅ Modal baseUrl set to ${m.baseUrl}`);
            return;
        }
        case "model": {
            if (!parts[1]) {
                ctx.ui.notify("/wiki modal config model <name>", "error");
                return;
            }
            m.model = parts[1];
            save(`✅ Modal model set to ${parts[1]}`);
            return;
        }
        case "dim": {
            // An unset dim is displayed as "(native)", so "native" clears it.
            if (parts[1]?.toLowerCase() === "native") {
                delete m.dim;
                save("✅ Modal dim reset to native");
                return;
            }
            if (!setInt(m, "dim", parts[1] ?? ""))
                return;
            save(`✅ Modal dim set to ${m.dim}`);
            return;
        }
        case "fallback": {
            const v = parts[1]?.toLowerCase();
            if (v === "none") {
                m.fallback = { enabled: false };
            }
            else if (v === "ollama" || v === "transformers") {
                m.fallback = { enabled: true, provider: v };
            }
            else {
                ctx.ui.notify("/wiki modal config fallback <ollama|transformers|none>", "error");
                return;
            }
            save(`✅ Modal fallback set to ${v}`);
            return;
        }
        case "pagesize": {
            // Top-level alias of `sync pageSize`.
            const sync = m.sync || {};
            if (!setInt(sync, "pageSize", parts[1] ?? ""))
                return;
            m.sync = sync;
            save("✅ Modal sync.pagesize updated");
            return;
        }
        case "sync": {
            const sub = parts[1]?.toLowerCase();
            const sync = m.sync || {};
            if (sub === "auto") {
                const val = parts[2]?.toLowerCase();
                if (val === "on" || val === "true") {
                    sync.autoSync = true;
                }
                else if (val === "off" || val === "false") {
                    sync.autoSync = false;
                }
                else {
                    // A missing or mistyped value used to silently turn
                    // auto-sync off.
                    ctx.ui.notify("/wiki modal config sync auto <on|off>", "error");
                    return;
                }
            }
            else if (sub === "interval") {
                if (!setInt(sync, "autoSyncIntervalMs", parts[2] ?? ""))
                    return;
            }
            else if (sub === "pagesize") {
                if (!setInt(sync, "pageSize", parts[2] ?? ""))
                    return;
            }
            else if (sub === "collections") {
                sync.collections = parts.slice(2);
            }
            else {
                ctx.ui.notify("/wiki modal config sync <auto on|off|interval <ms>|pageSize <n>|collections ...>", "error");
                return;
            }
            m.sync = sync;
            save(`✅ Modal sync.${sub} updated`);
            return;
        }
        case "coalesce": {
            const sub = parts[1]?.toLowerCase();
            // modalSection() above already created obj.wiki.embedding, so edit
            // it directly instead of going through detached `|| {}` fallbacks.
            const emb = obj.wiki.embedding;
            const co = emb.coalesce || {};
            if (sub === "debounce") {
                // 0 is allowed for the debounce window.
                if (!setInt(co, "debounceMs", parts[2] ?? "", 0))
                    return;
            }
            else if (sub === "batch") {
                if (!setInt(co, "maxBatchSize", parts[2] ?? ""))
                    return;
            }
            else if (sub === "concurrency") {
                if (!setInt(co, "maxConcurrentFlushes", parts[2] ?? ""))
                    return;
            }
            else {
                ctx.ui.notify("/wiki modal config coalesce <debounce|batch|concurrency> <n>", "error");
                return;
            }
            emb.coalesce = co;
            save(`✅ Modal coalesce.${sub} updated`);
            return;
        }
        case "token": {
            ctx.ui.notify(`Token resolution: env ${MODAL_TOKEN_ENV} is preferred (never committed).\nSet it in your shell: export ${MODAL_TOKEN_ENV}=...\nConfig wiki.embedding.modal.apiToken is a fallback only.`, "info");
            return;
        }
        default:
            ctx.ui.notify(MODAL_CONFIG_USAGE, "info");
    }
};
/**
 * `/wiki modal <status|config|sync|jobs|migrate>` dispatcher. With no
 * subcommand it behaves like `status`.
 *
 * - `status`  — print the Modal settings, then (when a client can be created)
 *   the health check result and the remote collection list.
 * - `config`  — delegate to `handleModalConfig`.
 * - `sync [--full] [--collection <name>]` — sync one or all collections.
 * - `jobs <id>` — poll one bulk job.
 * - `migrate <model> [dim]` — persist a new canonical model (and dim), then
 *   start a remote re-embed of every collection.
 *
 * @param args Everything after `/wiki modal`.
 * @param ctx  Command context; uses `ctx.cwd` and `ctx.ui.notify`.
 * @param pi   Unused here; kept for signature parity with sibling handlers.
 */
const handleModal = async (args, ctx, pi) => {
    const cfg = loadConfig(ctx.cwd);
    const parts = args.trim().split(/\s+/).filter(Boolean);
    const sub = parts[0]?.toLowerCase() || "status";
    const rest = parts.slice(1).join(" ");
    // Thrown values are not guaranteed to be Error instances.
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    switch (sub) {
        case "status": {
            const modal = cfg.wiki.embedding.modal;
            const tokenSrc = process.env[MODAL_TOKEN_ENV]
                ? `env ${MODAL_TOKEN_ENV} ✅`
                : modal?.apiToken
                    ? "config"
                    : "❌ none";
            const lines = [
                "**Modal Status**",
                "",
                `Configured: ${isModalConfigured(cfg.wiki) ? "✅" : "❌"}`,
                ` baseUrl: ${modal?.baseUrl || "(not set)"}`,
                ` model: ${modal?.model || "(default embeddinggemma)"}`,
                ` dim: ${modal?.dim ?? "(native)"}`,
                ` token: ${tokenSrc}`,
            ];
            const client = createModalClient(cfg.wiki);
            if (client) {
                // The two probes are reported independently: a failing health
                // check does not suppress the collection listing.
                try {
                    const health = await client.health();
                    lines.push("", `Health: ✅ ok, default_model=${health.default_model}`);
                }
                catch (err) {
                    lines.push("", `Health: ❌ ${describe(err)}`);
                }
                try {
                    const cols = await client.syncCollections();
                    lines.push("", `Remote collections (${cols.length}):`);
                    for (const c of cols)
                        lines.push(` • ${c.collection} / ${c.model} / ${c.dim} — ${c.rows} rows (${c.table})`);
                }
                catch (err) {
                    lines.push("", `Remote collections: ❌ ${describe(err)}`);
                }
            }
            ctx.ui.notify(lines.join("\n"), "info");
            return;
        }
        case "config":
            await handleModalConfig(rest, ctx);
            return;
        case "sync": {
            if (!isModalConfigured(cfg.wiki)) {
                ctx.ui.notify(`Modal not configured. Set baseUrl + ${MODAL_TOKEN_ENV} first.`, "error");
                return;
            }
            const syncTokens = rest.split(/\s+/).filter(Boolean);
            const full = syncTokens.includes("--full");
            const colFlagIdx = syncTokens.indexOf("--collection");
            let oneCollection;
            if (colFlagIdx >= 0) {
                oneCollection = syncTokens[colFlagIdx + 1];
                // `--collection` with no name (or followed by another flag)
                // used to fall through and sync every collection.
                if (!oneCollection || oneCollection.startsWith("--")) {
                    ctx.ui.notify("/wiki modal sync [--full] [--collection <name>]", "error");
                    return;
                }
            }
            ctx.ui.notify(`Syncing ${oneCollection ? `"${oneCollection}"` : "all collections"}${full ? " (full)" : ""}...`, "info");
            try {
                const results = oneCollection
                    ? [await syncCollection(cfg, oneCollection, { full })]
                    : await syncAll(cfg, undefined, { full });
                const lines = ["**Sync Report:**", ""];
                for (const r of results)
                    lines.push(` ${r.rows > 0 ? "✅" : "•"} ${r.collection} / ${r.model} / ${r.dim}: ${r.rows} rows, watermark=${r.watermark}${r.full ? " (full)" : ""}`);
                lines.push("", "Re-running with no new rows is a no-op.");
                ctx.ui.notify(lines.join("\n"), "info");
            }
            catch (err) {
                ctx.ui.notify(`Sync failed: ${describe(err)}`, "error");
            }
            return;
        }
        case "jobs": {
            const jobId = parts[1];
            if (!jobId) {
                ctx.ui.notify("/wiki modal jobs <job_id> — poll a Modal bulk job.\n(Server-side job listing is pending upstream.)", "info");
                return;
            }
            const client = createModalClient(cfg.wiki);
            if (!client) {
                ctx.ui.notify("Modal not configured.", "error");
                return;
            }
            try {
                const status = await client.jobStatus(jobId);
                ctx.ui.notify([
                    `Job ${jobId}:`,
                    ` status: ${status.status}`,
                    ` collection: ${status.collection}`,
                    ` model: ${status.model} / dim ${status.dim}`,
                    ` processed: ${status.processed}/${status.total}`,
                    ...(status.error ? [` error: ${status.error}`] : []),
                ].join("\n"), "info");
            }
            catch (err) {
                ctx.ui.notify(`Job poll failed: ${describe(err)}`, "error");
            }
            return;
        }
        case "migrate": {
            const newModel = parts[1];
            if (!newModel) {
                ctx.ui.notify("/wiki modal migrate <newModel> [dim] — change the canonical model + re-embed (remote).\nOld namespace is left intact until verified.", "info");
                return;
            }
            let newDim;
            if (parts[2] !== undefined) {
                // Strict parse: `parseInt` would accept "768abc" and "-1".
                newDim = /^\d+$/.test(parts[2]) ? Number(parts[2]) : Number.NaN;
                if (!Number.isSafeInteger(newDim) || newDim < 1) {
                    ctx.ui.notify(`Invalid dim: ${parts[2]}`, "error");
                    return;
                }
            }
            // Check preconditions BEFORE persisting the model switch, so the
            // config is not left pointing at a new model when no re-embed can
            // even be started.
            if (!isModalConfigured(cfg.wiki)) {
                ctx.ui.notify(`Modal not configured. Set baseUrl + ${MODAL_TOKEN_ENV} first.`, "error");
                return;
            }
            const { project: cfgPath } = findConfig(ctx.cwd);
            if (!cfgPath) {
                ctx.ui.notify("No config found.", "error");
                return;
            }
            const obj = readProjectConfig(cfgPath);
            const m = modalSection(obj);
            const oldModel = m.model || "embeddinggemma";
            const oldDim = m.dim;
            m.model = newModel;
            if (newDim != null)
                m.dim = newDim;
            writeProjectConfig(cfgPath, obj);
            ctx.ui.notify(`Canonical model → ${newModel}${newDim ? `@${newDim}` : ""} (was ${oldModel}${oldDim ? `@${oldDim}` : ""}). Old namespace kept. Starting remote re-embed...`, "info");
            // Reload so the re-embed sees the model/dim just written.
            const fresh = loadConfig(ctx.cwd);
            try {
                const results = await reindexRemote(fresh, Object.keys(fresh.collections));
                const lines = [`**Migration → ${newModel}:**`, ""];
                for (const r of results) {
                    if (r.error)
                        lines.push(` ❌ ${r.collection}: ${r.error}`);
                    else
                        lines.push(` ✅ ${r.collection}: synced ${r.sync?.rows ?? 0} rows`);
                }
                lines.push("", `Old col_*__${oldModel}__* tables are untouched. Verify the new space, then drop the old table(s) when ready.`);
                ctx.ui.notify(lines.join("\n"), "info");
            }
            catch (err) {
                ctx.ui.notify(`Migration re-embed failed: ${describe(err)}`, "error");
            }
            return;
        }
        default:
            ctx.ui.notify("Unknown /wiki modal subcommand. Try: status, config, sync, jobs, migrate", "error");
    }
};
531
977
  // ── /wiki context ────────────────────────────────────────────────────────────
532
978
  const handleContext = async (args, ctx, pi) => {
533
979
  const parts = args.trim().split(/\s+/g);
@@ -723,6 +1169,7 @@ export const registerCommands = (pi) => {
723
1169
  "injector",
724
1170
  "context",
725
1171
  "embedding",
1172
+ "modal",
726
1173
  "watcher",
727
1174
  "setup",
728
1175
  ];
@@ -756,10 +1203,15 @@ export const registerCommands = (pi) => {
756
1203
  .map((c) => ({ label: c, value: c, description: `injector ${c}` }));
757
1204
  }
758
1205
  if (subcommand === "reindex") {
759
- return ["--all", "--reembed"]
1206
+ return ["--all", "--reembed", "--remote"]
760
1207
  .filter((c) => c.startsWith(prefix))
761
1208
  .map((c) => ({ label: c, value: c, description: `reindex ${c}` }));
762
1209
  }
1210
+ if (subcommand === "modal") {
1211
+ return ["status", "config", "sync", "jobs", "migrate"]
1212
+ .filter((c) => c.startsWith(prefix))
1213
+ .map((c) => ({ label: c, value: c, description: `modal ${c}` }));
1214
+ }
763
1215
  if (subcommand === "watcher") {
764
1216
  return ["start", "stop", "status"]
765
1217
  .filter((c) => c.startsWith(prefix))
@@ -798,6 +1250,8 @@ export const registerCommands = (pi) => {
798
1250
  return handleContext(rest, ctx, pi);
799
1251
  case "embedding":
800
1252
  return handleEmbedding(rest, ctx, pi);
1253
+ case "modal":
1254
+ return handleModal(rest, ctx, pi);
801
1255
  case "server":
802
1256
  return handleServer(ctx);
803
1257
  case "watcher":
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Local embedding request coalescer — debounce + batch.
3
+ *
4
+ * Collects individual embedding requests arriving within a short window and
5
+ * flushes them as a single batched call to the backend (`/embed` for the modal
6
+ * provider, `/api/embed` for ollama, etc.), so ingest/append never fires N tiny
7
+ * requests. Modeled on the watcher's coalescing in `src/watcher.ts`: a debounce
8
+ * window (≈1000ms), a max-batch early flush, and a `maxConcurrentFlushes` cap
9
+ * (the analog of `pendingQueue` + `maxConcurrent`).
10
+ *
11
+ * ──────────────────────────────────────────────────────────────────────────
12
+ * NOTE FOR AGENT B (extension integration — see docs/plans/agent-B-*.md #11):
13
+ *
14
+ * This is a complete, dependency-free, unit-tested building block (see
15
+ * test/embed-queue.test.ts). It is intentionally NOT wired into anything yet.
16
+ * Wire it in `src/lance.ts` roughly like:
17
+ *
18
+ * const coalescer = new EmbeddingCoalescer({
19
+ * embedFn: (texts, task) => modalClient.embed(texts, { task }).then(r => r.vectors),
20
+ * debounceMs: cfg.embedding.coalesce?.debounceMs ?? 1000,
21
+ * maxBatchSize: cfg.embedding.coalesce?.maxBatchSize ?? 64,
22
+ * maxConcurrentFlushes: cfg.embedding.coalesce?.maxConcurrentFlushes ?? 2,
23
+ * });
24
+ *
25
+ * ROUTING POLICY (yours to own, not baked in here):
26
+ * - append / ingest / bulk → coalescer.embed(text, "document") [debounced]
27
+ * - interactive wiki_search → coalescer.embedImmediate(query, "query")
28
+ * (latency-sensitive; bypasses the debounce)
29
+ *
30
+ * It is provider-agnostic: `embedFn` can wrap modal, ollama, or transformers —
31
+ * they all take a batch and return one vector per input. Batches are
32
+ * homogeneous per `task` (queries and documents flush separately) because the
33
+ * embedding endpoints take a single task per call.
34
+ *
35
+ * Make the knobs configurable (Q6 in the decision log): debounceMs,
36
+ * maxBatchSize, maxConcurrentFlushes, dedupe, and whether search bypasses.
37
+ * Adapt the interface freely — this is a reference, not a contract.
38
+ * ──────────────────────────────────────────────────────────────────────────
39
+ */
/** Kind of text being embedded; per the module notes, batches are kept homogeneous per task. */
40
+ export type EmbedTask = "query" | "document";
41
+ /** Batch embed backend: same texts in, one vector per text out, in order. */
42
+ export type EmbedFn = (texts: string[], task: EmbedTask) => Promise<number[][]>;
/** Options accepted by the `EmbeddingCoalescer` constructor; only `embedFn` is required. */
43
+ export interface CoalescerOptions {
/** Backend that embeds one batch of texts for a single task. */
44
+ embedFn: EmbedFn;
45
+ /** Window to gather requests before flushing (ms). Default 1000 (matches watcher). */
46
+ debounceMs?: number;
47
+ /** Flush immediately once a task's buffer reaches this size. Default 64. */
48
+ maxBatchSize?: number;
49
+ /** Max batched embedFn calls in flight at once. Default 2. */
50
+ maxConcurrentFlushes?: number;
51
+ /** Coalesce identical texts within a batch to a single embed. Default true. */
52
+ dedupe?: boolean;
53
+ }
/**
 * Debounce-and-batch front end for an embedding backend (declaration only; the
 * implementation is not part of this file). Callers queue single texts with
 * `embed()` or bypass the debounce with `embedImmediate()`; `drain()` flushes
 * and awaits outstanding work, and `dispose()` cancels pending timers.
 */
54
+ export declare class EmbeddingCoalescer {
55
+ private readonly embedFn;
56
+ private readonly debounceMs;
57
+ private readonly maxBatchSize;
58
+ private readonly maxConcurrentFlushes;
59
+ private readonly dedupe;
// NOTE(review): the private members below are untyped in this declaration;
// presumably per-task request buffers, debounce timers and flush bookkeeping —
// confirm against the implementation.
60
+ private readonly buffers;
61
+ private readonly timers;
62
+ private active;
63
+ private readonly pendingFlushes;
64
+ private readonly inFlight;
65
+ constructor(opts: CoalescerOptions);
66
+ /** Queue a text for embedding; resolves with its vector once a batch flushes. */
67
+ embed(text: string, task?: EmbedTask): Promise<number[]>;
68
+ /** Bypass the debounce — embed a single text right now (for latency-sensitive search). */
69
+ embedImmediate(text: string, task?: EmbedTask): Promise<number[]>;
70
+ /** Number of requests currently buffered (not yet flushed). */
71
+ size(): number;
72
+ /** Flush all buffered tasks now, then await every in-flight batch to settle. */
73
+ drain(): Promise<void>;
74
+ /** Cancel pending debounce timers. Does not reject already-buffered waiters. */
75
+ dispose(): void;
76
+ private arm;
77
+ private flushTask;
78
+ private schedule;
79
+ private runBatch;
80
+ }