pi-vault-mind 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Local embedding request coalescer — debounce + batch.
3
+ *
4
+ * Collects individual embedding requests arriving within a short window and
5
+ * flushes them as a single batched call to the backend (`/embed` for the modal
6
+ * provider, `/api/embed` for ollama, etc.), so ingest/append never fires N tiny
7
+ * requests. Modeled on the watcher's coalescing in `src/watcher.ts`: a debounce
8
+ * window (≈1000ms), a max-batch early flush, and a `maxConcurrentFlushes` cap
9
+ * (the analog of `pendingQueue` + `maxConcurrent`).
10
+ *
11
+ * ──────────────────────────────────────────────────────────────────────────
12
+ * NOTE FOR AGENT B (extension integration — see docs/plans/agent-B-*.md #11):
13
+ *
14
+ * This is a complete, dependency-free, unit-tested building block (see
15
+ * test/embed-queue.test.ts). It is intentionally NOT wired into anything yet.
16
+ * Wire it in `src/lance.ts` roughly like:
17
+ *
18
+ * const coalescer = new EmbeddingCoalescer({
19
+ * embedFn: (texts, task) => modalClient.embed(texts, { task }).then(r => r.vectors),
20
+ * debounceMs: cfg.embedding.coalesce?.debounceMs ?? 1000,
21
+ * maxBatchSize: cfg.embedding.coalesce?.maxBatchSize ?? 64,
22
+ * maxConcurrentFlushes: cfg.embedding.coalesce?.maxConcurrent ?? 2,
23
+ * });
24
+ *
25
+ * ROUTING POLICY (yours to own, not baked in here):
26
+ * - append / ingest / bulk → coalescer.embed(text, "document") [debounced]
27
+ * - interactive wiki_search → coalescer.embedImmediate(query, "query")
28
+ * (latency-sensitive; bypasses the debounce)
29
+ *
30
+ * It is provider-agnostic: `embedFn` can wrap modal, ollama, or transformers —
31
+ * they all take a batch and return one vector per input. Batches are
32
+ * homogeneous per `task` (queries and documents flush separately) because the
33
+ * embedding endpoints take a single task per call.
34
+ *
35
+ * Make the knobs configurable (Q6 in the decision log): debounceMs,
36
+ * maxBatchSize, maxConcurrentFlushes, dedupe, and whether search bypasses.
37
+ * Adapt the interface freely — this is a reference, not a contract.
38
+ * ──────────────────────────────────────────────────────────────────────────
39
+ */
40
+ export type EmbedTask = "query" | "document"; // Embedding mode handed to embedFn; requests are buffered and flushed separately per task.
41
+ /** Batch embed backend: same texts in, one vector per text out, in order. Called once per flushed batch with a single task; a rejection is propagated to every waiter of that batch. */
42
+ export type EmbedFn = (texts: string[], task: EmbedTask) => Promise<number[][]>;
43
+ export interface CoalescerOptions { // Constructor options for EmbeddingCoalescer; everything except embedFn is optional.
44
+ embedFn: EmbedFn; // Backend used for every batch and for embedImmediate().
45
+ /** Window to gather requests before flushing (ms). Default 1000 (matches watcher). Each new request for a task restarts that task's window. */
46
+ debounceMs?: number;
47
+ /** Flush immediately once a task's buffer reaches this size. Default 64; values below 1 are clamped to 1. */
48
+ maxBatchSize?: number;
49
+ /** Max batched embedFn calls in flight at once. Default 2; values below 1 are clamped to 1. Further batches wait in FIFO order. */
50
+ maxConcurrentFlushes?: number;
51
+ /** Coalesce identical texts within a batch to a single embed. Default true. Every waiter for a duplicated text receives the same vector. */
52
+ dedupe?: boolean;
53
+ }
54
+ export declare class EmbeddingCoalescer { // Buffers embed() requests per task and flushes them to embedFn as batches.
55
+ private readonly embedFn; // Backend batch function taken from the options.
56
+ private readonly debounceMs; // Debounce window in ms (defaults to 1000).
57
+ private readonly maxBatchSize; // Buffer size that triggers an immediate flush (at least 1).
58
+ private readonly maxConcurrentFlushes; // Cap on simultaneously running batches (at least 1).
59
+ private readonly dedupe; // Whether identical texts in one batch are sent only once.
60
+ private readonly buffers; // Per-task list of queued requests that have not been flushed yet.
61
+ private readonly timers; // Per-task pending debounce timer handle.
62
+ private active; // Number of batch jobs currently running.
63
+ private readonly pendingFlushes; // Batch jobs waiting for a free concurrency slot (FIFO).
64
+ private readonly inFlight; // Promises of started, not-yet-settled batch jobs; awaited by drain().
65
+ constructor(opts: CoalescerOptions);
66
+ /** Queue a text for embedding; resolves with its vector once a batch flushes. `task` defaults to "document". Rejects if the batch's embedFn call fails. */
67
+ embed(text: string, task?: EmbedTask): Promise<number[]>;
68
+ /** Bypass the debounce — embed a single text right now (for latency-sensitive search). Calls embedFn directly with a one-element batch, so it is not counted against maxConcurrentFlushes. `task` defaults to "query". */
69
+ embedImmediate(text: string, task?: EmbedTask): Promise<number[]>;
70
+ /** Number of requests currently buffered (not yet flushed), summed over all tasks. */
71
+ size(): number;
72
+ /** Flush all buffered tasks now, then await every in-flight batch to settle. Does not reject when a batch fails; failures are delivered to the individual embed() promises. */
73
+ drain(): Promise<void>;
74
+ /** Cancel pending debounce timers. Does not reject already-buffered waiters; they stay buffered until a later flush or drain(). */
75
+ dispose(): void;
76
+ private arm; // (Re)start the debounce timer for one task.
77
+ private flushTask; // Cancel the task's timer and hand its current buffer to schedule().
78
+ private schedule; // Run a batch job now, or queue it when the concurrency cap is reached.
79
+ private runBatch; // Call embedFn for one batch (deduplicated if enabled) and settle every waiter.
80
+ }
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Local embedding request coalescer — debounce + batch.
3
+ *
4
+ * Collects individual embedding requests arriving within a short window and
5
+ * flushes them as a single batched call to the backend (`/embed` for the modal
6
+ * provider, `/api/embed` for ollama, etc.), so ingest/append never fires N tiny
7
+ * requests. Modeled on the watcher's coalescing in `src/watcher.ts`: a debounce
8
+ * window (≈1000ms), a max-batch early flush, and a `maxConcurrentFlushes` cap
9
+ * (the analog of `pendingQueue` + `maxConcurrent`).
10
+ *
11
+ * ──────────────────────────────────────────────────────────────────────────
12
+ * NOTE FOR AGENT B (extension integration — see docs/plans/agent-B-*.md #11):
13
+ *
14
+ * This is a complete, dependency-free, unit-tested building block (see
15
+ * test/embed-queue.test.ts). It is intentionally NOT wired into anything yet.
16
+ * Wire it in `src/lance.ts` roughly like:
17
+ *
18
+ * const coalescer = new EmbeddingCoalescer({
19
+ * embedFn: (texts, task) => modalClient.embed(texts, { task }).then(r => r.vectors),
20
+ * debounceMs: cfg.embedding.coalesce?.debounceMs ?? 1000,
21
+ * maxBatchSize: cfg.embedding.coalesce?.maxBatchSize ?? 64,
22
+ * maxConcurrentFlushes: cfg.embedding.coalesce?.maxConcurrent ?? 2,
23
+ * });
24
+ *
25
+ * ROUTING POLICY (yours to own, not baked in here):
26
+ * - append / ingest / bulk → coalescer.embed(text, "document") [debounced]
27
+ * - interactive wiki_search → coalescer.embedImmediate(query, "query")
28
+ * (latency-sensitive; bypasses the debounce)
29
+ *
30
+ * It is provider-agnostic: `embedFn` can wrap modal, ollama, or transformers —
31
+ * they all take a batch and return one vector per input. Batches are
32
+ * homogeneous per `task` (queries and documents flush separately) because the
33
+ * embedding endpoints take a single task per call.
34
+ *
35
+ * Make the knobs configurable (Q6 in the decision log): debounceMs,
36
+ * maxBatchSize, maxConcurrentFlushes, dedupe, and whether search bypasses.
37
+ * Adapt the interface freely — this is a reference, not a contract.
38
+ * ──────────────────────────────────────────────────────────────────────────
39
+ */
40
+ export class EmbeddingCoalescer {
41
+ embedFn;
42
+ debounceMs;
43
+ maxBatchSize;
44
+ maxConcurrentFlushes;
45
+ dedupe;
46
+ buffers = new Map();
47
+ timers = new Map();
48
+ active = 0;
49
+ pendingFlushes = [];
50
+ inFlight = new Set();
51
+ constructor(opts) {
52
+ this.embedFn = opts.embedFn;
53
+ this.debounceMs = opts.debounceMs ?? 1000;
54
+ this.maxBatchSize = Math.max(1, opts.maxBatchSize ?? 64);
55
+ this.maxConcurrentFlushes = Math.max(1, opts.maxConcurrentFlushes ?? 2);
56
+ this.dedupe = opts.dedupe ?? true;
57
+ }
58
+ /** Queue a text for embedding; resolves with its vector once a batch flushes. */
59
+ embed(text, task = "document") {
60
+ return new Promise((resolve, reject) => {
61
+ const buf = this.buffers.get(task) ?? [];
62
+ buf.push({ text, resolve, reject });
63
+ this.buffers.set(task, buf);
64
+ if (buf.length >= this.maxBatchSize) {
65
+ this.flushTask(task);
66
+ }
67
+ else {
68
+ this.arm(task);
69
+ }
70
+ });
71
+ }
72
+ /** Bypass the debounce — embed a single text right now (for latency-sensitive search). */
73
+ async embedImmediate(text, task = "query") {
74
+ const vectors = await this.embedFn([text], task);
75
+ return vectors[0];
76
+ }
77
+ /** Number of requests currently buffered (not yet flushed). */
78
+ size() {
79
+ let n = 0;
80
+ for (const buf of this.buffers.values())
81
+ n += buf.length;
82
+ return n;
83
+ }
84
+ /** Flush all buffered tasks now, then await every in-flight batch to settle. */
85
+ async drain() {
86
+ for (const task of [...this.buffers.keys()])
87
+ this.flushTask(task);
88
+ while (this.inFlight.size > 0 || this.pendingFlushes.length > 0) {
89
+ await Promise.allSettled([...this.inFlight]);
90
+ }
91
+ }
92
+ /** Cancel pending debounce timers. Does not reject already-buffered waiters. */
93
+ dispose() {
94
+ for (const t of this.timers.values())
95
+ clearTimeout(t);
96
+ this.timers.clear();
97
+ }
98
+ // ── internals ──────────────────────────────────────────────────────────
99
+ arm(task) {
100
+ const existing = this.timers.get(task);
101
+ if (existing)
102
+ clearTimeout(existing);
103
+ this.timers.set(task, setTimeout(() => this.flushTask(task), this.debounceMs));
104
+ }
105
+ flushTask(task) {
106
+ const timer = this.timers.get(task);
107
+ if (timer) {
108
+ clearTimeout(timer);
109
+ this.timers.delete(task);
110
+ }
111
+ const buf = this.buffers.get(task);
112
+ if (!buf || buf.length === 0)
113
+ return;
114
+ this.buffers.set(task, []);
115
+ this.schedule(() => this.runBatch(task, buf));
116
+ }
117
+ schedule(job) {
118
+ const run = () => {
119
+ this.active++;
120
+ const p = job().finally(() => {
121
+ this.active--;
122
+ this.inFlight.delete(p);
123
+ const next = this.pendingFlushes.shift();
124
+ if (next)
125
+ next();
126
+ });
127
+ this.inFlight.add(p);
128
+ };
129
+ if (this.active < this.maxConcurrentFlushes)
130
+ run();
131
+ else
132
+ this.pendingFlushes.push(run);
133
+ }
134
+ async runBatch(task, items) {
135
+ try {
136
+ let texts;
137
+ let indexByText = null;
138
+ if (this.dedupe) {
139
+ indexByText = new Map();
140
+ texts = [];
141
+ for (const it of items) {
142
+ if (!indexByText.has(it.text)) {
143
+ indexByText.set(it.text, texts.length);
144
+ texts.push(it.text);
145
+ }
146
+ }
147
+ }
148
+ else {
149
+ texts = items.map((i) => i.text);
150
+ }
151
+ const vectors = await this.embedFn(texts, task);
152
+ for (let i = 0; i < items.length; i++) {
153
+ const it = items[i];
154
+ const idx = indexByText ? (indexByText.get(it.text) ?? i) : i;
155
+ it.resolve(vectors[idx]);
156
+ }
157
+ }
158
+ catch (err) {
159
+ for (const it of items)
160
+ it.reject(err);
161
+ }
162
+ }
163
+ }
package/dist/src/index.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
+ import { startAutoSync } from "./autosync.js";
3
4
  import { registerCommands, selectActiveCollection, serverState, watcherState } from "./commands.js";
4
5
  import { handleBeforeAgentStart, handleTurnEnd } from "./events.js";
5
6
  import { startServer, stopServer } from "./server.js";
@@ -8,6 +9,7 @@ import { EXT_ROOT } from "./utils.js";
8
9
  import { loadConfig } from "./utils.js";
9
10
  import { startWatcher, stopWatcher } from "./watcher.js";
10
11
  import { updateActiveCollectionWidget } from "./widget.js";
12
+ let stopAutoSync = null;
11
13
  export default function (pi) {
12
14
  /* expose skills directory */
13
15
  pi.on("resources_discover", async (_event) => {
@@ -49,6 +51,11 @@ export default function (pi) {
49
51
  startWatcher(pi, vaults, watcherState);
50
52
  }, 2000);
51
53
  }
54
+ /* auto-sync: off by default; only starts when modal.sync.autoSync is true */
55
+ if (cfg.wiki.embedding.provider === "modal" && cfg.wiki.embedding.modal?.sync?.autoSync) {
56
+ stopAutoSync = startAutoSync(cfg.wiki.embedding.modal?.sync?.autoSyncIntervalMs ?? 300000);
57
+ console.log(`[pi-vault-mind] Modal auto-sync enabled (every ${cfg.wiki.embedding.modal?.sync?.autoSyncIntervalMs ?? 300000}ms)`);
58
+ }
52
59
  }
53
60
  catch {
54
61
  /* config may not exist yet; user can start watcher manually via /wiki watcher start */
@@ -57,5 +64,7 @@ export default function (pi) {
57
64
  pi.on("session_shutdown", async () => {
58
65
  stopWatcher(watcherState);
59
66
  stopServer(serverState);
67
+ if (stopAutoSync)
68
+ stopAutoSync();
60
69
  });
61
70
  }
@@ -35,6 +35,13 @@ export declare const pullOllamaModel: (model: string, piOrHost?: ExtensionAPI |
35
35
  message: string;
36
36
  }>;
37
37
  export declare const upsertEntry: (dataDir: string, collectionName: string, entry: Record<string, string>, cfg: WikiConfig) => Promise<void>;
38
+ /**
39
+ * Precomputed-vector insert path — used by sync. Upserts rows that are already
40
+ * embedded (vectors come from the server) into the namespaced table, keyed by
41
+ * `id` (merge-insert). Bypasses the auto-embed source field entirely. Carries
42
+ * text + metadata. Idempotent (re-fetching a boundary row is a no-op).
43
+ */
44
+ export declare const upsertPrecomputed: (dataDir: string, collectionName: string, model: string, dim: number, rows: Array<Record<string, unknown>>, cfg: WikiConfig) => Promise<void>;
38
45
  export declare const searchHybrid: (dataDir: string, collectionName: string, query: string, limit: number, cfg: WikiConfig) => Promise<unknown[]>;
39
46
  export declare const searchFts: (dataDir: string, collectionName: string, query: string, limit: number, cfg: WikiConfig) => Promise<unknown[]>;
40
47
  export declare const getStatus: (dataDir: string) => Promise<Record<string, unknown>>;