pi-vault-mind 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/dist/src/autosync.d.ts +16 -0
- package/dist/src/autosync.js +43 -0
- package/dist/src/commands.d.ts +18 -0
- package/dist/src/commands.js +464 -10
- package/dist/src/embed-queue.d.ts +80 -0
- package/dist/src/embed-queue.js +163 -0
- package/dist/src/index.js +9 -0
- package/dist/src/lance.d.ts +7 -0
- package/dist/src/lance.js +432 -0
- package/dist/src/modal-client.d.ts +176 -0
- package/dist/src/modal-client.js +174 -0
- package/dist/src/modal-config.d.ts +42 -0
- package/dist/src/modal-config.js +60 -0
- package/dist/src/settings-ui.d.ts +7 -0
- package/dist/src/settings-ui.js +109 -1
- package/dist/src/sync.d.ts +71 -0
- package/dist/src/sync.js +211 -0
- package/dist/src/types.d.ts +102 -1
- package/dist/test/embed-queue.test.js +105 -0
- package/dist/test/index.test.js +35 -0
- package/dist/test/lance-modal.test.js +95 -0
- package/dist/test/modal-client.test.js +294 -0
- package/dist/test/modal-config.test.js +86 -0
- package/dist/test/sync.test.js +132 -0
- package/package.json +3 -2
- package/dist/test/index.test.d.ts +0 -1
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding request coalescer — debounce + batch.
|
|
3
|
+
*
|
|
4
|
+
* Collects individual embedding requests arriving within a short window and
|
|
5
|
+
* flushes them as a single batched call to the backend (`/embed` for the modal
|
|
6
|
+
* provider, `/api/embed` for ollama, etc.), so ingest/append never fires N tiny
|
|
7
|
+
* requests. Modeled on the watcher's coalescing in `src/watcher.ts`: a debounce
|
|
8
|
+
* window (≈1000ms), a max-batch early flush, and a `maxConcurrentFlushes` cap
|
|
9
|
+
* (the analog of `pendingQueue` + `maxConcurrent`).
|
|
10
|
+
*
|
|
11
|
+
* ──────────────────────────────────────────────────────────────────────────
|
|
12
|
+
* NOTE FOR AGENT B (extension integration — see docs/plans/agent-B-*.md #11):
|
|
13
|
+
*
|
|
14
|
+
* This is a complete, dependency-free, unit-tested building block (see
|
|
15
|
+
* test/embed-queue.test.ts). It is intentionally NOT wired into anything yet.
|
|
16
|
+
* Wire it in `src/lance.ts` roughly like:
|
|
17
|
+
*
|
|
18
|
+
* const coalescer = new EmbeddingCoalescer({
|
|
19
|
+
* embedFn: (texts, task) => modalClient.embed(texts, { task }).then(r => r.vectors),
|
|
20
|
+
* debounceMs: cfg.embedding.coalesce?.debounceMs ?? 1000,
|
|
21
|
+
* maxBatchSize: cfg.embedding.coalesce?.maxBatchSize ?? 64,
|
|
22
|
+
* maxConcurrentFlushes: cfg.embedding.coalesce?.maxConcurrent ?? 2,
|
|
23
|
+
* });
|
|
24
|
+
*
|
|
25
|
+
* ROUTING POLICY (yours to own, not baked in here):
|
|
26
|
+
* - append / ingest / bulk → coalescer.embed(text, "document") [debounced]
|
|
27
|
+
* - interactive wiki_search → coalescer.embedImmediate(query, "query")
|
|
28
|
+
* (latency-sensitive; bypasses the debounce)
|
|
29
|
+
*
|
|
30
|
+
* It is provider-agnostic: `embedFn` can wrap modal, ollama, or transformers —
|
|
31
|
+
* they all take a batch and return one vector per input. Batches are
|
|
32
|
+
* homogeneous per `task` (queries and documents flush separately) because the
|
|
33
|
+
* embedding endpoints take a single task per call.
|
|
34
|
+
*
|
|
35
|
+
* Make the knobs configurable (Q6 in the decision log): debounceMs,
|
|
36
|
+
* maxBatchSize, maxConcurrentFlushes, dedupe, and whether search bypasses.
|
|
37
|
+
* Adapt the interface freely — this is a reference, not a contract.
|
|
38
|
+
* ──────────────────────────────────────────────────────────────────────────
|
|
39
|
+
*/
|
|
40
|
+
export class EmbeddingCoalescer {
|
|
41
|
+
embedFn;
|
|
42
|
+
debounceMs;
|
|
43
|
+
maxBatchSize;
|
|
44
|
+
maxConcurrentFlushes;
|
|
45
|
+
dedupe;
|
|
46
|
+
buffers = new Map();
|
|
47
|
+
timers = new Map();
|
|
48
|
+
active = 0;
|
|
49
|
+
pendingFlushes = [];
|
|
50
|
+
inFlight = new Set();
|
|
51
|
+
constructor(opts) {
|
|
52
|
+
this.embedFn = opts.embedFn;
|
|
53
|
+
this.debounceMs = opts.debounceMs ?? 1000;
|
|
54
|
+
this.maxBatchSize = Math.max(1, opts.maxBatchSize ?? 64);
|
|
55
|
+
this.maxConcurrentFlushes = Math.max(1, opts.maxConcurrentFlushes ?? 2);
|
|
56
|
+
this.dedupe = opts.dedupe ?? true;
|
|
57
|
+
}
|
|
58
|
+
/** Queue a text for embedding; resolves with its vector once a batch flushes. */
|
|
59
|
+
embed(text, task = "document") {
|
|
60
|
+
return new Promise((resolve, reject) => {
|
|
61
|
+
const buf = this.buffers.get(task) ?? [];
|
|
62
|
+
buf.push({ text, resolve, reject });
|
|
63
|
+
this.buffers.set(task, buf);
|
|
64
|
+
if (buf.length >= this.maxBatchSize) {
|
|
65
|
+
this.flushTask(task);
|
|
66
|
+
}
|
|
67
|
+
else {
|
|
68
|
+
this.arm(task);
|
|
69
|
+
}
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
/** Bypass the debounce — embed a single text right now (for latency-sensitive search). */
|
|
73
|
+
async embedImmediate(text, task = "query") {
|
|
74
|
+
const vectors = await this.embedFn([text], task);
|
|
75
|
+
return vectors[0];
|
|
76
|
+
}
|
|
77
|
+
/** Number of requests currently buffered (not yet flushed). */
|
|
78
|
+
size() {
|
|
79
|
+
let n = 0;
|
|
80
|
+
for (const buf of this.buffers.values())
|
|
81
|
+
n += buf.length;
|
|
82
|
+
return n;
|
|
83
|
+
}
|
|
84
|
+
/** Flush all buffered tasks now, then await every in-flight batch to settle. */
|
|
85
|
+
async drain() {
|
|
86
|
+
for (const task of [...this.buffers.keys()])
|
|
87
|
+
this.flushTask(task);
|
|
88
|
+
while (this.inFlight.size > 0 || this.pendingFlushes.length > 0) {
|
|
89
|
+
await Promise.allSettled([...this.inFlight]);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
/** Cancel pending debounce timers. Does not reject already-buffered waiters. */
|
|
93
|
+
dispose() {
|
|
94
|
+
for (const t of this.timers.values())
|
|
95
|
+
clearTimeout(t);
|
|
96
|
+
this.timers.clear();
|
|
97
|
+
}
|
|
98
|
+
// ── internals ──────────────────────────────────────────────────────────
|
|
99
|
+
arm(task) {
|
|
100
|
+
const existing = this.timers.get(task);
|
|
101
|
+
if (existing)
|
|
102
|
+
clearTimeout(existing);
|
|
103
|
+
this.timers.set(task, setTimeout(() => this.flushTask(task), this.debounceMs));
|
|
104
|
+
}
|
|
105
|
+
flushTask(task) {
|
|
106
|
+
const timer = this.timers.get(task);
|
|
107
|
+
if (timer) {
|
|
108
|
+
clearTimeout(timer);
|
|
109
|
+
this.timers.delete(task);
|
|
110
|
+
}
|
|
111
|
+
const buf = this.buffers.get(task);
|
|
112
|
+
if (!buf || buf.length === 0)
|
|
113
|
+
return;
|
|
114
|
+
this.buffers.set(task, []);
|
|
115
|
+
this.schedule(() => this.runBatch(task, buf));
|
|
116
|
+
}
|
|
117
|
+
schedule(job) {
|
|
118
|
+
const run = () => {
|
|
119
|
+
this.active++;
|
|
120
|
+
const p = job().finally(() => {
|
|
121
|
+
this.active--;
|
|
122
|
+
this.inFlight.delete(p);
|
|
123
|
+
const next = this.pendingFlushes.shift();
|
|
124
|
+
if (next)
|
|
125
|
+
next();
|
|
126
|
+
});
|
|
127
|
+
this.inFlight.add(p);
|
|
128
|
+
};
|
|
129
|
+
if (this.active < this.maxConcurrentFlushes)
|
|
130
|
+
run();
|
|
131
|
+
else
|
|
132
|
+
this.pendingFlushes.push(run);
|
|
133
|
+
}
|
|
134
|
+
async runBatch(task, items) {
|
|
135
|
+
try {
|
|
136
|
+
let texts;
|
|
137
|
+
let indexByText = null;
|
|
138
|
+
if (this.dedupe) {
|
|
139
|
+
indexByText = new Map();
|
|
140
|
+
texts = [];
|
|
141
|
+
for (const it of items) {
|
|
142
|
+
if (!indexByText.has(it.text)) {
|
|
143
|
+
indexByText.set(it.text, texts.length);
|
|
144
|
+
texts.push(it.text);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
else {
|
|
149
|
+
texts = items.map((i) => i.text);
|
|
150
|
+
}
|
|
151
|
+
const vectors = await this.embedFn(texts, task);
|
|
152
|
+
for (let i = 0; i < items.length; i++) {
|
|
153
|
+
const it = items[i];
|
|
154
|
+
const idx = indexByText ? (indexByText.get(it.text) ?? i) : i;
|
|
155
|
+
it.resolve(vectors[idx]);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
catch (err) {
|
|
159
|
+
for (const it of items)
|
|
160
|
+
it.reject(err);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
package/dist/src/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
+
import { startAutoSync } from "./autosync.js";
|
|
3
4
|
import { registerCommands, selectActiveCollection, serverState, watcherState } from "./commands.js";
|
|
4
5
|
import { handleBeforeAgentStart, handleTurnEnd } from "./events.js";
|
|
5
6
|
import { startServer, stopServer } from "./server.js";
|
|
@@ -8,6 +9,7 @@ import { EXT_ROOT } from "./utils.js";
|
|
|
8
9
|
import { loadConfig } from "./utils.js";
|
|
9
10
|
import { startWatcher, stopWatcher } from "./watcher.js";
|
|
10
11
|
import { updateActiveCollectionWidget } from "./widget.js";
|
|
12
|
+
let stopAutoSync = null;
|
|
11
13
|
export default function (pi) {
|
|
12
14
|
/* expose skills directory */
|
|
13
15
|
pi.on("resources_discover", async (_event) => {
|
|
@@ -49,6 +51,11 @@ export default function (pi) {
|
|
|
49
51
|
startWatcher(pi, vaults, watcherState);
|
|
50
52
|
}, 2000);
|
|
51
53
|
}
|
|
54
|
+
/* auto-sync: off by default; only starts when modal.sync.autoSync is true */
|
|
55
|
+
if (cfg.wiki.embedding.provider === "modal" && cfg.wiki.embedding.modal?.sync?.autoSync) {
|
|
56
|
+
stopAutoSync = startAutoSync(cfg.wiki.embedding.modal?.sync?.autoSyncIntervalMs ?? 300000);
|
|
57
|
+
console.log(`[pi-vault-mind] Modal auto-sync enabled (every ${cfg.wiki.embedding.modal?.sync?.autoSyncIntervalMs ?? 300000}ms)`);
|
|
58
|
+
}
|
|
52
59
|
}
|
|
53
60
|
catch {
|
|
54
61
|
/* config may not exist yet; user can start watcher manually via /wiki watcher start */
|
|
@@ -57,5 +64,7 @@ export default function (pi) {
|
|
|
57
64
|
pi.on("session_shutdown", async () => {
|
|
58
65
|
stopWatcher(watcherState);
|
|
59
66
|
stopServer(serverState);
|
|
67
|
+
if (stopAutoSync)
|
|
68
|
+
stopAutoSync();
|
|
60
69
|
});
|
|
61
70
|
}
|
package/dist/src/lance.d.ts
CHANGED
|
@@ -35,6 +35,13 @@ export declare const pullOllamaModel: (model: string, piOrHost?: ExtensionAPI |
|
|
|
35
35
|
message: string;
|
|
36
36
|
}>;
|
|
37
37
|
export declare const upsertEntry: (dataDir: string, collectionName: string, entry: Record<string, string>, cfg: WikiConfig) => Promise<void>;
|
|
38
|
+
/**
|
|
39
|
+
* Precomputed-vector insert path — used by sync. Upserts rows that are already
|
|
40
|
+
* embedded (vectors come from the server) into the namespaced table, keyed by
|
|
41
|
+
* `id` (merge-insert). Bypasses the auto-embed source field entirely. Carries
|
|
42
|
+
* text + metadata. Idempotent (re-fetching a boundary row is a no-op).
|
|
43
|
+
*/
|
|
44
|
+
export declare const upsertPrecomputed: (dataDir: string, collectionName: string, model: string, dim: number, rows: Array<Record<string, unknown>>, cfg: WikiConfig) => Promise<void>;
|
|
38
45
|
export declare const searchHybrid: (dataDir: string, collectionName: string, query: string, limit: number, cfg: WikiConfig) => Promise<unknown[]>;
|
|
39
46
|
export declare const searchFts: (dataDir: string, collectionName: string, query: string, limit: number, cfg: WikiConfig) => Promise<unknown[]>;
|
|
40
47
|
export declare const getStatus: (dataDir: string) => Promise<Record<string, unknown>>;
|