memwarden 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +402 -0
- package/dist/bundle/bundle.d.ts +28 -0
- package/dist/bundle/bundle.js +85 -0
- package/dist/cli/bin.d.ts +2 -0
- package/dist/cli/bin.js +593 -0
- package/dist/cli/connect.d.ts +63 -0
- package/dist/cli/connect.js +121 -0
- package/dist/cli/hook.d.ts +24 -0
- package/dist/cli/hook.js +186 -0
- package/dist/cli/tools.d.ts +47 -0
- package/dist/cli/tools.js +246 -0
- package/dist/daemon/ensure.d.ts +12 -0
- package/dist/daemon/ensure.js +54 -0
- package/dist/daemon/service.d.ts +15 -0
- package/dist/daemon/service.js +210 -0
- package/dist/embedding/index.d.ts +10 -0
- package/dist/embedding/index.js +33 -0
- package/dist/embedding/local-embedding.d.ts +14 -0
- package/dist/embedding/local-embedding.js +80 -0
- package/dist/functions/access-tracker.d.ts +13 -0
- package/dist/functions/access-tracker.js +92 -0
- package/dist/functions/audit.d.ts +46 -0
- package/dist/functions/audit.js +0 -0
- package/dist/functions/cjk-segmenter.d.ts +6 -0
- package/dist/functions/cjk-segmenter.js +120 -0
- package/dist/functions/compress-synthetic.d.ts +2 -0
- package/dist/functions/compress-synthetic.js +104 -0
- package/dist/functions/config.d.ts +68 -0
- package/dist/functions/config.js +231 -0
- package/dist/functions/conflicts.d.ts +19 -0
- package/dist/functions/conflicts.js +328 -0
- package/dist/functions/context.d.ts +3 -0
- package/dist/functions/context.js +155 -0
- package/dist/functions/dedup.d.ts +11 -0
- package/dist/functions/dedup.js +51 -0
- package/dist/functions/dejafix.d.ts +96 -0
- package/dist/functions/dejafix.js +356 -0
- package/dist/functions/doctor.d.ts +29 -0
- package/dist/functions/doctor.js +137 -0
- package/dist/functions/forget.d.ts +3 -0
- package/dist/functions/forget.js +87 -0
- package/dist/functions/hybrid-search.d.ts +17 -0
- package/dist/functions/hybrid-search.js +205 -0
- package/dist/functions/index.d.ts +32 -0
- package/dist/functions/index.js +44 -0
- package/dist/functions/keyed-mutex.d.ts +1 -0
- package/dist/functions/keyed-mutex.js +21 -0
- package/dist/functions/logger.d.ts +6 -0
- package/dist/functions/logger.js +37 -0
- package/dist/functions/memory-utils.d.ts +2 -0
- package/dist/functions/memory-utils.js +29 -0
- package/dist/functions/observe.d.ts +5 -0
- package/dist/functions/observe.js +326 -0
- package/dist/functions/paths.d.ts +1 -0
- package/dist/functions/paths.js +38 -0
- package/dist/functions/privacy.d.ts +1 -0
- package/dist/functions/privacy.js +30 -0
- package/dist/functions/provenance.d.ts +9 -0
- package/dist/functions/provenance.js +57 -0
- package/dist/functions/quantized-vector-index.d.ts +60 -0
- package/dist/functions/quantized-vector-index.js +275 -0
- package/dist/functions/receipt.d.ts +31 -0
- package/dist/functions/receipt.js +95 -0
- package/dist/functions/search-index.d.ts +27 -0
- package/dist/functions/search-index.js +217 -0
- package/dist/functions/search.d.ts +25 -0
- package/dist/functions/search.js +523 -0
- package/dist/functions/stemmer.d.ts +1 -0
- package/dist/functions/stemmer.js +110 -0
- package/dist/functions/synonyms.d.ts +1 -0
- package/dist/functions/synonyms.js +69 -0
- package/dist/functions/turboquant.d.ts +53 -0
- package/dist/functions/turboquant.js +278 -0
- package/dist/functions/types.d.ts +217 -0
- package/dist/functions/types.js +8 -0
- package/dist/functions/vector-index.d.ts +25 -0
- package/dist/functions/vector-index.js +125 -0
- package/dist/functions/vector-persistence.d.ts +14 -0
- package/dist/functions/vector-persistence.js +75 -0
- package/dist/functions/verify.d.ts +13 -0
- package/dist/functions/verify.js +104 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +219 -0
- package/dist/kernel/http.d.ts +24 -0
- package/dist/kernel/http.js +261 -0
- package/dist/kernel/index.d.ts +19 -0
- package/dist/kernel/index.js +21 -0
- package/dist/kernel/kernel.d.ts +80 -0
- package/dist/kernel/kernel.js +297 -0
- package/dist/kernel/pubsub.d.ts +21 -0
- package/dist/kernel/pubsub.js +38 -0
- package/dist/kernel/types.d.ts +139 -0
- package/dist/kernel/types.js +20 -0
- package/dist/mcp/bin.d.ts +2 -0
- package/dist/mcp/bin.js +27 -0
- package/dist/mcp/server.d.ts +34 -0
- package/dist/mcp/server.js +377 -0
- package/dist/observability/metrics.d.ts +26 -0
- package/dist/observability/metrics.js +104 -0
- package/dist/proxy/server.d.ts +30 -0
- package/dist/proxy/server.js +331 -0
- package/dist/state/kv.d.ts +41 -0
- package/dist/state/kv.js +50 -0
- package/dist/state/oplog.d.ts +25 -0
- package/dist/state/oplog.js +57 -0
- package/dist/state/schema.d.ts +60 -0
- package/dist/state/schema.js +88 -0
- package/dist/state/store-libsql.d.ts +46 -0
- package/dist/state/store-libsql.js +263 -0
- package/dist/state/store-memory.d.ts +23 -0
- package/dist/state/store-memory.js +121 -0
- package/dist/state/store.d.ts +87 -0
- package/dist/state/store.js +58 -0
- package/dist/triggers/api.d.ts +14 -0
- package/dist/triggers/api.js +510 -0
- package/dist/triggers/auth.d.ts +1 -0
- package/dist/triggers/auth.js +13 -0
- package/package.json +58 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/** Filesystem path of the daemon entry script that gets spawned or registered as a service. */
export declare const DAEMON_ENTRY: string;
|
|
2
|
+
/** The stable, user-global brain location (one memory across every tool). */
|
|
3
|
+
export declare function defaultDataDir(): string;
|
|
4
|
+
/**
 * Probe the daemon's liveness endpoint (`${url}/memwarden/livez`).
 * Resolves to `true` only for a successful (2xx) answer; a failed request
 * resolves to `false` instead of rejecting.
 */
export declare function daemonAlive(url: string): Promise<boolean>;
|
|
5
|
+
/**
 * Outcome of `ensureDaemon`:
 * "already" - the daemon answered before anything was spawned;
 * "started" - it answered after a spawn;
 * "failed"  - it did not answer before the timeout elapsed.
 */
export type EnsureResult = "already" | "started" | "failed";
|
|
6
|
+
/**
|
|
7
|
+
* Ensure the daemon at `url` is up, spawning it if not. Idempotent and
|
|
8
|
+
* race-safe: concurrent callers may both spawn, but the daemon exits 0 on
|
|
9
|
+
* EADDRINUSE (see index.ts) so the loser simply goes away and the winner
|
|
10
|
+
* serves. Returns once the daemon answers /livez or the timeout elapses.
|
|
11
|
+
*/
|
|
12
|
+
export declare function ensureDaemon(url: string, dataDir?: string, timeoutMs?: number): Promise<EnsureResult>;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Daemon lifecycle — the self-healing core. ensureDaemon makes the daemon
|
|
3
|
+
// reachable: if it isn't, spawn it detached (so it outlives the caller's
|
|
4
|
+
// shell) pointed at a stable global brain, and wait for it to answer. Shared
|
|
5
|
+
// by the CLI (`memwarden up`) and the MCP server (which revives a dead daemon
|
|
6
|
+
// on demand), so any entry point can bring the brain back with no human in
|
|
7
|
+
// the loop.
|
|
8
|
+
import { spawn } from "node:child_process";
|
|
9
|
+
import { mkdirSync } from "node:fs";
|
|
10
|
+
import { homedir } from "node:os";
|
|
11
|
+
import { dirname, join } from "node:path";
|
|
12
|
+
import { fileURLToPath } from "node:url";
|
|
13
|
+
// dist/daemon/ensure.js -> dist/index.js
|
|
14
|
+
export const DAEMON_ENTRY = join(dirname(fileURLToPath(import.meta.url)), "..", "index.js");
|
|
15
|
+
/**
 * The stable, user-global brain location (one memory across every tool).
 *
 * Honours `MEMWARDEN_DATA_DIR` when it is set to a non-blank value;
 * otherwise falls back to `.memwarden` inside the user's home directory.
 *
 * @returns The directory the daemon should keep its data in.
 */
export function defaultDataDir() {
    const configured = process.env.MEMWARDEN_DATA_DIR;
    // `??` alone would accept an empty value (e.g. `MEMWARDEN_DATA_DIR=`),
    // which is not a usable directory and makes the later mkdir fail.
    if (configured !== undefined && configured.trim().length > 0)
        return configured;
    return join(homedir(), ".memwarden");
}
|
|
19
|
+
// Promise-based delay: resolves (with no value) once `ms` milliseconds have passed.
const sleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});
|
|
20
|
+
/**
 * Probe the daemon's liveness endpoint.
 *
 * @param url Base URL of the daemon; `/memwarden/livez` is appended to it
 *   (trailing slashes on `url` are dropped first so the path has no `//`).
 * @returns `true` when the endpoint answers with a 2xx status; `false` for
 *   any other status, a failed or unparsable request, or a probe that does
 *   not complete within the probe timeout. Never rejects.
 */
export async function daemonAlive(url) {
    // Bound every probe: a port that accepts the connection but never
    // answers would otherwise stall the caller well past its own deadline.
    const probeTimeoutMs = 2000;
    try {
        const base = `${url}`.replace(/\/+$/, "");
        const res = await fetch(`${base}/memwarden/livez`, {
            signal: AbortSignal.timeout(probeTimeoutMs),
        });
        return res.ok;
    }
    catch {
        return false;
    }
}
|
|
29
|
+
/**
 * Ensure the daemon at `url` is up, spawning it if not. Idempotent and
 * race-safe: concurrent callers may both spawn, but the daemon exits 0 on
 * EADDRINUSE (see index.ts) so the loser simply goes away and the winner
 * serves. Returns once the daemon answers /livez or the timeout elapses.
 *
 * @param url Base URL the daemon is expected to answer on.
 * @param dataDir Data directory handed to the daemon through
 *   `MEMWARDEN_DATA_DIR`; created when missing.
 * @param timeoutMs How long to keep polling after the spawn.
 * @returns "already" when the daemon answered before anything was spawned,
 *   "started" when it answered after the spawn, "failed" otherwise.
 * @throws When `dataDir` cannot be created.
 */
export async function ensureDaemon(url, dataDir = defaultDataDir(), timeoutMs = 15000) {
    if (await daemonAlive(url))
        return "already";
    // libSQL won't create the data directory; make it so the daemon can open
    // its db instead of crashing on boot.
    mkdirSync(dataDir, { recursive: true });
    const child = spawn(process.execPath, [DAEMON_ENTRY], {
        detached: true,
        stdio: "ignore",
        env: { ...process.env, MEMWARDEN_DATA_DIR: dataDir },
    });
    // A spawn failure is delivered as an asynchronous "error" event; without
    // a listener Node raises it as an uncaught exception in the caller.
    let spawnFailed = false;
    child.once("error", () => {
        spawnFailed = true;
    });
    // Detached and unref'd so the child does not keep this process alive.
    child.unref();
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
        await sleep(250);
        if (await daemonAlive(url))
            return "started";
        // Our own child never started. The probe above already covered a
        // daemon brought up by a concurrent caller, so stop waiting.
        if (spawnFailed)
            break;
    }
    return "failed";
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** Outcome of installing or removing the supervised daemon service. */
export interface ServiceResult {
    /** Supervisor that was targeted, or "unsupported" when the platform has none. */
    kind: "launchd" | "systemd" | "unsupported";
    /** Whether the operation completed without an error. */
    ok: boolean;
    /** Path of the plist / unit file involved; not set for "unsupported". */
    path?: string;
    /** Human-readable status or error text. */
    message: string;
}
|
|
7
|
+
/**
|
|
8
|
+
* Install + start the supervised daemon for this platform. Best-effort. When
|
|
9
|
+
* `secret` is provided it is baked into the service environment so the
|
|
10
|
+
* login-launched daemon enforces auth (otherwise a managed daemon would run
|
|
11
|
+
* open even though the CLI generated a secret).
|
|
12
|
+
*/
|
|
13
|
+
export declare function installService(dataDir: string, secret?: string): ServiceResult;
|
|
14
|
+
/** Stop + remove the supervised daemon. Best-effort. */
|
|
15
|
+
export declare function uninstallService(): ServiceResult;
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
//
|
|
2
|
+
// OS service installer — the crash/reboot self-heal. Registers the daemon
|
|
3
|
+
// with the platform supervisor so it starts at login and restarts if it
|
|
4
|
+
// dies, with no human in the loop:
|
|
5
|
+
// macOS ~/Library/LaunchAgents/ai.memwarden.daemon.plist (launchd)
|
|
6
|
+
// Linux ~/.config/systemd/user/memwarden.service (systemd --user)
|
|
7
|
+
//
|
|
8
|
+
// KeepAlive/Restart are set to "restart on FAILURE only" (SuccessfulExit
|
|
9
|
+
// false / on-failure). That pairs with the daemon's clean exit(0) on
|
|
10
|
+
// EADDRINUSE: if another instance already holds the port, the supervised one
|
|
11
|
+
// exits cleanly and is NOT relaunched (no crash loop); a real crash (non-zero
|
|
12
|
+
// exit) IS relaunched. Best-effort: any failure returns ok:false so `up`
|
|
13
|
+
// falls back to a detached spawn.
|
|
14
|
+
import { execFileSync } from "node:child_process";
|
|
15
|
+
import { chmodSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
|
|
16
|
+
import { homedir } from "node:os";
|
|
17
|
+
import { dirname, join } from "node:path";
|
|
18
|
+
import { DAEMON_ENTRY } from "./ensure.js";
|
|
19
|
+
// launchd job label; also used as the base name of the plist file.
const LABEL = "ai.memwarden.daemon";
|
|
20
|
+
// Reduce an arbitrary thrown value to display text: an Error contributes its
// message, anything else its string form.
function errMsg(err) {
    if (err instanceof Error) {
        return err.message;
    }
    return String(err);
}
|
|
23
|
+
// Location of the per-user LaunchAgent plist below the given home directory.
function plistPath(home) {
    const fileName = `${LABEL}.plist`;
    return join(home, "Library", "LaunchAgents", fileName);
}
|
|
26
|
+
// Location of the systemd user unit below the given home directory.
function systemdPath(home) {
    const unitDir = join(home, ".config", "systemd", "user");
    return join(unitDir, "memwarden.service");
}
|
|
29
|
+
// XML-escape a value before interpolating it into the plist (the secret is
// base64url so it has no XML metacharacters, but be defensive).
// `&` is handled first so the ampersands introduced by the other entities
// are not escaped a second time.
function xmlEscape(s) {
    return s
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&apos;");
}
|
|
39
|
+
function macPlist(node, dataDir, secret) {
|
|
40
|
+
const log = join(dataDir, "daemon.log");
|
|
41
|
+
// The managed daemon resolves its auth secret from MEMWARDEN_SECRET, so it
|
|
42
|
+
// must be in the service environment or a login-launched daemon would run
|
|
43
|
+
// open. Only emitted when a secret was resolved.
|
|
44
|
+
const secretEntry = secret
|
|
45
|
+
? `\n <key>MEMWARDEN_SECRET</key><string>${xmlEscape(secret)}</string>`
|
|
46
|
+
: "";
|
|
47
|
+
return `<?xml version="1.0" encoding="UTF-8"?>
|
|
48
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
49
|
+
<plist version="1.0">
|
|
50
|
+
<dict>
|
|
51
|
+
<key>Label</key><string>${LABEL}</string>
|
|
52
|
+
<key>ProgramArguments</key>
|
|
53
|
+
<array>
|
|
54
|
+
<string>${node}</string>
|
|
55
|
+
<string>${DAEMON_ENTRY}</string>
|
|
56
|
+
</array>
|
|
57
|
+
<key>EnvironmentVariables</key>
|
|
58
|
+
<dict>
|
|
59
|
+
<key>MEMWARDEN_DATA_DIR</key><string>${dataDir}</string>${secretEntry}
|
|
60
|
+
</dict>
|
|
61
|
+
<key>RunAtLoad</key><true/>
|
|
62
|
+
<key>KeepAlive</key>
|
|
63
|
+
<dict><key>SuccessfulExit</key><false/></dict>
|
|
64
|
+
<key>StandardOutPath</key><string>${log}</string>
|
|
65
|
+
<key>StandardErrorPath</key><string>${log}</string>
|
|
66
|
+
</dict>
|
|
67
|
+
</plist>
|
|
68
|
+
`;
|
|
69
|
+
}
|
|
70
|
+
// Render one word of a unit-file setting. A value made only of characters
// that need no quoting is returned untouched, so ordinary paths yield exactly
// the same unit text as an unquoted interpolation. Anything else is wrapped
// in double quotes with `\`, `"` and `%` (specifier prefix) escaped; on an
// Exec line `$` is doubled too, because variables are expanded there.
function systemdWord(value, execLine = false) {
    if (/^[A-Za-z0-9_@:+.,\/=-]+$/.test(value))
        return value;
    let escaped = value
        .replace(/\\/g, "\\\\")
        .replace(/"/g, '\\"')
        .replace(/%/g, "%%");
    if (execLine)
        escaped = escaped.replace(/\$/g, "$$$$");
    return `"${escaped}"`;
}
/**
 * Render the systemd user unit for the daemon.
 *
 * @param node Path of the Node executable to run.
 * @param dataDir Data directory, exported as `MEMWARDEN_DATA_DIR`.
 * @param secret Optional auth secret; when given it is exported as
 *   `MEMWARDEN_SECRET`.
 * @returns The unit file text.
 */
function systemdUnit(node, dataDir, secret) {
    const assignments = [`MEMWARDEN_DATA_DIR=${dataDir}`];
    // Same reason as the plist: the managed daemon needs MEMWARDEN_SECRET in its
    // environment to enforce auth. Only emitted when a secret was resolved.
    if (secret)
        assignments.push(`MEMWARDEN_SECRET=${secret}`);
    const environmentLines = assignments
        .map((assignment) => `Environment=${systemdWord(assignment)}`)
        .join("\n");
    const execStart = [node, DAEMON_ENTRY]
        .map((word) => systemdWord(word, true))
        .join(" ");
    // Restart=on-failure: relaunch after a failing exit only.
    return `[Unit]
Description=memwarden memory daemon
After=network.target

[Service]
ExecStart=${execStart}
${environmentLines}
Restart=on-failure
RestartSec=2

[Install]
WantedBy=default.target
`;
}
|
|
90
|
+
/**
 * Install + start the supervised daemon for this platform. Best-effort. When
 * `secret` is provided it is baked into the service environment so the
 * login-launched daemon enforces auth (otherwise a managed daemon would run
 * open even though the CLI generated a secret).
 *
 * @param dataDir Data directory passed on to the daemon; created when missing.
 * @param secret Optional auth secret written into the service definition.
 * @returns What was attempted and whether it worked; failures are reported
 *   through `ok: false` and `message` rather than thrown.
 */
export function installService(dataDir, secret) {
    const home = homedir();
    const node = process.execPath;
    const installedMessage = "starts at login, restarts on crash";
    // The service file can carry the secret in plaintext, so it must never be
    // readable by others, not even between the write and a later chmod. An
    // existing file is tightened before it is rewritten; a new file is created
    // owner-only through the `mode` option (which only applies on creation).
    const writeOwnerOnly = (file, contents) => {
        try {
            chmodSync(file, 0o600);
        }
        catch {
            // no file yet, or its mode cannot be changed; the write below
            // applies the create mode or reports the real error
        }
        writeFileSync(file, contents, { encoding: "utf8", mode: 0o600 });
    };
    try {
        mkdirSync(dataDir, { recursive: true });
    }
    catch {
        // non-fatal: nothing below writes into dataDir, so installation can
        // still proceed
    }
    if (process.platform === "darwin") {
        const path = plistPath(home);
        try {
            mkdirSync(dirname(path), { recursive: true });
            writeOwnerOnly(path, macPlist(node, dataDir, secret));
            try {
                execFileSync("launchctl", ["unload", path], { stdio: "ignore" });
            }
            catch {
                // not previously loaded — fine
            }
            execFileSync("launchctl", ["load", "-w", path], { stdio: "ignore" });
            return { kind: "launchd", ok: true, path, message: installedMessage };
        }
        catch (err) {
            return { kind: "launchd", ok: false, path, message: errMsg(err) };
        }
    }
    if (process.platform === "linux") {
        const path = systemdPath(home);
        try {
            mkdirSync(dirname(path), { recursive: true });
            writeOwnerOnly(path, systemdUnit(node, dataDir, secret));
            execFileSync("systemctl", ["--user", "daemon-reload"], { stdio: "ignore" });
            execFileSync("systemctl", ["--user", "enable", "--now", "memwarden"], {
                stdio: "ignore",
            });
            return { kind: "systemd", ok: true, path, message: installedMessage };
        }
        catch (err) {
            return { kind: "systemd", ok: false, path, message: errMsg(err) };
        }
    }
    return {
        kind: "unsupported",
        ok: false,
        message: `no supported service manager for ${process.platform}`,
    };
}
|
|
168
|
+
/**
 * Stop + remove the supervised daemon. Best-effort.
 *
 * @returns What was attempted and whether it worked; failures are reported
 *   through `ok: false` and `message` rather than thrown.
 */
export function uninstallService() {
    const home = homedir();
    if (process.platform === "darwin") {
        const path = plistPath(home);
        try {
            try {
                execFileSync("launchctl", ["unload", path], { stdio: "ignore" });
            }
            catch {
                // not loaded
            }
            rmSync(path, { force: true });
            return { kind: "launchd", ok: true, path, message: "removed" };
        }
        catch (err) {
            return { kind: "launchd", ok: false, path, message: errMsg(err) };
        }
    }
    if (process.platform === "linux") {
        const path = systemdPath(home);
        try {
            try {
                execFileSync("systemctl", ["--user", "disable", "--now", "memwarden"], {
                    stdio: "ignore",
                });
            }
            catch {
                // not enabled
            }
            rmSync(path, { force: true });
            try {
                // Mirror the install path: have the manager re-read its unit
                // files so the deleted unit does not linger in its loaded set.
                execFileSync("systemctl", ["--user", "daemon-reload"], { stdio: "ignore" });
            }
            catch {
                // best-effort; the file itself is already gone
            }
            return { kind: "systemd", ok: true, path, message: "removed" };
        }
        catch (err) {
            return { kind: "systemd", ok: false, path, message: errMsg(err) };
        }
    }
    return {
        kind: "unsupported",
        ok: false,
        message: `no supported service manager for ${process.platform}`,
    };
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from "../functions/types.js";
|
|
2
|
+
export { LocalEmbeddingProvider } from "./local-embedding.js";
|
|
3
|
+
/**
 * Provider selected through `MEMWARDEN_EMBEDDING_PROVIDER`: the value "none"
 * (trimmed, case-insensitive) yields "none"; anything else, including an
 * unset variable, yields "local".
 */
export declare function getEmbeddingProviderName(): "local" | "none";
|
|
4
|
+
/** Model name from `MEMWARDEN_EMBEDDING_MODEL` (trimmed), or "Xenova/all-MiniLM-L6-v2" when it is unset or blank. */
export declare function getEmbeddingModel(): string;
|
|
5
|
+
/**
|
|
6
|
+
* Build the configured embedding provider, or null for BM25-only mode.
|
|
7
|
+
* Construction is cheap (no model load); the model loads lazily on first
|
|
8
|
+
* embed, or eagerly via warmup() at boot.
|
|
9
|
+
*/
|
|
10
|
+
export declare function createEmbeddingProvider(): EmbeddingProvider | null;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Embedding provider selection. memwarden's vector stream (and therefore
|
|
3
|
+
// the TurboQuant-compressed semantic memory that distinguishes it) only
|
|
4
|
+
// lights up when a provider is configured here.
|
|
5
|
+
//
|
|
6
|
+
// MEMWARDEN_EMBEDDING_PROVIDER = local (default) | none
|
|
7
|
+
// MEMWARDEN_EMBEDDING_MODEL = Xenova/all-MiniLM-L6-v2 (default)
|
|
8
|
+
//
|
|
9
|
+
// "local" runs entirely on-device (see local-embedding.ts). "none" keeps
|
|
10
|
+
// memwarden in BM25-only mode. Cloud providers (openai, voyage) can be
|
|
11
|
+
// added here later behind the same interface without touching callers.
|
|
12
|
+
import { LocalEmbeddingProvider } from "./local-embedding.js";
|
|
13
|
+
export { LocalEmbeddingProvider } from "./local-embedding.js";
|
|
14
|
+
// Resolve the provider from MEMWARDEN_EMBEDDING_PROVIDER (trimmed,
// case-insensitive). Only "none" switches embeddings off; an unset variable
// or any other value selects "local".
export function getEmbeddingProviderName() {
    const configured = process.env.MEMWARDEN_EMBEDDING_PROVIDER;
    const normalized = (configured ?? "local").trim().toLowerCase();
    if (normalized === "none") {
        return "none";
    }
    return "local";
}
|
|
20
|
+
// Resolve the model from MEMWARDEN_EMBEDDING_MODEL; an unset or blank value
// selects the MiniLM default.
export function getEmbeddingModel() {
    const configured = process.env.MEMWARDEN_EMBEDDING_MODEL?.trim();
    if (!configured) {
        return "Xenova/all-MiniLM-L6-v2";
    }
    return configured;
}
|
|
24
|
+
/**
|
|
25
|
+
* Build the configured embedding provider, or null for BM25-only mode.
|
|
26
|
+
* Construction is cheap (no model load); the model loads lazily on first
|
|
27
|
+
* embed, or eagerly via warmup() at boot.
|
|
28
|
+
*/
|
|
29
|
+
export function createEmbeddingProvider() {
|
|
30
|
+
if (getEmbeddingProviderName() === "none")
|
|
31
|
+
return null;
|
|
32
|
+
return new LocalEmbeddingProvider(getEmbeddingModel());
|
|
33
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from "../functions/types.js";
|
|
2
|
+
/** Embedding provider that runs the model inside this process. */
export declare class LocalEmbeddingProvider implements EmbeddingProvider {
    /** Provider label of the form `local:<model>`. */
    readonly name: string;
    /** Vector length expected from the model. */
    readonly dimensions: number;
    private model;
    private extractor;
    private loading;
    /** @param model Model identifier; a built-in default is used when omitted. */
    constructor(model?: string);
    private ensure;
    /** Warm the model so the first observe/search doesn't pay the load. */
    warmup(): Promise<void>;
    /** Embed a single text into one vector. */
    embed(text: string): Promise<Float32Array>;
    /** Embed several texts in one call; an empty input yields an empty array. */
    embedBatch(texts: string[]): Promise<Float32Array[]>;
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Local, in-process embedding provider. Runs a sentence-transformer
|
|
3
|
+
// (all-MiniLM-L6-v2, 384-dim) through transformers.js on the WASM/WebGPU
|
|
4
|
+
// backend — no Python, no CUDA, no native compilation, so it preserves
|
|
5
|
+
// memwarden's zero-native-dependency, self-custody promise. The model
|
|
6
|
+
// (~23MB ONNX) is downloaded once on first use and cached on disk.
|
|
7
|
+
//
|
|
8
|
+
// transformers.js is an OPTIONAL dependency, loaded lazily via a dynamic
|
|
9
|
+
// import with a variable specifier so the core typechecks and the test
|
|
10
|
+
// suite runs without it installed. If it is absent, embed() throws a clear
|
|
11
|
+
// message and the vector stream stays off (BM25 keeps working) — the
|
|
12
|
+
// guarded add path in search.ts soft-fails, it never breaks observe.
|
|
13
|
+
const DEFAULT_MODEL = "Xenova/all-MiniLM-L6-v2";
|
|
14
|
+
const MINILM_DIMS = 384;
|
|
15
|
+
// Known output dimensions for the models we ship as presets. Anything else
|
|
16
|
+
// is probed from the first embedding.
|
|
17
|
+
const KNOWN_DIMS = {
|
|
18
|
+
"Xenova/all-MiniLM-L6-v2": 384,
|
|
19
|
+
"Xenova/bge-small-en-v1.5": 384,
|
|
20
|
+
"Xenova/nomic-embed-text-v1": 768,
|
|
21
|
+
};
|
|
22
|
+
export class LocalEmbeddingProvider {
|
|
23
|
+
name;
|
|
24
|
+
dimensions;
|
|
25
|
+
model;
|
|
26
|
+
extractor = null;
|
|
27
|
+
loading = null;
|
|
28
|
+
constructor(model = DEFAULT_MODEL) {
|
|
29
|
+
this.model = model;
|
|
30
|
+
this.name = `local:${model}`;
|
|
31
|
+
this.dimensions = KNOWN_DIMS[model] ?? MINILM_DIMS;
|
|
32
|
+
}
|
|
33
|
+
// Lazily construct the feature-extraction pipeline. The import uses a
|
|
34
|
+
// variable specifier on purpose: it keeps tsc from resolving the optional
|
|
35
|
+
// package at build time and isolates the heavy load to first use.
|
|
36
|
+
async ensure() {
|
|
37
|
+
if (this.extractor)
|
|
38
|
+
return this.extractor;
|
|
39
|
+
if (!this.loading) {
|
|
40
|
+
this.loading = (async () => {
|
|
41
|
+
const specifier = "@huggingface/transformers";
|
|
42
|
+
let mod;
|
|
43
|
+
try {
|
|
44
|
+
mod = (await import(specifier));
|
|
45
|
+
}
|
|
46
|
+
catch {
|
|
47
|
+
throw new Error("Local embeddings require the optional '@huggingface/transformers' package. " +
|
|
48
|
+
"Install it with: npm install @huggingface/transformers");
|
|
49
|
+
}
|
|
50
|
+
const pipeline = mod.pipeline;
|
|
51
|
+
if (typeof pipeline !== "function") {
|
|
52
|
+
throw new Error("@huggingface/transformers did not export a usable 'pipeline'");
|
|
53
|
+
}
|
|
54
|
+
const extractor = await pipeline("feature-extraction", this.model);
|
|
55
|
+
this.extractor = extractor;
|
|
56
|
+
return extractor;
|
|
57
|
+
})();
|
|
58
|
+
}
|
|
59
|
+
return this.loading;
|
|
60
|
+
}
|
|
61
|
+
/** Warm the model so the first observe/search doesn't pay the load. */
|
|
62
|
+
async warmup() {
|
|
63
|
+
await this.ensure();
|
|
64
|
+
}
|
|
65
|
+
async embed(text) {
|
|
66
|
+
const extractor = await this.ensure();
|
|
67
|
+
const out = await extractor(text, { pooling: "mean", normalize: true });
|
|
68
|
+
const row = out.tolist()[0];
|
|
69
|
+
if (!row)
|
|
70
|
+
throw new Error("embedding extraction returned no rows");
|
|
71
|
+
return Float32Array.from(row);
|
|
72
|
+
}
|
|
73
|
+
async embedBatch(texts) {
|
|
74
|
+
if (texts.length === 0)
|
|
75
|
+
return [];
|
|
76
|
+
const extractor = await this.ensure();
|
|
77
|
+
const out = await extractor(texts, { pooling: "mean", normalize: true });
|
|
78
|
+
return out.tolist().map((row) => Float32Array.from(row));
|
|
79
|
+
}
|
|
80
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { StateKV } from "../state/kv.js";
|
|
2
|
+
/** Usage record kept for one memory. */
export interface AccessLog {
    /** Id of the memory this record belongs to. */
    memoryId: string;
    /** Total number of recorded accesses. */
    count: number;
    /** ISO-8601 time of the latest recorded access; "" when there is none. */
    lastAt: string;
    /** Millisecond timestamps of the most recent accesses, in recording order. */
    recent: number[];
}
|
|
8
|
+
/** Build the zero-state log for `memoryId`: count 0, empty `lastAt`, no timestamps. */
export declare function emptyAccessLog(memoryId: string): AccessLog;
|
|
9
|
+
/**
 * Coerce an arbitrary stored value into a well-formed `AccessLog`. Fields of
 * the wrong type fall back to empty defaults, non-finite timestamps are
 * dropped, and only the newest timestamps (up to the retention cap) are kept.
 */
export declare function normalizeAccessLog(raw: unknown): AccessLog;
|
|
10
|
+
/** Load the log for `memoryId`; resolves to an empty log when nothing is stored or the read fails. */
export declare function getAccessLog(kv: StateKV, memoryId: string): Promise<AccessLog>;
|
|
11
|
+
/**
 * Record one access to `memoryId` at `timestampMs` (defaults to now).
 * A no-op for an empty id; failures are logged and swallowed, never thrown.
 */
export declare function recordAccess(kv: StateKV, memoryId: string, timestampMs?: number): Promise<void>;
|
|
12
|
+
/** Record one access for each distinct non-empty id, all stamped with the same time (defaults to now). */
export declare function recordAccessBatch(kv: StateKV, memoryIds: string[], timestampMs?: number): Promise<void>;
|
|
13
|
+
/** Remove the stored log for `memoryId`. Best-effort: a no-op for an empty id, and errors are swallowed. */
export declare function deleteAccessLog(kv: StateKV, memoryId: string): Promise<void>;
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Access-frequency tracking for retention scoring. search.ts and context.ts
|
|
3
|
+
// call recordAccessBatch fire-and-forget after assembling results, so later
|
|
4
|
+
// retention/decay can weight memories by how recently they were used. Each
|
|
5
|
+
// per-memory write is serialized through the keyed mutex, and every failure is
|
|
6
|
+
// swallowed: access tracking must never break a read.
|
|
7
|
+
import { KV } from "../state/schema.js";
|
|
8
|
+
import { withKeyedLock } from "./keyed-mutex.js";
|
|
9
|
+
import { logger } from "./logger.js";
|
|
10
|
+
// Maximum number of access timestamps retained per memory in `recent`.
const RECENT_CAP = 20;
|
|
11
|
+
// Zero-state log for a memory with no recorded accesses. Every call returns a
// fresh object with its own `recent` array.
export function emptyAccessLog(memoryId) {
    const blank = {
        memoryId,
        count: 0,
        lastAt: "",
        recent: [],
    };
    return blank;
}
|
|
14
|
+
// Coerce an arbitrary stored value into a well-formed access log. Fields of
// the wrong type fall back to empty defaults; timestamps that are not finite
// numbers are dropped and only the newest RECENT_CAP of them are kept.
export function normalizeAccessLog(raw) {
    const source = (raw ?? {});
    const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
    let storedCount = 0;
    if (isFiniteNumber(source.count)) {
        storedCount = Math.max(0, Math.floor(source.count));
    }
    let recent = [];
    if (Array.isArray(source.recent)) {
        recent = source.recent
            .filter((entry) => isFiniteNumber(entry))
            .slice(-RECENT_CAP);
    }
    const memoryId = typeof source.memoryId === "string" ? source.memoryId : "";
    const lastAt = typeof source.lastAt === "string" ? source.lastAt : "";
    // count can never be below the number of timestamps we actually hold
    const count = Math.max(storedCount, recent.length);
    return { memoryId, count, lastAt, recent };
}
|
|
31
|
+
export async function getAccessLog(kv, memoryId) {
|
|
32
|
+
try {
|
|
33
|
+
const raw = await kv.get(KV.accessLog, memoryId);
|
|
34
|
+
if (!raw)
|
|
35
|
+
return emptyAccessLog(memoryId);
|
|
36
|
+
const log = normalizeAccessLog(raw);
|
|
37
|
+
if (!log.memoryId)
|
|
38
|
+
log.memoryId = memoryId;
|
|
39
|
+
return log;
|
|
40
|
+
}
|
|
41
|
+
catch {
|
|
42
|
+
return emptyAccessLog(memoryId);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
// Name of the keyed lock that serializes writes to one memory's access log.
function keyFor(memoryId) {
    const prefix = "mem:access:";
    return prefix.concat(memoryId);
}
|
|
48
|
+
export async function recordAccess(kv, memoryId, timestampMs) {
|
|
49
|
+
if (!memoryId)
|
|
50
|
+
return;
|
|
51
|
+
const ts = timestampMs ?? Date.now();
|
|
52
|
+
try {
|
|
53
|
+
await withKeyedLock(keyFor(memoryId), async () => {
|
|
54
|
+
const log = await getAccessLog(kv, memoryId);
|
|
55
|
+
log.count += 1;
|
|
56
|
+
log.lastAt = new Date(ts).toISOString();
|
|
57
|
+
log.recent.push(ts);
|
|
58
|
+
if (log.recent.length > RECENT_CAP) {
|
|
59
|
+
log.recent = log.recent.slice(-RECENT_CAP);
|
|
60
|
+
}
|
|
61
|
+
await kv.set(KV.accessLog, memoryId, log);
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
catch (err) {
|
|
65
|
+
try {
|
|
66
|
+
logger.warn("recordAccess failed", {
|
|
67
|
+
memoryId,
|
|
68
|
+
error: err instanceof Error ? err.message : String(err),
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
catch {
|
|
72
|
+
// the side path must never throw
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
export async function recordAccessBatch(kv, memoryIds, timestampMs) {
|
|
77
|
+
if (!memoryIds || memoryIds.length === 0)
|
|
78
|
+
return;
|
|
79
|
+
const ts = timestampMs ?? Date.now();
|
|
80
|
+
const ids = [...new Set(memoryIds.filter(Boolean))];
|
|
81
|
+
await Promise.allSettled(ids.map((id) => recordAccess(kv, id, ts)));
|
|
82
|
+
}
|
|
83
|
+
export async function deleteAccessLog(kv, memoryId) {
|
|
84
|
+
if (!memoryId)
|
|
85
|
+
return;
|
|
86
|
+
try {
|
|
87
|
+
await withKeyedLock(keyFor(memoryId), () => kv.delete(KV.accessLog, memoryId));
|
|
88
|
+
}
|
|
89
|
+
catch {
|
|
90
|
+
// best-effort, idempotent
|
|
91
|
+
}
|
|
92
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
export type StoreKind = "sqlite" | "markdown" | "json";
|
|
2
|
+
/** One memory entry read from a store being audited. */
export interface ForeignMemory {
    id: string;
    title: string;
    text: string;
    /** File references found in (or stored alongside) the memory. */
    files: string[];
    /** ISO timestamp when the memory was recorded, if the store has one. */
    timestamp?: string;
    /** Where in the store this came from (table, file, array index). */
    origin: string;
}
|
|
13
|
+
export type AuditStatus = "missing" | "drifted" | "present" | "unanchored";
|
|
14
|
+
/** A single audit result for one memory. */
export interface AuditFinding {
    // NOTE(review): id / title / origin presumably mirror the ForeignMemory
    // fields of the same names - confirm against the implementation.
    id: string;
    title: string;
    origin: string;
    /** Classification assigned to the memory. */
    status: AuditStatus;
    /** Free-text explanation accompanying the status. */
    detail: string;
}
|
|
21
|
+
/** Summary produced by auditing one store. */
export interface AuditReport {
    store: string;
    kind: StoreKind;
    root: string;
    total: number;
    /** Number of memories that carried at least one checkable file reference. */
    anchored: number;
    uniqueFiles: number;
    /** Findings whose status is "missing". NOTE(review): inferred from the field name - confirm. */
    missing: AuditFinding[];
    /** Findings whose status is "drifted". NOTE(review): inferred from the field name - confirm. */
    drifted: AuditFinding[];
    present: number;
    unanchored: number;
    /** True when at least one memory had a timestamp, so drift was checkable. */
    driftCheckable: boolean;
}
|
|
36
|
+
/** Extract file references from free text. Exported for tests. */
|
|
37
|
+
export declare function extractFileRefs(text: string): {
|
|
38
|
+
pathRefs: string[];
|
|
39
|
+
bareRefs: string[];
|
|
40
|
+
};
|
|
41
|
+
export declare function detectStoreKind(path: string): StoreKind;
|
|
42
|
+
export declare function loadStore(path: string): Promise<{
|
|
43
|
+
kind: StoreKind;
|
|
44
|
+
memories: ForeignMemory[];
|
|
45
|
+
}>;
|
|
46
|
+
export declare function auditStore(storePath: string, root: string): Promise<AuditReport>;
|
|
Binary file
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
type Script = "han" | "kana" | "hangul" | "other";
|
|
2
|
+
export declare function hasCjk(text: string): boolean;
|
|
3
|
+
export declare function detectScript(text: string): Script;
|
|
4
|
+
export declare function segmentCjk(text: string): string[];
|
|
5
|
+
export declare function __resetCjkSegmenterStateForTests(): void;
|
|
6
|
+
export {};
|