memwarden 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +402 -0
- package/dist/bundle/bundle.d.ts +28 -0
- package/dist/bundle/bundle.js +85 -0
- package/dist/cli/bin.d.ts +2 -0
- package/dist/cli/bin.js +593 -0
- package/dist/cli/connect.d.ts +63 -0
- package/dist/cli/connect.js +121 -0
- package/dist/cli/hook.d.ts +24 -0
- package/dist/cli/hook.js +186 -0
- package/dist/cli/tools.d.ts +47 -0
- package/dist/cli/tools.js +246 -0
- package/dist/daemon/ensure.d.ts +12 -0
- package/dist/daemon/ensure.js +54 -0
- package/dist/daemon/service.d.ts +15 -0
- package/dist/daemon/service.js +210 -0
- package/dist/embedding/index.d.ts +10 -0
- package/dist/embedding/index.js +33 -0
- package/dist/embedding/local-embedding.d.ts +14 -0
- package/dist/embedding/local-embedding.js +80 -0
- package/dist/functions/access-tracker.d.ts +13 -0
- package/dist/functions/access-tracker.js +92 -0
- package/dist/functions/audit.d.ts +46 -0
- package/dist/functions/audit.js +0 -0
- package/dist/functions/cjk-segmenter.d.ts +6 -0
- package/dist/functions/cjk-segmenter.js +120 -0
- package/dist/functions/compress-synthetic.d.ts +2 -0
- package/dist/functions/compress-synthetic.js +104 -0
- package/dist/functions/config.d.ts +68 -0
- package/dist/functions/config.js +231 -0
- package/dist/functions/conflicts.d.ts +19 -0
- package/dist/functions/conflicts.js +328 -0
- package/dist/functions/context.d.ts +3 -0
- package/dist/functions/context.js +155 -0
- package/dist/functions/dedup.d.ts +11 -0
- package/dist/functions/dedup.js +51 -0
- package/dist/functions/dejafix.d.ts +96 -0
- package/dist/functions/dejafix.js +356 -0
- package/dist/functions/doctor.d.ts +29 -0
- package/dist/functions/doctor.js +137 -0
- package/dist/functions/forget.d.ts +3 -0
- package/dist/functions/forget.js +87 -0
- package/dist/functions/hybrid-search.d.ts +17 -0
- package/dist/functions/hybrid-search.js +205 -0
- package/dist/functions/index.d.ts +32 -0
- package/dist/functions/index.js +44 -0
- package/dist/functions/keyed-mutex.d.ts +1 -0
- package/dist/functions/keyed-mutex.js +21 -0
- package/dist/functions/logger.d.ts +6 -0
- package/dist/functions/logger.js +37 -0
- package/dist/functions/memory-utils.d.ts +2 -0
- package/dist/functions/memory-utils.js +29 -0
- package/dist/functions/observe.d.ts +5 -0
- package/dist/functions/observe.js +326 -0
- package/dist/functions/paths.d.ts +1 -0
- package/dist/functions/paths.js +38 -0
- package/dist/functions/privacy.d.ts +1 -0
- package/dist/functions/privacy.js +30 -0
- package/dist/functions/provenance.d.ts +9 -0
- package/dist/functions/provenance.js +57 -0
- package/dist/functions/quantized-vector-index.d.ts +60 -0
- package/dist/functions/quantized-vector-index.js +275 -0
- package/dist/functions/receipt.d.ts +31 -0
- package/dist/functions/receipt.js +95 -0
- package/dist/functions/search-index.d.ts +27 -0
- package/dist/functions/search-index.js +217 -0
- package/dist/functions/search.d.ts +25 -0
- package/dist/functions/search.js +523 -0
- package/dist/functions/stemmer.d.ts +1 -0
- package/dist/functions/stemmer.js +110 -0
- package/dist/functions/synonyms.d.ts +1 -0
- package/dist/functions/synonyms.js +69 -0
- package/dist/functions/turboquant.d.ts +53 -0
- package/dist/functions/turboquant.js +278 -0
- package/dist/functions/types.d.ts +217 -0
- package/dist/functions/types.js +8 -0
- package/dist/functions/vector-index.d.ts +25 -0
- package/dist/functions/vector-index.js +125 -0
- package/dist/functions/vector-persistence.d.ts +14 -0
- package/dist/functions/vector-persistence.js +75 -0
- package/dist/functions/verify.d.ts +13 -0
- package/dist/functions/verify.js +104 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +219 -0
- package/dist/kernel/http.d.ts +24 -0
- package/dist/kernel/http.js +261 -0
- package/dist/kernel/index.d.ts +19 -0
- package/dist/kernel/index.js +21 -0
- package/dist/kernel/kernel.d.ts +80 -0
- package/dist/kernel/kernel.js +297 -0
- package/dist/kernel/pubsub.d.ts +21 -0
- package/dist/kernel/pubsub.js +38 -0
- package/dist/kernel/types.d.ts +139 -0
- package/dist/kernel/types.js +20 -0
- package/dist/mcp/bin.d.ts +2 -0
- package/dist/mcp/bin.js +27 -0
- package/dist/mcp/server.d.ts +34 -0
- package/dist/mcp/server.js +377 -0
- package/dist/observability/metrics.d.ts +26 -0
- package/dist/observability/metrics.js +104 -0
- package/dist/proxy/server.d.ts +30 -0
- package/dist/proxy/server.js +331 -0
- package/dist/state/kv.d.ts +41 -0
- package/dist/state/kv.js +50 -0
- package/dist/state/oplog.d.ts +25 -0
- package/dist/state/oplog.js +57 -0
- package/dist/state/schema.d.ts +60 -0
- package/dist/state/schema.js +88 -0
- package/dist/state/store-libsql.d.ts +46 -0
- package/dist/state/store-libsql.js +263 -0
- package/dist/state/store-memory.d.ts +23 -0
- package/dist/state/store-memory.js +121 -0
- package/dist/state/store.d.ts +87 -0
- package/dist/state/store.js +58 -0
- package/dist/triggers/api.d.ts +14 -0
- package/dist/triggers/api.js +510 -0
- package/dist/triggers/auth.d.ts +1 -0
- package/dist/triggers/auth.js +13 -0
- package/package.json +58 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Flat brute-force cosine vector index: the full-precision baseline behind the
|
|
3
|
+
// VectorIndexLike contract (QuantizedVectorIndex is the compressed default).
|
|
4
|
+
// Pure and engine-independent.
|
|
5
|
+
//
|
|
6
|
+
// The base64 helpers pass byteOffset + byteLength explicitly on purpose:
|
|
7
|
+
// Buffer.from(b64, "base64") hands back a slice of Node's shared pool, and a
|
|
8
|
+
// naive `new Float32Array(buf.buffer)` would mint a view over the whole pool
|
|
9
|
+
// (phantom dimensions). The same care applies on encode if the source array is
|
|
10
|
+
// itself a view. Keep these exact.
|
|
11
|
+
/**
 * Encode the bytes covered by a Float32Array as a base64 string.
 *
 * The view's own byteOffset/byteLength are forwarded explicitly, so when `arr`
 * is a window onto a larger ArrayBuffer only the window is encoded.
 *
 * @param arr Float32Array (possibly a sub-view) to encode.
 * @returns base64 text of exactly `arr.byteLength` bytes.
 */
function float32ToBase64(arr) {
    const { buffer, byteOffset, byteLength } = arr;
    const bytes = Buffer.from(buffer, byteOffset, byteLength);
    return bytes.toString("base64");
}
|
|
14
|
+
/**
 * Decode a base64 string into a Float32Array.
 *
 * The returned array is a view over the decoded Buffer's memory, restricted to
 * that Buffer's own byteOffset and length so it never spans the rest of the
 * underlying ArrayBuffer.
 *
 * @param b64 base64 text produced from raw float32 bytes.
 * @returns Float32Array with one element per 4 decoded bytes.
 */
function base64ToFloat32(b64) {
    const bytes = Buffer.from(b64, "base64");
    const elementCount = bytes.byteLength / Float32Array.BYTES_PER_ELEMENT;
    return new Float32Array(bytes.buffer, bytes.byteOffset, elementCount);
}
|
|
18
|
+
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param a first vector (array or typed array).
 * @param b second vector (array or typed array).
 * @returns dot(a, b) / (|a| * |b|); 0 when the lengths differ or when either
 *          vector has zero magnitude (which includes empty vectors).
 */
function cosine(a, b) {
    const n = a.length;
    if (n !== b.length) {
        return 0;
    }
    let dot = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    let i = 0;
    while (i < n) {
        const ai = a[i];
        const bi = b[i];
        dot += ai * bi;
        sumSqA += ai * ai;
        sumSqB += bi * bi;
        i += 1;
    }
    const magnitude = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
    if (magnitude === 0) {
        return 0;
    }
    return dot / magnitude;
}
|
|
34
|
+
/**
 * Flat brute-force cosine index over full-precision embeddings.
 *
 * Entries are kept in a Map keyed by observation id; every search scores the
 * query against every stored vector.
 */
export class VectorIndex {
    /** obsId -> { embedding, sessionId } */
    vectors = new Map();
    /**
     * Insert or overwrite the entry for `obsId`. The embedding is stored by
     * reference, not copied.
     */
    add(obsId, sessionId, embedding) {
        this.vectors.set(obsId, { embedding, sessionId });
    }
    /** Drop the entry for `obsId`, if present. */
    remove(obsId) {
        this.vectors.delete(obsId);
    }
    /** Whether an entry exists for `obsId`. */
    has(obsId) {
        return this.vectors.has(obsId);
    }
    /** All stored observation ids, in insertion order. */
    ids() {
        return [...this.vectors.keys()];
    }
    /** Number of stored vectors. */
    get size() {
        return this.vectors.size;
    }
    /**
     * Score every stored vector against `query` and return the best matches.
     *
     * @param query embedding to compare against.
     * @param limit maximum number of results (default 20); negative values are
     *              treated as 0.
     * @returns `{ obsId, sessionId, score }` rows sorted by descending score.
     *          Vectors whose length differs from the query score 0.
     */
    search(query, limit = 20) {
        const scored = [];
        for (const [obsId, entry] of this.vectors) {
            scored.push({ obsId, sessionId: entry.sessionId, score: cosine(query, entry.embedding) });
        }
        scored.sort((a, b) => b.score - a.score);
        // A negative limit would make slice() count from the end and drop only
        // the tail; clamp so it means "no results" instead.
        const cap = Math.max(0, limit);
        return cap < scored.length ? scored.slice(0, cap) : scored;
    }
    /**
     * Reports any stored vectors whose dimension differs from `expected`, plus
     * the distinct dimensions seen. The only clean state is no mismatches and
     * a single seen dimension equal to `expected`.
     *
     * @returns `{ mismatches: [{ obsId, dim }], seenDimensions: Set<number> }`.
     */
    validateDimensions(expected) {
        const mismatches = [];
        const seenDimensions = new Set();
        for (const [obsId, entry] of this.vectors) {
            const dim = entry.embedding.length;
            seenDimensions.add(dim);
            if (dim !== expected)
                mismatches.push({ obsId, dim });
        }
        return { mismatches, seenDimensions };
    }
    /** Remove every entry. */
    clear() {
        this.vectors.clear();
    }
    /**
     * Replace this index's contents with a deep copy of `other`'s entries
     * (each embedding is copied into a fresh Float32Array).
     *
     * The copy is built completely before it is installed, so restoring from
     * the index itself keeps its entries, and a failure part-way through
     * leaves the previous contents untouched.
     */
    restoreFrom(other) {
        const copied = new Map();
        for (const [obsId, entry] of other.vectors) {
            copied.set(obsId, {
                embedding: new Float32Array(entry.embedding),
                sessionId: entry.sessionId,
            });
        }
        this.vectors = copied;
    }
    /**
     * Serialize to a JSON string: an array of
     * `[obsId, { embedding: <base64 float32 bytes>, sessionId }]` rows.
     */
    serialize() {
        const rows = [];
        for (const [obsId, entry] of this.vectors) {
            rows.push([obsId, { embedding: float32ToBase64(entry.embedding), sessionId: entry.sessionId }]);
        }
        return JSON.stringify(rows);
    }
    /**
     * Rebuild an index from `serialize()` output.
     *
     * Never throws: unparseable JSON or a non-array payload yields an empty
     * index, and rows that are malformed or fail to decode are skipped.
     */
    static deserialize(json) {
        const idx = new VectorIndex();
        let rows;
        try {
            rows = JSON.parse(json);
        }
        catch {
            return idx;
        }
        if (!Array.isArray(rows))
            return idx;
        for (const row of rows) {
            if (!Array.isArray(row) || row.length < 2)
                continue;
            const [obsId, entry] = row;
            if (typeof obsId !== "string" ||
                typeof entry?.embedding !== "string" ||
                typeof entry?.sessionId !== "string") {
                continue;
            }
            try {
                idx.vectors.set(obsId, {
                    embedding: base64ToFloat32(entry.embedding),
                    sessionId: entry.sessionId,
                });
            }
            catch {
                // skip a corrupt row rather than fail the whole restore
            }
        }
        return idx;
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { StateKV } from "../state/kv.js";
|
|
2
|
+
/**
|
|
3
|
+
* Persists the current quantized index. No-op (false) when quantization is
|
|
4
|
+
* disabled or the active index is not a QuantizedVectorIndex.
|
|
5
|
+
*/
|
|
6
|
+
// Also resolves false, after logging a warning, when the KV write itself throws.
export declare function persistVectorIndex(kv: StateKV): Promise<boolean>;
|
|
7
|
+
/**
|
|
8
|
+
* Loads a previously persisted quantized index and installs it as the
|
|
9
|
+
* active vector index. Returns true only when a valid blob was loaded AND
|
|
10
|
+
* its params match the current configuration (and the provider dimensions,
|
|
11
|
+
* when a provider is wired). Any mismatch leaves the current index in
|
|
12
|
+
* place and returns false so the caller can rebuild.
|
|
13
|
+
*/
|
|
14
|
+
// Also resolves false when quantization is disabled, when no non-empty string
// blob is stored, or when the KV read throws (logged as a warning).
export declare function loadVectorIndex(kv: StateKV): Promise<boolean>;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Quantized-vector-index persistence. Nothing else saves or loads vector
|
|
3
|
+
// state, so this owns it. Best-effort soft-fail throughout, matching search.ts:
|
|
4
|
+
// a persistence problem must never break observe/search.
|
|
5
|
+
//
|
|
6
|
+
// Layout: one blob under the `quantParams` scope. The blob embeds its own
|
|
7
|
+
// params (seed, bits, dims, version, level-table hash); deserialize
|
|
8
|
+
// validates them and returns null on any mismatch, which callers treat as
|
|
9
|
+
// "rebuild from source of truth".
|
|
10
|
+
import { KV } from "../state/schema.js";
|
|
11
|
+
import { QuantizedVectorIndex } from "./quantized-vector-index.js";
|
|
12
|
+
import { getVectorIndex, setVectorIndex, getEmbeddingProvider } from "./search.js";
|
|
13
|
+
import { isQuantizedVectorEnabled, getQuantRescoreDepth } from "./config.js";
|
|
14
|
+
import { logger } from "./logger.js";
|
|
15
|
+
const BLOB_KEY = "index-blob";
|
|
16
|
+
/**
 * Write the active quantized vector index to the KV store as a single blob.
 *
 * Best-effort: a failing write is logged as a warning and reported as `false`
 * instead of being thrown.
 *
 * @param kv state KV handle used for the write.
 * @returns true when the blob was written; false when quantization is
 *          disabled, the active index is not a QuantizedVectorIndex, or the
 *          write failed.
 */
export async function persistVectorIndex(kv) {
    const active = getVectorIndex();
    const canPersist = isQuantizedVectorEnabled() && active instanceof QuantizedVectorIndex;
    if (!canPersist) {
        return false;
    }
    try {
        const blob = active.serialize();
        await kv.set(KV.quantParams, BLOB_KEY, blob);
        return true;
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        logger.warn("vector-persistence: persist failed", { error });
        return false;
    }
}
|
|
36
|
+
/**
 * Restore a previously persisted quantized index and make it the active
 * vector index.
 *
 * The stored blob is rejected (current index left in place, `false` returned)
 * when it is missing or empty, when deserialization returns nothing, or when
 * an embedding provider is wired and its dimensions differ from the stored
 * ones. Any thrown error is logged as a warning and also yields `false`, so
 * the caller can rebuild.
 *
 * @param kv state KV handle used for the read.
 * @returns true only when a stored index was installed.
 */
export async function loadVectorIndex(kv) {
    if (!isQuantizedVectorEnabled()) {
        return false;
    }
    try {
        const stored = await kv.get(KV.quantParams, BLOB_KEY);
        const hasBlob = typeof stored === "string" && stored.length > 0;
        if (!hasBlob) {
            return false;
        }
        const restored = QuantizedVectorIndex.deserialize(stored);
        if (!restored) {
            logger.warn("vector-persistence: stored index params no longer valid — rebuild required");
            return false;
        }
        const provider = getEmbeddingProvider();
        if (provider) {
            const providerDims = provider.dimensions;
            const storedDims = restored.params.dims;
            if (providerDims !== storedDims) {
                logger.warn("vector-persistence: stored dims mismatch provider — rebuild", {
                    stored: storedDims,
                    provider: providerDims,
                });
                return false;
            }
        }
        // The blob remembers the rescore depth it was built with; the value
        // from the current configuration takes precedence.
        const rescoreDepth = getQuantRescoreDepth();
        restored.reconcileRescoreDepth(rescoreDepth);
        setVectorIndex(restored);
        return true;
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        logger.warn("vector-persistence: load failed", { error });
        return false;
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { Provenance } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Hash the referenced files under `root` at capture time (best-effort). Files
|
|
4
|
+
* that don't exist or are too large are simply omitted; the result is stored
|
|
5
|
+
* in provenance so later recall can detect content drift.
|
|
6
|
+
*/
|
|
7
|
+
// Relative entries are resolved against `root`; absolute entries are used as-is.
// Keys of the returned record are the `files` entries exactly as given (not the
// resolved paths); values are hex SHA-256 digests.
export declare function hashFiles(files: string[] | undefined, root: string): Record<string, string>;
|
|
8
|
+
/**
 * Trust classification of a memory's provenance:
 * - "verified": referenced files exist and still match their capture-time hashes.
 * - "sourced_unverified": there is a source (command, confirmation, or files
 *   that could not be content-checked), but it was not content-verified.
 * - "stale": a referenced file was deleted or its content changed.
 * - "unsourced": no file, command, or user-confirmation evidence.
 */
export type VerifyStatus = "verified" | "sourced_unverified" | "stale" | "unsourced";
|
|
9
|
+
/** Result of classifying a memory's provenance. */
export interface Verdict {
    /** The trust classification that was assigned. */
    status: VerifyStatus;
    /** Human-readable explanation of why that status was assigned. */
    reason: string;
}
|
|
13
|
+
/**
 * Classify `prov` against the files currently on disk under `root` and return
 * the resulting status together with a human-readable reason.
 */
export declare function classifyProvenance(prov: Provenance | undefined, root: string): Verdict;
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Verified Recall: classify a memory's trustworthiness against the live repo.
|
|
3
|
+
// This is what makes "verified" literal — not just "does the file exist" but
|
|
4
|
+
// "is the file still what it was when we learned this".
|
|
5
|
+
//
|
|
6
|
+
// verified a referenced file exists and still matches its
|
|
7
|
+
// capture-time content hash (code-backed and current)
|
|
8
|
+
// sourced_unverified sourced (command/confirmation, or files present but
|
|
9
|
+
// none hashable), so allowed, but NOT content-verified
|
|
10
|
+
// stale a referenced file was deleted, or its content changed
|
|
11
|
+
// unsourced no evidence at all (no files, no command, not confirmed)
|
|
12
|
+
//
|
|
13
|
+
// All checks read the repo, so this runs in the daemon (same machine). Hashing
|
|
14
|
+
// is best-effort: files missing at capture, non-files, and files over the size
|
|
15
|
+
// cap are not hashed, so such a memory verifies by existence only and reports
|
|
16
|
+
// sourced_unverified rather than verified.
|
|
17
|
+
import { createHash } from "node:crypto";
|
|
18
|
+
import { existsSync, readFileSync, statSync } from "node:fs";
|
|
19
|
+
import { isAbsolute, resolve } from "node:path";
|
|
20
|
+
import { isUnsourced } from "./provenance.js";
|
|
21
|
+
// Size cap for hashing: anything larger is treated as unhashed (existence-only).
const MAX_HASH_BYTES = 2_000_000;
/**
 * SHA-256 of a file's contents as a hex string.
 *
 * @param abs path of the file to hash.
 * @returns the hex digest, or null when the path is not a regular file, is
 *          larger than MAX_HASH_BYTES, or cannot be stat'ed/read.
 */
function hashFile(abs) {
    try {
        const info = statSync(abs);
        const hashable = info.isFile() && info.size <= MAX_HASH_BYTES;
        if (!hashable) {
            return null;
        }
        const hasher = createHash("sha256");
        hasher.update(readFileSync(abs));
        return hasher.digest("hex");
    }
    catch {
        return null;
    }
}
|
|
34
|
+
/**
 * Turn a referenced file path into an absolute one.
 *
 * @param root directory that relative paths are resolved against.
 * @param file absolute or relative path.
 * @returns `file` unchanged when it is already absolute, otherwise
 *          `resolve(root, file)`. No containment check is performed.
 */
function resolveUnder(root, file) {
    if (isAbsolute(file)) {
        return file;
    }
    return resolve(root, file);
}
|
|
37
|
+
/**
 * Best-effort content hashes for the referenced files, taken at capture time
 * so a later recall can detect drift.
 *
 * @param files referenced paths (relative ones are resolved against `root`);
 *              a falsy value yields an empty result.
 * @param root directory that relative paths are resolved against.
 * @returns map from each entry of `files`, exactly as given, to its hex
 *          digest. Entries that could not be hashed (missing, not a regular
 *          file, over the size cap) are left out.
 */
export function hashFiles(files, root) {
    const digests = {};
    if (files) {
        for (const entry of files) {
            const digest = hashFile(resolveUnder(root, entry));
            if (digest) {
                digests[entry] = digest;
            }
        }
    }
    return digests;
}
|
|
53
|
+
/**
 * Classify how far a memory's provenance can be trusted against the files
 * currently on disk.
 *
 * - "unsourced" when `isUnsourced(prov)` says there is no evidence.
 * - "stale" when any referenced file is missing, or hashes differently from
 *   the hash recorded in `prov.fileHashes`.
 * - "verified" when at least one file matched its recorded hash and no
 *   existing file went unchecked.
 * - "sourced_unverified" otherwise.
 *
 * @param prov provenance record (may be undefined); `files` and `fileHashes`
 *             are read from it.
 * @param root directory that relative file paths are resolved against.
 * @returns `{ status, reason }`.
 */
export function classifyProvenance(prov, root) {
    if (isUnsourced(prov)) {
        return { status: "unsourced", reason: "no file, command, or user-confirmation evidence" };
    }
    const referenced = prov?.files ?? [];
    const capturedHashes = prov?.fileHashes ?? {};
    const deleted = [];
    const changed = [];
    let hashMatched = 0; // existing files whose recorded hash still matches
    let unchecked = 0; // existing files that could not be content-checked
    for (const entry of referenced) {
        const abs = resolveUnder(root, entry);
        if (!existsSync(abs)) {
            deleted.push(entry);
            continue;
        }
        const expected = capturedHashes[entry];
        // Only hash the live file when there is a recorded hash to compare with.
        const actual = expected ? hashFile(abs) : null;
        if (!expected || !actual) {
            // No hash was captured, or the file cannot be hashed now.
            unchecked += 1;
        }
        else if (actual === expected) {
            hashMatched += 1;
        }
        else {
            changed.push(entry);
        }
    }
    const drift = [
        ["deleted", deleted],
        ["changed", changed],
    ].filter(([, list]) => list.length > 0);
    if (drift.length > 0) {
        // At most two example paths are named per category.
        const parts = drift.map(([label, list]) => `${label}: ${list.slice(0, 2).join(", ")}`);
        return { status: "stale", reason: `references files that no longer match (${parts.join("; ")})` };
    }
    // A single unchecked file keeps the memory out of "verified", so one
    // matching hash cannot vouch for the others.
    if (hashMatched > 0 && unchecked === 0) {
        return { status: "verified", reason: "all referenced files exist and match their captured hashes" };
    }
    let reason;
    if (hashMatched > 0) {
        reason = "some referenced files verified, but others could not be content-checked";
    }
    else if (referenced.length > 0) {
        reason = "referenced files exist but were not hashed at capture (existence only)";
    }
    else {
        reason = "sourced by command or user, no file evidence to verify against";
    }
    return { status: "sourced_unverified", reason };
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
//
|
|
2
|
+
// memwarden boot entrypoint.
|
|
3
|
+
//
|
|
4
|
+
// - build the kernel via registerWorker,
|
|
5
|
+
// - register app functions (./functions/*) if present,
|
|
6
|
+
// - start the node:http REST server on restPort,
|
|
7
|
+
// - keep the periodic sweeps as plain setInterval(...).unref() timers
|
|
8
|
+
// that fire `trigger mem::*` (no scheduler in the SDK surface),
|
|
9
|
+
// - graceful shutdown on SIGINT/SIGTERM.
|
|
10
|
+
//
|
|
11
|
+
// Beyond the core, the ./functions/* modules may not all exist yet. Function
|
|
12
|
+
// registration is therefore best-effort: a missing module is logged and
|
|
13
|
+
// skipped so the kernel still boots and serves whatever is wired.
|
|
14
|
+
import { registerWorker, startHttpServer } from "./kernel/index.js";
|
|
15
|
+
import { StoreLibsql } from "./state/store-libsql.js";
|
|
16
|
+
import { StateKV } from "./state/kv.js";
|
|
17
|
+
import { registerCoreFunctions, setEmbeddingProvider, setVectorIndex, makeVectorIndex, } from "./functions/index.js";
|
|
18
|
+
import { isQuantizedVectorEnabled, isProxyEnabled, getUpstreamUrl, getUpstreamKey, getProxyPort, getSecret, } from "./functions/config.js";
|
|
19
|
+
import { createEmbeddingProvider } from "./embedding/index.js";
|
|
20
|
+
import { registerApiTriggers } from "./triggers/api.js";
|
|
21
|
+
import { startProxyServer } from "./proxy/server.js";
|
|
22
|
+
// REST port: MEMWARDEN_REST_PORT, defaulting to 3111.
const REST_PORT = Number.parseInt(process.env.MEMWARDEN_REST_PORT ?? "3111", 10);
// Store URL: MEMWARDEN_STORE_URL wins; otherwise a file under
// MEMWARDEN_DATA_DIR when that is set; otherwise ./data/memwarden.db.
const STORE_URL = process.env.MEMWARDEN_STORE_URL ??
    (() => {
        const dataDir = process.env.MEMWARDEN_DATA_DIR;
        return dataDir ? `file:${dataDir}/memwarden.db` : "file:./data/memwarden.db";
    })();
// Process-wide safety net: an unhandled rejection is logged and the service
// keeps running. Logging is throttled to one line per 60 seconds; rejections
// arriving inside that window are dropped without a log line. When the reason
// carries a `code`, the line is built from code / function_id / message;
// otherwise the raw reason is printed.
let lastUnhandledLogAt = 0;
process.on("unhandledRejection", (reason) => {
    const ts = Date.now();
    const throttled = ts - lastUnhandledLogAt < 60_000;
    if (throttled) {
        return;
    }
    lastUnhandledLogAt = ts;
    const info = reason;
    let detail = reason;
    if (info?.code) {
        detail = `${info.code} ${info.function_id ?? ""} ${info.message ?? ""}`.trim();
    }
    console.warn(`[memwarden] unhandledRejection (suppressed):`, detail);
});
|
|
44
|
+
/**
 * Dynamically import `modulePath` and invoke its `exportName` export as a
 * registrar with `args`.
 *
 * The registrar's result is awaited, so an async registrar that rejects is
 * reported as a failed registration here rather than escaping as an unhandled
 * rejection while being counted as success.
 *
 * @param modulePath module specifier passed to `import()`.
 * @param exportName name of the registrar export to call.
 * @param args arguments forwarded to the registrar.
 * @returns true when the registrar exists and completed; false when the
 *          module is missing, the export is not a function, or the import or
 *          registrar failed. Never rejects.
 */
async function tryRegister(modulePath, exportName, ...args) {
    try {
        const mod = await import(modulePath);
        const registrar = mod[exportName];
        if (typeof registrar !== "function") {
            return false;
        }
        await registrar(...args);
        return true;
    }
    catch (err) {
        // `err` may be any thrown value, including null/undefined.
        const code = err?.code;
        // ERR_MODULE_NOT_FOUND is expected while functions are still being
        // wired; anything else is a real registration failure worth a log.
        if (code !== "ERR_MODULE_NOT_FOUND") {
            console.warn(`[memwarden] failed to register ${exportName} from ${modulePath}:`, err instanceof Error ? err.message : err);
        }
        return false;
    }
}
|
|
68
|
+
/**
 * Wire the application functions onto the kernel.
 *
 * The core functions and the API triggers are registered statically and share
 * one StateKV built over `sdk`. The remaining modules are loaded through
 * tryRegister, so a module that is absent is simply not counted.
 *
 * @param sdk kernel handle returned by registerWorker.
 * @returns 3 (observe + context + search) plus the number of optional
 *          modules that registered.
 */
async function registerFunctions(sdk) {
    // Core path: a single StateKV shared by the core functions.
    const kv = new StateKV(sdk);
    registerCoreFunctions(sdk, kv);
    registerApiTriggers(sdk);
    const coreCount = 3; // observe + context + search
    // Optional modules: [module path, registrar export].
    const optionalModules = [
        ["./functions/smart-search.js", "registerSmartSearchFunction"],
        ["./functions/remember.js", "registerRememberFunction"],
        ["./functions/enrich.js", "registerEnrichFunction"],
        ["./triggers/events.js", "registerEventTriggers"],
        ["./health/monitor.js", "registerHealthMonitor"],
    ];
    const outcomes = await Promise.all(optionalModules.map(([modulePath, exportName]) => tryRegister(modulePath, exportName, sdk)));
    return coreCount + outcomes.filter(Boolean).length;
}
|
|
97
|
+
/**
|
|
98
|
+
* Install the periodic maintenance sweeps. The SDK surface has no cron
|
|
99
|
+
* primitive: these are plain unref'd interval timers that fire a
|
|
100
|
+
* `trigger mem::*`. A trigger to an unregistered function rejects
|
|
101
|
+
* harmlessly (caught here), so this is safe to install before the
|
|
102
|
+
* corresponding functions are wired.
|
|
103
|
+
*/
|
|
104
|
+
function installSweeps(sdk) {
|
|
105
|
+
const timers = [];
|
|
106
|
+
const HOUR = 60 * 60 * 1000;
|
|
107
|
+
const DAY = 24 * HOUR;
|
|
108
|
+
const fire = (functionId, payload) => {
|
|
109
|
+
sdk
|
|
110
|
+
.trigger({ function_id: functionId, payload })
|
|
111
|
+
.catch(() => undefined);
|
|
112
|
+
};
|
|
113
|
+
const schedule = (enabled, intervalMs, functionId, payload) => {
|
|
114
|
+
if (!enabled)
|
|
115
|
+
return;
|
|
116
|
+
// Small jitter spreads sweep load so they don't all fire on the
|
|
117
|
+
// same tick after a restart.
|
|
118
|
+
const jitter = Math.floor(Math.random() * Math.min(intervalMs, 60_000));
|
|
119
|
+
const timer = setInterval(() => fire(functionId, payload), intervalMs);
|
|
120
|
+
timer.unref();
|
|
121
|
+
timers.push(timer);
|
|
122
|
+
const kickoff = setTimeout(() => fire(functionId, payload), jitter);
|
|
123
|
+
kickoff.unref();
|
|
124
|
+
};
|
|
125
|
+
const autoForgetInterval = parseInt(process.env.AUTO_FORGET_INTERVAL_MS ?? "3600000", 10);
|
|
126
|
+
const consolidationInterval = parseInt(process.env.CONSOLIDATION_INTERVAL_MS ?? "7200000", 10);
|
|
127
|
+
schedule(process.env.AUTO_FORGET_ENABLED !== "false", autoForgetInterval, "mem::auto-forget", { dryRun: false });
|
|
128
|
+
schedule(process.env.LESSON_DECAY_ENABLED !== "false", DAY, "mem::lesson-decay-sweep", {});
|
|
129
|
+
schedule(process.env.INSIGHT_DECAY_ENABLED !== "false", DAY, "mem::insight-decay-sweep", {});
|
|
130
|
+
schedule(true, HOUR, "mem::diagnostic::recent-searches-sweep", {});
|
|
131
|
+
schedule(process.env.CONSOLIDATION_ENABLED === "true", consolidationInterval, "mem::consolidate-pipeline", {});
|
|
132
|
+
return timers;
|
|
133
|
+
}
|
|
134
|
+
/**
 * Boot sequence: build the store and kernel, register functions, wire the
 * embedding provider and vector index (if a provider is available), start the
 * REST server, optionally start the memory proxy, install the periodic sweeps
 * and hook graceful shutdown onto SIGINT/SIGTERM.
 */
async function main() {
    const store = new StoreLibsql({ url: STORE_URL });
    const sdk = registerWorker("in-process", {
        workerName: "memwarden",
        invocationTimeoutMs: 180000,
    }, { store });
    const registered = await registerFunctions(sdk);
    console.log(`[memwarden] kernel ready — ${registered} function module(s) registered, store=${STORE_URL}`);
    // Semantic memory: wire the embedding provider and the vector index. With
    // no provider, memwarden runs BM25-only. When the provider exposes a
    // warmup(), it is started in the background so boot is not blocked.
    const embProvider = createEmbeddingProvider();
    if (embProvider) {
        setEmbeddingProvider(embProvider);
        setVectorIndex(makeVectorIndex(embProvider.dimensions));
        const quantized = isQuantizedVectorEnabled();
        console.log(`[memwarden] semantic memory: ${embProvider.name} (${embProvider.dimensions}d), ` +
            `storage=${quantized ? "TurboQuant-compressed" : "full-precision"}`);
        const warmable = embProvider;
        if (typeof warmable.warmup === "function") {
            warmable.warmup().catch((err) => {
                console.warn(`[memwarden] embedding model warmup failed — vector stream stays off until it loads:`, err instanceof Error ? err.message : err);
            });
        }
    }
    else {
        console.log(`[memwarden] semantic memory: disabled (BM25-only)`);
    }
    const http = startHttpServer(sdk, { port: REST_PORT });
    // Race-safe self-heal: if another memwarden already holds the port, this
    // spawn is redundant — exit cleanly (0) rather than crash, so concurrent
    // ensureDaemon() callers never surface an error.
    http.server.on("error", (err) => {
        if (err.code === "EADDRINUSE") {
            console.log(`[memwarden] port ${REST_PORT} already in use — another instance is running; exiting.`);
            process.exit(0);
        }
        console.error(`[memwarden] HTTP server error:`, err);
        process.exit(1);
    });
    console.log(`[memwarden] REST API: http://127.0.0.1:${REST_PORT}/memwarden/*`);
    // The memory proxy is started only when it is enabled AND an upstream URL
    // is configured (it has nothing to forward to otherwise).
    let proxy;
    const upstreamUrl = getUpstreamUrl();
    if (isProxyEnabled() && upstreamUrl) {
        const proxyPort = getProxyPort();
        const cwd = process.cwd();
        const upstreamKey = getUpstreamKey();
        const secret = getSecret();
        proxy = startProxyServer({
            port: proxyPort,
            upstreamUrl,
            daemonUrl: `http://127.0.0.1:${REST_PORT}`,
            project: cwd,
            cwd,
            // Optional settings are passed only when they have a value.
            ...(upstreamKey ? { upstreamKey } : {}),
            ...(secret ? { secret } : {}),
        });
        console.log(`[memwarden] memory proxy: http://127.0.0.1:${proxyPort}/v1 -> ${upstreamUrl} ` +
            `(point any OpenAI-compatible tool here for automatic memory)`);
    }
    const timers = installSweeps(sdk);
    let shuttingDown = false;
    const shutdown = async () => {
        if (shuttingDown)
            return;
        shuttingDown = true;
        console.log(`\n[memwarden] Shutting down...`);
        let exitCode = 0;
        try {
            for (const t of timers)
                clearInterval(t);
            await http.close().catch(() => undefined);
            if (proxy)
                await proxy.close().catch(() => undefined);
            await sdk.shutdown();
        }
        catch (err) {
            exitCode = 1;
            console.error(`[memwarden] shutdown failed:`, err instanceof Error ? err.message : err);
        }
        finally {
            // Always exit: `shuttingDown` is already set, so if a failed
            // shutdown left the process alive, further signals would be
            // ignored and the daemon could not be stopped.
            process.exit(exitCode);
        }
    };
    process.on("SIGINT", () => void shutdown());
    process.on("SIGTERM", () => void shutdown());
}
// Entry point: any boot failure is fatal.
main().catch((err) => {
    console.error(`[memwarden] Fatal:`, err);
    process.exit(1);
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { type Server } from "node:http";
|
|
2
|
+
import type { Kernel } from "./kernel.js";
|
|
3
|
+
/** Options accepted by startHttpServer. */
export interface HttpServerOptions {
    /** Port for the HTTP server. */
    port: number;
    /** Host for the HTTP server. NOTE(review): the default is not visible in this declaration — confirm in http.js. */
    host?: string;
    /** Allowed CORS origins. Defaults to the local viewer/REST quartet. */
    allowedOrigins?: string[];
    /** Max request body bytes before 413. Defaults to 16 MiB. */
    maxBodyBytes?: number;
}
|
|
11
|
+
/** Handle returned by startHttpServer. */
export interface RunningHttpServer {
    /** The underlying node:http server instance. */
    server: Server;
    /** Port associated with the server. */
    port: number;
    /** Closes the server; the promise settles once closing has finished. */
    close(): Promise<void>;
}
|
|
16
|
+
/**
 * Starts the HTTP server for `kernel` using `opts` and returns a handle with
 * the underlying node:http server, its port, and an async close().
 */
export declare function startHttpServer(kernel: Kernel, opts: HttpServerOptions): RunningHttpServer;
|
|
17
|
+
/**
|
|
18
|
+
* Accept only a loopback Host header bound to our port (or with no port). The
|
|
19
|
+
* Host header is case-insensitive and may carry `:port` or be a bracketed IPv6
|
|
20
|
+
* literal; we split host/port robustly and reject anything non-loopback. This
|
|
21
|
+
* is the DNS-rebinding guard: the value reflects the hostname the client
|
|
22
|
+
* actually targeted, which a rebinding attacker cannot forge to "localhost".
|
|
23
|
+
*/
|
|
24
|
+
export declare function isLoopbackHost(hostHeader: string | undefined, port: number | undefined): boolean;
|