memwarden 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +402 -0
  3. package/dist/bundle/bundle.d.ts +28 -0
  4. package/dist/bundle/bundle.js +85 -0
  5. package/dist/cli/bin.d.ts +2 -0
  6. package/dist/cli/bin.js +593 -0
  7. package/dist/cli/connect.d.ts +63 -0
  8. package/dist/cli/connect.js +121 -0
  9. package/dist/cli/hook.d.ts +24 -0
  10. package/dist/cli/hook.js +186 -0
  11. package/dist/cli/tools.d.ts +47 -0
  12. package/dist/cli/tools.js +246 -0
  13. package/dist/daemon/ensure.d.ts +12 -0
  14. package/dist/daemon/ensure.js +54 -0
  15. package/dist/daemon/service.d.ts +15 -0
  16. package/dist/daemon/service.js +210 -0
  17. package/dist/embedding/index.d.ts +10 -0
  18. package/dist/embedding/index.js +33 -0
  19. package/dist/embedding/local-embedding.d.ts +14 -0
  20. package/dist/embedding/local-embedding.js +80 -0
  21. package/dist/functions/access-tracker.d.ts +13 -0
  22. package/dist/functions/access-tracker.js +92 -0
  23. package/dist/functions/audit.d.ts +46 -0
  24. package/dist/functions/audit.js +0 -0
  25. package/dist/functions/cjk-segmenter.d.ts +6 -0
  26. package/dist/functions/cjk-segmenter.js +120 -0
  27. package/dist/functions/compress-synthetic.d.ts +2 -0
  28. package/dist/functions/compress-synthetic.js +104 -0
  29. package/dist/functions/config.d.ts +68 -0
  30. package/dist/functions/config.js +231 -0
  31. package/dist/functions/conflicts.d.ts +19 -0
  32. package/dist/functions/conflicts.js +328 -0
  33. package/dist/functions/context.d.ts +3 -0
  34. package/dist/functions/context.js +155 -0
  35. package/dist/functions/dedup.d.ts +11 -0
  36. package/dist/functions/dedup.js +51 -0
  37. package/dist/functions/dejafix.d.ts +96 -0
  38. package/dist/functions/dejafix.js +356 -0
  39. package/dist/functions/doctor.d.ts +29 -0
  40. package/dist/functions/doctor.js +137 -0
  41. package/dist/functions/forget.d.ts +3 -0
  42. package/dist/functions/forget.js +87 -0
  43. package/dist/functions/hybrid-search.d.ts +17 -0
  44. package/dist/functions/hybrid-search.js +205 -0
  45. package/dist/functions/index.d.ts +32 -0
  46. package/dist/functions/index.js +44 -0
  47. package/dist/functions/keyed-mutex.d.ts +1 -0
  48. package/dist/functions/keyed-mutex.js +21 -0
  49. package/dist/functions/logger.d.ts +6 -0
  50. package/dist/functions/logger.js +37 -0
  51. package/dist/functions/memory-utils.d.ts +2 -0
  52. package/dist/functions/memory-utils.js +29 -0
  53. package/dist/functions/observe.d.ts +5 -0
  54. package/dist/functions/observe.js +326 -0
  55. package/dist/functions/paths.d.ts +1 -0
  56. package/dist/functions/paths.js +38 -0
  57. package/dist/functions/privacy.d.ts +1 -0
  58. package/dist/functions/privacy.js +30 -0
  59. package/dist/functions/provenance.d.ts +9 -0
  60. package/dist/functions/provenance.js +57 -0
  61. package/dist/functions/quantized-vector-index.d.ts +60 -0
  62. package/dist/functions/quantized-vector-index.js +275 -0
  63. package/dist/functions/receipt.d.ts +31 -0
  64. package/dist/functions/receipt.js +95 -0
  65. package/dist/functions/search-index.d.ts +27 -0
  66. package/dist/functions/search-index.js +217 -0
  67. package/dist/functions/search.d.ts +25 -0
  68. package/dist/functions/search.js +523 -0
  69. package/dist/functions/stemmer.d.ts +1 -0
  70. package/dist/functions/stemmer.js +110 -0
  71. package/dist/functions/synonyms.d.ts +1 -0
  72. package/dist/functions/synonyms.js +69 -0
  73. package/dist/functions/turboquant.d.ts +53 -0
  74. package/dist/functions/turboquant.js +278 -0
  75. package/dist/functions/types.d.ts +217 -0
  76. package/dist/functions/types.js +8 -0
  77. package/dist/functions/vector-index.d.ts +25 -0
  78. package/dist/functions/vector-index.js +125 -0
  79. package/dist/functions/vector-persistence.d.ts +14 -0
  80. package/dist/functions/vector-persistence.js +75 -0
  81. package/dist/functions/verify.d.ts +13 -0
  82. package/dist/functions/verify.js +104 -0
  83. package/dist/index.d.ts +1 -0
  84. package/dist/index.js +219 -0
  85. package/dist/kernel/http.d.ts +24 -0
  86. package/dist/kernel/http.js +261 -0
  87. package/dist/kernel/index.d.ts +19 -0
  88. package/dist/kernel/index.js +21 -0
  89. package/dist/kernel/kernel.d.ts +80 -0
  90. package/dist/kernel/kernel.js +297 -0
  91. package/dist/kernel/pubsub.d.ts +21 -0
  92. package/dist/kernel/pubsub.js +38 -0
  93. package/dist/kernel/types.d.ts +139 -0
  94. package/dist/kernel/types.js +20 -0
  95. package/dist/mcp/bin.d.ts +2 -0
  96. package/dist/mcp/bin.js +27 -0
  97. package/dist/mcp/server.d.ts +34 -0
  98. package/dist/mcp/server.js +377 -0
  99. package/dist/observability/metrics.d.ts +26 -0
  100. package/dist/observability/metrics.js +104 -0
  101. package/dist/proxy/server.d.ts +30 -0
  102. package/dist/proxy/server.js +331 -0
  103. package/dist/state/kv.d.ts +41 -0
  104. package/dist/state/kv.js +50 -0
  105. package/dist/state/oplog.d.ts +25 -0
  106. package/dist/state/oplog.js +57 -0
  107. package/dist/state/schema.d.ts +60 -0
  108. package/dist/state/schema.js +88 -0
  109. package/dist/state/store-libsql.d.ts +46 -0
  110. package/dist/state/store-libsql.js +263 -0
  111. package/dist/state/store-memory.d.ts +23 -0
  112. package/dist/state/store-memory.js +121 -0
  113. package/dist/state/store.d.ts +87 -0
  114. package/dist/state/store.js +58 -0
  115. package/dist/triggers/api.d.ts +14 -0
  116. package/dist/triggers/api.js +510 -0
  117. package/dist/triggers/auth.d.ts +1 -0
  118. package/dist/triggers/auth.js +13 -0
  119. package/package.json +58 -0
@@ -0,0 +1,326 @@
1
+ //
2
+ // The write path (mem::observe). Accepts a HookPayload, validates it, optionally
3
+ // dedups, privacy-strips the raw payload, builds a RawObservation, then — inside
4
+ // a per-session keyed lock — enforces the per-session cap, persists the raw
5
+ // observation, updates/creates the session row (observationCount++, updatedAt,
6
+ // firstPrompt), and runs the default zero-LLM synthetic compression: write the
7
+ // synthetic over the same obsId and add it to the BM25 and vector indexes.
8
+ // Referenced files are hashed into provenance for Verified Recall. Returns
9
+ // { observationId }.
10
+ //
11
+ // Image detection + modality tagging are kept (pure, keeps the observation
12
+ // shape stable); image-to-disk persistence and vision embedding are out of
13
+ // scope. LLM-based compression (AUTO_COMPRESS) has no provider wired, so the
14
+ // synthetic path is always taken.
15
+ import { TriggerAction } from "../kernel/index.js";
16
+ import { KV, STREAM, generateId } from "../state/schema.js";
17
+ import { stripPrivateData } from "./privacy.js";
18
+ import { withKeyedLock } from "./keyed-mutex.js";
19
+ import { isAutoCompressEnabled, getAgentId } from "./config.js";
20
+ import { buildSyntheticCompression } from "./compress-synthetic.js";
21
+ import { extractProvenance } from "./provenance.js";
22
+ import { hashFiles } from "./verify.js";
23
+ import { recordFix, looksLikeResolvedFix } from "./dejafix.js";
24
+ import { getSearchIndex, vectorIndexAddGuarded } from "./search.js";
25
+ import { logger } from "./logger.js";
26
+ import { metrics } from "../observability/metrics.js";
27
// String prefixes treated as inline image content: a data URL, plus the
// leading characters of base64-encoded PNG and JPEG data.
const IMAGE_STRING_PREFIXES = ["data:image/", "iVBORw0KGgo", "/9j/"];
// Object fields that carry an image reference, in lookup priority order.
const IMAGE_FIELD_NAMES = ["image_data", "image_path", "imageBase64", "imagePath"];
/**
 * Finds the first image reference inside an arbitrary hook payload value.
 *
 * A string counts only when it starts with one of the known image prefixes.
 * For an object (or array), the well-known image fields are checked first —
 * any string stored there is returned as-is, even an empty one — and then
 * every own enumerable property is searched recursively, depth-first.
 *
 * @param d Any value; falsy values and non-object, non-string values yield
 *   `undefined`.
 * @returns The matching string, or `undefined` when nothing is found.
 */
export function extractImage(d) {
    if (!d) {
        return undefined;
    }
    if (typeof d === "string") {
        const looksLikeImage = IMAGE_STRING_PREFIXES.some((prefix) => d.startsWith(prefix));
        return looksLikeImage ? d : undefined;
    }
    if (typeof d !== "object") {
        return undefined;
    }
    for (const field of IMAGE_FIELD_NAMES) {
        const direct = d[field];
        if (typeof direct === "string") {
            return direct;
        }
    }
    // No well-known field: descend into each property in key order.
    for (const key of Object.keys(d)) {
        const nested = extractImage(d[key]);
        if (nested) {
            return nested;
        }
    }
    return undefined;
}
56
/**
 * Applies stripPrivateData to every string (keys and values) of a value that
 * was produced by JSON.parse, returning a redacted copy. Because the input is
 * a JSON-decoded tree it is acyclic, so plain recursion terminates.
 */
function redactStringLeaves(value) {
    if (typeof value === "string") {
        return stripPrivateData(value);
    }
    if (Array.isArray(value)) {
        return value.map(redactStringLeaves);
    }
    if (typeof value === "object" && value !== null) {
        // Object.fromEntries defines own properties, so a literal "__proto__"
        // key in the payload cannot rewrite the copy's prototype.
        return Object.fromEntries(Object.entries(value).map(([key, child]) => [
            stripPrivateData(key),
            redactStringLeaves(child),
        ]));
    }
    return value;
}
/**
 * Privacy-strips a hook payload's `data`.
 *
 * Primary path (unchanged): serialize to JSON, redact the text, parse it back.
 * A redaction can swallow the closing quote of a JSON string (the generic
 * `key=value` secret pattern ends in an optional quote), leaving text that no
 * longer parses. In that case the original JSON is decoded and redacted string
 * by string, so the observation keeps its structure instead of collapsing to
 * String(data) (i.e. "[object Object]").
 *
 * When the data cannot be serialized at all (JSON.stringify throws or returns
 * undefined) the original last-resort behaviour is kept: the redacted
 * String(data).
 */
function sanitizePayloadData(data) {
    let jsonStr;
    try {
        jsonStr = JSON.stringify(data);
        return JSON.parse(stripPrivateData(jsonStr));
    }
    catch {
        if (typeof jsonStr === "string") {
            return redactStringLeaves(JSON.parse(jsonStr));
        }
        return stripPrivateData(String(data));
    }
}
/**
 * Registers the `mem::observe` write path on the kernel SDK.
 *
 * The handler validates the payload, optionally dedups it, privacy-strips the
 * raw data, builds the raw observation and then — inside a per-session keyed
 * lock — enforces the per-session cap, persists the observation, updates or
 * creates the session row, and either triggers `mem::compress` (when
 * auto-compress is enabled) or writes a synthetic compression over the same
 * observation id and adds it to the search and vector indexes.
 *
 * Handler results:
 *   - `{ success: false, error }` for an invalid payload or a full session,
 *   - `{ deduplicated: true, sessionId }` when the dedup map reports a repeat,
 *   - `{ observationId }` on success.
 *
 * @param sdk Kernel SDK; `registerFunction` and `trigger` are used.
 * @param kv State store; `list`, `get`, `set` and `update` are used.
 * @param dedupMap Optional dedup helper (`computeHash`, `isDuplicate`,
 *   `record`); dedup is skipped when it is falsy.
 * @param maxObservationsPerSession Per-session cap; not enforced when falsy
 *   or not greater than zero.
 */
export function registerObserveFunction(sdk, kv, dedupMap, maxObservationsPerSession) {
    sdk.registerFunction("mem::observe", async (payload) => {
        if (!payload?.sessionId ||
            typeof payload.sessionId !== "string" ||
            !payload.hookType ||
            typeof payload.hookType !== "string" ||
            !payload.timestamp ||
            typeof payload.timestamp !== "string") {
            return {
                success: false,
                error: "Invalid payload: sessionId, hookType, and timestamp are required",
            };
        }
        const obsId = generateId("obs");
        let dedupHash;
        if (dedupMap) {
            const d = typeof payload.data === "object" && payload.data !== null
                ? payload.data
                : {};
            const toolName = d["tool_name"] || payload.hookType;
            dedupHash = dedupMap.computeHash(payload.sessionId, toolName, d["tool_input"]);
            if (dedupMap.isDuplicate(dedupHash)) {
                return { deduplicated: true, sessionId: payload.sessionId };
            }
        }
        const sanitizedRaw = sanitizePayloadData(payload.data);
        const raw = {
            id: obsId,
            sessionId: payload.sessionId,
            timestamp: payload.timestamp,
            hookType: payload.hookType,
            raw: sanitizedRaw,
        };
        let extractedImage;
        if (typeof sanitizedRaw === "object" && sanitizedRaw !== null) {
            const d = sanitizedRaw;
            if (payload.hookType === "post_tool_use" ||
                payload.hookType === "post_tool_failure") {
                if (typeof d["tool_name"] === "string")
                    raw.toolName = d["tool_name"];
                raw.toolInput = d["tool_input"];
                raw.toolOutput = d["tool_output"] || d["error"];
            }
            if (payload.hookType === "prompt_submit") {
                if (typeof d["prompt"] === "string")
                    raw.userPrompt = d["prompt"];
            }
            extractedImage = extractImage(sanitizedRaw);
            if (extractedImage) {
                // An image alongside any tool/prompt content is "mixed".
                raw.modality =
                    raw.toolInput || raw.toolOutput || raw.userPrompt
                        ? "mixed"
                        : "image";
            }
        }
        else if (typeof sanitizedRaw === "string") {
            extractedImage = extractImage(sanitizedRaw);
            if (extractedImage) {
                raw.modality = "image";
            }
        }
        return withKeyedLock(`obs:${payload.sessionId}`, async () => {
            if (maxObservationsPerSession && maxObservationsPerSession > 0) {
                const existing = await kv.list(KV.observations(payload.sessionId));
                if (existing.length >= maxObservationsPerSession) {
                    return {
                        success: false,
                        error: `Session observation limit reached (${maxObservationsPerSession})`,
                    };
                }
            }
            // Existing session is the source of truth for agentId (even
            // undefined). Env AGENT_ID only fires when no session row exists yet —
            // otherwise an unscoped session would get retroactively scoped by a
            // later AGENT_ID export.
            const existingSession = await kv.get(KV.sessions, payload.sessionId);
            const inheritedAgentId = existingSession
                ? existingSession.agentId
                : getAgentId();
            if (inheritedAgentId) {
                raw.agentId = inheritedAgentId;
            }
            await kv.set(KV.observations(payload.sessionId), obsId, raw);
            // Record the dedup hash only once the observation is persisted.
            if (dedupMap && dedupHash) {
                dedupMap.record(dedupHash);
            }
            // Live-viewer stream fan-out. The kernel routes stream::set /
            // stream::send to its in-process pub/sub. Durably unused in-process,
            // but kept so the viewer wiring stays identical.
            await sdk.trigger({
                function_id: "stream::set",
                payload: {
                    stream_name: STREAM.name,
                    group_id: STREAM.group(payload.sessionId),
                    item_id: obsId,
                    data: { type: "raw", observation: raw },
                },
            });
            // Not awaited: this trigger is issued with a Void action.
            sdk.trigger({
                function_id: "stream::send",
                payload: {
                    stream_name: STREAM.name,
                    group_id: STREAM.viewerGroup,
                    id: `raw-${obsId}`,
                    type: "raw_observation",
                    data: {
                        type: "raw",
                        observation: raw,
                        sessionId: payload.sessionId,
                    },
                },
                action: TriggerAction.Void(),
            });
            const session = existingSession;
            if (session) {
                const updates = [
                    { type: "set", path: "updatedAt", value: new Date().toISOString() },
                    {
                        type: "set",
                        path: "observationCount",
                        value: (session.observationCount || 0) + 1,
                    },
                ];
                // Only the first non-empty prompt of a session is remembered,
                // whitespace-collapsed and capped at 200 characters.
                if (!session.firstPrompt && typeof raw.userPrompt === "string") {
                    const trimmed = raw.userPrompt.replace(/\s+/g, " ").trim();
                    if (trimmed.length > 0) {
                        updates.push({
                            type: "set",
                            path: "firstPrompt",
                            value: trimmed.slice(0, 200),
                        });
                    }
                }
                await kv.update(KV.sessions, payload.sessionId, updates);
            }
            else if (typeof payload.project === "string" &&
                payload.project.trim().length > 0 &&
                typeof payload.cwd === "string" &&
                payload.cwd.trim().length > 0) {
                // Connectors that skip POST /session/start can fire observations
                // before the session record exists. Create it now from the
                // observation payload — but only when project + cwd are present
                // (HookPayload contract). Older payloads without those fields keep
                // the original no-op behaviour.
                const trimmedPrompt = typeof raw.userPrompt === "string"
                    ? raw.userPrompt.replace(/\s+/g, " ").trim().slice(0, 200)
                    : undefined;
                const ts = new Date().toISOString();
                await kv.set(KV.sessions, payload.sessionId, {
                    id: payload.sessionId,
                    project: payload.project,
                    cwd: payload.cwd,
                    startedAt: payload.timestamp ?? ts,
                    updatedAt: ts,
                    status: "active",
                    observationCount: 1,
                    ...(inheritedAgentId ? { agentId: inheritedAgentId } : {}),
                    ...(trimmedPrompt && trimmedPrompt.length > 0
                        ? { firstPrompt: trimmedPrompt }
                        : {}),
                });
            }
            // Per-observation LLM compression is opt-in.
            // Default path: build a zero-LLM synthetic compression so recall and
            // BM25 search work without an LLM. memwarden has no LLM provider
            // wired in the core, so the synthetic path is always taken.
            if (isAutoCompressEnabled()) {
                await sdk.trigger({
                    function_id: "mem::compress",
                    payload: {
                        observationId: obsId,
                        sessionId: payload.sessionId,
                        raw,
                    },
                    action: TriggerAction.Void(),
                });
            }
            else {
                const synthetic = buildSyntheticCompression(raw);
                // Attach the evidence trail so the doctor and Verified Recall can later
                // judge whether this memory is sourced and still valid. Hash the
                // referenced files now (under cwd) so content drift is detectable.
                // Provenance is read from the privacy-stripped data, never the raw
                // payload: the captured command and file paths are persisted with
                // the synthetic, so they must go through the same redaction.
                const prov = extractProvenance({ ...payload, data: sanitizedRaw });
                if (prov.files && prov.files.length > 0 && payload.cwd) {
                    const fileHashes = hashFiles(prov.files, payload.cwd);
                    if (Object.keys(fileHashes).length > 0)
                        prov.fileHashes = fileHashes;
                }
                synthetic.provenance = prov;
                metrics.recordObserve(JSON.stringify(raw), JSON.stringify(synthetic));
                // Déjà Fix opportunistic capture. When this observation already looks
                // like a recorded fix (contains BOTH a recognizable error AND
                // resolution language), extract its error signature and store a
                // FixMemory so any agent that later hits the same error can recall the
                // verified fix. Reuses the same provenance (with fileHashes) we just
                // built, so Verified Recall can detect drift. Strictly best-effort and
                // gated: it must never throw on or block the observe hot path, and
                // non-fix observations are completely untouched.
                try {
                    const fixText = [
                        synthetic.title,
                        synthetic.narrative,
                        ...(synthetic.facts ?? []),
                    ]
                        .filter((s) => typeof s === "string" && s.length > 0)
                        .join("\n");
                    if (payload.cwd && looksLikeResolvedFix(fixText)) {
                        const tool = raw.agentId ?? payload.agent;
                        await recordFix(kv, {
                            errorText: fixText,
                            observationId: obsId,
                            fix: synthetic.narrative || synthetic.title,
                            provenance: prov,
                            cwd: payload.cwd,
                            timestamp: payload.timestamp,
                            sessionId: payload.sessionId,
                            ...(tool ? { tool } : {}),
                        });
                    }
                }
                catch (err) {
                    // The side path must never break observe.
                    logger.warn("dejafix opportunistic capture failed", {
                        obsId,
                        error: err instanceof Error ? err.message : String(err),
                    });
                }
                // The synthetic overwrites the raw observation under the same id.
                await kv.set(KV.observations(payload.sessionId), obsId, synthetic);
                getSearchIndex().add(synthetic);
                await vectorIndexAddGuarded(synthetic.id, synthetic.sessionId, synthetic.title + " " + (synthetic.narrative || ""), { kind: "synthetic", logId: synthetic.id });
                await sdk.trigger({
                    function_id: "stream::set",
                    payload: {
                        stream_name: STREAM.name,
                        group_id: STREAM.group(payload.sessionId),
                        item_id: obsId,
                        data: { type: "compressed", observation: synthetic },
                    },
                });
                await sdk.trigger({
                    function_id: "stream::set",
                    payload: {
                        stream_name: STREAM.name,
                        group_id: STREAM.viewerGroup,
                        item_id: obsId,
                        data: {
                            type: "compressed",
                            observation: synthetic,
                            sessionId: payload.sessionId,
                        },
                    },
                });
            }
            logger.info("Observation captured", {
                obsId,
                sessionId: payload.sessionId,
                hook: payload.hookType,
                compress: isAutoCompressEnabled() ? "llm" : "synthetic",
            });
            return { observationId: obsId };
        });
    });
}
@@ -0,0 +1 @@
1
/**
 * Canonicalizes a project/cwd path so different spellings of the same
 * directory compare equal.
 *
 * The input is trimmed; a nullish value is treated as the empty string. Empty
 * and non-absolute values are returned (trimmed) without further change. An
 * absolute path is resolved to its real, symlink-free form; when that
 * resolution fails the path is normalized syntactically and a single trailing
 * "/" is removed.
 *
 * @param p Path or label to canonicalize; may be `undefined` or `null`.
 * @returns The canonical string (empty when the input is nullish or blank).
 */
+ export declare function canonicalizePath(p: string | undefined | null): string;
@@ -0,0 +1,38 @@
1
+ //
2
+ // Path canonicalization for project/cwd scoping. The unified memory layer
3
+ // scopes recall by working directory, and tools report that directory in
4
+ // inconsistent forms: symlinked (/tmp -> /private/tmp on macOS), with or
5
+ // without a trailing slash, with `..` segments. Exact string comparison then
6
+ // silently misses — the worst failure mode for a memory layer, because it
7
+ // looks like "no memory" rather than an error.
8
+ //
9
+ // canonicalizePath resolves an absolute path to its real, symlink-free form
10
+ // so two spellings of the same directory compare equal. Non-absolute values
11
+ // (labels like "mcp", or "/work/alpha"-style virtual projects in tests) pass
12
+ // through unchanged, and a path that does not exist falls back to syntactic
13
+ // normalization. Applied to BOTH the query filter and the stored value at
14
+ // comparison time, so it is robust regardless of how the path was captured.
15
+ import { realpathSync } from "node:fs";
16
+ import { isAbsolute, normalize } from "node:path";
17
// Memo of absolute input -> real path. Only paths that resolved successfully
// are stored: they are stable, whereas a path that does not exist yet may
// appear later and has to be resolved again.
const resolvedPaths = new Map();
/**
 * Returns the canonical form of a project/cwd value.
 *
 * Nullish input becomes "", and the value is trimmed. Blank or non-absolute
 * values are handed back as-is (trimmed). Absolute paths go through
 * realpathSync (memoized); if that throws, the path is normalized and one
 * trailing "/" is dropped unless the path is a single character.
 */
export function canonicalizePath(p) {
    const trimmed = (p ?? "").trim();
    if (trimmed === "" || !isAbsolute(trimmed)) {
        return trimmed;
    }
    if (resolvedPaths.has(trimmed)) {
        return resolvedPaths.get(trimmed);
    }
    let resolved;
    try {
        resolved = realpathSync(trimmed);
    }
    catch {
        // Not resolvable: fall back to purely syntactic cleanup, uncached.
        const normalized = normalize(trimmed);
        const hasTrailingSlash = normalized.length > 1 && normalized.endsWith("/");
        return hasTrailingSlash ? normalized.slice(0, -1) : normalized;
    }
    resolvedPaths.set(trimmed, resolved);
    return resolved;
}
@@ -0,0 +1 @@
1
/**
 * Redacts secrets from a piece of text.
 *
 * Every `<private>...</private>` span is replaced with "[REDACTED]", and every
 * match of the module's known credential patterns is replaced with
 * "[REDACTED_SECRET]".
 *
 * @param input Text to redact.
 * @returns The redacted text.
 */
+ export declare function stripPrivateData(input: string): string;
@@ -0,0 +1,30 @@
1
+ //
2
+ // Redacts secrets from raw observation text before anything is persisted:
3
+ // any span the user wraps in <private>...</private>, plus a set of well-known
4
+ // credential formats (provider API keys, bearer tokens, JWTs, cloud keys).
5
+ // The credential shapes are public, factual patterns.
6
// A user-marked private span, matched lazily, across newlines and
// case-insensitively.
const PRIVATE_SPAN = /<private>[\s\S]*?<\/private>/gi;
// Well-known credential shapes. Applied in order; every match is replaced.
const SECRET_PATTERNS = [
    // Generic `key = value` / `key: value` assignments with a long value.
    /(?:api[_-]?key|secret|token|password|credential|auth)\s*[=:]\s*["']?[A-Za-z0-9_\-/.+]{20,}["']?/gi,
    // HTTP bearer tokens.
    /Bearer\s+[A-Za-z0-9._\-+/=]{20,}/gi,
    // Provider API keys with sk-/pk-/rk-/ak- style prefixes.
    /sk-proj-[A-Za-z0-9\-_]{20,}/g,
    /(?:sk|pk|rk|ak)-[A-Za-z0-9][A-Za-z0-9\-_]{19,}/g,
    /sk-ant-[A-Za-z0-9\-_]{20,}/g,
    // GitHub tokens: personal (ghp_), OAuth (gho_), user-to-server (ghu_),
    // server-to-server (ghs_) and refresh (ghr_).
    /gh[pousr]_[A-Za-z0-9]{36,}/g,
    /github_pat_[A-Za-z0-9_]{22,}/g,
    // Slack tokens: bot (xoxb-) plus the other xox?- token families.
    /xox[abeprs]-[A-Za-z0-9\-]+/g,
    // AWS access key ids.
    /AKIA[0-9A-Z]{16}/g,
    // Google API keys.
    /AIza[A-Za-z0-9\-_]{35}/g,
    // JSON Web Tokens (three dot-separated base64url segments).
    /eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g,
    // npm, GitLab and DigitalOcean tokens.
    /npm_[A-Za-z0-9]{36}/g,
    /glpat-[A-Za-z0-9\-_]{20,}/g,
    /dop_v1_[A-Za-z0-9]{64}/g,
];
/**
 * Redacts secrets from text before it is persisted.
 *
 * `<private>...</private>` spans become "[REDACTED]"; any match of
 * SECRET_PATTERNS becomes "[REDACTED_SECRET]".
 *
 * @param input Text to redact.
 * @returns The redacted text.
 */
export function stripPrivateData(input) {
    let out = input.replace(PRIVATE_SPAN, "[REDACTED]");
    for (const pattern of SECRET_PATTERNS) {
        // String.prototype.replace restarts a global regex from index 0, so the
        // shared pattern objects carry no lastIndex state between calls.
        out = out.replace(pattern, "[REDACTED_SECRET]");
    }
    return out;
}
@@ -0,0 +1,9 @@
1
+ import type { Provenance } from "./types.js";
2
/**
 * Builds the provenance record (evidence trail) for an observe payload.
 *
 * `userConfirmed` is always `false` on the result. `cwd`, `agent` and
 * `timestamp` are copied over when truthy (timestamp as `capturedAt`).
 * `data.tool_input` is scanned for file paths, and `data.tool_name` —
 * combined with the first 200 characters of `tool_input.command` when that is
 * a non-blank string — becomes `command`.
 *
 * @param payload The subset of the hook payload that provenance reads.
 * @returns A new Provenance object; fields without a value are omitted.
 */
+ export declare function extractProvenance(payload: {
3
+ cwd?: string;
4
+ timestamp?: string;
5
+ agent?: string;
6
+ data?: unknown;
7
+ }): Provenance;
8
+ /**
 * True when a memory has no evidence backing it: the provenance is missing,
 * or it has no files, no command and is not user-confirmed.
 */
9
+ export declare function isUnsourced(p: Provenance | undefined): boolean;
@@ -0,0 +1,57 @@
1
+ //
2
+ // Extract provenance from an observe payload — the evidence trail that
3
+ // lets the doctor judge a memory's trustworthiness later. Pure, no I/O.
4
// tool_input fields that name a file directly.
const FILE_KEYS = ["file_path", "filePath", "file", "path", "notebook_path"];
// Path-shaped text: contains a slash, or ends in a dotted extension.
const PATH_RE = /(^|\/)[\w.\-/]+\.\w{1,8}$|\//;
/**
 * Gathers the file paths referenced by a tool input.
 *
 * Values of the well-known file fields come first (trimmed, blanks skipped),
 * followed by any other string value that is shorter than 400 characters,
 * contains no space and looks like a path. Duplicates are dropped while
 * first-seen order is kept. Anything that is not a non-null object yields [].
 */
function collectFiles(toolInput) {
    if (typeof toolInput !== "object" || toolInput === null) {
        return [];
    }
    const fromKnownFields = FILE_KEYS
        .map((key) => toolInput[key])
        .filter((value) => typeof value === "string" && value.trim() !== "")
        .map((value) => value.trim());
    // Also catch path-shaped string values in any field (e.g. globs, targets).
    const fromPathShapedValues = Object.values(toolInput)
        .filter((value) => typeof value === "string" &&
        value.length < 400 &&
        !value.includes(" ") &&
        PATH_RE.test(value))
        .map((value) => value.trim());
    return [...new Set([...fromKnownFields, ...fromPathShapedValues])];
}
25
/**
 * Builds the provenance record for an observe payload.
 *
 * The result always carries `userConfirmed: false`; `cwd`, `files`,
 * `command`, `agent` and `capturedAt` are added only when they have a value.
 */
export function extractProvenance(payload) {
    const data = payload.data ?? {};
    const rawToolName = data["tool_name"];
    const toolName = typeof rawToolName === "string" ? rawToolName : undefined;
    const toolInput = data["tool_input"];
    const files = collectFiles(toolInput);
    // For shell tools, capture the actual command for a sharper source;
    // otherwise the tool name alone stands in as the command.
    const shellCommand = toolName && toolInput && typeof toolInput === "object"
        ? toolInput["command"]
        : undefined;
    const hasShellCommand = typeof shellCommand === "string" && shellCommand.trim() !== "";
    const command = hasShellCommand
        ? `${toolName}: ${shellCommand.trim().slice(0, 200)}`
        : toolName;
    return {
        userConfirmed: false,
        ...(payload.cwd ? { cwd: payload.cwd } : {}),
        ...(files.length > 0 ? { files } : {}),
        ...(command ? { command } : {}),
        ...(payload.agent ? { agent: payload.agent } : {}),
        ...(payload.timestamp ? { capturedAt: payload.timestamp } : {}),
    };
}
51
/**
 * Tells whether a memory lacks any backing evidence.
 *
 * A missing provenance is unsourced. Otherwise the memory counts as sourced
 * as soon as it lists at least one file, names a command, or was confirmed by
 * the user.
 */
export function isUnsourced(p) {
    if (!p) {
        return true;
    }
    const backedByFiles = Array.isArray(p.files) && p.files.length > 0;
    const hasEvidence = backedByFiles || Boolean(p.command) || Boolean(p.userConfirmed);
    return !hasEvidence;
}
@@ -0,0 +1,60 @@
1
+ import { type QuantBits } from "./turboquant.js";
2
/**
 * Parameters carried by a QuantizedVectorIndex (exposed as its readonly
 * `params`). Per the `deserialize` contract declared in this file, a
 * persisted payload is rejected when its version, bits, seed, dims, rounds or
 * level-table hash (`levelHash`) do not reproduce the current algorithm.
 * `rescoreDepth` is the rescore setting that `reconcileRescoreDepth` realigns
 * after a restore.
 * NOTE(review): the exact meaning of `paddedDims` and `rounds` is defined by
 * the implementation, which is not visible here — confirm before relying on it.
 */
+ export interface QuantParams {
3
+ version: number;
4
+ bits: QuantBits;
5
+ dims: number;
6
+ paddedDims: number;
7
+ seed: string;
8
+ rounds: number;
9
+ levelHash: string;
10
+ rescoreDepth: number;
11
+ }
12
/**
 * Vector index over Float32Array embeddings keyed by observation id, built
 * from a dimension count, a quantization bit width, a seed and a rescore
 * depth. It can be serialized to a string and restored with `deserialize`.
 * NOTE(review): only the declaration is visible here; storage layout and
 * scoring details live in the implementation — confirm there.
 */
+ export declare class QuantizedVectorIndex {
13
+ readonly params: QuantParams;
14
+ private vectors;
15
+ private signFlips;
16
+ private scratch;
17
+ private queryScratch;
18
+ constructor(opts: {
19
+ dims: number;
20
+ bits: QuantBits;
21
+ seed: string;
22
+ rescoreDepth: number;
23
+ });
/** Adds an embedding under `obsId`, tagged with the session it belongs to. */
24
+ add(obsId: string, sessionId: string, embedding: Float32Array): void;
25
+ remove(obsId: string): void;
26
+ has(obsId: string): boolean;
27
+ ids(): string[];
28
+ /**
29
+ * Aligns the rescore setting with the current configuration after a
30
+ * restore: the persisted blob carries the rescoreDepth it was built
31
+ * with, which may no longer match the environment. Lowering to 0 drops
32
+ * the retained full vectors (reclaiming memory); raising it keeps
33
+ * working with whatever full vectors the blob had (entries without one
34
+ * simply keep their asymmetric score — the rescore pass guards on
35
+ * presence).
36
+ */
37
+ reconcileRescoreDepth(depth: number): void;
/**
 * Searches the index with a query embedding and returns scored hits, each
 * carrying the observation id and its session id. `limit` is optional.
 * NOTE(review): result ordering and the default limit are not visible in
 * this declaration — confirm against the implementation.
 */
38
+ search(query: Float32Array, limit?: number): Array<{
39
+ obsId: string;
40
+ sessionId: string;
41
+ score: number;
42
+ }>;
43
+ get size(): number;
/**
 * Checks stored entries against an expected dimension count and reports the
 * mismatching entries together with the set of dimensions seen.
 */
44
+ validateDimensions(expected: number): {
45
+ mismatches: Array<{
46
+ obsId: string;
47
+ dim: number;
48
+ }>;
49
+ seenDimensions: Set<number>;
50
+ };
51
+ clear(): void;
52
+ restoreFrom(other: QuantizedVectorIndex): void;
53
+ serialize(): string;
54
+ /**
55
+ * Returns null when the payload's params don't reproduce the current
56
+ * algorithm (version, bits, seed, dims, rounds or level-table hash
57
+ * mismatch) — the caller is expected to fall back to a full rebuild.
58
+ */
59
+ static deserialize(json: string): QuantizedVectorIndex | null;
60
+ }