memwarden 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +402 -0
- package/dist/bundle/bundle.d.ts +28 -0
- package/dist/bundle/bundle.js +85 -0
- package/dist/cli/bin.d.ts +2 -0
- package/dist/cli/bin.js +593 -0
- package/dist/cli/connect.d.ts +63 -0
- package/dist/cli/connect.js +121 -0
- package/dist/cli/hook.d.ts +24 -0
- package/dist/cli/hook.js +186 -0
- package/dist/cli/tools.d.ts +47 -0
- package/dist/cli/tools.js +246 -0
- package/dist/daemon/ensure.d.ts +12 -0
- package/dist/daemon/ensure.js +54 -0
- package/dist/daemon/service.d.ts +15 -0
- package/dist/daemon/service.js +210 -0
- package/dist/embedding/index.d.ts +10 -0
- package/dist/embedding/index.js +33 -0
- package/dist/embedding/local-embedding.d.ts +14 -0
- package/dist/embedding/local-embedding.js +80 -0
- package/dist/functions/access-tracker.d.ts +13 -0
- package/dist/functions/access-tracker.js +92 -0
- package/dist/functions/audit.d.ts +46 -0
- package/dist/functions/audit.js +0 -0
- package/dist/functions/cjk-segmenter.d.ts +6 -0
- package/dist/functions/cjk-segmenter.js +120 -0
- package/dist/functions/compress-synthetic.d.ts +2 -0
- package/dist/functions/compress-synthetic.js +104 -0
- package/dist/functions/config.d.ts +68 -0
- package/dist/functions/config.js +231 -0
- package/dist/functions/conflicts.d.ts +19 -0
- package/dist/functions/conflicts.js +328 -0
- package/dist/functions/context.d.ts +3 -0
- package/dist/functions/context.js +155 -0
- package/dist/functions/dedup.d.ts +11 -0
- package/dist/functions/dedup.js +51 -0
- package/dist/functions/dejafix.d.ts +96 -0
- package/dist/functions/dejafix.js +356 -0
- package/dist/functions/doctor.d.ts +29 -0
- package/dist/functions/doctor.js +137 -0
- package/dist/functions/forget.d.ts +3 -0
- package/dist/functions/forget.js +87 -0
- package/dist/functions/hybrid-search.d.ts +17 -0
- package/dist/functions/hybrid-search.js +205 -0
- package/dist/functions/index.d.ts +32 -0
- package/dist/functions/index.js +44 -0
- package/dist/functions/keyed-mutex.d.ts +1 -0
- package/dist/functions/keyed-mutex.js +21 -0
- package/dist/functions/logger.d.ts +6 -0
- package/dist/functions/logger.js +37 -0
- package/dist/functions/memory-utils.d.ts +2 -0
- package/dist/functions/memory-utils.js +29 -0
- package/dist/functions/observe.d.ts +5 -0
- package/dist/functions/observe.js +326 -0
- package/dist/functions/paths.d.ts +1 -0
- package/dist/functions/paths.js +38 -0
- package/dist/functions/privacy.d.ts +1 -0
- package/dist/functions/privacy.js +30 -0
- package/dist/functions/provenance.d.ts +9 -0
- package/dist/functions/provenance.js +57 -0
- package/dist/functions/quantized-vector-index.d.ts +60 -0
- package/dist/functions/quantized-vector-index.js +275 -0
- package/dist/functions/receipt.d.ts +31 -0
- package/dist/functions/receipt.js +95 -0
- package/dist/functions/search-index.d.ts +27 -0
- package/dist/functions/search-index.js +217 -0
- package/dist/functions/search.d.ts +25 -0
- package/dist/functions/search.js +523 -0
- package/dist/functions/stemmer.d.ts +1 -0
- package/dist/functions/stemmer.js +110 -0
- package/dist/functions/synonyms.d.ts +1 -0
- package/dist/functions/synonyms.js +69 -0
- package/dist/functions/turboquant.d.ts +53 -0
- package/dist/functions/turboquant.js +278 -0
- package/dist/functions/types.d.ts +217 -0
- package/dist/functions/types.js +8 -0
- package/dist/functions/vector-index.d.ts +25 -0
- package/dist/functions/vector-index.js +125 -0
- package/dist/functions/vector-persistence.d.ts +14 -0
- package/dist/functions/vector-persistence.js +75 -0
- package/dist/functions/verify.d.ts +13 -0
- package/dist/functions/verify.js +104 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +219 -0
- package/dist/kernel/http.d.ts +24 -0
- package/dist/kernel/http.js +261 -0
- package/dist/kernel/index.d.ts +19 -0
- package/dist/kernel/index.js +21 -0
- package/dist/kernel/kernel.d.ts +80 -0
- package/dist/kernel/kernel.js +297 -0
- package/dist/kernel/pubsub.d.ts +21 -0
- package/dist/kernel/pubsub.js +38 -0
- package/dist/kernel/types.d.ts +139 -0
- package/dist/kernel/types.js +20 -0
- package/dist/mcp/bin.d.ts +2 -0
- package/dist/mcp/bin.js +27 -0
- package/dist/mcp/server.d.ts +34 -0
- package/dist/mcp/server.js +377 -0
- package/dist/observability/metrics.d.ts +26 -0
- package/dist/observability/metrics.js +104 -0
- package/dist/proxy/server.d.ts +30 -0
- package/dist/proxy/server.js +331 -0
- package/dist/state/kv.d.ts +41 -0
- package/dist/state/kv.js +50 -0
- package/dist/state/oplog.d.ts +25 -0
- package/dist/state/oplog.js +57 -0
- package/dist/state/schema.d.ts +60 -0
- package/dist/state/schema.js +88 -0
- package/dist/state/store-libsql.d.ts +46 -0
- package/dist/state/store-libsql.js +263 -0
- package/dist/state/store-memory.d.ts +23 -0
- package/dist/state/store-memory.js +121 -0
- package/dist/state/store.d.ts +87 -0
- package/dist/state/store.js +58 -0
- package/dist/triggers/api.d.ts +14 -0
- package/dist/triggers/api.js +510 -0
- package/dist/triggers/auth.d.ts +1 -0
- package/dist/triggers/auth.js +13 -0
- package/package.json +58 -0
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
//
|
|
2
|
+
// The write path (mem::observe). Accepts a HookPayload, validates it, optionally
|
|
3
|
+
// dedups, privacy-strips the raw payload, builds a RawObservation, then — inside
|
|
4
|
+
// a per-session keyed lock — enforces the per-session cap, persists the raw
|
|
5
|
+
// observation, updates/creates the session row (observationCount++, updatedAt,
|
|
6
|
+
// firstPrompt), and runs the default zero-LLM synthetic compression: write the
|
|
7
|
+
// synthetic over the same obsId and add it to the BM25 and vector indexes.
|
|
8
|
+
// Referenced files are hashed into provenance for Verified Recall. Returns
|
|
9
|
+
// { observationId }.
|
|
10
|
+
//
|
|
11
|
+
// Image detection + modality tagging are kept (pure, keeps the observation
|
|
12
|
+
// shape stable); image-to-disk persistence and vision embedding are out of
|
|
13
|
+
// scope. LLM-based compression (AUTO_COMPRESS) has no provider wired, so the
|
|
14
|
+
// synthetic path is always taken.
|
|
15
|
+
import { TriggerAction } from "../kernel/index.js";
|
|
16
|
+
import { KV, STREAM, generateId } from "../state/schema.js";
|
|
17
|
+
import { stripPrivateData } from "./privacy.js";
|
|
18
|
+
import { withKeyedLock } from "./keyed-mutex.js";
|
|
19
|
+
import { isAutoCompressEnabled, getAgentId } from "./config.js";
|
|
20
|
+
import { buildSyntheticCompression } from "./compress-synthetic.js";
|
|
21
|
+
import { extractProvenance } from "./provenance.js";
|
|
22
|
+
import { hashFiles } from "./verify.js";
|
|
23
|
+
import { recordFix, looksLikeResolvedFix } from "./dejafix.js";
|
|
24
|
+
import { getSearchIndex, vectorIndexAddGuarded } from "./search.js";
|
|
25
|
+
import { logger } from "./logger.js";
|
|
26
|
+
import { metrics } from "../observability/metrics.js";
|
|
27
|
+
/**
 * Finds the first image reference inside an arbitrary payload value.
 *
 * Strings qualify when they start with a `data:image/` URI prefix or with the
 * base64 prefixes `iVBORw0KGgo` / `/9j/`. Objects (and arrays) are checked for
 * the well-known image fields first, in a fixed priority order, and are then
 * searched depth-first through every own enumerable value.
 *
 * @param d Any value taken from a hook payload.
 * @returns The matching string, or `undefined` when nothing image-like is
 *   found. A well-known image field that holds an empty string is returned
 *   as-is.
 */
export function extractImage(d) {
    if (!d) {
        return undefined;
    }
    switch (typeof d) {
        case "string": {
            const imagePrefixes = ["data:image/", "iVBORw0KGgo", "/9j/"];
            return imagePrefixes.some((prefix) => d.startsWith(prefix))
                ? d
                : undefined;
        }
        case "object": {
            // Direct fields win over anything nested, in this priority order.
            const directFields = ["image_data", "image_path", "imageBase64", "imagePath"];
            for (const field of directFields) {
                const direct = d[field];
                if (typeof direct === "string") {
                    return direct;
                }
            }
            // Otherwise descend into each value and take the first truthy hit.
            for (const nested of Object.values(d)) {
                const found = extractImage(nested);
                if (found) {
                    return found;
                }
            }
            return undefined;
        }
        default:
            return undefined;
    }
}
|
|
56
|
+
/**
 * Runs stripPrivateData over every string (keys and values) of a JSON-like
 * value and returns a redacted copy.
 *
 * Used as the fallback when redacting the serialized JSON text produced
 * something that no longer parses. Callers pass the result of JSON.parse, so
 * the value is acyclic and contains only JSON types.
 */
function redactStringLeaves(value) {
    if (typeof value === "string") {
        return stripPrivateData(value);
    }
    if (Array.isArray(value)) {
        return value.map((item) => redactStringLeaves(item));
    }
    if (typeof value === "object" && value !== null) {
        return Object.fromEntries(Object.entries(value).map(([key, inner]) => [
            stripPrivateData(key),
            redactStringLeaves(inner),
        ]));
    }
    return value;
}
/**
 * Registers the `mem::observe` write path on the SDK.
 *
 * The handler validates the payload, optionally dedups it, privacy-strips the
 * raw data and builds the raw observation. Then, inside a per-session keyed
 * lock, it enforces the per-session cap, persists the observation, updates or
 * creates the session row, and either triggers `mem::compress` (when
 * auto-compress is enabled) or writes the synthetic compression over the same
 * observation id and indexes it.
 *
 * Handler results:
 *  - `{ success: false, error }` for an invalid payload or a reached cap,
 *  - `{ deduplicated: true, sessionId }` for a duplicate,
 *  - `{ observationId }` on success.
 *
 * @param sdk Kernel SDK; `registerFunction` and `trigger` are used.
 * @param kv State store; `get`, `set`, `update` and `list` are used.
 * @param dedupMap Optional dedup helper exposing `computeHash`,
 *   `isDuplicate` and `record`. Dedup is skipped when it is falsy.
 * @param maxObservationsPerSession Optional cap; only enforced when > 0.
 */
export function registerObserveFunction(sdk, kv, dedupMap, maxObservationsPerSession) {
    sdk.registerFunction("mem::observe", async (payload) => {
        if (!payload?.sessionId ||
            typeof payload.sessionId !== "string" ||
            !payload.hookType ||
            typeof payload.hookType !== "string" ||
            !payload.timestamp ||
            typeof payload.timestamp !== "string") {
            return {
                success: false,
                error: "Invalid payload: sessionId, hookType, and timestamp are required",
            };
        }
        const obsId = generateId("obs");
        let dedupHash;
        if (dedupMap) {
            const d = typeof payload.data === "object" && payload.data !== null
                ? payload.data
                : {};
            const toolName = d["tool_name"] || payload.hookType;
            dedupHash = dedupMap.computeHash(payload.sessionId, toolName, d["tool_input"]);
            if (dedupMap.isDuplicate(dedupHash)) {
                return { deduplicated: true, sessionId: payload.sessionId };
            }
        }
        // Privacy-strip before anything is persisted. Redaction runs on the
        // serialized JSON text; because it is purely textual it can cut through
        // JSON syntax and leave text that no longer parses. In that case redact
        // string by string instead of collapsing the whole payload to
        // "[object Object]".
        let sanitizedRaw;
        let serialized;
        try {
            serialized = JSON.stringify(payload.data);
        }
        catch {
            // Not serializable (e.g. circular structure); handled below.
            serialized = undefined;
        }
        if (typeof serialized === "string") {
            try {
                sanitizedRaw = JSON.parse(stripPrivateData(serialized));
            }
            catch {
                sanitizedRaw = redactStringLeaves(JSON.parse(serialized));
            }
        }
        else {
            // JSON.stringify returned undefined or threw: keep the original
            // string-coercion fallback.
            sanitizedRaw = stripPrivateData(String(payload.data));
        }
        const raw = {
            id: obsId,
            sessionId: payload.sessionId,
            timestamp: payload.timestamp,
            hookType: payload.hookType,
            raw: sanitizedRaw,
        };
        let extractedImage;
        if (typeof sanitizedRaw === "object" && sanitizedRaw !== null) {
            const d = sanitizedRaw;
            if (payload.hookType === "post_tool_use" ||
                payload.hookType === "post_tool_failure") {
                if (typeof d["tool_name"] === "string")
                    raw.toolName = d["tool_name"];
                raw.toolInput = d["tool_input"];
                raw.toolOutput = d["tool_output"] || d["error"];
            }
            if (payload.hookType === "prompt_submit") {
                if (typeof d["prompt"] === "string")
                    raw.userPrompt = d["prompt"];
            }
            extractedImage = extractImage(sanitizedRaw);
            if (extractedImage) {
                raw.modality =
                    raw.toolInput || raw.toolOutput || raw.userPrompt
                        ? "mixed"
                        : "image";
            }
        }
        else if (typeof sanitizedRaw === "string") {
            extractedImage = extractImage(sanitizedRaw);
            if (extractedImage) {
                raw.modality = "image";
            }
        }
        return withKeyedLock(`obs:${payload.sessionId}`, async () => {
            if (maxObservationsPerSession && maxObservationsPerSession > 0) {
                const existing = await kv.list(KV.observations(payload.sessionId));
                if (existing.length >= maxObservationsPerSession) {
                    return {
                        success: false,
                        error: `Session observation limit reached (${maxObservationsPerSession})`,
                    };
                }
            }
            // Existing session is the source of truth for agentId (even
            // undefined). Env AGENT_ID only fires when no session row exists yet —
            // otherwise an unscoped session would get retroactively scoped by a
            // later AGENT_ID export.
            const existingSession = await kv.get(KV.sessions, payload.sessionId);
            const inheritedAgentId = existingSession
                ? existingSession.agentId
                : getAgentId();
            if (inheritedAgentId) {
                raw.agentId = inheritedAgentId;
            }
            await kv.set(KV.observations(payload.sessionId), obsId, raw);
            // Record the dedup hash only after the observation is persisted.
            if (dedupMap && dedupHash) {
                dedupMap.record(dedupHash);
            }
            // Live-viewer stream fan-out. The kernel routes stream::set /
            // stream::send to its in-process pub/sub. Durably unused in-process,
            // but kept so the viewer wiring stays identical.
            await sdk.trigger({
                function_id: "stream::set",
                payload: {
                    stream_name: STREAM.name,
                    group_id: STREAM.group(payload.sessionId),
                    item_id: obsId,
                    data: { type: "raw", observation: raw },
                },
            });
            // Fire-and-forget: not awaited so the viewer never blocks the write
            // path, but a rejection must not become an unhandled rejection.
            void Promise.resolve(sdk.trigger({
                function_id: "stream::send",
                payload: {
                    stream_name: STREAM.name,
                    group_id: STREAM.viewerGroup,
                    id: `raw-${obsId}`,
                    type: "raw_observation",
                    data: {
                        type: "raw",
                        observation: raw,
                        sessionId: payload.sessionId,
                    },
                },
                action: TriggerAction.Void(),
            })).catch((err) => {
                logger.warn("viewer stream send failed", {
                    obsId,
                    error: err instanceof Error ? err.message : String(err),
                });
            });
            const session = existingSession;
            if (session) {
                const updates = [
                    { type: "set", path: "updatedAt", value: new Date().toISOString() },
                    {
                        type: "set",
                        path: "observationCount",
                        value: (session.observationCount || 0) + 1,
                    },
                ];
                if (!session.firstPrompt && typeof raw.userPrompt === "string") {
                    const trimmed = raw.userPrompt.replace(/\s+/g, " ").trim();
                    if (trimmed.length > 0) {
                        updates.push({
                            type: "set",
                            path: "firstPrompt",
                            value: trimmed.slice(0, 200),
                        });
                    }
                }
                await kv.update(KV.sessions, payload.sessionId, updates);
            }
            else if (typeof payload.project === "string" &&
                payload.project.trim().length > 0 &&
                typeof payload.cwd === "string" &&
                payload.cwd.trim().length > 0) {
                // Connectors that skip POST /session/start can fire observations
                // before the session record exists. Create it now from the
                // observation payload — but only when project + cwd are present
                // (HookPayload contract). Older payloads without those fields keep
                // the original no-op behaviour.
                const trimmedPrompt = typeof raw.userPrompt === "string"
                    ? raw.userPrompt.replace(/\s+/g, " ").trim().slice(0, 200)
                    : undefined;
                const ts = new Date().toISOString();
                await kv.set(KV.sessions, payload.sessionId, {
                    id: payload.sessionId,
                    project: payload.project,
                    cwd: payload.cwd,
                    startedAt: payload.timestamp ?? ts,
                    updatedAt: ts,
                    status: "active",
                    observationCount: 1,
                    ...(inheritedAgentId ? { agentId: inheritedAgentId } : {}),
                    ...(trimmedPrompt && trimmedPrompt.length > 0
                        ? { firstPrompt: trimmedPrompt }
                        : {}),
                });
            }
            // Per-observation LLM compression is opt-in.
            // Default path: build a zero-LLM synthetic compression so recall and
            // BM25 search work without an LLM. memwarden has no LLM provider
            // wired in the core, so the synthetic path is always taken.
            if (isAutoCompressEnabled()) {
                await sdk.trigger({
                    function_id: "mem::compress",
                    payload: {
                        observationId: obsId,
                        sessionId: payload.sessionId,
                        raw,
                    },
                    action: TriggerAction.Void(),
                });
            }
            else {
                const synthetic = buildSyntheticCompression(raw);
                // Attach the evidence trail so the doctor and Verified Recall can later
                // judge whether this memory is sourced and still valid. Hash the
                // referenced files now (under cwd) so content drift is detectable.
                // Provenance is persisted, so it is extracted from the SANITIZED
                // data: otherwise a secret inside a shell command or a
                // path-shaped value would bypass the redaction above.
                const prov = extractProvenance({ ...payload, data: sanitizedRaw });
                if (prov.files && prov.files.length > 0 && payload.cwd) {
                    const fileHashes = hashFiles(prov.files, payload.cwd);
                    if (Object.keys(fileHashes).length > 0)
                        prov.fileHashes = fileHashes;
                }
                synthetic.provenance = prov;
                metrics.recordObserve(JSON.stringify(raw), JSON.stringify(synthetic));
                // Déjà Fix opportunistic capture. When this observation already looks
                // like a recorded fix (contains BOTH a recognizable error AND
                // resolution language), extract its error signature and store a
                // FixMemory so any agent that later hits the same error can recall the
                // verified fix. Reuses the same provenance (with fileHashes) we just
                // built, so Verified Recall can detect drift. Strictly best-effort and
                // gated: it must never throw on or block the observe hot path, and
                // non-fix observations are completely untouched.
                try {
                    const fixText = [
                        synthetic.title,
                        synthetic.narrative,
                        ...(synthetic.facts ?? []),
                    ]
                        .filter((s) => typeof s === "string" && s.length > 0)
                        .join("\n");
                    if (payload.cwd && looksLikeResolvedFix(fixText)) {
                        const tool = raw.agentId ?? payload.agent;
                        await recordFix(kv, {
                            errorText: fixText,
                            observationId: obsId,
                            fix: synthetic.narrative || synthetic.title,
                            provenance: prov,
                            cwd: payload.cwd,
                            timestamp: payload.timestamp,
                            sessionId: payload.sessionId,
                            ...(tool ? { tool } : {}),
                        });
                    }
                }
                catch (err) {
                    // The side path must never break observe.
                    logger.warn("dejafix opportunistic capture failed", {
                        obsId,
                        error: err instanceof Error ? err.message : String(err),
                    });
                }
                // The synthetic record overwrites the raw one under the same obsId.
                await kv.set(KV.observations(payload.sessionId), obsId, synthetic);
                getSearchIndex().add(synthetic);
                await vectorIndexAddGuarded(synthetic.id, synthetic.sessionId, synthetic.title + " " + (synthetic.narrative || ""), { kind: "synthetic", logId: synthetic.id });
                await sdk.trigger({
                    function_id: "stream::set",
                    payload: {
                        stream_name: STREAM.name,
                        group_id: STREAM.group(payload.sessionId),
                        item_id: obsId,
                        data: { type: "compressed", observation: synthetic },
                    },
                });
                await sdk.trigger({
                    function_id: "stream::set",
                    payload: {
                        stream_name: STREAM.name,
                        group_id: STREAM.viewerGroup,
                        item_id: obsId,
                        data: {
                            type: "compressed",
                            observation: synthetic,
                            sessionId: payload.sessionId,
                        },
                    },
                });
            }
            logger.info("Observation captured", {
                obsId,
                sessionId: payload.sessionId,
                hook: payload.hookType,
                compress: isAutoCompressEnabled() ? "llm" : "synthetic",
            });
            return { observationId: obsId };
        });
    });
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Canonicalizes a project/cwd path so that different spellings of the same
 * directory compare equal.
 *
 * @param p Path or label; `null` and `undefined` are treated as the empty string.
 * @returns The trimmed input unchanged when it is empty or not absolute; the
 *   symlink-free real path when the path exists; otherwise the syntactically
 *   normalized path without a trailing `/`.
 */
export declare function canonicalizePath(p: string | undefined | null): string;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Path canonicalization for project/cwd scoping. The unified memory layer
|
|
3
|
+
// scopes recall by working directory, and tools report that directory in
|
|
4
|
+
// inconsistent forms: symlinked (/tmp -> /private/tmp on macOS), with or
|
|
5
|
+
// without a trailing slash, with `..` segments. Exact string comparison then
|
|
6
|
+
// silently misses — the worst failure mode for a memory layer, because it
|
|
7
|
+
// looks like "no memory" rather than an error.
|
|
8
|
+
//
|
|
9
|
+
// canonicalizePath resolves an absolute path to its real, symlink-free form
|
|
10
|
+
// so two spellings of the same directory compare equal. Non-absolute values
|
|
11
|
+
// (labels like "mcp", or "/work/alpha"-style virtual projects in tests) pass
|
|
12
|
+
// through unchanged, and a path that does not exist falls back to syntactic
|
|
13
|
+
// normalization. Applied to BOTH the query filter and the stored value at
|
|
14
|
+
// comparison time, so it is robust regardless of how the path was captured.
|
|
15
|
+
import { realpathSync } from "node:fs";
|
|
16
|
+
import { isAbsolute, normalize } from "node:path";
|
|
17
|
+
// Memo of absolute input path -> resolved real path. Only paths that resolved
// successfully are remembered: an existing path is stable, whereas a path that
// does not exist yet may appear later and has to be resolved again.
const resolvedPaths = new Map();
/**
 * Canonicalizes a project/cwd path so that different spellings of the same
 * directory compare equal.
 *
 * @param p Path or label; `null` and `undefined` are treated as the empty string.
 * @returns The trimmed input unchanged when it is empty or not absolute; the
 *   real (symlink-free) path when it can be resolved; otherwise the normalized
 *   path with a single trailing "/" removed.
 */
export function canonicalizePath(p) {
    const candidate = (p ?? "").trim();
    if (candidate === "" || !isAbsolute(candidate)) {
        return candidate;
    }
    if (resolvedPaths.has(candidate)) {
        return resolvedPaths.get(candidate);
    }
    let resolved;
    try {
        resolved = realpathSync(candidate);
    }
    catch {
        // Could not be resolved: fall back to purely syntactic normalization
        // and do not memoize the result.
        const normalized = normalize(candidate);
        const hasTrailingSlash = normalized.length > 1 && normalized.endsWith("/");
        return hasTrailingSlash ? normalized.slice(0, -1) : normalized;
    }
    resolvedPaths.set(candidate, resolved);
    return resolved;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Redacts private content from text before it is persisted.
 *
 * @param input Text to redact.
 * @returns The text with every `<private>...</private>` span replaced by
 *   `[REDACTED]` and every recognized credential replaced by
 *   `[REDACTED_SECRET]`.
 */
export declare function stripPrivateData(input: string): string;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Redacts secrets from raw observation text before anything is persisted:
|
|
3
|
+
// any span the user wraps in <private>...</private>, plus a set of well-known
|
|
4
|
+
// credential formats (provider API keys, bearer tokens, JWTs, cloud keys).
|
|
5
|
+
// The credential shapes are public, factual patterns.
|
|
6
|
+
// Any span the user explicitly wrapped in <private>...</private> (non-greedy,
// may span lines, case-insensitive).
const PRIVATE_SPAN = /<private>[\s\S]*?<\/private>/gi;
// Well-known credential shapes. Every pattern is global so that all
// occurrences in the input are redacted.
const SECRET_PATTERNS = [
    // key = value / key: value assignments. A closing quote is consumed only
    // when the value was opened with the same quote. This function is also run
    // on serialized JSON, where an unconditional trailing quote would swallow
    // the JSON string delimiter after an unquoted value (e.g.
    // "export TOKEN=abc…") and leave text that no longer parses.
    /(?:api[_-]?key|secret|token|password|credential|auth)\s*[=:]\s*(?:"[A-Za-z0-9_\-/.+]{20,}"?|'[A-Za-z0-9_\-/.+]{20,}'?|[A-Za-z0-9_\-/.+]{20,})/gi,
    /Bearer\s+[A-Za-z0-9._\-+/=]{20,}/gi,
    /sk-proj-[A-Za-z0-9\-_]{20,}/g,
    /(?:sk|pk|rk|ak)-[A-Za-z0-9][A-Za-z0-9\-_]{19,}/g,
    /sk-ant-[A-Za-z0-9\-_]{20,}/g,
    // GitHub tokens: ghp_ (personal), gho_ (OAuth), ghu_ (user-to-server),
    // ghs_ (server-to-server) and ghr_ (refresh).
    /gh[pousr]_[A-Za-z0-9]{36,}/g,
    /github_pat_[A-Za-z0-9_]{22,}/g,
    /xoxb-[A-Za-z0-9\-]+/g,
    /AKIA[0-9A-Z]{16}/g,
    /AIza[A-Za-z0-9\-_]{35}/g,
    /eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g,
    /npm_[A-Za-z0-9]{36}/g,
    /glpat-[A-Za-z0-9\-_]{20,}/g,
    /dop_v1_[A-Za-z0-9]{64}/g,
];
/**
 * Redacts private content from text before it is persisted.
 *
 * @param input Text to redact (plain text or serialized JSON).
 * @returns The text with every `<private>...</private>` span replaced by
 *   `[REDACTED]` and every match of SECRET_PATTERNS replaced by
 *   `[REDACTED_SECRET]`.
 */
export function stripPrivateData(input) {
    let out = input.replace(PRIVATE_SPAN, "[REDACTED]");
    for (const pattern of SECRET_PATTERNS) {
        // Fresh RegExp per pass to avoid any shared lastIndex state.
        out = out.replace(new RegExp(pattern.source, pattern.flags), "[REDACTED_SECRET]");
    }
    return out;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Provenance } from "./types.js";
|
|
2
|
+
/**
 * Builds the provenance (evidence trail) for an observe payload.
 *
 * @param payload Subset of the hook payload that provenance is derived from:
 *   working directory, capture timestamp, agent label and the raw hook data.
 * @returns A Provenance record.
 */
export declare function extractProvenance(payload: {
    cwd?: string;
    timestamp?: string;
    agent?: string;
    data?: unknown;
}): Provenance;
|
|
8
|
+
/** True when a memory has no evidence backing it. */
|
|
9
|
+
/**
 * True when a memory has no evidence backing it.
 *
 * @param p Provenance of the memory, if any.
 * @returns `true` when `p` is missing, or when it has no files, no command
 *   and is not user-confirmed.
 */
export declare function isUnsourced(p: Provenance | undefined): boolean;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Extract provenance from an observe payload — the evidence trail that
|
|
3
|
+
// lets the doctor judge a memory's trustworthiness later. Pure, no I/O.
|
|
4
|
+
// Tool-input fields that name a file directly.
const FILE_KEYS = ["file_path", "filePath", "file", "path", "notebook_path"];
// Looks like a path: has a slash or a dotted extension.
const PATH_RE = /(^|\/)[\w.\-/]+\.\w{1,8}$|\//;
/**
 * Collects the file references found in a tool input object.
 *
 * Values of the FILE_KEYS fields come first (trimmed, non-blank strings),
 * followed by any other top-level string value that is shorter than 400
 * characters, contains no space and matches PATH_RE (e.g. globs, targets).
 *
 * @param toolInput The `tool_input` value of a hook payload.
 * @returns De-duplicated references in discovery order; an empty array when
 *   `toolInput` is not an object.
 */
function collectFiles(toolInput) {
    if (toolInput === null || typeof toolInput !== "object") {
        return [];
    }
    const namedFiles = FILE_KEYS
        .map((key) => toolInput[key])
        .filter((value) => typeof value === "string" && value.trim())
        .map((value) => value.trim());
    const pathShaped = Object.values(toolInput)
        .filter((value) => typeof value === "string" &&
        value.length < 400 &&
        !value.includes(" ") &&
        PATH_RE.test(value))
        .map((value) => value.trim());
    return [...new Set([...namedFiles, ...pathShaped])];
}
|
|
25
|
+
/**
 * Builds the provenance (evidence trail) for an observe payload.
 *
 * Reads `tool_name` and `tool_input` from `payload.data`. The command is the
 * tool name, sharpened to `"<tool>: <command>"` (command trimmed and cut to
 * 200 characters) when the tool input carries a non-blank `command` string.
 * Optional fields are only present when they have a truthy value.
 *
 * @param payload Hook payload; `cwd`, `timestamp`, `agent` and `data` are read.
 * @returns A new provenance object that always has `userConfirmed: false`.
 */
export function extractProvenance(payload) {
    const data = payload.data ?? {};
    const rawToolName = data["tool_name"];
    const toolName = typeof rawToolName === "string" ? rawToolName : undefined;
    const toolInput = data["tool_input"];
    const files = collectFiles(toolInput);
    // For shell tools, capture the actual command for a sharper source.
    const shellCommand = toolName && toolInput && typeof toolInput === "object"
        ? toolInput["command"]
        : undefined;
    const command = typeof shellCommand === "string" && shellCommand.trim()
        ? `${toolName}: ${shellCommand.trim().slice(0, 200)}`
        : toolName;
    return {
        userConfirmed: false,
        ...(payload.cwd ? { cwd: payload.cwd } : {}),
        ...(files.length > 0 ? { files } : {}),
        ...(command ? { command } : {}),
        ...(payload.agent ? { agent: payload.agent } : {}),
        ...(payload.timestamp ? { capturedAt: payload.timestamp } : {}),
    };
}
|
|
51
|
+
/**
 * True when a memory has no evidence backing it: the provenance is missing,
 * or it lists no files, has no command and is not user-confirmed.
 */
export function isUnsourced(p) {
    if (!p) {
        return true;
    }
    const referencesFiles = Array.isArray(p.files) && p.files.length > 0;
    return !(referencesFiles || p.command || p.userConfirmed);
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { type QuantBits } from "./turboquant.js";
|
|
2
|
+
/**
 * Parameters recorded with a quantized vector index.
 *
 * Per the note on `QuantizedVectorIndex.deserialize`, a persisted index is
 * only accepted when version, bits, seed, dims, rounds and the level-table
 * hash reproduce the current algorithm.
 */
export interface QuantParams {
    version: number;
    bits: QuantBits;
    dims: number;
    paddedDims: number;
    seed: string;
    rounds: number;
    levelHash: string;
    rescoreDepth: number;
}
|
|
12
|
+
/**
 * Type declaration of the quantized vector index: entries are keyed by
 * observation id and carry the owning session id.
 */
export declare class QuantizedVectorIndex {
    readonly params: QuantParams;
    private vectors;
    private signFlips;
    private scratch;
    private queryScratch;
    constructor(opts: {
        dims: number;
        bits: QuantBits;
        seed: string;
        rescoreDepth: number;
    });
    add(obsId: string, sessionId: string, embedding: Float32Array): void;
    remove(obsId: string): void;
    has(obsId: string): boolean;
    ids(): string[];
    /**
     * Aligns the rescore setting with the current configuration after a
     * restore: the persisted blob carries the rescoreDepth it was built
     * with, which may no longer match the environment. Lowering to 0 drops
     * the retained full vectors (reclaiming memory); raising it keeps
     * working with whatever full vectors the blob had (entries without one
     * simply keep their asymmetric score — the rescore pass guards on
     * presence).
     */
    reconcileRescoreDepth(depth: number): void;
    search(query: Float32Array, limit?: number): {
        obsId: string;
        sessionId: string;
        score: number;
    }[];
    get size(): number;
    validateDimensions(expected: number): {
        mismatches: {
            obsId: string;
            dim: number;
        }[];
        seenDimensions: Set<number>;
    };
    clear(): void;
    restoreFrom(other: QuantizedVectorIndex): void;
    serialize(): string;
    /**
     * Returns null when the payload's params don't reproduce the current
     * algorithm (version, bits, seed, dims, rounds or level-table hash
     * mismatch) — the caller is expected to fall back to a full rebuild.
     */
    static deserialize(json: string): QuantizedVectorIndex | null;
}
|