@gethmy/mcp 2.3.4 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +500 -19
- package/dist/index.js +500 -19
- package/package.json +2 -2
- package/src/__tests__/memory-audit.test.ts +386 -0
- package/src/memory-audit.ts +534 -0
- package/src/memory-cleanup.ts +89 -1
- package/src/server.ts +116 -7
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for memory quality audit.
|
|
3
|
+
*
|
|
4
|
+
* Run with: bun test packages/mcp-server/src/__tests__/memory-audit.test.ts
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { describe, expect, mock, test } from "bun:test";
|
|
8
|
+
import { runMemoryAudit } from "../memory-audit.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Produces an ISO-8601 timestamp for the instant `days` days before now.
 *
 * @param days Number of 24-hour days to subtract from the current time.
 * @returns The resulting instant formatted with `Date.prototype.toISOString`.
 */
function daysAgo(days: number): string {
  const offsetMs = days * 24 * 60 * 60 * 1000;
  const instant = new Date(Date.now() - offsetMs);
  return instant.toISOString();
}
|
|
13
|
+
|
|
14
|
+
/**
 * Builds a stub memory client for the audit tests, together with arrays
 * that record the mutations performed through it.
 *
 * @param entities  Entities served by `listMemoryEntities` when the requested
 *                  offset is not greater than zero; any later page is empty.
 * @param relations Optional map of entity id to the number of outgoing
 *                  relations reported by `getRelatedEntities`. Ids that are
 *                  absent report zero; incoming relations are always empty.
 * @returns `client` (the stub, typed as `any`), `deletedIds` (every id passed
 *          to `deleteMemoryEntity`) and `updatedEntities` (every id/updates
 *          pair passed to `updateMemoryEntity`).
 */
function makeMockClient(
  entities: unknown[],
  relations?: Record<string, number>,
) {
  const deletedIds: string[] = [];
  const updatedEntities: Array<{
    id: string;
    updates: Record<string, unknown>;
  }> = [];

  // Single-page listing: only the first page carries data.
  const listMemoryEntities = mock(async (opts: { offset?: number }) => {
    const requestedOffset = opts.offset ?? 0;
    return requestedOffset > 0
      ? { entities: [], count: 0 }
      : { entities, count: entities.length };
  });

  // Relation lookups only expose a count, so placeholder objects suffice.
  const getRelatedEntities = mock(async (id: string) => {
    const outgoingCount = relations?.[id] ?? 0;
    const outgoing = new Array(outgoingCount).fill({});
    return { outgoing, incoming: [] };
  });

  const deleteMemoryEntity = mock(async (id: string) => {
    deletedIds.push(id);
    return { success: true };
  });

  const updateMemoryEntity = mock(
    async (id: string, updates: Record<string, unknown>) => {
      updatedEntities.push({ id, updates });
      return { entity: { id, ...updates } };
    },
  );

  const client = {
    listMemoryEntities,
    getRelatedEntities,
    deleteMemoryEntity,
    updateMemoryEntity,
  } as any;

  return { client, deletedIds, updatedEntities };
}
|
|
53
|
+
|
|
54
|
+
describe("runMemoryAudit", () => {
|
|
55
|
+
test("buckets a healthy modern entity into 'keep'", async () => {
  // Fixture: reference-tier entity with substantive content, high
  // confidence, recent access, tags, an embedding and a promotion origin.
  const healthyEntity = {
    id: "healthy",
    type: "pattern",
    title: "Auth refresh token rotation pattern",
    content:
      "Rotate refresh tokens on every use. Keep a rolling window of two valid tokens to allow retry on network failures.",
    confidence: 0.95,
    memory_tier: "reference",
    access_count: 25,
    last_accessed_at: daysAgo(1),
    created_at: daysAgo(90),
    tags: ["auth", "security"],
    embedding: [0.1, 0.2, 0.3],
    promoted_from_id: "orig-1",
    metadata: {},
  };
  // The stub reports three outgoing relations for this entity.
  const relationCounts = { healthy: 3 };
  const { client } = makeMockClient([healthyEntity], relationCounts);

  const { summary, lowest } = await runMemoryAudit(client, "ws-1");

  expect(summary.keep).toBe(1);
  expect(summary.delete).toBe(0);
  expect(lowest[0].score).toBeGreaterThanOrEqual(70);
});
|
|
83
|
+
|
|
84
|
+
test("buckets a legacy default-confidence entity into archive/delete", async () => {
  // Fixture: one-character title, empty content, confidence of exactly 1.0,
  // never accessed, no tags, no embedding, never promoted.
  const legacyEntity = {
    id: "legacy",
    type: "context",
    title: "x",
    content: "",
    confidence: 1.0,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(120),
    tags: [],
    embedding: null,
    promoted_from_id: null,
    metadata: {},
  };
  const { client } = makeMockClient([legacyEntity]);

  const { summary, lowest } = await runMemoryAudit(client, "ws-1");

  expect(summary.legacyCount).toBe(1);

  const [worst] = lowest;
  expect(worst.legacy).toBe(true);
  // Either of the two low buckets is acceptable here.
  expect(["archive", "delete"].includes(worst.bucket)).toBe(true);
  expect(worst.legacyReasons.length).toBeGreaterThan(1);
});
|
|
110
|
+
|
|
111
|
+
test("dryRun does not delete, archive, or flag", async () => {
  // Fixture: a poor-quality entity (empty content, low confidence, never
  // accessed) for which the test asserts no mutation happens under dryRun.
  const poorEntity = {
    id: "bad",
    type: "context",
    title: "x",
    content: "",
    confidence: 0.1,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(100),
    tags: [],
    embedding: null,
  };
  const mockBundle = makeMockClient([poorEntity]);

  await runMemoryAudit(mockBundle.client, "ws-1", undefined, { dryRun: true });

  // Neither the delete nor the update stub may have been invoked.
  expect(mockBundle.deletedIds).toHaveLength(0);
  expect(mockBundle.updatedEntities).toHaveLength(0);
});
|
|
132
|
+
|
|
133
|
+
test("executes delete + archive + flag in non-dryRun", async () => {
  // Expected to be deleted (very low).
  const trashEntity = {
    id: "trash",
    type: "context",
    title: "x",
    content: "",
    confidence: 0.05,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(200),
    tags: [],
  };
  // Expected to be archived (middling-low).
  const archiveEntity = {
    id: "archive-me",
    type: "pattern",
    title: "Partial pattern that lacks context here",
    content: "Some content that is a bit more substantive than nothing.",
    confidence: 0.3,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: daysAgo(40),
    created_at: daysAgo(40),
    tags: [],
  };
  // Expected to be flagged for review (medium): decent content but no tags,
  // no relations and no embedding.
  const reviewEntity = {
    id: "review-me",
    type: "pattern",
    title: "Reasonable pattern with decent content body here",
    content:
      "This entity has enough content to pass the length check. Confidence is moderate, access is limited.",
    confidence: 0.5,
    memory_tier: "episode",
    access_count: 1,
    last_accessed_at: daysAgo(25),
    created_at: daysAgo(40),
    tags: [],
    embedding: null,
  };

  const { client, deletedIds, updatedEntities } = makeMockClient([
    trashEntity,
    archiveEntity,
    reviewEntity,
  ]);

  const auditOptions = { dryRun: false };
  const report = await runMemoryAudit(client, "ws-1", undefined, auditOptions);

  expect(deletedIds).toContain("trash");

  // Archiving is observed as a confidence update to 0.25.
  const archiveUpdate = updatedEntities.find((entry) => entry.id === "archive-me");
  expect(archiveUpdate?.updates.confidence).toBe(0.25);

  // Review flagging is observed as metadata.needs_review === true.
  const reviewUpdate = updatedEntities.find((entry) => entry.id === "review-me");
  const reviewMetadata = reviewUpdate?.updates.metadata as Record<
    string,
    unknown
  >;
  expect(reviewMetadata?.needs_review).toBe(true);

  const { actionsTaken } = report;
  expect(actionsTaken.deleted).toBeGreaterThanOrEqual(1);
  expect(actionsTaken.archived).toBeGreaterThanOrEqual(1);
  expect(actionsTaken.flaggedReview).toBeGreaterThanOrEqual(1);
});
|
|
197
|
+
|
|
198
|
+
test("detects stuck-draft legacy signal", async () => {
  // Fixture: a draft created 75 days ago that was never promoted, but that
  // otherwise has real content, a tag and an embedding.
  const stuckDraft = {
    id: "stuck",
    type: "context",
    title: "Old draft that never made it",
    content:
      "Some content that is long enough to not count as thin content right here.",
    confidence: 0.6,
    memory_tier: "draft",
    access_count: 1,
    last_accessed_at: daysAgo(70),
    created_at: daysAgo(75),
    tags: ["x"],
    embedding: [0.1],
    promoted_from_id: null,
  };
  const { client } = makeMockClient([stuckDraft]);

  const { legacyBreakdown, lowest } = await runMemoryAudit(client, "ws-1");

  expect(legacyBreakdown.stuckDraft).toBe(1);

  const expectedReason = "stuck draft >60d never promoted";
  expect(lowest[0].reasons).toContain(expectedReason);
});
|
|
223
|
+
|
|
224
|
+
test("distribution buckets sum to scanned count", async () => {
  // Ten entities whose confidence, tier, access count, age, tags and
  // embedding all vary with the index, to spread them across score ranges.
  const buildEntity = (index: number) => ({
    id: `e${index}`,
    type: "context",
    title: `Entity number ${index} with decent title length`,
    content: "Some content string that is long enough to count properly.",
    confidence: 0.1 * (index + 1),
    memory_tier: index % 3 === 0 ? "reference" : "episode",
    access_count: index,
    last_accessed_at: daysAgo(index * 2),
    created_at: daysAgo(index * 5 + 1),
    tags: index % 2 === 0 ? ["tag"] : [],
    embedding: index % 2 === 0 ? [0.1] : null,
  });
  const entities = Array.from({ length: 10 }, (_unused, index) =>
    buildEntity(index),
  );
  const { client } = makeMockClient(entities);

  const { distribution, summary } = await runMemoryAudit(client, "ws-1");

  // Every scanned entity must be counted in exactly one score range.
  const ranges = ["0-20", "20-40", "40-70", "70-100"] as const;
  const total = ranges.reduce((sum, range) => sum + distribution[range], 0);

  expect(total).toBe(summary.scanned);
  expect(summary.scanned).toBe(10);
});
|
|
249
|
+
|
|
250
|
+
test("empty workspace returns success with zeros", async () => {
  // With no entities to scan, the audit should still report success.
  const emptyWorkspace = makeMockClient([]);

  const { success, summary, lowest } = await runMemoryAudit(
    emptyWorkspace.client,
    "ws-1",
  );

  expect(success).toBe(true);
  expect(summary.scanned).toBe(0);
  expect(lowest).toHaveLength(0);
});
|
|
257
|
+
|
|
258
|
+
test("deleteBelow=0 disables deletion entirely", async () => {
  // Same very-low-quality fixture used by the delete test elsewhere in this
  // suite; here deletion is switched off through deleteBelow: 0.
  const trashEntity = {
    id: "trash",
    type: "context",
    title: "x",
    content: "",
    confidence: 0.05,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(200),
    tags: [],
  };
  const { client, deletedIds, updatedEntities } = makeMockClient([trashEntity]);

  const auditOptions = { dryRun: false, deleteBelow: 0 };
  await runMemoryAudit(client, "ws-1", undefined, auditOptions);

  expect(deletedIds).toHaveLength(0);

  // The entity should land in the archive bucket instead (score < 40 but
  // >= 0), which is observed as a confidence update to 0.25.
  const trashUpdate = updatedEntities.find((entry) => entry.id === "trash");
  expect(trashUpdate?.updates.confidence).toBe(0.25);
});
|
|
284
|
+
|
|
285
|
+
test("stale-draft filter flags draft+0access+age>threshold separately from bucket", async () => {
  // Stale draft: the only entity the filter should flag. It is otherwise
  // healthy enough to bucket as "review" rather than archive.
  const staleDraft = {
    id: "stale-draft",
    type: "context",
    title: "Task transition: feature work started but never touched again",
    content:
      "This draft has enough content and tags to score reasonably, " +
      "but nobody ever accessed it after creation — classic promote-or-drop candidate.",
    confidence: 0.4,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(10),
    tags: ["task"],
    embedding: [0.1],
  };
  // Fresh draft: same shape but still under the age threshold, so it must
  // NOT be flagged.
  const freshDraft = {
    id: "fresh-draft",
    type: "context",
    title: "Task transition: a fresh draft still within the grace window",
    content:
      "Content long enough to not be thin at all, really properly sized.",
    confidence: 0.4,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(3),
    tags: ["task"],
    embedding: [0.1],
  };
  // Old, never-accessed entity that is not a draft: must NOT be flagged
  // because the filter applies to drafts only.
  const oldEpisode = {
    id: "old-episode",
    type: "pattern",
    title: "Episode entity that is old and unaccessed but not a draft",
    content:
      "Sometimes reference/episode tier entities sit unaccessed; " +
      "they're not draft-promotion candidates so the filter should skip them.",
    confidence: 0.8,
    memory_tier: "episode",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(30),
    tags: ["pat"],
    embedding: [0.1],
  };
  const relationCounts = {
    "stale-draft": 1,
    "fresh-draft": 1,
    "old-episode": 2,
  };
  const { client } = makeMockClient(
    [staleDraft, freshDraft, oldEpisode],
    relationCounts,
  );

  const { summary, staleDrafts, healthReport } = await runMemoryAudit(
    client,
    "ws-1",
  );

  expect(summary.staleDraftCount).toBe(1);
  expect(staleDrafts).toHaveLength(1);
  expect(staleDrafts[0].id).toBe("stale-draft");
  expect(healthReport).toContain("Stale Drafts");
});
|
|
347
|
+
|
|
348
|
+
test("staleDraftAgeDays option tunes the filter threshold", async () => {
  // Fixture: an untouched draft created five days ago.
  const fiveDayDraft = {
    id: "draft-5d",
    type: "context",
    title: "Five-day-old untouched draft",
    content: "Content long enough to pass the thin-content check, absolutely.",
    confidence: 0.4,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(5),
    tags: ["x"],
    embedding: [0.1],
  };
  const { client } = makeMockClient([fiveDayDraft]);

  // With default options the draft is expected not to count as stale.
  const { summary: defaultSummary } = await runMemoryAudit(client, "ws-1");
  expect(defaultSummary.staleDraftCount).toBe(0);

  // Lowering the threshold to three days should make the same draft stale.
  const tightOptions = { staleDraftAgeDays: 3 };
  const { summary: tightSummary } = await runMemoryAudit(
    client,
    "ws-1",
    undefined,
    tightOptions,
  );
  expect(tightSummary.staleDraftCount).toBe(1);
});
|
|
374
|
+
|
|
375
|
+
test("fetch error surfaces as report.success=false", async () => {
  // A client whose listing call always rejects; the audit is expected to
  // report the failure instead of throwing.
  const failingList = mock(async () => {
    throw new Error("API down");
  });
  const client = { listMemoryEntities: failingList } as any;

  const { success, errors } = await runMemoryAudit(client, "ws-1");

  expect(success).toBe(false);
  expect(errors.length).toBeGreaterThan(0);
});
|
|
386
|
+
});
|