@gethmy/mcp 2.3.4 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +475 -19
- package/dist/index.js +475 -19
- package/package.json +2 -2
- package/src/__tests__/memory-audit.test.ts +296 -0
- package/src/memory-audit.ts +485 -0
- package/src/memory-cleanup.ts +89 -1
- package/src/server.ts +100 -7
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for memory quality audit.
|
|
3
|
+
*
|
|
4
|
+
* Run with: bun test packages/mcp-server/src/__tests__/memory-audit.test.ts
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { describe, expect, mock, test } from "bun:test";
|
|
8
|
+
import { runMemoryAudit } from "../memory-audit.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Builds an ISO-8601 timestamp for a moment `days` days before the current time.
 *
 * @param days - How many 24-hour days to subtract from `Date.now()`.
 * @returns The resulting instant formatted by `Date.prototype.toISOString()`.
 */
function daysAgo(days: number): string {
  // Same left-to-right multiplication as a plain days→ms conversion.
  const offsetMs = days * 24 * 60 * 60 * 1000;
  const instant = new Date(Date.now() - offsetMs);
  return instant.toISOString();
}
|
|
13
|
+
|
|
14
|
+
/**
 * Creates a mocked memory client plus the arrays that record its mutations.
 *
 * @param entities - Entities returned by the first `listMemoryEntities` page
 *   (any call with `offset > 0` yields an empty page).
 * @param relations - Optional map of entity id → number of outgoing relations
 *   reported by `getRelatedEntities`; ids not listed report zero.
 * @returns `client` (the mock, typed `any`), `deletedIds` (ids passed to
 *   `deleteMemoryEntity`, in call order) and `updatedEntities` (id/updates
 *   pairs passed to `updateMemoryEntity`, in call order).
 */
function makeMockClient(
  entities: unknown[],
  relations?: Record<string, number>,
) {
  type RecordedUpdate = { id: string; updates: Record<string, unknown> };

  const deletedIds: string[] = [];
  const updatedEntities: Array<RecordedUpdate> = [];

  // Single-page listing: everything is served at offset 0, nothing afterwards.
  const listMemoryEntities = mock(async (opts: { offset?: number }) => {
    const pastFirstPage = (opts.offset ?? 0) > 0;
    if (pastFirstPage) return { entities: [], count: 0 };
    return { entities, count: entities.length };
  });

  // Relations are only ever reported as outgoing; every slot holds the same
  // placeholder object.
  const getRelatedEntities = mock(async (id: string) => {
    const outgoingCount = relations?.[id] ?? 0;
    return {
      outgoing: Array(outgoingCount).fill({}),
      incoming: [],
    };
  });

  const deleteMemoryEntity = mock(async (id: string) => {
    deletedIds.push(id);
    return { success: true };
  });

  const updateMemoryEntity = mock(
    async (id: string, updates: Record<string, unknown>) => {
      updatedEntities.push({ id, updates });
      return { entity: { id, ...updates } };
    },
  );

  const client = {
    listMemoryEntities,
    getRelatedEntities,
    deleteMemoryEntity,
    updateMemoryEntity,
  } as any;

  return { client, deletedIds, updatedEntities };
}
|
|
53
|
+
|
|
54
|
+
describe("runMemoryAudit", () => {
|
|
55
|
+
test("buckets a healthy modern entity into 'keep'", async () => {
  // Fully populated entity: high confidence, recently accessed, tagged,
  // embedded, promoted, and given three outgoing relations by the mock.
  const healthyEntity = {
    id: "healthy",
    type: "pattern",
    title: "Auth refresh token rotation pattern",
    content:
      "Rotate refresh tokens on every use. Keep a rolling window of two valid tokens to allow retry on network failures.",
    confidence: 0.95,
    memory_tier: "reference",
    access_count: 25,
    last_accessed_at: daysAgo(1),
    created_at: daysAgo(90),
    tags: ["auth", "security"],
    embedding: [0.1, 0.2, 0.3],
    promoted_from_id: "orig-1",
    metadata: {},
  };
  const relationCounts = { healthy: 3 };
  const { client } = makeMockClient([healthyEntity], relationCounts);

  const { summary, lowest } = await runMemoryAudit(client, "ws-1");

  expect(summary.keep).toBe(1);
  expect(summary.delete).toBe(0);
  expect(lowest[0].score).toBeGreaterThanOrEqual(70);
});
|
|
83
|
+
|
|
84
|
+
test("buckets a legacy default-confidence entity into archive/delete", async () => {
  // Confidence 1.0 on an empty, never-accessed, untagged 120-day-old draft.
  const legacyEntity = {
    id: "legacy",
    type: "context",
    title: "x",
    content: "",
    confidence: 1.0,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(120),
    tags: [],
    embedding: null,
    promoted_from_id: null,
    metadata: {},
  };
  const { client } = makeMockClient([legacyEntity]);

  const report = await runMemoryAudit(client, "ws-1");
  expect(report.summary.legacyCount).toBe(1);

  const worst = report.lowest[0];
  expect(worst.legacy).toBe(true);

  // Either low bucket is acceptable here; only "not kept" is asserted.
  const landedInLowBucket =
    worst.bucket === "archive" || worst.bucket === "delete";
  expect(landedInLowBucket).toBe(true);
  expect(worst.legacyReasons.length).toBeGreaterThan(1);
});
|
|
110
|
+
|
|
111
|
+
test("dryRun does not delete, archive, or flag", async () => {
  const badEntity = {
    id: "bad",
    type: "context",
    title: "x",
    content: "",
    confidence: 0.1,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(100),
    tags: [],
    embedding: null,
  };
  const harness = makeMockClient([badEntity]);

  await runMemoryAudit(harness.client, "ws-1", undefined, { dryRun: true });

  // Neither mutation mock may have recorded a call.
  expect(harness.deletedIds).toHaveLength(0);
  expect(harness.updatedEntities).toHaveLength(0);
});
|
|
132
|
+
|
|
133
|
+
test("executes delete + archive + flag in non-dryRun", async () => {
  // Intended for the delete bucket (very low).
  const trashEntity = {
    id: "trash",
    type: "context",
    title: "x",
    content: "",
    confidence: 0.05,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(200),
    tags: [],
  };
  // Intended for the archive bucket (middling-low).
  const archiveEntity = {
    id: "archive-me",
    type: "pattern",
    title: "Partial pattern that lacks context here",
    content: "Some content that is a bit more substantive than nothing.",
    confidence: 0.3,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: daysAgo(40),
    created_at: daysAgo(40),
    tags: [],
  };
  // Intended for the review bucket (medium): decent content but no tags,
  // no relations, no embedding.
  const reviewEntity = {
    id: "review-me",
    type: "pattern",
    title: "Reasonable pattern with decent content body here",
    content:
      "This entity has enough content to pass the length check. Confidence is moderate, access is limited.",
    confidence: 0.5,
    memory_tier: "episode",
    access_count: 1,
    last_accessed_at: daysAgo(25),
    created_at: daysAgo(40),
    tags: [],
    embedding: null,
  };

  const { client, deletedIds, updatedEntities } = makeMockClient([
    trashEntity,
    archiveEntity,
    reviewEntity,
  ]);

  const report = await runMemoryAudit(client, "ws-1", undefined, {
    dryRun: false,
  });

  expect(deletedIds).toContain("trash");

  // Archiving is observed as an update that sets confidence to 0.25.
  const archiveUpdate = updatedEntities.find((u) => u.id === "archive-me");
  expect(archiveUpdate?.updates.confidence).toBe(0.25);

  // Flagging is observed as an update whose metadata carries needs_review.
  const reviewUpdate = updatedEntities.find((u) => u.id === "review-me");
  const reviewMetadata = reviewUpdate?.updates.metadata as Record<
    string,
    unknown
  >;
  expect(reviewMetadata?.needs_review).toBe(true);

  const { actionsTaken } = report;
  expect(actionsTaken.deleted).toBeGreaterThanOrEqual(1);
  expect(actionsTaken.archived).toBeGreaterThanOrEqual(1);
  expect(actionsTaken.flaggedReview).toBeGreaterThanOrEqual(1);
});
|
|
197
|
+
|
|
198
|
+
test("detects stuck-draft legacy signal", async () => {
  // A 75-day-old draft with real content that was never promoted.
  const stuckDraft = {
    id: "stuck",
    type: "context",
    title: "Old draft that never made it",
    content:
      "Some content that is long enough to not count as thin content right here.",
    confidence: 0.6,
    memory_tier: "draft",
    access_count: 1,
    last_accessed_at: daysAgo(70),
    created_at: daysAgo(75),
    tags: ["x"],
    embedding: [0.1],
    promoted_from_id: null,
  };
  const { client } = makeMockClient([stuckDraft]);

  const { legacyBreakdown, lowest } = await runMemoryAudit(client, "ws-1");

  expect(legacyBreakdown.stuckDraft).toBe(1);
  const expectedReason = "stuck draft >60d never promoted";
  expect(lowest[0].reasons).toContain(expectedReason);
});
|
|
223
|
+
|
|
224
|
+
test("distribution buckets sum to scanned count", async () => {
  // Ten entities whose confidence, tier, access history, tags and embedding
  // all vary with the index.
  const buildEntity = (i: number) => ({
    id: `e${i}`,
    type: "context",
    title: `Entity number ${i} with decent title length`,
    content: "Some content string that is long enough to count properly.",
    confidence: 0.1 * (i + 1),
    memory_tier: i % 3 === 0 ? "reference" : "episode",
    access_count: i,
    last_accessed_at: daysAgo(i * 2),
    created_at: daysAgo(i * 5 + 1),
    tags: i % 2 === 0 ? ["tag"] : [],
    embedding: i % 2 === 0 ? [0.1] : null,
  });
  const entities = Array.from({ length: 10 }, (_, i) => buildEntity(i));
  const { client } = makeMockClient(entities);

  const report = await runMemoryAudit(client, "ws-1");

  // Every scanned entity must fall into exactly one score range.
  const ranges = ["0-20", "20-40", "40-70", "70-100"] as const;
  let total = 0;
  for (const range of ranges) {
    total += report.distribution[range];
  }

  expect(total).toBe(report.summary.scanned);
  expect(report.summary.scanned).toBe(10);
});
|
|
249
|
+
|
|
250
|
+
test("empty workspace returns success with zeros", async () => {
  // No entities at all: the audit should still complete successfully.
  const harness = makeMockClient([]);

  const { success, summary, lowest } = await runMemoryAudit(
    harness.client,
    "ws-1",
  );

  expect(success).toBe(true);
  expect(summary.scanned).toBe(0);
  expect(lowest).toHaveLength(0);
});
|
|
257
|
+
|
|
258
|
+
test("deleteBelow=0 disables deletion entirely", async () => {
  const trashEntity = {
    id: "trash",
    type: "context",
    title: "x",
    content: "",
    confidence: 0.05,
    memory_tier: "draft",
    access_count: 0,
    last_accessed_at: null,
    created_at: daysAgo(200),
    tags: [],
  };
  const { client, deletedIds, updatedEntities } = makeMockClient([
    trashEntity,
  ]);

  const auditOptions = { dryRun: false, deleteBelow: 0 };
  await runMemoryAudit(client, "ws-1", undefined, auditOptions);

  expect(deletedIds).toHaveLength(0);

  // Should land in archive bucket (score < 40 but >= 0)
  const trashUpdate = updatedEntities.find((u) => u.id === "trash");
  expect(trashUpdate?.updates.confidence).toBe(0.25);
});
|
|
284
|
+
|
|
285
|
+
test("fetch error surfaces as report.success=false", async () => {
  // A client whose listing call always rejects; no other methods are defined.
  const failingList = mock(async () => {
    throw new Error("API down");
  });
  const client = { listMemoryEntities: failingList } as any;

  const { success, errors } = await runMemoryAudit(client, "ws-1");

  // The failure is reported through the result rather than thrown.
  expect(success).toBe(false);
  expect(errors.length).toBeGreaterThan(0);
});
|
|
296
|
+
});
|