@gethmy/mcp 2.3.3 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,485 @@
1
+ /**
2
+ * Memory Quality Audit
3
+ *
4
+ * Scores every memory entity against modern quality standards and buckets
5
+ * them into keep / review / archive / delete. Designed to catch legacy
6
+ * memories that pre-date tier/decay/embedding optimizations.
7
+ *
8
+ * Composite score (0-100): confidence (25) + decay (20) + structural (15) +
9
+ * content (15) + tier-age-fit (15) + access (10). Legacy signals (default
10
+ * confidence, missing embedding, stuck draft, no graph presence) are reported
11
+ * as explanation only; they add no penalty beyond the sub-scores above.
12
+ */
13
+
14
+ import { evaluateLifecycle } from "@harmony/memory";
15
+ import type { HarmonyApiClient } from "./api-client.js";
16
+
17
+ // Embeddings migration landed 2026-02-18. Entities older than this without
18
+ // embeddings are pre-vector and legacy by construction.
19
+ const EMBEDDINGS_MIGRATION_AT = Date.parse("2026-02-18T00:00:00Z"); // epoch milliseconds (result of Date.parse)
20
+ const MS_PER_DAY = 1000 * 60 * 60 * 24; // divisor used to turn a millisecond age into days
21
+ const BATCH_SIZE = 100; // largest page size requested from listMemoryEntities
22
+ const CONCURRENCY_LIMIT = 5; // number of getRelatedEntities calls issued per batch
23
+
24
+ interface AuditEntity { // fields of a memory entity as read by the audit; API results are cast to this shape unchecked
25
+ id: string;
26
+ type: string;
27
+ title: string;
28
+ content: string;
29
+ confidence: number; // clamped to [0, 1] by scoreEntity before it is weighted
30
+ memory_tier: "draft" | "episode" | "reference";
31
+ access_count: number;
32
+ last_accessed_at: string | null; // when null, scoreEntity substitutes created_at for the lifecycle evaluation
33
+ created_at: string; // parsed with Date.parse to compute the entity's age
34
+ updated_at?: string;
35
+ tags?: string[]; // at least one tag earns structural points
36
+ metadata?: Record<string, unknown>;
37
+ embedding?: unknown; // only presence (non-null) is checked, never the value
38
+ promoted_from_id?: string | null; // a truthy value exempts an old draft from the "stuck draft" penalty
39
+ }
40
+
41
+ export type AuditBucket = "keep" | "review" | "archive" | "delete"; // chosen by scoreEntity: below deleteBelow, below archiveBelow, below 70, otherwise keep
42
+
43
+ export interface AuditOptions { // optional settings accepted by runMemoryAudit
44
+ dryRun?: boolean; // treated as true unless explicitly set to false
45
+ archiveBelow?: number; // scores below this are bucketed "archive"; defaults to 40
46
+ deleteBelow?: number; // scores below this are bucketed "delete"; defaults to 20
47
+ includeLegacyFlag?: boolean; // NOTE(review): not read anywhere in the visible code — confirm intended use
48
+ limit?: number; // maximum number of entities fetched; defaults to 500
49
+ }
50
+
51
+ interface EntityAudit { // per-entity result produced by scoreEntity
52
+ id: string;
53
+ title: string;
54
+ type: string;
55
+ tier: string; // copied from the entity's memory_tier
56
+ ageDays: number; // days since created_at, rounded to the nearest integer
57
+ score: number; // composite score, clamped to 0-100 and rounded
58
+ bucket: AuditBucket;
59
+ reasons: string[]; // human-readable explanations for lost points
60
+ legacy: boolean; // true when at least one legacy signal matched
61
+ legacyReasons: string[];
62
+ subScores: {
63
+ confidence: number; // rounded, max 25
64
+ decay: number; // rounded, max 20
65
+ structural: number; // embedding 6 + tags 4 + relations 5
66
+ content: number; // length 8 + title 4 + not-boilerplate 3
67
+ tierAgeFit: number; // 15, or 0 for a stuck draft
68
+ access: number; // rounded, max 10
69
+ };
70
+ }
71
+
72
+ export interface AuditReport { // result object returned by runMemoryAudit
73
+ success: boolean; // set to false only when fetching the entity list fails
74
+ dryRun: boolean; // effective mode the audit ran in
75
+ timestamp: string; // ISO string taken when the report object is created
76
+ workspace: { id: string; projectId?: string };
77
+ summary: {
78
+ totalEntities: number; // number of entities fetched (bounded by the limit option)
79
+ scanned: number; // number of entities scored
80
+ keep: number;
81
+ review: number;
82
+ archive: number;
83
+ delete: number;
84
+ legacyCount: number; // entities with at least one legacy signal
85
+ };
86
+ actionsTaken: { // counters are incremented only after the corresponding client call resolves
87
+ flaggedReview: number;
88
+ archived: number;
89
+ deleted: number;
90
+ };
91
+ distribution: { // fixed score bins, independent of the archive/delete thresholds
92
+ "0-20": number; // score < 20
93
+ "20-40": number; // 20 <= score < 40
94
+ "40-70": number; // 40 <= score < 70
95
+ "70-100": number; // score >= 70
96
+ };
97
+ legacyBreakdown: { // one increment per matching legacy reason, so an entity may count in several rows
98
+ defaultConfidence: number;
99
+ missingEmbedding: number; // counts the "pre-embeddings migration" reason
100
+ stuckDraft: number;
101
+ noGraphPresence: number;
102
+ };
103
+ lowest: EntityAudit[]; // up to ten lowest-scoring audits, ascending by score
104
+ errors: Array<{ entityId?: string; step: string; message: string }>; // fetch failures and per-entity action failures
105
+ healthReport: string; // Markdown produced by renderReport
106
+ }
107
+
108
/**
 * Title patterns that mark a memory as placeholder text rather than real
 * content. Every pattern is anchored at the start of the trimmed title, and
 * none uses the `g`/`y` flag, so `test()` keeps no state between calls.
 */
const BOILERPLATE_PATTERNS: readonly RegExp[] = [
  /^todo:?$/i,
  /^placeholder/i,
  /^\.\.\.$/,
  /^untitled/i,
  /^(note|memo|draft)\s*\d*$/i,
];

/**
 * Reports whether a memory looks like placeholder material.
 *
 * A memory is boilerplate when its content is empty after trimming, or when
 * its trimmed title matches one of `BOILERPLATE_PATTERNS`. Only the title is
 * tested against the patterns; the content is only checked for emptiness.
 *
 * Entities reach the audit through an unchecked cast of the API response, so
 * a missing (`null`/`undefined`) title or content is treated as an empty
 * string instead of throwing on `.trim()`.
 *
 * @param title - Entity title.
 * @param content - Entity body.
 * @returns `true` when the entity should be reported as boilerplate.
 */
function isBoilerplate(
  title: string | null | undefined,
  content: string | null | undefined,
): boolean {
  const trimmedContent = (content ?? "").trim();
  if (trimmedContent.length === 0) return true;

  const trimmedTitle = (title ?? "").trim();
  return BOILERPLATE_PATTERNS.some((pattern) => pattern.test(trimmedTitle));
}
125
+
126
/**
 * Clamps a value to the unit interval. Anything that does not produce a
 * usable number (`NaN`, `undefined`) becomes 0 so it cannot turn the
 * composite score into `NaN`.
 */
function clampUnit(value: number): number {
  const clamped = Math.max(0, Math.min(1, value));
  return Number.isNaN(clamped) ? 0 : clamped;
}

/**
 * Scores one memory entity on a 0-100 scale and assigns it to a bucket.
 *
 * Composite score: confidence (25) + decay (20) + structural (15) +
 * content (15) + tier-age fit (15) + access (10). Legacy signals are collected
 * separately in `legacyReasons` and add no penalty of their own.
 *
 * Entities arrive through an unchecked cast of the API response, so malformed
 * fields (missing confidence, non-string content, unparseable `created_at`)
 * are normalised here. Without that, a single `NaN` sub-score makes the total
 * `NaN`, every `<` comparison fails, and the entity silently lands in "keep".
 *
 * @param entity - Entity to score.
 * @param relationCount - Number of incoming plus outgoing relations.
 * @param archiveBelow - Scores below this (and not below `deleteBelow`) are "archive".
 * @param deleteBelow - Scores below this are "delete".
 * @param now - Reference time in epoch milliseconds; defaults to `Date.now()`.
 *   Passing one value for a whole batch keeps ages consistent across entities.
 * @returns The audit record, including rounded sub-scores and reasons.
 */
function scoreEntity(
  entity: AuditEntity,
  relationCount: number,
  archiveBelow: number,
  deleteBelow: number,
  now: number = Date.now(),
): EntityAudit {
  const reasons: string[] = [];
  const legacyReasons: string[] = [];

  // Normalise fields that may be missing at runtime despite their types.
  const title = typeof entity.title === "string" ? entity.title.trim() : "";
  const body = typeof entity.content === "string" ? entity.content : "";
  const rawAccessCount = Number(entity.access_count);
  // `NaN > 0` is false, so missing, NaN and negative counts all become 0.
  const accessCount = rawAccessCount > 0 ? rawAccessCount : 0;

  const createdAtMs = Date.parse(entity.created_at);
  const hasValidCreatedAt = !Number.isNaN(createdAtMs);
  // With no usable creation time the age is unknown; use 0 so that none of
  // the age-based penalties fire on bad data.
  const ageDays = hasValidCreatedAt ? (now - createdAtMs) / MS_PER_DAY : 0;
  if (!hasValidCreatedAt) reasons.push("invalid created_at");

  // If an entity was never accessed, decay should start from creation time,
  // not from "now" (which would falsely yield a fresh decay score of 1.0).
  const lifecycle = evaluateLifecycle({
    memory_tier: entity.memory_tier,
    confidence: entity.confidence,
    access_count: entity.access_count,
    last_accessed_at: entity.last_accessed_at ?? entity.created_at,
    created_at: entity.created_at,
  });

  // Confidence (25)
  const confidence = clampUnit(entity.confidence) * 25;

  // Decay (20)
  const decay = clampUnit(lifecycle.decay.score) * 20;
  if (lifecycle.decay.score < 0.2) {
    reasons.push(`decay score ${lifecycle.decay.score.toFixed(2)}`);
  }

  // Structural completeness (15): embedding 6 + tags 4 + relations 5
  const hasEmbedding = entity.embedding != null;
  const hasTags = (entity.tags?.length ?? 0) >= 1;
  const hasRelations = relationCount > 0;
  const structural =
    (hasEmbedding ? 6 : 0) + (hasTags ? 4 : 0) + (hasRelations ? 5 : 0);
  if (!hasEmbedding) reasons.push("no embedding");
  if (!hasTags) reasons.push("no tags");
  if (!hasRelations) reasons.push("no relations");

  // Content quality (15): length 8 + usable title 4 + not boilerplate 3
  const contentLen = body.length;
  const titleOk =
    title.length >= 4 && !/^(untitled|draft|note)\b/i.test(title);
  const boilerplate = isBoilerplate(title, body);
  const content =
    (contentLen >= 80 ? 8 : 0) + (titleOk ? 4 : 0) + (boilerplate ? 0 : 3);
  if (contentLen < 80) reasons.push(`thin content (${contentLen} chars)`);
  if (boilerplate) reasons.push("boilerplate title/content");

  // Tier-age fit (15). A promoted entity is never "stuck", so it always keeps
  // the full 15; no separate promotion bonus is needed because 15 is the cap.
  const stuckDraft =
    entity.memory_tier === "draft" && ageDays > 60 && !entity.promoted_from_id;
  const tierAgeFit = stuckDraft ? 0 : 15;
  if (stuckDraft) reasons.push("stuck draft >60d never promoted");

  // Access pattern (10): logarithmic, saturating at 99 accesses.
  const access = Math.min(10, Math.log10(accessCount + 1) * 5);
  if (accessCount === 0 && ageDays > 14) reasons.push("never accessed");

  const raw = confidence + decay + structural + content + tierAgeFit + access;
  const score = Math.round(Math.max(0, Math.min(100, raw)));

  // Legacy detection (explanatory only)
  if (entity.confidence === 1.0 && accessCount === 0 && ageDays > 30) {
    legacyReasons.push("default confidence never validated");
  }
  // `NaN < x` is false, so an invalid created_at never counts as pre-migration.
  if (!hasEmbedding && createdAtMs < EMBEDDINGS_MIGRATION_AT) {
    legacyReasons.push("pre-embeddings migration");
  }
  if (stuckDraft) legacyReasons.push("stuck draft");
  if (!hasTags && !hasRelations) legacyReasons.push("no graph presence");

  // Bucket
  let bucket: AuditBucket;
  if (score < deleteBelow) bucket = "delete";
  else if (score < archiveBelow) bucket = "archive";
  else if (score < 70) bucket = "review";
  else bucket = "keep";

  return {
    id: entity.id,
    title: entity.title,
    type: entity.type,
    tier: entity.memory_tier,
    ageDays: Math.round(ageDays),
    score,
    bucket,
    reasons,
    legacy: legacyReasons.length > 0,
    legacyReasons,
    subScores: {
      confidence: Math.round(confidence),
      decay: Math.round(decay),
      structural,
      content,
      tierAgeFit,
      access: Math.round(access),
    },
  };
}
256
+
257
/** Extracts a readable message from anything that can be thrown. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Audits the memory entities of a workspace (optionally one project).
 *
 * Steps: page through the entity list, look up each entity's relation count,
 * score every entity, summarise the results, and — only when
 * `options.dryRun` is explicitly `false` — delete, archive or flag entities
 * according to their bucket.
 *
 * Failure handling:
 * - A failed entity fetch aborts the audit with `success: false`.
 * - A failed relation lookup is recorded under step `"relations"`. The entity
 *   is still scored (with zero relations) so it appears in the statistics, but
 *   no action is taken on it, because its score rests on incomplete data and
 *   the actions include deletion.
 * - A failed action is recorded under the bucket name and the loop continues.
 *
 * @param client - API client used for all reads and writes.
 * @param workspaceId - Workspace whose memories are audited.
 * @param projectId - Optional project filter passed through to the list call.
 * @param options - Thresholds, fetch limit and dry-run switch.
 * @returns The populated report, including a rendered Markdown summary.
 */
export async function runMemoryAudit(
  client: HarmonyApiClient,
  workspaceId: string,
  projectId?: string,
  options?: AuditOptions,
): Promise<AuditReport> {
  // Dry run is the default; only an explicit `false` enables mutations.
  const dryRun = options?.dryRun !== false;
  const archiveBelow = options?.archiveBelow ?? 40;
  const deleteBelow = options?.deleteBelow ?? 20;
  const limit = options?.limit ?? 500;

  const report: AuditReport = {
    success: true,
    dryRun,
    timestamp: new Date().toISOString(),
    workspace: { id: workspaceId, projectId },
    summary: {
      totalEntities: 0,
      scanned: 0,
      keep: 0,
      review: 0,
      archive: 0,
      delete: 0,
      legacyCount: 0,
    },
    actionsTaken: { flaggedReview: 0, archived: 0, deleted: 0 },
    distribution: { "0-20": 0, "20-40": 0, "40-70": 0, "70-100": 0 },
    legacyBreakdown: {
      defaultConfidence: 0,
      missingEmbedding: 0,
      stuckDraft: 0,
      noGraphPresence: 0,
    },
    lowest: [],
    errors: [],
    healthReport: "",
  };

  // Paginate until the limit is reached or the API returns a short page.
  const entities: AuditEntity[] = [];
  let offset = 0;
  try {
    while (entities.length < limit) {
      const pageSize = Math.min(BATCH_SIZE, limit - entities.length);
      const result = await client.listMemoryEntities({
        workspace_id: workspaceId,
        project_id: projectId,
        limit: pageSize,
        offset,
      });
      const page = (result.entities ?? []) as AuditEntity[];
      if (page.length === 0) break;
      entities.push(...page);
      if (page.length < pageSize) break;
      offset += pageSize;
    }
  } catch (err) {
    report.errors.push({
      step: "fetch",
      message: `Failed to fetch entities: ${describeError(err)}`,
    });
    report.success = false;
    report.healthReport = renderReport(report);
    return report;
  }

  report.summary.totalEntities = entities.length;

  // Fetch relation counts, CONCURRENCY_LIMIT lookups at a time. Each lookup
  // catches its own failure so one bad entity cannot hide the others.
  const relationCounts = new Map<string, number>();
  const relationLookupFailed = new Set<string>();
  for (let i = 0; i < entities.length; i += CONCURRENCY_LIMIT) {
    const batch = entities.slice(i, i + CONCURRENCY_LIMIT);
    const outcomes = await Promise.all(
      batch.map(
        async (
          entity,
        ): Promise<{ id: string; count: number; failure?: string }> => {
          try {
            const related = await client.getRelatedEntities(entity.id);
            const count =
              (related.outgoing?.length ?? 0) + (related.incoming?.length ?? 0);
            return { id: entity.id, count };
          } catch (err) {
            return { id: entity.id, count: 0, failure: describeError(err) };
          }
        },
      ),
    );
    for (const outcome of outcomes) {
      if (outcome.failure === undefined) {
        relationCounts.set(outcome.id, outcome.count);
      } else {
        relationLookupFailed.add(outcome.id);
        report.errors.push({
          entityId: outcome.id,
          step: "relations",
          message: outcome.failure,
        });
      }
    }
  }

  // Score each entity
  const audits: EntityAudit[] = [];
  for (const entity of entities) {
    const relCount = relationCounts.get(entity.id) ?? 0;
    const audit = scoreEntity(entity, relCount, archiveBelow, deleteBelow);
    audits.push(audit);
    report.summary.scanned++;
    report.summary[audit.bucket]++;
    if (audit.legacy) report.summary.legacyCount++;

    // Distribution bin
    if (audit.score < 20) report.distribution["0-20"]++;
    else if (audit.score < 40) report.distribution["20-40"]++;
    else if (audit.score < 70) report.distribution["40-70"]++;
    else report.distribution["70-100"]++;

    // Legacy breakdown: matched by the reason prefixes scoreEntity emits.
    for (const reason of audit.legacyReasons) {
      if (reason.startsWith("default confidence"))
        report.legacyBreakdown.defaultConfidence++;
      else if (reason.startsWith("pre-embeddings"))
        report.legacyBreakdown.missingEmbedding++;
      else if (reason.startsWith("stuck draft"))
        report.legacyBreakdown.stuckDraft++;
      else if (reason.startsWith("no graph"))
        report.legacyBreakdown.noGraphPresence++;
    }
  }

  // Top 10 lowest-scoring (copy first so `audits` keeps its original order)
  report.lowest = [...audits].sort((a, b) => a.score - b.score).slice(0, 10);

  // Execute actions
  if (!dryRun) {
    for (const audit of audits) {
      // The score of this entity assumed zero relations only because the
      // lookup failed; never mutate or delete on that basis.
      if (relationLookupFailed.has(audit.id)) continue;

      try {
        if (audit.bucket === "delete") {
          await client.deleteMemoryEntity(audit.id);
          report.actionsTaken.deleted++;
        } else if (audit.bucket === "archive") {
          // NOTE(review): whether updateMemoryEntity merges or replaces
          // `metadata` is not visible here — confirm existing keys survive.
          await client.updateMemoryEntity(audit.id, {
            confidence: 0.25,
            metadata: {
              audit_archived_at: new Date().toISOString(),
              audit_score: audit.score,
              audit_reasons: audit.reasons,
            },
          });
          report.actionsTaken.archived++;
        } else if (audit.bucket === "review") {
          await client.updateMemoryEntity(audit.id, {
            metadata: {
              needs_review: true,
              audit_score: audit.score,
              audit_reasons: audit.reasons,
              audit_at: new Date().toISOString(),
            },
          });
          report.actionsTaken.flaggedReview++;
        }
      } catch (err) {
        report.errors.push({
          entityId: audit.id,
          step: audit.bucket,
          message: describeError(err),
        });
      }
    }
  }

  report.healthReport = renderReport(report);
  return report;
}
417
+
418
/**
 * Makes free text safe for a single Markdown table cell: line breaks become
 * spaces and pipe characters are backslash-escaped so they cannot split the
 * row into extra columns.
 */
function escapeTableCell(text: string): string {
  return text.replace(/\s*[\r\n]+\s*/g, " ").replace(/\|/g, "\\|");
}

/**
 * Renders an audit report as a Markdown summary.
 *
 * Sections: header line, score distribution, bucket counts (with action
 * counts when not a dry run), legacy breakdown (only when legacy entities
 * exist), the lowest-scoring entities as a table, up to ten errors, and a
 * dry-run hint.
 *
 * Entity titles are user content, so table cells are escaped and titles are
 * truncated by code point rather than by UTF-16 unit.
 *
 * @param report - Report to render; it is not modified.
 * @returns The Markdown text, lines joined with `\n`.
 */
function renderReport(report: AuditReport): string {
  const MAX_TITLE_CHARS = 40;
  const MAX_ERRORS_SHOWN = 10;

  const mode = report.dryRun ? "Dry Run (preview)" : "Executed";
  const s = report.summary;
  const taken = report.actionsTaken;
  // Action counts are only meaningful once actions were actually executed.
  const actionNote = (label: string, count: number): string =>
    report.dryRun ? "" : ` (${label} ${count})`;

  const lines: string[] = [
    "# Memory Quality Audit\n",
    `**Mode:** ${mode} | **Scanned:** ${s.scanned}/${s.totalEntities} | **Legacy:** ${s.legacyCount}`,
    "",
    "## Distribution",
    `- 70-100 (keep): ${report.distribution["70-100"]}`,
    `- 40-69 (review): ${report.distribution["40-70"]}`,
    `- 20-39 (archive): ${report.distribution["20-40"]}`,
    `- 0-19 (delete): ${report.distribution["0-20"]}`,
    "",
    "## Buckets",
    `- **Keep:** ${s.keep}`,
    `- **Review:** ${s.review}${actionNote("flagged", taken.flaggedReview)}`,
    `- **Archive:** ${s.archive}${actionNote("archived", taken.archived)}`,
    `- **Delete:** ${s.delete}${actionNote("deleted", taken.deleted)}`,
    "",
  ];

  const l = report.legacyBreakdown;
  if (s.legacyCount > 0) {
    lines.push("## Legacy Breakdown");
    lines.push(`- Default confidence, never validated: ${l.defaultConfidence}`);
    lines.push(`- Pre-embeddings migration: ${l.missingEmbedding}`);
    lines.push(`- Stuck drafts (>60d, no promotion): ${l.stuckDraft}`);
    lines.push(`- No tags + no relations: ${l.noGraphPresence}`);
    lines.push("");
  }

  if (report.lowest.length > 0) {
    lines.push("## Lowest-Scoring (top 10)");
    lines.push("| Score | Bucket | Tier | Age | Title | Reasons |");
    lines.push("|-------|--------|------|-----|-------|---------|");
    for (const a of report.lowest) {
      const reasonStr = escapeTableCell(a.reasons.slice(0, 3).join(", ")) || "—";

      // Flatten line breaks first, truncate by code point, and escape last so
      // that an escape sequence is never cut in half.
      const flatTitle = (a.title ?? "").replace(/\s*[\r\n]+\s*/g, " ");
      const titleChars = Array.from(flatTitle);
      const titleTrunc =
        titleChars.length > MAX_TITLE_CHARS
          ? `${titleChars.slice(0, MAX_TITLE_CHARS - 3).join("")}...`
          : flatTitle;

      lines.push(
        `| ${a.score} | ${a.bucket} | ${a.tier} | ${a.ageDays}d | ${escapeTableCell(titleTrunc)} | ${reasonStr} |`,
      );
    }
    lines.push("");
  }

  if (report.errors.length > 0) {
    lines.push("## Errors");
    for (const e of report.errors.slice(0, MAX_ERRORS_SHOWN)) {
      lines.push(
        `- **${e.step}${e.entityId ? ` ${e.entityId}` : ""}:** ${e.message}`,
      );
    }
    const hidden = report.errors.length - MAX_ERRORS_SHOWN;
    if (hidden > 0) {
      // Make the truncation visible instead of silently dropping errors.
      lines.push(`- *(${hidden} more errors not shown)*`);
    }
    lines.push("");
  }

  if (report.dryRun) {
    lines.push("---");
    lines.push(
      "*Run with `dryRun: false` to flag review entries, archive low-quality memories, and delete worst offenders.*",
    );
  }

  return lines.join("\n");
}
483
+
484
+ // Exposed for reuse from memory-cleanup.ts
485
+ export { scoreEntity };