@gethmy/mcp 2.2.3 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,616 @@
1
+ /**
2
+ * Unified Memory Cleanup
3
+ *
4
+ * Orchestrates a 5-stage cleanup pipeline: prune stale drafts, consolidate
5
+ * similar memories, detect orphans, detect duplicates, and backfill embeddings.
6
+ *
7
+ * All stages are non-fatal — individual failures are collected but never block
8
+ * the remaining stages. Defaults to dry-run mode (preview only).
9
+ */
10
+
11
+ import { evaluateLifecycle } from "@harmony/memory";
12
+ import type { HarmonyApiClient } from "./api-client.js";
13
+ import {
14
+ type ConsolidationResult,
15
+ consolidateMemories,
16
+ } from "./consolidation.js";
17
+ import { findSimilarEntities } from "./graph-expansion.js";
18
+
19
+ // ---------------------------------------------------------------------------
20
+ // Types
21
+ // ---------------------------------------------------------------------------
22
+
23
/**
 * Shape of a memory entity as consumed by the cleanup steps in this module.
 *
 * Values come from `client.listMemoryEntities` and are cast to this type
 * without runtime validation, so fields may be absent on malformed records.
 */
interface MemoryEntity {
  id: string;
  type: string;
  title: string;
  content: string;
  /** One of the terms summed by `entityQualityScore`. */
  confidence: number;
  /**
   * Storage tier. Only "draft" entities are considered by the prune step;
   * "reference" entities are never orphan candidates.
   */
  memory_tier: "draft" | "episode" | "reference";
  /** Entities with two or more accesses are never orphan candidates. */
  access_count: number;
  last_accessed_at: string | null;
  /** Parsed with `new Date(...)` to compute the entity's age in days. */
  created_at: string;
  updated_at?: string;
  tags?: string[];
}
36
+
37
/** Identifier of one stage of the cleanup pipeline. */
export type CleanupStep = "prune" | "consolidate" | "orphans" | "duplicates" | "backfill";
43
+
44
/** Optional tuning knobs accepted by `runMemoryCleanup`. */
export interface CleanupOptions {
  /** Preview only. Treated as `true` unless explicitly set to `false`. */
  dryRun?: boolean;
  /** Stages to run; when omitted, every stage runs. */
  steps?: CleanupStep[];
  /** Minimum age in days before a draft counts as stale (default 30). */
  maxAgeDays?: number;
  /** Forwarded to `consolidateMemories` as `minClusterSize` (default 3). */
  minClusterSize?: number;
  /** Minimum age in days before an entity can be an orphan candidate (default 14). */
  orphanAgeDays?: number;
}
51
+
52
/** Outcome of the prune stage. */
interface PruneStepResult {
  /** Number of drafts at or beyond the age threshold. */
  staleDraftsFound: number;
  /** Number of those drafts actually deleted; stays 0 in dry-run. */
  pruned: number;
  /** One row per stale draft. */
  items: {
    id: string;
    title: string;
    /** Age rounded to whole days. */
    ageDays: number;
    /** Lifecycle decay score rounded to two decimals. */
    decayScore: number;
  }[];
}
62
+
63
/**
 * Outcome of the consolidation stage: the counters and details returned by
 * `consolidateMemories`, copied field by field into the report.
 */
interface ConsolidateStepResult {
  clustersFound: number;
  entitiesProcessed: number;
  consolidated: number;
  /** Per-cluster details, typed by the consolidation module. */
  details: ConsolidationResult["details"];
}
69
+
70
/** Outcome of the orphan-detection stage. */
interface OrphanStepResult {
  /** Number of candidates with no incoming or outgoing relations. */
  orphansFound: number;
  /** Number of orphans actually deleted; stays 0 in dry-run. */
  removed: number;
  /** One row per orphan. */
  items: {
    id: string;
    title: string;
    type: string;
    /** The entity's `memory_tier`. */
    tier: string;
    /** Age rounded to whole days. */
    ageDays: number;
    accessCount: number;
  }[];
}
82
+
83
/** Outcome of the duplicate-detection stage. */
interface DuplicateStepResult {
  duplicatePairsFound: number;
  /** Number of pairs whose `removeId` entity was deleted; stays 0 in dry-run. */
  resolved: number;
  /** One row per detected pair. */
  pairs: {
    /** Entity that survives. */
    keepId: string;
    keepTitle: string;
    /** Entity that is deleted when the cleanup is executed. */
    removeId: string;
    removeTitle: string;
    /** Title similarity in the range 0..1, rounded to two decimals. */
    similarity: number;
  }[];
}
94
+
95
/** Outcome of the embedding backfill stage. */
interface BackfillStepResult {
  processed: number;
  /** Entities still lacking embeddings; the orchestrator reports -1 in dry-run. */
  remaining: number;
  /** Per-entity failures reported by the backfill call. */
  errors: { entity_id: string; error: string }[];
}
100
+
101
/** Full result of a `runMemoryCleanup` invocation. */
export interface CleanupReport {
  /** Set to `false` when the initial entity fetch fails. */
  success: boolean;
  dryRun: boolean;
  /** ISO-8601 timestamp taken when the run started. */
  timestamp: string;
  workspace: { id: string; projectId?: string };

  summary: {
    /** Number of entities returned by the initial fetch. */
    totalEntities: number;
    /** Sum of the findings reported by the prune, consolidate, orphan and duplicate stages. */
    issuesFound: number;
    /** Sum of the changes applied by the stages; stays 0 in dry-run. */
    actionsTaken: number;
  };

  /** Per-stage results; a key is present only if that stage produced a result. */
  steps: {
    prune?: PruneStepResult;
    consolidate?: ConsolidateStepResult;
    orphans?: OrphanStepResult;
    duplicates?: DuplicateStepResult;
    backfill?: BackfillStepResult;
  };

  /** Non-fatal failures, tagged with the stage ("init" for the initial fetch). */
  errors: { step: string; message: string }[];
  /** Markdown rendering of this report. */
  healthReport: string;
}
124
+
125
/** Every pipeline stage, in execution order; used when the caller selects none. */
const ALL_STEPS: CleanupStep[] = ["prune", "consolidate", "orphans", "duplicates", "backfill"];
132
+
133
+ // ---------------------------------------------------------------------------
134
+ // Main orchestrator
135
+ // ---------------------------------------------------------------------------
136
+
137
/**
 * Runs the memory cleanup pipeline for a workspace (optionally narrowed to a
 * project) and returns a structured report that includes a markdown summary.
 *
 * Stages run in a fixed order: prune, consolidate, orphans, duplicates,
 * backfill. A stage that throws is recorded in `report.errors` and the
 * remaining stages still run. Only a failure of the initial entity fetch
 * aborts the run (with `success: false`).
 *
 * An entity that an earlier stage removed (or, in dry-run, would remove) is
 * withheld from later stages. This keeps the duplicate stage from naming an
 * already-deleted entity as the survivor of a pair and then deleting its
 * live twin, and keeps the dry-run preview in line with an executed run.
 *
 * @param client      API client used for all reads and writes.
 * @param workspaceId Workspace whose memories are cleaned.
 * @param projectId   Optional project filter passed to the list/search calls.
 * @param options     Stage selection and thresholds; see `CleanupOptions`.
 * @returns The populated report. Stage failures do not reject the promise.
 */
export async function runMemoryCleanup(
  client: HarmonyApiClient,
  workspaceId: string,
  projectId?: string,
  options?: CleanupOptions,
): Promise<CleanupReport> {
  // Anything other than an explicit `false` keeps the run in preview mode.
  const dryRun = options?.dryRun !== false;
  const steps = options?.steps ?? ALL_STEPS;
  const maxAgeDays = options?.maxAgeDays ?? 30;
  const minClusterSize = options?.minClusterSize ?? 3;
  const orphanAgeDays = options?.orphanAgeDays ?? 14;

  const report: CleanupReport = {
    success: true,
    dryRun,
    timestamp: new Date().toISOString(),
    workspace: { id: workspaceId, projectId },
    summary: { totalEntities: 0, issuesFound: 0, actionsTaken: 0 },
    steps: {},
    errors: [],
    healthReport: "",
  };

  // Thrown values are not guaranteed to be Error instances.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // NOTE(review): a single page of at most 200 entities is requested, so
  // larger workspaces are only partially inspected. Whether the list API
  // supports paging is not visible from this module.
  const fetchEntities = async (): Promise<MemoryEntity[]> => {
    const listResult = await client.listMemoryEntities({
      workspace_id: workspaceId,
      project_id: projectId,
      limit: 200,
    });
    return (listResult.entities || []) as MemoryEntity[];
  };

  // Entity snapshot shared by the stages.
  let entities: MemoryEntity[] = [];
  try {
    entities = await fetchEntities();
    report.summary.totalEntities = entities.length;
  } catch (err) {
    report.errors.push({
      step: "init",
      message: `Failed to fetch entities: ${describeError(err)}`,
    });
    report.success = false;
    report.healthReport = generateHealthReport(report);
    return report;
  }

  // Ids already deleted by an earlier stage, or planned for deletion in dry-run.
  const handledIds = new Set<string>();
  const remainingEntities = (): MemoryEntity[] =>
    entities.filter((e) => !handledIds.has(e.id));

  // Deletes one entity; a failure is collected instead of thrown.
  const removeEntity = async (id: string, failures: string[]): Promise<boolean> => {
    try {
      await client.deleteMemoryEntity(id);
      handledIds.add(id);
      return true;
    } catch (err) {
      failures.push(`${id}: ${describeError(err)}`);
      return false;
    }
  };

  // Folds a stage's delete failures into one bounded report entry.
  const reportDeleteFailures = (step: CleanupStep, failures: string[]): void => {
    if (failures.length === 0) return;
    const shownLimit = 5;
    const hidden = failures.length - shownLimit;
    const suffix = hidden > 0 ? `; …and ${hidden} more` : "";
    report.errors.push({
      step,
      message: `Failed to delete ${failures.length} entities: ${failures.slice(0, shownLimit).join("; ")}${suffix}`,
    });
  };

  // Stage 1: Prune stale drafts
  if (steps.includes("prune")) {
    try {
      const prune = runPruneStep(remainingEntities(), maxAgeDays);
      report.steps.prune = prune;
      if (dryRun) {
        for (const item of prune.items) handledIds.add(item.id);
      } else {
        const failures: string[] = [];
        for (const item of prune.items) {
          if (await removeEntity(item.id, failures)) prune.pruned++;
        }
        report.summary.actionsTaken += prune.pruned;
        reportDeleteFailures("prune", failures);
      }
      report.summary.issuesFound += prune.staleDraftsFound;
    } catch (err) {
      report.errors.push({ step: "prune", message: describeError(err) });
    }
  }

  // Stage 2: Consolidate similar memories
  if (steps.includes("consolidate")) {
    try {
      const result = await consolidateMemories(client, workspaceId, projectId, {
        dryRun,
        minClusterSize,
      });
      report.steps.consolidate = {
        clustersFound: result.clustersFound,
        entitiesProcessed: result.entitiesProcessed,
        consolidated: result.consolidated,
        details: result.details,
      };
      report.summary.issuesFound += result.clustersFound;
      if (!dryRun) {
        report.summary.actionsTaken += result.consolidated;
        if (result.consolidated > 0) {
          // Consolidation changed stored entities in ways this module cannot
          // see, so the snapshot is refreshed before the remaining stages.
          try {
            entities = await fetchEntities();
          } catch (err) {
            report.errors.push({
              step: "consolidate",
              message: `Failed to refresh entities after consolidation: ${describeError(err)}`,
            });
          }
        }
      }
    } catch (err) {
      report.errors.push({ step: "consolidate", message: describeError(err) });
    }
  }

  // Stage 3: Detect (and, when executing, remove) orphans
  if (steps.includes("orphans")) {
    try {
      const orphans = await runOrphanStep(client, remainingEntities(), orphanAgeDays);
      report.steps.orphans = orphans;
      if (dryRun) {
        for (const item of orphans.items) handledIds.add(item.id);
      } else {
        const failures: string[] = [];
        for (const item of orphans.items) {
          if (await removeEntity(item.id, failures)) orphans.removed++;
        }
        report.summary.actionsTaken += orphans.removed;
        reportDeleteFailures("orphans", failures);
      }
      report.summary.issuesFound += orphans.orphansFound;
    } catch (err) {
      report.errors.push({ step: "orphans", message: describeError(err) });
    }
  }

  // Stage 4: Detect (and, when executing, resolve) duplicates
  if (steps.includes("duplicates")) {
    try {
      const found = await runDuplicateStep(
        client,
        remainingEntities(),
        workspaceId,
        projectId,
      );
      // The similarity search may still return entities handled by an earlier
      // stage; a pair touching one of them is no longer actionable.
      const pairs = found.pairs.filter(
        (p) => !handledIds.has(p.keepId) && !handledIds.has(p.removeId),
      );
      const duplicates = { ...found, pairs, duplicatePairsFound: pairs.length };
      report.steps.duplicates = duplicates;
      if (!dryRun) {
        const failures: string[] = [];
        for (const pair of duplicates.pairs) {
          // Never delete a duplicate whose designated survivor is itself gone.
          if (handledIds.has(pair.keepId) || handledIds.has(pair.removeId)) continue;
          if (await removeEntity(pair.removeId, failures)) duplicates.resolved++;
        }
        report.summary.actionsTaken += duplicates.resolved;
        reportDeleteFailures("duplicates", failures);
      }
      report.summary.issuesFound += duplicates.duplicatePairsFound;
    } catch (err) {
      report.errors.push({ step: "duplicates", message: describeError(err) });
    }
  }

  // Stage 5: Backfill embeddings
  if (steps.includes("backfill")) {
    try {
      if (dryRun) {
        // Backfill has no preview; -1 marks "remaining" as unknown.
        report.steps.backfill = { processed: 0, remaining: -1, errors: [] };
      } else {
        const result = await client.backfillEmbeddings(workspaceId);
        report.steps.backfill = {
          processed: result.processed,
          remaining: result.remaining,
          errors: result.errors || [],
        };
        report.summary.actionsTaken += result.processed;
      }
    } catch (err) {
      report.errors.push({ step: "backfill", message: describeError(err) });
    }
  }

  report.healthReport = generateHealthReport(report);
  return report;
}
314
+
315
+ // ---------------------------------------------------------------------------
316
+ // Step implementations
317
+ // ---------------------------------------------------------------------------
318
+
319
/**
 * Finds draft-tier entities whose age (from `created_at`) is at least
 * `maxAgeDays`. Nothing is deleted here; the caller decides whether to act on
 * the returned items.
 *
 * @param entities   Entity snapshot to inspect.
 * @param maxAgeDays Minimum age in days for a draft to count as stale.
 * @returns The stale drafts with rounded age and decay score; `pruned` is 0.
 */
function runPruneStep(
  entities: MemoryEntity[],
  maxAgeDays: number,
): PruneStepResult {
  const msPerDay = 1000 * 60 * 60 * 24;
  const now = Date.now();
  const stale: PruneStepResult["items"] = [];

  for (const entity of entities) {
    if (entity.memory_tier !== "draft") continue;

    const ageDays = (now - new Date(entity.created_at).getTime()) / msPerDay;
    // Written as a positive `>=` test so that a NaN age (unparseable
    // `created_at`) or a NaN threshold never marks a draft as stale. With
    // `ageDays < maxAgeDays` a NaN would slip through and the draft would be
    // deleted when the cleanup is executed.
    if (!(ageDays >= maxAgeDays)) continue;

    const lifecycle = evaluateLifecycle(entity);
    stale.push({
      id: entity.id,
      title: entity.title,
      ageDays: Math.round(ageDays),
      // Rounded to two decimals for display.
      decayScore: Math.round(lifecycle.decay.score * 100) / 100,
    });
  }

  return { staleDraftsFound: stale.length, pruned: 0, items: stale };
}
343
+
344
/**
 * Finds entities that have no relations at all among the "orphan candidates":
 * entities that are not reference tier, have fewer than two accesses, and are
 * at least `orphanAgeDays` old. Relations are looked up one entity at a time;
 * a failed lookup skips that entity. Nothing is deleted here.
 *
 * @param client        API client used for the relation lookups.
 * @param entities      Entity snapshot to inspect.
 * @param orphanAgeDays Minimum age in days for an entity to be a candidate.
 * @returns The orphans found; `removed` is 0.
 */
async function runOrphanStep(
  client: HarmonyApiClient,
  entities: MemoryEntity[],
  orphanAgeDays: number,
): Promise<OrphanStepResult> {
  const msPerDay = 1000 * 60 * 60 * 24;
  const startedAt = Date.now();
  const ageInDays = (e: MemoryEntity): number =>
    (startedAt - new Date(e.created_at).getTime()) / msPerDay;

  // Cheap local checks first, so the API is only queried for plausible orphans.
  const candidates = entities.filter(
    (e) =>
      e.memory_tier !== "reference" &&
      !(e.access_count >= 2) &&
      ageInDays(e) >= orphanAgeDays,
  );

  const items: OrphanStepResult["items"] = [];
  for (const candidate of candidates) {
    try {
      const related = await client.getRelatedEntities(candidate.id);
      const outgoing = related.outgoing?.length || 0;
      const incoming = related.incoming?.length || 0;
      if (outgoing + incoming > 0) continue;

      items.push({
        id: candidate.id,
        title: candidate.title,
        type: candidate.type,
        tier: candidate.memory_tier,
        ageDays: Math.round(ageInDays(candidate)),
        accessCount: candidate.access_count,
      });
    } catch {
      // Non-fatal: the lookup failed, so this entity is left alone.
    }
  }

  return { orphansFound: items.length, removed: 0, items };
}
386
+
387
/**
 * Detects near-duplicate entities. For each entity a similarity search is
 * issued; a returned match counts as a duplicate when the two titles have a
 * word-level similarity of at least 0.85. For each duplicate pair one side is
 * chosen to keep and the other is flagged for removal. Nothing is deleted
 * here.
 *
 * Invariants maintained while scanning:
 * - an entity flagged for removal is never used again, neither as the entity
 *   being scanned nor as a match, so it can never be named as a survivor;
 * - each unordered pair is evaluated at most once.
 *
 * @param client      API client passed to the similarity search.
 * @param entities    Entity snapshot to scan.
 * @param workspaceId Workspace passed to the similarity search.
 * @param projectId   Optional project filter for the similarity search.
 * @returns The duplicate pairs found; `resolved` is 0.
 */
async function runDuplicateStep(
  client: HarmonyApiClient,
  entities: MemoryEntity[],
  workspaceId: string,
  projectId?: string,
): Promise<DuplicateStepResult> {
  const result: DuplicateStepResult = {
    duplicatePairsFound: 0,
    resolved: 0,
    pairs: [],
  };

  // First occurrence wins, matching a linear search over `entities`.
  const entitiesById = new Map<string, MemoryEntity>();
  for (const e of entities) {
    if (!entitiesById.has(e.id)) entitiesById.set(e.id, e);
  }

  const seenPairs = new Set<string>();
  const flaggedForRemoval = new Set<string>();

  for (const entity of entities) {
    if (flaggedForRemoval.has(entity.id)) continue;

    let similar: Array<{
      id: string;
      type: string;
      title: string;
      content: string;
      confidence: number;
    }>;
    try {
      similar = await findSimilarEntities(
        client,
        entity.title,
        entity.content,
        workspaceId,
        { projectId, limit: 5, minRrfScore: 0.05, excludeIds: [entity.id] },
      );
    } catch {
      // Non-fatal: without search results this entity cannot be compared.
      continue;
    }

    for (const match of similar) {
      // Defensive: an entity is never its own duplicate.
      if (match.id === entity.id) continue;
      if (flaggedForRemoval.has(match.id)) continue;

      const pairKey = [entity.id, match.id].sort().join(":");
      if (seenPairs.has(pairKey)) continue;
      seenPairs.add(pairKey);

      const sim = titleSimilarity(entity.title, match.title);
      if (sim < 0.85) continue;

      // Compare like with like. When the match is part of the snapshot both
      // sides get the full quality score. Otherwise only its confidence is
      // known, and comparing that against a score that also includes tier and
      // access weight would make the match lose almost every time, so both
      // sides are compared on confidence alone.
      const matchEntity = entitiesById.get(match.id);
      const keepEntity = matchEntity
        ? entityQualityScore(entity) >= entityQualityScore(matchEntity)
        : entity.confidence >= match.confidence;

      const entityRef = { id: entity.id, title: entity.title };
      const matchRef = { id: match.id, title: match.title };
      const keep = keepEntity ? entityRef : matchRef;
      const remove = keepEntity ? matchRef : entityRef;

      flaggedForRemoval.add(remove.id);
      result.pairs.push({
        keepId: keep.id,
        keepTitle: keep.title,
        removeId: remove.id,
        removeTitle: remove.title,
        similarity: Math.round(sim * 100) / 100,
      });
      result.duplicatePairsFound++;

      // The scanned entity itself lost. Evaluating its other matches would
      // flag them in favour of an entity that is about to be deleted.
      if (!keepEntity) break;
    }
  }

  return result;
}
460
+
461
+ // ---------------------------------------------------------------------------
462
+ // Helpers
463
+ // ---------------------------------------------------------------------------
464
+
465
/** Score contribution of each memory tier; unknown tiers contribute 0. */
const TIER_WEIGHTS: Record<string, number> = {
  reference: 3,
  episode: 2,
  draft: 1,
};

/**
 * Ranks an entity for duplicate resolution: confidence, plus the weight of its
 * tier, plus 0.1 per access counted up to 10 accesses.
 *
 * Entities are an unchecked cast of API data, so a numeric field may be
 * missing. A missing or non-finite value is counted as 0 instead of turning
 * the whole score into NaN (a NaN score loses every `>=` comparison and would
 * silently mark the entity for removal).
 *
 * @param entity Entity to score.
 * @returns The quality score; higher is better.
 */
function entityQualityScore(entity: MemoryEntity): number {
  const finiteOrZero = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  return (
    finiteOrZero(entity.confidence) +
    (TIER_WEIGHTS[entity.memory_tier] ?? 0) +
    Math.min(finiteOrZero(entity.access_count), 10) * 0.1
  );
}
478
+
479
/**
 * Word-level Jaccard similarity of two titles, in the range 0..1.
 *
 * Titles are NFC-normalized, lower-cased and trimmed; identical results score
 * 1. Otherwise each title is split into a set of words and the score is
 * |intersection| / |union|. Words are runs of Unicode letters, combining
 * marks, digits and underscores, so titles in non-Latin scripts and accented
 * words are tokenized as words instead of being treated as separators. For
 * ASCII input this is the same tokenization as splitting on `\W+`.
 *
 * @param a First title.
 * @param b Second title.
 * @returns 1 for equal titles, 0 when either title has no words, otherwise
 *          the Jaccard index of the two word sets.
 */
function titleSimilarity(a: string, b: string): number {
  const canonical = (s: string): string => s.normalize("NFC").toLowerCase().trim();
  const na = canonical(a);
  const nb = canonical(b);
  if (na === nb) return 1;

  const separators = /[^\p{L}\p{M}\p{N}_]+/u;
  const toWords = (s: string): Set<string> =>
    new Set(s.split(separators).filter(Boolean));

  const wordsA = toWords(na);
  const wordsB = toWords(nb);
  if (wordsA.size === 0 || wordsB.size === 0) return 0;

  let intersection = 0;
  for (const w of wordsA) {
    if (wordsB.has(w)) intersection++;
  }

  // Jaccard similarity
  const union = wordsA.size + wordsB.size - intersection;
  return union > 0 ? intersection / union : 0;
}
496
+
497
+ // ---------------------------------------------------------------------------
498
+ // Health report renderer
499
+ // ---------------------------------------------------------------------------
500
+
501
/**
 * Renders a cleanup report as markdown: a header line with the run mode and
 * summary counters, one section per stage that produced a result, an error
 * list, and (in dry-run) a closing hint on how to execute.
 *
 * Tables show at most 20 rows and the cluster list at most 10 entries; when
 * more exist, a trailing note states how many were left out. Text placed in
 * table cells has `|` escaped and line breaks collapsed so that a title can
 * not break the table layout.
 *
 * @param report The report to render; it is only read, never modified.
 * @returns The markdown text.
 */
function generateHealthReport(report: CleanupReport): string {
  const maxRows = 20;
  const maxClusters = 10;

  // Makes arbitrary text safe to place inside a markdown table cell.
  const cell = (value: string): string =>
    String(value).replace(/\|/g, "\\|").replace(/\s*[\r\n]+\s*/g, " ");

  const mode = report.dryRun ? "Dry Run (preview)" : "Executed";
  const lines: string[] = [
    "# Memory Health Report\n",
    `**Mode:** ${mode} | **Entities:** ${report.summary.totalEntities} | **Issues:** ${report.summary.issuesFound} | **Actions:** ${report.summary.actionsTaken}`,
    "",
  ];

  // Prune
  if (report.steps.prune) {
    const p = report.steps.prune;
    lines.push("## Stale Drafts");
    if (p.staleDraftsFound === 0) {
      lines.push("No stale drafts found.\n");
    } else {
      lines.push(
        `Found **${p.staleDraftsFound}** stale drafts${!report.dryRun ? ` (pruned ${p.pruned})` : ""}:`,
      );
      lines.push("| Title | Age | Decay |");
      lines.push("|-------|-----|-------|");
      for (const item of p.items.slice(0, maxRows)) {
        lines.push(`| ${cell(item.title)} | ${item.ageDays}d | ${item.decayScore} |`);
      }
      if (p.items.length > maxRows) {
        // The blank line ends the table so the note is not parsed as a row.
        lines.push("", `_…and ${p.items.length - maxRows} more not shown._`);
      }
      lines.push("");
    }
  }

  // Consolidate
  if (report.steps.consolidate) {
    const c = report.steps.consolidate;
    lines.push("## Consolidation");
    if (c.clustersFound === 0) {
      lines.push(
        `Scanned ${c.entitiesProcessed} draft/episode entities — no clusters found.\n`,
      );
    } else {
      lines.push(
        `Found **${c.clustersFound}** clusters across ${c.entitiesProcessed} entities:`,
      );
      for (const d of c.details.slice(0, maxClusters)) {
        lines.push(`- **${d.mergedTitle}** — ${d.clusterSize} entities`);
      }
      if (c.details.length > maxClusters) {
        lines.push(`- _…and ${c.details.length - maxClusters} more not shown._`);
      }
      lines.push("");
    }
  }

  // Orphans
  if (report.steps.orphans) {
    const o = report.steps.orphans;
    lines.push("## Orphaned Entities");
    if (o.orphansFound === 0) {
      lines.push("No orphans found.\n");
    } else {
      lines.push(
        `Found **${o.orphansFound}** orphans${!report.dryRun ? ` (removed ${o.removed})` : ""}:`,
      );
      lines.push("| Title | Type | Tier | Age | Accesses |");
      lines.push("|-------|------|------|-----|----------|");
      for (const item of o.items.slice(0, maxRows)) {
        lines.push(
          `| ${cell(item.title)} | ${cell(item.type)} | ${cell(item.tier)} | ${item.ageDays}d | ${item.accessCount} |`,
        );
      }
      if (o.items.length > maxRows) {
        lines.push("", `_…and ${o.items.length - maxRows} more not shown._`);
      }
      lines.push("");
    }
  }

  // Duplicates
  if (report.steps.duplicates) {
    const d = report.steps.duplicates;
    lines.push("## Near-Duplicates");
    if (d.duplicatePairsFound === 0) {
      lines.push("No duplicates found.\n");
    } else {
      lines.push(
        `Found **${d.duplicatePairsFound}** duplicate pairs${!report.dryRun ? ` (resolved ${d.resolved})` : ""}:`,
      );
      for (const pair of d.pairs.slice(0, maxRows)) {
        lines.push(
          `- "${pair.keepTitle}" ~ "${pair.removeTitle}" (${Math.round(pair.similarity * 100)}% similar, keep first)`,
        );
      }
      if (d.pairs.length > maxRows) {
        lines.push(`- _…and ${d.pairs.length - maxRows} more not shown._`);
      }
      lines.push("");
    }
  }

  // Backfill
  if (report.steps.backfill) {
    const b = report.steps.backfill;
    lines.push("## Embedding Coverage");
    if (report.dryRun) {
      lines.push("Backfill will run when executed with `dryRun: false`.\n");
    } else if (b.remaining === 0) {
      lines.push(`All embeddings up to date (processed ${b.processed}).\n`);
    } else {
      lines.push(
        `Processed ${b.processed} entities. ${b.remaining} still need embeddings.\n`,
      );
    }
    // Per-entity failures would otherwise be invisible in the rendered report.
    if (!report.dryRun && b.errors.length > 0) {
      lines.push(`${b.errors.length} entities failed during backfill:`);
      for (const failure of b.errors.slice(0, maxRows)) {
        lines.push(`- ${failure.entity_id}: ${failure.error}`);
      }
      if (b.errors.length > maxRows) {
        lines.push(`- _…and ${b.errors.length - maxRows} more not shown._`);
      }
      lines.push("");
    }
  }

  // Errors
  if (report.errors.length > 0) {
    lines.push("## Errors");
    for (const e of report.errors) {
      lines.push(`- **${e.step}:** ${e.message}`);
    }
    lines.push("");
  }

  if (report.dryRun) {
    lines.push("---\n*Run with `dryRun: false` to execute cleanup.*");
  }

  return lines.join("\n");
}
@@ -439,7 +439,19 @@ export function generatePrompt(
439
439
  sections.push(`- ${f}`);
440
440
  });
441
441
  sections.push(
442
- `- **Memory:** When you discover important domain knowledge, architectural decisions, or infrastructure details, store them via \`harmony_remember\`. Focus on durable knowledge that future agents would benefit from not ephemeral task details (those are auto-extracted from your session).`,
442
+ `- **Memory:** Store reusable knowledge via \`harmony_remember\`. Only store what a future agent couldn't easily discover from the code itself, applies beyond this specific card, and includes a "because" (not just what, but why).`,
443
+ );
444
+ sections.push(
445
+ ` - GOOD: "BoardContext card state must use moveCard action, never direct setState — optimistic updates depend on action ordering"`,
446
+ );
447
+ sections.push(
448
+ ` - GOOD: "Mobile bottom bar is 64px, overlaps fixed-position drawers — always add pb-16 to drawer content"`,
449
+ );
450
+ sections.push(
451
+ ` - BAD: "Fixed the login button" (no reusable knowledge — the fix is in the code)`,
452
+ );
453
+ sections.push(
454
+ ` - BAD: "Completed card #42" (ephemeral, auto-tracked by session)`,
443
455
  );
444
456
 
445
457
  // Output suggestions