@gethmy/mcp 2.4.7 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,902 +0,0 @@
1
- /**
2
- * Unified Memory Cleanup
3
- *
4
- * Orchestrates a 6-stage cleanup pipeline: prune stale drafts, consolidate
5
- * similar memories, detect orphans, detect duplicates, backfill embeddings, and audit quality.
6
- *
7
- * All stages are non-fatal — individual failures are collected but never block
8
- * the remaining stages. Defaults to dry-run mode (preview only).
9
- */
10
-
11
- import { evaluateLifecycle } from "@harmony/memory";
12
- import type { HarmonyApiClient } from "./api-client.js";
13
- import {
14
- type ConsolidationResult,
15
- consolidateMemories,
16
- } from "./consolidation.js";
17
- import { findSimilarEntities } from "./graph-expansion.js";
18
- import { type AuditReport, runMemoryAudit } from "./memory-audit.js";
19
-
20
- // ---------------------------------------------------------------------------
21
- // Types
22
- // ---------------------------------------------------------------------------
23
-
24
/** Tier labels a memory entity can carry. */
type MemoryTier = "draft" | "episode" | "reference";

/**
 * Memory entity shape used throughout this module. Results returned by
 * `client.listMemoryEntities` are cast to this type before processing.
 */
interface MemoryEntity {
  /** Identifier handed to delete and relation-lookup calls. */
  id: string;
  type: string;
  title: string;
  content: string;

  /** Tier label; the prune stage only looks at `"draft"` entities. */
  memory_tier: MemoryTier;
  /** Numeric confidence; feeds the duplicate quality score and purge filter. */
  confidence: number;
  access_count: number;

  /** Creation timestamp string, parsed with `new Date(...)` for age checks. */
  created_at: string;
  /** Last access timestamp string, or `null` when none is recorded. */
  last_accessed_at: string | null;
  updated_at?: string;

  tags?: string[];
}
37
-
38
/** Name of a stage that `runMemoryCleanup` can be asked to run. */
export type CleanupStep =
  | "prune" | "consolidate" | "orphans"
  | "duplicates" | "backfill" | "audit";

/** Optional knobs accepted by `runMemoryCleanup`. */
export interface CleanupOptions {
  /** Preview only. Treated as `true` unless explicitly set to `false`. */
  dryRun?: boolean;
  /** Stages to run; every stage runs when omitted. */
  steps?: CleanupStep[];

  /** Minimum draft age in days for the prune stage (defaults to 30). */
  maxAgeDays?: number;
  /** Forwarded to `consolidateMemories` as `minClusterSize` (defaults to 3). */
  minClusterSize?: number;
  /** Minimum entity age in days for the orphan stage (defaults to 14). */
  orphanAgeDays?: number;

  /** Forwarded to `runMemoryAudit` as `archiveBelow`. */
  auditArchiveBelow?: number;
  /** Forwarded to `runMemoryAudit` as `deleteBelow`. */
  auditDeleteBelow?: number;
}
55
-
56
- // ---------------------------------------------------------------------------
57
- // Purge types
58
- // ---------------------------------------------------------------------------
59
-
60
/**
 * Criteria that select entities for `purgeMemories`. At least one must be
 * supplied or the purge refuses to run.
 */
export interface PurgeFilters {
  /** Checked client-side against each entity's `memory_tier`. */
  tier?: "draft" | "episode" | "reference";
  /** Forwarded to the list call. */
  scope?: string;
  /** Forwarded to the list call. */
  type?: string;
  /** Minimum age in days, measured from `last_accessed_at` or else `created_at`. */
  olderThanDays?: number;
  /** Entities whose confidence exceeds this value are skipped. */
  maxConfidence?: number;
  /** Forwarded to the list call. */
  tags?: string[];
}

/** Arguments for `purgeMemories`. */
export interface PurgeOptions {
  /** Preview only. Treated as `true` unless explicitly set to `false`. */
  dryRun?: boolean;
  filters: PurgeFilters;
}

/** Preview row describing one entity matched by a purge. */
interface PurgeReportItem {
  id: string;
  title: string;
  type: string;
  tier: string;
  confidence: number;
  ageDays: number;
}

/** A deletion that failed during a purge. */
interface PurgeReportError {
  entityId: string;
  message: string;
}

/** Result returned by `purgeMemories`. */
export interface PurgeReport {
  /** `true` when no deletion error was recorded. */
  success: boolean;
  dryRun: boolean;
  /** ISO timestamp taken when the report is built. */
  timestamp: string;
  workspace: { id: string; projectId: string };
  filters: PurgeFilters;
  /** Number of entities that satisfied the filters. */
  matched: number;
  /** Number of entities deleted; always 0 for a dry run. */
  purged: number;
  items: PurgeReportItem[];
  errors: PurgeReportError[];
}
92
-
93
/** A stale draft picked up by the prune stage. */
interface PruneStepItem {
  id: string;
  title: string;
  ageDays: number;
  decayScore: number;
}

/** Outcome of the prune stage. */
interface PruneStepResult {
  staleDraftsFound: number;
  /** Count of successful deletions; stays 0 in dry-run mode. */
  pruned: number;
  items: PruneStepItem[];
}

/** Outcome of the consolidation stage, copied from `consolidateMemories`. */
interface ConsolidateStepResult {
  clustersFound: number;
  entitiesProcessed: number;
  consolidated: number;
  details: ConsolidationResult["details"];
}

/** An entity with no incoming or outgoing relations. */
interface OrphanStepItem {
  id: string;
  title: string;
  type: string;
  tier: string;
  ageDays: number;
  accessCount: number;
}

/** Outcome of the orphan stage. */
interface OrphanStepResult {
  orphansFound: number;
  /** Count of successful deletions; stays 0 in dry-run mode. */
  removed: number;
  items: OrphanStepItem[];
}

/** A pair of near-duplicate entities and which one survives. */
interface DuplicatePair {
  keepId: string;
  keepTitle: string;
  removeId: string;
  removeTitle: string;
  similarity: number;
}

/** Outcome of the duplicate stage. */
interface DuplicateStepResult {
  duplicatePairsFound: number;
  /** Count of successful deletions; stays 0 in dry-run mode. */
  resolved: number;
  pairs: DuplicatePair[];
}

/** Outcome of the embedding backfill stage. */
interface BackfillStepResult {
  processed: number;
  /** Set to -1 in dry-run mode, where no backfill call is made. */
  remaining: number;
  errors: { entity_id: string; error: string }[];
}

/** Outcome of the quality-audit stage. */
interface AuditStepResult {
  scanned: number;
  legacyCount: number;
  buckets: { keep: number; review: number; archive: number; delete: number };
  actions: { flaggedReview: number; archived: number; deleted: number };
  /** Score of the first entry in the audit's `lowest` list, or `null` if empty. */
  lowestScore: number | null;
  /** The full report returned by `runMemoryAudit`. */
  report: AuditReport;
}
150
-
151
/** Result returned by `runMemoryCleanup`. */
export interface CleanupReport {
  /** Starts `true`; set to `false` when the initial entity fetch fails. */
  success: boolean;
  dryRun: boolean;
  /** ISO timestamp taken when the run starts. */
  timestamp: string;
  workspace: { id: string; projectId?: string };

  /** Totals accumulated across the stages that ran. */
  summary: {
    totalEntities: number;
    issuesFound: number;
    actionsTaken: number;
  };

  /** Per-stage results; a key is present only when that stage produced one. */
  steps: {
    prune?: PruneStepResult;
    consolidate?: ConsolidateStepResult;
    orphans?: OrphanStepResult;
    duplicates?: DuplicateStepResult;
    backfill?: BackfillStepResult;
    audit?: AuditStepResult;
  };

  /** Failures collected from any stage. */
  errors: { step: string; message: string }[];
  /** Markdown rendering of this report. */
  healthReport: string;
}
175
-
176
/** Every cleanup stage; used when the caller does not pass `steps`. */
const ALL_STEPS: CleanupStep[] = ["prune", "consolidate", "orphans", "duplicates", "backfill", "audit"];

/** Milliseconds in one day. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;
/** `limit` sent with the single entity list request made by the cleanup run. */
const MAX_ENTITIES_FETCH = 200;
/** Minimum title similarity for two entities to be reported as duplicates. */
const DUPLICATE_SIMILARITY_THRESHOLD = 0.85;
/** Number of API calls issued together in each batch. */
const CONCURRENCY_LIMIT = 5;
189
-
190
- // ---------------------------------------------------------------------------
191
- // Main orchestrator
192
- // ---------------------------------------------------------------------------
193
-
194
/** Extracts a printable message from an arbitrary thrown value. */
function cleanupErrorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Deletes the given entities one at a time on behalf of a cleanup stage.
 *
 * Ids already present in `deletedIds` are skipped. Each successful deletion is
 * added to `deletedIds`; each failure is appended to `errors` and does not
 * stop the remaining deletions.
 *
 * @returns The number of entities deleted by this call.
 */
async function deleteCleanupTargets(
  client: HarmonyApiClient,
  ids: readonly string[],
  step: string,
  errors: CleanupReport["errors"],
  deletedIds: Set<string>,
): Promise<number> {
  let deleted = 0;
  for (const id of ids) {
    if (deletedIds.has(id)) continue;
    try {
      await client.deleteMemoryEntity(id);
      deletedIds.add(id);
      deleted++;
    } catch (err) {
      errors.push({
        step,
        message: `Failed to delete ${id}: ${cleanupErrorMessage(err)}`,
      });
    }
  }
  return deleted;
}

/**
 * Runs the memory cleanup pipeline: prune stale drafts, consolidate similar
 * memories, remove orphans, resolve duplicates, backfill embeddings, and run
 * the quality audit.
 *
 * Entities are listed once (up to `MAX_ENTITIES_FETCH`) and that snapshot is
 * shared by the prune, orphan and duplicate stages. Entities deleted by an
 * earlier stage are excluded from later ones, so they are neither deleted
 * twice nor chosen as the surviving copy of a duplicate pair.
 *
 * A stage failure is recorded in `report.errors` and the next stage still
 * runs. `report.success` is only set to `false` when the initial entity fetch
 * fails, in which case no stage runs.
 *
 * @param client - API client used for every list, delete and backfill call.
 * @param workspaceId - Workspace whose memories are cleaned.
 * @param projectId - Optional project to restrict the run to.
 * @param options - Stage selection and thresholds; `dryRun` defaults to true.
 * @returns The per-stage results, totals, collected errors and a Markdown
 *   health report.
 */
export async function runMemoryCleanup(
  client: HarmonyApiClient,
  workspaceId: string,
  projectId?: string,
  options?: CleanupOptions,
): Promise<CleanupReport> {
  const dryRun = options?.dryRun !== false;
  const steps = options?.steps ?? ALL_STEPS;
  const maxAgeDays = options?.maxAgeDays ?? 30;
  const minClusterSize = options?.minClusterSize ?? 3;
  const orphanAgeDays = options?.orphanAgeDays ?? 14;

  const report: CleanupReport = {
    success: true,
    dryRun,
    timestamp: new Date().toISOString(),
    workspace: { id: workspaceId, projectId },
    summary: { totalEntities: 0, issuesFound: 0, actionsTaken: 0 },
    steps: {},
    errors: [],
    healthReport: "",
  };

  // Fetch all entities once (shared across steps)
  let entities: MemoryEntity[] = [];
  try {
    const listResult = await client.listMemoryEntities({
      workspace_id: workspaceId,
      project_id: projectId,
      limit: MAX_ENTITIES_FETCH,
    });
    entities = (listResult.entities || []) as MemoryEntity[];
    report.summary.totalEntities = entities.length;
  } catch (err) {
    report.errors.push({
      step: "init",
      message: `Failed to fetch entities: ${cleanupErrorMessage(err)}`,
    });
    report.success = false;
    report.healthReport = generateHealthReport(report);
    return report;
  }

  // Ids removed so far in this run. The snapshot is not re-fetched, so later
  // stages must be given a view that leaves these out.
  const deletedIds = new Set<string>();
  const remainingEntities = (): MemoryEntity[] =>
    deletedIds.size === 0
      ? entities
      : entities.filter((e) => !deletedIds.has(e.id));

  // Stage 1: Prune stale drafts
  if (steps.includes("prune")) {
    try {
      const prune = runPruneStep(entities, maxAgeDays);
      report.steps.prune = prune;
      if (!dryRun) {
        prune.pruned = await deleteCleanupTargets(
          client,
          prune.items.map((item) => item.id),
          "prune",
          report.errors,
          deletedIds,
        );
        report.summary.actionsTaken += prune.pruned;
      }
      report.summary.issuesFound += prune.staleDraftsFound;
    } catch (err) {
      report.errors.push({ step: "prune", message: cleanupErrorMessage(err) });
    }
  }

  // Stage 2: Consolidate similar memories
  // NOTE(review): consolidateMemories does its own fetching; if it removes
  // entities when dryRun is false, the snapshot above will not reflect that.
  if (steps.includes("consolidate")) {
    try {
      const result = await consolidateMemories(client, workspaceId, projectId, {
        dryRun,
        minClusterSize,
      });
      report.steps.consolidate = {
        clustersFound: result.clustersFound,
        entitiesProcessed: result.entitiesProcessed,
        consolidated: result.consolidated,
        details: result.details,
      };
      report.summary.issuesFound += result.clustersFound;
      if (!dryRun) report.summary.actionsTaken += result.consolidated;
    } catch (err) {
      report.errors.push({
        step: "consolidate",
        message: cleanupErrorMessage(err),
      });
    }
  }

  // Stage 3: Detect orphans
  if (steps.includes("orphans")) {
    try {
      const orphans = await runOrphanStep(
        client,
        remainingEntities(),
        orphanAgeDays,
      );
      report.steps.orphans = orphans;
      if (!dryRun) {
        orphans.removed = await deleteCleanupTargets(
          client,
          orphans.items.map((item) => item.id),
          "orphans",
          report.errors,
          deletedIds,
        );
        report.summary.actionsTaken += orphans.removed;
      }
      report.summary.issuesFound += orphans.orphansFound;
    } catch (err) {
      report.errors.push({ step: "orphans", message: cleanupErrorMessage(err) });
    }
  }

  // Stage 4: Detect duplicates
  if (steps.includes("duplicates")) {
    try {
      const duplicates = await runDuplicateStep(
        client,
        remainingEntities(),
        workspaceId,
        projectId,
      );
      report.steps.duplicates = duplicates;
      if (!dryRun) {
        for (const pair of duplicates.pairs) {
          // If the copy we meant to keep is already gone, deleting the other
          // one would lose the memory entirely.
          if (deletedIds.has(pair.keepId)) continue;
          duplicates.resolved += await deleteCleanupTargets(
            client,
            [pair.removeId],
            "duplicates",
            report.errors,
            deletedIds,
          );
        }
        report.summary.actionsTaken += duplicates.resolved;
      }
      report.summary.issuesFound += duplicates.duplicatePairsFound;
    } catch (err) {
      report.errors.push({
        step: "duplicates",
        message: cleanupErrorMessage(err),
      });
    }
  }

  // Stage 5: Backfill embeddings
  if (steps.includes("backfill")) {
    try {
      if (dryRun) {
        // In dry-run, just report that backfill would run
        report.steps.backfill = { processed: 0, remaining: -1, errors: [] };
      } else {
        const result = await client.backfillEmbeddings(workspaceId);
        report.steps.backfill = {
          processed: result.processed,
          remaining: result.remaining,
          errors: result.errors || [],
        };
        report.summary.actionsTaken += result.processed;
      }
    } catch (err) {
      report.errors.push({
        step: "backfill",
        message: cleanupErrorMessage(err),
      });
    }
  }

  // Stage 6: Quality audit — rate every entity against modern standards
  if (steps.includes("audit")) {
    try {
      const auditReport = await runMemoryAudit(client, workspaceId, projectId, {
        dryRun,
        archiveBelow: options?.auditArchiveBelow,
        deleteBelow: options?.auditDeleteBelow,
      });
      const low =
        auditReport.lowest.length > 0 ? auditReport.lowest[0].score : null;
      report.steps.audit = {
        scanned: auditReport.summary.scanned,
        legacyCount: auditReport.summary.legacyCount,
        buckets: {
          keep: auditReport.summary.keep,
          review: auditReport.summary.review,
          archive: auditReport.summary.archive,
          delete: auditReport.summary.delete,
        },
        actions: auditReport.actionsTaken,
        lowestScore: low,
        report: auditReport,
      };
      report.summary.issuesFound +=
        auditReport.summary.review +
        auditReport.summary.archive +
        auditReport.summary.delete;
      if (!dryRun) {
        report.summary.actionsTaken +=
          auditReport.actionsTaken.flaggedReview +
          auditReport.actionsTaken.archived +
          auditReport.actionsTaken.deleted;
      }
      for (const auditErr of auditReport.errors) {
        report.errors.push({
          step: `audit:${auditErr.step}`,
          message: auditErr.entityId
            ? `${auditErr.entityId}: ${auditErr.message}`
            : auditErr.message,
        });
      }
    } catch (err) {
      report.errors.push({ step: "audit", message: cleanupErrorMessage(err) });
    }
  }

  report.healthReport = generateHealthReport(report);
  return report;
}
429
-
430
- // ---------------------------------------------------------------------------
431
- // Step implementations
432
- // ---------------------------------------------------------------------------
433
-
434
/**
 * Selects draft-tier entities that are at least `maxAgeDays` old.
 *
 * Nothing is deleted here; the caller decides whether to act on the returned
 * items. Drafts whose `created_at` cannot be parsed are skipped: their age is
 * NaN, which would otherwise slip past the age comparison and mark them stale.
 *
 * @param entities - Entity snapshot to scan.
 * @param maxAgeDays - Minimum age, in days since `created_at`, to count as stale.
 * @returns The stale drafts with rounded age and decay score; `pruned` is 0.
 */
function runPruneStep(
  entities: MemoryEntity[],
  maxAgeDays: number,
): PruneStepResult {
  const now = Date.now();
  const stale: PruneStepResult["items"] = [];

  for (const entity of entities) {
    if (entity.memory_tier !== "draft") continue;

    const ageDays = (now - new Date(entity.created_at).getTime()) / MS_PER_DAY;
    // An unknown age is not evidence of staleness.
    if (!Number.isFinite(ageDays) || ageDays < maxAgeDays) continue;

    const lifecycle = evaluateLifecycle(entity);
    stale.push({
      id: entity.id,
      title: entity.title,
      ageDays: Math.round(ageDays),
      // Keep two decimal places.
      decayScore: Math.round(lifecycle.decay.score * 100) / 100,
    });
  }

  return { staleDraftsFound: stale.length, pruned: 0, items: stale };
}
457
-
458
/**
 * Finds entities that have no relations at all.
 *
 * Candidates are non-reference entities with fewer than two accesses that are
 * at least `orphanAgeDays` old. Each candidate's relations are looked up in
 * batches of `CONCURRENCY_LIMIT`; a lookup that rejects is ignored, so that
 * entity is simply not reported. Nothing is deleted here.
 *
 * @param client - API client used for the relation lookups.
 * @param entities - Entity snapshot to scan.
 * @param orphanAgeDays - Minimum age, in days since `created_at`.
 * @returns The orphans found; `removed` is 0.
 */
async function runOrphanStep(
  client: HarmonyApiClient,
  entities: MemoryEntity[],
  orphanAgeDays: number,
): Promise<OrphanStepResult> {
  const now = Date.now();
  const ageInDays = (e: MemoryEntity): number =>
    (now - new Date(e.created_at).getTime()) / MS_PER_DAY;

  // Cheap local checks first, so only plausible orphans cost an API call.
  const candidates = entities.filter(
    (e) =>
      e.memory_tier !== "reference" &&
      !(e.access_count >= 2) &&
      ageInDays(e) >= orphanAgeDays,
  );

  const found: OrphanStepResult["items"] = [];

  let cursor = 0;
  while (cursor < candidates.length) {
    const batch = candidates.slice(cursor, cursor + CONCURRENCY_LIMIT);
    cursor += CONCURRENCY_LIMIT;

    const settled = await Promise.allSettled(
      batch.map(async (entity) => {
        const related = await client.getRelatedEntities(entity.id);
        const outgoing = related.outgoing?.length || 0;
        const incoming = related.incoming?.length || 0;
        if (outgoing + incoming > 0) return null;

        return {
          id: entity.id,
          title: entity.title,
          type: entity.type,
          tier: entity.memory_tier,
          ageDays: Math.round(ageInDays(entity)),
          accessCount: entity.access_count,
        };
      }),
    );

    for (const outcome of settled) {
      if (outcome.status !== "fulfilled" || !outcome.value) continue;
      found.push(outcome.value);
    }
  }

  return { orphansFound: found.length, removed: 0, items: found };
}
507
-
508
/**
 * Finds pairs of near-duplicate entities and decides which copy to keep.
 *
 * For every entity, up to five similar entities are fetched through
 * `findSimilarEntities` in batches of `CONCURRENCY_LIMIT`; a lookup that
 * rejects is ignored. A pair is reported when its title similarity reaches
 * `DUPLICATE_SIMILARITY_THRESHOLD`. The copy with the higher quality score is
 * kept, and ties keep the entity being scanned. Nothing is deleted here.
 *
 * @param client - API client handed to `findSimilarEntities`.
 * @param entities - Entity snapshot to scan.
 * @param workspaceId - Workspace to search for similar entities.
 * @param projectId - Optional project to restrict the search to.
 * @returns The duplicate pairs found; `resolved` is 0.
 */
async function runDuplicateStep(
  client: HarmonyApiClient,
  entities: MemoryEntity[],
  workspaceId: string,
  projectId?: string,
): Promise<DuplicateStepResult> {
  const result: DuplicateStepResult = {
    duplicatePairsFound: 0,
    resolved: 0,
    pairs: [],
  };

  const seenPairs = new Set<string>();
  const flaggedForRemoval = new Set<string>();
  const entityMap = new Map(entities.map((e) => [e.id, e]));

  // Pre-fetch similarities in concurrent batches
  type SimilarMatch = {
    id: string;
    type: string;
    title: string;
    content: string;
    confidence: number;
  };
  const similarityMap = new Map<string, SimilarMatch[]>();
  for (let i = 0; i < entities.length; i += CONCURRENCY_LIMIT) {
    const batch = entities.slice(i, i + CONCURRENCY_LIMIT);
    const results = await Promise.allSettled(
      batch.map(async (entity) => {
        const similar = await findSimilarEntities(
          client,
          entity.title,
          entity.content,
          workspaceId,
          { projectId, limit: 5, minRrfScore: 0.05, excludeIds: [entity.id] },
        );
        return { entityId: entity.id, similar };
      }),
    );
    for (const r of results) {
      if (r.status === "fulfilled") {
        similarityMap.set(r.value.entityId, r.value.similar);
      }
    }
  }

  // Process pairs sequentially (flaggedForRemoval creates dependencies)
  for (const entity of entities) {
    if (flaggedForRemoval.has(entity.id)) continue;
    const similar = similarityMap.get(entity.id) || [];

    for (const match of similar) {
      if (flaggedForRemoval.has(match.id)) continue;

      // Order-independent key so A~B and B~A are only judged once.
      const pairKey = [entity.id, match.id].sort().join(":");
      if (seenPairs.has(pairKey)) continue;
      seenPairs.add(pairKey);

      const sim = titleSimilarity(entity.title, match.title);
      if (sim < DUPLICATE_SIMILARITY_THRESHOLD) continue;

      // Keep the one with higher confidence, more accesses, or higher tier.
      // NOTE(review): a match outside the snapshot is scored on confidence
      // alone, with no tier or access weight, so it will usually lose —
      // confirm that is intended.
      const entityScore = entityQualityScore(entity);
      const matchEntity = entityMap.get(match.id);
      const matchScore = matchEntity
        ? entityQualityScore(matchEntity)
        : match.confidence;

      const entityWins = entityScore >= matchScore;
      const matchRef = { id: match.id, title: match.title };
      const keep = entityWins ? entity : matchRef;
      const remove = entityWins ? matchRef : entity;

      flaggedForRemoval.add(remove.id);
      result.pairs.push({
        keepId: keep.id,
        keepTitle: keep.title,
        removeId: remove.id,
        removeTitle: remove.title,
        similarity: Math.round(sim * 100) / 100,
      });
      result.duplicatePairsFound++;

      // The scanned entity is now slated for removal, so it must not go on
      // to be named the keeper of its remaining matches.
      if (!entityWins) break;
    }
  }

  return result;
}
595
-
596
- // ---------------------------------------------------------------------------
597
- // Helpers
598
- // ---------------------------------------------------------------------------
599
-
600
/** Score contribution of each memory tier; unlisted tiers contribute 0. */
const TIER_WEIGHTS: Record<string, number> = { draft: 1, episode: 2, reference: 3 };

/**
 * Ranks an entity when choosing which of two duplicates to keep.
 *
 * The score is the entity's confidence, plus its tier weight, plus 0.1 per
 * access with the access count capped at 10.
 */
function entityQualityScore(entity: MemoryEntity): number {
  const tierWeight = TIER_WEIGHTS[entity.memory_tier] || 0;
  const accessBonus = Math.min(entity.access_count, 10) * 0.1;
  return entity.confidence + tierWeight + accessBonus;
}
613
-
614
/**
 * Splits a title into words. Letters, combining marks, digits and underscore
 * from any script count as word characters; everything else separates words.
 */
const TITLE_WORD_SEPARATOR = /[^\p{L}\p{M}\p{N}_]+/u;

/**
 * Scores how alike two titles are, from 0 (no shared words) to 1 (identical).
 *
 * Titles are compared case-insensitively after trimming and Unicode NFC
 * normalization. Identical titles score 1. Otherwise the score is the Jaccard
 * similarity of their word sets: shared words divided by distinct words
 * overall. If either title has no words, the score is 0.
 *
 * Words are split with a Unicode-aware pattern, so titles written in
 * non-Latin scripts or containing accented letters are tokenized properly
 * instead of being discarded or split mid-word.
 *
 * @param a - First title.
 * @param b - Second title.
 * @returns Similarity between 0 and 1.
 */
function titleSimilarity(a: string, b: string): number {
  const na = a.normalize("NFC").toLowerCase().trim();
  const nb = b.normalize("NFC").toLowerCase().trim();
  if (na === nb) return 1;

  const wordsA = new Set(na.split(TITLE_WORD_SEPARATOR).filter(Boolean));
  const wordsB = new Set(nb.split(TITLE_WORD_SEPARATOR).filter(Boolean));
  if (wordsA.size === 0 || wordsB.size === 0) return 0;

  let intersection = 0;
  for (const w of wordsA) {
    if (wordsB.has(w)) intersection++;
  }

  // Jaccard similarity
  const union = wordsA.size + wordsB.size - intersection;
  return union > 0 ? intersection / union : 0;
}
631
-
632
- // ---------------------------------------------------------------------------
633
- // Health report renderer
634
- // ---------------------------------------------------------------------------
635
-
636
/**
 * Makes a value safe to place inside a Markdown table cell: pipes are escaped
 * and line breaks become spaces, so the value cannot break the row.
 */
function healthReportCell(value: string): string {
  return value.replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
}

/**
 * Renders a cleanup report as Markdown.
 *
 * One section is written per stage result present in `report.steps`, followed
 * by any collected errors and, for dry runs, a hint on how to execute. Tables
 * and pair lists show at most the first 20 entries, and cluster lists the
 * first 10. A note is added when the entity count reached
 * `MAX_ENTITIES_FETCH`.
 *
 * @param report - The report to render; its `healthReport` field is not read.
 * @returns The Markdown text.
 */
function generateHealthReport(report: CleanupReport): string {
  const mode = report.dryRun ? "Dry Run (preview)" : "Executed";
  const lines: string[] = [
    "# Memory Health Report\n",
    `**Mode:** ${mode} | **Entities:** ${report.summary.totalEntities} | **Issues:** ${report.summary.issuesFound} | **Actions:** ${report.summary.actionsTaken}`,
    "",
  ];

  if (report.summary.totalEntities >= MAX_ENTITIES_FETCH) {
    lines.push(
      `> **Note:** Entity count hit the ${MAX_ENTITIES_FETCH} fetch limit. Some entities may not have been analyzed.\n`,
    );
  }

  // Prune
  if (report.steps.prune) {
    const p = report.steps.prune;
    lines.push("## Stale Drafts");
    if (p.staleDraftsFound === 0) {
      lines.push("No stale drafts found.\n");
    } else {
      lines.push(
        `Found **${p.staleDraftsFound}** stale drafts${!report.dryRun ? ` (pruned ${p.pruned})` : ""}:`,
      );
      lines.push("| Title | Age | Decay |");
      lines.push("|-------|-----|-------|");
      for (const item of p.items.slice(0, 20)) {
        lines.push(
          `| ${healthReportCell(item.title)} | ${item.ageDays}d | ${item.decayScore} |`,
        );
      }
      lines.push("");
    }
  }

  // Consolidate
  if (report.steps.consolidate) {
    const c = report.steps.consolidate;
    lines.push("## Consolidation");
    if (c.clustersFound === 0) {
      lines.push(
        `Scanned ${c.entitiesProcessed} draft/episode entities — no clusters found.\n`,
      );
    } else {
      lines.push(
        `Found **${c.clustersFound}** clusters across ${c.entitiesProcessed} entities:`,
      );
      for (const d of c.details.slice(0, 10)) {
        lines.push(`- **${d.mergedTitle}** — ${d.clusterSize} entities`);
      }
      lines.push("");
    }
  }

  // Orphans
  if (report.steps.orphans) {
    const o = report.steps.orphans;
    lines.push("## Orphaned Entities");
    if (o.orphansFound === 0) {
      lines.push("No orphans found.\n");
    } else {
      lines.push(
        `Found **${o.orphansFound}** orphans${!report.dryRun ? ` (removed ${o.removed})` : ""}:`,
      );
      lines.push("| Title | Type | Tier | Age | Accesses |");
      lines.push("|-------|------|------|-----|----------|");
      for (const item of o.items.slice(0, 20)) {
        lines.push(
          `| ${healthReportCell(item.title)} | ${healthReportCell(item.type)} | ${healthReportCell(item.tier)} | ${item.ageDays}d | ${item.accessCount} |`,
        );
      }
      lines.push("");
    }
  }

  // Duplicates
  if (report.steps.duplicates) {
    const d = report.steps.duplicates;
    lines.push("## Near-Duplicates");
    if (d.duplicatePairsFound === 0) {
      lines.push("No duplicates found.\n");
    } else {
      lines.push(
        `Found **${d.duplicatePairsFound}** duplicate pairs${!report.dryRun ? ` (resolved ${d.resolved})` : ""}:`,
      );
      for (const pair of d.pairs.slice(0, 20)) {
        lines.push(
          `- "${pair.keepTitle}" ~ "${pair.removeTitle}" (${Math.round(pair.similarity * 100)}% similar, keep first)`,
        );
      }
      lines.push("");
    }
  }

  // Backfill
  if (report.steps.backfill) {
    const b = report.steps.backfill;
    lines.push("## Embedding Coverage");
    if (report.dryRun) {
      lines.push("Backfill will run when executed with `dryRun: false`.\n");
    } else if (b.remaining === 0) {
      lines.push(`All embeddings up to date (processed ${b.processed}).\n`);
    } else {
      lines.push(
        `Processed ${b.processed} entities. ${b.remaining} still need embeddings.\n`,
      );
    }
  }

  // Audit
  if (report.steps.audit) {
    const a = report.steps.audit;
    lines.push("## Quality Audit");
    lines.push(
      `Scanned ${a.scanned} entities. Legacy signals on ${a.legacyCount}.`,
    );
    lines.push(
      `Buckets — keep: ${a.buckets.keep}, review: ${a.buckets.review}, archive: ${a.buckets.archive}, delete: ${a.buckets.delete}.`,
    );
    if (!report.dryRun) {
      lines.push(
        `Actions — flagged: ${a.actions.flaggedReview}, archived: ${a.actions.archived}, deleted: ${a.actions.deleted}.`,
      );
    }
    if (a.report.lowest.length > 0) {
      const worst = a.report.lowest[0];
      lines.push(
        `Lowest score: **${worst.score}** — "${worst.title}" (${worst.reasons.slice(0, 2).join(", ") || "—"}).`,
      );
    }
    lines.push("");
  }

  // Errors
  if (report.errors.length > 0) {
    lines.push("## Errors");
    for (const e of report.errors) {
      lines.push(`- **${e.step}:** ${e.message}`);
    }
    lines.push("");
  }

  if (report.dryRun) {
    lines.push("---\n*Run with `dryRun: false` to execute cleanup.*");
  }

  return lines.join("\n");
}
782
-
783
- // ---------------------------------------------------------------------------
784
- // Purge — filtered bulk deletion
785
- // ---------------------------------------------------------------------------
786
-
787
/**
 * Deletes, or in dry-run mode only lists, every memory entity in a project
 * that satisfies the given filters.
 *
 * `type`, `scope` and `tags` are sent to the list call; `tier`,
 * `maxConfidence` and `olderThanDays` are checked client-side on each page.
 * Age is measured from `last_accessed_at`, falling back to `created_at`.
 *
 * Safeguards:
 * - at least one filter must be supplied;
 * - numeric filters must be finite numbers, because a NaN threshold would
 *   make every comparison false and match every entity;
 * - when `olderThanDays` is set, entities whose timestamp cannot be parsed are
 *   not matched, since their age is unknown;
 * - paging stops if a page contains no entity that has not been seen before,
 *   so a backend that ignores `offset` cannot cause an endless loop.
 *
 * @param client - API client used to list and delete entities.
 * @param workspaceId - Workspace to purge from.
 * @param projectId - Project to purge from.
 * @param options - Filters, plus `dryRun` which defaults to true.
 * @returns What matched, what was deleted, and any deletion failures.
 * @throws Error when no filter is supplied or a numeric filter is not finite.
 */
export async function purgeMemories(
  client: HarmonyApiClient,
  workspaceId: string,
  projectId: string,
  options: PurgeOptions,
): Promise<PurgeReport> {
  const dryRun = options.dryRun !== false;
  const { filters } = options;

  // Safety: require at least one narrowing filter
  const hasFilter =
    filters.tier ||
    filters.scope ||
    filters.type ||
    filters.olderThanDays !== undefined ||
    filters.maxConfidence !== undefined ||
    (filters.tags && filters.tags.length > 0);

  if (!hasFilter) {
    throw new Error(
      "At least one narrowing filter (tier, scope, type, olderThanDays, maxConfidence, tags) is required.",
    );
  }
  if (
    filters.olderThanDays !== undefined &&
    !Number.isFinite(filters.olderThanDays)
  ) {
    throw new Error("olderThanDays must be a finite number.");
  }
  if (
    filters.maxConfidence !== undefined &&
    !Number.isFinite(filters.maxConfidence)
  ) {
    throw new Error("maxConfidence must be a finite number.");
  }

  const now = Date.now();
  const ageInDays = (e: MemoryEntity): number =>
    (now - new Date(e.last_accessed_at || e.created_at).getTime()) / MS_PER_DAY;

  // Paginate through all matching entities
  const allMatches: MemoryEntity[] = [];
  const seenIds = new Set<string>();
  const pageSize = 100;

  for (let offset = 0; ; offset += pageSize) {
    const result = await client.listMemoryEntities({
      workspace_id: workspaceId,
      project_id: projectId,
      type: filters.type,
      scope: filters.scope,
      tags: filters.tags,
      limit: pageSize,
      offset,
    });

    const page = (result.entities || []) as MemoryEntity[];
    if (page.length === 0) break;

    let unseen = 0;
    for (const entity of page) {
      if (seenIds.has(entity.id)) continue;
      seenIds.add(entity.id);
      unseen++;

      // Client-side filtering for fields the API doesn't support natively
      if (filters.tier && entity.memory_tier !== filters.tier) continue;
      if (
        filters.maxConfidence !== undefined &&
        entity.confidence > filters.maxConfidence
      ) {
        continue;
      }
      if (filters.olderThanDays !== undefined) {
        const ageDays = ageInDays(entity);
        // An unknown age never qualifies as "old enough".
        if (!Number.isFinite(ageDays) || ageDays < filters.olderThanDays) {
          continue;
        }
      }
      allMatches.push(entity);
    }

    // A short page is the last one; a page of repeats means paging is stuck.
    if (page.length < pageSize || unseen === 0) break;
  }

  // Build preview items
  const items = allMatches.map((e) => ({
    id: e.id,
    title: e.title,
    type: e.type,
    tier: e.memory_tier,
    confidence: e.confidence,
    ageDays: Math.round(ageInDays(e)),
  }));

  // Execute deletions if not dry-run
  const errors: Array<{ entityId: string; message: string }> = [];
  let purged = 0;

  if (!dryRun) {
    // Delete in batches to avoid overwhelming the API
    for (let i = 0; i < allMatches.length; i += CONCURRENCY_LIMIT) {
      const batch = allMatches.slice(i, i + CONCURRENCY_LIMIT);
      const failures = await Promise.all(
        batch.map(async (e) => {
          try {
            await client.deleteMemoryEntity(e.id);
            return null;
          } catch (reason) {
            return { entityId: e.id, message: String(reason) };
          }
        }),
      );
      for (const failure of failures) {
        if (failure) {
          errors.push(failure);
        } else {
          purged++;
        }
      }
    }
  }

  return {
    success: errors.length === 0,
    dryRun,
    timestamp: new Date().toISOString(),
    workspace: { id: workspaceId, projectId },
    filters,
    matched: allMatches.length,
    purged: dryRun ? 0 : purged,
    items,
    errors,
  };
}