@gethmy/mcp 2.4.7 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,383 +0,0 @@
1
- /**
2
- * Smart Memory Consolidation
3
- *
4
- * Clusters similar draft/episode memories and merges them into
5
- * consolidated reference entities to reduce noise and improve retrieval.
6
- */
7
-
8
- import type { HarmonyApiClient } from "./api-client.js";
9
- import { findSimilarEntities } from "./graph-expansion.js";
10
-
11
/**
 * Shape of a memory entity as this module reads it from the list API
 * response (the response's `entities` array is cast to this type).
 */
interface MemoryEntity {
  /** Identifier used for cluster bookkeeping and as the relation source. */
  id: string;
  /** Entity type; clustering only ever groups entities of the same type. */
  type: string;
  /** Human-readable title; feeds the derived title of a merged cluster. */
  title: string;
  /** Free-form body text; scanned line by line when merging a cluster. */
  content: string;
  /** Numeric confidence; the merged entity takes the maximum of its members. */
  confidence: number;
  /** Tier name; only "draft" and "episode" entities are consolidated here. */
  memory_tier: string;
  /** Labels; the merged entity receives the de-duplicated union of these. */
  tags: string[];
  /** Optional free-form metadata attached to the entity. */
  metadata?: Record<string, unknown>;
  /** Presumably the last-modified timestamp; not read by this module. */
  updated_at?: string;
}
22
-
23
/**
 * Summary returned by `consolidateMemories`.
 */
export interface ConsolidationResult {
  /**
   * Clusters merged. In a dry run this counts every cluster found; otherwise
   * only clusters whose consolidated entity was actually created.
   */
  consolidated: number;
  /** Total number of clusters detected across all type groups. */
  clustersFound: number;
  /** Number of draft/episode entities considered for clustering. */
  entitiesProcessed: number;
  /** One record per detected cluster, in discovery order. */
  details: {
    /** Number of member entities in the cluster. */
    clusterSize: number;
    /** Title derived for the consolidated entity. */
    mergedTitle: string;
    /** Titles of the cluster members. */
    memberTitles: string[];
    /** Id of the created consolidated entity; set when not a dry run. */
    entityId?: string;
  }[];
}
34
-
35
/**
 * Tuning knobs accepted by `consolidateMemories`.
 */
export interface ConsolidationOptions {
  /**
   * When anything other than an explicit `false`, nothing is written and the
   * result only reports what would be merged. Default: dry run.
   */
  dryRun?: boolean;
  /**
   * Minimum number of entities required to form a cluster.
   * Default: 3 (was 2 — raised to avoid premature merging).
   */
  minClusterSize?: number;
}
39
-
40
/**
 * Consolidate similar draft/episode memories into reference entities.
 *
 * Pipeline:
 * 1. List entities in scope and keep those in the "draft" or "episode" tier
 *    that have not already been consolidated by an earlier run.
 * 2. Group the candidates by entity type.
 * 3. Within each type group, seed a cluster from every not-yet-clustered
 *    entity using `findSimilarEntities`.
 * 4. For each cluster derive a merged title and content; unless this is a dry
 *    run, create a "reference" entity, link every member to it with a
 *    `part_of` relation and lower each member's confidence.
 *
 * Write failures are deliberately non-fatal: a cluster that cannot be
 * persisted is still reported in `details` (without `entityId`) and does not
 * stop the remaining clusters from being processed.
 *
 * @param client - API client used to list, create and update memories.
 * @param workspaceId - Workspace whose memories are consolidated.
 * @param projectId - Optional project id, forwarded to the API calls.
 * @param options - See `ConsolidationOptions`; dry run is the default.
 * @returns Counters plus one detail record per cluster. In a dry run,
 *   `consolidated` counts the clusters that would have been merged.
 */
export async function consolidateMemories(
  client: HarmonyApiClient,
  workspaceId: string,
  projectId?: string,
  options?: ConsolidationOptions,
): Promise<ConsolidationResult> {
  // Anything other than an explicit `false` is treated as a dry run.
  const dryRun = options?.dryRun !== false;
  // A cluster of fewer than two members is not a merge. Clamp so a caller
  // passing 0 or 1 cannot promote every entity into its own "consolidation".
  const minClusterSize = Math.max(2, options?.minClusterSize ?? 3);

  const result: ConsolidationResult = {
    consolidated: 0,
    clustersFound: 0,
    entitiesProcessed: 0,
    details: [],
  };

  // Step 1: Fetch candidates.
  // NOTE(review): only a single request with limit 100 is made; whether the
  // API paginates beyond that is not visible from here.
  const listResult = await client.listMemoryEntities({
    workspace_id: workspaceId,
    project_id: projectId,
    limit: 100,
  });

  const allEntities = ((listResult.entities ?? []) as MemoryEntity[]).filter(
    (e) =>
      (e.memory_tier === "draft" || e.memory_tier === "episode") &&
      // Members keep their tier after being merged (only their confidence is
      // lowered), so without this check a second run would cluster them again,
      // create a duplicate reference entity and lower their confidence twice.
      e.metadata?.consolidated_into == null,
  );

  result.entitiesProcessed = allEntities.length;
  if (allEntities.length < minClusterSize) return result;

  // Step 2: Group by type
  const typeGroups = new Map<string, MemoryEntity[]>();
  for (const entity of allEntities) {
    const group = typeGroups.get(entity.type) ?? [];
    group.push(entity);
    typeGroups.set(entity.type, group);
  }

  // Step 3: Find clusters within each type group
  for (const [type, entities] of typeGroups) {
    if (entities.length < minClusterSize) continue;

    // Built once per group: id -> full entity (first occurrence wins), used to
    // restrict search hits to this group and map them back to our records.
    const entitiesById = new Map<string, MemoryEntity>();
    for (const e of entities) {
      if (!entitiesById.has(e.id)) entitiesById.set(e.id, e);
    }

    const clustered = new Set<string>();
    const clusters: MemoryEntity[][] = [];

    for (const entity of entities) {
      if (clustered.has(entity.id)) continue;

      // Search for similar entities using embedding-based search
      const similar = await findSimilarEntities(
        client,
        entity.title,
        entity.content,
        workspaceId,
        {
          projectId,
          limit: 20,
          minRrfScore: 0.01,
          excludeIds: [...clustered],
        },
      );

      // Keep only hits from this type group that are still unclustered.
      const neighbors = similar.filter(
        (s) =>
          entitiesById.has(s.id) &&
          !clustered.has(s.id) &&
          s.id !== entity.id &&
          s.type === type,
      );
      if (neighbors.length < minClusterSize - 1) continue;

      // Seed plus at most five neighbors, de-duplicated by id.
      // NOTE(review): this cap means a minClusterSize above 6 can never be met.
      const cluster: MemoryEntity[] = [entity];
      const seen = new Set<string>([entity.id]);
      for (const hit of neighbors.slice(0, 5)) {
        const member = entitiesById.get(hit.id);
        if (member && !seen.has(member.id)) {
          seen.add(member.id);
          cluster.push(member);
        }
      }

      if (cluster.length < minClusterSize) continue;
      clusters.push(cluster);
      for (const member of cluster) {
        clustered.add(member.id);
      }
    }

    // Step 4: Create consolidated entities for each cluster
    for (const cluster of clusters) {
      result.clustersFound++;

      const mergedTitle = deriveClusterTitle(cluster, type);
      const memberTitles = cluster.map((e) => e.title);
      const mergedContent = synthesizeClusterContent(cluster, type);

      // Max confidence from cluster members
      const maxConfidence = Math.max(...cluster.map((e) => e.confidence));

      // Union of all tags (deduped)
      const allTags = [...new Set(cluster.flatMap((e) => e.tags || []))];

      const detail: ConsolidationResult["details"][number] = {
        clusterSize: cluster.length,
        mergedTitle,
        memberTitles,
      };

      if (dryRun) {
        // Nothing is written; count the cluster as "would be consolidated".
        result.consolidated++;
        result.details.push(detail);
        continue;
      }

      try {
        // Create consolidated reference entity.
        // NOTE(review): scope is always "project" even when projectId is
        // undefined — confirm the API accepts that combination.
        const createResult = await client.createMemoryEntity({
          workspace_id: workspaceId,
          project_id: projectId,
          type,
          scope: "project",
          memory_tier: "reference",
          title: mergedTitle,
          content: mergedContent,
          confidence: maxConfidence,
          tags: [...allTags.slice(0, 15), "consolidated"],
          metadata: {
            source: "consolidation",
            member_ids: cluster.map((e) => e.id),
            consolidated_at: new Date().toISOString(),
          },
        });

        const newEntity = createResult.entity as { id: string };
        if (newEntity?.id) {
          detail.entityId = newEntity.id;

          // Create part_of relations from members → consolidated entity
          for (const member of cluster) {
            try {
              await client.createMemoryRelation({
                source_id: member.id,
                target_id: newEntity.id,
                relation_type: "part_of",
                confidence: 0.8,
              });
            } catch {
              // Best effort: skip relations that cannot be created
              // (e.g. duplicates).
            }
          }

          // Downgrade member confidence by 0.3 (min 0.1)
          for (const member of cluster) {
            try {
              const newConf = Math.max(member.confidence - 0.3, 0.1);
              await client.updateMemoryEntity(member.id, {
                confidence: newConf,
                metadata: {
                  // Carry existing keys over so the update cannot drop them if
                  // the API replaces metadata instead of merging it.
                  ...member.metadata,
                  consolidated_into: newEntity.id,
                  original_confidence: member.confidence,
                },
              });
            } catch {
              // Non-fatal: the consolidated entity already exists.
            }
          }

          result.consolidated++;
        }
      } catch {
        // Non-fatal: consolidation failure for one cluster shouldn't block others
      }

      result.details.push(detail);
    }
  }

  return result;
}
242
-
243
/**
 * Build the body text for a consolidated entity from its cluster members.
 *
 * Every member's content is split into trimmed lines. Lines shorter than 20
 * characters and lines matching a boilerplate pattern (markdown headers,
 * agent/session metadata, progress steps, earlier consolidation headers) are
 * dropped. The remaining lines are de-duplicated across members after
 * lower-casing and stripping markdown punctuation, then emitted as a bullet
 * list under a one-line header, capped at 1600 characters.
 *
 * @param cluster - Entities being merged.
 * @param type - Entity type name, used only in the generated text.
 * @returns The bullet list, or a compact list of member titles when no line
 *   survived filtering.
 */
function synthesizeClusterContent(
  cluster: MemoryEntity[],
  type: string,
): string {
  // Lines to skip: headers, agent metadata, timestamps, progress percentages
  const SKIP_PATTERNS = [
    /^##\s/,
    /^Agent:/,
    /^Duration:/,
    /^Labels:/,
    /^Progress:/,
    /^Session status:/,
    /^Completed at/,
    /^Final state:/,
    /^Related:/,
    /^When working on:/,
    /^\d+\.\s+.+\(\d+%,\s*\+\d+%\)/, // procedure step with progress percentages
    /^Last updated:/,
    /^Recurring pattern:/,
    /^Consolidated from/,
  ];

  const seenLines = new Set<string>();
  const knowledgeLines: string[] = [];

  for (const entity of cluster) {
    // Entities come from an unchecked cast of API data, so content may be
    // missing at runtime despite the declared type.
    const lines = (entity.content ?? "").split("\n").map((l) => l.trim());

    for (const line of lines) {
      if (!line || line.length < 20) continue;
      if (SKIP_PATTERNS.some((p) => p.test(line))) continue;

      // Normalize for dedup: lowercase, strip markdown formatting
      const normalized = line
        .toLowerCase()
        .replace(/[*_`#[\]]/g, "")
        .trim();
      if (seenLines.has(normalized)) continue;
      seenLines.add(normalized);

      knowledgeLines.push(line);
    }
  }

  if (knowledgeLines.length === 0) {
    // Fallback: if no knowledge was extractable, use a compact summary
    return `${cluster.length} related ${type} entities consolidated. Original titles:\n${cluster.map((e) => `- ${e.title}`).join("\n")}`;
  }

  // Cap at ~400 tokens worth of content (1600 chars)
  const MAX_CHARS = 1600;
  // Each bullet costs its text plus "- " and the joining newline.
  const BULLET_OVERHEAD = 3;
  const header = `Consolidated knowledge from ${cluster.length} ${type} entities:\n`;
  const result: string[] = [header];
  let charCount = header.length;

  for (const line of knowledgeLines) {
    const cost = line.length + BULLET_OVERHEAD;
    if (charCount + cost > MAX_CHARS) {
      if (result.length === 1) {
        // The very first line is longer than the whole budget. Emit a
        // truncated version rather than a header with no content at all.
        const room = MAX_CHARS - charCount - BULLET_OVERHEAD - 1; // 1 for "…"
        if (room > 0) {
          let cut = line.slice(0, room);
          // Do not leave half of a surrogate pair at the cut point.
          const last = cut.charCodeAt(cut.length - 1);
          if (last >= 0xd800 && last <= 0xdbff) cut = cut.slice(0, -1);
          result.push(`- ${cut}…`);
        }
      }
      break;
    }
    result.push(`- ${line}`);
    charCount += cost;
  }

  return result.join("\n");
}
312
-
313
/**
 * Derive a cluster title from the most common meaningful words across member
 * titles.
 *
 * Titles are lower-cased, multi-part stop phrases (such as "mid-session") are
 * removed, and the rest is split into words on anything that is not a letter,
 * digit or underscore. Words of one or two characters and stop words are
 * ignored. The four most frequent words (ties keep first-seen order) are
 * capitalised and joined with " / ".
 *
 * @param cluster - Entities whose titles are analysed.
 * @param type - Entity type; capitalised and used as the title prefix.
 * @returns `"<Type>: <Word> / <Word> ..."`, or `"<Type>: Various"` when no
 *   meaningful word was found.
 */
function deriveClusterTitle(cluster: MemoryEntity[], type: string): string {
  const stopWords = new Set([
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "shall", "would",
    "should", "may", "might", "can", "could", "of", "in", "to", "for",
    "with", "on", "at", "from", "by", "and", "or", "but", "not",
    "session", "blocker", "pattern", "solution", "error", "task",
    "mid-session",
  ]);

  // Unicode-aware word separator, so non-ASCII words are not cut apart at
  // accented or non-Latin letters.
  const separator = /[^\p{L}\p{N}_]+/u;

  // Stop entries that contain a separator can never match a single token, so
  // they have to be removed from the title before tokenising.
  const stopPhrases = [...stopWords].filter((w) => separator.test(w));

  const wordCounts = new Map<string, number>();
  for (const entity of cluster) {
    let text = (entity.title ?? "").toLowerCase();
    for (const phrase of stopPhrases) {
      text = text.split(phrase).join(" ");
    }
    for (const word of text.split(separator)) {
      if (word.length <= 2 || stopWords.has(word)) continue;
      wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
    }
  }

  // charAt returns "" for an empty string, so an empty type cannot throw.
  const capitalize = (s: string): string =>
    s.charAt(0).toUpperCase() + s.slice(1);

  // Sort by frequency, take top 4 for more descriptive titles
  const topWords = [...wordCounts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 4)
    .map(([word]) => capitalize(word));

  const suffix = topWords.length > 0 ? topWords.join(" / ") : "Various";
  return `${capitalize(type)}: ${suffix}`;
}