@gethmy/mcp 2.4.6 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1175 +0,0 @@
1
- /**
2
- * Context Assembly Engine
3
- *
4
- * Token-budget-aware context constructor that assembles relevant memories
5
- * for a given task, producing a manifest of what was included/excluded.
6
- */
7
-
8
- import type { GraphRelation } from "@harmony/memory";
9
- import { checkPromotion, discoverRelatedContext } from "@harmony/memory";
10
- import type { HarmonyApiClient } from "./api-client.js";
11
-
12
- // Types
13
/** Storage tier of a memory entity. */
export type MemoryTier = "draft" | "episode" | "reference";

/** A memory entity in the shape the context assembler works with. */
export interface ContextEntity {
  id: string;
  type: string;
  title: string;
  content: string;
  confidence: number;
  tags: string[];
  memory_tier: MemoryTier;
  access_count: number;
  last_accessed_at: string | null;
  created_at: string;
  updated_at: string;
  relevanceScore?: number;
  metadata?: Record<string, unknown>;
  /** Hybrid search signals (from DB RPC). */
  rrf_score?: number;
  fts_rank?: number;
  semantic_rank?: number;
}

/** Manifest record for an entity that made it into the assembled context. */
export interface ContextManifestEntry {
  entityId: string;
  title: string;
  type: string;
  tier: MemoryTier;
  relevanceScore: number;
  reasons: string[];
  tokenCount: number;
  truncated: boolean;
}

/** Report of one assembly run: what was included, what was left out, and budget usage. */
export interface ContextManifest {
  assemblyId: string;
  timestamp: string;
  included: ContextManifestEntry[];
  excluded: {
    entityId: string;
    title: string;
    type: string;
    tier: MemoryTier;
    relevanceScore: number;
    reason: string;
  }[];
  budgetUsed: number;
  budgetTotal: number;
  tierBreakdown: Record<MemoryTier, { count: number; tokens: number }>;
  procedureBreakdown?: { count: number; tokens: number; budget: number };
}

/** Inputs accepted by `assembleContext`. */
export interface AssembleContextOptions {
  workspaceId: string;
  projectId?: string;
  /** Card title + description, used for relevance matching. */
  taskContext: string;
  cardLabels?: string[];
  cardId?: string;
  /** Defaults to 4000 tokens. */
  tokenBudget?: number;
  client: HarmonyApiClient;
  /** Defaults to true: enrich candidates via knowledge graph relations. */
  graphWalkEnabled?: boolean;
  /** Defaults to true: expand the query with synonyms/variations. */
  queryExpansionEnabled?: boolean;
  /** Defaults to false: LLM re-ranking when scores are clustered. */
  enableLlmReranking?: boolean;
  /** Custom re-rank function; resolves to candidate ids in preferred order. */
  rerankFn?: (
    taskContext: string,
    candidates: { id: string; title: string; snippet: string }[],
  ) => Promise<string[]>;
}

/** Result of `assembleContext`: rendered text, its manifest, and the included entities. */
export interface AssembledContext {
  context: string;
  manifest: ContextManifest;
  memories: ContextEntity[];
}
86
-
87
- // Constants
88
/** Token budget applied when the caller does not supply one. */
const DEFAULT_TOKEN_BUDGET = 4000;
/** Cap passed to content truncation for every single entity. */
const MAX_TOKENS_PER_ENTITY = 500;
/** Score floor for inclusion (raised from 0.1 to filter low-signal entities). */
const MIN_RELEVANCE_THRESHOLD = 0.15;

/** Multiplier applied to the clamped relevance score, per memory tier. */
const TIER_WEIGHTS: Record<MemoryTier, number> = {
  reference: 1.0,
  episode: 0.7,
  draft: 0.4,
};

/** Share of the total budget reserved for procedure entities. */
const PROCEDURE_BUDGET_FRACTION = 0.15;

/** Per-tier share of the budget that remains after the procedure reservation. */
const TIER_BUDGET_ALLOCATION: Record<MemoryTier, number> = {
  reference: 0.6,
  episode: 0.3,
  draft: 0.1,
};

/** Guaranteed reference-tier slots (reduced from 3 to avoid filling context with noise). */
const MIN_REFERENCE_SLOTS = 1;

/* Graph walk configuration. */
const GRAPH_WALK_MAX_DEPTH = 1;
const GRAPH_WALK_MAX_ENTITIES = 10;
const GRAPH_WALK_MIN_CONFIDENCE = 0.5;
const GRAPH_WALK_SEED_COUNT = 5;

/** Maximum number of search queries produced by query expansion (original included). */
const MAX_QUERY_VARIATIONS = 4;

/* LLM re-ranking configuration. */
const RERANK_CLUSTER_THRESHOLD = 0.05;
const RERANK_TOP_N = 10;
const RERANK_MIN_CANDIDATES = 5;

/** Relevance bonus per knowledge-graph relation type. */
const RELATION_BONUSES: Record<string, number> = {
  depends_on: 0.15,
  resolved_by: 0.2,
  relates_to: 0.1,
  implements: 0.15,
  blocks: 0.15,
  references: 0.1,
  extends: 0.1,
  caused_by: 0.15,
};

/**
 * Synonyms for common dev terms, used by query expansion.
 *
 * Only the FIRST synonym of a key is used as the replacement, so each key
 * should expand to non-overlapping terms and circular pairs should be avoided.
 * NOTE(review): `auth` -> "authentication" and `authentication` -> "auth"
 * currently point at each other — confirm whether that is intended.
 */
const QUERY_SYNONYMS: Record<string, string[]> = {
  auth: ["authentication", "authorization", "session"],
  authentication: ["auth", "session", "sign-in"],
  login: ["sign-in", "authentication", "session"],
  bug: ["error", "issue", "defect", "problem"],
  error: ["exception", "failure", "issue"],
  fix: ["resolve", "patch", "repair", "correct"],
  deploy: ["deployment", "release", "ship", "publish"],
  test: ["testing", "spec", "assertion", "verify"],
  config: ["configuration", "settings", "setup"],
  db: ["database", "storage", "persistence"],
  database: ["storage", "persistence", "data store"],
  api: ["endpoint", "route", "service"],
  ui: ["frontend", "component", "view"],
  perf: ["performance", "speed", "latency"],
  performance: ["speed", "latency", "optimization"],
};
158
-
159
/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
165
-
166
/**
 * Content quality gate: filters out entities that would waste token budget.
 *
 * An entity is rejected when any of these holds:
 *  1. its trimmed content is shorter than 50 characters;
 *  2. its normalized content is less than 1.5x the length of its normalized
 *     title (content is most likely the title restated);
 *  3. it is a "recurring X (N instances)" pattern whose content is mostly a
 *     bullet list of members;
 *  4. it is a procedure with fewer than three numbered steps.
 *
 * @param entity - Candidate entity to check.
 * @returns true if the entity passes all gates.
 */
function passesQualityGate(entity: ContextEntity): boolean {
  const content = entity.content.trim();

  // Gate 1: too shallow to provide value (e.g. "Resolved bug: Fix login button").
  if (content.length < 50) return false;

  // Gate 2: compare letter/digit payload of title and content. Normalization is
  // Unicode-aware so non-Latin text is not erased (an ASCII-only filter would
  // reduce e.g. Cyrillic or CJK content to nothing and reject it wrongly).
  const normalize = (text: string): string =>
    text
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
      .trim();
  const normalizedTitle = normalize(entity.title);
  const normalizedContent = normalize(content);
  if (normalizedContent.length < normalizedTitle.length * 1.5) {
    // Content is barely longer than the title — likely just a reformulation.
    return false;
  }

  // Gate 3: pattern noise — "recurring X (N instances)" entities that are just catalogs.
  if (
    entity.type === "pattern" &&
    /recurring .+ \(\d+ instances\)/i.test(entity.title)
  ) {
    const nonEmptyLines = content
      .split("\n")
      .filter((line) => line.trim().length > 0);
    const bulletLines = nonEmptyLines.filter((line) =>
      line.trim().startsWith("- "),
    );
    // More than 60% bullets means the content is essentially a member list.
    if (bulletLines.length > nonEmptyLines.length * 0.6) return false;
  }

  // Gate 4: procedures must contain actual numbered steps (1. ..., 2. ..., ...).
  if (entity.type === "procedure") {
    const stepCount = (content.match(/^\d+\.\s/gm) ?? []).length;
    if (stepCount < 3) return false;
  }

  return true;
}
214
-
215
/**
 * Generate an assembly ID of the form `ctx_<time>_<random>`.
 *
 * `<time>` is the current epoch milliseconds in base 36 and `<random>` is
 * always exactly six base-36 characters. The random part is right-padded with
 * "0" because `Math.random().toString(36)` can yield fewer than six fractional
 * digits (e.g. 0.5 -> "0.i", 0 -> "0"), which would otherwise produce a short
 * or empty suffix.
 *
 * @returns The generated identifier.
 */
function generateAssemblyId(): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
  return `ctx_${timePart}_${randomPart}`;
}
221
-
222
/**
 * Truncate entity content to fit within a token limit.
 *
 * Content already within the limit is returned unchanged. Otherwise the first
 * paragraph is kept, followed by the bullet lines ("- " / "* ") of later
 * paragraphs for as long as they still fit. If the result is still too long it
 * is hard-cut to `maxTokens * 4` characters, ending in "..." when there is
 * room for the ellipsis.
 *
 * @param content - Full entity content.
 * @param maxTokens - Token limit, measured with `estimateTokens`.
 * @returns The resulting text and whether anything was dropped.
 */
function truncateContent(
  content: string,
  maxTokens: number,
): { text: string; truncated: boolean } {
  if (estimateTokens(content) <= maxTokens) {
    return { text: content, truncated: false };
  }

  const [firstParagraph = "", ...laterParagraphs] = content.split(/\n\n+/);
  let result = firstParagraph;

  // Append bullet points from subsequent paragraphs if they fit.
  for (const paragraph of laterParagraphs) {
    const bullets = paragraph
      .split("\n")
      .filter((line) => line.startsWith("- ") || line.startsWith("* "));
    if (bullets.length === 0) continue;
    const candidate = `${result}\n\n${bullets.join("\n")}`;
    if (estimateTokens(candidate) <= maxTokens) {
      result = candidate;
    }
  }

  // Hard truncate if still too long.
  if (estimateTokens(result) > maxTokens) {
    const ellipsis = "...";
    const maxChars = Math.max(0, Math.floor(maxTokens * 4));
    // With fewer than 3 chars of room there is no space for the ellipsis; a
    // negative slice end would cut from the tail and overshoot the budget.
    const withEllipsis = maxChars >= ellipsis.length;
    let head = result.slice(
      0,
      withEllipsis ? maxChars - ellipsis.length : maxChars,
    );
    // Do not leave a dangling high surrogate (half of an emoji/astral char).
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      head = head.slice(0, -1);
    }
    result = withEllipsis ? head + ellipsis : head;
  }

  return { text: result, truncated: true };
}
260
-
261
/**
 * Backslash-escape every regex metacharacter in `str` so the result can be
 * embedded literally in a `RegExp` pattern.
 */
function escapeRegex(str: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (matched) => `\\${matched}`);
}
267
-
268
/**
 * Expand a query into multiple search variations using synonym substitution.
 *
 * The original query always comes first. For each word that has an entry in
 * `QUERY_SYNONYMS`, a variation is produced by replacing every whole-word
 * occurrence with the entry's first synonym. When the query has at least three
 * words longer than two characters, a compact query made of its first four
 * non-stop-words is appended as well. Duplicates (case-insensitive) are
 * skipped and the result is capped at `MAX_QUERY_VARIATIONS` entries.
 *
 * @param taskContext - The text to search for.
 * @returns The original query followed by its variations.
 */
export function expandQuery(taskContext: string): string[] {
  const stopWords = new Set([
    "the",
    "and",
    "for",
    "with",
    "this",
    "that",
    "from",
    "into",
  ]);

  const lowerOriginal = taskContext.toLowerCase();
  const queries = [taskContext];
  const seen = new Set([lowerOriginal]);
  const words = lowerOriginal.split(/\W+/).filter((w) => w.length > 2);

  for (const word of words) {
    if (queries.length >= MAX_QUERY_VARIATIONS) break;
    // Own-property check: a plain-object lookup would also hit inherited keys
    // such as "constructor" or "toString" and inject "undefined" into the query.
    if (!Object.prototype.hasOwnProperty.call(QUERY_SYNONYMS, word)) continue;
    const replacement = QUERY_SYNONYMS[word]?.[0];
    if (!replacement) continue;

    // Function replacer keeps the synonym literal (no `$&`-style expansion).
    const variation = taskContext.replace(
      new RegExp(`\\b${escapeRegex(word)}\\b`, "gi"),
      () => replacement,
    );
    const lowerVariation = variation.toLowerCase();
    if (!seen.has(lowerVariation)) {
      queries.push(variation);
      seen.add(lowerVariation);
    }
  }

  // Also extract key terms as a compact query.
  if (words.length >= 3) {
    const keyPhrases = words
      .filter((w) => !stopWords.has(w))
      .slice(0, 4)
      .join(" ");
    // Skip the compact query when only stop words were present (empty string).
    if (keyPhrases.length > 0 && !seen.has(keyPhrases)) {
      queries.push(keyPhrases);
    }
  }

  return queries.slice(0, MAX_QUERY_VARIATIONS);
}
327
-
328
/**
 * Compute a relevance score for an entity against the task context.
 *
 * The raw score is the sum of these signals, clamped to [0, 1] and then
 * multiplied by the entity's tier weight (`TIER_WEIGHTS`):
 *  0. hybrid-search RRF score (up to 0.3);
 *  1. word overlap between task and entity text (up to 0.4, or 0.15 when an
 *     RRF score is present);
 *  2. overlap between entity tags and card labels (up to 0.3);
 *  3. confidence (x 0.15);
 *  4. recency of last access, with a tier-specific half-life (up to 0.1);
 *  5. log-scaled access frequency (up to 0.1);
 *  6. usefulness feedback (up to 0.15, or a 0.02 penalty);
 *  plus a flat 0.1 boost for procedures and the best graph-relation bonus.
 *
 * @param entity - Entity to score.
 * @param taskContext - Text describing the task.
 * @param cardLabels - Labels of the card; matched case-insensitively against tags.
 * @param graphRelations - Optional graph-walk relations; an entity appearing as
 *   source or target of one receives the highest applicable bonus.
 * @returns The final score and the list of reasons that contributed to it.
 */
export function computeRelevanceScore(
  entity: ContextEntity,
  taskContext: string,
  cardLabels: string[],
  graphRelations?: GraphRelation[],
): { score: number; reasons: string[] } {
  const reasons: string[] = [];
  let score = 0;

  // 0. DB hybrid search signal (RRF score from FTS + vector fusion).
  const rrfScore =
    entity.rrf_score !== undefined && entity.rrf_score > 0
      ? entity.rrf_score
      : undefined;
  if (rrfScore !== undefined) {
    // RRF scores are typically 0-0.04; normalize to 0-1, then scale to 0-0.3.
    const normalizedRrf = Math.min(rrfScore / 0.04, 1.0);
    score += normalizedRrf * 0.3;
    reasons.push(`hybrid_search(rrf=${rrfScore.toFixed(4)})`);
  }

  // 1. Text match: simple word overlap (reduced weight when RRF is available).
  const textMatchWeight = rrfScore !== undefined ? 0.15 : 0.4;
  const tokenize = (text: string): Set<string> =>
    new Set(
      text
        .toLowerCase()
        .split(/\W+/)
        .filter((w) => w.length > 2),
    );
  const taskWords = tokenize(taskContext);
  const entityWords = tokenize(`${entity.title} ${entity.content}`);
  const overlap = [...taskWords].filter((w) => entityWords.has(w));
  if (overlap.length > 0) {
    const coverage = Math.min(overlap.length / Math.max(taskWords.size, 1), 1.0);
    score += coverage * textMatchWeight;
    reasons.push(`text_match(${overlap.length} words)`);
  }

  // 2. Tag overlap with card labels.
  if (cardLabels.length > 0 && entity.tags.length > 0) {
    const labelSet = new Set(cardLabels.map((l) => l.toLowerCase()));
    const tagOverlap = entity.tags.filter((t) => labelSet.has(t.toLowerCase()));
    if (tagOverlap.length > 0) {
      // Clamp: duplicate tags must not push the ratio above 1.
      const ratio = Math.min(tagOverlap.length / cardLabels.length, 1.0);
      score += ratio * 0.3;
      reasons.push(`tag_match(${tagOverlap.join(",")})`);
    }
  }

  // 3. Confidence as a quality signal.
  score += entity.confidence * 0.15;
  if (entity.confidence >= 0.9) {
    reasons.push("high_confidence");
  }

  // 4. Recency: exponential decay since last access, tier-specific half-life.
  if (entity.last_accessed_at) {
    const accessedAt = new Date(entity.last_accessed_at).getTime();
    // An unparseable timestamp yields NaN, which would poison the whole score.
    if (Number.isFinite(accessedAt)) {
      const msPerDay = 1000 * 60 * 60 * 24;
      // Clamp at 0 so a timestamp in the future (clock skew) cannot produce
      // a bonus larger than the intended 0.1 maximum.
      const daysSinceAccess = Math.max(0, (Date.now() - accessedAt) / msPerDay);
      const halfLife = { draft: 7, episode: 30, reference: 180 }[
        entity.memory_tier
      ];
      score += 0.5 ** (daysSinceAccess / halfLife) * 0.1;
      if (daysSinceAccess < 7) reasons.push("recently_accessed");
    }
  }

  // 5. Access frequency (log-scaled, capped at 0.1).
  if (entity.access_count > 0) {
    const freqScore = Math.log10(entity.access_count + 1) * 0.05;
    score += Math.min(freqScore, 0.1);
    if (entity.access_count >= 5) {
      reasons.push(`frequently_used(${entity.access_count})`);
    }
  }

  // 6. Usefulness score from feedback loop (0-0.15 weight).
  const parsedUsefulness = Number(entity.metadata?.usefulness_score ?? 0);
  const usefulnessScore = Number.isFinite(parsedUsefulness)
    ? parsedUsefulness
    : 0;
  if (usefulnessScore >= 3) {
    score += Math.min(usefulnessScore / 20, 0.15);
    reasons.push(`useful(${usefulnessScore})`);
  } else if (usefulnessScore === 0 && entity.access_count >= 5) {
    // Accessed many times but never marked useful — slight penalty.
    score -= 0.02;
    reasons.push("low_usefulness");
  }

  // Procedure boost: actionable step-by-step instructions are highly valuable.
  if (entity.type === "procedure") {
    score += 0.1;
    reasons.push("procedure_boost");
  }

  // 7. Graph walk relation bonus: take the single highest bonus, do not stack.
  if (graphRelations && graphRelations.length > 0) {
    let bestBonus = 0;
    let bestRelType = "";
    let related = false;
    for (const rel of graphRelations) {
      if (rel.source_id !== entity.id && rel.target_id !== entity.id) continue;
      related = true;
      const bonus = RELATION_BONUSES[rel.relation_type] ?? 0.1;
      if (bonus > bestBonus) {
        bestBonus = bonus;
        bestRelType = rel.relation_type;
      }
    }
    if (related) {
      score += bestBonus;
      reasons.push(`graph_walk(${bestRelType})`);
    }
  }

  // Clamp raw score to 0-1 before applying the tier weight.
  score = Math.max(0, Math.min(score, 1.0));
  score *= TIER_WEIGHTS[entity.memory_tier];

  return { score, reasons };
}
460
-
461
/**
 * Maps raw API entities and appends those not yet pooled.
 * `seenIds` is updated in step so later sources cannot add duplicates.
 */
function mergeCandidates(
  rawEntities: readonly unknown[] | null | undefined,
  pool: ContextEntity[],
  seenIds: Set<string>,
): void {
  for (const raw of rawEntities ?? []) {
    const entity = mapToContextEntity(raw);
    if (seenIds.has(entity.id)) continue;
    seenIds.add(entity.id);
    pool.push(entity);
  }
}

/** Records in the manifest why an entity was left out of the context. */
function recordExclusion(
  manifest: ContextManifest,
  entity: ContextEntity,
  relevanceScore: number,
  reason: string,
): void {
  manifest.excluded.push({
    entityId: entity.id,
    title: entity.title,
    type: entity.type,
    tier: entity.memory_tier,
    relevanceScore,
    reason,
  });
}

/** Renders one included entity as a markdown heading line plus its content. */
function renderContextEntry(entity: ContextEntity, showType: boolean): string[] {
  const tags = entity.tags.length > 0 ? ` [${entity.tags.join(", ")}]` : "";
  const tierLabel =
    entity.memory_tier !== "reference" ? ` (${entity.memory_tier})` : "";
  const descriptor = showType
    ? `${entity.type}, confidence: ${entity.confidence}`
    : `confidence: ${entity.confidence}`;
  return [
    `\n### ${entity.title} (${descriptor})${tierLabel}${tags}`,
    entity.content,
  ];
}

/**
 * Assemble context from knowledge graph entities with token budget management.
 *
 * Steps:
 *  1. Gather candidates: search (optionally with query expansion), then
 *     project-scoped and workspace-scoped listings when few were found, then
 *     optional graph-walk enrichment. Failures of the listings and of the
 *     graph walk are tolerated (best effort).
 *  2. Drop candidates failing the quality gate, score the rest, sort by score,
 *     and optionally let `rerankFn` reorder a tightly clustered top group.
 *  3. Fill the token budget in three passes: guaranteed reference slot(s),
 *     procedures, then everything else by score. Included entities have their
 *     `content` replaced by the (possibly truncated) text that was budgeted.
 *  4. Build the manifest and the markdown context, and start the access-count
 *     bump and promotion check without awaiting them.
 *
 * @param options - See `AssembleContextOptions`.
 * @returns The rendered context, its manifest, and the included entities.
 */
export async function assembleContext(
  options: AssembleContextOptions,
): Promise<AssembledContext> {
  const {
    workspaceId,
    projectId,
    taskContext,
    cardLabels = [],
    tokenBudget = DEFAULT_TOKEN_BUDGET,
    client,
    graphWalkEnabled = true,
    queryExpansionEnabled = true,
    enableLlmReranking = false,
    rerankFn,
  } = options;

  const assemblyId = generateAssemblyId();
  const manifest: ContextManifest = {
    assemblyId,
    timestamp: new Date().toISOString(),
    included: [],
    excluded: [],
    budgetUsed: 0,
    budgetTotal: tokenBudget,
    tierBreakdown: {
      draft: { count: 0, tokens: 0 },
      episode: { count: 0, tokens: 0 },
      reference: { count: 0, tokens: 0 },
    },
  };
  const emptyResult = (): AssembledContext => ({
    context: "",
    manifest,
    memories: [],
  });

  // ---------------------------------------------------------------------
  // 1. Gather candidates
  // ---------------------------------------------------------------------
  const candidates: ContextEntity[] = [];
  const candidateIds = new Set<string>();

  // P1: Query expansion — several query variations catch synonym mismatches.
  const queries = queryExpansionEnabled
    ? expandQuery(taskContext)
    : [taskContext];
  const searchResults = await Promise.allSettled(
    queries.map((query) =>
      client.searchMemoryEntities(workspaceId, query, {
        project_id: projectId,
        limit: 30,
      }),
    ),
  );
  for (const result of searchResults) {
    if (result.status !== "fulfilled") continue;
    mergeCandidates(result.value.entities, candidates, candidateIds);
  }

  // Also fetch by project scope if we have few candidates.
  if (candidates.length < 10 && projectId) {
    try {
      const listResult = await client.listMemoryEntities({
        workspace_id: workspaceId,
        project_id: projectId,
        limit: 30,
      });
      mergeCandidates(listResult.entities, candidates, candidateIds);
    } catch {
      // List failed, continue with what we have.
    }
  }

  // Cross-project memory: workspace-scoped entities only, so shared
  // decisions/patterns are available without leaking project-private data.
  if (candidates.length < 20) {
    try {
      const wsResult = await client.listMemoryEntities({
        workspace_id: workspaceId,
        scope: "workspace",
        limit: 20,
      });
      mergeCandidates(wsResult.entities, candidates, candidateIds);
    } catch {
      // Continue with what we have.
    }
  }

  // P0: Graph walk enrichment — discover related entities via knowledge graph.
  let graphRelations: GraphRelation[] = [];
  if (graphWalkEnabled && candidates.length > 0) {
    try {
      // Seeds: top candidates by RRF score (first N when no RRF scores exist).
      const seedIds = [...candidates]
        .sort((a, b) => (b.rrf_score ?? 0) - (a.rrf_score ?? 0))
        .slice(0, GRAPH_WALK_SEED_COUNT)
        .map((c) => c.id);

      const walkResult = await discoverRelatedContext(
        client,
        seedIds,
        GRAPH_WALK_MAX_DEPTH,
        GRAPH_WALK_MAX_ENTITIES,
        GRAPH_WALK_MIN_CONFIDENCE,
      );
      graphRelations = walkResult.relations;

      const newEntityIds = walkResult.entities
        .filter((e) => !candidateIds.has(e.id))
        .map((e) => e.id);

      if (newEntityIds.length > 0) {
        // Graph walk only returns summary fields; fetch full entities in parallel.
        const fetchResults = await Promise.allSettled(
          newEntityIds.map((id) => client.getMemoryEntity(id)),
        );
        for (const result of fetchResults) {
          if (result.status !== "fulfilled" || !result.value.entity) continue;
          // mergeCandidates also guards against the walk returning an id twice.
          mergeCandidates([result.value.entity], candidates, candidateIds);
        }
      }
    } catch {
      // Graph walk failed, continue with search-only candidates.
    }
  }

  if (candidates.length === 0) return emptyResult();

  // ---------------------------------------------------------------------
  // 2. Quality gate, scoring, optional re-ranking
  // ---------------------------------------------------------------------
  const qualityCandidates = candidates.filter((entity) => {
    if (passesQualityGate(entity)) return true;
    recordExclusion(manifest, entity, 0, "failed_quality_gate");
    return false;
  });
  if (qualityCandidates.length === 0) return emptyResult();

  const relationsForScoring =
    graphRelations.length > 0 ? graphRelations : undefined;
  const scored = qualityCandidates.map((entity) => {
    const { score, reasons } = computeRelevanceScore(
      entity,
      taskContext,
      cardLabels,
      relationsForScoring,
    );
    return { entity, score, reasons };
  });
  scored.sort((a, b) => b.score - a.score);

  // P2: Optional LLM re-ranking, only when the top scores are tightly clustered.
  if (
    enableLlmReranking &&
    rerankFn &&
    scored.length >= RERANK_MIN_CANDIDATES
  ) {
    const topN = scored.slice(0, RERANK_TOP_N);
    const scoreRange = topN[0].score - topN[topN.length - 1].score;
    if (scoreRange <= RERANK_CLUSTER_THRESHOLD) {
      try {
        const rerankedIds = await rerankFn(
          taskContext,
          topN.map((s) => ({
            id: s.entity.id,
            title: s.entity.title,
            snippet: s.entity.content.slice(0, 200),
          })),
        );
        const idOrder = new Map(rerankedIds.map((id, i) => [id, i]));
        // Ids the re-ranker did not return sort last, keeping their relative order.
        const unranked = Number.MAX_SAFE_INTEGER;
        topN.sort(
          (a, b) =>
            (idOrder.get(a.entity.id) ?? unranked) -
            (idOrder.get(b.entity.id) ?? unranked),
        );
        scored.splice(0, topN.length, ...topN);
      } catch {
        // Re-ranking failed, continue with static ordering.
      }
    }
  }

  // ---------------------------------------------------------------------
  // 3. Budgeted selection
  // ---------------------------------------------------------------------
  const procedureBudget = Math.floor(tokenBudget * PROCEDURE_BUDGET_FRACTION);
  const remainingBudget = tokenBudget - procedureBudget;
  const tierBudgets: Record<MemoryTier, number> = {
    reference: Math.floor(remainingBudget * TIER_BUDGET_ALLOCATION.reference),
    episode: Math.floor(remainingBudget * TIER_BUDGET_ALLOCATION.episode),
    draft: Math.floor(remainingBudget * TIER_BUDGET_ALLOCATION.draft),
  };
  const tierUsed: Record<MemoryTier, number> = {
    reference: 0,
    episode: 0,
    draft: 0,
  };
  let procedureUsed = 0;
  let totalUsed = 0;

  type ScoredItem = (typeof scored)[number];
  const included: Array<ScoredItem & { tokens: number; truncated: boolean }> =
    [];
  const includedIds = new Set<string>();

  // Truncates an item's content and estimates the tokens of heading + text.
  const measure = (item: ScoredItem) => {
    const { text, truncated } = truncateContent(
      item.entity.content,
      MAX_TOKENS_PER_ENTITY,
    );
    const tokens = estimateTokens(`### ${item.entity.title}\n${text}`);
    return { text, truncated, tokens };
  };
  // Commits an item: the entity keeps only the text that was budgeted for.
  const include = (item: ScoredItem, fit: ReturnType<typeof measure>): void => {
    included.push({ ...item, tokens: fit.tokens, truncated: fit.truncated });
    item.entity.content = fit.text;
    totalUsed += fit.tokens;
    includedIds.add(item.entity.id);
  };

  // First pass: guarantee minimum reference slots (no relevance threshold here).
  let referenceCount = 0;
  for (const item of scored) {
    if (referenceCount >= MIN_REFERENCE_SLOTS) break;
    if (
      item.entity.memory_tier !== "reference" ||
      item.entity.type === "procedure"
    ) {
      continue;
    }
    const fit = measure(item);
    if (totalUsed + fit.tokens > tokenBudget) continue;
    include(item, fit);
    tierUsed.reference += fit.tokens;
    referenceCount++;
  }

  // Second pass: procedures, with a dedicated budget that may overflow into
  // whatever is left of the total budget.
  for (const item of scored) {
    if (item.entity.type !== "procedure") continue;
    if (includedIds.has(item.entity.id)) continue;
    if (item.score < MIN_RELEVANCE_THRESHOLD) {
      recordExclusion(
        manifest,
        item.entity,
        item.score,
        "below_relevance_threshold",
      );
      continue;
    }

    const fit = measure(item);
    if (fit.tokens > tokenBudget - totalUsed) {
      // The reason reflects whether the dedicated budget was also exhausted.
      const reason =
        procedureUsed + fit.tokens > procedureBudget
          ? "procedure_budget_exceeded"
          : "total_budget_exceeded";
      recordExclusion(manifest, item.entity, item.score, reason);
      continue;
    }

    include(item, fit);
    procedureUsed += fit.tokens;
  }

  // Third pass: fill the remaining budget by score (non-procedure entities).
  for (const item of scored) {
    if (includedIds.has(item.entity.id)) continue;
    if (item.entity.type === "procedure") continue; // handled in second pass
    if (item.score < MIN_RELEVANCE_THRESHOLD) {
      recordExclusion(
        manifest,
        item.entity,
        item.score,
        "below_relevance_threshold",
      );
      continue;
    }

    const tier = item.entity.memory_tier;
    const fit = measure(item);
    // A tier may overflow into budget unused by others; only the total is a
    // hard limit. The reason records whether the tier budget was exceeded too.
    if (fit.tokens > tokenBudget - totalUsed) {
      const reason =
        tierUsed[tier] + fit.tokens > tierBudgets[tier]
          ? "budget_exceeded"
          : "total_budget_exceeded";
      recordExclusion(manifest, item.entity, item.score, reason);
      continue;
    }

    include(item, fit);
    tierUsed[tier] += fit.tokens;
  }

  // ---------------------------------------------------------------------
  // 4. Manifest and rendered context
  // ---------------------------------------------------------------------
  const procedureItems = included.filter((i) => i.entity.type === "procedure");
  const nonProcedureItems = included.filter(
    (i) => i.entity.type !== "procedure",
  );
  const countInTier = (tier: MemoryTier): number =>
    nonProcedureItems.filter((i) => i.entity.memory_tier === tier).length;

  manifest.budgetUsed = totalUsed;
  manifest.tierBreakdown = {
    reference: { count: countInTier("reference"), tokens: tierUsed.reference },
    episode: { count: countInTier("episode"), tokens: tierUsed.episode },
    draft: { count: countInTier("draft"), tokens: tierUsed.draft },
  };
  manifest.procedureBreakdown = {
    count: procedureItems.length,
    tokens: procedureUsed,
    budget: procedureBudget,
  };
  for (const item of included) {
    manifest.included.push({
      entityId: item.entity.id,
      title: item.entity.title,
      type: item.entity.type,
      tier: item.entity.memory_tier,
      relevanceScore: item.score,
      reasons: item.reasons,
      tokenCount: item.tokens,
      truncated: item.truncated,
    });
  }

  const contextSections: string[] = [];

  // Procedure section first (actionable instructions).
  if (procedureItems.length > 0) {
    contextSections.push(
      `## Procedures (${procedureItems.length} loaded, ${procedureUsed}/${procedureBudget} tokens)`,
    );
    for (const item of procedureItems) {
      contextSections.push(...renderContextEntry(item.entity, false));
    }
  }

  // Non-procedure memories.
  if (nonProcedureItems.length > 0) {
    contextSections.push(
      `\n## Relevant Memories (${nonProcedureItems.length} loaded, ${manifest.excluded.length} excluded)`,
    );
    contextSections.push(
      `*Assembly: ${assemblyId} | Budget: ${totalUsed}/${tokenBudget} tokens*`,
    );
    for (const item of nonProcedureItems) {
      contextSections.push(...renderContextEntry(item.entity, true));
    }
  }

  // Fire-and-forget bookkeeping: neither call is awaited and failures are
  // deliberately ignored so they cannot affect the assembled result.
  void incrementAccessCounts(
    client,
    included.map((i) => i.entity.id),
  ).catch(() => {});
  void promoteEligibleEntities(
    client,
    included.map((i) => i.entity),
  ).catch(() => {});

  return {
    context: contextSections.join("\n"),
    manifest,
    memories: included.map((i) => i.entity),
  };
}
973
-
974
/**
 * Convert an untyped API payload into a ContextEntity.
 *
 * Defaults applied to absent fields:
 * - confidence falls back to 1.0 only when it is null/undefined (0 is kept)
 * - tags, memory_tier, access_count, last_accessed_at, created_at and
 *   updated_at fall back on any falsy value ([], "reference", 0, null, "", "")
 * - metadata and the hybrid search signals become undefined when nullish
 *
 * @param raw - entity object as returned by the API (not validated here)
 * @returns the payload reshaped as a ContextEntity
 */
export function mapToContextEntity(raw: unknown): ContextEntity {
  const record = raw as Record<string, unknown>;

  // Falsy values are replaced by the fallback.
  const orElse = <T>(value: unknown, fallback: T): T =>
    (value as T) || fallback;

  // Nullish values collapse to undefined; anything else passes through.
  const optional = <T>(value: unknown): T | undefined =>
    (value as T) ?? undefined;

  const rawConfidence = record.confidence as number | null | undefined;

  const entity: ContextEntity = {
    id: record.id as string,
    type: record.type as string,
    title: record.title as string,
    content: record.content as string,
    confidence:
      rawConfidence === null || rawConfidence === undefined
        ? 1.0
        : rawConfidence,
    tags: orElse<string[]>(record.tags, []),
    memory_tier: orElse<MemoryTier>(record.memory_tier, "reference"),
    access_count: orElse<number>(record.access_count, 0),
    last_accessed_at: orElse<string | null>(record.last_accessed_at, null),
    created_at: orElse<string>(record.created_at, ""),
    updated_at: orElse<string>(record.updated_at, ""),
    metadata: optional<Record<string, unknown>>(record.metadata),
    // Hybrid search signals (only present when results come from the RPC)
    rrf_score: optional<number>(record.rrf_score),
    fts_rank: optional<number>(record.fts_rank),
    semantic_rank: optional<number>(record.semantic_rank),
  };
  return entity;
}
998
-
999
/**
 * Bump the access counter of every entity that was loaded into context.
 *
 * A single batched call is attempted first. If that call fails for any
 * reason (e.g. an older server without the batch endpoint), each entity is
 * touched individually and per-entity failures are ignored.
 *
 * @param client - API client used to issue the touch requests
 * @param entityIds - ids of the entities to touch; an empty list is a no-op
 */
async function incrementAccessCounts(
  client: HarmonyApiClient,
  entityIds: string[],
): Promise<void> {
  if (entityIds.length === 0) {
    return;
  }

  let batchSucceeded = true;
  try {
    await client.batchTouchMemoryEntities(entityIds);
  } catch {
    batchSucceeded = false;
  }
  if (batchSucceeded) {
    return;
  }

  // Fallback path: one request per entity, waiting for all to settle.
  const touches = entityIds.map((entityId) =>
    client.touchMemoryEntity(entityId),
  );
  await Promise.allSettled(touches);
}
1018
-
1019
/**
 * Promote included entities that qualify for a higher memory tier.
 *
 * Eligibility is evaluated with `access_count + 1` so the check reflects the
 * access bump that accompanies this context load. Entities already in the
 * "reference" tier, or lacking a creation timestamp, are never considered.
 * Entities are processed one after another; a failed update is ignored.
 *
 * @param client - API client used to persist the tier change
 * @param entities - entities that were included in the assembled context
 */
async function promoteEligibleEntities(
  client: HarmonyApiClient,
  entities: ContextEntity[],
): Promise<void> {
  for (const candidate of entities) {
    const currentTier = candidate.memory_tier;
    if (currentTier === "reference" || !candidate.created_at) {
      continue;
    }

    // +1 accounts for the touch issued by incrementAccessCounts
    const bumpedAccessCount = candidate.access_count + 1;
    const verdict = checkPromotion(
      currentTier,
      bumpedAccessCount,
      candidate.confidence,
      candidate.created_at,
    );
    if (!(verdict.eligible && verdict.targetTier)) {
      continue;
    }

    // Existing metadata is carried over and annotated with promotion details.
    const update = {
      memory_tier: verdict.targetTier,
      metadata: {
        ...(candidate.metadata || {}),
        promoted_at: new Date().toISOString(),
        promotion_reason: verdict.reason,
        promoted_from: currentTier,
      },
    };

    try {
      await client.updateMemoryEntity(candidate.id, update);
    } catch {
      // Best-effort: a failed promotion must not affect the remaining entities
    }
  }
}
1056
-
1057
// In-memory manifest cache (keyed by assemblyId).
// A Map iterates its keys in insertion order, so the first key is the oldest entry.
const manifestCache = new Map<string, ContextManifest>();
const MAX_CACHE_SIZE = 50;

/**
 * Store a manifest for later retrieval.
 *
 * The cache holds at most MAX_CACHE_SIZE manifests. When a manifest with a
 * new assemblyId arrives and the cache is full, the oldest entry is evicted.
 * Re-caching an assemblyId that is already present replaces the stored
 * manifest in place and never evicts another entry.
 *
 * @param manifest - manifest to store under its `assemblyId`
 */
export function cacheManifest(manifest: ContextManifest): void {
  const isNewEntry = !manifestCache.has(manifest.assemblyId);
  if (isNewEntry && manifestCache.size >= MAX_CACHE_SIZE) {
    const oldestKey = manifestCache.keys().next().value;
    // Compare against undefined so that an empty-string key can still be evicted.
    if (oldestKey !== undefined) manifestCache.delete(oldestKey);
  }
  manifestCache.set(manifest.assemblyId, manifest);
}
1072
-
1073
/**
 * Look up a manifest in the in-memory cache.
 *
 * @param assemblyId - id the manifest was cached under
 * @returns the cached manifest, or undefined when nothing is stored for that id
 */
export function getCachedManifest(
  assemblyId: string,
): ContextManifest | undefined {
  const cached = manifestCache.get(assemblyId);
  return cached;
}
1081
-
1082
- // --- Feedback-Driven Scoring ---
1083
-
1084
/**
 * Track which assemblyId was used for which card session.
 * A Map iterates its keys in insertion order, so the first key is the oldest entry.
 */
const sessionAssemblyMap = new Map<string, string>();
const MAX_SESSION_MAP_SIZE = 100;

/**
 * Associate an assemblyId with a card session for later feedback.
 * Called when context is assembled during session start or prompt generation.
 *
 * At most MAX_SESSION_MAP_SIZE cards are tracked. When a new card arrives and
 * the map is full, the oldest association is dropped. Re-tracking a card that
 * is already present only replaces its assemblyId and never drops another card.
 *
 * @param cardId - card whose session the assembly belongs to
 * @param assemblyId - id of the assembled context
 */
export function trackSessionAssembly(cardId: string, assemblyId: string): void {
  const isNewCard = !sessionAssemblyMap.has(cardId);
  if (isNewCard && sessionAssemblyMap.size >= MAX_SESSION_MAP_SIZE) {
    const oldestCardId = sessionAssemblyMap.keys().next().value;
    // Compare against undefined so that an empty-string key can still be evicted.
    if (oldestCardId !== undefined) sessionAssemblyMap.delete(oldestCardId);
  }
  sessionAssemblyMap.set(cardId, assemblyId);
}
1099
-
1100
/**
 * Look up the assembly recorded for a card session.
 *
 * @param cardId - card whose session is being queried
 * @returns the tracked assemblyId, or undefined when the card is not tracked
 */
export function getSessionAssemblyId(cardId: string): string | undefined {
  const trackedAssemblyId = sessionAssemblyMap.get(cardId);
  return trackedAssemblyId;
}
1106
-
1107
/**
 * Record context feedback based on session outcome.
 * Adjusts the confidence of every entity listed in the manifest that was
 * tracked for the card's session.
 *
 * - Success (status "completed" and progress >= 100): confidence +0.05
 *   (capped at 1.0) and `metadata.usefulness_score` incremented by one.
 * - Not a success but `hadBlockers` is set: confidence -0.02 (floored at 0.1).
 * - Anything else: neutral signal, no entity is touched.
 *
 * Feedback keys are merged into the entity's existing metadata so that other
 * metadata (e.g. promotion details) is preserved. A failure on one entity is
 * ignored and does not stop the others.
 *
 * @param client - API client used to read and update entities
 * @param cardId - card whose session has ended
 * @param sessionStatus - final status of the session
 * @param progressPercent - reported progress; treated as 0 when omitted
 * @param hadBlockers - whether the session reported blockers
 * @returns the number of entities whose confidence was updated; 0 when no
 *   assembly or manifest is known for the card
 */
export async function recordContextFeedback(
  client: HarmonyApiClient,
  cardId: string,
  sessionStatus: "completed" | "paused",
  progressPercent?: number,
  hadBlockers?: boolean,
): Promise<{ adjusted: number }> {
  const assemblyId = sessionAssemblyMap.get(cardId);
  if (!assemblyId) return { adjusted: 0 };

  const manifest = manifestCache.get(assemblyId);
  if (!manifest || manifest.included.length === 0) return { adjusted: 0 };

  const isSuccess =
    sessionStatus === "completed" && (progressPercent ?? 0) >= 100;

  let adjusted = 0;

  // Only success and blocker outcomes carry a signal; otherwise skip all API calls.
  if (isSuccess || hadBlockers) {
    for (const entry of manifest.included) {
      try {
        const { entity } = await client.getMemoryEntity(entry.entityId);
        const current = entity as {
          confidence?: number | null;
          metadata?: Record<string, unknown> | null;
        };
        const baseConfidence = current.confidence ?? 0.5;
        const existingMetadata = current.metadata ?? {};

        // Start from the stored metadata so the update does not drop unrelated keys.
        const metadata: Record<string, unknown> = {
          ...existingMetadata,
          last_feedback_at: new Date().toISOString(),
        };

        let newConfidence: number;
        if (isSuccess) {
          // A missing or non-numeric score restarts the counter from zero.
          const storedScore = existingMetadata.usefulness_score;
          const currentUsefulness =
            typeof storedScore === "number" && Number.isFinite(storedScore)
              ? storedScore
              : 0;
          metadata.usefulness_score = currentUsefulness + 1;
          newConfidence = Math.min(baseConfidence + 0.05, 1.0);
        } else {
          // Slight penalty for entities included when the session had blockers
          newConfidence = Math.max(baseConfidence - 0.02, 0.1);
        }

        await client.updateMemoryEntity(entry.entityId, {
          confidence: newConfidence,
          metadata,
        });
        adjusted++;
      } catch {
        // Non-fatal: individual entity update failure
      }
    }
  }

  // Clean up tracking
  sessionAssemblyMap.delete(cardId);

  return { adjusted };
}