thumbgate 1.12.0 → 1.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,710 @@
1
+ /**
2
+ * Context Engine
3
+ *
4
+ * Inspired by Dropbox Dash's architecture for intelligent context retrieval.
5
+ * Pre-computes knowledge bundles from project docs, routes queries to relevant
6
+ * context, scores retrieval quality, and manages prompt templates.
7
+ *
8
+ * Key insight: instead of agents reading 100+ docs at runtime, pre-compute
9
+ * topical bundles and route queries to the most relevant subset. This reduces
10
+ * MCP tool calls and context window consumption.
11
+ *
12
+ * Ported from Subway_RN_Demo/scripts/context-engine.js for ThumbGate.
13
+ * Ported from Subway_RN_Demo/scripts/context-engine.js for thumbgate.
14
+ * PATH: PROJECT_ROOT = path.join(__dirname, '..') — 1 level up from scripts/
15
+ */
16
+
17
+ 'use strict';
18
+
19
+ const fs = require('fs');
20
+ const path = require('path');
21
+ const crypto = require('crypto');
22
+ const { constructContextPack } = require('./contextfs');
23
+ const { ensureDir } = require('./fs-utils');
24
+
25
+ // ---------------------------------------------------------------------------
26
+ // Default paths
27
+ // ---------------------------------------------------------------------------
28
+
29
// All default locations hang off the directory one level above this script.
const PROJECT_ROOT = path.join(__dirname, '..');
const DEFAULT_DOCS_DIR = path.join(PROJECT_ROOT, 'docs');

// The index, quality log and prompt registry all live in one shared directory.
const CONTEXT_ENGINE_DIR = path.join(PROJECT_ROOT, '.claude', 'context-engine');
const [DEFAULT_INDEX_PATH, DEFAULT_QUALITY_LOG_PATH, DEFAULT_REGISTRY_PATH] = [
  'knowledge-index.json',
  'quality-log.json',
  'prompt-registry.json',
].map((fileName) => path.join(CONTEXT_ENGINE_DIR, fileName));
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Category detection rules (from filename patterns)
38
+ // ---------------------------------------------------------------------------
39
+
40
// Rules are evaluated top to bottom and the first match wins, so the specific
// domain rules come before the broad ones. Example: "ANDROID_BUILD" must hit
// ANDROID (mobile-dev) before BUILD (ci-cd) gets a chance.
const CATEGORY_RULES = [
  ['mobile-dev', /ANDROID|IOS|EXPO|TURBOMODULE|DEVICE|METRO|MMKV/i],
  ['mcp-ai', /MCP|CONTEXT7|CLAUDE|AGENTIC|AGENT|(?:^|_)AI(?:_|\.)|MEMORY/i],
  ['security', /SECURITY|CVE|CODEQL|INJECTION|AUDIT|PERMISSION/i],
  ['testing', /TEST|COVERAGE|REASSURE|RNTL|MAESTRO|PERF/i],
  ['ado-git', /(?:^|_)ADO(?:_|\.)|AZURE|PR_|SQUASH|BRANCH|GITFLOW|GIT_/i],
  ['architecture', /ARCHITECTURE|FEATURE|PROJECT_STRUCTURE|PLUGIN|REDUX|CART/i],
  ['ci-cd', /(?:^|_)CI(?:_|\.)|(?:^|_)CD(?:_|\.)|BUILD|WORKFLOW|PIPELINE|FIREBASE|(?:^|_)ACT(?:_|\.)/i],
].map(([category, pattern]) => ({ category, pattern }));
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // MCP Consolidation Manifest
54
+ // ---------------------------------------------------------------------------
55
+
56
// Manifest of unified tool names: each entry lists the underlying sources it
// stands in for, plus a short human-readable description.
const TOOL_CONSOLIDATION = Object.fromEntries(
  [
    ['context:retrieve', 'Unified context retrieval', ['context7', 'knowledge-index']],
    ['memory:query', 'Unified memory access', ['jsonl-memory', 'lancedb-vectors']],
    ['quality:check', 'Unified quality gate', ['sonarqube', 'eslint', 'jest']],
    ['docs:lookup', 'Documentation lookup', ['context7', 'knowledge-bundles']],
  ].map(([tool, description, sources]) => [tool, { sources, description }]),
);
74
+
75
+ // ---------------------------------------------------------------------------
76
+ // Utility: ensure directory exists — ensureDir is imported from ./fs-utils above
77
+ // ---------------------------------------------------------------------------
78
+
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // Knowledge Bundle Builder
82
+ // ---------------------------------------------------------------------------
83
+
84
/**
 * Map a documentation filename onto a knowledge-bundle category.
 *
 * CATEGORY_RULES is consulted in order and the first rule whose pattern
 * matches the filename decides the category.
 *
 * @param {string} filename - The filename (e.g., "CI_FIXES.md")
 * @returns {string} The matching rule's category, or "general" when no rule matches
 */
function categorizeDoc(filename) {
  const matchedRule = CATEGORY_RULES.find(({ pattern }) => pattern.test(filename));
  return matchedRule ? matchedRule.category : 'general';
}
98
+
99
/**
 * Pull a title and a short summary out of a markdown file.
 *
 * The title is the first line that starts with "#" (heading markers removed);
 * when the file has no such line, or cannot be read, the filename without its
 * ".md" suffix is used instead. The summary joins, with single spaces, the
 * first three non-empty, non-heading lines that follow the title line (or
 * that start the file when there is no heading).
 *
 * @param {string} filePath - Absolute path to the markdown file
 * @returns {{ title: string, summary: string }} Extracted title and summary
 */
function extractDocSummary(filePath) {
  let text;
  try {
    text = fs.readFileSync(filePath, 'utf-8');
  } catch {
    // Unreadable file: fall back to the bare filename and an empty summary.
    return { title: path.basename(filePath, '.md'), summary: '' };
  }

  const trimmedLines = text.split('\n').map((line) => line.trim());
  const headingIdx = trimmedLines.findIndex((line) => line.startsWith('#'));

  const title =
    headingIdx === -1
      ? path.basename(filePath, '.md')
      : trimmedLines[headingIdx].replace(/^#+\s*/, '');

  // With no heading, headingIdx is -1 and the scan starts at the first line.
  const summary = trimmedLines
    .slice(headingIdx + 1)
    .filter((line) => line !== '' && !line.startsWith('#'))
    .slice(0, 3)
    .join(' ');

  return { title, summary };
}
139
+
140
/**
 * Scan a docs directory and build a pre-computed knowledge index.
 *
 * Every "*.md" file directly inside the directory is categorized by filename,
 * summarized, and grouped into a per-category bundle. Each bundle also
 * collects a de-duplicated keyword list taken from the doc titles and
 * filenames (lowercased, words longer than two characters, "md" excluded).
 *
 * Files are processed in sorted order so that the bundle contents, and hence
 * the checksum derived from them, do not depend on the directory listing
 * order reported by the operating system.
 *
 * The index is written to disk on a best-effort basis: a failed write is
 * ignored and the in-memory index is still returned. An unreadable docs
 * directory yields an index with no bundles.
 *
 * @param {string} [docsDir] - Path to the docs directory (default: project docs/)
 * @param {string} [outputPath] - Path to write the index JSON (default: .claude/context-engine/knowledge-index.json)
 * @returns {{
 *   bundles: Object<string, { category: string, docs: { filename: string, title: string, summary: string }[], keywords: string[] }>,
 *   metadata: { builtAt: string, docCount: number, version: string, checksum: string }
 * }} The generated index; `checksum` is the first 12 hex chars of the SHA-256 of the serialized bundles
 */
function buildKnowledgeIndex(docsDir, outputPath) {
  const docs = docsDir || DEFAULT_DOCS_DIR;
  const output = outputPath || DEFAULT_INDEX_PATH;

  // Scan for .md files; a missing or unreadable directory means "no docs".
  let files;
  try {
    files = fs
      .readdirSync(docs)
      .filter((f) => f.endsWith('.md'))
      .sort();
  } catch {
    files = [];
  }

  const bundles = {};
  // Per-category Set used only for O(1) duplicate checks; the bundle keeps the
  // ordered keyword array that is serialized into the index.
  const seenKeywords = new Map();

  for (const file of files) {
    const category = categorizeDoc(file);
    const { title, summary } = extractDocSummary(path.join(docs, file));

    if (!bundles[category]) {
      bundles[category] = { category, docs: [], keywords: [] };
      seenKeywords.set(category, new Set());
    }

    const bundle = bundles[category];
    bundle.docs.push({ filename: file, title, summary });

    // Extract keywords from title and filename
    const words = `${title} ${file.replace(/[._-]/g, ' ')}`
      .toLowerCase()
      .split(/\s+/)
      .filter((w) => w.length > 2 && w !== 'md');

    const seen = seenKeywords.get(category);
    for (const word of words) {
      if (!seen.has(word)) {
        seen.add(word);
        bundle.keywords.push(word);
      }
    }
  }

  const index = {
    bundles,
    metadata: {
      builtAt: new Date().toISOString(),
      docCount: files.length,
      version: '1.0.0',
      checksum: crypto
        .createHash('sha256')
        .update(JSON.stringify(bundles))
        .digest('hex')
        .slice(0, 12),
    },
  };

  // Persist to disk
  try {
    ensureDir(path.dirname(output));
    fs.writeFileSync(output, JSON.stringify(index, null, 2));
  } catch {
    // Non-critical — index still returned in memory
  }

  return index;
}
221
+
222
+ // ---------------------------------------------------------------------------
223
+ // Context Router
224
+ // ---------------------------------------------------------------------------
225
+
226
/**
 * Compute how relevant one knowledge bundle is to a tokenized query.
 *
 * A query token counts as a hit when it contains, or is contained in, at
 * least one bundle keyword; each token contributes at most one hit. The hit
 * count is divided by the square root of the number of docs in the bundle so
 * that large bundles do not win on size alone.
 *
 * @param {string[]} queryTokens - Lowercased query words
 * @param {{ keywords: string[], docs: object[] }} bundle - A knowledge bundle
 * @returns {number} Relevance score (higher is better); 0 when either side is empty
 */
function scoreBundle(queryTokens, bundle) {
  const { keywords } = bundle;
  if (!keywords.length || !queryTokens.length) {
    return 0;
  }

  const hits = queryTokens.filter((token) =>
    keywords.some((keyword) => keyword.includes(token) || token.includes(keyword)),
  ).length;

  // A bundle without docs is treated as size 1 to avoid dividing by zero.
  const docCount = bundle.docs.length || 1;
  return hits / Math.sqrt(docCount);
}
253
+
254
// Multiplier applied to the bundle whose category equals the detected intent.
const INTENT_BOOST_FACTOR = 1.5;

// Intent heuristics, highest priority first (the first matching rule wins).
// Patterns are tested against the lowercased query. "ci" must be a standalone
// word so that words such as "specific" or "decision" do not trigger ci-cd;
// the remaining keywords are matched as plain substrings.
const INTENT_RULES = [
  { category: 'mcp-ai', pattern: /memory|thumbgate/ },
  { category: 'mobile-dev', pattern: /mobile|android/ },
  { category: 'security', pattern: /security|audit/ },
  { category: 'ci-cd', pattern: /build|\bci\b/ },
  { category: 'testing', pattern: /test|jest/ },
];

/**
 * Base routing logic for bundles.
 *
 * Splits the query into lowercased words longer than two characters, scores
 * every bundle in the index with scoreBundle, drops bundles that score 0 and
 * returns the best `topN` in descending score order.
 *
 * @param {string} query - Natural-language query
 * @param {{ bundles?: object }} index - A knowledge index; a missing `bundles` map is treated as empty
 * @param {number} topN - Maximum number of bundles to return
 * @param {string|null} [boostCategory] - Category whose score is multiplied by INTENT_BOOST_FACTOR before ranking
 * @returns {{ category: string, score: number, docs: object[] }[]} Ranked bundles
 */
function baseRouteQuery(query, index, topN, boostCategory) {
  const queryTokens = query
    .toLowerCase()
    .split(/\s+/)
    .filter((w) => w.length > 2);

  const bundles = (index && index.bundles) || {};

  return Object.entries(bundles)
    .map(([category, bundle]) => {
      const score = scoreBundle(queryTokens, bundle);
      return {
        category,
        score: boostCategory && category === boostCategory ? score * INTENT_BOOST_FACTOR : score,
        docs: bundle.docs,
      };
    })
    .filter((entry) => entry.score > 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, topN);
}

/**
 * Route a natural-language query to the most relevant knowledge bundles.
 *
 * Adaptive Retrieval Loop (Agentic RAG):
 *   Step 1: Detect an intent category from keywords in the query.
 *   Step 2: Rank bundles, boosting the bundle that matches the intent.
 *   Step 3: When at least one bundle matched, also request a context pack
 *           from contextfs; any failure there is ignored.
 *
 * The index is read from `indexPath`; when it cannot be read or parsed it is
 * rebuilt with buildKnowledgeIndex and written back to the same path.
 *
 * @param {string} query - Natural-language query (e.g., "How do I fix Android build errors?")
 * @param {string} [indexPath] - Path to the knowledge index JSON
 * @param {number} [topN=3] - Number of top bundles to return; values that are not positive numbers fall back to 3
 * @returns {{
 *   query: string,
 *   intent: string|null,
 *   results: { category: string, score: number, docs: object[] }[],
 *   denseContext: object[],
 *   indexAge: string|undefined,
 *   retrievalType: 'adaptive'|'base'
 * }} Ranked bundles plus the detected intent, the context-pack items, and the index build time
 */
function routeQuery(query, indexPath, topN) {
  const idxPath = indexPath || DEFAULT_INDEX_PATH;
  // Zero and negative counts would make slice() return nothing or silently
  // drop trailing results, so anything that is not a positive number uses 3.
  const n = Number(topN) > 0 ? Number(topN) : 3;

  // Load index
  let index;
  try {
    index = JSON.parse(fs.readFileSync(idxPath, 'utf-8'));
  } catch {
    index = buildKnowledgeIndex(undefined, idxPath);
  }
  // Valid JSON that is not an object (e.g. "null") is treated as an empty index.
  if (!index || typeof index !== 'object') {
    index = { bundles: {} };
  }

  // Step 1: Intent Detection (Simple heuristic for now, can be LLM-backed)
  const lowerQuery = query.toLowerCase();
  const matchedRule = INTENT_RULES.find((rule) => rule.pattern.test(lowerQuery));
  const intentBoost = matchedRule ? matchedRule.category : null;

  // Step 2: Contextual Ranking
  const scored = baseRouteQuery(query, index, n, intentBoost);

  // Recursive Retrieval (EvoSkill Hardening)
  // Drill down to get high-density structured state documents
  let denseContext = null;
  if (scored.length > 0) {
    try {
      denseContext = constructContextPack({
        query,
        maxItems: 3,
        namespaces: ['rules', 'memoryLearning', 'memoryError'],
      });
    } catch {
      // Graceful fallback if contextfs is unavailable
    }
  }

  return {
    query,
    intent: intentBoost,
    results: scored,
    denseContext: denseContext ? denseContext.items : [],
    indexAge: index.metadata && index.metadata.builtAt,
    retrievalType: intentBoost ? 'adaptive' : 'base',
  };
}
362
+
363
+ // ---------------------------------------------------------------------------
364
+ // Quality Scorer
365
+ // ---------------------------------------------------------------------------
366
+
367
/**
 * Score retrieval quality by comparing retrieved docs against expected topics.
 *
 * Uses a precision/recall-style metric:
 *   - Precision: what fraction of retrieved docs are relevant to expected topics?
 *   - Recall: what fraction of expected topics are covered by retrieved docs?
 *
 * A doc and a topic match (case-insensitively) when the doc filename contains
 * the topic, or the topic contains the filename without its trailing ".md".
 * Empty topics and empty filename stems never match, because every string
 * "contains" the empty string. Missing or non-array inputs are treated as
 * empty lists. Every result is passed to logQualityResult before it is
 * returned. Scores are rounded to three decimals.
 *
 * @param {string} query - The original query
 * @param {string[]} retrievedDocs - Filenames of retrieved docs
 * @param {string[]} expectedTopics - Expected topic keywords to match against
 * @param {string} [logPath] - Optional path for the quality log
 * @returns {{ query: string, precision: number, recall: number, f1: number, retrievedCount: number, expectedCount: number, timestamp: string }}
 */
function scoreRetrievalQuality(query, retrievedDocs, expectedTopics, logPath) {
  const docs = Array.isArray(retrievedDocs) ? retrievedDocs : [];
  const topics = Array.isArray(expectedTopics) ? expectedTopics : [];

  // Single place that shapes, logs and returns the result.
  const finish = (precision, recall, f1) => {
    const result = {
      query,
      precision,
      recall,
      f1,
      retrievedCount: docs.length,
      expectedCount: topics.length,
      timestamp: new Date().toISOString(),
    };
    logQualityResult(result, logPath);
    return result;
  };

  if (!docs.length || !topics.length) {
    return finish(0, 0, 0);
  }

  const normalizedDocs = docs.map((d) => {
    const name = d.toLowerCase();
    // Strip only a trailing ".md"; ".md" elsewhere in the name is kept.
    return { name, stem: name.replace(/\.md$/, '') };
  });
  const normalizedTopics = topics.map((t) => t.toLowerCase());

  const isMatch = (doc, topic) =>
    (topic !== '' && doc.name.includes(topic)) || (doc.stem !== '' && topic.includes(doc.stem));

  // Precision: how many retrieved docs match at least one expected topic?
  const relevantRetrieved = normalizedDocs.filter((doc) =>
    normalizedTopics.some((topic) => isMatch(doc, topic)),
  ).length;
  const precision = relevantRetrieved / normalizedDocs.length;

  // Recall: how many expected topics are covered by at least one retrieved doc?
  const topicsCovered = normalizedTopics.filter((topic) =>
    normalizedDocs.some((doc) => isMatch(doc, topic)),
  ).length;
  const recall = topicsCovered / normalizedTopics.length;

  // F1 harmonic mean
  const f1 = precision + recall > 0 ? (2 * precision * recall) / (precision + recall) : 0;

  const round3 = (value) => Math.round(value * 1000) / 1000;
  return finish(round3(precision), round3(recall), round3(f1));
}
438
+
439
/**
 * Append one quality result, serialized as a single JSON line, to the quality log.
 *
 * The caller's object is copied, never modified; the copy gets a fresh ISO
 * timestamp when it has none. Persistence is best-effort: any error while
 * creating the directory or appending is ignored.
 *
 * @param {object} result - Quality score result object
 * @param {string} [logPath] - Path to the quality log file
 */
function logQualityResult(result, logPath) {
  const target = logPath || DEFAULT_QUALITY_LOG_PATH;

  // Ensure timestamp is always present
  const record = { ...result };
  record.timestamp = record.timestamp || new Date().toISOString();

  try {
    ensureDir(path.dirname(target));
    const line = JSON.stringify(record) + '\n';
    fs.appendFileSync(target, line);
  } catch {
    // Non-critical — scoring still works without persistence
  }
}
461
+
462
+ // ---------------------------------------------------------------------------
463
+ // Prompt Registry
464
+ // ---------------------------------------------------------------------------
465
+
466
/**
 * Read the prompt registry from disk.
 *
 * Returns an empty registry when the file does not exist, cannot be read, or
 * does not contain valid JSON.
 *
 * @param {string} [registryPath] - Path to the registry JSON
 * @returns {object} Map of prompt name → { template, metadata }
 */
function loadRegistry(registryPath) {
  const file = registryPath || DEFAULT_REGISTRY_PATH;
  let registry = {};

  try {
    if (fs.existsSync(file)) {
      const raw = fs.readFileSync(file, 'utf-8');
      registry = JSON.parse(raw);
    }
  } catch {
    // Corrupted file — start fresh
    registry = {};
  }

  return registry;
}
483
+
484
/**
 * Write the prompt registry to disk as pretty-printed JSON (2-space indent).
 *
 * Best-effort: errors while creating the directory, serializing, or writing
 * are ignored, so the in-memory registry remains usable.
 *
 * @param {object} registry - The full registry object
 * @param {string} [registryPath] - Path to the registry JSON
 */
function saveRegistry(registry, registryPath) {
  const target = registryPath || DEFAULT_REGISTRY_PATH;

  try {
    ensureDir(path.dirname(target));
    const serialized = JSON.stringify(registry, null, 2);
    fs.writeFileSync(target, serialized);
  } catch {
    // Non-critical — registry still works in memory
  }
}
499
+
500
/**
 * Store a prompt template in the registry together with its metadata.
 *
 * The registry is loaded, the entry under `name` is created or overwritten,
 * and the registry is saved again. Missing metadata fields default to
 * version "1.0.0", category "general" and an empty model list;
 * `lastUpdated` is always set to the current time.
 *
 * @param {string} name - Unique prompt name (e.g., "code-review-system")
 * @param {string} template - The prompt template string
 * @param {{ version: string, models: string[], category: string }} metadata - Prompt metadata;
 *   a single `model` string is accepted in place of `models`
 * @param {string} [registryPath] - Path to the registry JSON
 * @returns {{ name: string, registered: boolean }} Registration result
 */
function registerPrompt(name, template, metadata, registryPath) {
  const registry = loadRegistry(registryPath);
  const meta = metadata || {};

  // Support both metadata.models (array) and metadata.model (single string)
  const models = meta.models ? meta.models : meta.model ? [meta.model] : [];

  registry[name] = {
    template,
    metadata: {
      version: meta.version || '1.0.0',
      models,
      category: meta.category || 'general',
      lastUpdated: new Date().toISOString(),
    },
  };

  saveRegistry(registry, registryPath);
  return { name, registered: true };
}
533
+
534
/**
 * Look up a registered prompt, optionally enforcing model compatibility.
 *
 * When `modelId` is given and the prompt lists at least one model, the prompt
 * is returned only if `modelId` is in that list. A prompt with an empty model
 * list is compatible with every model.
 *
 * @param {string} name - Prompt name to look up
 * @param {string} [modelId] - Optional model ID to check compatibility
 * @param {string} [registryPath] - Path to the registry JSON
 * @returns {{ name: string, template: string, metadata: object, compatible: boolean }|null}
 *   The prompt, or null when it is unknown or not compatible with `modelId`
 */
function getPrompt(name, modelId, registryPath) {
  const entry = loadRegistry(registryPath)[name];
  if (!entry) {
    return null;
  }

  // The model list is only consulted when a specific model was requested.
  if (modelId) {
    const allowedModels = entry.metadata.models;
    if (allowedModels.length > 0 && !allowedModels.includes(modelId)) {
      return null;
    }
  }

  // Every prompt that reaches this point passed the compatibility check.
  return {
    name,
    template: entry.template,
    metadata: entry.metadata,
    compatible: true,
  };
}
563
+
564
/**
 * Enumerate every prompt in the registry along with its metadata.
 *
 * Templates are not included in the listing.
 *
 * @param {string} [registryPath] - Path to the registry JSON
 * @returns {{ name: string, metadata: object }[]} Array of prompt entries
 */
function listPrompts(registryPath) {
  const registry = loadRegistry(registryPath);
  const listing = [];

  for (const name of Object.keys(registry)) {
    listing.push({ name, metadata: registry[name].metadata });
  }

  return listing;
}
578
+
579
+ // ---------------------------------------------------------------------------
580
+ // Adaptive Context Compaction (OpenDev 5-stage algorithm)
581
+ // ---------------------------------------------------------------------------
582
+
583
/**
 * Compact a set of feedback entries using a progressive multi-stage algorithm.
 *
 * Entries are assumed to be ordered oldest → newest; that order is preserved
 * through every stage so "most recent" always means "closest to the end".
 *
 *   Stage 1: Keep only the 10 most recent entries of each signal type
 *   Stage 2: Truncate `context` / `whatWentWrong` to perEntryMaxChars
 *   Stage 3: Drop entries missing both context and whatWentWrong
 *   Stage 4: Window to the most recent windowSize entries
 *   Stage 5: Deduplicate entries with identical whatWentWrong
 *            (trimmed, case-insensitive; the first occurrence is kept)
 *   Stage 6: (only when totalMaxChars is set) walk from newest to oldest and
 *            keep entries while their JSON size fits the remaining budget
 *
 * Entries whose `id` matches an anchor bypass all stages and are returned
 * unmodified at the front of the result. Input entries are never mutated.
 *
 * @param {object[]} entries - Feedback log entries (a non-array is treated as empty)
 * @param {object[]} [anchors=[]] - Anchor entries to always preserve
 * @param {{ windowSize?: number, perEntryMaxChars?: number, totalMaxChars?: number }} [opts={}]
 *   windowSize defaults to 30, perEntryMaxChars to 512; totalMaxChars is unlimited when omitted
 * @returns {{ entries: object[], stage: number, removedCount: number, compacted: boolean }}
 *   `stage` is 6 when the total budget removed entries, otherwise 5;
 *   `removedCount` counts non-anchor entries that were dropped
 */
function compactContext(entries, anchors, opts) {
  const allEntries = Array.isArray(entries) ? entries : [];
  const anchorIds = new Set((anchors || []).map((a) => a.id));
  const options = opts || {};
  // Floor and clamp so a zero or fractional window behaves predictably.
  const windowSize =
    typeof options.windowSize === 'number' ? Math.max(0, Math.floor(options.windowSize)) : 30;
  const perEntryMaxChars = typeof options.perEntryMaxChars === 'number' ? options.perEntryMaxChars : 512;
  const totalMaxChars = typeof options.totalMaxChars === 'number' ? options.totalMaxChars : null;
  const perSignalLimit = 10;

  const anchorEntries = allEntries.filter((e) => anchorIds.has(e.id));
  let working = allEntries.filter((e) => !anchorIds.has(e.id));
  const initial = working.length;

  // Stage 1: Keep the most recent 10 per signal type. Walking backwards and
  // flagging survivors (instead of regrouping) leaves the chronological order
  // intact for the later "most recent" stages. A Map avoids clashes between
  // signal names and Object.prototype keys.
  const keptPerSignal = new Map();
  const keep = new Array(working.length).fill(false);
  for (let i = working.length - 1; i >= 0; i--) {
    const sig = working[i].signal || 'unknown';
    const kept = keptPerSignal.get(sig) || 0;
    if (kept < perSignalLimit) {
      keptPerSignal.set(sig, kept + 1);
      keep[i] = true;
    }
  }
  working = working.filter((_, i) => keep[i]);

  // Stage 2: Truncate large text fields (on copies, never on the input objects)
  working = working.map((entry) => {
    const truncated = { ...entry };
    if (truncated.context && truncated.context.length > perEntryMaxChars) {
      truncated.context = truncated.context.slice(0, perEntryMaxChars);
    }
    if (truncated.whatWentWrong && truncated.whatWentWrong.length > perEntryMaxChars) {
      truncated.whatWentWrong = truncated.whatWentWrong.slice(0, perEntryMaxChars);
    }
    return truncated;
  });

  // Stage 3: Drop low-information entries (empty context AND empty whatWentWrong)
  working = working.filter(
    (e) => (e.context && e.context.trim()) || (e.whatWentWrong && e.whatWentWrong.trim()),
  );

  // Stage 4: Window to most recent N. An explicit start index is used because
  // slice(-0) is slice(0) and would keep everything for a window of zero.
  if (working.length > windowSize) {
    working = working.slice(working.length - windowSize);
  }

  // Stage 5: Deduplicate by whatWentWrong fingerprint
  const seen = new Set();
  working = working.filter((e) => {
    const key = e.whatWentWrong ? e.whatWentWrong.trim().toLowerCase() : null;
    if (!key) return true;
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });

  // Stage 6: Global budget — drop entries (oldest first) until total chars fit
  let finalStage = 5;
  if (totalMaxChars !== null) {
    let budget = totalMaxChars;
    const budgeted = [];
    // Iterate newest-first so most recent entries are preserved
    for (let i = working.length - 1; i >= 0; i--) {
      const entrySize = JSON.stringify(working[i]).length;
      if (budget - entrySize < 0) break;
      budget -= entrySize;
      budgeted.unshift(working[i]);
    }
    if (budgeted.length < working.length) {
      working = budgeted;
      finalStage = 6;
    }
  }

  const removedCount = initial - working.length;
  return {
    entries: [...anchorEntries, ...working],
    stage: finalStage,
    removedCount,
    compacted: removedCount > 0,
  };
}
675
+
676
+ // ---------------------------------------------------------------------------
677
+ // Exports
678
+ // ---------------------------------------------------------------------------
679
+
680
+ module.exports = {
681
+ // Knowledge Bundle Builder
682
+ buildKnowledgeIndex,
683
+ categorizeDoc,
684
+ extractDocSummary,
685
+
686
+ // Context Router
687
+ routeQuery,
688
+ scoreBundle,
689
+
690
+ // Quality Scorer
691
+ scoreRetrievalQuality,
692
+ logQualityResult,
693
+
694
+ // Prompt Registry
695
+ registerPrompt,
696
+ getPrompt,
697
+ listPrompts,
698
+
699
+ // Adaptive Context Compaction
700
+ compactContext,
701
+
702
+ // MCP Consolidation Manifest
703
+ TOOL_CONSOLIDATION,
704
+
705
+ // Constants (for testing / external use)
706
+ CATEGORY_RULES,
707
+ DEFAULT_INDEX_PATH,
708
+ DEFAULT_QUALITY_LOG_PATH,
709
+ DEFAULT_REGISTRY_PATH,
710
+ };