rlhf-feedback-loop 0.6.9 → 0.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,10 @@
1
1
  # RLHF-Ready Feedback Loop — Agentic Control Plane & Context Engineering Studio
2
2
 
3
3
  [![CI](https://github.com/IgorGanapolsky/rlhf-feedback-loop/actions/workflows/ci.yml/badge.svg)](https://github.com/IgorGanapolsky/rlhf-feedback-loop/actions/workflows/ci.yml)
4
+ [![Self-Healing](https://github.com/IgorGanapolsky/rlhf-feedback-loop/actions/workflows/self-healing-monitor.yml/badge.svg)](https://github.com/IgorGanapolsky/rlhf-feedback-loop/actions/workflows/self-healing-monitor.yml)
5
+ [![npm](https://img.shields.io/npm/v/rlhf-feedback-loop)](https://www.npmjs.com/package/rlhf-feedback-loop)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
7
+ [![Node](https://img.shields.io/badge/node-%3E%3D18.18.0-brightgreen)](package.json)
4
8
  [![Marketplace Ready](https://img.shields.io/badge/Anthropic_Marketplace-Ready-blue)](docs/ANTHROPIC_MARKETPLACE_STRATEGY.md)
5
9
  [![GEO Optimized](https://img.shields.io/badge/GEO-optimized-orange)](docs/geo-strategy-for-ai-agents.md)
6
10
 
@@ -84,7 +88,7 @@ All data stored locally as **JSONL** files — fully transparent, fully portable
84
88
 
85
89
  The open-source package is fully functional and free forever. Cloud Pro is for teams that don't want to self-host.
86
90
 
87
- | | Open Source | Cloud Pro ($49/mo) |
91
+ | | Open Source | Cloud Pro (Founding price: $10/mo) |
88
92
  |---|---|---|
89
93
  | Feedback capture | Local MCP server | Hosted HTTPS API |
90
94
  | Storage | Your machine | Managed cloud |
@@ -94,7 +98,7 @@ The open-source package is fully functional and free forever. Cloud Pro is for t
94
98
  | Support | GitHub Issues | Email |
95
99
  | Uptime | You manage | We manage (99.9% SLA) |
96
100
 
97
- [Get Cloud Pro](https://buy.stripe.com/bJe14neyU4r4f0leOD3sI02) | [Live API](https://rlhf-feedback-loop-710216278770.us-central1.run.app) | [Verification Evidence](docs/VERIFICATION_EVIDENCE.md)
101
+ [Get Cloud Pro ($10/mo)](https://buy.stripe.com/bJe14neyU4r4f0leOD3sI02) | [Live API](https://rlhf-feedback-loop-710216278770.us-central1.run.app) | [Verification Evidence](docs/VERIFICATION_EVIDENCE.md)
98
102
 
99
103
  ## Deep Dive
100
104
 
package/adapters/mcp/server-stdio.js CHANGED
@@ -655,12 +655,22 @@ async function onData(chunk) {
655
655
  function startStdioServer() {
656
656
  if (stdioStarted) return;
657
657
  stdioStarted = true;
658
+
659
+ // Keep the process alive even if stdin closes (prevents premature exit
660
+ // when launched by MCP clients like Claude Code, Codex, Gemini CLI).
661
+ const keepAlive = setInterval(() => {}, 60_000);
662
+
663
+ process.stdin.resume();
658
664
  process.stdin.on('data', (chunk) => {
659
665
  onData(chunk).catch((err) => {
660
666
  const transport = err && err.transport === 'ndjson' ? 'ndjson' : 'framed';
661
667
  writeMessage({ jsonrpc: '2.0', id: null, error: { code: -32603, message: err.message } }, transport);
662
668
  });
663
669
  });
670
+ process.stdin.on('end', () => {
671
+ // stdin closed — clean up and exit gracefully
672
+ clearInterval(keepAlive);
673
+ });
664
674
  }
665
675
 
666
676
  module.exports = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rlhf-feedback-loop",
3
- "version": "0.6.9",
3
+ "version": "0.6.11",
4
4
  "description": "RLHF-ready human feedback capture and DPO data pipeline for AI agents. Optimize agentic reliability with Feedback-Driven Development (FDD): capture preference signals, enforce guardrails, and export training pairs for downstream optimization.",
5
5
  "homepage": "https://github.com/IgorGanapolsky/rlhf-feedback-loop#readme",
6
6
  "repository": {
@@ -33,7 +33,7 @@
33
33
  "test:schema": "node scripts/feedback-schema.js --test",
34
34
  "test:loop": "node scripts/feedback-loop.js --test",
35
35
  "test:dpo": "node scripts/export-dpo-pairs.js --test",
36
- "test:api": "node --test tests/api-server.test.js tests/api-auth-config.test.js tests/mcp-server.test.js tests/adapters.test.js tests/openapi-parity.test.js tests/budget-guard.test.js tests/contextfs.test.js tests/mcp-policy.test.js tests/subagent-profiles.test.js tests/intent-router.test.js tests/rubric-engine.test.js tests/self-healing-check.test.js tests/self-heal.test.js tests/feedback-schema.test.js tests/thompson-sampling.test.js tests/feedback-sequences.test.js tests/diversity-tracking.test.js tests/vector-store.test.js tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js tests/loop-closure.test.js tests/code-reasoning.test.js tests/feedback-loop.test.js tests/feedback-inbox-read.test.js tests/feedback-to-memory.test.js",
36
+ "test:api": "node --test tests/api-server.test.js tests/api-auth-config.test.js tests/mcp-server.test.js tests/adapters.test.js tests/openapi-parity.test.js tests/budget-guard.test.js tests/contextfs.test.js tests/mcp-policy.test.js tests/subagent-profiles.test.js tests/intent-router.test.js tests/rubric-engine.test.js tests/self-healing-check.test.js tests/self-heal.test.js tests/feedback-schema.test.js tests/thompson-sampling.test.js tests/feedback-sequences.test.js tests/diversity-tracking.test.js tests/vector-store.test.js tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js tests/loop-closure.test.js tests/code-reasoning.test.js tests/feedback-loop.test.js tests/feedback-inbox-read.test.js tests/feedback-to-memory.test.js tests/test-coverage.test.js tests/version-metadata.test.js",
37
37
  "test:proof": "node --test --test-concurrency=1 tests/prove-adapters.test.js tests/prove-automation.test.js tests/prove-attribution.test.js tests/prove-lancedb.test.js tests/prove-data-quality.test.js tests/prove-intelligence.test.js tests/prove-loop-closure.test.js tests/prove-subway-upgrades.test.js tests/prove-training-export.test.js",
38
38
  "test:rlaif": "node --test tests/rlaif-self-audit.test.js tests/dpo-optimizer.test.js tests/meta-policy.test.js",
39
39
  "test:attribution": "node --test tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js",
@@ -43,6 +43,7 @@
43
43
  "test:deployment": "node --test tests/deployment.test.js",
44
44
  "test:billing": "node --test tests/billing.test.js",
45
45
  "test:cli": "node --test tests/cli.test.js",
46
+ "test:coverage": "node scripts/test-coverage.js",
46
47
  "start:api": "node src/api/server.js",
47
48
  "start:mcp": "node adapters/mcp/server-stdio.js",
48
49
  "feedback:capture": "node .claude/scripts/feedback/capture-feedback.js",
@@ -75,7 +76,9 @@
75
76
  "ml:train": "python3 scripts/train_from_feedback.py --train",
76
77
  "ml:incremental": "python3 scripts/train_from_feedback.py --incremental",
77
78
  "ml:reliability": "python3 scripts/train_from_feedback.py --reliability",
78
- "ml:sample": "python3 scripts/train_from_feedback.py --sample"
79
+ "ml:sample": "python3 scripts/train_from_feedback.py --sample",
80
+ "adk:consolidate": "node scripts/adk-consolidator.js",
81
+ "adk:watch": "node scripts/adk-consolidator.js --watch"
79
82
  },
80
83
  "keywords": [
81
84
  "rlhf",
@@ -115,10 +118,10 @@
115
118
  "node": ">=18.18.0"
116
119
  },
117
120
  "dependencies": {
121
+ "@google/genai": "^1.44.0",
118
122
  "@huggingface/transformers": "^3.8.1",
119
123
  "@lancedb/lancedb": "^0.26.2",
120
- "apache-arrow": "^18.1.0",
121
- "stripe": "^20.4.1"
124
+ "apache-arrow": "^18.1.0"
122
125
  },
123
126
  "mcpName": "io.github.IgorGanapolsky/rlhf-feedback-loop"
124
127
  }
package/scripts/adk-consolidator.js ADDED
@@ -0,0 +1,173 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Agent Development Kit (ADK) Memory Consolidator
4
+ *
5
+ * 'Always-On' background service that reads disparate feedback logs and uses
6
+ * Gemini (Flash-Lite/Flash) to actively consolidate, compress, and dream up
7
+ * generalized prevention rules. This moves the system from 'passive logging'
8
+ * to 'active semantic memory consolidation'.
9
+ */
10
+
11
+ 'use strict';
12
+
13
+ const fs = require('fs');
14
+ const path = require('path');
15
+ const { GoogleGenAI } = require('@google/genai');
16
+
17
+ const PROJECT_ROOT = path.join(__dirname, '..');
18
+ const { getFeedbackPaths, readJSONL } = require('./feedback-loop');
19
+
20
+ // Keep track of the last processed ID to avoid re-consolidating the exact same logs
21
+ const STATE_FILE = process.env.ADK_STATE_FILE || path.join(PROJECT_ROOT, '.rlhf', 'adk-state.json');
22
+
23
+ function ensureDir(dirPath) {
24
+ if (!fs.existsSync(dirPath)) {
25
+ fs.mkdirSync(dirPath, { recursive: true });
26
+ }
27
+ }
28
+
29
+ function loadState() {
30
+ if (fs.existsSync(STATE_FILE)) {
31
+ try {
32
+ return JSON.parse(fs.readFileSync(STATE_FILE, 'utf-8'));
33
+ } catch {
34
+ return { lastProcessedFeedbackId: null };
35
+ }
36
+ }
37
+ return { lastProcessedFeedbackId: null };
38
+ }
39
+
40
+ function saveState(state) {
41
+ ensureDir(path.dirname(STATE_FILE));
42
+ fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2));
43
+ }
44
+
45
+ async function consolidateMemory() {
46
+ const apiKey = process.env.GEMINI_API_KEY;
47
+ if (!apiKey) {
48
+ console.warn('[ADK Consolidator] GEMINI_API_KEY is not set. Skipping active consolidation.');
49
+ return;
50
+ }
51
+
52
+ const ai = new GoogleGenAI({ apiKey });
53
+ const paths = getFeedbackPaths();
54
+ const state = loadState();
55
+
56
+ const allLogs = readJSONL(paths.FEEDBACK_LOG_PATH);
57
+
58
+ if (allLogs.length === 0) {
59
+ console.log('[ADK Consolidator] No logs to consolidate.');
60
+ return;
61
+ }
62
+
63
+ // Find where we left off
64
+ let newLogs = [];
65
+ if (state.lastProcessedFeedbackId) {
66
+ const lastIdx = allLogs.findIndex(l => l.id === state.lastProcessedFeedbackId);
67
+ if (lastIdx !== -1) {
68
+ newLogs = allLogs.slice(lastIdx + 1);
69
+ } else {
70
+ // If we can't find it (log rotation?), just take the last 50
71
+ newLogs = allLogs.slice(-50);
72
+ }
73
+ } else {
74
+ // First time running, process up to last 50 entries
75
+ newLogs = allLogs.slice(-50);
76
+ }
77
+
78
+ if (newLogs.length === 0) {
79
+ console.log('[ADK Consolidator] No new logs since last consolidation cycle.');
80
+ return;
81
+ }
82
+
83
+ console.log(`[ADK Consolidator] Found ${newLogs.length} new feedback events. Activating Gemini for semantic consolidation...`);
84
+
85
+ const prompt = `
86
+ You are the Agent Development Kit (ADK) 'Always-On' Memory Consolidator.
87
+ Your job is to read the raw, disparate feedback logs of an AI agent and synthesize them into high-level, generalized prevention rules and learned intuitions.
88
+ Unlike standard systems that just count regex matches, you must semantically connect different failures (e.g., an API timeout and a missing import might both stem from 'rushing execution without verifying environment').
89
+
90
+ Here are the latest feedback events (JSON):
91
+ ${JSON.stringify(newLogs.map(l => ({ signal: l.signal, context: l.context, tags: l.tags, whatWentWrong: l.whatWentWrong, whatWorked: l.whatWorked })), null, 2)}
92
+
93
+ Existing Prevention Rules (if any):
94
+ ${fs.existsSync(paths.PREVENTION_RULES_PATH) ? fs.readFileSync(paths.PREVENTION_RULES_PATH, 'utf-8').slice(0, 2000) : 'None yet.'}
95
+
96
+ Output ONLY a valid JSON object with the following structure, representing the new synthesized insights:
97
+ {
98
+ "consolidatedInsights": [
99
+ {
100
+ "pattern": "Description of the underlying behavioral flaw or success pattern you detected.",
101
+ "rule": "A clear, actionable directive starting with 'ALWAYS' or 'NEVER' that should be added to prevention rules.",
102
+ "severity": "critical|high|medium|low"
103
+ }
104
+ ],
105
+ "reasoning": "A short summary of how you connected the dots between these logs."
106
+ }
107
+ `;
108
+
109
+ try {
110
+ // We use gemini-2.5-flash as the proxy for Flash-Lite/Flash efficiency
111
+ const response = await ai.models.generateContent({
112
+ model: 'gemini-2.5-flash',
113
+ contents: prompt,
114
+ config: {
115
+ responseMimeType: "application/json",
116
+ }
117
+ });
118
+
119
+ const result = JSON.parse(response.text);
120
+ console.log(`[ADK Consolidator] Consolidation complete. Reasoning: ${result.reasoning}`);
121
+
122
+ if (result.consolidatedInsights && result.consolidatedInsights.length > 0) {
123
+ appendRules(result.consolidatedInsights, paths.PREVENTION_RULES_PATH);
124
+ }
125
+
126
+ // Update state
127
+ state.lastProcessedFeedbackId = newLogs[newLogs.length - 1].id;
128
+ saveState(state);
129
+
130
+ } catch (err) {
131
+ console.error('[ADK Consolidator] Consolidation failed:', err.message);
132
+ }
133
+ }
134
+
135
+ function appendRules(insights, rulesPath) {
136
+ let existingContent = '';
137
+ if (fs.existsSync(rulesPath)) {
138
+ existingContent = fs.readFileSync(rulesPath, 'utf-8');
139
+ } else {
140
+ existingContent = '# Prevention Rules\n\nGenerated from active semantic memory consolidation.\n\n';
141
+ }
142
+
143
+ let newRulesBlock = '\n## ADK Semantic Consolidations\n';
144
+ const timestamp = new Date().toISOString();
145
+ insights.forEach(insight => {
146
+ newRulesBlock += `- [${insight.severity.toUpperCase()}] **${insight.pattern}**\n - Rule: ${insight.rule} *(Consolidated at ${timestamp})*\n`;
147
+ });
148
+
149
+ const updatedContent = existingContent + newRulesBlock;
150
+ ensureDir(path.dirname(rulesPath));
151
+ fs.writeFileSync(rulesPath, updatedContent);
152
+ console.log(`[ADK Consolidator] Appended ${insights.length} new consolidated rules to ${rulesPath}`);
153
+ }
154
+
155
+ if (require.main === module) {
156
+ const args = process.argv.slice(2);
157
+ const isWatchMode = args.includes('--watch');
158
+
159
+ if (isWatchMode) {
160
+ console.log('[ADK Consolidator] Started in Always-On Watch Mode (interval: 5 minutes)');
161
+ consolidateMemory(); // Run once immediately
162
+ setInterval(() => {
163
+ consolidateMemory();
164
+ }, 5 * 60 * 1000); // Check every 5 minutes
165
+ } else {
166
+ consolidateMemory().then(() => {
167
+ console.log('[ADK Consolidator] Cycle finished.');
168
+ process.exit(0);
169
+ });
170
+ }
171
+ }
172
+
173
+ module.exports = { consolidateMemory };
@@ -26,7 +26,7 @@ const crypto = require('crypto');
26
26
  const STRIPE_SECRET_KEY = process.env.STRIPE_SECRET_KEY || '';
27
27
  const STRIPE_WEBHOOK_SECRET = process.env.STRIPE_WEBHOOK_SECRET || '';
28
28
  const GITHUB_MARKETPLACE_WEBHOOK_SECRET = process.env.GITHUB_MARKETPLACE_WEBHOOK_SECRET || '';
29
- const STRIPE_PRICE_ID = process.env.STRIPE_PRICE_ID || 'price_cloud_pro_49_monthly';
29
+ const STRIPE_PRICE_ID = process.env.STRIPE_PRICE_ID || '';
30
30
 
31
31
  const API_KEYS_PATH = process.env._TEST_API_KEYS_PATH || path.resolve(
32
32
  __dirname,
@@ -371,6 +371,10 @@ async function createCheckoutSession({ successUrl, cancelUrl, customerEmail, ins
371
371
  };
372
372
  }
373
373
 
374
+ if (!STRIPE_PRICE_ID) {
375
+ throw new Error('STRIPE_PRICE_ID not configured');
376
+ }
377
+
374
378
  const params = {
375
379
  mode: 'subscription',
376
380
  line_items: [
@@ -71,12 +71,33 @@ function addEdgeCase(trace, description) {
71
71
  return trace;
72
72
  }
73
73
 
74
+ function computeControllability(trace) {
75
+ const steps = trace.steps;
76
+ const edgeCases = trace.edgeCases;
77
+ if (steps.length === 0) return { score: 0, flags: ['empty_trace'] };
78
+
79
+ const flags = [];
80
+ const allVerified = steps.every((s) => s.verdict === 'verified');
81
+ const allSameEvidence = new Set(steps.map((s) => s.evidence)).size === 1 && steps.length > 1;
82
+ const shortEvidence = steps.filter((s) => s.evidence.length < 10).length;
83
+ const noEdgeCases = edgeCases.length === 0;
84
+
85
+ if (allVerified && steps.length > 2) flags.push('all_verified');
86
+ if (allSameEvidence) flags.push('identical_evidence');
87
+ if (shortEvidence > steps.length / 2) flags.push('thin_evidence');
88
+ if (noEdgeCases && steps.length > 1) flags.push('no_edge_cases');
89
+
90
+ const score = Math.round((flags.length / 4) * 1000) / 1000;
91
+ return { score, flags };
92
+ }
93
+
74
94
  function finalizeTrace(trace, { confidenceThreshold = DEFAULT_CONFIDENCE_THRESHOLD } = {}) {
75
95
  const totalSteps = trace.steps.length;
76
96
  const verified = trace.steps.filter((s) => s.verdict === 'verified').length;
77
97
  const unverified = trace.steps.filter((s) => s.verdict === 'unverified').length;
78
98
  const refuted = trace.steps.filter((s) => s.verdict === 'refuted').length;
79
99
  const confidence = totalSteps > 0 ? Math.round((verified / totalSteps) * 1000) / 1000 : 0;
100
+ const ctrl = computeControllability(trace);
80
101
 
81
102
  trace.summary = {
82
103
  totalSteps,
@@ -85,6 +106,8 @@ function finalizeTrace(trace, { confidenceThreshold = DEFAULT_CONFIDENCE_THRESHO
85
106
  refuted,
86
107
  confidence,
87
108
  passed: confidence >= confidenceThreshold && refuted === 0,
109
+ controllability: ctrl.score,
110
+ controllabilityFlags: ctrl.flags,
88
111
  };
89
112
 
90
113
  return trace;
@@ -291,6 +314,7 @@ function aggregateTraces(traces) {
291
314
  refuted,
292
315
  averageConfidence: avgConfidence,
293
316
  allPassed: passedTraces === totalTraces,
317
+ flaggedTraces: traces.filter((t) => t.summary && t.summary.controllability > 0.5).length,
294
318
  };
295
319
  }
296
320
 
@@ -298,6 +322,7 @@ module.exports = {
298
322
  createTrace,
299
323
  addStep,
300
324
  addEdgeCase,
325
+ computeControllability,
301
326
  finalizeTrace,
302
327
  traceForSelfHealFix,
303
328
  traceForDpoPair,
@@ -305,4 +330,4 @@ module.exports = {
305
330
  aggregateTraces,
306
331
  DEFAULT_CONFIDENCE_THRESHOLD,
307
332
  };
308
- // test coverage: 573 tests
333
+ // Tests cover this module through the node:test suite; avoid hardcoding counts here.
@@ -273,7 +273,7 @@ function routeQuery(query, indexPath, topN) {
273
273
  index = JSON.parse(fs.readFileSync(idxPath, 'utf-8'));
274
274
  } catch {
275
275
  // Index doesn't exist — build it on the fly
276
- index = buildKnowledgeIndex();
276
+ index = buildKnowledgeIndex(undefined, idxPath);
277
277
  }
278
278
 
279
279
  const queryTokens = query
@@ -312,9 +312,10 @@ function routeQuery(query, indexPath, topN) {
312
312
  * @param {string} query - The original query
313
313
  * @param {string[]} retrievedDocs - Filenames of retrieved docs
314
314
  * @param {string[]} expectedTopics - Expected topic keywords to match against
315
+ * @param {string} [logPath] - Optional path for the quality log
315
316
  * @returns {{ precision: number, recall: number, f1: number, query: string, timestamp: string }}
316
317
  */
317
- function scoreRetrievalQuality(query, retrievedDocs, expectedTopics) {
318
+ function scoreRetrievalQuality(query, retrievedDocs, expectedTopics, logPath) {
318
319
  if (!retrievedDocs.length || !expectedTopics.length) {
319
320
  const result = {
320
321
  query,
@@ -325,7 +326,7 @@ function scoreRetrievalQuality(query, retrievedDocs, expectedTopics) {
325
326
  expectedCount: expectedTopics.length,
326
327
  timestamp: new Date().toISOString(),
327
328
  };
328
- logQualityResult(result);
329
+ logQualityResult(result, logPath);
329
330
  return result;
330
331
  }
331
332
 
@@ -369,7 +370,7 @@ function scoreRetrievalQuality(query, retrievedDocs, expectedTopics) {
369
370
  timestamp: new Date().toISOString(),
370
371
  };
371
372
 
372
- logQualityResult(result);
373
+ logQualityResult(result, logPath);
373
374
  return result;
374
375
  }
375
376
 
@@ -218,6 +218,7 @@ function writeContextObject({ namespace, title, content, tags = [], source, ttl
218
218
  };
219
219
 
220
220
  writeJson(filePath, doc);
221
+ indexContextObject(doc, filePath);
221
222
 
222
223
  recordProvenance({
223
224
  type: 'context_object_created',
@@ -355,6 +356,130 @@ function scoreDocument(doc, queryTokens) {
355
356
  return score;
356
357
  }
357
358
 
359
+ /* ── Memex-style Indexed Memory ────────────────────────────────── */
360
+
361
+ const MEMEX_INDEX_FILE = 'memex-index.jsonl';
362
+
363
+ function getMemexIndexPath() {
364
+ return path.join(CONTEXTFS_ROOT, NAMESPACES.provenance, MEMEX_INDEX_FILE);
365
+ }
366
+
367
+ function buildIndexEntry(doc, filePath) {
368
+ return {
369
+ id: doc.id,
370
+ namespace: doc.namespace || '',
371
+ title: doc.title || '',
372
+ tags: doc.tags || [],
373
+ digest: String(doc.content || '').slice(0, 120),
374
+ createdAt: doc.createdAt || nowIso(),
375
+ stableRef: filePath,
376
+ };
377
+ }
378
+
379
+ function indexContextObject(doc, filePath) {
380
+ const entry = buildIndexEntry(doc, filePath);
381
+ appendJsonl(getMemexIndexPath(), entry);
382
+ return entry;
383
+ }
384
+
385
+ function loadMemexIndex() {
386
+ return readJsonl(getMemexIndexPath());
387
+ }
388
+
389
+ function dereferenceEntry(entry) {
390
+ if (!entry || !entry.stableRef) return null;
391
+ try {
392
+ return JSON.parse(fs.readFileSync(entry.stableRef, 'utf-8'));
393
+ } catch {
394
+ return null;
395
+ }
396
+ }
397
+
398
+ function searchMemexIndex({ query = '', maxResults = 10, namespaces = [] } = {}) {
399
+ const index = loadMemexIndex();
400
+ const tokens = tokenizeQuery(query);
401
+ const nsFilter = namespaces.length > 0 ? new Set(normalizeNamespaces(namespaces)) : null;
402
+
403
+ const scored = index
404
+ .filter((entry) => !nsFilter || nsFilter.has(entry.namespace))
405
+ .map((entry) => {
406
+ const haystack = `${entry.title} ${entry.digest} ${(entry.tags || []).join(' ')}`.toLowerCase();
407
+ let score = 0;
408
+ tokens.forEach((t) => { if (t.length > 2 && haystack.includes(t)) score += 3; });
409
+ if (entry.namespace.includes('memory/error')) score += 1;
410
+ if (entry.namespace.includes('memory/learning')) score += 1;
411
+ if (entry.createdAt) {
412
+ const hours = (Date.now() - new Date(entry.createdAt).getTime()) / 3_600_000;
413
+ if (Number.isFinite(hours)) {
414
+ if (hours < 24) score += 2;
415
+ else if (hours < 168) score += 1;
416
+ }
417
+ }
418
+ return { entry, score };
419
+ })
420
+ .filter((x) => x.score > 0)
421
+ .sort((a, b) => b.score - a.score)
422
+ .slice(0, maxResults);
423
+
424
+ return scored.map((x) => ({ ...x.entry, _score: x.score }));
425
+ }
426
+
427
+ function constructMemexPack({ query = '', maxItems = 8, maxChars = 6000, namespaces = [] } = {}) {
428
+ const normalizedNamespaces = normalizeNamespaces(namespaces);
429
+ const hits = searchMemexIndex({ query, maxResults: maxItems * 2, namespaces: normalizedNamespaces });
430
+
431
+ const items = [];
432
+ let usedChars = 0;
433
+ const dereferenced = [];
434
+
435
+ for (const hit of hits) {
436
+ if (items.length >= maxItems) break;
437
+ const full = dereferenceEntry(hit);
438
+ if (!full) continue;
439
+
440
+ const snippet = `${full.title}\n${full.content || ''}`;
441
+ if (usedChars + snippet.length > maxChars) continue;
442
+
443
+ items.push({
444
+ id: full.id,
445
+ namespace: hit.namespace,
446
+ title: full.title,
447
+ content: full.content,
448
+ tags: full.tags || [],
449
+ score: hit._score,
450
+ });
451
+ usedChars += snippet.length;
452
+ dereferenced.push(hit.id);
453
+ }
454
+
455
+ const packId = `memex_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
456
+ const pack = {
457
+ packId,
458
+ query,
459
+ maxItems,
460
+ maxChars,
461
+ usedChars,
462
+ namespaces: normalizedNamespaces,
463
+ createdAt: nowIso(),
464
+ items,
465
+ indexHits: hits.length,
466
+ dereferencedCount: dereferenced.length,
467
+ cache: { hit: false },
468
+ };
469
+
470
+ appendJsonl(path.join(CONTEXTFS_ROOT, NAMESPACES.provenance, 'packs.jsonl'), pack);
471
+ recordProvenance({
472
+ type: 'memex_pack_constructed',
473
+ packId,
474
+ query,
475
+ indexHits: hits.length,
476
+ dereferencedCount: dereferenced.length,
477
+ usedChars,
478
+ });
479
+
480
+ return pack;
481
+ }
482
+
358
483
  function constructContextPack({ query = '', maxItems = 8, maxChars = 6000, namespaces = [] } = {}) {
359
484
  const normalizedNamespaces = normalizeNamespaces(namespaces);
360
485
  const tokens = tokenizeQuery(query);
@@ -505,6 +630,11 @@ module.exports = {
505
630
  querySimilarity,
506
631
  findSemanticCacheHit,
507
632
  getSemanticCacheConfig,
633
+ buildIndexEntry,
634
+ loadMemexIndex,
635
+ dereferenceEntry,
636
+ searchMemexIndex,
637
+ constructMemexPack,
508
638
  };
509
639
 
510
640
  if (require.main === module) {