rlhf-feedback-loop 0.6.9 → 0.6.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/adapters/mcp/server-stdio.js +10 -0
- package/package.json +8 -5
- package/scripts/adk-consolidator.js +173 -0
- package/scripts/billing.js +5 -1
- package/scripts/code-reasoning.js +26 -1
- package/scripts/context-engine.js +5 -4
- package/scripts/contextfs.js +130 -0
- package/scripts/disagreement-mining.js +315 -0
- package/scripts/intent-router.js +88 -0
- package/scripts/prove-attribution.js +6 -6
- package/scripts/prove-data-quality.js +16 -8
- package/scripts/prove-intelligence.js +7 -4
- package/scripts/prove-lancedb.js +6 -6
- package/scripts/prove-loop-closure.js +16 -8
- package/scripts/prove-training-export.js +7 -4
- package/scripts/self-heal.js +24 -4
- package/scripts/sync-version.js +159 -0
- package/scripts/test-coverage.js +76 -0
package/README.md
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
# RLHF-Ready Feedback Loop — Agentic Control Plane & Context Engineering Studio
|
|
2
2
|
|
|
3
3
|
[](https://github.com/IgorGanapolsky/rlhf-feedback-loop/actions/workflows/ci.yml)
|
|
4
|
+
[](https://github.com/IgorGanapolsky/rlhf-feedback-loop/actions/workflows/self-healing-monitor.yml)
|
|
5
|
+
[](https://www.npmjs.com/package/rlhf-feedback-loop)
|
|
6
|
+
[](LICENSE)
|
|
7
|
+
[](package.json)
|
|
4
8
|
[](docs/ANTHROPIC_MARKETPLACE_STRATEGY.md)
|
|
5
9
|
[](docs/geo-strategy-for-ai-agents.md)
|
|
6
10
|
|
|
@@ -84,7 +88,7 @@ All data stored locally as **JSONL** files — fully transparent, fully portable
|
|
|
84
88
|
|
|
85
89
|
The open-source package is fully functional and free forever. Cloud Pro is for teams that don't want to self-host.
|
|
86
90
|
|
|
87
|
-
| | Open Source | Cloud Pro ($
|
|
91
|
+
| | Open Source | Cloud Pro (Founding price: $10/mo) |
|
|
88
92
|
|---|---|---|
|
|
89
93
|
| Feedback capture | Local MCP server | Hosted HTTPS API |
|
|
90
94
|
| Storage | Your machine | Managed cloud |
|
|
@@ -94,7 +98,7 @@ The open-source package is fully functional and free forever. Cloud Pro is for t
|
|
|
94
98
|
| Support | GitHub Issues | Email |
|
|
95
99
|
| Uptime | You manage | We manage (99.9% SLA) |
|
|
96
100
|
|
|
97
|
-
[Get Cloud Pro](https://buy.stripe.com/bJe14neyU4r4f0leOD3sI02) | [Live API](https://rlhf-feedback-loop-710216278770.us-central1.run.app) | [Verification Evidence](docs/VERIFICATION_EVIDENCE.md)
|
|
101
|
+
[Get Cloud Pro ($10/mo)](https://buy.stripe.com/bJe14neyU4r4f0leOD3sI02) | [Live API](https://rlhf-feedback-loop-710216278770.us-central1.run.app) | [Verification Evidence](docs/VERIFICATION_EVIDENCE.md)
|
|
98
102
|
|
|
99
103
|
## Deep Dive
|
|
100
104
|
|
|
@@ -655,12 +655,22 @@ async function onData(chunk) {
|
|
|
655
655
|
/**
 * Attach the JSON-RPC server to process.stdin.
 *
 * Safe to call more than once: the `stdioStarted` flag turns every call after
 * the first into a no-op.
 */
function startStdioServer() {
  if (stdioStarted) return;
  stdioStarted = true;

  // A no-op timer keeps the event loop busy so the process is not torn down
  // prematurely when launched by MCP clients like Claude Code, Codex, or
  // Gemini CLI. It is released again once stdin reports 'end'.
  const keepAlive = setInterval(() => {}, 60_000);

  process.stdin.resume();
  process.stdin.on('data', (chunk) => {
    onData(chunk).catch((err) => {
      const transport = err && err.transport === 'ndjson' ? 'ndjson' : 'framed';
      // A rejection value is not guaranteed to be an Error; always produce a
      // string so the error object keeps its `message` member.
      const message = err && err.message ? err.message : String(err);
      // -32603 is the JSON-RPC 2.0 "Internal error" code.
      writeMessage({ jsonrpc: '2.0', id: null, error: { code: -32603, message } }, transport);
    });
  });
  process.stdin.on('end', () => {
    // stdin closed — drop the keep-alive timer so the process can exit
    clearInterval(keepAlive);
  });
}
|
|
665
675
|
|
|
666
676
|
module.exports = {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rlhf-feedback-loop",
|
|
3
|
-
"version": "0.6.9",
|
|
3
|
+
"version": "0.6.11",
|
|
4
4
|
"description": "RLHF-ready human feedback capture and DPO data pipeline for AI agents. Optimize agentic reliability with Feedback-Driven Development (FDD): capture preference signals, enforce guardrails, and export training pairs for downstream optimization.",
|
|
5
5
|
"homepage": "https://github.com/IgorGanapolsky/rlhf-feedback-loop#readme",
|
|
6
6
|
"repository": {
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"test:schema": "node scripts/feedback-schema.js --test",
|
|
34
34
|
"test:loop": "node scripts/feedback-loop.js --test",
|
|
35
35
|
"test:dpo": "node scripts/export-dpo-pairs.js --test",
|
|
36
|
-
"test:api": "node --test tests/api-server.test.js tests/api-auth-config.test.js tests/mcp-server.test.js tests/adapters.test.js tests/openapi-parity.test.js tests/budget-guard.test.js tests/contextfs.test.js tests/mcp-policy.test.js tests/subagent-profiles.test.js tests/intent-router.test.js tests/rubric-engine.test.js tests/self-healing-check.test.js tests/self-heal.test.js tests/feedback-schema.test.js tests/thompson-sampling.test.js tests/feedback-sequences.test.js tests/diversity-tracking.test.js tests/vector-store.test.js tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js tests/loop-closure.test.js tests/code-reasoning.test.js tests/feedback-loop.test.js tests/feedback-inbox-read.test.js tests/feedback-to-memory.test.js",
|
|
36
|
+
"test:api": "node --test tests/api-server.test.js tests/api-auth-config.test.js tests/mcp-server.test.js tests/adapters.test.js tests/openapi-parity.test.js tests/budget-guard.test.js tests/contextfs.test.js tests/mcp-policy.test.js tests/subagent-profiles.test.js tests/intent-router.test.js tests/rubric-engine.test.js tests/self-healing-check.test.js tests/self-heal.test.js tests/feedback-schema.test.js tests/thompson-sampling.test.js tests/feedback-sequences.test.js tests/diversity-tracking.test.js tests/vector-store.test.js tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js tests/loop-closure.test.js tests/code-reasoning.test.js tests/feedback-loop.test.js tests/feedback-inbox-read.test.js tests/feedback-to-memory.test.js tests/test-coverage.test.js tests/version-metadata.test.js",
|
|
37
37
|
"test:proof": "node --test --test-concurrency=1 tests/prove-adapters.test.js tests/prove-automation.test.js tests/prove-attribution.test.js tests/prove-lancedb.test.js tests/prove-data-quality.test.js tests/prove-intelligence.test.js tests/prove-loop-closure.test.js tests/prove-subway-upgrades.test.js tests/prove-training-export.test.js",
|
|
38
38
|
"test:rlaif": "node --test tests/rlaif-self-audit.test.js tests/dpo-optimizer.test.js tests/meta-policy.test.js",
|
|
39
39
|
"test:attribution": "node --test tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js",
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"test:deployment": "node --test tests/deployment.test.js",
|
|
44
44
|
"test:billing": "node --test tests/billing.test.js",
|
|
45
45
|
"test:cli": "node --test tests/cli.test.js",
|
|
46
|
+
"test:coverage": "node scripts/test-coverage.js",
|
|
46
47
|
"start:api": "node src/api/server.js",
|
|
47
48
|
"start:mcp": "node adapters/mcp/server-stdio.js",
|
|
48
49
|
"feedback:capture": "node .claude/scripts/feedback/capture-feedback.js",
|
|
@@ -75,7 +76,9 @@
|
|
|
75
76
|
"ml:train": "python3 scripts/train_from_feedback.py --train",
|
|
76
77
|
"ml:incremental": "python3 scripts/train_from_feedback.py --incremental",
|
|
77
78
|
"ml:reliability": "python3 scripts/train_from_feedback.py --reliability",
|
|
78
|
-
"ml:sample": "python3 scripts/train_from_feedback.py --sample"
|
|
79
|
+
"ml:sample": "python3 scripts/train_from_feedback.py --sample",
|
|
80
|
+
"adk:consolidate": "node scripts/adk-consolidator.js",
|
|
81
|
+
"adk:watch": "node scripts/adk-consolidator.js --watch"
|
|
79
82
|
},
|
|
80
83
|
"keywords": [
|
|
81
84
|
"rlhf",
|
|
@@ -115,10 +118,10 @@
|
|
|
115
118
|
"node": ">=18.18.0"
|
|
116
119
|
},
|
|
117
120
|
"dependencies": {
|
|
121
|
+
"@google/genai": "^1.44.0",
|
|
118
122
|
"@huggingface/transformers": "^3.8.1",
|
|
119
123
|
"@lancedb/lancedb": "^0.26.2",
|
|
120
|
-
"apache-arrow": "^18.1.0",
|
|
121
|
-
"stripe": "^20.4.1"
|
|
124
|
+
"apache-arrow": "^18.1.0"
|
|
122
125
|
},
|
|
123
126
|
"mcpName": "io.github.IgorGanapolsky/rlhf-feedback-loop"
|
|
124
127
|
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Agent Development Kit (ADK) Memory Consolidator
|
|
4
|
+
*
|
|
5
|
+
* 'Always-On' background service that reads disparate feedback logs and uses
|
|
6
|
+
* Gemini (Flash-Lite/Flash) to actively consolidate, compress, and dream up
|
|
7
|
+
* generalized prevention rules. This moves the system from 'passive logging'
|
|
8
|
+
* to 'active semantic memory consolidation'.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
'use strict';
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const { GoogleGenAI } = require('@google/genai');
|
|
16
|
+
|
|
17
|
+
const PROJECT_ROOT = path.join(__dirname, '..');
|
|
18
|
+
const { getFeedbackPaths, readJSONL } = require('./feedback-loop');
|
|
19
|
+
|
|
20
|
+
// Keep track of the last processed ID to avoid re-consolidating the exact same logs
|
|
21
|
+
const STATE_FILE = process.env.ADK_STATE_FILE || path.join(PROJECT_ROOT, '.rlhf', 'adk-state.json');
|
|
22
|
+
|
|
23
|
+
/**
 * Make sure a directory exists at `dirPath`.
 *
 * Nothing happens when the path already exists; otherwise the directory and
 * any missing parents are created.
 *
 * @param {string} dirPath - Directory to create if absent.
 */
function ensureDir(dirPath) {
  const alreadyPresent = fs.existsSync(dirPath);
  if (alreadyPresent) return;
  fs.mkdirSync(dirPath, { recursive: true });
}
|
|
28
|
+
|
|
29
|
+
/**
 * Read the consolidator's persisted state from STATE_FILE.
 *
 * Falls back to a fresh state when the file is missing, unreadable, not valid
 * JSON, or does not contain a plain object (for example `null` or an array),
 * so callers can always read `state.lastProcessedFeedbackId` safely.
 *
 * @returns {{ lastProcessedFeedbackId: (string|null) }} Persisted or fresh state.
 */
function loadState() {
  const freshState = { lastProcessedFeedbackId: null };
  if (!fs.existsSync(STATE_FILE)) return freshState;

  try {
    const parsed = JSON.parse(fs.readFileSync(STATE_FILE, 'utf-8'));
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : freshState;
  } catch {
    // Corrupt or unreadable state is treated the same as no state at all.
    return freshState;
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Persist the consolidator state to STATE_FILE as pretty-printed JSON,
 * creating the containing directory first when needed.
 *
 * @param {object} state - State object to serialize.
 */
function saveState(state) {
  const stateDir = path.dirname(STATE_FILE);
  ensureDir(stateDir);

  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(STATE_FILE, serialized);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Run one consolidation cycle.
 *
 * Steps: bail out when GEMINI_API_KEY is unset; read the feedback log; pick the
 * entries that follow the last processed id (or the final 50 entries on a first
 * run or when that id can no longer be found); ask Gemini for consolidated
 * insights; append any returned insights to the prevention rules file; record
 * the id of the last entry that was sent.
 *
 * Failures of the Gemini call, of parsing its response, or of writing the
 * rules/state are logged and swallowed, so the returned promise does not reject
 * for them.
 *
 * @returns {Promise<void>}
 */
async function consolidateMemory() {
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.warn('[ADK Consolidator] GEMINI_API_KEY is not set. Skipping active consolidation.');
    return;
  }

  const ai = new GoogleGenAI({ apiKey });
  const paths = getFeedbackPaths();
  const state = loadState();

  const allLogs = readJSONL(paths.FEEDBACK_LOG_PATH);
  if (allLogs.length === 0) {
    console.log('[ADK Consolidator] No logs to consolidate.');
    return;
  }

  // Resume right after the last processed entry. Without a usable marker
  // (first run, or the id vanished — log rotation?) fall back to the last 50.
  const resumeIdx = state.lastProcessedFeedbackId
    ? allLogs.findIndex((l) => l.id === state.lastProcessedFeedbackId)
    : -1;
  const newLogs = resumeIdx !== -1 ? allLogs.slice(resumeIdx + 1) : allLogs.slice(-50);

  if (newLogs.length === 0) {
    console.log('[ADK Consolidator] No new logs since last consolidation cycle.');
    return;
  }

  console.log(`[ADK Consolidator] Found ${newLogs.length} new feedback events. Activating Gemini for semantic consolidation...`);

  // Only the fields the model needs are forwarded, not the full log records.
  const eventsJson = JSON.stringify(
    newLogs.map((l) => ({
      signal: l.signal,
      context: l.context,
      tags: l.tags,
      whatWentWrong: l.whatWentWrong,
      whatWorked: l.whatWorked,
    })),
    null,
    2,
  );
  // Existing rules are capped at 2000 characters before being embedded.
  const existingRules = fs.existsSync(paths.PREVENTION_RULES_PATH)
    ? fs.readFileSync(paths.PREVENTION_RULES_PATH, 'utf-8').slice(0, 2000)
    : 'None yet.';

  const prompt = `
You are the Agent Development Kit (ADK) 'Always-On' Memory Consolidator.
Your job is to read the raw, disparate feedback logs of an AI agent and synthesize them into high-level, generalized prevention rules and learned intuitions.
Unlike standard systems that just count regex matches, you must semantically connect different failures (e.g., an API timeout and a missing import might both stem from 'rushing execution without verifying environment').

Here are the latest feedback events (JSON):
${eventsJson}

Existing Prevention Rules (if any):
${existingRules}

Output ONLY a valid JSON object with the following structure, representing the new synthesized insights:
{
"consolidatedInsights": [
{
"pattern": "Description of the underlying behavioral flaw or success pattern you detected.",
"rule": "A clear, actionable directive starting with 'ALWAYS' or 'NEVER' that should be added to prevention rules.",
"severity": "critical|high|medium|low"
}
],
"reasoning": "A short summary of how you connected the dots between these logs."
}
`;

  try {
    // We use gemini-2.5-flash as the proxy for Flash-Lite/Flash efficiency
    const response = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: prompt,
      config: {
        responseMimeType: "application/json",
      },
    });

    const result = JSON.parse(response.text);
    console.log(`[ADK Consolidator] Consolidation complete. Reasoning: ${result.reasoning}`);

    const insights = result.consolidatedInsights;
    if (insights && insights.length > 0) {
      appendRules(insights, paths.PREVENTION_RULES_PATH);
    }

    // Remember how far we got so the next cycle starts after this batch.
    const lastSent = newLogs[newLogs.length - 1];
    state.lastProcessedFeedbackId = lastSent.id;
    saveState(state);
  } catch (err) {
    console.error('[ADK Consolidator] Consolidation failed:', err.message);
  }
}
|
|
134
|
+
|
|
135
|
+
/**
 * Append consolidated insights to the prevention rules markdown file.
 *
 * When the file does not exist yet it is started with a standard header. Each
 * call adds one "ADK Semantic Consolidations" section with one bullet per
 * insight, stamped with the current time.
 *
 * @param {Array<{pattern: string, rule: string, severity?: string}>} insights
 *   Insights to append. A missing or non-string severity is rendered as
 *   UNKNOWN rather than aborting the write.
 * @param {string} rulesPath - Path of the prevention rules file.
 */
function appendRules(insights, rulesPath) {
  const existingContent = fs.existsSync(rulesPath)
    ? fs.readFileSync(rulesPath, 'utf-8')
    : '# Prevention Rules\n\nGenerated from active semantic memory consolidation.\n\n';

  const timestamp = new Date().toISOString();
  const bullets = insights.map((insight) => {
    // The insights come from model output, so severity cannot be trusted to be a string.
    const severity = typeof insight.severity === 'string' ? insight.severity.toUpperCase() : 'UNKNOWN';
    return `- [${severity}] **${insight.pattern}**\n - Rule: ${insight.rule} *(Consolidated at ${timestamp})*\n`;
  });
  const newRulesBlock = `\n## ADK Semantic Consolidations\n${bullets.join('')}`;

  ensureDir(path.dirname(rulesPath));
  fs.writeFileSync(rulesPath, existingContent + newRulesBlock);
  console.log(`[ADK Consolidator] Appended ${insights.length} new consolidated rules to ${rulesPath}`);
}
|
|
154
|
+
|
|
155
|
+
// CLI entry point: run a single consolidation cycle, or with --watch run one
// immediately and then again every 5 minutes.
if (require.main === module) {
  const isWatchMode = process.argv.slice(2).includes('--watch');

  // consolidateMemory() logs failures of the Gemini call itself, but reading
  // the state file and the feedback log happens before its try block, so the
  // returned promise can still reject. Handle that here instead of leaving
  // the promise floating.
  const reportFailure = (err) => {
    console.error('[ADK Consolidator] Unexpected failure:', err && err.message ? err.message : err);
  };

  if (isWatchMode) {
    console.log('[ADK Consolidator] Started in Always-On Watch Mode (interval: 5 minutes)');
    // A failed cycle must not take the watcher down; log it and wait for the next tick.
    const runCycle = () => consolidateMemory().catch(reportFailure);
    runCycle(); // Run once immediately
    setInterval(runCycle, 5 * 60 * 1000); // Check every 5 minutes
  } else {
    consolidateMemory().then(
      () => {
        console.log('[ADK Consolidator] Cycle finished.');
        process.exit(0);
      },
      (err) => {
        reportFailure(err);
        process.exit(1);
      },
    );
  }
}
|
|
172
|
+
|
|
173
|
+
module.exports = { consolidateMemory };
|
package/scripts/billing.js
CHANGED
|
@@ -26,7 +26,7 @@ const crypto = require('crypto');
|
|
|
26
26
|
const STRIPE_SECRET_KEY = process.env.STRIPE_SECRET_KEY || '';
|
|
27
27
|
const STRIPE_WEBHOOK_SECRET = process.env.STRIPE_WEBHOOK_SECRET || '';
|
|
28
28
|
const GITHUB_MARKETPLACE_WEBHOOK_SECRET = process.env.GITHUB_MARKETPLACE_WEBHOOK_SECRET || '';
|
|
29
|
-
const STRIPE_PRICE_ID = process.env.STRIPE_PRICE_ID || '
|
|
29
|
+
const STRIPE_PRICE_ID = process.env.STRIPE_PRICE_ID || '';
|
|
30
30
|
|
|
31
31
|
const API_KEYS_PATH = process.env._TEST_API_KEYS_PATH || path.resolve(
|
|
32
32
|
__dirname,
|
|
@@ -371,6 +371,10 @@ async function createCheckoutSession({ successUrl, cancelUrl, customerEmail, ins
|
|
|
371
371
|
};
|
|
372
372
|
}
|
|
373
373
|
|
|
374
|
+
if (!STRIPE_PRICE_ID) {
|
|
375
|
+
throw new Error('STRIPE_PRICE_ID not configured');
|
|
376
|
+
}
|
|
377
|
+
|
|
374
378
|
const params = {
|
|
375
379
|
mode: 'subscription',
|
|
376
380
|
line_items: [
|
|
@@ -71,12 +71,33 @@ function addEdgeCase(trace, description) {
|
|
|
71
71
|
return trace;
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
/**
 * Score a reasoning trace against four heuristics.
 *
 * Flags raised:
 *  - `all_verified`       every step is 'verified' and there are more than 2 steps
 *  - `identical_evidence` more than 1 step and all steps share one evidence value
 *  - `thin_evidence`      more than half the steps have evidence shorter than 10
 *                         characters (a step with no evidence counts as thin)
 *  - `no_edge_cases`      more than 1 step and no edge cases recorded
 *
 * The score is the fraction of those four flags that were raised, rounded to
 * three decimals. A trace without steps returns score 0 and the single flag
 * `empty_trace`. Missing `steps` or `edgeCases` are treated as empty lists
 * instead of throwing.
 *
 * @param {{ steps?: Array<{verdict?: string, evidence?: string}>, edgeCases?: Array }} trace
 * @returns {{ score: number, flags: string[] }}
 */
function computeControllability(trace) {
  const steps = trace && Array.isArray(trace.steps) ? trace.steps : [];
  const edgeCases = trace && Array.isArray(trace.edgeCases) ? trace.edgeCases : [];
  if (steps.length === 0) return { score: 0, flags: ['empty_trace'] };

  // `== null` deliberately matches both null and undefined evidence.
  const isThin = (step) => step.evidence == null || step.evidence.length < 10;

  const allVerified = steps.every((s) => s.verdict === 'verified');
  const distinctEvidence = new Set(steps.map((s) => s.evidence)).size;
  const thinCount = steps.filter(isThin).length;

  const checks = [
    ['all_verified', allVerified && steps.length > 2],
    ['identical_evidence', distinctEvidence === 1 && steps.length > 1],
    ['thin_evidence', thinCount > steps.length / 2],
    ['no_edge_cases', edgeCases.length === 0 && steps.length > 1],
  ];
  const flags = checks.filter(([, raised]) => raised).map(([flag]) => flag);

  const score = Math.round((flags.length / checks.length) * 1000) / 1000;
  return { score, flags };
}
|
|
93
|
+
|
|
74
94
|
function finalizeTrace(trace, { confidenceThreshold = DEFAULT_CONFIDENCE_THRESHOLD } = {}) {
|
|
75
95
|
const totalSteps = trace.steps.length;
|
|
76
96
|
const verified = trace.steps.filter((s) => s.verdict === 'verified').length;
|
|
77
97
|
const unverified = trace.steps.filter((s) => s.verdict === 'unverified').length;
|
|
78
98
|
const refuted = trace.steps.filter((s) => s.verdict === 'refuted').length;
|
|
79
99
|
const confidence = totalSteps > 0 ? Math.round((verified / totalSteps) * 1000) / 1000 : 0;
|
|
100
|
+
const ctrl = computeControllability(trace);
|
|
80
101
|
|
|
81
102
|
trace.summary = {
|
|
82
103
|
totalSteps,
|
|
@@ -85,6 +106,8 @@ function finalizeTrace(trace, { confidenceThreshold = DEFAULT_CONFIDENCE_THRESHO
|
|
|
85
106
|
refuted,
|
|
86
107
|
confidence,
|
|
87
108
|
passed: confidence >= confidenceThreshold && refuted === 0,
|
|
109
|
+
controllability: ctrl.score,
|
|
110
|
+
controllabilityFlags: ctrl.flags,
|
|
88
111
|
};
|
|
89
112
|
|
|
90
113
|
return trace;
|
|
@@ -291,6 +314,7 @@ function aggregateTraces(traces) {
|
|
|
291
314
|
refuted,
|
|
292
315
|
averageConfidence: avgConfidence,
|
|
293
316
|
allPassed: passedTraces === totalTraces,
|
|
317
|
+
flaggedTraces: traces.filter((t) => t.summary && t.summary.controllability > 0.5).length,
|
|
294
318
|
};
|
|
295
319
|
}
|
|
296
320
|
|
|
@@ -298,6 +322,7 @@ module.exports = {
|
|
|
298
322
|
createTrace,
|
|
299
323
|
addStep,
|
|
300
324
|
addEdgeCase,
|
|
325
|
+
computeControllability,
|
|
301
326
|
finalizeTrace,
|
|
302
327
|
traceForSelfHealFix,
|
|
303
328
|
traceForDpoPair,
|
|
@@ -305,4 +330,4 @@ module.exports = {
|
|
|
305
330
|
aggregateTraces,
|
|
306
331
|
DEFAULT_CONFIDENCE_THRESHOLD,
|
|
307
332
|
};
|
|
308
|
-
//
|
|
333
|
+
// Tests cover this module through the node:test suite; avoid hardcoding counts here.
|
|
@@ -273,7 +273,7 @@ function routeQuery(query, indexPath, topN) {
|
|
|
273
273
|
index = JSON.parse(fs.readFileSync(idxPath, 'utf-8'));
|
|
274
274
|
} catch {
|
|
275
275
|
// Index doesn't exist — build it on the fly
|
|
276
|
-
index = buildKnowledgeIndex();
|
|
276
|
+
index = buildKnowledgeIndex(undefined, idxPath);
|
|
277
277
|
}
|
|
278
278
|
|
|
279
279
|
const queryTokens = query
|
|
@@ -312,9 +312,10 @@ function routeQuery(query, indexPath, topN) {
|
|
|
312
312
|
* @param {string} query - The original query
|
|
313
313
|
* @param {string[]} retrievedDocs - Filenames of retrieved docs
|
|
314
314
|
* @param {string[]} expectedTopics - Expected topic keywords to match against
|
|
315
|
+
* @param {string} [logPath] - Optional path for the quality log
|
|
315
316
|
* @returns {{ precision: number, recall: number, f1: number, query: string, timestamp: string }}
|
|
316
317
|
*/
|
|
317
|
-
function scoreRetrievalQuality(query, retrievedDocs, expectedTopics) {
|
|
318
|
+
function scoreRetrievalQuality(query, retrievedDocs, expectedTopics, logPath) {
|
|
318
319
|
if (!retrievedDocs.length || !expectedTopics.length) {
|
|
319
320
|
const result = {
|
|
320
321
|
query,
|
|
@@ -325,7 +326,7 @@ function scoreRetrievalQuality(query, retrievedDocs, expectedTopics) {
|
|
|
325
326
|
expectedCount: expectedTopics.length,
|
|
326
327
|
timestamp: new Date().toISOString(),
|
|
327
328
|
};
|
|
328
|
-
logQualityResult(result);
|
|
329
|
+
logQualityResult(result, logPath);
|
|
329
330
|
return result;
|
|
330
331
|
}
|
|
331
332
|
|
|
@@ -369,7 +370,7 @@ function scoreRetrievalQuality(query, retrievedDocs, expectedTopics) {
|
|
|
369
370
|
timestamp: new Date().toISOString(),
|
|
370
371
|
};
|
|
371
372
|
|
|
372
|
-
logQualityResult(result);
|
|
373
|
+
logQualityResult(result, logPath);
|
|
373
374
|
return result;
|
|
374
375
|
}
|
|
375
376
|
|
package/scripts/contextfs.js
CHANGED
|
@@ -218,6 +218,7 @@ function writeContextObject({ namespace, title, content, tags = [], source, ttl
|
|
|
218
218
|
};
|
|
219
219
|
|
|
220
220
|
writeJson(filePath, doc);
|
|
221
|
+
indexContextObject(doc, filePath);
|
|
221
222
|
|
|
222
223
|
recordProvenance({
|
|
223
224
|
type: 'context_object_created',
|
|
@@ -355,6 +356,130 @@ function scoreDocument(doc, queryTokens) {
|
|
|
355
356
|
return score;
|
|
356
357
|
}
|
|
357
358
|
|
|
359
|
+
/* ── Memex-style Indexed Memory ────────────────────────────────── */
|
|
360
|
+
|
|
361
|
+
const MEMEX_INDEX_FILE = 'memex-index.jsonl';
|
|
362
|
+
|
|
363
|
+
/**
 * Location of the memex index file inside the provenance namespace of the
 * context filesystem root.
 *
 * @returns {string} Path to the JSONL index.
 */
function getMemexIndexPath() {
  const provenanceDir = path.join(CONTEXTFS_ROOT, NAMESPACES.provenance);
  return path.join(provenanceDir, MEMEX_INDEX_FILE);
}
|
|
366
|
+
|
|
367
|
+
/**
 * Build the compact index record for a context object.
 *
 * The record carries the document's id, namespace, title and tags, the first
 * 120 characters of its content as `digest`, its creation time (or the current
 * time when the document has none), and `filePath` as `stableRef`.
 *
 * @param {object} doc - Context object being indexed.
 * @param {string} filePath - Where the full document is stored.
 * @returns {{ id: *, namespace: string, title: string, tags: Array, digest: string, createdAt: string, stableRef: string }}
 */
function buildIndexEntry(doc, filePath) {
  const { id, namespace, title, tags, content, createdAt } = doc;
  const digest = String(content || '').slice(0, 120);

  return {
    id,
    namespace: namespace || '',
    title: title || '',
    tags: tags || [],
    digest,
    createdAt: createdAt || nowIso(),
    stableRef: filePath,
  };
}
|
|
378
|
+
|
|
379
|
+
/**
 * Append an index record for `doc` to the memex index file.
 *
 * @param {object} doc - Context object that was written.
 * @param {string} filePath - Where the full document is stored.
 * @returns {object} The index record that was appended.
 */
function indexContextObject(doc, filePath) {
  const record = buildIndexEntry(doc, filePath);
  const indexPath = getMemexIndexPath();
  appendJsonl(indexPath, record);
  return record;
}
|
|
384
|
+
|
|
385
|
+
/**
 * Read the records stored in the memex index file.
 *
 * @returns {Array<object>} Whatever `readJsonl` yields for the index path.
 */
function loadMemexIndex() {
  const indexPath = getMemexIndexPath();
  return readJsonl(indexPath);
}
|
|
388
|
+
|
|
389
|
+
/**
 * Load the full document an index record points at.
 *
 * @param {{ stableRef?: string }} entry - Index record.
 * @returns {object|null} The parsed document, or null when the record has no
 *   `stableRef` or the file cannot be read or parsed.
 */
function dereferenceEntry(entry) {
  const ref = entry ? entry.stableRef : undefined;
  if (!ref) return null;

  try {
    const raw = fs.readFileSync(ref, 'utf-8');
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
397
|
+
|
|
398
|
+
/**
 * Rank memex index records against a query without opening the documents.
 *
 * Scoring per record:
 *  - +3 for every query token longer than 2 characters found in the
 *    lower-cased title, digest, or tags
 *  - +1 when the namespace contains 'memory/error'
 *  - +1 when the namespace contains 'memory/learning'
 *  - +2 when created less than 24 hours ago, +1 when less than 168 hours ago
 *
 * Records scoring 0 are dropped. Records lacking a namespace, title, digest or
 * tags are scored on whatever they do have rather than throwing or matching
 * the literal text "undefined".
 *
 * @param {object} [options]
 * @param {string} [options.query=''] - Free-text query.
 * @param {number} [options.maxResults=10] - Maximum number of records returned.
 * @param {string[]} [options.namespaces=[]] - Restrict to these namespaces; empty means all.
 * @returns {Array<object>} Matching records, best first, each with a `_score` field.
 */
function searchMemexIndex({ query = '', maxResults = 10, namespaces = [] } = {}) {
  const index = loadMemexIndex();
  // Short tokens never score, so drop them once instead of per record.
  const tokens = tokenizeQuery(query).filter((t) => t.length > 2);
  const nsFilter = namespaces.length > 0 ? new Set(normalizeNamespaces(namespaces)) : null;
  const now = Date.now();

  const scoreEntry = (entry) => {
    const namespace = String(entry.namespace || '');
    const tagText = Array.isArray(entry.tags) ? entry.tags.join(' ') : '';
    const haystack = `${entry.title || ''} ${entry.digest || ''} ${tagText}`.toLowerCase();

    let score = 0;
    for (const token of tokens) {
      if (haystack.includes(token)) score += 3;
    }
    if (namespace.includes('memory/error')) score += 1;
    if (namespace.includes('memory/learning')) score += 1;

    if (entry.createdAt) {
      const hours = (now - new Date(entry.createdAt).getTime()) / 3_600_000;
      // An unparseable date yields NaN; such records simply get no recency bonus.
      if (Number.isFinite(hours)) {
        if (hours < 24) score += 2;
        else if (hours < 168) score += 1;
      }
    }
    return { entry, score };
  };

  return index
    .filter((entry) => entry && (!nsFilter || nsFilter.has(entry.namespace)))
    .map(scoreEntry)
    .filter((x) => x.score > 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, maxResults)
    .map((x) => ({ ...x.entry, _score: x.score }));
}
|
|
426
|
+
|
|
427
|
+
/**
 * Build a context pack by searching the memex index first and opening only the
 * documents that rank.
 *
 * The index is asked for up to `maxItems * 2` hits. Hits are then taken in
 * rank order until `maxItems` documents are collected; a hit is skipped when
 * its document cannot be loaded or when its title plus content would push the
 * running total past `maxChars`. The finished pack is appended to
 * `packs.jsonl` in the provenance namespace and a provenance event is recorded.
 *
 * @param {object} [options]
 * @param {string} [options.query=''] - Free-text query.
 * @param {number} [options.maxItems=8] - Maximum number of documents in the pack.
 * @param {number} [options.maxChars=6000] - Character budget across all documents.
 * @param {string[]} [options.namespaces=[]] - Restrict to these namespaces.
 * @returns {object} The constructed pack.
 */
function constructMemexPack({ query = '', maxItems = 8, maxChars = 6000, namespaces = [] } = {}) {
  const normalizedNamespaces = normalizeNamespaces(namespaces);
  const hits = searchMemexIndex({ query, maxResults: maxItems * 2, namespaces: normalizedNamespaces });

  const items = [];
  const dereferencedIds = [];
  let usedChars = 0;

  for (let i = 0; i < hits.length && items.length < maxItems; i += 1) {
    const hit = hits[i];
    const full = dereferenceEntry(hit);
    if (full) {
      const snippetLength = `${full.title}\n${full.content || ''}`.length;
      // Skip (rather than stop) on an oversized document so smaller ones can still fit.
      if (usedChars + snippetLength <= maxChars) {
        items.push({
          id: full.id,
          namespace: hit.namespace,
          title: full.title,
          content: full.content,
          tags: full.tags || [],
          score: hit._score,
        });
        usedChars += snippetLength;
        dereferencedIds.push(hit.id);
      }
    }
  }

  const randomSuffix = Math.random().toString(36).slice(2, 8);
  const packId = `memex_${Date.now()}_${randomSuffix}`;
  const indexHits = hits.length;
  const dereferencedCount = dereferencedIds.length;

  const pack = {
    packId,
    query,
    maxItems,
    maxChars,
    usedChars,
    namespaces: normalizedNamespaces,
    createdAt: nowIso(),
    items,
    indexHits,
    dereferencedCount,
    cache: { hit: false },
  };

  const packsLogPath = path.join(CONTEXTFS_ROOT, NAMESPACES.provenance, 'packs.jsonl');
  appendJsonl(packsLogPath, pack);
  recordProvenance({
    type: 'memex_pack_constructed',
    packId,
    query,
    indexHits,
    dereferencedCount,
    usedChars,
  });

  return pack;
}
|
|
482
|
+
|
|
358
483
|
function constructContextPack({ query = '', maxItems = 8, maxChars = 6000, namespaces = [] } = {}) {
|
|
359
484
|
const normalizedNamespaces = normalizeNamespaces(namespaces);
|
|
360
485
|
const tokens = tokenizeQuery(query);
|
|
@@ -505,6 +630,11 @@ module.exports = {
|
|
|
505
630
|
querySimilarity,
|
|
506
631
|
findSemanticCacheHit,
|
|
507
632
|
getSemanticCacheConfig,
|
|
633
|
+
buildIndexEntry,
|
|
634
|
+
loadMemexIndex,
|
|
635
|
+
dereferenceEntry,
|
|
636
|
+
searchMemexIndex,
|
|
637
|
+
constructMemexPack,
|
|
508
638
|
};
|
|
509
639
|
|
|
510
640
|
if (require.main === module) {
|