rlhf-feedback-loop 0.6.4 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -12
- package/adapters/mcp/server-stdio.js +109 -5
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,7 +6,19 @@
|
|
|
6
6
|
[](adapters/mcp/server-stdio.js)
|
|
7
7
|
[](scripts/export-dpo-pairs.js)
|
|
8
8
|
|
|
9
|
-
**
|
|
9
|
+
**The complete RLHF data pipeline for AI coding agents.** Capture human feedback, build memory, generate prevention rules, and export DPO training pairs — the full loop from thumbs up/down to model fine-tuning.
|
|
10
|
+
|
|
11
|
+
## What This Is (and Isn't)
|
|
12
|
+
|
|
13
|
+
This tool implements the **data collection and preference pipeline** side of RLHF — the part that turns your daily interactions with AI agents into structured training data. Out of the box, it:
|
|
14
|
+
|
|
15
|
+
- **Captures** thumbs up/down feedback with context, tags, and rubric scores
|
|
16
|
+
- **Remembers** via JSONL logs + LanceDB vector search across sessions
|
|
17
|
+
- **Prevents** repeated mistakes with auto-generated guardrails
|
|
18
|
+
- **Recalls** relevant past feedback mid-conversation (in-session context injection)
|
|
19
|
+
- **Exports** DPO training pairs (prompt/chosen/rejected) for model fine-tuning
|
|
20
|
+
|
|
21
|
+
It does **not** update model weights in real time. That's the fine-tuning step, which you do separately using the DPO pairs this tool exports. The full loop: capture feedback here → export DPO pairs → fine-tune with [TRL](https://github.com/huggingface/trl), [OpenPipe](https://openpipe.ai), or any DPO trainer → deploy improved model.
|
|
10
22
|
|
|
11
23
|
## Architecture
|
|
12
24
|
|
|
@@ -22,12 +34,12 @@ One command. Pick your platform:
|
|
|
22
34
|
|----------|---------|
|
|
23
35
|
| **Claude** | `claude mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
24
36
|
| **Codex** | `codex mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
25
|
-
| **Gemini** | `gemini mcp add rlhf
|
|
26
|
-
| **Amp** | `
|
|
37
|
+
| **Gemini** | `gemini mcp add rlhf "npx -y rlhf-feedback-loop serve"` |
|
|
38
|
+
| **Amp** | `amp mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
27
39
|
| **Cursor** | `cursor mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
28
40
|
| **All at once** | `npx add-mcp rlhf-feedback-loop` |
|
|
29
41
|
|
|
30
|
-
That's it. Your agent can now capture feedback, recall past learnings mid-conversation, and block repeated mistakes.
|
|
42
|
+
That's it. Your agent can now capture feedback, recall past learnings mid-conversation, and block repeated mistakes. Run the command once per project — the MCP server then starts automatically on each session.
|
|
31
43
|
|
|
32
44
|
## How It Works
|
|
33
45
|
|
|
@@ -57,15 +69,21 @@ DPO export → fine-tune your model
|
|
|
57
69
|
|
|
58
70
|
All data stored locally as **JSONL** files — fully transparent, fully portable, no vendor lock-in. **LanceDB** indexes memories as vector embeddings for semantic search. **ShieldCortex** assembles context packs so your agent starts each task informed.
|
|
59
71
|
|
|
60
|
-
##
|
|
72
|
+
## Free vs. Cloud Pro
|
|
73
|
+
|
|
74
|
+
The open-source package is fully functional and free forever. Cloud Pro is for teams that don't want to self-host.
|
|
75
|
+
|
|
76
|
+
| | Open Source | Cloud Pro ($10/mo) |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| Feedback capture | Local MCP server | Hosted HTTPS API |
|
|
79
|
+
| Storage | Your machine | Managed cloud |
|
|
80
|
+
| DPO export | CLI command | API endpoint |
|
|
81
|
+
| Setup | `mcp add` one-liner | Provisioned API key |
|
|
82
|
+
| Team sharing | Manual (share JSONL) | Built-in (shared API) |
|
|
83
|
+
| Support | GitHub Issues | Email |
|
|
84
|
+
| Uptime | You manage | We manage (99.9% SLA) |
|
|
61
85
|
|
|
62
|
-
|
|
63
|
-
|---------|---------------|
|
|
64
|
-
| Agent keeps making the same mistake | Prevention rules auto-generated from repeated failures |
|
|
65
|
-
| Agent claims "done" without proof | Rubric engine blocks positive feedback without test evidence |
|
|
66
|
-
| Feedback collected but never used | DPO pairs exported for actual model fine-tuning |
|
|
67
|
-
| Different tools, different formats | One MCP server works across 5 platforms |
|
|
68
|
-
| Agent starts every task blank | In-session recall injects past learnings into current conversation |
|
|
86
|
+
[Get Cloud Pro](https://buy.stripe.com/bJe14neyU4r4f0leOD3sI02) | [Live API](https://rlhf-feedback-loop-710216278770.us-central1.run.app)
|
|
69
87
|
|
|
70
88
|
## Deep Dive
|
|
71
89
|
|
|
@@ -249,23 +249,123 @@ function parseOptionalObject(input, name) {
|
|
|
249
249
|
throw new Error(`${name} must be an object`);
|
|
250
250
|
}
|
|
251
251
|
|
|
252
|
+
/**
 * Heuristically classify free text as a thumbs-up or thumbs-down signal.
 *
 * The text is lower-cased and matched against two fixed phrase lists.
 * Positive phrases win when both kinds are present. Praise that is directly
 * negated ("not perfect", "isn't perfect", "not a good job") is ignored so
 * that it is not recorded as positive feedback; a non-negated praise phrase
 * elsewhere in the same text still counts.
 *
 * @param {*} text - Text to inspect; falsy values are treated as ''.
 * @returns {'up'|'down'|null} The detected signal, or null when none matches.
 */
function detectFeedbackSignal(text) {
  const lower = String(text || '').toLowerCase();
  const UP = /\b(thumbs?\s*up|that worked|looks good|nice work|perfect|good job)\b/;
  const DOWN = /\b(thumbs?\s*down|that failed|that was wrong|fix this)\b/;
  // A negator (optionally followed by one filler word) immediately before a
  // praise phrase. "no" is deliberately not a negator: "no that worked" is praise.
  const NEGATED_UP = /(?:\b(?:not|never|hardly|far from)|n['’]t)\s+(?:(?:a|so|quite|really|very|exactly|entirely)\s+)?(?:thumbs?\s*up|that worked|looks good|nice work|perfect|good job)\b/g;

  // Replace negated praise with a non-space marker so the words on either
  // side cannot join up into a new praise phrase.
  const withoutNegatedPraise = lower.replace(NEGATED_UP, ' # ');

  if (UP.test(withoutNegatedPraise)) return 'up';
  if (DOWN.test(lower)) return 'down';
  return null;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Build a plain-text report summarising the feedback stored under
 * SAFE_DATA_DIR.
 *
 * Reads `feedback-log.jsonl` (one JSON object per line; unparseable or
 * non-object lines are skipped) and counts the non-blank lines of
 * `memory-log.jsonl` when that file exists. The report has a storage
 * section, totals with a positive ratio, up to three domains with the most
 * negative entries, and the five most recent entries, newest first.
 *
 * The LanceDB line only prints the expected path; nothing checks that the
 * directory exists.
 *
 * @returns {string} The report, or 'No feedback captured yet.' when the
 *   feedback log file does not exist.
 */
function formatStats() {
  const logPath = path.join(SAFE_DATA_DIR, 'feedback-log.jsonl');
  const memPath = path.join(SAFE_DATA_DIR, 'memory-log.jsonl');
  if (!fs.existsSync(logPath)) return 'No feedback captured yet.';

  // Tolerates CRLF files and whitespace-only lines.
  const readNonBlankLines = (file) =>
    fs.readFileSync(file, 'utf8').split(/\r?\n/).filter((line) => line.trim() !== '');

  const entries = readNonBlankLines(logPath)
    .map((line) => {
      try {
        return JSON.parse(line);
      } catch (_) {
        // Best-effort report: a corrupt line must not hide the rest of the log.
        return null;
      }
    })
    .filter((entry) => entry !== null && typeof entry === 'object' && !Array.isArray(entry));

  const memCount = fs.existsSync(memPath) ? readNonBlankLines(memPath).length : 0;

  // Single pass: signal totals plus per-domain tally of negative entries.
  // A Map keeps arbitrary domain names (e.g. "__proto__") from colliding with
  // Object.prototype members.
  let pos = 0;
  let neg = 0;
  const domainCounts = new Map();
  for (const entry of entries) {
    if (entry.signal === 'positive') {
      pos += 1;
    } else if (entry.signal === 'negative') {
      neg += 1;
      const domain = String((entry.richContext && entry.richContext.domain) || 'general');
      domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
    }
  }
  const topDomains = [...domainCounts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 3);

  // Audit trail: last five entries, newest first. Any signal other than
  // 'positive' is rendered as DN.
  const auditTrail = entries.slice(-5).reverse().map((entry) => {
    const sig = entry.signal === 'positive' ? 'UP' : 'DN';
    // String() guards against numeric timestamps / non-string contexts,
    // which have no .slice and would otherwise throw.
    const ts = String(entry.timestamp || '').slice(11, 19);
    const ctx = String(entry.context || '').slice(0, 60);
    return ` [${sig}] ${ts} ${ctx}`;
  });

  // The ratio is defined whenever at least one entry is rated, so a log
  // containing only negatives reports "0% positive" rather than "n/a".
  const rated = pos + neg;
  const ratio = rated > 0 ? `${((pos / rated) * 100).toFixed(0)}% positive` : 'n/a';

  const parts = [
    '## Storage',
    ` Feedback log : ${entries.length} entries`,
    ` Memory log : ${memCount} memories`,
    ` LanceDB : ${path.join(SAFE_DATA_DIR, 'lancedb/')}`,
    '',
    '## Stats',
    ` Total : ${entries.length}`,
    ` Positive : ${pos}`,
    ` Negative : ${neg}`,
    ` Promoted : ${memCount}`,
    ` Ratio : ${ratio}`,
  ];

  if (topDomains.length > 0) {
    parts.push('', '## Top Error Domains (where mistakes cluster)');
    for (const [domain, count] of topDomains) {
      parts.push(` ${domain}: ${count} failures`);
    }
  }

  if (auditTrail.length > 0) {
    parts.push('', '## Audit Trail (last 5 decisions)');
    parts.push(...auditTrail);
  }

  return parts.join('\n');
}
|
|
317
|
+
|
|
252
318
|
/**
 * Entry point for an MCP tool call, with opportunistic feedback auto-capture.
 *
 * After checking that `name` is allowed for the active profile, the
 * `query` (or, failing that, `context`) argument is scanned with
 * detectFeedbackSignal(). When a signal is found and the call is not already
 * `capture_feedback`, the feedback is captured first, then the requested tool
 * runs via callToolInner(), and a capture report (plus formatStats() output)
 * is prepended to the tool's first text item.
 *
 * Auto-capture is a side feature and is kept best-effort:
 *  - it is skipped when the active profile does not allow `capture_feedback`,
 *    so a restricted profile cannot be made to write feedback indirectly;
 *  - a failure while capturing or reporting is noted in the report instead of
 *    aborting the tool the caller actually asked for;
 *  - a tool result without a leading text item is handled without throwing.
 *
 * @param {string} name - Tool name.
 * @param {object} [args={}] - Tool arguments; passed to callToolInner unchanged.
 * @returns {Promise<object>} The tool result from callToolInner, possibly with
 *   the auto-capture report prepended.
 * @throws Whatever assertToolAllowed or callToolInner throw for `name`.
 */
async function callTool(name, args = {}) {
  assertToolAllowed(name, getActiveMcpProfile());

  // Platform-agnostic auto-capture: detect feedback signals in any tool call.
  // `args` may be null (the default only covers undefined), and query/context
  // may not be strings, so normalise before scanning.
  const source = args || {};
  const rawText = source.query || source.context || '';
  const textToCheck = typeof rawText === 'string' ? rawText : String(rawText);
  const autoSignal = detectFeedbackSignal(textToCheck);

  if (!autoSignal || name === 'capture_feedback') {
    return callToolInner(name, args);
  }

  // assertToolAllowed's return value is not used above, so it is taken to
  // signal denial by throwing; treat a throw here as "capture not permitted".
  let captureAllowed = true;
  try {
    assertToolAllowed('capture_feedback', getActiveMcpProfile());
  } catch (_) {
    captureAllowed = false;
  }
  if (!captureAllowed) {
    return callToolInner(name, args);
  }

  const heading = `## Auto-Captured Feedback [${autoSignal.toUpperCase()}]`;
  let autoReport;
  try {
    const autoResult = captureFeedback({
      signal: autoSignal,
      context: textToCheck,
      tags: ['auto-capture', 'mcp'],
    }) || {};
    const ev = autoResult.feedbackEvent || {};
    const promoted = autoResult.accepted
      ? `yes (Memory ID: ${(autoResult.memoryRecord || {}).id})`
      : `no — ${autoResult.reason || ''}`;
    autoReport = [
      '',
      heading,
      ` Feedback ID : ${ev.id || 'n/a'}`,
      ` Signal : ${ev.signal || autoSignal} (${ev.actionType || 'unknown'})`,
      ` Context : ${String(ev.context || textToCheck).slice(0, 80)}`,
      ` Timestamp : ${ev.timestamp || new Date().toISOString()}`,
      ` Promoted : ${promoted}`,
      '',
      formatStats(),
    ].join('\n');
  } catch (err) {
    // Surface the failure in the report rather than swallowing it or
    // failing the requested tool.
    const reason = err && err.message ? err.message : String(err);
    autoReport = ['', heading, ` Capture failed : ${reason}`].join('\n');
  }

  // Prepend the auto-capture report to whatever the tool was going to return.
  const toolResult = await callToolInner(name, args);
  if (!toolResult || !Array.isArray(toolResult.content)) {
    return toolResult;
  }
  const first = toolResult.content[0];
  if (first && typeof first.text === 'string') {
    first.text = `${autoReport}\n\n---\n\n${first.text}`;
  } else {
    // No leading text item to extend: add the report as its own item.
    toolResult.content.unshift({ type: 'text', text: autoReport });
  }
  return toolResult;
}
|
|
350
|
+
|
|
351
|
+
async function callToolInner(name, args = {}) {
|
|
255
352
|
if (name === 'recall') {
|
|
256
353
|
const query = args.query || '';
|
|
257
354
|
const limit = Number(args.limit || 5);
|
|
258
355
|
const parts = [];
|
|
259
356
|
|
|
260
|
-
// 1. Vector search for similar past feedback
|
|
357
|
+
// 1. Vector search for similar past feedback with confidence scores
|
|
261
358
|
try {
|
|
262
359
|
const similar = await searchSimilar(query, limit);
|
|
263
360
|
if (similar.length > 0) {
|
|
264
361
|
parts.push('## Relevant Past Feedback\n');
|
|
265
|
-
for (
|
|
362
|
+
for (let i = 0; i < similar.length; i++) {
|
|
363
|
+
const mem = similar[i];
|
|
266
364
|
const signal = mem.signal === 'positive' ? 'GOOD' : 'BAD';
|
|
267
|
-
|
|
365
|
+
const confidence = mem._distance != null ? Math.max(0, (1 - mem._distance) * 100).toFixed(0) : '?';
|
|
366
|
+
parts.push(`**[${signal}]** (${confidence}% match) ${mem.context}`);
|
|
268
367
|
if (mem.tags) parts.push(` Tags: ${mem.tags}`);
|
|
368
|
+
if (mem.timestamp) parts.push(` When: ${mem.timestamp}`);
|
|
269
369
|
parts.push('');
|
|
270
370
|
}
|
|
271
371
|
}
|
|
@@ -295,9 +395,13 @@ async function callTool(name, args = {}) {
|
|
|
295
395
|
}
|
|
296
396
|
} catch (_) {}
|
|
297
397
|
|
|
298
|
-
|
|
398
|
+
// 4. Append stats + audit trail (glass box)
|
|
399
|
+
parts.push('');
|
|
400
|
+
parts.push(formatStats());
|
|
401
|
+
|
|
402
|
+
const text = parts.length > 1
|
|
299
403
|
? parts.join('\n')
|
|
300
|
-
: 'No past feedback found. This appears to be a fresh start
|
|
404
|
+
: 'No past feedback found. This appears to be a fresh start.\n\n' + formatStats();
|
|
301
405
|
|
|
302
406
|
return { content: [{ type: 'text', text }] };
|
|
303
407
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rlhf-feedback-loop",
|
|
3
|
-
"version": "0.6.
|
|
3
|
+
"version": "0.6.6",
|
|
4
4
|
"description": "Make your AI agent learn from mistakes. Capture thumbs up/down feedback, block repeated failures, export DPO training data. Works with ChatGPT, Claude, Codex, Gemini, Amp.",
|
|
5
5
|
"homepage": "https://github.com/IgorGanapolsky/rlhf-feedback-loop#readme",
|
|
6
6
|
"repository": {
|