rlhf-feedback-loop 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -12
- package/adapters/mcp/server-stdio.js +56 -16
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,7 +6,19 @@
|
|
|
6
6
|
[](adapters/mcp/server-stdio.js)
|
|
7
7
|
[](scripts/export-dpo-pairs.js)
|
|
8
8
|
|
|
9
|
-
**
|
|
9
|
+
**The complete RLHF data pipeline for AI coding agents.** Capture human feedback, build memory, generate prevention rules, and export DPO training pairs — the full loop from thumbs up/down to model fine-tuning.
|
|
10
|
+
|
|
11
|
+
## What This Is (and Isn't)
|
|
12
|
+
|
|
13
|
+
This tool implements the **data collection and preference pipeline** side of RLHF — the part that turns your daily interactions with AI agents into structured training data. Out of the box, it:
|
|
14
|
+
|
|
15
|
+
- **Captures** thumbs up/down feedback with context, tags, and rubric scores
|
|
16
|
+
- **Remembers** via JSONL logs + LanceDB vector search across sessions
|
|
17
|
+
- **Prevents** repeated mistakes with auto-generated guardrails
|
|
18
|
+
- **Recalls** relevant past feedback mid-conversation (in-session context injection)
|
|
19
|
+
- **Exports** DPO training pairs (prompt/chosen/rejected) for model fine-tuning
|
|
20
|
+
|
|
21
|
+
It does **not** update model weights in real time. That's the fine-tuning step, which you do separately using the DPO pairs this tool exports. The full loop: capture feedback here → export DPO pairs → fine-tune with [TRL](https://github.com/huggingface/trl), [OpenPipe](https://openpipe.ai), or any DPO trainer → deploy improved model.
|
|
10
22
|
|
|
11
23
|
## Architecture
|
|
12
24
|
|
|
@@ -22,12 +34,12 @@ One command. Pick your platform:
|
|
|
22
34
|
|----------|---------|
|
|
23
35
|
| **Claude** | `claude mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
24
36
|
| **Codex** | `codex mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
25
|
-
| **Gemini** | `gemini mcp add rlhf
|
|
26
|
-
| **Amp** | `
|
|
37
|
+
| **Gemini** | `gemini mcp add rlhf "npx -y rlhf-feedback-loop serve"` |
|
|
38
|
+
| **Amp** | `amp mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
27
39
|
| **Cursor** | `cursor mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
|
|
28
40
|
| **All at once** | `npx add-mcp rlhf-feedback-loop` |
|
|
29
41
|
|
|
30
|
-
That's it. Your agent can now capture feedback, recall past learnings mid-conversation, and block repeated mistakes.
|
|
42
|
+
That's it. Your agent can now capture feedback, recall past learnings mid-conversation, and block repeated mistakes. Run the command once per project — the MCP server then starts automatically on each session.
|
|
31
43
|
|
|
32
44
|
## How It Works
|
|
33
45
|
|
|
@@ -57,15 +69,21 @@ DPO export → fine-tune your model
|
|
|
57
69
|
|
|
58
70
|
All data stored locally as **JSONL** files — fully transparent, fully portable, no vendor lock-in. **LanceDB** indexes memories as vector embeddings for semantic search. **ShieldCortex** assembles context packs so your agent starts each task informed.
|
|
59
71
|
|
|
60
|
-
##
|
|
72
|
+
## Free vs. Cloud Pro
|
|
73
|
+
|
|
74
|
+
The open-source package is fully functional and free forever. Cloud Pro is for teams that don't want to self-host.
|
|
75
|
+
|
|
76
|
+
| | Open Source | Cloud Pro ($10/mo) |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| Feedback capture | Local MCP server | Hosted HTTPS API |
|
|
79
|
+
| Storage | Your machine | Managed cloud |
|
|
80
|
+
| DPO export | CLI command | API endpoint |
|
|
81
|
+
| Setup | `mcp add` one-liner | Provisioned API key |
|
|
82
|
+
| Team sharing | Manual (share JSONL) | Built-in (shared API) |
|
|
83
|
+
| Support | GitHub Issues | Email |
|
|
84
|
+
| Uptime | You manage | We manage (99.9% SLA) |
|
|
61
85
|
|
|
62
|
-
|
|
63
|
-
|---------|---------------|
|
|
64
|
-
| Agent keeps making the same mistake | Prevention rules auto-generated from repeated failures |
|
|
65
|
-
| Agent claims "done" without proof | Rubric engine blocks positive feedback without test evidence |
|
|
66
|
-
| Feedback collected but never used | DPO pairs exported for actual model fine-tuning |
|
|
67
|
-
| Different tools, different formats | One MCP server works across 5 platforms |
|
|
68
|
-
| Agent starts every task blank | In-session recall injects past learnings into current conversation |
|
|
86
|
+
[Get Cloud Pro](https://buy.stripe.com/bJe14neyU4r4f0leOD3sI02) | [Live API](https://rlhf-feedback-loop-710216278770.us-central1.run.app)
|
|
69
87
|
|
|
70
88
|
## Deep Dive
|
|
71
89
|
|
|
@@ -267,19 +267,52 @@ function formatStats() {
|
|
|
267
267
|
const pos = entries.filter(e => e.signal === 'positive').length;
|
|
268
268
|
const neg = entries.filter(e => e.signal === 'negative').length;
|
|
269
269
|
const memCount = fs.existsSync(memPath) ? fs.readFileSync(memPath, 'utf8').trim().split('\n').filter(Boolean).length : 0;
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
270
|
+
|
|
271
|
+
// HBR: "Which cases consume disproportionate time?" — top error domains
|
|
272
|
+
const negEntries = entries.filter(e => e.signal === 'negative');
|
|
273
|
+
const domainCounts = {};
|
|
274
|
+
negEntries.forEach(e => {
|
|
275
|
+
const domain = (e.richContext && e.richContext.domain) || 'general';
|
|
276
|
+
domainCounts[domain] = (domainCounts[domain] || 0) + 1;
|
|
277
|
+
});
|
|
278
|
+
const topDomains = Object.entries(domainCounts).sort((a, b) => b[1] - a[1]).slice(0, 3);
|
|
279
|
+
|
|
280
|
+
// HBR: "Glass box" — audit trail of recent decisions
|
|
281
|
+
const recent = entries.slice(-5).reverse();
|
|
282
|
+
const auditTrail = recent.map(e => {
|
|
283
|
+
const sig = e.signal === 'positive' ? 'UP' : 'DN';
|
|
284
|
+
const ts = (e.timestamp || '').slice(11, 19);
|
|
285
|
+
const ctx = (e.context || '').slice(0, 60);
|
|
286
|
+
return ` [${sig}] ${ts} ${ctx}`;
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
const parts = [
|
|
290
|
+
'## Storage',
|
|
291
|
+
` Feedback log : ${entries.length} entries`,
|
|
292
|
+
` Memory log : ${memCount} memories`,
|
|
274
293
|
` LanceDB : ${path.join(SAFE_DATA_DIR, 'lancedb/')}`,
|
|
275
294
|
'',
|
|
276
|
-
'##
|
|
277
|
-
` Total
|
|
278
|
-
` Positive
|
|
279
|
-
` Negative
|
|
280
|
-
` Promoted
|
|
281
|
-
` Ratio
|
|
282
|
-
]
|
|
295
|
+
'## Stats',
|
|
296
|
+
` Total : ${entries.length}`,
|
|
297
|
+
` Positive : ${pos}`,
|
|
298
|
+
` Negative : ${neg}`,
|
|
299
|
+
` Promoted : ${memCount}`,
|
|
300
|
+
` Ratio : ${pos > 0 ? (pos / (pos + neg) * 100).toFixed(0) + '% positive' : 'n/a'}`,
|
|
301
|
+
];
|
|
302
|
+
|
|
303
|
+
if (topDomains.length > 0) {
|
|
304
|
+
parts.push('', '## Top Error Domains (where mistakes cluster)');
|
|
305
|
+
topDomains.forEach(([domain, count]) => {
|
|
306
|
+
parts.push(` ${domain}: ${count} failures`);
|
|
307
|
+
});
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
if (auditTrail.length > 0) {
|
|
311
|
+
parts.push('', '## Audit Trail (last 5 decisions)');
|
|
312
|
+
parts.push(...auditTrail);
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
return parts.join('\n');
|
|
283
316
|
}
|
|
284
317
|
|
|
285
318
|
async function callTool(name, args = {}) {
|
|
@@ -321,15 +354,18 @@ async function callToolInner(name, args = {}) {
|
|
|
321
354
|
const limit = Number(args.limit || 5);
|
|
322
355
|
const parts = [];
|
|
323
356
|
|
|
324
|
-
// 1. Vector search for similar past feedback
|
|
357
|
+
// 1. Vector search for similar past feedback with confidence scores
|
|
325
358
|
try {
|
|
326
359
|
const similar = await searchSimilar(query, limit);
|
|
327
360
|
if (similar.length > 0) {
|
|
328
361
|
parts.push('## Relevant Past Feedback\n');
|
|
329
|
-
for (
|
|
362
|
+
for (let i = 0; i < similar.length; i++) {
|
|
363
|
+
const mem = similar[i];
|
|
330
364
|
const signal = mem.signal === 'positive' ? 'GOOD' : 'BAD';
|
|
331
|
-
|
|
365
|
+
const confidence = mem._distance != null ? Math.max(0, (1 - mem._distance) * 100).toFixed(0) : '?';
|
|
366
|
+
parts.push(`**[${signal}]** (${confidence}% match) ${mem.context}`);
|
|
332
367
|
if (mem.tags) parts.push(` Tags: ${mem.tags}`);
|
|
368
|
+
if (mem.timestamp) parts.push(` When: ${mem.timestamp}`);
|
|
333
369
|
parts.push('');
|
|
334
370
|
}
|
|
335
371
|
}
|
|
@@ -359,9 +395,13 @@ async function callToolInner(name, args = {}) {
|
|
|
359
395
|
}
|
|
360
396
|
} catch (_) {}
|
|
361
397
|
|
|
362
|
-
|
|
398
|
+
// 4. Append stats + audit trail (glass box)
|
|
399
|
+
parts.push('');
|
|
400
|
+
parts.push(formatStats());
|
|
401
|
+
|
|
402
|
+
const text = parts.length > 1
|
|
363
403
|
? parts.join('\n')
|
|
364
|
-
: 'No past feedback found. This appears to be a fresh start
|
|
404
|
+
: 'No past feedback found. This appears to be a fresh start.\n\n' + formatStats();
|
|
365
405
|
|
|
366
406
|
return { content: [{ type: 'text', text }] };
|
|
367
407
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rlhf-feedback-loop",
|
|
3
|
-
"version": "0.6.
|
|
3
|
+
"version": "0.6.6",
|
|
4
4
|
"description": "Make your AI agent learn from mistakes. Capture thumbs up/down feedback, block repeated failures, export DPO training data. Works with ChatGPT, Claude, Codex, Gemini, Amp.",
|
|
5
5
|
"homepage": "https://github.com/IgorGanapolsky/rlhf-feedback-loop#readme",
|
|
6
6
|
"repository": {
|