rlhf-feedback-loop 0.6.4 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,7 +6,19 @@
 [![MCP Ready](https://img.shields.io/badge/MCP-ready-black)](adapters/mcp/server-stdio.js)
 [![DPO Ready](https://img.shields.io/badge/DPO-ready-blue)](scripts/export-dpo-pairs.js)
 
-**Make your AI agent learn from mistakes.** Capture thumbs up/down feedback, block repeated failures, and export DPO training data across ChatGPT, Claude, Codex, Gemini, and Amp.
+**The complete RLHF data pipeline for AI coding agents.** Capture human feedback, build memory, generate prevention rules, and export DPO training pairs: the full loop from thumbs up/down to model fine-tuning.
+
+## What This Is (and Isn't)
+
+This tool implements the **data collection and preference pipeline** side of RLHF — the part that turns your daily interactions with AI agents into structured training data. Out of the box, it:
+
+- **Captures** thumbs up/down feedback with context, tags, and rubric scores
+- **Remembers** via JSONL logs + LanceDB vector search across sessions
+- **Prevents** repeated mistakes with auto-generated guardrails
+- **Recalls** relevant past feedback mid-conversation (in-session context injection)
+- **Exports** DPO training pairs (prompt/chosen/rejected) for model fine-tuning
+
+It does **not** update model weights in real-time. That's the fine-tuning step, which you do separately using the DPO pairs this tool exports. The full loop: capture feedback here → export DPO pairs → fine-tune with [TRL](https://github.com/huggingface/trl), [OpenPipe](https://openpipe.ai), or any DPO trainer → deploy improved model.
 
 ## Architecture
 
@@ -22,12 +34,12 @@ One command. Pick your platform:
 |----------|---------|
 | **Claude** | `claude mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
 | **Codex** | `codex mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
-| **Gemini** | `gemini mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
-| **Amp** | `cp node_modules/rlhf-feedback-loop/plugins/amp-skill/SKILL.md .amp/skills/rlhf-feedback/SKILL.md` |
+| **Gemini** | `gemini mcp add rlhf "npx -y rlhf-feedback-loop serve"` |
+| **Amp** | `amp mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
 | **Cursor** | `cursor mcp add rlhf -- npx -y rlhf-feedback-loop serve` |
 | **All at once** | `npx add-mcp rlhf-feedback-loop` |
 
-That's it. Your agent can now capture feedback, recall past learnings mid-conversation, and block repeated mistakes.
+That's it. Your agent can now capture feedback, recall past learnings mid-conversation, and block repeated mistakes. Run once per project — the MCP server starts automatically on each session.
 
 ## How It Works
 
@@ -57,15 +69,21 @@ DPO export → fine-tune your model
 
 All data stored locally as **JSONL** files — fully transparent, fully portable, no vendor lock-in. **LanceDB** indexes memories as vector embeddings for semantic search. **ShieldCortex** assembles context packs so your agent starts each task informed.
 
-## Why This Exists
+## Free vs. Cloud Pro
+
+The open-source package is fully functional and free forever. Cloud Pro is for teams that don't want to self-host.
+
+| | Open Source | Cloud Pro ($10/mo) |
+|---|---|---|
+| Feedback capture | Local MCP server | Hosted HTTPS API |
+| Storage | Your machine | Managed cloud |
+| DPO export | CLI command | API endpoint |
+| Setup | `mcp add` one-liner | Provisioned API key |
+| Team sharing | Manual (share JSONL) | Built-in (shared API) |
+| Support | GitHub Issues | Email |
+| Uptime | You manage | We manage (99.9% SLA) |
 
-| Problem | What this does |
-|---------|---------------|
-| Agent keeps making the same mistake | Prevention rules auto-generated from repeated failures |
-| Agent claims "done" without proof | Rubric engine blocks positive feedback without test evidence |
-| Feedback collected but never used | DPO pairs exported for actual model fine-tuning |
-| Different tools, different formats | One MCP server works across 5 platforms |
-| Agent starts every task blank | In-session recall injects past learnings into current conversation |
+[Get Cloud Pro](https://buy.stripe.com/bJe14neyU4r4f0leOD3sI02) | [Live API](https://rlhf-feedback-loop-710216278770.us-central1.run.app)
 
 ## Deep Dive
 
@@ -249,23 +249,123 @@ function parseOptionalObject(input, name)
   throw new Error(`${name} must be an object`);
 }
 
+function detectFeedbackSignal(text) {
+  const lower = String(text || '').toLowerCase();
+  const UP = /\b(thumbs?\s*up|that worked|looks good|nice work|perfect|good job)\b/;
+  const DOWN = /\b(thumbs?\s*down|that failed|that was wrong|fix this)\b/;
+  if (UP.test(lower)) return 'up';
+  if (DOWN.test(lower)) return 'down';
+  return null;
+}
+
+function formatStats() {
+  const logPath = path.join(SAFE_DATA_DIR, 'feedback-log.jsonl');
+  const memPath = path.join(SAFE_DATA_DIR, 'memory-log.jsonl');
+  if (!fs.existsSync(logPath)) return 'No feedback captured yet.';
+  const lines = fs.readFileSync(logPath, 'utf8').trim().split('\n').filter(Boolean);
+  const entries = lines.map(l => { try { return JSON.parse(l); } catch (_) { return null; } }).filter(Boolean);
+  const pos = entries.filter(e => e.signal === 'positive').length;
+  const neg = entries.filter(e => e.signal === 'negative').length;
+  const memCount = fs.existsSync(memPath) ? fs.readFileSync(memPath, 'utf8').trim().split('\n').filter(Boolean).length : 0;
+
+  // HBR: "Which cases consume disproportionate time?" — top error domains
+  const negEntries = entries.filter(e => e.signal === 'negative');
+  const domainCounts = {};
+  negEntries.forEach(e => {
+    const domain = (e.richContext && e.richContext.domain) || 'general';
+    domainCounts[domain] = (domainCounts[domain] || 0) + 1;
+  });
+  const topDomains = Object.entries(domainCounts).sort((a, b) => b[1] - a[1]).slice(0, 3);
+
+  // HBR: "Glass box" — audit trail of recent decisions
+  const recent = entries.slice(-5).reverse();
+  const auditTrail = recent.map(e => {
+    const sig = e.signal === 'positive' ? 'UP' : 'DN';
+    const ts = (e.timestamp || '').slice(11, 19);
+    const ctx = (e.context || '').slice(0, 60);
+    return ` [${sig}] ${ts} ${ctx}`;
+  });
+
+  const parts = [
+    '## Storage',
+    ` Feedback log : ${entries.length} entries`,
+    ` Memory log : ${memCount} memories`,
+    ` LanceDB : ${path.join(SAFE_DATA_DIR, 'lancedb/')}`,
+    '',
+    '## Stats',
+    ` Total : ${entries.length}`,
+    ` Positive : ${pos}`,
+    ` Negative : ${neg}`,
+    ` Promoted : ${memCount}`,
+    ` Ratio : ${pos > 0 ? (pos / (pos + neg) * 100).toFixed(0) + '% positive' : 'n/a'}`,
+  ];
+
+  if (topDomains.length > 0) {
+    parts.push('', '## Top Error Domains (where mistakes cluster)');
+    topDomains.forEach(([domain, count]) => {
+      parts.push(` ${domain}: ${count} failures`);
+    });
+  }
+
+  if (auditTrail.length > 0) {
+    parts.push('', '## Audit Trail (last 5 decisions)');
+    parts.push(...auditTrail);
+  }
+
+  return parts.join('\n');
+}
+
 async function callTool(name, args = {}) {
   assertToolAllowed(name, getActiveMcpProfile());
 
+  // Platform-agnostic auto-capture: detect feedback signals in any tool call
+  const textToCheck = args.query || args.context || '';
+  const autoSignal = detectFeedbackSignal(textToCheck);
+  if (autoSignal && name !== 'capture_feedback') {
+    const autoResult = captureFeedback({
+      signal: autoSignal,
+      context: textToCheck,
+      tags: ['auto-capture', 'mcp'],
+    });
+    const ev = autoResult.feedbackEvent || {};
+    const autoReport = [
+      '',
+      `## Auto-Captured Feedback [${autoSignal.toUpperCase()}]`,
+      ` Feedback ID : ${ev.id || 'n/a'}`,
+      ` Signal : ${ev.signal || autoSignal} (${ev.actionType || 'unknown'})`,
+      ` Context : ${(ev.context || textToCheck).slice(0, 80)}`,
+      ` Timestamp : ${ev.timestamp || new Date().toISOString()}`,
+      ` Promoted : ${autoResult.accepted ? 'yes (Memory ID: ' + (autoResult.memoryRecord || {}).id + ')' : 'no — ' + (autoResult.reason || '')}`,
+      '',
+      formatStats(),
+    ].join('\n');
+    // Prepend the auto-capture report to whatever the tool was going to return
+    const toolResult = await callToolInner(name, args);
+    toolResult.content[0].text = autoReport + '\n\n---\n\n' + toolResult.content[0].text;
+    return toolResult;
+  }
+
+  return callToolInner(name, args);
+}
+
+async function callToolInner(name, args = {}) {
   if (name === 'recall') {
     const query = args.query || '';
     const limit = Number(args.limit || 5);
     const parts = [];
 
-    // 1. Vector search for similar past feedback
+    // 1. Vector search for similar past feedback with confidence scores
     try {
       const similar = await searchSimilar(query, limit);
       if (similar.length > 0) {
         parts.push('## Relevant Past Feedback\n');
-        for (const mem of similar) {
+        for (let i = 0; i < similar.length; i++) {
+          const mem = similar[i];
           const signal = mem.signal === 'positive' ? 'GOOD' : 'BAD';
-          parts.push(`**[${signal}]** ${mem.context}`);
+          const confidence = mem._distance != null ? Math.max(0, (1 - mem._distance) * 100).toFixed(0) : '?';
+          parts.push(`**[${signal}]** (${confidence}% match) ${mem.context}`);
           if (mem.tags) parts.push(` Tags: ${mem.tags}`);
+          if (mem.timestamp) parts.push(` When: ${mem.timestamp}`);
          parts.push('');
        }
      }
@@ -295,9 +395,13 @@ async function callTool(name, args = {}) {
     }
   } catch (_) {}
 
-  const text = parts.length > 0
+  // 4. Append stats + audit trail (glass box)
+  parts.push('');
+  parts.push(formatStats());
+
+  const text = parts.length > 1
     ? parts.join('\n')
-    : 'No past feedback found. This appears to be a fresh start.';
+    : 'No past feedback found. This appears to be a fresh start.\n\n' + formatStats();
 
   return { content: [{ type: 'text', text }] };
 }
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "rlhf-feedback-loop",
-  "version": "0.6.4",
+  "version": "0.6.6",
   "description": "Make your AI agent learn from mistakes. Capture thumbs up/down feedback, block repeated failures, export DPO training data. Works with ChatGPT, Claude, Codex, Gemini, Amp.",
   "homepage": "https://github.com/IgorGanapolsky/rlhf-feedback-loop#readme",
   "repository": {