npm - rlhf-feedback-loop - Versions diffs - 0.5.0 - Mend

rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/CHANGELOG.md +26 -0
package/LICENSE +21 -0
package/README.md +308 -0
package/adapters/README.md +8 -0
package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
package/adapters/chatgpt/INSTALL.md +80 -0
package/adapters/chatgpt/openapi.yaml +292 -0
package/adapters/claude/.mcp.json +8 -0
package/adapters/codex/config.toml +4 -0
package/adapters/gemini/function-declarations.json +95 -0
package/adapters/mcp/server-stdio.js +444 -0
package/bin/cli.js +167 -0
package/config/mcp-allowlists.json +29 -0
package/config/policy-bundles/constrained-v1.json +53 -0
package/config/policy-bundles/default-v1.json +80 -0
package/config/rubrics/default-v1.json +52 -0
package/config/subagent-profiles.json +32 -0
package/openapi/openapi.yaml +292 -0
package/package.json +91 -0
package/plugins/amp-skill/INSTALL.md +52 -0
package/plugins/amp-skill/SKILL.md +31 -0
package/plugins/claude-skill/INSTALL.md +55 -0
package/plugins/claude-skill/SKILL.md +46 -0
package/plugins/codex-profile/AGENTS.md +20 -0
package/plugins/codex-profile/INSTALL.md +57 -0
package/plugins/gemini-extension/INSTALL.md +74 -0
package/plugins/gemini-extension/gemini_prompt.txt +10 -0
package/plugins/gemini-extension/tool_contract.json +28 -0
package/scripts/billing.js +471 -0
package/scripts/budget-guard.js +173 -0
package/scripts/code-reasoning.js +307 -0
package/scripts/context-engine.js +547 -0
package/scripts/contextfs.js +513 -0
package/scripts/contract-audit.js +198 -0
package/scripts/dpo-optimizer.js +208 -0
package/scripts/export-dpo-pairs.js +316 -0
package/scripts/export-training.js +448 -0
package/scripts/feedback-attribution.js +313 -0
package/scripts/feedback-inbox-read.js +162 -0
package/scripts/feedback-loop.js +838 -0
package/scripts/feedback-schema.js +300 -0
package/scripts/feedback-to-memory.js +165 -0
package/scripts/feedback-to-rules.js +109 -0
package/scripts/generate-paperbanana-diagrams.sh +99 -0
package/scripts/hybrid-feedback-context.js +676 -0
package/scripts/intent-router.js +164 -0
package/scripts/mcp-policy.js +92 -0
package/scripts/meta-policy.js +194 -0
package/scripts/plan-gate.js +154 -0
package/scripts/prove-adapters.js +364 -0
package/scripts/prove-attribution.js +364 -0
package/scripts/prove-automation.js +393 -0
package/scripts/prove-data-quality.js +219 -0
package/scripts/prove-intelligence.js +256 -0
package/scripts/prove-lancedb.js +370 -0
package/scripts/prove-loop-closure.js +255 -0
package/scripts/prove-rlaif.js +404 -0
package/scripts/prove-subway-upgrades.js +250 -0
package/scripts/prove-training-export.js +324 -0
package/scripts/prove-v2-milestone.js +273 -0
package/scripts/prove-v3-milestone.js +381 -0
package/scripts/rlaif-self-audit.js +123 -0
package/scripts/rubric-engine.js +230 -0
package/scripts/self-heal.js +127 -0
package/scripts/self-healing-check.js +111 -0
package/scripts/skill-quality-tracker.js +284 -0
package/scripts/subagent-profiles.js +79 -0
package/scripts/sync-gh-secrets-from-env.sh +29 -0
package/scripts/thompson-sampling.js +331 -0
package/scripts/train_from_feedback.py +914 -0
package/scripts/validate-feedback.js +580 -0
package/scripts/vector-store.js +100 -0
package/src/api/server.js +497 -0

package/config/policy-bundles/default-v1.json ADDED Viewed

@@ -0,0 +1,80 @@
+{
+  "bundleId": "default-v1",
+  "version": 1,
+  "description": "Balanced autonomous execution bundle with human checkpoints on high-risk actions.",
+  "defaultMcpProfile": "default",
+  "approval": {
+    "requiredRisks": ["high", "critical"],
+    "profileOverrides": {
+      "default": ["high", "critical"],
+      "readonly": ["high", "critical"],
+      "locked": ["medium", "high", "critical"]
+    }
+  },
+  "intents": [
+    {
+      "id": "capture_feedback_loop",
+      "description": "Capture user outcome and update memory artifacts.",
+      "risk": "low",
+      "actions": [
+        {
+          "kind": "mcp_tool",
+          "name": "capture_feedback"
+        },
+        {
+          "kind": "mcp_tool",
+          "name": "feedback_summary"
+        }
+      ]
+    },
+    {
+      "id": "improve_response_quality",
+      "description": "Summarize recent failures and regenerate prevention rules.",
+      "risk": "medium",
+      "actions": [
+        {
+          "kind": "mcp_tool",
+          "name": "feedback_summary"
+        },
+        {
+          "kind": "mcp_tool",
+          "name": "prevention_rules"
+        },
+        {
+          "kind": "mcp_tool",
+          "name": "construct_context_pack"
+        }
+      ]
+    },
+    {
+      "id": "publish_dpo_training_data",
+      "description": "Export DPO preference pairs for model improvement pipelines.",
+      "risk": "high",
+      "actions": [
+        {
+          "kind": "mcp_tool",
+          "name": "export_dpo_pairs"
+        }
+      ]
+    },
+    {
+      "id": "incident_postmortem",
+      "description": "Construct evidence pack and record evaluation for incident review.",
+      "risk": "medium",
+      "actions": [
+        {
+          "kind": "mcp_tool",
+          "name": "construct_context_pack"
+        },
+        {
+          "kind": "mcp_tool",
+          "name": "context_provenance"
+        },
+        {
+          "kind": "mcp_tool",
+          "name": "evaluate_context_pack"
+        }
+      ]
+    }
+  ]
+}

package/config/rubrics/default-v1.json ADDED Viewed

@@ -0,0 +1,52 @@
+{
+  "rubricId": "default-v1",
+  "version": 1,
+  "description": "Rubric for coding-agent response quality and operational safety.",
+  "criteria": [
+    {
+      "id": "correctness",
+      "label": "Technical correctness",
+      "weight": 0.3,
+      "minPassingScore": 3
+    },
+    {
+      "id": "verification_evidence",
+      "label": "Verification evidence quality",
+      "weight": 0.25,
+      "minPassingScore": 3,
+      "requiresEvidence": true
+    },
+    {
+      "id": "safety",
+      "label": "Safety and guardrail compliance",
+      "weight": 0.2,
+      "minPassingScore": 3
+    },
+    {
+      "id": "instruction_following",
+      "label": "Instruction following",
+      "weight": 0.15,
+      "minPassingScore": 3
+    },
+    {
+      "id": "clarity",
+      "label": "Clarity and communication",
+      "weight": 0.1,
+      "minPassingScore": 2
+    }
+  ],
+  "guardrails": [
+    {
+      "key": "testsPassed",
+      "label": "Required tests passed"
+    },
+    {
+      "key": "pathSafety",
+      "label": "Path/file safety checks passed"
+    },
+    {
+      "key": "budgetCompliant",
+      "label": "Budget compliance maintained"
+    }
+  ]
+}

package/config/subagent-profiles.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "version": 1,
+  "profiles": {
+    "pr_workflow": {
+      "description": "Prepare PR metadata and feedback evidence while keeping write operations constrained.",
+      "mcpProfile": "default",
+      "skills": ["change-report", "feedback-summary"],
+      "context": {
+        "maxItems": 8,
+        "maxChars": 6000
+      }
+    },
+    "review_workflow": {
+      "description": "Read-heavy code review and risk analysis profile.",
+      "mcpProfile": "readonly",
+      "skills": ["code-review", "verification"],
+      "context": {
+        "maxItems": 10,
+        "maxChars": 7000
+      }
+    },
+    "secure_runtime": {
+      "description": "Minimal profile for constrained environments.",
+      "mcpProfile": "locked",
+      "skills": ["feedback-summary"],
+      "context": {
+        "maxItems": 5,
+        "maxChars": 3000
+      }
+    }
+  }
+}

package/openapi/openapi.yaml ADDED Viewed

@@ -0,0 +1,292 @@
+openapi: 3.1.0
+info:
+  title: RLHF Feedback Loop API
+  version: 1.1.0
+  description: |
+    Production API for feedback capture, schema-validated memory promotion,
+    prevention rule generation, and DPO export.
+servers:
+  - url: http://localhost:8787
+security:
+  - bearerAuth: []
+components:
+  securitySchemes:
+    bearerAuth:
+      type: http
+      scheme: bearer
+      bearerFormat: API Key
+  schemas:
+    RubricScore:
+      type: object
+      required: [criterion, score]
+      properties:
+        criterion:
+          type: string
+        score:
+          type: number
+          minimum: 1
+          maximum: 5
+        evidence:
+          type: string
+        judge:
+          type: string
+    CaptureFeedbackRequest:
+      type: object
+      required: [signal, context]
+      properties:
+        signal:
+          type: string
+          enum: [up, down, positive, negative]
+        context:
+          type: string
+        whatWentWrong:
+          type: string
+        whatToChange:
+          type: string
+        whatWorked:
+          type: string
+        rubricScores:
+          type: array
+          items:
+            $ref: '#/components/schemas/RubricScore'
+        guardrails:
+          type: object
+          properties:
+            testsPassed:
+              type: boolean
+            pathSafety:
+              type: boolean
+            budgetCompliant:
+              type: boolean
+        tags:
+          oneOf:
+            - type: array
+              items:
+                type: string
+            - type: string
+        skill:
+          type: string
+    IntentPlanRequest:
+      type: object
+      required: [intentId]
+      properties:
+        intentId:
+          type: string
+        context:
+          type: string
+        mcpProfile:
+          type: string
+        bundleId:
+          type: string
+        approved:
+          type: boolean
+paths:
+  /healthz:
+    get:
+      operationId: healthz
+      responses:
+        '200':
+          description: Service health
+        '401':
+          description: Unauthorized
+  /v1/feedback/capture:
+    post:
+      operationId: captureFeedback
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CaptureFeedbackRequest'
+      responses:
+        '200':
+          description: Feedback accepted and promoted to memory
+        '422':
+          description: Feedback recorded but rejected for memory promotion
+        '401':
+          description: Unauthorized
+  /v1/feedback/stats:
+    get:
+      operationId: getFeedbackStats
+      responses:
+        '200':
+          description: Aggregated feedback statistics
+        '401':
+          description: Unauthorized
+  /v1/intents/catalog:
+    get:
+      operationId: listIntentCatalog
+      parameters:
+        - in: query
+          name: mcpProfile
+          schema:
+            type: string
+        - in: query
+          name: bundleId
+          schema:
+            type: string
+      responses:
+        '200':
+          description: Intent catalog with risk and checkpoint metadata
+        '401':
+          description: Unauthorized
+  /v1/intents/plan:
+    post:
+      operationId: planIntent
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/IntentPlanRequest'
+      responses:
+        '200':
+          description: Policy-scoped intent execution plan
+        '400':
+          description: Invalid intent request
+        '401':
+          description: Unauthorized
+  /v1/feedback/summary:
+    get:
+      operationId: getFeedbackSummary
+      parameters:
+        - in: query
+          name: recent
+          schema:
+            type: integer
+            default: 20
+      responses:
+        '200':
+          description: Feedback summary text
+        '401':
+          description: Unauthorized
+  /v1/feedback/rules:
+    post:
+      operationId: generatePreventionRules
+      requestBody:
+        required: false
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                minOccurrences:
+                  type: integer
+                  default: 2
+                outputPath:
+                  type: string
+      responses:
+        '200':
+          description: Prevention rules generated
+        '401':
+          description: Unauthorized
+  /v1/dpo/export:
+    post:
+      operationId: exportDpoPairs
+      requestBody:
+        required: false
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                inputPath:
+                  type: string
+                memoryLogPath:
+                  type: string
+                outputPath:
+                  type: string
+      responses:
+        '200':
+          description: DPO export completed
+        '401':
+          description: Unauthorized
+  /v1/context/construct:
+    post:
+      operationId: constructContextPack
+      requestBody:
+        required: false
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                query:
+                  type: string
+                maxItems:
+                  type: integer
+                  default: 8
+                maxChars:
+                  type: integer
+                  default: 6000
+                namespaces:
+                  type: array
+                  items:
+                    type: string
+                    enum:
+                      - raw_history
+                      - memory/error
+                      - memory/learning
+                      - rules
+                      - tools
+                      - provenance
+      responses:
+        '200':
+          description: Context pack created
+        '400':
+          description: Invalid namespace selection
+        '401':
+          description: Unauthorized
+  /v1/context/evaluate:
+    post:
+      operationId: evaluateContextPack
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required: [packId, outcome]
+              properties:
+                packId:
+                  type: string
+                outcome:
+                  type: string
+                signal:
+                  type: string
+                notes:
+                  type: string
+                rubricScores:
+                  type: array
+                  items:
+                    $ref: '#/components/schemas/RubricScore'
+                guardrails:
+                  type: object
+                  properties:
+                    testsPassed:
+                      type: boolean
+                    pathSafety:
+                      type: boolean
+                    budgetCompliant:
+                      type: boolean
+      responses:
+        '200':
+          description: Evaluation recorded
+        '400':
+          description: Invalid rubric payload
+        '401':
+          description: Unauthorized
+  /v1/context/provenance:
+    get:
+      operationId: getContextProvenance
+      parameters:
+        - in: query
+          name: limit
+          schema:
+            type: integer
+            default: 50
+      responses:
+        '200':
+          description: Recent provenance events
+        '401':
+          description: Unauthorized

package/package.json ADDED Viewed

@@ -0,0 +1,91 @@
+{
+  "name": "rlhf-feedback-loop",
+  "version": "0.5.0",
+  "description": "Production-grade RLHF feedback operations for coding agents: capture thumbs signals, enforce schema quality, prevent repeated mistakes, and export DPO pairs.",
+  "main": "scripts/feedback-loop.js",
+  "bin": {
+    "rlhf-feedback-loop": "./bin/cli.js"
+  },
+  "files": [
+    "bin/",
+    "scripts/",
+    "src/",
+    "adapters/",
+    "plugins/",
+    "openapi/",
+    "config/",
+    "README.md",
+    "LICENSE",
+    "CHANGELOG.md"
+  ],
+  "type": "commonjs",
+  "scripts": {
+    "test": "npm run test:schema && npm run test:loop && npm run test:dpo && npm run test:api && npm run test:proof && npm run test:e2e && npm run test:rlaif && npm run test:attribution && npm run test:quality && npm run test:intelligence && npm run test:training-export && npm run test:deployment && npm run test:billing && npm run test:cli",
+    "test:e2e": "node --test tests/e2e-pipeline.test.js",
+    "test:schema": "node scripts/feedback-schema.js --test",
+    "test:loop": "node scripts/feedback-loop.js --test",
+    "test:dpo": "node scripts/export-dpo-pairs.js --test",
+    "test:api": "node --test tests/api-server.test.js tests/api-auth-config.test.js tests/mcp-server.test.js tests/adapters.test.js tests/openapi-parity.test.js tests/budget-guard.test.js tests/contextfs.test.js tests/mcp-policy.test.js tests/subagent-profiles.test.js tests/intent-router.test.js tests/rubric-engine.test.js tests/self-healing-check.test.js tests/self-heal.test.js tests/feedback-schema.test.js tests/thompson-sampling.test.js tests/feedback-sequences.test.js tests/diversity-tracking.test.js tests/vector-store.test.js tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js tests/loop-closure.test.js tests/code-reasoning.test.js",
+    "test:proof": "node --test tests/prove-adapters.test.js tests/prove-automation.test.js",
+    "test:rlaif": "node --test tests/rlaif-self-audit.test.js tests/dpo-optimizer.test.js tests/meta-policy.test.js",
+    "test:attribution": "node --test tests/feedback-attribution.test.js tests/hybrid-feedback-context.test.js",
+    "test:quality": "node --test tests/validate-feedback.test.js",
+    "test:intelligence": "node --test tests/intelligence.test.js",
+    "test:training-export": "node --test tests/training-export.test.js",
+    "test:deployment": "node --test tests/deployment.test.js",
+    "test:billing": "node --test tests/billing.test.js",
+    "test:cli": "node --test tests/cli.test.js",
+    "start:api": "node src/api/server.js",
+    "start:mcp": "node adapters/mcp/server-stdio.js",
+    "feedback:capture": "node .claude/scripts/feedback/capture-feedback.js",
+    "feedback:stats": "node .claude/scripts/feedback/capture-feedback.js --stats",
+    "feedback:summary": "node .claude/scripts/feedback/capture-feedback.js --summary",
+    "feedback:rules": "node .claude/scripts/feedback/capture-feedback.js --rules",
+    "feedback:export:dpo": "node scripts/export-dpo-pairs.js --from-local --output .claude/memory/feedback/dpo-pairs.jsonl",
+    "budget:status": "node scripts/budget-guard.js --status",
+    "diagrams:paperbanana": "bash scripts/generate-paperbanana-diagrams.sh",
+    "prove:adapters": "node scripts/prove-adapters.js",
+    "prove:automation": "node scripts/prove-automation.js",
+    "prove:lancedb": "node scripts/prove-lancedb.js",
+    "prove:rlaif": "node scripts/prove-rlaif.js",
+    "prove:attribution": "node scripts/prove-attribution.js",
+    "prove:data-quality": "node scripts/prove-data-quality.js",
+    "prove:loop-closure": "node scripts/prove-loop-closure.js",
+    "prove:intelligence": "node scripts/prove-intelligence.js",
+    "prove:training-export": "node scripts/prove-training-export.js",
+    "prove:v2-milestone": "node scripts/prove-v2-milestone.js",
+    "feedback:export:pytorch": "node scripts/export-training.js --pytorch",
+    "feedback:export:csv": "node scripts/export-training.js --csv",
+    "feedback:export:actions": "node scripts/export-training.js --actions",
+    "prove:subway-upgrades": "node scripts/prove-subway-upgrades.js",
+    "self-heal:check": "node scripts/self-healing-check.js",
+    "self-heal:run": "node scripts/self-heal.js --reason=manual",
+    "intents:list": "node scripts/intent-router.js",
+    "intents:plan": "node scripts/intent-router.js --intent=publish_dpo_training_data",
+    "ml:dpo": "node scripts/dpo-optimizer.js --run",
+    "ml:meta-policy": "node scripts/meta-policy.js --extract",
+    "ml:train": "python3 scripts/train_from_feedback.py --train",
+    "ml:incremental": "python3 scripts/train_from_feedback.py --incremental",
+    "ml:reliability": "python3 scripts/train_from_feedback.py --reliability",
+    "ml:sample": "python3 scripts/train_from_feedback.py --sample"
+  },
+  "keywords": [
+    "rlhf",
+    "dpo",
+    "ai-agents",
+    "llmops",
+    "preference-learning",
+    "feedback-loop",
+    "claude",
+    "codex",
+    "gemini",
+    "agent-evaluation",
+    "prompt-engineering"
+  ],
+  "license": "MIT",
+  "dependencies": {
+    "@huggingface/transformers": "^3.8.1",
+    "@lancedb/lancedb": "^0.26.2",
+    "apache-arrow": "^18.1.0"
+  }
+}

package/plugins/amp-skill/INSTALL.md ADDED Viewed

@@ -0,0 +1,52 @@
+# Amp: RLHF Feedback Skill Install
+Install the RLHF skill for Amp in under 60 seconds. No manual file editing required.
+## One-Command Install
+```bash
+cp plugins/amp-skill/SKILL.md .amp/skills/rlhf-feedback.md
+```
+Or from the npm package:
+```bash
+npx rlhf-feedback-loop init
+cp node_modules/rlhf-feedback-loop/plugins/amp-skill/SKILL.md .amp/skills/rlhf-feedback.md
+```
+## What This Does
+Copies the skill definition to `.amp/skills/` so Amp loads it automatically on next launch.
+## Verify
+After copying, restart Amp. The skill will appear in the active skills list.
+Then test:
+```bash
+node .rlhf/capture-feedback.js --feedback=up --context="amp skill install verified" --tags="install"
+```
+## Available Commands (via skill)
+```bash
+# Positive feedback
+node .rlhf/capture-feedback.js --feedback=up --context="..." --tags="..."
+# Negative feedback
+node .rlhf/capture-feedback.js --feedback=down --context="..." --what-went-wrong="..." --what-to-change="..." --tags="..."
+```
+## Requirements
+- Amp (any version with skills support)
+- Node.js 18+ in PATH
+- `.rlhf/` directory (created by `npx rlhf-feedback-loop init`)
+## Uninstall
+```bash
+rm .amp/skills/rlhf-feedback.md
+```

package/plugins/amp-skill/SKILL.md ADDED Viewed

@@ -0,0 +1,31 @@
+---
+name: rlhf-feedback
+description: Capture thumbs feedback and apply prevention rules before coding
+---
+# Amp RLHF Skill
+On explicit user feedback:
+```bash
+node .rlhf/capture-feedback.js --feedback=up --context="..." --tags="..."
+node .rlhf/capture-feedback.js --feedback=down --context="..." --what-went-wrong="..." --what-to-change="..." --tags="..."
+```
+Before major implementation:
+```bash
+node .rlhf/capture-feedback.js --feedback=up --context="session start" --tags="session" 2>/dev/null || true
+```
+## Triggers
+- "thumbs up" / "that worked" / "looks good"
+- "thumbs down" / "that failed" / "that was wrong"
+## Negative Triggers (do NOT activate for)
+- "generate code"
+- "search files"
+- "explain this"
+- "run tests"

package/plugins/claude-skill/INSTALL.md ADDED Viewed

@@ -0,0 +1,55 @@
+# Claude Code: RLHF Feedback Skill Install
+Install the skill in under 60 seconds. No manual file editing required.
+## One-Command Install
+```bash
+cp plugins/claude-skill/SKILL.md .claude/skills/rlhf-feedback.md
+```
+Or from the published npm package:
+```bash
+npx rlhf-feedback-loop init
+cp node_modules/rlhf-feedback-loop/plugins/claude-skill/SKILL.md .claude/skills/rlhf-feedback.md
+```
+## What This Does
+Copies the skill definition to `.claude/skills/` so Claude Code loads it automatically on next launch.
+The skill activates on triggers: "thumbs up", "thumbs down", "that worked", "that failed".
+## Verify
+After copying, restart Claude Code. The skill should then appear in its list of available skills:
+```bash
+# Claude Code will show available skills:
+# rlhf-feedback — Capture thumbs up/down feedback into structured memories
+```
+Then test it:
+```bash
+node .rlhf/capture-feedback.js --feedback=up --context="skill install verified" --tags="install"
+```
+## What You Get
+- Automatic feedback capture on quality signals
+- Prevention rules generated from repeated mistakes
+- Session-start context loading: `npm run feedback:summary && npm run feedback:rules`
+## Requirements
+- Claude Code (any version)
+- Node.js 18+ in PATH
+- `.rlhf/` directory (created by `npx rlhf-feedback-loop init`)
+## Uninstall
+```bash
+rm .claude/skills/rlhf-feedback.md
+```