rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/LICENSE +21 -0
  3. package/README.md +308 -0
  4. package/adapters/README.md +8 -0
  5. package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
  6. package/adapters/chatgpt/INSTALL.md +80 -0
  7. package/adapters/chatgpt/openapi.yaml +292 -0
  8. package/adapters/claude/.mcp.json +8 -0
  9. package/adapters/codex/config.toml +4 -0
  10. package/adapters/gemini/function-declarations.json +95 -0
  11. package/adapters/mcp/server-stdio.js +444 -0
  12. package/bin/cli.js +167 -0
  13. package/config/mcp-allowlists.json +29 -0
  14. package/config/policy-bundles/constrained-v1.json +53 -0
  15. package/config/policy-bundles/default-v1.json +80 -0
  16. package/config/rubrics/default-v1.json +52 -0
  17. package/config/subagent-profiles.json +32 -0
  18. package/openapi/openapi.yaml +292 -0
  19. package/package.json +91 -0
  20. package/plugins/amp-skill/INSTALL.md +52 -0
  21. package/plugins/amp-skill/SKILL.md +31 -0
  22. package/plugins/claude-skill/INSTALL.md +55 -0
  23. package/plugins/claude-skill/SKILL.md +46 -0
  24. package/plugins/codex-profile/AGENTS.md +20 -0
  25. package/plugins/codex-profile/INSTALL.md +57 -0
  26. package/plugins/gemini-extension/INSTALL.md +74 -0
  27. package/plugins/gemini-extension/gemini_prompt.txt +10 -0
  28. package/plugins/gemini-extension/tool_contract.json +28 -0
  29. package/scripts/billing.js +471 -0
  30. package/scripts/budget-guard.js +173 -0
  31. package/scripts/code-reasoning.js +307 -0
  32. package/scripts/context-engine.js +547 -0
  33. package/scripts/contextfs.js +513 -0
  34. package/scripts/contract-audit.js +198 -0
  35. package/scripts/dpo-optimizer.js +208 -0
  36. package/scripts/export-dpo-pairs.js +316 -0
  37. package/scripts/export-training.js +448 -0
  38. package/scripts/feedback-attribution.js +313 -0
  39. package/scripts/feedback-inbox-read.js +162 -0
  40. package/scripts/feedback-loop.js +838 -0
  41. package/scripts/feedback-schema.js +300 -0
  42. package/scripts/feedback-to-memory.js +165 -0
  43. package/scripts/feedback-to-rules.js +109 -0
  44. package/scripts/generate-paperbanana-diagrams.sh +99 -0
  45. package/scripts/hybrid-feedback-context.js +676 -0
  46. package/scripts/intent-router.js +164 -0
  47. package/scripts/mcp-policy.js +92 -0
  48. package/scripts/meta-policy.js +194 -0
  49. package/scripts/plan-gate.js +154 -0
  50. package/scripts/prove-adapters.js +364 -0
  51. package/scripts/prove-attribution.js +364 -0
  52. package/scripts/prove-automation.js +393 -0
  53. package/scripts/prove-data-quality.js +219 -0
  54. package/scripts/prove-intelligence.js +256 -0
  55. package/scripts/prove-lancedb.js +370 -0
  56. package/scripts/prove-loop-closure.js +255 -0
  57. package/scripts/prove-rlaif.js +404 -0
  58. package/scripts/prove-subway-upgrades.js +250 -0
  59. package/scripts/prove-training-export.js +324 -0
  60. package/scripts/prove-v2-milestone.js +273 -0
  61. package/scripts/prove-v3-milestone.js +381 -0
  62. package/scripts/rlaif-self-audit.js +123 -0
  63. package/scripts/rubric-engine.js +230 -0
  64. package/scripts/self-heal.js +127 -0
  65. package/scripts/self-healing-check.js +111 -0
  66. package/scripts/skill-quality-tracker.js +284 -0
  67. package/scripts/subagent-profiles.js +79 -0
  68. package/scripts/sync-gh-secrets-from-env.sh +29 -0
  69. package/scripts/thompson-sampling.js +331 -0
  70. package/scripts/train_from_feedback.py +914 -0
  71. package/scripts/validate-feedback.js +580 -0
  72. package/scripts/vector-store.js +100 -0
  73. package/src/api/server.js +497 -0
@@ -0,0 +1,284 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Skill Quality Tracker
4
+ *
5
+ * Correlates tool call metrics to feedback signals by timestamp proximity.
6
+ * After a sequence of tool calls and feedback captures, produces a per-skill
7
+ * quality score derived from timestamp-proximity correlation.
8
+ *
9
+ * Ported from Subway_RN_Demo/.claude/scripts/feedback/skill-quality-tracker.js
10
+ * PATH: PROJECT_ROOT = path.join(__dirname, '..') — 1 level up from scripts/
11
+ */
12
+
13
+ 'use strict';
14
+
15
+ const fs = require('fs');
16
+ const readline = require('readline');
17
+ const path = require('path');
18
+
19
// Directory that holds the feedback artifacts. RLHF_FEEDBACK_DIR overrides the
// default, which lives under .claude/memory/feedback one level above this script.
const FEEDBACK_DIR = process.env.RLHF_FEEDBACK_DIR
  ? process.env.RLHF_FEEDBACK_DIR
  : path.join(__dirname, '..', '.claude', 'memory', 'feedback');

// Tool-call metrics log (JSONL). METRICS_PATH overrides the default file.
const METRICS_PATH = process.env.METRICS_PATH
  ? process.env.METRICS_PATH
  : path.join(FEEDBACK_DIR, 'tool-metrics.jsonl');

// Feedback log (JSONL). FEEDBACK_PATH overrides the default file.
const FEEDBACK_PATH = process.env.FEEDBACK_PATH
  ? process.env.FEEDBACK_PATH
  : path.join(FEEDBACK_DIR, 'feedback-log.jsonl');
27
+
28
// Correlation window in milliseconds: feedback recorded within 60 seconds of a
// tool call (before or after) is treated as correlated with that call.
const CORRELATION_WINDOW_MS = 60 * 1000;
30
+
31
/**
 * Parse one line of a JSONL file without throwing.
 *
 * @param {string} line - Raw text of a single line
 * @returns {*} Whatever JSON.parse yields for the line, or null when the line
 *   is not valid JSON
 */
function parseLine(line) {
  let parsed = null;
  try {
    parsed = JSON.parse(line);
  } catch {
    // Malformed line: fall through with the null default.
  }
  return parsed;
}
44
+
45
/**
 * Load feedback entries from a JSONL file.
 *
 * A line is kept only when it parses as JSON, has a truthy `timestamp`, and has
 * a truthy `feedback` (Subway schema) or `signal` (rlhf schema) field. The
 * values 'up' and 'down' are normalized to 'positive' and 'negative'; any other
 * value is passed through unchanged. Lines whose timestamp cannot be parsed by
 * `Date` are skipped.
 *
 * @param {string} filePath - Path to the feedback JSONL file
 * @returns {Promise<Array<{ ts: number, feedback: string, tool: string|null }>>}
 *   Entries sorted by ascending timestamp; an empty array when the file does
 *   not exist
 */
async function loadFeedback(filePath) {
  const entries = [];
  if (!fs.existsSync(filePath)) return entries;

  const rl = readline.createInterface({
    input: fs.createReadStream(filePath),
    crlfDelay: Infinity,
  });

  for await (const line of rl) {
    const obj = parseLine(line);
    if (!obj || !obj.timestamp) continue;

    // Support both 'feedback' (Subway) and 'signal' (rlhf) field names
    const feedbackVal = obj.feedback || obj.signal;
    if (!feedbackVal) continue;

    const ts = new Date(obj.timestamp).getTime();
    // A NaN timestamp can never satisfy a time-window comparison, and it makes
    // the numeric sort comparator below inconsistent, so drop the entry here.
    if (Number.isNaN(ts)) continue;

    // Normalize to 'positive'/'negative' regardless of source schema
    let normalized = feedbackVal;
    if (feedbackVal === 'up') normalized = 'positive';
    else if (feedbackVal === 'down') normalized = 'negative';

    entries.push({
      ts,
      feedback: normalized,
      tool: obj.tool_name || null,
    });
  }

  entries.sort((a, b) => a.ts - b.ts);
  return entries;
}
84
+
85
/**
 * Find correlated feedback for a tool call by timestamp proximity.
 *
 * Scans `feedbackEntries` in array order and returns the feedback value of the
 * FIRST entry whose timestamp lies within `windowMs` of `metricTs` (before or
 * after, inclusive) and whose tool is compatible. An entry with a tool name
 * must name `metricTool`; an entry without one correlates by time alone.
 *
 * @param {number} metricTs - Timestamp of the tool call (ms)
 * @param {string} metricTool - Tool name
 * @param {Array<{ ts: number, feedback: string, tool: string|null }>} feedbackEntries
 * @param {number} [windowMs=CORRELATION_WINDOW_MS] - Half-width of the
 *   correlation window in milliseconds
 * @returns {string|null} The matching entry's feedback value (e.g. 'positive'
 *   or 'negative'), or null if no correlation found
 */
function correlateFeedback(metricTs, metricTool, feedbackEntries, windowMs = CORRELATION_WINDOW_MS) {
  for (const fb of feedbackEntries) {
    const withinWindow = Math.abs(fb.ts - metricTs) <= windowMs;
    if (!withinWindow) continue;

    // If feedback has a tool name, it must match; otherwise correlate by time alone
    if (!fb.tool || fb.tool === metricTool) {
      return fb.feedback;
    }
  }
  return null;
}
107
+
108
/**
 * Process tool metrics JSONL and correlate each tool call with feedback.
 *
 * Lines that do not parse or have no truthy `tool_name` are ignored. Every
 * remaining line counts as one use of that tool. When the line's `timestamp`
 * parses to a valid date, the call is correlated against `feedbackEntries` and
 * the per-tool positive/negative counter is bumped accordingly.
 *
 * Counters are accumulated in a Map so that tool names which coincide with
 * inherited object keys ('constructor', 'toString', '__proto__', ...) are
 * counted like any other name. The Map is converted to a plain object at the
 * end.
 *
 * @param {string} metricsPath - Path to the tool-metrics JSONL file
 * @param {Array<{ ts: number, feedback: string, tool: string|null }>} feedbackEntries
 * @returns {Promise<{ totalToolUses: number, breakdown: object }>} breakdown
 *   maps tool name to { uses, correlatedPositive, correlatedNegative }; both
 *   are empty/zero when the file does not exist
 */
async function processMetrics(metricsPath, feedbackEntries) {
  let totalToolUses = 0;

  if (!fs.existsSync(metricsPath)) return { totalToolUses, breakdown: {} };

  const perTool = new Map();

  const rl = readline.createInterface({
    input: fs.createReadStream(metricsPath),
    crlfDelay: Infinity,
  });

  for await (const line of rl) {
    const obj = parseLine(line);
    if (!obj || !obj.tool_name) continue;

    totalToolUses++;
    const name = obj.tool_name;
    // Key by string so non-string names merge the way object property keys do.
    const key = String(name);

    let stats = perTool.get(key);
    if (!stats) {
      stats = { uses: 0, correlatedPositive: 0, correlatedNegative: 0 };
      perTool.set(key, stats);
    }

    stats.uses++;

    const ts = new Date(obj.timestamp).getTime();
    if (!Number.isNaN(ts)) {
      const signal = correlateFeedback(ts, name, feedbackEntries);
      if (signal === 'positive') stats.correlatedPositive++;
      else if (signal === 'negative') stats.correlatedNegative++;
    }
  }

  return { totalToolUses, breakdown: Object.fromEntries(perTool) };
}
149
+
150
/**
 * Attach a `successRate` to every tool in the breakdown.
 *
 * The rate is positive / (positive + negative) correlated feedback, rounded to
 * four decimal places, or null when the tool has no correlated feedback.
 * The breakdown object is modified in place; nothing is returned.
 *
 * @param {object} breakdown - { toolName: { uses, correlatedPositive, correlatedNegative } }
 */
function computeSuccessRates(breakdown) {
  Object.keys(breakdown).forEach((toolName) => {
    const stats = breakdown[toolName];
    const positives = stats.correlatedPositive;
    const rated = positives + stats.correlatedNegative;

    if (rated > 0) {
      stats.successRate = Number((positives / rated).toFixed(4));
    } else {
      stats.successRate = null;
    }
  });
}
164
+
165
/**
 * Pick the best-performing tools.
 *
 * Only tools with at least `min` uses and a non-null `successRate` qualify.
 * Results are ordered by success rate (highest first), ties broken by use
 * count (highest first), and truncated to `limit` entries.
 *
 * @param {object} breakdown - { toolName: { uses, successRate, ... } }
 * @param {number} [min=10] - Minimum uses threshold
 * @param {number} [limit=5] - Maximum entries to return
 * @returns {Array<{ tool: string, successRate: number, uses: number }>}
 */
function topPerformers(breakdown, min = 10, limit = 5) {
  const eligible = [];

  for (const [name, stats] of Object.entries(breakdown)) {
    if (stats.uses >= min && stats.successRate !== null) {
      eligible.push({ tool: name, successRate: stats.successRate, uses: stats.uses });
    }
  }

  eligible.sort((left, right) => {
    const byRate = right.successRate - left.successRate;
    return byRate || right.uses - left.uses;
  });

  return eligible.slice(0, limit);
}
180
+
181
/**
 * List tools whose correlated feedback is predominantly negative.
 *
 * A tool is flagged when it has at least one correlated feedback entry and
 * strictly more than 30% of its correlated feedback is negative. The negative
 * rate is rounded to four decimal places and results are ordered worst first.
 *
 * @param {object} breakdown - { toolName: { uses, correlatedPositive, correlatedNegative } }
 * @returns {Array<{ tool: string, negativeRate: number, uses: number }>}
 */
function troubleSpots(breakdown) {
  const flagged = [];

  for (const name of Object.keys(breakdown)) {
    const stats = breakdown[name];
    const rated = stats.correlatedPositive + stats.correlatedNegative;
    if (!(rated > 0)) continue;

    const negativeShare = stats.correlatedNegative / rated;
    if (negativeShare > 0.3) {
      flagged.push({
        tool: name,
        negativeRate: Number(negativeShare.toFixed(4)),
        uses: stats.uses,
      });
    }
  }

  flagged.sort((x, y) => y.negativeRate - x.negativeRate);
  return flagged;
}
204
+
205
/**
 * Turn the analysis results into human-readable recommendations.
 *
 * Emits, in order: one "Investigate" line per trouble spot, one line naming
 * the first top performer (if any), and one line counting tools that have 10+
 * uses but a null success rate (if any). When nothing applies, a single
 * placeholder message is returned.
 *
 * @param {Array} top - topPerformers result
 * @param {Array} trouble - troubleSpots result
 * @param {object} breakdown - full breakdown
 * @returns {string[]}
 */
function generateRecommendations(top, trouble, breakdown) {
  const asPercent = (ratio) => (ratio * 100).toFixed(1);
  const recs = [];

  for (const spot of trouble) {
    const message = `Investigate "${spot.tool}" — ${asPercent(spot.negativeRate)}% negative correlation across ${spot.uses} uses.`;
    recs.push(message);
  }

  if (top.length > 0) {
    const best = top[0];
    recs.push(
      `"${best.tool}" is the top performer (${asPercent(best.successRate)}% success). Consider expanding its usage patterns.`
    );
  }

  // Count heavily used tools that never received correlated feedback.
  let uncoveredCount = 0;
  for (const stats of Object.values(breakdown)) {
    if (stats.uses >= 10 && stats.successRate === null) uncoveredCount += 1;
  }
  if (uncoveredCount > 0) {
    recs.push(
      `${uncoveredCount} tool(s) with 10+ uses have no correlated feedback — consider adding coverage.`
    );
  }

  if (recs.length === 0) {
    recs.push('No actionable recommendations at this time.');
  }
  return recs;
}
240
+
241
/**
 * Build the skill quality report end to end.
 *
 * Loads feedback from FEEDBACK_PATH, correlates it with the tool metrics in
 * METRICS_PATH, derives success rates, top performers, trouble spots and
 * recommendations, prints the report to stdout as pretty-printed JSON, and
 * returns it.
 *
 * @returns {Promise<object>} Full skill quality report
 */
async function run() {
  const feedback = await loadFeedback(FEEDBACK_PATH);
  const metrics = await processMetrics(METRICS_PATH, feedback);
  const toolBreakdown = metrics.breakdown;

  // Adds successRate to each tool in place; the steps below rely on it.
  computeSuccessRates(toolBreakdown);

  const best = topPerformers(toolBreakdown);
  const worst = troubleSpots(toolBreakdown);
  const recommendations = generateRecommendations(best, worst, toolBreakdown);

  const report = {
    generatedAt: new Date().toISOString(),
    totalToolUses: metrics.totalToolUses,
    toolBreakdown,
    topPerformers: best,
    troubleSpots: worst,
    recommendations,
  };

  console.log(JSON.stringify(report, null, 2));
  return report;
}
268
+
269
+ if (require.main === module) {
270
+ run().catch(() => {}).finally(() => process.exit(0));
271
+ }
272
+
273
+ module.exports = {
274
+ parseLine,
275
+ correlateFeedback,
276
+ computeSuccessRates,
277
+ topPerformers,
278
+ troubleSpots,
279
+ generateRecommendations,
280
+ loadFeedback,
281
+ processMetrics,
282
+ run,
283
+ CORRELATION_WINDOW_MS,
284
+ };
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env node
2
+ const fs = require('fs');
3
+ const path = require('path');
4
+ const { loadMcpPolicy } = require('./mcp-policy');
5
+
6
// Parent of the directory that contains this script.
const PROJECT_ROOT = path.resolve(__dirname, '..');
// Location of the profile config under PROJECT_ROOT/config.
const DEFAULT_SUBAGENT_PROFILE_PATH = path.resolve(PROJECT_ROOT, 'config', 'subagent-profiles.json');
8
+
9
/**
 * Resolve which subagent profile config file to read.
 *
 * @returns {string} The RLHF_SUBAGENT_PROFILE_PATH environment variable when it
 *   is set to a non-empty value, otherwise DEFAULT_SUBAGENT_PROFILE_PATH
 */
function getSubagentProfilePath() {
  const override = process.env.RLHF_SUBAGENT_PROFILE_PATH;
  if (override) {
    return override;
  }
  return DEFAULT_SUBAGENT_PROFILE_PATH;
}
12
+
13
/**
 * Read and parse the subagent profile config file.
 *
 * The file path comes from getSubagentProfilePath(). The parsed document must
 * be an object whose `profiles` member is a non-array object.
 *
 * @returns {object} The parsed config (including its `profiles` object)
 * @throws {Error} If the file cannot be read, is not valid JSON (errors from
 *   fs.readFileSync / JSON.parse propagate), or has no usable `profiles` object
 */
function loadSubagentProfiles() {
  const raw = fs.readFileSync(getSubagentProfilePath(), 'utf-8');
  const parsed = JSON.parse(raw);

  // A JSON root of null or a primitive has no `profiles`; treat it the same
  // as a missing profiles object instead of failing with a raw TypeError.
  const isObjectRoot = parsed !== null && typeof parsed === 'object';
  const profiles = isObjectRoot ? parsed.profiles : undefined;

  // Arrays are typeof 'object' but cannot be a name -> profile mapping.
  if (!profiles || typeof profiles !== 'object' || Array.isArray(profiles)) {
    throw new Error('Invalid subagent profile config: missing profiles object');
  }
  return parsed;
}
21
+
22
/**
 * List the names of all configured subagent profiles.
 *
 * @returns {string[]} Keys of the `profiles` object in the loaded config
 */
function listSubagentProfiles() {
  const { profiles } = loadSubagentProfiles();
  return Object.keys(profiles);
}
26
+
27
/**
 * Look up a single subagent profile by name.
 *
 * Only the config's own entries are considered, so names that happen to match
 * inherited object members (e.g. 'constructor', 'toString') are reported as
 * unknown instead of returning something from Object.prototype.
 *
 * @param {string} name - Profile name
 * @returns {object} The profile definition
 * @throws {Error} If no profile with that name is configured
 */
function getSubagentProfile(name) {
  const { profiles } = loadSubagentProfiles();
  const isConfigured = Object.prototype.hasOwnProperty.call(profiles, name);
  const profile = isConfigured ? profiles[name] : undefined;
  if (!profile) {
    throw new Error(`Unknown subagent profile: ${name}`);
  }
  return profile;
}
35
+
36
/**
 * Validate every configured subagent profile.
 *
 * For each profile this checks that:
 *  - the entry itself is an object;
 *  - `mcpProfile` is present and names one of the MCP policy's own profiles;
 *  - `context` is an object whose `maxItems` and `maxChars` are finite
 *    numbers greater than zero.
 *
 * Problems are collected as messages instead of being thrown, so a single
 * call reports every issue found.
 *
 * @returns {{ valid: boolean, issues: string[] }} `valid` is true only when
 *   `issues` is empty
 * @throws {Error} Whatever loadSubagentProfiles() or loadMcpPolicy() throw
 */
function validateSubagentProfiles() {
  const parsed = loadSubagentProfiles();
  const policy = loadMcpPolicy();
  const issues = [];

  // Own-property lookup so that names like 'constructor' are not accepted
  // just because they exist on Object.prototype.
  const isKnownMcpProfile = (mcpProfile) =>
    Object.prototype.hasOwnProperty.call(policy.profiles, mcpProfile)
    && Boolean(policy.profiles[mcpProfile]);

  const isPositiveFinite = (value) => Number.isFinite(value) && value > 0;

  for (const [name, profile] of Object.entries(parsed.profiles)) {
    // A null or primitive entry would otherwise crash the property reads below.
    if (profile === null || typeof profile !== 'object') {
      issues.push(`${name}: profile must be an object`);
      continue;
    }

    if (!profile.mcpProfile) {
      issues.push(`${name}: missing mcpProfile`);
    } else if (!isKnownMcpProfile(profile.mcpProfile)) {
      issues.push(`${name}: unknown mcpProfile '${profile.mcpProfile}'`);
    }

    if (!profile.context || typeof profile.context !== 'object') {
      issues.push(`${name}: missing context settings`);
    } else {
      if (!isPositiveFinite(profile.context.maxItems)) {
        issues.push(`${name}: invalid context.maxItems`);
      }
      if (!isPositiveFinite(profile.context.maxChars)) {
        issues.push(`${name}: invalid context.maxChars`);
      }
    }
  }

  return {
    valid: issues.length === 0,
    issues,
  };
}
65
+
66
+ module.exports = {
67
+ DEFAULT_SUBAGENT_PROFILE_PATH,
68
+ getSubagentProfilePath,
69
+ loadSubagentProfiles,
70
+ listSubagentProfiles,
71
+ getSubagentProfile,
72
+ validateSubagentProfiles,
73
+ };
74
+
75
+ if (require.main === module) {
76
+ const result = validateSubagentProfiles();
77
+ console.log(JSON.stringify({ profiles: listSubagentProfiles(), ...result }, null, 2));
78
+ process.exit(result.valid ? 0 : 1);
79
+ }
@@ -0,0 +1,29 @@
1
#!/usr/bin/env bash
# Copy a fixed list of secrets from the current environment into a GitHub
# repository via `gh secret set`. The target repository is the first argument
# and defaults to IgorGanapolsky/rlhf-feedback-loop.
set -euo pipefail

REPO="${1:-IgorGanapolsky/rlhf-feedback-loop}"

# Minimal secret set for autonomous PR merge + optional LLM routing.
SECRET_KEYS=(
  GH_PAT
  SENTRY_DSN
  SENTRY_AUTH_TOKEN
  LLM_GATEWAY_BASE_URL
  LLM_GATEWAY_API_KEY
  TETRATE_API_KEY
)

echo "Syncing secrets to $REPO (only keys present in current environment)..."

for key in "${SECRET_KEYS[@]}"; do
  # Indirect expansion: read the variable whose name is stored in $key,
  # falling back to empty so `set -u` does not abort on unset keys.
  value="${!key:-}"

  if [[ -n "$value" ]]; then
    # The value is piped on stdin rather than passed as an argument to gh.
    printf '%s' "$value" | gh secret set "$key" -R "$REPO"
    echo "- set $key"
  else
    echo "- skip $key (not set)"
  fi
done

echo "Done."