rlhf-feedback-loop 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/adapters/README.md +8 -0
- package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
- package/adapters/chatgpt/INSTALL.md +80 -0
- package/adapters/chatgpt/openapi.yaml +292 -0
- package/adapters/claude/.mcp.json +8 -0
- package/adapters/codex/config.toml +4 -0
- package/adapters/gemini/function-declarations.json +95 -0
- package/adapters/mcp/server-stdio.js +444 -0
- package/bin/cli.js +167 -0
- package/config/mcp-allowlists.json +29 -0
- package/config/policy-bundles/constrained-v1.json +53 -0
- package/config/policy-bundles/default-v1.json +80 -0
- package/config/rubrics/default-v1.json +52 -0
- package/config/subagent-profiles.json +32 -0
- package/openapi/openapi.yaml +292 -0
- package/package.json +91 -0
- package/plugins/amp-skill/INSTALL.md +52 -0
- package/plugins/amp-skill/SKILL.md +31 -0
- package/plugins/claude-skill/INSTALL.md +55 -0
- package/plugins/claude-skill/SKILL.md +46 -0
- package/plugins/codex-profile/AGENTS.md +20 -0
- package/plugins/codex-profile/INSTALL.md +57 -0
- package/plugins/gemini-extension/INSTALL.md +74 -0
- package/plugins/gemini-extension/gemini_prompt.txt +10 -0
- package/plugins/gemini-extension/tool_contract.json +28 -0
- package/scripts/billing.js +471 -0
- package/scripts/budget-guard.js +173 -0
- package/scripts/code-reasoning.js +307 -0
- package/scripts/context-engine.js +547 -0
- package/scripts/contextfs.js +513 -0
- package/scripts/contract-audit.js +198 -0
- package/scripts/dpo-optimizer.js +208 -0
- package/scripts/export-dpo-pairs.js +316 -0
- package/scripts/export-training.js +448 -0
- package/scripts/feedback-attribution.js +313 -0
- package/scripts/feedback-inbox-read.js +162 -0
- package/scripts/feedback-loop.js +838 -0
- package/scripts/feedback-schema.js +300 -0
- package/scripts/feedback-to-memory.js +165 -0
- package/scripts/feedback-to-rules.js +109 -0
- package/scripts/generate-paperbanana-diagrams.sh +99 -0
- package/scripts/hybrid-feedback-context.js +676 -0
- package/scripts/intent-router.js +164 -0
- package/scripts/mcp-policy.js +92 -0
- package/scripts/meta-policy.js +194 -0
- package/scripts/plan-gate.js +154 -0
- package/scripts/prove-adapters.js +364 -0
- package/scripts/prove-attribution.js +364 -0
- package/scripts/prove-automation.js +393 -0
- package/scripts/prove-data-quality.js +219 -0
- package/scripts/prove-intelligence.js +256 -0
- package/scripts/prove-lancedb.js +370 -0
- package/scripts/prove-loop-closure.js +255 -0
- package/scripts/prove-rlaif.js +404 -0
- package/scripts/prove-subway-upgrades.js +250 -0
- package/scripts/prove-training-export.js +324 -0
- package/scripts/prove-v2-milestone.js +273 -0
- package/scripts/prove-v3-milestone.js +381 -0
- package/scripts/rlaif-self-audit.js +123 -0
- package/scripts/rubric-engine.js +230 -0
- package/scripts/self-heal.js +127 -0
- package/scripts/self-healing-check.js +111 -0
- package/scripts/skill-quality-tracker.js +284 -0
- package/scripts/subagent-profiles.js +79 -0
- package/scripts/sync-gh-secrets-from-env.sh +29 -0
- package/scripts/thompson-sampling.js +331 -0
- package/scripts/train_from_feedback.py +914 -0
- package/scripts/validate-feedback.js +580 -0
- package/scripts/vector-store.js +100 -0
- package/src/api/server.js +497 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Thompson Sampling Beta-Bernoulli Module
|
|
4
|
+
*
|
|
5
|
+
* Implements per-category reliability estimates (ML-01) and exponential
|
|
6
|
+
* time-decay weighting with half-life of 7 days (ML-02).
|
|
7
|
+
*
|
|
8
|
+
* Source: Direct port of train_from_feedback.py (Subway_RN_Demo) lines 218-293.
|
|
9
|
+
* Algorithm: Beta-Bernoulli update with Marsaglia-Tsang gamma sampling for
|
|
10
|
+
* posterior draws. Zero external npm dependencies.
|
|
11
|
+
*
|
|
12
|
+
* Usage:
|
|
13
|
+
* const ts = require('./thompson-sampling');
|
|
14
|
+
* const model = ts.loadModel(modelPath);
|
|
15
|
+
* ts.updateModel(model, { signal: 'positive', timestamp: '...', categories: ['testing'] });
|
|
16
|
+
* const rel = ts.getReliability(model);
|
|
17
|
+
* const post = ts.samplePosteriors(model);
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
'use strict';
|
|
21
|
+
|
|
22
|
+
const fs = require('fs');
|
|
23
|
+
const { parseTimestamp } = require('./feedback-schema');
|
|
24
|
+
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// Constants
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
/**
 * Half-life of the exponential time decay, in days. Feedback that is this
 * many days old counts half as much as feedback recorded right now.
 */
const HALF_LIFE_DAYS = 7;

/**
 * Smallest weight a feedback entry can receive. Keeps very old entries from
 * vanishing entirely and is also the weight used when a timestamp cannot be
 * parsed, so an update is never silently multiplied by zero.
 */
const DECAY_FLOOR = 1e-2;

/**
 * Category names a brand-new model is seeded with: eight keyword categories
 * (described in the original source as mirroring the Subway project) followed
 * by the 'uncategorized' catch-all.
 */
const DEFAULT_CATEGORIES = [
  'code_edit', 'git', 'testing', 'pr_review',
  'search', 'architecture', 'security', 'debugging',
  'uncategorized',
];
|
|
53
|
+
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Time-Decay Weight
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
/**
 * Compute the exponential time-decay weight for a feedback timestamp.
 *
 * Formula: weight = max(2^(-ageDays / HALF_LIFE_DAYS), DECAY_FLOOR)
 *
 *   age = 0 days            -> 1.0
 *   age = HALF_LIFE_DAYS    -> 0.5
 *   age -> infinity         -> DECAY_FLOOR
 *
 * Edge cases:
 *   - A timestamp that parseTimestamp() rejects (falsy result) yields DECAY_FLOOR.
 *   - A parsed value whose getTime() is not a finite number (e.g. an
 *     "Invalid Date" object) also yields DECAY_FLOOR instead of NaN.
 *   - A timestamp in the future (clock skew, bad input) is treated as age 0,
 *     so the weight never exceeds 1.0.
 *
 * NOTE(review): parseTimestamp comes from ./feedback-schema and is assumed to
 * return a Date or a falsy value — confirm against that module.
 *
 * @param {string|null|undefined} timestamp - ISO 8601 timestamp string
 * @returns {number} Weight in [DECAY_FLOOR, 1.0]
 */
function timeDecayWeight(timestamp) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  const parsed = parseTimestamp(timestamp);
  if (!parsed) return DECAY_FLOOR;

  const ageMs = Date.now() - parsed.getTime();
  // An invalid Date is truthy but its getTime() is NaN; Math.max(NaN, x) would
  // propagate NaN into alpha/beta, so fall back to the floor instead.
  if (!Number.isFinite(ageMs)) return DECAY_FLOOR;

  // Negative age means the timestamp lies in the future; clamp to "just now"
  // so a single skewed entry cannot be weighted above 1.0.
  const ageDays = Math.max(ageMs, 0) / MS_PER_DAY;
  return Math.max(Math.pow(2, -ageDays / HALF_LIFE_DAYS), DECAY_FLOOR);
}
|
|
78
|
+
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Model Lifecycle
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
/**
 * Build a brand-new Beta-Bernoulli model.
 *
 * Every name in DEFAULT_CATEGORIES gets its own entry with a uniform
 * Beta(1, 1) prior, zero recorded samples and no last-update time. The
 * model's `created` and `updated` fields share the same ISO timestamp taken
 * at call time.
 *
 * @returns {Object} Fresh model: { version, created, updated, total_entries, categories }
 */
function createInitialModel() {
  const stamp = new Date().toISOString();

  const categories = {};
  for (const name of DEFAULT_CATEGORIES) {
    // A separate object per category so later updates never alias each other.
    categories[name] = {
      alpha: 1.0,
      beta: 1.0,
      samples: 0,
      last_updated: null,
    };
  }

  const model = {
    version: 1,
    created: stamp,
    updated: stamp,
    total_entries: 0,
    categories,
  };
  return model;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Load an existing model from disk.
 *
 * Falls back to createInitialModel() when the file cannot be read (missing,
 * unreadable), when it does not contain valid JSON, or when the JSON is valid
 * but is not an object (e.g. `null`, an array, a bare number). If the parsed
 * object has no usable `categories` map, an empty one is attached so that the
 * model can be updated without further checks.
 *
 * The fallback is deliberately silent: this is a best-effort loader and the
 * caller receives a usable model in every case.
 *
 * IMPORTANT: Never call createInitialModel() directly when you intend to
 * update an existing model — that would reset all accumulated posteriors.
 *
 * @param {string} modelPath - Absolute or relative path to feedback_model.json
 * @returns {Object} Parsed model or fresh initial model
 */
function loadModel(modelPath) {
  let parsed;
  try {
    // Read directly instead of existsSync()+read: one syscall fewer and no
    // window in which the file can disappear between the check and the read.
    parsed = JSON.parse(fs.readFileSync(modelPath, 'utf-8'));
  } catch (_err) {
    // Missing file, unreadable file, or corrupt JSON — start from fresh priors.
    return createInitialModel();
  }

  // JSON.parse happily returns null, arrays and scalars; none of them is a model.
  const isObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  if (!isObject) {
    return createInitialModel();
  }

  const cats = parsed.categories;
  if (cats === null || typeof cats !== 'object' || Array.isArray(cats)) {
    // Keep whatever metadata the file had, but guarantee a category map exists.
    parsed.categories = {};
  }
  return parsed;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Persist a model object to disk as formatted JSON (2-space indent, trailing
 * newline).
 *
 * Behavior:
 *   - Sets `model.updated` to the current ISO timestamp before serializing,
 *     so the file reflects the time of save (the passed object is mutated).
 *   - Creates the parent directory chain if it does not exist.
 *   - Writes to a temporary sibling file and renames it over the target, so
 *     an interrupted save does not leave a truncated model file behind
 *     (loadModel() would treat a truncated file as corrupt and start over).
 *
 * Errors from the filesystem are rethrown after a best-effort removal of the
 * temporary file.
 *
 * @param {Object} model - Model object to persist
 * @param {string} modelPath - Absolute or relative path to write
 */
function saveModel(model, modelPath) {
  const path = require('path');

  model.updated = new Date().toISOString();

  // `recursive: true` makes mkdirSync a no-op for directories that already exist.
  fs.mkdirSync(path.dirname(modelPath), { recursive: true });

  const payload = `${JSON.stringify(model, null, 2)}\n`;
  // Same directory as the target so the rename stays on one filesystem.
  const tmpPath = `${modelPath}.${process.pid}.tmp`;

  try {
    fs.writeFileSync(tmpPath, payload);
    fs.renameSync(tmpPath, modelPath);
  } catch (err) {
    try {
      fs.unlinkSync(tmpPath);
    } catch (_cleanupErr) {
      // Temp file was never created or is already gone — nothing to clean up.
    }
    throw err;
  }
}
|
|
142
|
+
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// Model Update
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
/**
 * Apply a single weighted Beta-Bernoulli update to the model.
 *
 *   signal === 'positive'  -> alpha += timeDecayWeight(timestamp)
 *   any other signal       -> beta  += timeDecayWeight(timestamp)
 *
 * Every listed category is updated; `samples` is incremented and
 * `last_updated` is set to the raw `timestamp` argument. A category that is
 * not yet an own entry of `model.categories` is first created with the
 * default prior (alpha=1, beta=1). If the model has no `categories` map at
 * all, an empty one is created.
 *
 * Category names are only ever looked up and stored as OWN properties of
 * `model.categories`. Names such as '__proto__', 'constructor' or 'toString'
 * therefore become ordinary categories instead of resolving to (and
 * mutating) Object.prototype or other inherited built-ins.
 *
 * Mutates model in place AND returns the model for chaining.
 *
 * @param {Object} model - Model object (mutated in place)
 * @param {Object} params
 * @param {'positive'|'negative'} params.signal - Feedback direction
 * @param {string} params.timestamp - ISO 8601 timestamp for decay calculation
 * @param {string[]} [params.categories] - Categories to update; defaults to ['uncategorized']
 * @returns {Object} The mutated model
 */
function updateModel(model, { signal, timestamp, categories }) {
  const weight = timeDecayWeight(timestamp);
  const isPositive = signal === 'positive';
  const cats = categories && categories.length ? categories : ['uncategorized'];

  if (!model.categories || typeof model.categories !== 'object') {
    model.categories = {};
  }
  const store = model.categories;

  cats.forEach((cat) => {
    // Own-property lookup only: a plain `store[cat]` would find inherited
    // members (e.g. Object.prototype for '__proto__') and update those.
    const isOwn = Object.prototype.hasOwnProperty.call(store, cat);
    let entry = isOwn ? store[cat] : undefined;

    if (!entry) {
      entry = { alpha: 1.0, beta: 1.0, samples: 0, last_updated: null };
      // defineProperty creates a real own data property even for '__proto__',
      // where plain assignment would invoke the prototype setter instead.
      Object.defineProperty(store, cat, {
        value: entry,
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }

    if (isPositive) {
      entry.alpha += weight;
    } else {
      entry.beta += weight;
    }
    entry.samples += 1;
    entry.last_updated = timestamp;
  });

  model.total_entries = (model.total_entries || 0) + 1;
  model.updated = new Date().toISOString();
  return model;
}
|
|
187
|
+
|
|
188
|
+
// ---------------------------------------------------------------------------
|
|
189
|
+
// Reliability Estimation
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
/**
 * Summarize every category of the model by its Beta posterior mean.
 *
 *   reliability = alpha / (alpha + beta)
 *
 * When alpha + beta is not greater than zero the reliability is reported as
 * 0.5. A category still at the Beta(1, 1) prior therefore also reports 0.5;
 * additional positive weight moves the value toward 1.0, additional negative
 * weight toward 0.0.
 *
 * A model without a `categories` map produces an empty result.
 *
 * @param {Object} model - Model object containing categories
 * @returns {Object} Map of category → { alpha, beta, reliability, samples }
 */
function getReliability(model) {
  const categories = model.categories || {};

  return Object.keys(categories).reduce((summary, name) => {
    const { alpha, beta, samples } = categories[name];
    const mass = alpha + beta;

    let reliability = 0.5;
    if (mass > 0) {
      reliability = alpha / mass;
    }

    summary[name] = { alpha, beta, reliability, samples };
    return summary;
  }, {});
}
|
|
216
|
+
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// Posterior Sampling
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
/**
 * Draw one sample from the Beta(alpha, beta) posterior of every category.
 *
 * Sampling is delegated to betaSample(), which builds the Beta draw from two
 * gamma draws: X / (X + Y) with X ~ Gamma(alpha), Y ~ Gamma(beta). No external
 * library is needed. Intended for Thompson Sampling action selection, where
 * the randomness of the draw provides exploration.
 *
 * Shape parameters are sanitized before sampling:
 *   - values below 0.01 are raised to 0.01;
 *   - values that are missing or not numeric (which would otherwise turn the
 *     sample into NaN) fall back to 1.0, the same prior new categories get;
 *   - a category whose entry is null/undefined is sampled from Beta(1, 1).
 *
 * @param {Object} model - Model object containing categories
 * @returns {Object} Map of category → float sample in [0, 1]
 */
function samplePosteriors(model) {
  const MIN_SHAPE = 0.01;
  const PRIOR_SHAPE = 1.0;

  // Math.max() yields NaN for undefined / non-numeric input; catch that here
  // so it never reaches the sampler.
  const toShape = (value) => {
    const clamped = Math.max(value, MIN_SHAPE);
    return Number.isFinite(clamped) ? clamped : PRIOR_SHAPE;
  };

  const samples = {};
  for (const [cat, params] of Object.entries(model.categories || {})) {
    const entry = params || {};
    samples[cat] = betaSample(toShape(entry.alpha), toShape(entry.beta));
  }
  return samples;
}
|
|
243
|
+
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
// Internal: Marsaglia-Tsang Gamma Sampling (2000)
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
/**
 * Draw one sample from Gamma(shape, 1) with the Marsaglia-Tsang (2000)
 * rejection method.
 *
 * For shape < 1 the sampler draws from Gamma(shape + 1) and multiplies the
 * result by U^(1/shape), where U comes from Math.random().
 *
 * For shape >= 1 it repeatedly proposes d * (1 + c*x)^3, with x a standard
 * normal draw from gaussSample(), d = shape - 1/3 and c = 1 / sqrt(9 * d),
 * and accepts the proposal via a cheap polynomial bound first and the exact
 * logarithmic test second.
 *
 * @param {number} shape - Shape parameter (must be > 0)
 * @returns {number} Gamma-distributed sample
 */
function gammaSample(shape) {
  if (shape < 1) {
    // Boost into the shape >= 1 regime, then scale back down.
    const boosted = gammaSample(1 + shape);
    const scale = Math.pow(Math.random(), 1 / shape);
    return boosted * scale;
  }

  const d = shape - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);

  // Rejection loop; each pass costs one (or more) normal draws and one uniform draw.
  for (;;) {
    // Redraw the normal until the cubed term's base is strictly positive.
    let normal = gaussSample();
    let base = 1 + c * normal;
    while (base <= 0) {
      normal = gaussSample();
      base = 1 + c * normal;
    }

    const cube = base * base * base;
    const uniform = Math.random();

    const passesSqueeze = uniform < 1 - 0.0331 * (normal * normal) * (normal * normal);
    if (
      passesSqueeze ||
      Math.log(uniform) < 0.5 * normal * normal + d * (1 - cube + Math.log(cube))
    ) {
      return d * cube;
    }
  }
}
|
|
284
|
+
|
|
285
|
+
/**
 * Draw one standard normal sample using the polar (rejection) form of the
 * Box-Muller transform.
 *
 * A point (a, b) is drawn uniformly from the square [-1, 1) x [-1, 1) and
 * rejected until its squared radius lies strictly between 0 and 1. Excluding
 * a squared radius of exactly 0 avoids evaluating log(0) and dividing by 0.
 * Only one of the two normal variates the method can produce is returned.
 *
 * @returns {number} Standard normal sample
 */
function gaussSample() {
  for (;;) {
    const a = Math.random() * 2 - 1;
    const b = Math.random() * 2 - 1;
    const radiusSq = a * a + b * b;

    if (radiusSq !== 0 && radiusSq < 1) {
      return a * Math.sqrt((-2 * Math.log(radiusSq)) / radiusSq);
    }
  }
}
|
|
302
|
+
|
|
303
|
+
/**
 * Sample from Beta(alpha, beta) using the gamma ratio method:
 *
 *   X ~ Gamma(alpha), Y ~ Gamma(beta)  =>  X / (X + Y) ~ Beta(alpha, beta)
 *
 * For very small shape parameters both gamma draws can underflow to exactly
 * 0, which would make the ratio 0/0 = NaN. In that case 0 is returned, the
 * same result CPython's random.betavariate() gives when its first gamma draw
 * is zero.
 *
 * @param {number} alpha - Alpha shape parameter (> 0)
 * @param {number} beta - Beta shape parameter (> 0)
 * @returns {number} Beta-distributed sample in [0, 1]
 */
function betaSample(alpha, beta) {
  const x = gammaSample(alpha);
  const y = gammaSample(beta);
  const total = x + y;

  // total === 0 only when both draws are 0; avoid producing NaN.
  if (total === 0) return 0;
  return x / total;
}
|
|
315
|
+
|
|
316
|
+
// ---------------------------------------------------------------------------
|
|
317
|
+
// Exports
|
|
318
|
+
// ---------------------------------------------------------------------------
|
|
319
|
+
|
|
320
|
+
// Public API of this module. gammaSample, gaussSample and betaSample are
// intentionally not part of it.
const publicApi = {
  // Time-decay weighting
  timeDecayWeight,

  // Model lifecycle
  loadModel,
  saveModel,
  createInitialModel,

  // Updates, reliability estimates and posterior draws
  updateModel,
  getReliability,
  samplePosteriors,

  // Tunable constants
  HALF_LIFE_DAYS,
  DECAY_FLOOR,
  DEFAULT_CATEGORIES,
};

module.exports = publicApi;
|