rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/LICENSE +21 -0
  3. package/README.md +308 -0
  4. package/adapters/README.md +8 -0
  5. package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
  6. package/adapters/chatgpt/INSTALL.md +80 -0
  7. package/adapters/chatgpt/openapi.yaml +292 -0
  8. package/adapters/claude/.mcp.json +8 -0
  9. package/adapters/codex/config.toml +4 -0
  10. package/adapters/gemini/function-declarations.json +95 -0
  11. package/adapters/mcp/server-stdio.js +444 -0
  12. package/bin/cli.js +167 -0
  13. package/config/mcp-allowlists.json +29 -0
  14. package/config/policy-bundles/constrained-v1.json +53 -0
  15. package/config/policy-bundles/default-v1.json +80 -0
  16. package/config/rubrics/default-v1.json +52 -0
  17. package/config/subagent-profiles.json +32 -0
  18. package/openapi/openapi.yaml +292 -0
  19. package/package.json +91 -0
  20. package/plugins/amp-skill/INSTALL.md +52 -0
  21. package/plugins/amp-skill/SKILL.md +31 -0
  22. package/plugins/claude-skill/INSTALL.md +55 -0
  23. package/plugins/claude-skill/SKILL.md +46 -0
  24. package/plugins/codex-profile/AGENTS.md +20 -0
  25. package/plugins/codex-profile/INSTALL.md +57 -0
  26. package/plugins/gemini-extension/INSTALL.md +74 -0
  27. package/plugins/gemini-extension/gemini_prompt.txt +10 -0
  28. package/plugins/gemini-extension/tool_contract.json +28 -0
  29. package/scripts/billing.js +471 -0
  30. package/scripts/budget-guard.js +173 -0
  31. package/scripts/code-reasoning.js +307 -0
  32. package/scripts/context-engine.js +547 -0
  33. package/scripts/contextfs.js +513 -0
  34. package/scripts/contract-audit.js +198 -0
  35. package/scripts/dpo-optimizer.js +208 -0
  36. package/scripts/export-dpo-pairs.js +316 -0
  37. package/scripts/export-training.js +448 -0
  38. package/scripts/feedback-attribution.js +313 -0
  39. package/scripts/feedback-inbox-read.js +162 -0
  40. package/scripts/feedback-loop.js +838 -0
  41. package/scripts/feedback-schema.js +300 -0
  42. package/scripts/feedback-to-memory.js +165 -0
  43. package/scripts/feedback-to-rules.js +109 -0
  44. package/scripts/generate-paperbanana-diagrams.sh +99 -0
  45. package/scripts/hybrid-feedback-context.js +676 -0
  46. package/scripts/intent-router.js +164 -0
  47. package/scripts/mcp-policy.js +92 -0
  48. package/scripts/meta-policy.js +194 -0
  49. package/scripts/plan-gate.js +154 -0
  50. package/scripts/prove-adapters.js +364 -0
  51. package/scripts/prove-attribution.js +364 -0
  52. package/scripts/prove-automation.js +393 -0
  53. package/scripts/prove-data-quality.js +219 -0
  54. package/scripts/prove-intelligence.js +256 -0
  55. package/scripts/prove-lancedb.js +370 -0
  56. package/scripts/prove-loop-closure.js +255 -0
  57. package/scripts/prove-rlaif.js +404 -0
  58. package/scripts/prove-subway-upgrades.js +250 -0
  59. package/scripts/prove-training-export.js +324 -0
  60. package/scripts/prove-v2-milestone.js +273 -0
  61. package/scripts/prove-v3-milestone.js +381 -0
  62. package/scripts/rlaif-self-audit.js +123 -0
  63. package/scripts/rubric-engine.js +230 -0
  64. package/scripts/self-heal.js +127 -0
  65. package/scripts/self-healing-check.js +111 -0
  66. package/scripts/skill-quality-tracker.js +284 -0
  67. package/scripts/subagent-profiles.js +79 -0
  68. package/scripts/sync-gh-secrets-from-env.sh +29 -0
  69. package/scripts/thompson-sampling.js +331 -0
  70. package/scripts/train_from_feedback.py +914 -0
  71. package/scripts/validate-feedback.js +580 -0
  72. package/scripts/vector-store.js +100 -0
  73. package/src/api/server.js +497 -0
@@ -0,0 +1,331 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Thompson Sampling Beta-Bernoulli Module
4
+ *
5
+ * Implements per-category reliability estimates (ML-01) and exponential
6
+ * time-decay weighting with half-life of 7 days (ML-02).
7
+ *
8
+ * Source: Direct port of train_from_feedback.py (Subway_RN_Demo) lines 218-293.
9
+ * Algorithm: Beta-Bernoulli update with Marsaglia-Tsang gamma sampling for
10
+ * posterior draws. Zero external npm dependencies.
11
+ *
12
+ * Usage:
13
+ * const ts = require('./thompson-sampling');
14
+ * const model = ts.loadModel(modelPath);
15
+ * ts.updateModel(model, { signal: 'positive', timestamp: '...', categories: ['testing'] });
16
+ * const rel = ts.getReliability(model);
17
+ * const post = ts.samplePosteriors(model);
18
+ */
19
+
20
+ 'use strict';
21
+
22
+ const fs = require('fs');
23
+ const { parseTimestamp } = require('./feedback-schema');
24
+
25
+ // ---------------------------------------------------------------------------
26
+ // Constants
27
+ // ---------------------------------------------------------------------------
28
+
29
/**
 * Half-life of the exponential time decay, in days: feedback that is this
 * old counts half as much as feedback recorded right now.
 */
const HALF_LIFE_DAYS = 7.0;

/**
 * Lower bound applied to every decay weight. It keeps very old feedback from
 * vanishing entirely and is also the weight used when a timestamp cannot be
 * parsed, so an update is never silently multiplied by zero.
 */
const DECAY_FLOOR = 0.01;

/**
 * Categories seeded into a brand-new model. Per the original note these mirror
 * Subway's 8 keyword categories; 'uncategorized' is the catch-all appended last.
 */
const DEFAULT_CATEGORIES = [
  'code_edit', 'git', 'testing', 'pr_review',
  'search', 'architecture', 'security', 'debugging',
  'uncategorized',
];
53
+
54
+ // ---------------------------------------------------------------------------
55
+ // Time-Decay Weight
56
+ // ---------------------------------------------------------------------------
57
+
58
/**
 * Compute the exponential time-decay weight for a feedback timestamp.
 *
 * Formula: weight = max(2^(-ageDays / HALF_LIFE_DAYS), DECAY_FLOOR)
 *
 *   age = 0 days             -> 1.0
 *   age = HALF_LIFE_DAYS     -> 0.5
 *   age -> infinity          -> DECAY_FLOOR
 *
 * Timestamps that lie in the future (clock skew, bad input) are treated as
 * "age 0" so the weight can never exceed 1.0; without that clamp a timestamp
 * ten half-lives ahead would weigh 1024x a fresh observation.
 *
 * Timestamps that cannot be interpreted yield DECAY_FLOOR, so callers never
 * receive 0 or NaN.
 *
 * @param {string|null|undefined} timestamp - ISO 8601 timestamp string
 * @returns {number} Weight in [DECAY_FLOOR, 1.0]
 */
function timeDecayWeight(timestamp) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  // parseTimestamp comes from ./feedback-schema; it presumably returns a Date
  // or a falsy value for unparseable input — TODO confirm against that module.
  const parsed = parseTimestamp(timestamp);
  if (!parsed) return DECAY_FLOOR;

  // An Invalid Date (getTime() === NaN) would otherwise propagate NaN through
  // Math.pow and Math.max.
  const ageMs = Date.now() - parsed.getTime();
  if (!Number.isFinite(ageMs)) return DECAY_FLOOR;

  // Clamp negative ages (future timestamps) to zero to respect the 1.0 ceiling.
  const ageDays = Math.max(ageMs, 0) / MS_PER_DAY;
  return Math.max(Math.pow(2, -ageDays / HALF_LIFE_DAYS), DECAY_FLOOR);
}
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // Model Lifecycle
81
+ // ---------------------------------------------------------------------------
82
+
83
/**
 * Build a brand-new Beta-Bernoulli model.
 *
 * Every name in DEFAULT_CATEGORIES starts at the uniform prior
 * (alpha = 1, beta = 1) with zero samples and no last-update time.
 * `created` and `updated` share one ISO timestamp taken at call time.
 *
 * @returns {Object} Fresh model: { version, created, updated, total_entries, categories }
 */
function createInitialModel() {
  const stamp = new Date().toISOString();

  // One independent prior record per default category.
  const priors = DEFAULT_CATEGORIES.reduce((acc, name) => {
    acc[name] = { alpha: 1.0, beta: 1.0, samples: 0, last_updated: null };
    return acc;
  }, {});

  return {
    version: 1,
    created: stamp,
    updated: stamp,
    total_entries: 0,
    categories: priors,
  };
}
103
+
104
/**
 * Load an existing model from disk.
 *
 * Falls back to createInitialModel() when the file cannot be read (missing,
 * unreadable), when it does not contain valid JSON, or when the JSON is valid
 * but is not an object (e.g. `null`, an array, a bare string) — none of those
 * can act as a model.
 *
 * If the parsed object has no usable `categories` map, an empty one is
 * attached so the result can be passed straight to updateModel().
 *
 * IMPORTANT: Never call createInitialModel() directly when you intend to
 * update an existing model — that would reset all accumulated posteriors.
 *
 * @param {string} modelPath - Absolute or relative path to feedback_model.json
 * @returns {Object} Parsed model or fresh initial model
 */
function loadModel(modelPath) {
  let parsed;
  try {
    // Read directly instead of existsSync-then-read: one syscall fewer and no
    // window in which the file can disappear between the check and the read.
    parsed = JSON.parse(fs.readFileSync(modelPath, 'utf-8'));
  } catch (_err) {
    // Missing file, unreadable file, or corrupt JSON — start from scratch.
    return createInitialModel();
  }

  const isRecord = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  if (!isRecord(parsed)) {
    return createInitialModel();
  }
  if (!isRecord(parsed.categories)) {
    parsed.categories = {};
  }
  return parsed;
}
124
+
125
/**
 * Persist a model object to disk as formatted JSON (2-space indent, trailing
 * newline).
 *
 * Creates parent directories if needed and stamps `model.updated` with the
 * current time before serializing, so the file reflects the time of save.
 *
 * The JSON is written to a sibling temp file and then renamed over the
 * target. A failed or interrupted write therefore never leaves a truncated
 * model behind — important because loadModel() treats unparseable JSON as
 * "no model" and would silently discard every accumulated posterior.
 *
 * @param {Object} model - Model object to persist (its `updated` field is mutated)
 * @param {string} modelPath - Absolute or relative path to write
 * @throws {Error} Re-throws any serialization or filesystem error
 */
function saveModel(model, modelPath) {
  model.updated = new Date().toISOString();

  const dir = require('path').dirname(modelPath);
  // `recursive: true` is a no-op when the directory already exists.
  fs.mkdirSync(dir, { recursive: true });

  const payload = `${JSON.stringify(model, null, 2)}\n`;
  // Same directory as the target so the rename stays on one filesystem.
  const tmpPath = `${modelPath}.${process.pid}.tmp`;

  try {
    fs.writeFileSync(tmpPath, payload);
    fs.renameSync(tmpPath, modelPath);
  } catch (err) {
    try {
      fs.unlinkSync(tmpPath);
    } catch (_cleanupErr) {
      // Temp file was never created or is already gone — nothing to clean up.
    }
    throw err;
  }
}
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // Model Update
145
+ // ---------------------------------------------------------------------------
146
+
147
/**
 * Apply a single weighted Beta-Bernoulli update to the model.
 *
 *   signal === 'positive'  -> alpha += timeDecayWeight(timestamp)
 *   any other signal       -> beta  += timeDecayWeight(timestamp)
 *
 * Every listed category is updated. A category that the model does not own
 * yet is created with the uniform prior (alpha = 1, beta = 1) first. The
 * ownership test uses hasOwnProperty, so names that collide with inherited
 * Object members ('constructor', 'toString', '__proto__', ...) are stored as
 * ordinary categories instead of writing counters onto Object / Object.prototype.
 *
 * If `model.categories` is missing it is initialized to an empty map.
 *
 * Mutates model in place AND returns the model for chaining.
 *
 * @param {Object} model - Model object (mutated in place)
 * @param {Object} params
 * @param {'positive'|'negative'} params.signal - Feedback direction
 * @param {string} params.timestamp - ISO 8601 timestamp for decay calculation
 * @param {string[]} [params.categories] - Categories to update; defaults to ['uncategorized']
 * @returns {Object} The mutated model
 */
function updateModel(model, { signal, timestamp, categories }) {
  const weight = timeDecayWeight(timestamp);
  const isPositive = signal === 'positive';
  const cats = categories && categories.length ? categories : ['uncategorized'];

  if (model.categories === null || typeof model.categories !== 'object') {
    model.categories = {};
  }
  const store = model.categories;
  const hasOwn = Object.prototype.hasOwnProperty;

  cats.forEach((cat) => {
    if (!hasOwn.call(store, cat) || !store[cat]) {
      // defineProperty (not plain assignment) so that even '__proto__' becomes
      // an own, enumerable data property rather than re-pointing the prototype.
      Object.defineProperty(store, cat, {
        value: { alpha: 1.0, beta: 1.0, samples: 0, last_updated: null },
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }

    const entry = store[cat];
    if (isPositive) {
      entry.alpha += weight;
    } else {
      entry.beta += weight;
    }
    entry.samples += 1;
    entry.last_updated = timestamp;
  });

  // One feedback entry regardless of how many categories it touched.
  model.total_entries = (model.total_entries || 0) + 1;
  model.updated = new Date().toISOString();
  return model;
}
187
+
188
+ // ---------------------------------------------------------------------------
189
+ // Reliability Estimation
190
+ // ---------------------------------------------------------------------------
191
+
192
/**
 * Summarize each category by its Beta posterior mean.
 *
 *   reliability = alpha / (alpha + beta)
 *
 * A category still at the uniform prior (alpha = 1, beta = 1) reports 0.5;
 * accumulating positive weight pushes the value toward 1.0 and negative
 * weight toward 0.0. When alpha + beta is not greater than zero the
 * reliability falls back to 0.5.
 *
 * @param {Object} model - Model object containing categories
 * @returns {Object} Map of category → { alpha, beta, reliability, samples }
 */
function getReliability(model) {
  const byCategory = model.categories || {};
  const report = {};

  Object.keys(byCategory).forEach((name) => {
    const { alpha, beta, samples } = byCategory[name];
    const mass = alpha + beta;

    let reliability = 0.5;
    if (mass > 0) {
      reliability = alpha / mass;
    }

    report[name] = { alpha, beta, reliability, samples };
  });

  return report;
}
216
+
217
+ // ---------------------------------------------------------------------------
218
+ // Posterior Sampling
219
+ // ---------------------------------------------------------------------------
220
+
221
/**
 * Draw one sample from the Beta(alpha, beta) posterior of every category,
 * for Thompson Sampling action selection (explore via uncertainty).
 *
 * Each draw is delegated to betaSample(), which uses the gamma-ratio method:
 *   X ~ Gamma(alpha), Y ~ Gamma(beta)  =>  X / (X + Y) ~ Beta(alpha, beta)
 *
 * Shape parameters are sanitized before sampling:
 *   - finite values are floored at 0.01 so the shape stays strictly positive;
 *   - missing / non-numeric / non-finite values fall back to the uniform
 *     prior (1.0). Passing NaN through would make the gamma rejection loop
 *     spin forever, so a hand-edited or damaged model file must not reach it.
 *
 * @param {Object} model - Model object containing categories
 * @returns {Object} Map of category → float sample in [0, 1]
 */
function samplePosteriors(model) {
  const MIN_SHAPE = 0.01;
  const PRIOR_SHAPE = 1.0;

  const toShape = (raw) => {
    const value = Number(raw);
    if (!Number.isFinite(value)) return PRIOR_SHAPE;
    return Math.max(value, MIN_SHAPE);
  };

  const samples = {};
  for (const [cat, params] of Object.entries(model.categories || {})) {
    // Tolerate a null/absent record the same way as absent parameters.
    const { alpha, beta } = params || {};
    samples[cat] = betaSample(toShape(alpha), toShape(beta));
  }
  return samples;
}
243
+
244
+ // ---------------------------------------------------------------------------
245
+ // Internal: Marsaglia-Tsang Gamma Sampling (2000)
246
+ // ---------------------------------------------------------------------------
247
+
248
/**
 * Sample from Gamma(shape, 1) using the Marsaglia-Tsang (2000) squeeze /
 * rejection algorithm.
 *
 * For shape < 1 the sample is obtained by boosting the shape by one and
 * scaling: Gamma(a) = Gamma(a + 1) * U^(1/a), with U uniform on [0, 1).
 *
 * The shape is validated up front. A NaN shape would otherwise make every
 * acceptance comparison false and the loop below would never exit, and
 * -Infinity would recurse without bound.
 *
 * @param {number} shape - Shape parameter (finite, > 0)
 * @returns {number} Gamma-distributed sample
 * @throws {RangeError} If shape is not a finite number greater than zero
 */
function gammaSample(shape) {
  if (typeof shape !== 'number' || !Number.isFinite(shape) || shape <= 0) {
    throw new RangeError(
      `gammaSample: shape must be a finite number > 0 (received ${String(shape)})`,
    );
  }

  if (shape < 1) {
    return gammaSample(1 + shape) * Math.pow(Math.random(), 1 / shape);
  }

  const d = shape - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);

  // Rejection sampling loop — terminates quickly for shape >= 1
  // eslint-disable-next-line no-constant-condition
  while (true) {
    let x;
    let v;
    // Redraw until the cubic transform base is positive.
    do {
      x = gaussSample();
      v = 1 + c * x;
    } while (v <= 0);

    v = v * v * v;
    const u = Math.random();

    // Cheap squeeze test first; it accepts the vast majority of candidates.
    if (u < 1 - 0.0331 * (x * x) * (x * x)) {
      return d * v;
    }
    // Exact acceptance test.
    if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) {
      return d * v;
    }
  }
}
284
+
285
/**
 * Produce one standard-normal variate with the polar (rejection) form of the
 * Box-Muller transform.
 *
 * A point is drawn uniformly from the square [-1, 1) x [-1, 1) and redrawn
 * until it falls strictly inside the unit circle and is not the origin —
 * the origin is excluded because it would require log(0).
 *
 * @returns {number} Standard normal sample
 */
function gaussSample() {
  // eslint-disable-next-line no-constant-condition
  for (;;) {
    const a = Math.random() * 2 - 1;
    const b = Math.random() * 2 - 1;
    const radiusSq = a * a + b * b;

    if (radiusSq > 0 && radiusSq < 1) {
      return a * Math.sqrt((-2 * Math.log(radiusSq)) / radiusSq);
    }
  }
}
302
+
303
/**
 * Sample from Beta(alpha, beta) using the gamma ratio method:
 *   X ~ Gamma(alpha), Y ~ Gamma(beta)  =>  X / (X + Y) ~ Beta(alpha, beta)
 *
 * For very small shapes both gamma draws can underflow to exactly 0, which
 * would make the ratio 0/0 = NaN. In that degenerate case (or if the sum is
 * otherwise not a positive finite number) the distribution mean
 * alpha / (alpha + beta) is returned instead, or 0.5 if even the mean is not
 * finite, so callers always get a usable number in [0, 1].
 *
 * @param {number} alpha - Alpha shape parameter (> 0)
 * @param {number} beta - Beta shape parameter (> 0)
 * @returns {number} Beta-distributed sample in [0, 1]
 */
function betaSample(alpha, beta) {
  const x = gammaSample(alpha);
  const y = gammaSample(beta);
  const total = x + y;

  if (Number.isFinite(total) && total > 0) {
    return x / total;
  }

  // Degenerate draw: fall back to the mean rather than propagate NaN.
  const mean = alpha / (alpha + beta);
  return Number.isFinite(mean) ? mean : 0.5;
}
315
+
316
+ // ---------------------------------------------------------------------------
317
+ // Exports
318
+ // ---------------------------------------------------------------------------
319
+
320
+ module.exports = {
321
+ timeDecayWeight,
322
+ loadModel,
323
+ saveModel,
324
+ createInitialModel,
325
+ updateModel,
326
+ getReliability,
327
+ samplePosteriors,
328
+ HALF_LIFE_DAYS,
329
+ DECAY_FLOOR,
330
+ DEFAULT_CATEGORIES,
331
+ };