rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/LICENSE +21 -0
  3. package/README.md +308 -0
  4. package/adapters/README.md +8 -0
  5. package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
  6. package/adapters/chatgpt/INSTALL.md +80 -0
  7. package/adapters/chatgpt/openapi.yaml +292 -0
  8. package/adapters/claude/.mcp.json +8 -0
  9. package/adapters/codex/config.toml +4 -0
  10. package/adapters/gemini/function-declarations.json +95 -0
  11. package/adapters/mcp/server-stdio.js +444 -0
  12. package/bin/cli.js +167 -0
  13. package/config/mcp-allowlists.json +29 -0
  14. package/config/policy-bundles/constrained-v1.json +53 -0
  15. package/config/policy-bundles/default-v1.json +80 -0
  16. package/config/rubrics/default-v1.json +52 -0
  17. package/config/subagent-profiles.json +32 -0
  18. package/openapi/openapi.yaml +292 -0
  19. package/package.json +91 -0
  20. package/plugins/amp-skill/INSTALL.md +52 -0
  21. package/plugins/amp-skill/SKILL.md +31 -0
  22. package/plugins/claude-skill/INSTALL.md +55 -0
  23. package/plugins/claude-skill/SKILL.md +46 -0
  24. package/plugins/codex-profile/AGENTS.md +20 -0
  25. package/plugins/codex-profile/INSTALL.md +57 -0
  26. package/plugins/gemini-extension/INSTALL.md +74 -0
  27. package/plugins/gemini-extension/gemini_prompt.txt +10 -0
  28. package/plugins/gemini-extension/tool_contract.json +28 -0
  29. package/scripts/billing.js +471 -0
  30. package/scripts/budget-guard.js +173 -0
  31. package/scripts/code-reasoning.js +307 -0
  32. package/scripts/context-engine.js +547 -0
  33. package/scripts/contextfs.js +513 -0
  34. package/scripts/contract-audit.js +198 -0
  35. package/scripts/dpo-optimizer.js +208 -0
  36. package/scripts/export-dpo-pairs.js +316 -0
  37. package/scripts/export-training.js +448 -0
  38. package/scripts/feedback-attribution.js +313 -0
  39. package/scripts/feedback-inbox-read.js +162 -0
  40. package/scripts/feedback-loop.js +838 -0
  41. package/scripts/feedback-schema.js +300 -0
  42. package/scripts/feedback-to-memory.js +165 -0
  43. package/scripts/feedback-to-rules.js +109 -0
  44. package/scripts/generate-paperbanana-diagrams.sh +99 -0
  45. package/scripts/hybrid-feedback-context.js +676 -0
  46. package/scripts/intent-router.js +164 -0
  47. package/scripts/mcp-policy.js +92 -0
  48. package/scripts/meta-policy.js +194 -0
  49. package/scripts/plan-gate.js +154 -0
  50. package/scripts/prove-adapters.js +364 -0
  51. package/scripts/prove-attribution.js +364 -0
  52. package/scripts/prove-automation.js +393 -0
  53. package/scripts/prove-data-quality.js +219 -0
  54. package/scripts/prove-intelligence.js +256 -0
  55. package/scripts/prove-lancedb.js +370 -0
  56. package/scripts/prove-loop-closure.js +255 -0
  57. package/scripts/prove-rlaif.js +404 -0
  58. package/scripts/prove-subway-upgrades.js +250 -0
  59. package/scripts/prove-training-export.js +324 -0
  60. package/scripts/prove-v2-milestone.js +273 -0
  61. package/scripts/prove-v3-milestone.js +381 -0
  62. package/scripts/rlaif-self-audit.js +123 -0
  63. package/scripts/rubric-engine.js +230 -0
  64. package/scripts/self-heal.js +127 -0
  65. package/scripts/self-healing-check.js +111 -0
  66. package/scripts/skill-quality-tracker.js +284 -0
  67. package/scripts/subagent-profiles.js +79 -0
  68. package/scripts/sync-gh-secrets-from-env.sh +29 -0
  69. package/scripts/thompson-sampling.js +331 -0
  70. package/scripts/train_from_feedback.py +914 -0
  71. package/scripts/validate-feedback.js +580 -0
  72. package/scripts/vector-store.js +100 -0
  73. package/src/api/server.js +497 -0
@@ -0,0 +1,208 @@
1
+ 'use strict';
2
+ /**
3
+ * DPO Batch Optimizer (DPO-02)
4
+ *
5
+ * Builds (chosen, rejected) preference pairs from memory-log.jsonl memories,
6
+ * computes DPO log-ratio adjustments using Thompson Sampling posteriors,
7
+ * and writes dpo-model.json to RLHF_FEEDBACK_DIR.
8
+ *
9
+ * Does NOT call any external API. Pure offline batch optimization.
10
+ *
11
+ * Exports: run, buildPreferencePairs, applyDpoAdjustments, dpoLogRatio
12
+ */
13
+
14
+ const fs = require('fs');
15
+ const path = require('path');
16
+ const os = require('os');
17
+
18
// Default scale applied to the log-ratio when the caller passes no beta.
const DPO_BETA = 0.1;

// ---------------------------------------------------------------------------
// dpoLogRatio — DPO preference adjustment formula
// ---------------------------------------------------------------------------

/**
 * Turn a (chosen, rejected) weight pair into a signed adjustment.
 *
 * The result is `2 * sigmoid(beta * (ln(chosen) - ln(rejected))) - 1`:
 * positive when the chosen weight is larger, negative when the rejected
 * weight is larger, and 0 when they are equal. Each weight is raised to at
 * least 0.01 before the logarithm is taken, so zero or negative weights do
 * not produce -Infinity/NaN.
 *
 * @param {number} chosenWeight - Weight of the chosen (positive) outcome
 * @param {number} rejectedWeight - Weight of the rejected (negative) outcome
 * @param {number} [beta=0.1] - Scale applied to the log-ratio before the
 *   sigmoid; a larger value moves the result further from 0. `undefined` and
 *   `null` both select DPO_BETA.
 * @returns {number} Adjustment between -1 and +1
 */
function dpoLogRatio(chosenWeight, rejectedWeight, beta) {
  const WEIGHT_FLOOR = 0.01;
  const scale = beta == null ? DPO_BETA : beta;

  const flooredChosen = Math.max(chosenWeight, WEIGHT_FLOOR);
  const flooredRejected = Math.max(rejectedWeight, WEIGHT_FLOOR);
  const logRatio = Math.log(flooredChosen) - Math.log(flooredRejected);

  // Logistic squash to (0, 1), then re-centre and stretch to (-1, +1).
  const squashed = 1.0 / (1.0 + Math.exp(-scale * logRatio));
  return (squashed - 0.5) * 2;
}
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // buildPreferencePairs — groups DPO pairs by category from feedbackDir
43
+ // ---------------------------------------------------------------------------
44
+
45
/**
 * Build preference pairs grouped by category.
 *
 * Uses buildDpoPairs() from export-dpo-pairs.js (do NOT reimplement).
 * Reads memory-log.jsonl from feedbackDir, keeps the records whose
 * `category` is 'error' or 'learning', and pairs them.
 *
 * The category of a pair is the first entry of its `metadata.matchedKeys`,
 * or 'uncategorized' when there is none. Category names come from memory
 * tags and title words, so they are treated as arbitrary strings: buckets
 * are created as own properties, which keeps names such as "constructor" or
 * "__proto__" from resolving to inherited Object.prototype members.
 *
 * @param {string} feedbackDir - Directory containing memory-log.jsonl
 * @returns {Object} Map of category → [{ chosen, rejected, metadata }]
 */
function buildPreferencePairs(feedbackDir) {
  const { buildDpoPairs, readJSONL } = require('./export-dpo-pairs');
  const memoryLogPath = path.join(feedbackDir, 'memory-log.jsonl');
  const memories = readJSONL(memoryLogPath);

  const errors = memories.filter((m) => m.category === 'error');
  const learnings = memories.filter((m) => m.category === 'learning');

  const result = buildDpoPairs(errors, learnings);

  // Group pairs by category (inferred from matchedKeys)
  const grouped = {};
  for (const pair of result.pairs) {
    const keys = (pair.metadata && pair.metadata.matchedKeys) || [];
    const category = keys.length > 0 ? keys[0] : 'uncategorized';

    // A plain `grouped[category]` lookup would find inherited members
    // (e.g. grouped.constructor) and a plain assignment to "__proto__" would
    // change the prototype instead of adding a bucket.
    if (!Object.prototype.hasOwnProperty.call(grouped, category)) {
      Object.defineProperty(grouped, category, {
        value: [],
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }

    // NOTE(review): timestamps are always null here. applyDpoAdjustments
    // passes them to ts.timeDecayWeight for both sides of the pair, so both
    // weights presumably come out equal — confirm whether the memory
    // timestamps should be threaded through instead.
    grouped[category].push({
      chosen: pair.metadata
        ? { id: pair.metadata.learningId, content: pair.chosen, timestamp: null }
        : { content: pair.chosen },
      rejected: pair.metadata
        ? { id: pair.metadata.errorId, content: pair.rejected, timestamp: null }
        : { content: pair.rejected },
      metadata: pair.metadata,
    });
  }

  return grouped;
}
83
+
84
+ // ---------------------------------------------------------------------------
85
+ // applyDpoAdjustments — mutates Thompson model with DPO posterior adjustments
86
+ // ---------------------------------------------------------------------------
87
+
88
/**
 * Apply DPO adjustments to Thompson Sampling posteriors and save the model.
 *
 * For every category with at least one pair, the per-pair dpoLogRatio values
 * (computed from ts.timeDecayWeight of each side's timestamp) are averaged.
 * A positive average adds `avg * pairCount * 0.5` to the category's alpha; a
 * negative average adds the same magnitude to beta. A zero or NaN average
 * leaves the posterior untouched, so an unparseable weight cannot write NaN
 * into the saved model.
 *
 * Category names are treated as arbitrary strings: entries are looked up and
 * created as own properties so names such as "constructor" or "__proto__"
 * never resolve to inherited Object.prototype members.
 *
 * CRITICAL: calls ts.saveModel(model, modelPath) after all mutations.
 *
 * @param {string} modelPath - Path to feedback_model.json
 * @param {Object} pairs - Map of category → [{ chosen, rejected }] from buildPreferencePairs
 * @returns {Object} adjustments - Map of category → { pairs, avg_adjustment }
 */
function applyDpoAdjustments(modelPath, pairs) {
  const ts = require('./thompson-sampling');
  const model = ts.loadModel(modelPath);
  const adjustments = {};

  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  // defineProperty (not assignment) so that "__proto__" becomes a real entry.
  const setOwn = (obj, key, value) => {
    Object.defineProperty(obj, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  };

  for (const [cat, catPairs] of Object.entries(pairs)) {
    if (!catPairs || catPairs.length === 0) continue;

    // Ensure category exists in model (as an own, truthy entry)
    if (!hasOwn(model.categories, cat) || !model.categories[cat]) {
      setOwn(model.categories, cat, { alpha: 1.0, beta: 1.0, samples: 0, last_updated: null });
    }
    const posterior = model.categories[cat];

    let total = 0;
    for (const pair of catPairs) {
      const chosenTs = (pair.chosen && pair.chosen.timestamp) || null;
      const rejectedTs = (pair.rejected && pair.rejected.timestamp) || null;
      const cw = ts.timeDecayWeight(chosenTs);
      const rw = ts.timeDecayWeight(rejectedTs);
      total += dpoLogRatio(cw, rw);
    }

    const avg = total / catPairs.length;

    if (avg > 0) {
      posterior.alpha += avg * catPairs.length * 0.5;
    } else if (avg < 0) {
      posterior.beta += Math.abs(avg) * catPairs.length * 0.5;
    }
    // avg === 0 or NaN: nothing to add to either side.

    setOwn(adjustments, cat, {
      pairs: catPairs.length,
      avg_adjustment: Math.round(avg * 10000) / 10000,
    });
  }

  // CRITICAL: save after all mutations (Pitfall 2 from RESEARCH.md)
  ts.saveModel(model, modelPath);

  return adjustments;
}
138
+
139
+ // ---------------------------------------------------------------------------
140
+ // run — top-level batch DPO optimization entry point
141
+ // ---------------------------------------------------------------------------
142
+
143
/**
 * Run one DPO optimization batch end to end:
 *   1. Build preference pairs from memory-log.jsonl
 *   2. Apply DPO adjustments to the Thompson model (skipped when no pairs)
 *   3. Write dpo-model.json to feedbackDir (the directory is created if missing)
 *
 * The feedback directory resolves to opts.feedbackDir, then the
 * RLHF_FEEDBACK_DIR environment variable, then ~/.claude/memory/feedback.
 * The model path resolves to opts.modelPath, then
 * <cwd>/.claude/memory/feedback/feedback_model.json.
 *
 * This function is synchronous and logs a short summary to stdout.
 *
 * @param {Object} [opts]
 * @param {string} [opts.feedbackDir] - Override RLHF_FEEDBACK_DIR
 * @param {string} [opts.modelPath] - Override Thompson model path
 * @returns {{ adjustments: Object, pairs_processed: number }}
 */
function run(opts) {
  const { feedbackDir: dirOverride, modelPath: modelOverride } = opts || {};

  const feedbackDir = dirOverride
    || process.env.RLHF_FEEDBACK_DIR
    || path.join(os.homedir(), '.claude', 'memory', 'feedback');
  const modelPath = modelOverride
    || path.join(process.cwd(), '.claude', 'memory', 'feedback', 'feedback_model.json');

  const pairs = buildPreferencePairs(feedbackDir);

  let pairsProcessed = 0;
  for (const bucket of Object.values(pairs)) {
    pairsProcessed += bucket.length;
  }

  // The model is only loaded and saved when there is something to apply.
  const adjustments = pairsProcessed > 0 ? applyDpoAdjustments(modelPath, pairs) : {};

  const dpoModel = {
    generated: new Date().toISOString(),
    pairs_processed: pairsProcessed,
    adjustments,
  };

  if (!fs.existsSync(feedbackDir)) {
    fs.mkdirSync(feedbackDir, { recursive: true });
  }
  fs.writeFileSync(
    path.join(feedbackDir, 'dpo-model.json'),
    `${JSON.stringify(dpoModel, null, 2)}\n`,
  );

  console.log(`DPO optimization complete: ${pairsProcessed} pairs processed`);
  const hasAdjustments = Object.keys(adjustments).length > 0;
  if (!hasAdjustments) {
    console.log('No adjustment pairs found (empty or no overlapping memories)');
  } else {
    console.log('Adjustments:', JSON.stringify(adjustments, null, 2));
  }

  return { adjustments, pairs_processed: pairsProcessed };
}
192
+
193
+ // ---------------------------------------------------------------------------
194
+ // CLI entry point
195
+ // ---------------------------------------------------------------------------
196
+
197
// run() is synchronous and returns a plain object, not a Promise, so
// failures arrive as thrown exceptions. Chaining `.catch` on its return
// value would itself throw a TypeError after a successful run.
if (require.main === module && process.argv.includes('--run')) {
  try {
    run();
  } catch (e) {
    console.error(e);
    process.exit(1);
  }
}

// ---------------------------------------------------------------------------
// Exports
// ---------------------------------------------------------------------------

module.exports = { run, buildPreferencePairs, applyDpoAdjustments, dpoLogRatio };
@@ -0,0 +1,316 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * DPO Preference Pair Exporter
4
+ *
5
+ * Transforms error + learning memories into DPO JSONL triples.
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+ const { traceForDpoPair, aggregateTraces } = require('./code-reasoning');
11
+
12
+ const PROJECT_ROOT = path.join(__dirname, '..');
13
+ const FEEDBACK_DIR = process.env.RLHF_FEEDBACK_DIR || path.join(PROJECT_ROOT, '.claude', 'memory', 'feedback');
14
+ const DEFAULT_LOCAL_MEMORY_LOG = path.join(FEEDBACK_DIR, 'memory-log.jsonl');
15
+
16
/**
 * Read a JSON Lines file into an array of parsed values.
 *
 * A missing file or a file that is empty after trimming yields []. Lines
 * that are not valid JSON are skipped, and so are lines that parse to a
 * falsy value (null, 0, false, "").
 *
 * @param {string} filePath - Path to the .jsonl file
 * @returns {Array} Parsed truthy values, in file order
 */
function readJSONL(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }

  const text = fs.readFileSync(filePath, 'utf-8').trim();
  if (text === '') {
    return [];
  }

  const records = [];
  for (const line of text.split('\n')) {
    let value;
    try {
      value = JSON.parse(line);
    } catch {
      // Best-effort reader: a malformed line is dropped, not fatal.
      continue;
    }
    if (value) {
      records.push(value);
    }
  }
  return records;
}
31
+
32
/**
 * Collect the de-duplicated domain keys of a memory.
 *
 * Keys are, in order: every entry of `memory.tags` (when it is an array)
 * except the generic tags 'feedback', 'positive' and 'negative'; then up to
 * three lower-cased title words longer than three characters, taken after a
 * leading "MISTAKE:", "SUCCESS:", "ERROR:", "LEARNING:" or "PREFERENCE:"
 * label (case-insensitive) is removed. Tags are kept as written.
 *
 * @param {Object} memory - Memory record with optional `tags` and `title`
 * @returns {Array} Unique keys in first-seen order
 */
function extractDomainKeys(memory) {
  const ignoredTags = new Set(['feedback', 'positive', 'negative']);
  const collected = new Set();

  if (Array.isArray(memory.tags)) {
    for (const tag of memory.tags) {
      if (ignoredTags.has(tag)) continue;
      collected.add(tag);
    }
  }

  const unlabelledTitle = (memory.title || '')
    .replace(/^(MISTAKE|SUCCESS|ERROR|LEARNING|PREFERENCE):\s*/i, '');
  const significantWords = unlabelledTitle
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length > 3);

  significantWords.slice(0, 3).forEach((word) => collected.add(word));

  return [...collected];
}
53
+
54
/**
 * Count how many entries of keysA also occur in keysB.
 *
 * Every entry of keysA is counted separately, so a key repeated in keysA
 * contributes once per repetition.
 *
 * @param {Iterable} keysA - Keys to test
 * @param {Iterable} keysB - Keys to test against
 * @returns {number} Number of keysA entries found in keysB
 */
function domainOverlap(keysA, keysB) {
  const lookup = new Set(keysB);
  return [...keysA].reduce((count, key) => (lookup.has(key) ? count + 1 : count), 0);
}
62
+
63
/**
 * Derive a prompt string for an (error, learning) pair.
 *
 * Preference order:
 *   1. tags present on both memories → "Task domain: …" prompt;
 *   2. the error title with a leading "MISTAKE:" / "ERROR:" label removed →
 *      "Scenario: …" prompt;
 *   3. a generic fallback question.
 *
 * @param {Object} error - Error memory (optional `tags`, `title`)
 * @param {Object} learning - Learning memory (optional `tags`)
 * @returns {string} Prompt text
 */
function inferPrompt(error, learning) {
  const commonTags = (error.tags || []).filter((tag) => (learning.tags || []).includes(tag));
  if (commonTags.length > 0) {
    return `Task domain: ${commonTags.join(', ')}. How should the agent handle this scenario?`;
  }

  const scenario = (error.title || '').replace(/^(MISTAKE|ERROR):\s*/i, '').trim();
  return scenario
    ? `Scenario: ${scenario}. What is the better response?`
    : 'How should the agent respond in this situation?';
}
73
+
74
/**
 * Read the rubric weighted score of a memory.
 *
 * Only finite numbers and non-blank numeric strings count as a score.
 * Values that `Number()` would silently coerce — null, '', whitespace,
 * booleans, arrays — are treated as "no score", so a missing score is not
 * reported as 0 (or 1).
 *
 * @param {Object} [memory] - Memory record with optional `rubricSummary`
 * @returns {number|null} Finite weighted score, or null when unavailable
 */
function getRubricWeightedScore(memory) {
  if (!memory || !memory.rubricSummary) return null;

  const raw = memory.rubricSummary.weightedScore;
  const isNumber = typeof raw === 'number';
  const isNonBlankString = typeof raw === 'string' && raw.trim() !== '';
  if (!isNumber && !isNonBlankString) return null;

  const score = Number(raw);
  return Number.isFinite(score) ? score : null;
}
80
+
81
/**
 * Summarise the rubric difference between an error and a learning memory.
 *
 * Returns null when neither memory has a usable weighted score. Otherwise
 * returns both scores, their difference (learning minus error, rounded to
 * three decimals; null unless both scores exist) and each memory's
 * `rubricSummary.failingCriteria` (an empty array when absent).
 *
 * @param {Object} error - Error memory
 * @param {Object} learning - Learning memory
 * @returns {Object|null} Rubric comparison, or null when no score is available
 */
function buildRubricDelta(error, learning) {
  const errorScore = getRubricWeightedScore(error);
  const learningScore = getRubricWeightedScore(learning);

  const hasErrorScore = errorScore != null;
  const hasLearningScore = learningScore != null;
  if (!hasErrorScore && !hasLearningScore) {
    return null;
  }

  let weightedDelta = null;
  if (hasErrorScore && hasLearningScore) {
    weightedDelta = Math.round((learningScore - errorScore) * 1000) / 1000;
  }

  const failingCriteriaOf = (memory) => (
    memory && memory.rubricSummary ? memory.rubricSummary.failingCriteria || [] : []
  );

  return {
    learningWeightedScore: learningScore,
    errorWeightedScore: errorScore,
    weightedDelta,
    errorFailingCriteria: failingCriteriaOf(error),
    learningFailingCriteria: failingCriteriaOf(learning),
  };
}
96
+
97
/**
 * Pair each error memory with its best-matching unused learning memory.
 *
 * A learning is a candidate for an error only when the two share at least
 * one domain key. Among candidates the score is
 * `overlap + max(0, rubric weightedDelta)`; the highest score wins and the
 * earliest candidate wins ties. Each learning is used at most once.
 *
 * Zero-overlap learnings are skipped up front: a pair is never emitted
 * without overlap, and letting such a learning occupy the "best" slot on
 * rubric delta alone could block a later, valid candidate with an equal
 * score.
 *
 * Used memories are tracked by `id` when one is present and by object
 * identity otherwise, so records without an id do not all collapse onto the
 * same `undefined` key.
 *
 * @param {Array<Object>} errors - Memories to use as the rejected side
 * @param {Array<Object>} learnings - Memories to use as the chosen side
 * @returns {{ pairs: Array<Object>, unpairedErrors: Array<Object>, unpairedLearnings: Array<Object> }}
 */
function buildDpoPairs(errors, learnings) {
  const pairs = [];
  const usedErrors = new Set();
  const usedLearnings = new Set();
  const usageKey = (memory) => (memory.id != null ? memory.id : memory);

  const errorKeys = errors.map((e) => ({ memory: e, keys: extractDomainKeys(e) }));
  const learningKeys = learnings.map((l) => ({ memory: l, keys: extractDomainKeys(l) }));

  for (const err of errorKeys) {
    let best = null;
    let bestScore = 0;
    let bestOverlap = 0;
    let bestRubric = null;

    for (const learn of learningKeys) {
      if (usedLearnings.has(usageKey(learn.memory))) continue;

      const overlap = domainOverlap(err.keys, learn.keys);
      if (overlap === 0) continue;

      const rubric = buildRubricDelta(err.memory, learn.memory);
      const rubricDelta = rubric && rubric.weightedDelta != null ? rubric.weightedDelta : 0;
      const score = overlap + Math.max(0, rubricDelta);
      if (score > bestScore) {
        best = learn;
        bestScore = score;
        bestOverlap = overlap;
        bestRubric = rubric;
      }
    }

    if (!best) continue;

    const bestKeySet = new Set(best.keys);
    pairs.push({
      prompt: inferPrompt(err.memory, best.memory),
      chosen: best.memory.content,
      rejected: err.memory.content,
      metadata: {
        errorId: err.memory.id,
        learningId: best.memory.id,
        matchScore: bestScore,
        overlapScore: bestOverlap,
        matchedKeys: err.keys.filter((k) => bestKeySet.has(k)),
        errorTitle: err.memory.title,
        learningTitle: best.memory.title,
        rubric: bestRubric,
      },
    });
    usedErrors.add(usageKey(err.memory));
    usedLearnings.add(usageKey(best.memory));
  }

  return {
    pairs,
    unpairedErrors: errors.filter((e) => !usedErrors.has(usageKey(e))),
    unpairedLearnings: learnings.filter((l) => !usedLearnings.has(usageKey(l))),
  };
}
150
+
151
/**
 * Serialise pairs as JSON Lines: one JSON document per line, with a
 * trailing newline. An empty array yields a single newline.
 *
 * @param {Array} pairs - Values to serialise
 * @returns {string} JSONL text
 */
function toJSONL(pairs) {
  const lines = pairs.map((pair) => JSON.stringify(pair));
  return lines.join('\n').concat('\n');
}
154
+
155
/**
 * Build DPO pairs from a flat memory list and attach a reasoning trace
 * summary to each pair.
 *
 * Memories are split by `category` ('error' / 'learning'), paired with
 * buildDpoPairs, traced with traceForDpoPair, and the traces are combined
 * with aggregateTraces. Each returned pair carries a
 * `metadata.reasoningTrace` entry holding the trace id, the trace summary's
 * confidence / passed / verified / refuted fields and the trace's edge cases.
 *
 * @param {Array<Object>} memories - Memory records
 * @returns {Object} `{ pairs, unpairedErrors, unpairedLearnings, errors,
 *   learnings, reasoning, jsonl }`, where `jsonl` is the traced pairs
 *   serialised with toJSONL
 */
function exportDpoFromMemories(memories) {
  const byCategory = (wanted) => memories.filter((m) => m.category === wanted);
  const errors = byCategory('error');
  const learnings = byCategory('learning');

  const { pairs, unpairedErrors, unpairedLearnings } = buildDpoPairs(errors, learnings);

  const traces = pairs.map((pair) => traceForDpoPair(pair));
  const reasoning = aggregateTraces(traces);

  const pairsWithTraces = pairs.map((pair, index) => {
    const trace = traces[index];
    const reasoningTrace = {
      traceId: trace.traceId,
      confidence: trace.summary.confidence,
      passed: trace.summary.passed,
      verified: trace.summary.verified,
      refuted: trace.summary.refuted,
      edgeCases: trace.edgeCases,
    };
    return {
      ...pair,
      metadata: { ...pair.metadata, reasoningTrace },
    };
  });

  return {
    pairs: pairsWithTraces,
    unpairedErrors,
    unpairedLearnings,
    errors,
    learnings,
    reasoning,
    jsonl: toJSONL(pairsWithTraces),
  };
}
188
+
189
/**
 * Parse `--key` / `--key=value` command-line tokens into an object.
 *
 * Tokens that do not start with `--` are ignored. A token without `=` maps
 * its key to `true`; otherwise the key maps to everything after the first
 * `=` (which may be an empty string and may itself contain `=`).
 *
 * @param {string[]} argv - Argument tokens (without node/script entries)
 * @returns {Object} Parsed flags
 */
function parseArgs(argv) {
  const parsed = {};
  const flagTokens = argv.filter((token) => token.startsWith('--'));

  for (const token of flagTokens) {
    const body = token.slice(2);
    const separatorAt = body.indexOf('=');
    if (separatorAt === -1) {
      parsed[body] = true;
    } else {
      parsed[body.slice(0, separatorAt)] = body.slice(separatorAt + 1);
    }
  }

  return parsed;
}
198
+
199
/**
 * Command-line entry point.
 *
 *   --test            run the built-in self-test and return
 *   --input=<path>    read memories from a JSON file (an array, or an object
 *                     with a `memories` array)
 *   --from-local      read memories from DEFAULT_LOCAL_MEMORY_LOG
 *   --output=<path>   write the JSONL there instead of stdout
 *
 * Without --input or --from-local the process exits with status 1. Summary
 * lines go to stderr, so stdout carries only the JSONL payload.
 */
function runCli() {
  const args = parseArgs(process.argv.slice(2));

  if (args.test) {
    runTests();
    return;
  }

  let memories = [];
  if (args.input) {
    const parsed = JSON.parse(fs.readFileSync(args.input, 'utf-8'));
    memories = Array.isArray(parsed) ? parsed : parsed.memories || [];
  } else if (args['from-local']) {
    memories = readJSONL(DEFAULT_LOCAL_MEMORY_LOG);
  } else {
    console.error('Provide --input=<path-to-json> or --from-local');
    process.exit(1);
  }

  const result = exportDpoFromMemories(memories);
  const { jsonl } = result;

  if (!args.output) {
    process.stdout.write(jsonl);
  } else {
    fs.writeFileSync(args.output, jsonl);
    console.error(`Wrote ${result.pairs.length} DPO pairs to ${args.output}`);
  }

  console.error(`Errors=${result.errors.length} Learnings=${result.learnings.length} Pairs=${result.pairs.length}`);
  console.error(`Unpaired errors=${result.unpairedErrors.length} Unpaired learnings=${result.unpairedLearnings.length}`);
}
233
+
234
/**
 * Built-in self-test for the pairing and serialisation helpers.
 *
 * Builds a small fixture (two errors, one learning), checks buildDpoPairs,
 * toJSONL and the inferred prompt / rubric metadata, prints one PASS/FAIL
 * line per check, and exits the process with status 1 if any check failed
 * (0 otherwise).
 */
function runTests() {
  const tally = { passed: 0, failed: 0 };

  const check = (condition, name) => {
    if (condition) {
      tally.passed += 1;
      console.log(` PASS ${name}`);
      return;
    }
    tally.failed += 1;
    console.log(` FAIL ${name}`);
  };

  console.log('\nexport-dpo-pairs.js tests\n');

  // Error 1 shares the 'verification' tag with the learning; error 2 shares nothing.
  const matchingError = {
    id: 1,
    title: 'MISTAKE: Claimed done with no test proof',
    content: 'Claimed completion without running tests.',
    category: 'error',
    tags: ['verification', 'feedback'],
    rubricSummary: {
      weightedScore: 0.32,
      failingCriteria: ['verification_evidence'],
      failingGuardrails: ['testsPassed'],
    },
  };
  const unmatchedError = {
    id: 2,
    title: 'MISTAKE: Generic mismatch',
    content: 'No matching learning memory for this domain.',
    category: 'error',
    tags: ['unique-tag'],
  };
  const learning = {
    id: 10,
    title: 'SUCCESS: Always run tests before completion claims',
    content: 'Run tests and include output before saying complete.',
    category: 'learning',
    tags: ['verification', 'feedback'],
    rubricSummary: {
      weightedScore: 0.89,
      failingCriteria: [],
      failingGuardrails: [],
    },
  };

  const result = buildDpoPairs([matchingError, unmatchedError], [learning]);
  check(result.pairs.length === 1, 'one pair built from overlapping domain keys');
  check(result.unpairedErrors.length === 1, 'unpaired error left when no match exists');
  check(result.unpairedLearnings.length === 0, 'no unpaired learnings');

  const jsonl = toJSONL(result.pairs);
  check(jsonl.endsWith('\n'), 'JSONL output ends with newline');

  const parsed = JSON.parse(jsonl.trim());
  check(parsed.prompt.includes('verification'), 'inferred prompt includes shared domain');
  check(parsed.metadata.rubric.weightedDelta > 0, 'rubric delta metadata is attached');

  console.log(`\nResults: ${tally.passed} passed, ${tally.failed} failed\n`);
  process.exit(tally.failed > 0 ? 1 : 0);
}
302
+
303
+ module.exports = {
304
+ readJSONL,
305
+ extractDomainKeys,
306
+ domainOverlap,
307
+ inferPrompt,
308
+ buildDpoPairs,
309
+ toJSONL,
310
+ exportDpoFromMemories,
311
+ DEFAULT_LOCAL_MEMORY_LOG,
312
+ };
313
+
314
+ if (require.main === module) {
315
+ runCli();
316
+ }