rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/LICENSE +21 -0
  3. package/README.md +308 -0
  4. package/adapters/README.md +8 -0
  5. package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
  6. package/adapters/chatgpt/INSTALL.md +80 -0
  7. package/adapters/chatgpt/openapi.yaml +292 -0
  8. package/adapters/claude/.mcp.json +8 -0
  9. package/adapters/codex/config.toml +4 -0
  10. package/adapters/gemini/function-declarations.json +95 -0
  11. package/adapters/mcp/server-stdio.js +444 -0
  12. package/bin/cli.js +167 -0
  13. package/config/mcp-allowlists.json +29 -0
  14. package/config/policy-bundles/constrained-v1.json +53 -0
  15. package/config/policy-bundles/default-v1.json +80 -0
  16. package/config/rubrics/default-v1.json +52 -0
  17. package/config/subagent-profiles.json +32 -0
  18. package/openapi/openapi.yaml +292 -0
  19. package/package.json +91 -0
  20. package/plugins/amp-skill/INSTALL.md +52 -0
  21. package/plugins/amp-skill/SKILL.md +31 -0
  22. package/plugins/claude-skill/INSTALL.md +55 -0
  23. package/plugins/claude-skill/SKILL.md +46 -0
  24. package/plugins/codex-profile/AGENTS.md +20 -0
  25. package/plugins/codex-profile/INSTALL.md +57 -0
  26. package/plugins/gemini-extension/INSTALL.md +74 -0
  27. package/plugins/gemini-extension/gemini_prompt.txt +10 -0
  28. package/plugins/gemini-extension/tool_contract.json +28 -0
  29. package/scripts/billing.js +471 -0
  30. package/scripts/budget-guard.js +173 -0
  31. package/scripts/code-reasoning.js +307 -0
  32. package/scripts/context-engine.js +547 -0
  33. package/scripts/contextfs.js +513 -0
  34. package/scripts/contract-audit.js +198 -0
  35. package/scripts/dpo-optimizer.js +208 -0
  36. package/scripts/export-dpo-pairs.js +316 -0
  37. package/scripts/export-training.js +448 -0
  38. package/scripts/feedback-attribution.js +313 -0
  39. package/scripts/feedback-inbox-read.js +162 -0
  40. package/scripts/feedback-loop.js +838 -0
  41. package/scripts/feedback-schema.js +300 -0
  42. package/scripts/feedback-to-memory.js +165 -0
  43. package/scripts/feedback-to-rules.js +109 -0
  44. package/scripts/generate-paperbanana-diagrams.sh +99 -0
  45. package/scripts/hybrid-feedback-context.js +676 -0
  46. package/scripts/intent-router.js +164 -0
  47. package/scripts/mcp-policy.js +92 -0
  48. package/scripts/meta-policy.js +194 -0
  49. package/scripts/plan-gate.js +154 -0
  50. package/scripts/prove-adapters.js +364 -0
  51. package/scripts/prove-attribution.js +364 -0
  52. package/scripts/prove-automation.js +393 -0
  53. package/scripts/prove-data-quality.js +219 -0
  54. package/scripts/prove-intelligence.js +256 -0
  55. package/scripts/prove-lancedb.js +370 -0
  56. package/scripts/prove-loop-closure.js +255 -0
  57. package/scripts/prove-rlaif.js +404 -0
  58. package/scripts/prove-subway-upgrades.js +250 -0
  59. package/scripts/prove-training-export.js +324 -0
  60. package/scripts/prove-v2-milestone.js +273 -0
  61. package/scripts/prove-v3-milestone.js +381 -0
  62. package/scripts/rlaif-self-audit.js +123 -0
  63. package/scripts/rubric-engine.js +230 -0
  64. package/scripts/self-heal.js +127 -0
  65. package/scripts/self-healing-check.js +111 -0
  66. package/scripts/skill-quality-tracker.js +284 -0
  67. package/scripts/subagent-profiles.js +79 -0
  68. package/scripts/sync-gh-secrets-from-env.sh +29 -0
  69. package/scripts/thompson-sampling.js +331 -0
  70. package/scripts/train_from_feedback.py +914 -0
  71. package/scripts/validate-feedback.js +580 -0
  72. package/scripts/vector-store.js +100 -0
  73. package/src/api/server.js +497 -0
@@ -0,0 +1,393 @@
1
+ #!/usr/bin/env node
2
+ const fs = require('fs');
3
+ const path = require('path');
4
+ const os = require('os');
5
+ const {
6
+ captureFeedback,
7
+ analyzeFeedback,
8
+ buildPreventionRules,
9
+ getFeedbackPaths,
10
+ readJSONL,
11
+ } = require('./feedback-loop');
12
+ const { exportDpoFromMemories } = require('./export-dpo-pairs');
13
+ const { planIntent } = require('./intent-router');
14
+ const { startServer } = require('../src/api/server');
15
+ const { handleRequest } = require('../adapters/mcp/server-stdio');
16
+ const { collectHealthReport } = require('./self-healing-check');
17
+ const { runSelfHeal } = require('./self-heal');
18
+ const { CONTEXTFS_ROOT, NAMESPACES } = require('./contextfs');
19
+ const { traceForProofCheck, aggregateTraces } = require('./code-reasoning');
20
+
21
// Package root: the directory one level above this script's folder.
const ROOT = path.resolve(__dirname, '..');
// Where report.json / report.md are written when no override is supplied.
const DEFAULT_PROOF_DIR = path.resolve(ROOT, 'proof', 'automation');
23
+
24
/**
 * Creates `dirPath` (and any missing parents) if it does not exist yet.
 *
 * Recursive mkdir is already a no-op for an existing directory, so no
 * separate existence check is performed; that avoids a check-then-act race
 * and surfaces an error when the path exists but is not a directory.
 *
 * @param {string} dirPath - Directory to create.
 * @throws {Error} If the directory cannot be created (e.g. the path is a file).
 */
function ensureDir(dirPath) {
  fs.mkdirSync(dirPath, { recursive: true });
}
27
+
28
/**
 * Minimal assertion helper used by the proof steps.
 *
 * @param {*} condition - Value that must be truthy.
 * @param {string} message - Error message used when the condition is falsy.
 * @throws {Error} When `condition` is falsy.
 */
function check(condition, message) {
  if (condition) return;
  throw new Error(message);
}
31
+
32
/**
 * Runs the automation proof: a fixed sequence of checks against feedback
 * capture, analytics, prevention rules, DPO export, the HTTP API, the MCP
 * handler, intent planning, context construction/evaluation, self-healing
 * helpers and code-reasoning traces.
 *
 * Checks run in order inside a single try block, so the first failing check
 * aborts the remaining ones and is recorded as a single `fatal` entry.
 *
 * While running, RLHF_FEEDBACK_DIR, RLHF_API_KEY and RLHF_MCP_PROFILE are
 * overridden in `process.env`; their previous values are restored before
 * returning so the caller's environment is left untouched.
 *
 * @param {object} [options]
 * @param {string} [options.proofDir] - Output directory for report artifacts.
 *   Falls back to RLHF_AUTOMATION_PROOF_DIR, then to DEFAULT_PROOF_DIR.
 * @param {boolean} [options.writeArtifacts=true] - Pass `false` to skip
 *   writing report.json / report.md.
 * @param {number} [options.port=0] - Port handed to `startServer`.
 * @returns {Promise<object>} The report: `generatedAt`, `checks`, `summary`
 *   and, when the final check is reached, `reasoning` and `proofTraces`.
 *   Sets `process.exitCode = 1` when any check failed.
 */
async function runAutomationProof(options = {}) {
  const proofDir = options.proofDir || process.env.RLHF_AUTOMATION_PROOF_DIR || DEFAULT_PROOF_DIR;
  const writeArtifacts = options.writeArtifacts !== false;
  const proofPort = options.port ?? 0;

  if (writeArtifacts) ensureDir(proofDir);

  // Snapshot the variables we are about to override so they can be restored.
  const overriddenEnvKeys = ['RLHF_FEEDBACK_DIR', 'RLHF_API_KEY', 'RLHF_MCP_PROFILE'];
  const previousEnv = Object.fromEntries(
    overriddenEnvKeys.map((key) => [key, process.env[key]]),
  );

  const tmpFeedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rlhf-automation-proof-'));
  process.env.RLHF_FEEDBACK_DIR = tmpFeedbackDir;
  process.env.RLHF_API_KEY = 'automation-proof-key';
  process.env.RLHF_MCP_PROFILE = 'default';

  const report = {
    generatedAt: new Date().toISOString(),
    checks: [],
    summary: { passed: 0, failed: 0 },
  };

  function addResult(name, passed, details) {
    report.checks.push({ name, passed, details });
    if (passed) report.summary.passed += 1;
    else report.summary.failed += 1;
  }

  // Declared outside the try so `finally` can close it only if it started.
  let server;
  try {
    // Started inside the try so a startup failure is reported as `fatal`
    // and the temp directory / environment are still cleaned up.
    const started = await startServer({ port: proofPort });
    server = started.server;
    const { port } = started;

    // 1) Positive with valid rubric -> accepted
    {
      const result = captureFeedback({
        signal: 'up',
        context: 'Implemented with tests and evidence',
        whatWorked: 'Used proof harness and verification logs',
        tags: ['verification', 'automation'],
        rubricScores: [
          { criterion: 'correctness', score: 4, evidence: 'all tests pass', judge: 'judge-a' },
          { criterion: 'verification_evidence', score: 4, evidence: 'proof attached', judge: 'judge-a' },
          { criterion: 'safety', score: 4, evidence: 'path checks enabled', judge: 'judge-a' },
        ],
        guardrails: {
          testsPassed: true,
          pathSafety: true,
          budgetCompliant: true,
        },
      });
      check(result.accepted === true, 'expected rubric-valid positive feedback to be accepted');
      check(Boolean(result.memoryRecord && result.memoryRecord.rubricSummary), 'accepted learning should include rubricSummary');
      addResult('feedback.capture.rubric_pass', true, {
        accepted: result.accepted,
        weightedScore: result.memoryRecord.rubricSummary.weightedScore,
      });
    }

    // 2) Positive with failed guardrail/disagreement -> blocked
    {
      const result = captureFeedback({
        signal: 'up',
        context: 'Claimed done without logs',
        whatWorked: 'looked good',
        tags: ['verification', 'automation'],
        rubricScores: [
          { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
          { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'logs missing' },
        ],
        guardrails: {
          testsPassed: false,
          pathSafety: true,
          budgetCompliant: true,
        },
      });
      check(result.accepted === false, 'expected rubric-gated positive feedback to be rejected');
      check(/Rubric gate prevented promotion/i.test(String(result.reason)), 'expected rubric gate reason');
      addResult('feedback.capture.rubric_block', true, { accepted: result.accepted, reason: result.reason });
    }

    // 3) Negative with rubric failures -> accepted mistake memory with rubric tags
    {
      const result = captureFeedback({
        signal: 'down',
        context: 'Skipped verification before completion claim',
        whatWentWrong: 'No test evidence',
        whatToChange: 'Always include test output',
        tags: ['verification', 'automation'],
        rubricScores: [
          { criterion: 'verification_evidence', score: 1, evidence: 'no logs', judge: 'judge-a' },
          { criterion: 'correctness', score: 2, evidence: 'regression detected', judge: 'judge-a' },
        ],
        guardrails: {
          testsPassed: false,
          pathSafety: true,
          budgetCompliant: true,
        },
      });
      check(result.accepted === true, 'expected negative feedback to be accepted as mistake memory');
      check(result.memoryRecord.tags.includes('rubric-verification_evidence'), 'expected rubric failure tags');
      addResult('feedback.capture.negative_with_rubric', true, {
        accepted: result.accepted,
        tags: result.memoryRecord.tags,
      });
    }

    // 4) analytics tracks rubric blocks/failures
    {
      const { FEEDBACK_LOG_PATH } = getFeedbackPaths();
      const stats = analyzeFeedback(FEEDBACK_LOG_PATH);
      check(stats.rubric.samples >= 3, 'expected rubric samples to be tracked');
      check(stats.rubric.blockedPromotions >= 1, 'expected blocked rubric promotions to be tracked');
      addResult('analytics.rubric_tracking', true, stats.rubric);
    }

    // 5) prevention rules include rubric dimensions
    {
      const markdown = buildPreventionRules(1);
      check(markdown.includes('Rubric Failure Dimensions'), 'expected rubric section in prevention rules');
      check(markdown.includes('verification_evidence'), 'expected criterion in prevention rules');
      addResult('prevention_rules.rubric_dimensions', true, { hasRubricSection: true });
    }

    // 6) DPO export includes rubric delta metadata
    {
      const { MEMORY_LOG_PATH } = getFeedbackPaths();
      const memories = readJSONL(MEMORY_LOG_PATH);
      const result = exportDpoFromMemories(memories);
      check(result.pairs.length >= 1, 'expected at least one DPO pair');
      const first = result.pairs[0];
      check(Boolean(first.metadata && first.metadata.rubric), 'expected rubric metadata in DPO pair');
      addResult('dpo_export.rubric_metadata', true, first.metadata.rubric);
    }

    // 7) API rubric gate returns 422
    {
      const res = await fetch(`http://localhost:${port}/v1/feedback/capture`, {
        method: 'POST',
        headers: {
          Authorization: 'Bearer automation-proof-key',
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          signal: 'up',
          context: 'unsafe api approval attempt',
          whatWorked: 'claimed success',
          tags: ['verification', 'automation'],
          rubricScores: [
            { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
            { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
          ],
          guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
        }),
      });
      check(res.status === 422, `expected 422 from API rubric gate, got ${res.status}`);
      const body = await res.json();
      check(body.accepted === false, 'API rubric-gated capture must be rejected');
      addResult('api.rubric_gate', true, { status: res.status });
    }

    // 8) MCP rubric gate returns accepted=false
    {
      const call = await handleRequest({
        jsonrpc: '2.0',
        id: 91,
        method: 'tools/call',
        params: {
          name: 'capture_feedback',
          arguments: {
            signal: 'up',
            context: 'unsafe mcp approval attempt',
            whatWorked: 'claimed success',
            rubricScores: [
              { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
              { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
            ],
            guardrails: { testsPassed: false, pathSafety: true, budgetCompliant: true },
          },
        },
      });
      const payload = JSON.parse(call.content[0].text);
      check(payload.accepted === false, 'MCP rubric-gated capture must be rejected');
      addResult('mcp.rubric_gate', true, { accepted: payload.accepted });
    }

    // 9) intent checkpoints still enforced
    {
      const planBlocked = planIntent({
        intentId: 'publish_dpo_training_data',
        mcpProfile: 'default',
        approved: false,
      });
      check(planBlocked.status === 'checkpoint_required', 'expected checkpoint_required for high-risk intent');

      const planApproved = planIntent({
        intentId: 'publish_dpo_training_data',
        mcpProfile: 'default',
        approved: true,
      });
      check(planApproved.status === 'ready', 'expected ready when approved');
      addResult('intent.checkpoint_enforcement', true, {
        blocked: planBlocked.status,
        approved: planApproved.status,
      });
    }

    // 10) context evaluate stores rubric evaluation
    {
      const construct = await fetch(`http://localhost:${port}/v1/context/construct`, {
        method: 'POST',
        headers: {
          Authorization: 'Bearer automation-proof-key',
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ query: 'verification automation', maxItems: 5, maxChars: 5000 }),
      });
      check(construct.status === 200, `context construct expected 200, got ${construct.status}`);
      const pack = await construct.json();

      const evaluate = await fetch(`http://localhost:${port}/v1/context/evaluate`, {
        method: 'POST',
        headers: {
          Authorization: 'Bearer automation-proof-key',
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          packId: pack.packId,
          outcome: 'useful',
          signal: 'positive',
          rubricScores: [
            { criterion: 'correctness', score: 4, evidence: 'tests pass', judge: 'judge-a' },
            { criterion: 'verification_evidence', score: 4, evidence: 'logs attached', judge: 'judge-a' },
          ],
          guardrails: { testsPassed: true, pathSafety: true, budgetCompliant: true },
        }),
      });
      check(evaluate.status === 200, `context evaluate expected 200, got ${evaluate.status}`);
      const evalBody = await evaluate.json();
      check(Boolean(evalBody.rubricEvaluation), 'expected rubricEvaluation on context evaluate result');
      addResult('context.evaluate.rubric', true, { rubricId: evalBody.rubricEvaluation.rubricId });
    }

    // 11) semantic cache hit on equivalent query
    {
      // Start from an empty cache file so the first request must be a miss.
      fs.rmSync(path.join(CONTEXTFS_ROOT, NAMESPACES.provenance, 'semantic-cache.jsonl'), { force: true });
      const first = await fetch(`http://localhost:${port}/v1/context/construct`, {
        method: 'POST',
        headers: {
          Authorization: 'Bearer automation-proof-key',
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ query: 'verification testing evidence', maxItems: 5, maxChars: 5000 }),
      });
      check(first.status === 200, `first context construct expected 200, got ${first.status}`);
      const firstPack = await first.json();

      // Same words in a different order: expected to hit the semantic cache.
      const second = await fetch(`http://localhost:${port}/v1/context/construct`, {
        method: 'POST',
        headers: {
          Authorization: 'Bearer automation-proof-key',
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ query: 'testing verification evidence', maxItems: 5, maxChars: 5000 }),
      });
      check(second.status === 200, `second context construct expected 200, got ${second.status}`);
      const secondPack = await second.json();
      check(firstPack.cache && firstPack.cache.hit === false, 'first pack expected cache miss');
      check(secondPack.cache && secondPack.cache.hit === true, 'second pack expected cache hit');
      addResult('context.semantic_cache.hit', true, {
        firstHit: firstPack.cache.hit,
        secondHit: secondPack.cache.hit,
        similarity: secondPack.cache.similarity,
      });
    }

    // 12) self-healing helpers produce healthy reports in baseline state
    {
      const health = collectHealthReport({
        checks: [
          { name: 'noop', command: ['node', '-e', 'process.exit(0)'] },
        ],
      });
      check(health.overall_status === 'healthy', 'health report expected healthy for noop check');

      const heal = runSelfHeal({ reason: 'automation-proof', cwd: ROOT });
      check(heal.healthy === true, 'self-heal expected healthy execution');
      check(Boolean(heal.reasoning), 'self-heal must include reasoning traces');
      check(heal.traces.length === heal.plan.length, 'self-heal traces count must match plan length');
      addResult('self_healing.helpers', true, {
        healthStatus: health.overall_status,
        changed: heal.changed,
        reasoning: heal.reasoning,
      });
    }

    // 13) code reasoning traces verify DPO pair quality
    {
      const { MEMORY_LOG_PATH } = getFeedbackPaths();
      const memories = readJSONL(MEMORY_LOG_PATH);
      const result = exportDpoFromMemories(memories);
      if (result.pairs.length >= 1) {
        const first = result.pairs[0];
        check(Boolean(first.metadata.reasoningTrace), 'DPO pair must include reasoningTrace metadata');
        check(typeof first.metadata.reasoningTrace.confidence === 'number', 'reasoningTrace must have confidence score');
        check(typeof first.metadata.reasoningTrace.traceId === 'string', 'reasoningTrace must have traceId');
        check(Boolean(result.reasoning), 'DPO export must include aggregate reasoning summary');
        addResult('code_reasoning.dpo_traces', true, {
          traceId: first.metadata.reasoningTrace.traceId,
          confidence: first.metadata.reasoningTrace.confidence,
          aggregateConfidence: result.reasoning.averageConfidence,
        });
      } else {
        addResult('code_reasoning.dpo_traces', true, { skipped: true, reason: 'no DPO pairs to trace' });
      }
    }

    // 14) code reasoning traces attached to proof checks
    {
      const proofTraces = report.checks.map((chk) => traceForProofCheck(chk));
      const aggregate = aggregateTraces(proofTraces);
      check(aggregate.totalTraces === report.checks.length, 'proof trace count must match check count');
      check(aggregate.refuted === 0, 'no proof check should have refuted steps');
      check(aggregate.averageConfidence > 0, 'proof traces must have positive confidence');
      report.reasoning = aggregate;
      report.proofTraces = proofTraces;
      addResult('code_reasoning.proof_gate', true, {
        totalTraces: aggregate.totalTraces,
        averageConfidence: aggregate.averageConfidence,
        allPassed: aggregate.allPassed,
      });
    }
  } catch (err) {
    addResult('fatal', false, { error: err.message });
  } finally {
    // `server` is undefined when startServer itself failed.
    if (server) await new Promise((resolve) => server.close(resolve));
    fs.rmSync(tmpFeedbackDir, { recursive: true, force: true });
    // Put the caller's environment back exactly as it was.
    for (const key of overriddenEnvKeys) {
      if (previousEnv[key] === undefined) delete process.env[key];
      else process.env[key] = previousEnv[key];
    }
  }

  if (writeArtifacts) {
    fs.writeFileSync(path.join(proofDir, 'report.json'), `${JSON.stringify(report, null, 2)}\n`);
    const mdLines = [
      '# Automation Proof',
      '',
      `Generated: ${report.generatedAt}`,
      '',
      `Passed: ${report.summary.passed}`,
      `Failed: ${report.summary.failed}`,
      '',
      '## Checks',
      ...report.checks.map((checkItem) => `- ${checkItem.passed ? 'PASS' : 'FAIL'} ${checkItem.name}`),
      '',
    ];
    fs.writeFileSync(path.join(proofDir, 'report.md'), `${mdLines.join('\n')}\n`);
  }

  if (report.summary.failed > 0) process.exitCode = 1;
  return report;
}
384
+
385
+ module.exports = {
386
+ runAutomationProof,
387
+ };
388
+
389
+ if (require.main === module) {
390
+ runAutomationProof().then((report) => {
391
+ console.log(JSON.stringify(report.summary, null, 2));
392
+ });
393
+ }
@@ -0,0 +1,219 @@
1
+ 'use strict';
2
+ /**
3
+ * Phase 7: Data Quality — Proof Gate
4
+ *
5
+ * Validates all QUAL-01 through QUAL-04 requirements offline.
6
+ * Mirrors the pattern of prove-attribution.js (mkdtempSync + env override + execSync).
7
+ *
8
+ * Usage:
9
+ * node scripts/prove-data-quality.js
10
+ *
11
+ * Produces:
12
+ * proof/data-quality-report.json
13
+ * proof/data-quality-report.md
14
+ */
15
+
16
+ const { execSync } = require('child_process');
17
+ const fs = require('fs');
18
+ const os = require('os');
19
+ const path = require('path');
20
+
21
// Output locations for the proof artifacts, under <package root>/proof.
const PROOF_DIR = path.resolve(__dirname, '..', 'proof');
const REPORT_JSON = path.resolve(PROOF_DIR, 'data-quality-report.json');
const REPORT_MD = path.resolve(PROOF_DIR, 'data-quality-report.md');
24
+
25
/**
 * Runs the QUAL-01..QUAL-04 checks, prints a PASS/FAIL line per requirement,
 * and writes the JSON and Markdown proof reports under PROOF_DIR.
 *
 * Each check is isolated: a thrown error marks only that requirement as
 * failed and the remaining checks still run. RLHF_FEEDBACK_DIR is pointed at
 * a throwaway temp directory from QUAL-02 onwards and restored to its prior
 * value afterwards. Exits the process with code 1 when any check failed.
 */
function run() {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rlhf-qual-proof-'));
  // Remember the caller's value so cleanup can restore it instead of
  // unconditionally deleting it.
  const previousFeedbackDir = process.env.RLHF_FEEDBACK_DIR;
  const results = { passed: 0, failed: 0, requirements: {} };

  const checks = [
    {
      id: 'QUAL-01',
      desc: 'validate-feedback.js exports validateEntry with 4-level pipeline',
      fn: () => {
        delete require.cache[require.resolve('./validate-feedback')];
        const v = require('./validate-feedback');
        if (typeof v.validateEntry !== 'function') throw new Error('validateEntry not exported');
        if (typeof v.validateSchema !== 'function') throw new Error('validateSchema not exported');
        if (typeof v.validateSemantics !== 'function') throw new Error('validateSemantics not exported');
        if (typeof v.detectAnomalies !== 'function') throw new Error('detectAnomalies not exported');
        if (typeof v.generateCorrections !== 'function') throw new Error('generateCorrections not exported');

        // Verify semantic inconsistency is caught
        const r = v.validateEntry({
          id: 'proof-x',
          timestamp: new Date().toISOString(),
          signal: 'positive',
          reward: -1,
          context: 'good work done',
        });
        if (r.valid) throw new Error('Expected invalid for positive+negative-reward');
        if (!r.corrections.length) throw new Error('Expected auto-correction for reward');
        if (r.correctedEntry.reward !== 1) throw new Error('Expected corrected reward=1');

        // Verify sensitive data detection
        const r2 = v.validateEntry({
          id: 'proof-y',
          timestamp: new Date().toISOString(),
          signal: 'positive',
          reward: 1,
          context: 'api_key=abc123 was in the response',
        });
        if (!r2.issues.some((i) => i.type === 'security')) {
          throw new Error('Expected security issue for api_key pattern');
        }
      },
    },
    {
      id: 'QUAL-02',
      desc: 'captureFeedback produces richContext with domain, filePaths, errorType, outcomeCategory',
      fn: () => {
        process.env.RLHF_FEEDBACK_DIR = tmpDir;
        // Clear module cache so env var takes effect
        [
          './feedback-loop',
          './feedback-attribution',
          './rlaif-self-audit',
        ].forEach((m) => {
          try {
            delete require.cache[require.resolve(m)];
          } catch {
            // optional module
          }
        });
        const { captureFeedback } = require('./feedback-loop');
        const r = captureFeedback({
          signal: 'positive',
          context: 'unit tests added for edge cases',
          tags: ['testing'],
          filePaths: ['src/api.js'],
        });
        if (!r.feedbackEvent) throw new Error('No feedbackEvent in result');
        const rc = r.feedbackEvent.richContext;
        if (!rc) throw new Error('richContext missing from feedbackEvent');
        if (typeof rc.domain !== 'string') throw new Error('richContext.domain must be string');
        if (!Array.isArray(rc.filePaths)) throw new Error('richContext.filePaths must be array');
        if (!('errorType' in rc)) throw new Error('richContext.errorType field missing');
        if (typeof rc.outcomeCategory !== 'string') throw new Error('richContext.outcomeCategory must be string');
        if (rc.domain !== 'testing') throw new Error(`Expected domain=testing, got ${rc.domain}`);
      },
    },
    {
      id: 'QUAL-03',
      desc: 'inferOutcome returns granular categories beyond binary up/down',
      fn: () => {
        try {
          delete require.cache[require.resolve('./feedback-loop')];
        } catch {
          // Resolution failure is reported by the require() just below.
        }
        const { inferOutcome } = require('./feedback-loop');
        if (typeof inferOutcome !== 'function') throw new Error('inferOutcome not exported from feedback-loop');

        // [signal, context, expected outcome category]
        const cases = [
          ['positive', 'solved it first try', 'quick-success'],
          ['positive', 'thorough comprehensive analysis', 'deep-success'],
          ['positive', 'worked well overall', 'standard-success'],
          ['negative', 'gave wrong incorrect answer', 'factual-error'],
          ['negative', 'shallow surface level response', 'insufficient-depth'],
          ['negative', 'guessed without checking docs', 'false-assumption'],
        ];

        for (const [signal, context, expected] of cases) {
          const got = inferOutcome(signal, context);
          if (got !== expected) {
            throw new Error(`inferOutcome('${signal}', '${context}') = '${got}', expected '${expected}'`);
          }
        }
      },
    },
    {
      id: 'QUAL-04',
      desc: 'test:quality (node --test tests/validate-feedback.test.js) passes with 0 failures',
      fn: () => {
        let out;
        try {
          out = execSync('node --test tests/validate-feedback.test.js', {
            cwd: path.join(__dirname, '..'),
            env: { ...process.env, RLHF_FEEDBACK_DIR: tmpDir },
            encoding: 'utf8',
            stdio: 'pipe',
          });
        } catch (err) {
          // execSync throws on a non-zero exit; the captured child output is
          // attached to the error, so surface its tail in the failure message.
          const captured = `${err.stdout || ''}${err.stderr || ''}`;
          throw new Error(`Tests failed (exit status ${err.status})\n${captured.slice(-500)}`);
        }
        // node:test exits non-zero on failure — if we get here, all tests passed.
        // The summary line is still checked as a second line of defence.
        const failMatch = out.match(/ℹ fail (\d+)/);
        if (failMatch && Number.parseInt(failMatch[1], 10) > 0) {
          throw new Error(`Tests failed: ${failMatch[1]} failure(s)\n${out.slice(-500)}`);
        }
      },
    },
  ];

  console.log('Phase 7: Data Quality — Proof Gate\n');
  console.log('Checking requirements:\n');

  for (const check of checks) {
    try {
      check.fn();
      results.passed++;
      results.requirements[check.id] = { status: 'pass', desc: check.desc };
      console.log(`  PASS  ${check.id}: ${check.desc}`);
    } catch (err) {
      results.failed++;
      results.requirements[check.id] = {
        status: 'fail',
        desc: check.desc,
        error: err.message,
      };
      console.error(`  FAIL  ${check.id}: ${err.message}`);
    }
  }

  // Cleanup tmp dir
  try {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  } catch {
    // Best effort: a leftover temp directory must not fail the proof.
  }
  // Restore the caller's RLHF_FEEDBACK_DIR rather than dropping it.
  if (previousFeedbackDir === undefined) delete process.env.RLHF_FEEDBACK_DIR;
  else process.env.RLHF_FEEDBACK_DIR = previousFeedbackDir;

  // Write proof artifacts
  fs.mkdirSync(PROOF_DIR, { recursive: true });

  const report = {
    phase: '07-data-quality',
    generatedAt: new Date().toISOString(),
    passed: results.passed,
    failed: results.failed,
    total: checks.length,
    requirements: results.requirements,
  };

  fs.writeFileSync(REPORT_JSON, JSON.stringify(report, null, 2) + '\n');

  const md = [
    '# Phase 7: Data Quality — Proof Report',
    '',
    `Generated: ${report.generatedAt}`,
    `Result: ${results.passed}/${checks.length} passed`,
    '',
    '## Requirements',
    '',
    ...Object.entries(results.requirements).map(([id, r]) => {
      const checkbox = r.status === 'pass' ? '[x]' : '[ ]';
      const errLine = r.error ? `\n  - Error: \`${r.error}\`` : '';
      return `- ${checkbox} **${id}**: ${r.desc}${errLine}`;
    }),
    '',
    '## Evidence',
    '',
    '- `scripts/validate-feedback.js` — 4-level validation pipeline (schema, semantics, anomaly, self-correction)',
    '- `scripts/feedback-loop.js` — `inferOutcome()` and `enrichFeedbackContext()` added; `richContext` in every feedbackEvent',
    '- `tests/validate-feedback.test.js` — 25 node:test cases covering all QUAL requirements',
    '',
  ].join('\n');

  fs.writeFileSync(REPORT_MD, md);

  console.log(`\nPhase 7 proof: ${results.passed} passed, ${results.failed} failed`);
  console.log(`Report: ${REPORT_JSON}`);

  if (results.failed > 0) process.exit(1);
}

run();