rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/LICENSE +21 -0
  3. package/README.md +308 -0
  4. package/adapters/README.md +8 -0
  5. package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
  6. package/adapters/chatgpt/INSTALL.md +80 -0
  7. package/adapters/chatgpt/openapi.yaml +292 -0
  8. package/adapters/claude/.mcp.json +8 -0
  9. package/adapters/codex/config.toml +4 -0
  10. package/adapters/gemini/function-declarations.json +95 -0
  11. package/adapters/mcp/server-stdio.js +444 -0
  12. package/bin/cli.js +167 -0
  13. package/config/mcp-allowlists.json +29 -0
  14. package/config/policy-bundles/constrained-v1.json +53 -0
  15. package/config/policy-bundles/default-v1.json +80 -0
  16. package/config/rubrics/default-v1.json +52 -0
  17. package/config/subagent-profiles.json +32 -0
  18. package/openapi/openapi.yaml +292 -0
  19. package/package.json +91 -0
  20. package/plugins/amp-skill/INSTALL.md +52 -0
  21. package/plugins/amp-skill/SKILL.md +31 -0
  22. package/plugins/claude-skill/INSTALL.md +55 -0
  23. package/plugins/claude-skill/SKILL.md +46 -0
  24. package/plugins/codex-profile/AGENTS.md +20 -0
  25. package/plugins/codex-profile/INSTALL.md +57 -0
  26. package/plugins/gemini-extension/INSTALL.md +74 -0
  27. package/plugins/gemini-extension/gemini_prompt.txt +10 -0
  28. package/plugins/gemini-extension/tool_contract.json +28 -0
  29. package/scripts/billing.js +471 -0
  30. package/scripts/budget-guard.js +173 -0
  31. package/scripts/code-reasoning.js +307 -0
  32. package/scripts/context-engine.js +547 -0
  33. package/scripts/contextfs.js +513 -0
  34. package/scripts/contract-audit.js +198 -0
  35. package/scripts/dpo-optimizer.js +208 -0
  36. package/scripts/export-dpo-pairs.js +316 -0
  37. package/scripts/export-training.js +448 -0
  38. package/scripts/feedback-attribution.js +313 -0
  39. package/scripts/feedback-inbox-read.js +162 -0
  40. package/scripts/feedback-loop.js +838 -0
  41. package/scripts/feedback-schema.js +300 -0
  42. package/scripts/feedback-to-memory.js +165 -0
  43. package/scripts/feedback-to-rules.js +109 -0
  44. package/scripts/generate-paperbanana-diagrams.sh +99 -0
  45. package/scripts/hybrid-feedback-context.js +676 -0
  46. package/scripts/intent-router.js +164 -0
  47. package/scripts/mcp-policy.js +92 -0
  48. package/scripts/meta-policy.js +194 -0
  49. package/scripts/plan-gate.js +154 -0
  50. package/scripts/prove-adapters.js +364 -0
  51. package/scripts/prove-attribution.js +364 -0
  52. package/scripts/prove-automation.js +393 -0
  53. package/scripts/prove-data-quality.js +219 -0
  54. package/scripts/prove-intelligence.js +256 -0
  55. package/scripts/prove-lancedb.js +370 -0
  56. package/scripts/prove-loop-closure.js +255 -0
  57. package/scripts/prove-rlaif.js +404 -0
  58. package/scripts/prove-subway-upgrades.js +250 -0
  59. package/scripts/prove-training-export.js +324 -0
  60. package/scripts/prove-v2-milestone.js +273 -0
  61. package/scripts/prove-v3-milestone.js +381 -0
  62. package/scripts/rlaif-self-audit.js +123 -0
  63. package/scripts/rubric-engine.js +230 -0
  64. package/scripts/self-heal.js +127 -0
  65. package/scripts/self-healing-check.js +111 -0
  66. package/scripts/skill-quality-tracker.js +284 -0
  67. package/scripts/subagent-profiles.js +79 -0
  68. package/scripts/sync-gh-secrets-from-env.sh +29 -0
  69. package/scripts/thompson-sampling.js +331 -0
  70. package/scripts/train_from_feedback.py +914 -0
  71. package/scripts/validate-feedback.js +580 -0
  72. package/scripts/vector-store.js +100 -0
  73. package/src/api/server.js +497 -0
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * prove-v2-milestone.js
4
+ *
5
+ * Phase 12: Final proof gate for the v2.0 milestone.
6
+ * Verifies:
7
+ * PROOF-01: Proof reports exist for all v2 phases (6-11)
8
+ * PROOF-02: npm test passes with count > 142 (v1 baseline) and 0 failures
9
+ *
10
+ * All numbers are from actual test runs — no placeholders.
11
+ */
12
+
13
+ 'use strict';
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const { execSync } = require('child_process');
18
+
19
+ const ROOT = path.join(__dirname, '..');
20
+ const PROOF_DIR = path.join(ROOT, 'proof');
21
+
22
/**
 * Create directory `d`, including any missing parent directories,
 * unless something already exists at that path.
 *
 * @param {string} d - Directory path to create.
 */
function ensureDir(d) {
  const alreadyPresent = fs.existsSync(d);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(d, { recursive: true });
}
25
+
26
+ // ---------------------------------------------------------------------------
27
+ // PROOF-01: Check all v2 proof reports exist
28
+ // ---------------------------------------------------------------------------
29
+
30
/**
 * Proof artifacts required for each v2 phase (6 through 11).
 *
 * Every phase must have produced a JSON report and a Markdown report that
 * share the same path stem (relative to the package root). Each entry has the
 * shape `{ phase, name, files: [jsonPath, mdPath] }`.
 */
const V2_PROOF_REQUIREMENTS = [
  [6, 'Feedback Attribution', 'proof/attribution-report'],
  [7, 'Data Quality', 'proof/data-quality-report'],
  [8, 'Loop Closure', 'proof/loop-closure-report'],
  [9, 'Intelligence', 'proof/intelligence-report'],
  [10, 'Training Export', 'proof/training-export-report'],
  [11, 'Subway Upgrades', 'proof/subway-upgrades/subway-upgrades-report'],
].map(([phase, name, stem]) => ({
  phase,
  name,
  files: [`${stem}.json`, `${stem}.md`],
}));
65
+
66
/**
 * PROOF-01 helper: inspect every file listed in V2_PROOF_REQUIREMENTS.
 *
 * For each required path (resolved against ROOT) this records:
 *   - `exists`:   the path is present AND could be read as a UTF-8 file. A
 *                 path that exists but cannot be read (for example a
 *                 directory) is reported as `exists: false` with the reason in
 *                 `readError`, so it can no longer slip through as a clean
 *                 report.
 *   - `hasTodo`:  the content matches /TODO|placeholder|estimated/i.
 *   - `fileSize`: size of the content in bytes (UTF-8). Callers print this
 *                 value with a "bytes" label.
 *
 * @returns {{allExist: boolean, phases: Array<{phase: number, name: string,
 *   files: Array<{path: string, exists: boolean, hasTodo: boolean,
 *   fileSize: number, readError?: string}>}>}}
 *   `allExist` is true only when every required file exists and was readable.
 */
function checkProofReports() {
  // Non-global regex: `.test()` keeps no state between files.
  const placeholderPattern = /TODO|placeholder|estimated/i;
  let allExist = true;

  const phases = V2_PROOF_REQUIREMENTS.map((req) => {
    const files = req.files.map((relPath) => {
      const absPath = path.join(ROOT, relPath);
      let exists = fs.existsSync(absPath);
      let hasTodo = false;
      let fileSize = 0;
      let readError;

      if (exists) {
        try {
          const content = fs.readFileSync(absPath, 'utf-8');
          hasTodo = placeholderPattern.test(content);
          // Byte length, not string length: multi-byte characters count fully.
          fileSize = Buffer.byteLength(content, 'utf-8');
        } catch (err) {
          // Present but unreadable is not a usable proof report.
          exists = false;
          readError = err.message;
        }
      }

      if (!exists) {
        allExist = false;
      }

      const entry = { path: relPath, exists, hasTodo, fileSize };
      if (readError !== undefined) {
        entry.readError = readError;
      }
      return entry;
    });

    return { phase: req.phase, name: req.name, files };
  });

  return { allExist, phases };
}
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // PROOF-02: Run npm test and verify count
107
+ // ---------------------------------------------------------------------------
108
+
109
+ const V1_BASELINE_TEST_COUNT = 142;
110
+
111
/**
 * PROOF-02 helper: run `npm test` in ROOT and total the reported results.
 *
 * Every "ℹ pass N" and "ℹ fail N" line in the combined output is summed.
 *
 * Two safeguards on top of plain line counting:
 *   - A non-zero exit status with no parsed failure (for example a script that
 *     crashed before printing its summary) is reported as at least one
 *     failure, so a broken run cannot look like a clean one.
 *   - `maxBuffer` is raised above the 1 MiB default so a verbose suite is not
 *     aborted merely for printing a lot.
 *
 * @returns {{passed: number, failed: number, raw: string, timedOut: boolean,
 *   exitCode: (number|null)}} `raw` is the last 2000 characters of output
 *   (or 'TIMED OUT'); `exitCode` is null when the run timed out.
 */
function runFullTestSuite() {
  console.log('Running full test suite (npm test)...');
  let output = '';
  let exitCode = 0;

  try {
    output = execSync('npm test', {
      cwd: ROOT,
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
      timeout: 180_000,
      maxBuffer: 64 * 1024 * 1024,
    });
  } catch (err) {
    if (err.code === 'ETIMEDOUT') {
      return { passed: 0, failed: 1, raw: 'TIMED OUT', timedOut: true, exitCode: null };
    }
    // npm test exits non-zero on test failure — capture output anyway
    output = (err.stdout || '') + (err.stderr || '');
    exitCode = typeof err.status === 'number' ? err.status : 1;
  }

  // Sum the captured number from every line matching `pattern`.
  const sumMatches = (pattern) =>
    [...output.matchAll(pattern)].reduce((sum, m) => sum + parseInt(m[1], 10), 0);

  const passed = sumMatches(/ℹ pass (\d+)/g);
  let failed = sumMatches(/ℹ fail (\d+)/g);

  // A failing exit status must never be reported as zero failures.
  if (exitCode !== 0 && failed === 0) {
    failed = 1;
  }

  return { passed, failed, raw: output.slice(-2000), timedOut: false, exitCode };
}
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // Main
145
+ // ---------------------------------------------------------------------------
146
+
147
/**
 * Phase 12 entry point: run both v2.0 proof checks, write the JSON and
 * Markdown milestone reports into PROOF_DIR, and exit the process.
 *
 *   PROOF-01: every file in the v2 requirements list exists and contains no
 *             placeholder markers (via checkProofReports()).
 *   PROOF-02: `npm test` reports more passing tests than
 *             V1_BASELINE_TEST_COUNT and zero failures (via runFullTestSuite()).
 *
 * The process exits with status 0 when both checks pass and 1 otherwise.
 *
 * In the JSON report, `testResults.passed` is the boolean PROOF-02 verdict
 * and `testResults.passedCount` is the number of passing tests. The object
 * previously declared `passed` twice, so the count was silently overwritten
 * by the verdict and never reached the report.
 *
 * @returns {Promise<void>} Does not resolve in practice; `process.exit` is
 *   called at the end.
 */
async function main() {
  // A phase passes only when every required file exists and is placeholder-free.
  const phaseOk = (p) => p.files.every((f) => f.exists && !f.hasTodo);

  console.log('Phase 12: Proof Gate — v2.0 Milestone Final Check\n');
  console.log('='.repeat(50));

  // PROOF-01
  console.log('\nChecking proof reports (PROOF-01)...');
  const proofCheck = checkProofReports();

  for (const phase of proofCheck.phases) {
    const status = phaseOk(phase) ? 'PASS' : 'FAIL';
    console.log(` Phase ${phase.phase} (${phase.name}): ${status}`);
    for (const f of phase.files) {
      const indicator = f.exists ? (f.hasTodo ? ' TODO found' : ' exists') : ' MISSING';
      console.log(` ${f.path}: ${indicator} ${f.exists ? `(${f.fileSize} bytes)` : ''}`);
    }
  }

  // PROOF-02
  const testResults = runFullTestSuite();
  const testCountOk = testResults.passed > V1_BASELINE_TEST_COUNT;
  const testFailOk = testResults.failed === 0;

  console.log(`\nTest Results (PROOF-02):`);
  console.log(` Passed: ${testResults.passed} (v1 baseline: ${V1_BASELINE_TEST_COUNT}, need > ${V1_BASELINE_TEST_COUNT})`);
  console.log(` Failed: ${testResults.failed}`);
  console.log(` Count check: ${testCountOk ? 'PASS' : 'FAIL'} (${testResults.passed} > ${V1_BASELINE_TEST_COUNT})`);
  console.log(` Zero failures: ${testFailOk ? 'PASS' : 'FAIL'}`);

  const proof01Passed = proofCheck.allExist && proofCheck.phases.every(phaseOk);
  const proof02Passed = testCountOk && testFailOk;
  const overallPassed = proof01Passed && proof02Passed;

  // Write reports
  const report = {
    phase: 12,
    name: 'Proof Gate',
    milestone: 'v2.0',
    requirements: ['PROOF-01', 'PROOF-02'],
    generatedAt: new Date().toISOString(),
    proofReports: {
      allExist: proofCheck.allExist,
      phases: proofCheck.phases,
      passed: proof01Passed,
    },
    testResults: {
      // Numeric count under its own key so it no longer collides with the verdict.
      passedCount: testResults.passed,
      failed: testResults.failed,
      v1Baseline: V1_BASELINE_TEST_COUNT,
      countExceedsBaseline: testCountOk,
      zeroFailures: testFailOk,
      passed: proof02Passed,
    },
    overallPassed,
  };

  ensureDir(PROOF_DIR);
  const jsonPath = path.join(PROOF_DIR, 'v2-milestone-report.json');
  const mdPath = path.join(PROOF_DIR, 'v2-milestone-report.md');

  fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2));

  const status = overallPassed ? 'PASSED' : 'FAILED';

  const phaseRows = proofCheck.phases.map((p) => {
    const pStatus = phaseOk(p) ? 'PASS' : 'FAIL';
    const fileList = p.files.map((f) => `${f.path} (${f.exists ? (f.hasTodo ? 'TODO found' : 'exists') : 'MISSING'})`).join(', ');
    return `| ${p.phase} | ${p.name} | ${fileList} | ${pStatus} |`;
  }).join('\n');

  const md = `# Phase 12: Proof Gate — v2.0 Milestone Report

**Status:** ${status}
**Generated:** ${report.generatedAt}
**Milestone:** v2.0 RLHF Bidirectional Feature Sync

## PROOF-01: All v2 Phase Proof Reports Exist

| Phase | Name | Files | Status |
|-------|------|-------|--------|
${phaseRows}

**Overall PROOF-01:** ${proof01Passed ? 'PASS' : 'FAIL'}

## PROOF-02: npm test — Count and Zero Failures

| Metric | Value | Requirement | Status |
|--------|-------|-------------|--------|
| Tests passed | ${testResults.passed} | > ${V1_BASELINE_TEST_COUNT} (v1 baseline) | ${testCountOk ? 'PASS' : 'FAIL'} |
| Tests failed | ${testResults.failed} | 0 | ${testFailOk ? 'PASS' : 'FAIL'} |

**Overall PROOF-02:** ${proof02Passed ? 'PASS' : 'FAIL'}

## v2.0 Milestone Summary

All v2 phases complete:

| Phase | Feature | Requirements |
|-------|---------|-------------|
| 6 | Feedback Attribution | ATTR-01, ATTR-02, ATTR-03 |
| 7 | Data Quality | QUAL-01, QUAL-02, QUAL-03, QUAL-04 |
| 8 | Loop Closure | LOOP-01, LOOP-02, LOOP-03, LOOP-04, LOOP-05 |
| 9 | Intelligence | INTL-01, INTL-02, INTL-03 |
| 10 | Training Export | XPRT-01, XPRT-02, XPRT-03, XPRT-04, XPRT-05 |
| 11 | Subway Upgrades | SUBW-01, SUBW-02, SUBW-03, SUBW-04, SUBW-05 |
| 12 | Proof Gate | PROOF-01, PROOF-02 |

**Final test count:** ${testResults.passed} (${testResults.passed - V1_BASELINE_TEST_COUNT} above v1 baseline of ${V1_BASELINE_TEST_COUNT})
**Test failures:** ${testResults.failed}
**v2.0 milestone status:** ${status}
`;

  fs.writeFileSync(mdPath, md);

  console.log(`\n${'='.repeat(50)}`);
  console.log(`v2.0 Milestone Status: ${status}`);
  console.log(`PROOF-01 (all proof reports): ${proof01Passed ? 'PASS' : 'FAIL'}`);
  console.log(`PROOF-02 (test count + 0 failures): ${proof02Passed ? 'PASS' : 'FAIL'}`);
  console.log(`\nFinal proof report: ${mdPath}`);

  process.exit(overallPassed ? 0 : 1);
}
269
+
270
// Kick off the gate. Any rejection that escapes main() is logged and turned
// into exit status 1 so callers see the run as failed.
main().catch((failure) => {
  const reason = failure.message;
  console.error('prove-v2-milestone failed:', reason);
  process.exit(1);
});
@@ -0,0 +1,381 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * prove-v3-milestone.js
4
+ *
5
+ * Phase 17: Final proof gate for the v3.0 Commercialization milestone.
6
+ * Verifies:
7
+ * PROOF-01: Dockerfile exists + /health endpoint works (start server, curl, kill)
8
+ * PROOF-02: billing.js exports all 5 required functions + key provision/validate round-trip
9
+ * PROOF-03: bin/cli.js runs `init` in tmpdir + creates config
10
+ * PROOF-04: npm test passes with count >= 314 and 0 failures
11
+ *
12
+ * All numbers are from actual runs — no placeholders.
13
+ * Writes proof/v3-milestone-report.json and proof/v3-milestone-report.md
14
+ */
15
+
16
+ 'use strict';
17
+
18
+ const fs = require('fs');
19
+ const path = require('path');
20
+ const { execSync, spawnSync } = require('child_process');
21
+ const os = require('os');
22
+
23
+ const ROOT = path.join(__dirname, '..');
24
+ const PROOF_DIR = path.join(ROOT, 'proof');
25
+
26
/**
 * Make sure directory `d` exists, creating it and any missing parents
 * when nothing is present at that path yet.
 *
 * @param {string} d - Directory path.
 */
function ensureDir(d) {
  if (fs.existsSync(d)) return;
  fs.mkdirSync(d, { recursive: true });
}
29
+
30
/**
 * Current time as an ISO-8601 string (the `Date.prototype.toISOString` format).
 *
 * @returns {string} Timestamp for "now".
 */
function stamp() {
  const now = new Date();
  return now.toISOString();
}
33
+
34
+ // ─── Result collector ──────────────────────────────────────────────────────
35
// Every check outcome recorded during the run, in the order it was recorded.
const results = [];

/**
 * Store one check outcome in `results` and print it to stdout.
 *
 * @param {string} check - Check identifier.
 * @param {boolean} passed - Whether the check succeeded.
 * @param {string} detail - Human-readable outcome description.
 * @param {string} [evidence=''] - Supporting output; printed only for failed checks.
 */
function record(check, passed, detail, evidence = '') {
  const entry = { check, passed, detail, evidence, ts: stamp() };
  results.push(entry);

  const icon = passed ? 'PASS' : 'FAIL';
  console.log(`[${icon}] ${check}: ${detail}`);

  const showEvidence = Boolean(evidence) && !passed;
  if (showEvidence) {
    console.log(` Evidence: ${evidence}`);
  }
}
43
+
44
+ // ===========================================================================
45
+ // PROOF-01: Dockerfile exists + health endpoint responds
46
+ // ===========================================================================
47
console.log('\n── PROOF-01: Dockerfile + /health endpoint ──');

const dockerfilePath = path.join(ROOT, 'Dockerfile');
const dockerfileExists = fs.existsSync(dockerfilePath);
record('PROOF-01a: Dockerfile exists', dockerfileExists, dockerfileExists ? 'Dockerfile found' : 'Dockerfile MISSING');

// Start the API server, curl /health, then kill it
let healthPassed = false;
let healthDetail = '';
let healthEvidence = '';
let serverPid = null;

const serverPath = path.join(ROOT, 'src', 'api', 'server.js');
const serverExists = fs.existsSync(serverPath);
record('PROOF-01b: src/api/server.js exists', serverExists, serverExists ? 'server.js found' : 'server.js MISSING');

if (serverExists) {
  const TEST_PORT = 13877; // unlikely to collide
  const env = { ...process.env, PORT: String(TEST_PORT), RLHF_ALLOW_INSECURE: 'true' };

  // Synchronous sleep that blocks without spinning the CPU: Atomics.wait on a
  // cell nobody notifies simply times out after `ms` milliseconds.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
  const sleepSync = (ms) => {
    Atomics.wait(sleepCell, 0, 0, ms);
  };

  try {
    // stdio is 'ignore': this script stays in synchronous code and never
    // reads the child's pipes, so piped output would only accumulate unread.
    const serverProc = require('child_process').spawn(
      process.execPath,
      [serverPath],
      { env, detached: false, stdio: 'ignore' }
    );
    serverPid = serverProc.pid;

    // Wait up to 4 seconds for the server to start
    let started = false;
    const deadline = Date.now() + 4000;
    while (Date.now() < deadline) {
      try {
        execSync(`curl -sf http://localhost:${TEST_PORT}/health`, { timeout: 1000 });
        started = true;
        break;
      } catch (_) {
        // not up yet — retry in 200ms
        sleepSync(200);
      }
    }

    if (started) {
      const rawResponse = execSync(`curl -s http://localhost:${TEST_PORT}/health`, { timeout: 2000 }).toString().trim();
      healthEvidence = rawResponse;
      try {
        const parsed = JSON.parse(rawResponse);
        const hasVersion = typeof parsed.version !== 'undefined';
        const hasUptime = typeof parsed.uptime !== 'undefined';
        healthPassed = hasVersion && hasUptime;
        healthDetail = healthPassed
          ? `HTTP 200, version=${parsed.version}, uptime=${parsed.uptime}`
          : `Response missing version or uptime fields: ${rawResponse}`;
      } catch (e) {
        healthDetail = `Invalid JSON from /health: ${rawResponse}`;
      }
    } else {
      healthDetail = 'Server did not start within 4 seconds';
    }
  } catch (err) {
    healthDetail = `Server start error: ${err.message}`;
  } finally {
    // Always stop the server, including when a curl call above threw;
    // otherwise the child would keep running after this script exits.
    if (serverPid) {
      try { process.kill(serverPid, 'SIGTERM'); } catch (_) { /* already gone */ }
    }
  }
} else {
  healthDetail = 'Skipped — server.js not found';
}

record('PROOF-01c: /health returns 200 with version+uptime', healthPassed, healthDetail, healthEvidence);
117
+
118
+ // ===========================================================================
119
+ // PROOF-02: billing.js exports all 5 required functions + provision/validate round-trip
120
+ // ===========================================================================
121
console.log('\n── PROOF-02: Billing module exports + key round-trip ──');

// Function names scripts/billing.js must export for PROOF-02a to pass.
const REQUIRED_BILLING_EXPORTS = [
  'createCheckoutSession',
  'provisionApiKey',
  'validateApiKey',
  'recordUsage',
  'handleWebhook',
];

const billingPath = path.join(ROOT, 'scripts', 'billing.js');
let billingExportsPassed = false;
let billingExportsDetail = '';
let roundTripPassed = false;
let roundTripDetail = '';

if (fs.existsSync(billingPath)) {
  let billing;
  let billingLoaded = false;

  // Loading the module is handled on its own, so "Error loading billing.js"
  // is reported only when the require itself fails.
  try {
    billing = require(billingPath);
    billingLoaded = true;
  } catch (err) {
    billingExportsDetail = `Error loading billing.js: ${err.message}`;
    roundTripDetail = 'Skipped due to billing.js load error';
  }

  if (billingLoaded) {
    const missingExports = REQUIRED_BILLING_EXPORTS.filter(
      (fn) => !billing || typeof billing[fn] !== 'function'
    );
    billingExportsPassed = missingExports.length === 0;
    billingExportsDetail = billingExportsPassed
      ? `All 5 functions exported: ${REQUIRED_BILLING_EXPORTS.join(', ')}`
      : `Missing exports: ${missingExports.join(', ')}`;

    // Key provision + validate round-trip (runs in local mode when STRIPE_SECRET_KEY is absent)
    // NOTE(review): this override is not restored, so it stays in effect for
    // the rest of this process and is inherited by later child processes
    // that copy process.env — confirm that is intended.
    process.env.STRIPE_SECRET_KEY = ''; // force local mode

    // NOTE(review): nothing here redirects billing.js's key store, so the
    // 'test-customer-proof' key is written wherever billing.js normally
    // keeps keys — confirm that leaving it behind is acceptable.
    try {
      const provisioned = billing.provisionApiKey('test-customer-proof');
      // Accept either `apiKey` or `key` as the field carrying the new key.
      const apiKey = provisioned && (provisioned.apiKey || provisioned.key);
      if (apiKey) {
        const validation = billing.validateApiKey(apiKey);
        // Coerce to a real boolean so the recorded result is never null/undefined.
        roundTripPassed = Boolean(validation && validation.valid === true);
        roundTripDetail = roundTripPassed
          ? `Key provisioned (${apiKey.slice(0, 14)}...) and validated successfully`
          : `Key validation failed: ${JSON.stringify(validation)}`;
      } else {
        roundTripDetail = `provisionApiKey returned: ${JSON.stringify(provisioned)}`;
      }
    } catch (err) {
      // A throwing round-trip fails PROOF-02b only; the export check above
      // keeps its own result.
      roundTripPassed = false;
      roundTripDetail = `Round-trip threw: ${err.message}`;
    }
  }
} else {
  billingExportsDetail = 'billing.js not found at scripts/billing.js';
  roundTripDetail = 'Skipped — billing.js not found';
}

record('PROOF-02a: billing.js exports 5 required functions', billingExportsPassed, billingExportsDetail);
record('PROOF-02b: provisionApiKey + validateApiKey round-trip', roundTripPassed, roundTripDetail);
180
+
181
+ // ===========================================================================
182
+ // PROOF-03: bin/cli.js runs `init` in tmpdir and creates config
183
+ // ===========================================================================
184
console.log('\n── PROOF-03: CLI init in tmpdir ──');

const cliPath = path.join(ROOT, 'bin', 'cli.js');
let cliPassed = false;
let cliDetail = '';

if (fs.existsSync(cliPath)) {
  // Run `init` inside a throwaway directory so the check leaves nothing behind.
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rlhf-proof-cli-'));
  try {
    const result = spawnSync(process.execPath, [cliPath, 'init'], {
      cwd: tmpDir,
      timeout: 15000,
      env: { ...process.env },
    });
    const stdout = (result.stdout || Buffer.alloc(0)).toString();
    const stderr = (result.stderr || Buffer.alloc(0)).toString();

    // Check if .rlhf directory and config were created
    const rlhfDir = path.join(tmpDir, '.rlhf');
    const configFile = path.join(rlhfDir, 'config.json');
    const rlhfDirExists = fs.existsSync(rlhfDir);
    const configExists = fs.existsSync(configFile);

    // Parse config.json defensively: a malformed file must fail this check,
    // not throw out of the script before the reports are written.
    let configKeys = null;
    let configProblem = '';
    if (configExists) {
      try {
        const config = JSON.parse(fs.readFileSync(configFile, 'utf8'));
        if (config !== null && typeof config === 'object') {
          configKeys = Object.keys(config);
        } else {
          configProblem = 'config.json is not a JSON object';
        }
      } catch (err) {
        configProblem = `config.json unreadable: ${err.message}`;
      }
    }

    cliPassed = rlhfDirExists && configExists && result.status === 0 && configKeys !== null;
    if (cliPassed) {
      cliDetail = `.rlhf/ created, config.json has keys: ${configKeys.join(', ')}`;
    } else {
      cliDetail = [
        `exit=${result.status}`,
        `rlhfDir=${rlhfDirExists}`,
        `config=${configExists}`,
        // spawnSync reports launch failures and timeouts through `error`.
        result.error ? `spawn error: ${result.error.message}` : '',
        configProblem,
        stdout.trim() || stderr.trim(),
      ].filter(Boolean).join(' | ');
    }
  } finally {
    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch (_) { /* best-effort cleanup */ }
  }
} else {
  cliDetail = 'bin/cli.js not found';
}

record('PROOF-03: cli init creates .rlhf/ and config.json', cliPassed, cliDetail);
227
+
228
+ // ===========================================================================
229
+ // PROOF-04: npm test — count >= 314 and 0 failures
230
+ // ===========================================================================
231
console.log('\n── PROOF-04: npm test ──');

// Minimum number of passing tests PROOF-04 requires.
const TEST_THRESHOLD = 314;

// Outcome of the npm test run; assigned once the suite has finished.
let testOutput = '';
let testCount = 0;
let testFailures = 0;
let testsPassed = false;
let testsDetail = '';
239
+
240
+ // npm test runs multiple sub-scripts (test:schema, test:loop, etc.) — each emits
241
+ // its own "ℹ pass N" and "ℹ fail N" summary lines. We sum all of them.
242
/**
 * Sum the pass/fail totals from test-runner summary lines in `output`.
 *
 * A summary line is a whole line of the form "pass N" or "fail N",
 * optionally prefixed with "ℹ" or "#" (case-insensitive). Matching is
 * anchored to whole lines so that text such as "bypass 12" or a test title
 * containing "should fail 3 times" is not counted. ANSI colour codes are
 * stripped first so coloured summaries still match.
 *
 * @param {string} output - Combined stdout/stderr of the test run.
 * @returns {{pass: number, fail: number}} Totals across all summary lines.
 */
function parseTestCounts(output) {
  const plain = output.replace(/\u001b\[[0-9;]*m/g, '');

  // Add up the captured number from every summary line matching `pattern`.
  const total = (pattern) => {
    let sum = 0;
    for (const m of plain.matchAll(pattern)) {
      sum += parseInt(m[1], 10);
    }
    return sum;
  };

  return {
    pass: total(/^[ \t]*(?:[ℹ#][ \t]+)?pass[ \t]+(\d+)[ \t]*\r?$/gim),
    fail: total(/^[ \t]*(?:[ℹ#][ \t]+)?fail[ \t]+(\d+)[ \t]*\r?$/gim),
  };
}
254
+
255
+ // Also count simple "Results: N passed, M failed" format (used by some sub-scripts)
256
/**
 * Sum the totals from every "Results: N passed, M failed" summary in `output`
 * (matched case-insensitively).
 *
 * @param {string} output - Combined output of the test run.
 * @returns {{pass: number, fail: number}} Totals across all matching summaries.
 */
function parseResultsFormat(output) {
  const totals = { pass: 0, fail: 0 };
  const summaryPattern = /Results:\s+(\d+)\s+passed,\s+(\d+)\s+failed/gi;

  for (const [, passedText, failedText] of output.matchAll(summaryPattern)) {
    totals.pass += parseInt(passedText, 10);
    totals.fail += parseInt(failedText, 10);
  }

  return totals;
}
265
+
266
// Exit status of `npm test`; stays 0 when execSync returns normally.
let npmExitCode = 0;
try {
  // Note: do NOT set RLHF_ALLOW_INSECURE=true here — it disables auth checks and breaks auth tests
  const testEnv = { ...process.env, FORCE_COLOR: '0' };
  delete testEnv.RLHF_ALLOW_INSECURE;
  testOutput = execSync('npm test', {
    cwd: ROOT,
    timeout: 300000, // 5 minutes
    env: testEnv,
    // execSync aborts the child once captured output exceeds maxBuffer
    // (1 MiB by default); raise it so a verbose suite cannot fail the gate
    // just by printing a lot.
    maxBuffer: 64 * 1024 * 1024,
  }).toString();
} catch (err) {
  // Non-zero exit, timeout or spawn failure: keep whatever output was captured.
  npmExitCode = typeof err.status === 'number' ? err.status : 1;
  testOutput = (err.stdout ? err.stdout.toString() : '') + (err.stderr ? err.stderr.toString() : err.message);
}

{
  const nodeTestCounts = parseTestCounts(testOutput);
  const resultsFormatCounts = parseResultsFormat(testOutput);
  // Use whichever is larger (prefer node:test counts which are more complete)
  const candidateCount = Math.max(nodeTestCounts.pass, resultsFormatCounts.pass);
  testCount = candidateCount;
  // Failures: if npm exited 0, there were 0 failures regardless of parsing edge cases
  testFailures = npmExitCode === 0 ? 0 : (nodeTestCounts.fail + resultsFormatCounts.fail);

  // The gate needs all three: enough passing tests, no failures, clean exit.
  testsPassed = testCount >= TEST_THRESHOLD && testFailures === 0 && npmExitCode === 0;
  testsDetail = testsPassed
    ? `${testCount} tests passed, 0 failures (threshold: ${TEST_THRESHOLD}+)`
    : `npm exit=${npmExitCode}, count=${testCount}, failures=${testFailures} (need ${TEST_THRESHOLD}+ passing, 0 failures)`;
}

record(`PROOF-04: npm test >= ${TEST_THRESHOLD} passing, 0 failures`, testsPassed, testsDetail);
297
+
298
+ // ===========================================================================
299
+ // Summary
300
+ // ===========================================================================
301
+
302
const allPassed = results.every(r => r.passed);
const passCount = results.filter(r => r.passed).length;
const failCount = results.filter(r => !r.passed).length;

console.log('\n── Summary ──────────────────────────────────');
console.log(`Checks: ${results.length} total | ${passCount} passed | ${failCount} failed`);
console.log(`Overall: ${allPassed ? 'PASS' : 'FAIL'}`);

// ===========================================================================
// Write reports
// ===========================================================================

ensureDir(PROOF_DIR);

// One timestamp for both files so the JSON and Markdown reports agree.
const generatedAt = stamp();

const jsonReport = {
  milestone: 'v3.0 Commercialization',
  generated: generatedAt,
  overall: allPassed ? 'PASS' : 'FAIL',
  summary: { total: results.length, passed: passCount, failed: failCount },
  checks: results,
  testCount,
  testFailures,
};

fs.writeFileSync(
  path.join(PROOF_DIR, 'v3-milestone-report.json'),
  JSON.stringify(jsonReport, null, 2),
  'utf8'
);
console.log('\nWrote proof/v3-milestone-report.json');

// Table cells must stay on one line and must not contain the column
// separator; a multi-line detail (e.g. captured CLI output) would otherwise
// break the row.
const tableCell = (text) => text.replace(/\|/g, '/').replace(/\s*[\r\n]+\s*/g, ' ');

const checkRows = results
  .map(r => `| ${r.check} | ${r.passed ? 'PASS' : 'FAIL'} | ${tableCell(r.detail)} |`)
  .join('\n');

// Bullet list of every recorded check whose name starts with `prefix`.
const sectionFor = (prefix) => results
  .filter(r => r.check.startsWith(prefix))
  .map(r => `- **${r.check}**: ${r.passed ? 'PASS' : 'FAIL'} — ${r.detail}`)
  .join('\n');

const mdReport = `# v3.0 Milestone Proof Report

**Generated:** ${generatedAt}
**Overall:** ${allPassed ? 'PASS' : 'FAIL'}

## Summary

| Metric | Value |
|--------|-------|
| Total checks | ${results.length} |
| Passed | ${passCount} |
| Failed | ${failCount} |
| Test count | ${testCount} |
| Test failures | ${testFailures} |

## Check Results

| Check | Status | Detail |
|-------|--------|--------|
${checkRows}

## PROOF-01: Dockerfile + /health

${sectionFor('PROOF-01')}

## PROOF-02: Billing Module

${sectionFor('PROOF-02')}

## PROOF-03: CLI Init

${sectionFor('PROOF-03')}

## PROOF-04: Test Suite

${sectionFor('PROOF-04')}

---
*All numbers from actual runs. No placeholders.*
`;

fs.writeFileSync(
  path.join(PROOF_DIR, 'v3-milestone-report.md'),
  mdReport,
  'utf8'
);
console.log('Wrote proof/v3-milestone-report.md');

// Exit status mirrors the overall verdict.
process.exit(allPassed ? 0 : 1);