rlhf-feedback-loop 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/adapters/README.md +8 -0
- package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
- package/adapters/chatgpt/INSTALL.md +80 -0
- package/adapters/chatgpt/openapi.yaml +292 -0
- package/adapters/claude/.mcp.json +8 -0
- package/adapters/codex/config.toml +4 -0
- package/adapters/gemini/function-declarations.json +95 -0
- package/adapters/mcp/server-stdio.js +444 -0
- package/bin/cli.js +167 -0
- package/config/mcp-allowlists.json +29 -0
- package/config/policy-bundles/constrained-v1.json +53 -0
- package/config/policy-bundles/default-v1.json +80 -0
- package/config/rubrics/default-v1.json +52 -0
- package/config/subagent-profiles.json +32 -0
- package/openapi/openapi.yaml +292 -0
- package/package.json +91 -0
- package/plugins/amp-skill/INSTALL.md +52 -0
- package/plugins/amp-skill/SKILL.md +31 -0
- package/plugins/claude-skill/INSTALL.md +55 -0
- package/plugins/claude-skill/SKILL.md +46 -0
- package/plugins/codex-profile/AGENTS.md +20 -0
- package/plugins/codex-profile/INSTALL.md +57 -0
- package/plugins/gemini-extension/INSTALL.md +74 -0
- package/plugins/gemini-extension/gemini_prompt.txt +10 -0
- package/plugins/gemini-extension/tool_contract.json +28 -0
- package/scripts/billing.js +471 -0
- package/scripts/budget-guard.js +173 -0
- package/scripts/code-reasoning.js +307 -0
- package/scripts/context-engine.js +547 -0
- package/scripts/contextfs.js +513 -0
- package/scripts/contract-audit.js +198 -0
- package/scripts/dpo-optimizer.js +208 -0
- package/scripts/export-dpo-pairs.js +316 -0
- package/scripts/export-training.js +448 -0
- package/scripts/feedback-attribution.js +313 -0
- package/scripts/feedback-inbox-read.js +162 -0
- package/scripts/feedback-loop.js +838 -0
- package/scripts/feedback-schema.js +300 -0
- package/scripts/feedback-to-memory.js +165 -0
- package/scripts/feedback-to-rules.js +109 -0
- package/scripts/generate-paperbanana-diagrams.sh +99 -0
- package/scripts/hybrid-feedback-context.js +676 -0
- package/scripts/intent-router.js +164 -0
- package/scripts/mcp-policy.js +92 -0
- package/scripts/meta-policy.js +194 -0
- package/scripts/plan-gate.js +154 -0
- package/scripts/prove-adapters.js +364 -0
- package/scripts/prove-attribution.js +364 -0
- package/scripts/prove-automation.js +393 -0
- package/scripts/prove-data-quality.js +219 -0
- package/scripts/prove-intelligence.js +256 -0
- package/scripts/prove-lancedb.js +370 -0
- package/scripts/prove-loop-closure.js +255 -0
- package/scripts/prove-rlaif.js +404 -0
- package/scripts/prove-subway-upgrades.js +250 -0
- package/scripts/prove-training-export.js +324 -0
- package/scripts/prove-v2-milestone.js +273 -0
- package/scripts/prove-v3-milestone.js +381 -0
- package/scripts/rlaif-self-audit.js +123 -0
- package/scripts/rubric-engine.js +230 -0
- package/scripts/self-heal.js +127 -0
- package/scripts/self-healing-check.js +111 -0
- package/scripts/skill-quality-tracker.js +284 -0
- package/scripts/subagent-profiles.js +79 -0
- package/scripts/sync-gh-secrets-from-env.sh +29 -0
- package/scripts/thompson-sampling.js +331 -0
- package/scripts/train_from_feedback.py +914 -0
- package/scripts/validate-feedback.js +580 -0
- package/scripts/vector-store.js +100 -0
- package/src/api/server.js +497 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* prove-v2-milestone.js
|
|
4
|
+
*
|
|
5
|
+
* Phase 12: Final proof gate for the v2.0 milestone.
|
|
6
|
+
* Verifies:
|
|
7
|
+
* PROOF-01: Proof reports exist for all v2 phases (6-11)
|
|
8
|
+
* PROOF-02: npm test passes with count > 142 (v1 baseline) and 0 failures
|
|
9
|
+
*
|
|
10
|
+
* All numbers are from actual test runs — no placeholders.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
'use strict';
|
|
14
|
+
|
|
15
|
+
const fs = require('fs');
|
|
16
|
+
const path = require('path');
|
|
17
|
+
const { execSync } = require('child_process');
|
|
18
|
+
|
|
19
|
+
const ROOT = path.join(__dirname, '..');
|
|
20
|
+
const PROOF_DIR = path.join(ROOT, 'proof');
|
|
21
|
+
|
|
22
|
+
/**
 * Make sure a directory is available at `d`.
 *
 * Nothing happens when any filesystem entry already exists at that path;
 * otherwise the directory is created together with its missing parents.
 *
 * @param {string} d - Directory path to create.
 * @returns {void}
 */
function ensureDir(d) {
  const alreadyPresent = fs.existsSync(d);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(d, { recursive: true });
}
|
|
25
|
+
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// PROOF-01: Check all v2 proof reports exist
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
// Proof artifacts expected from each v2 phase (6-11). Every phase contributes
// a JSON report and a Markdown report that share one path stem; the rows below
// are [phase number, display name, path stem] and are expanded into
// { phase, name, files } records.
const V2_PROOF_REQUIREMENTS = [
  [6, 'Feedback Attribution', 'proof/attribution-report'],
  [7, 'Data Quality', 'proof/data-quality-report'],
  [8, 'Loop Closure', 'proof/loop-closure-report'],
  [9, 'Intelligence', 'proof/intelligence-report'],
  [10, 'Training Export', 'proof/training-export-report'],
  [11, 'Subway Upgrades', 'proof/subway-upgrades/subway-upgrades-report'],
].map(([phase, name, stem]) => ({
  phase,
  name,
  files: [`${stem}.json`, `${stem}.md`],
}));
|
|
65
|
+
|
|
66
|
+
/**
 * PROOF-01: check that every required proof report can be read and carries no
 * placeholder marker.
 *
 * Each file is read exactly once. A path that cannot be read as a file
 * (missing, a directory, permission denied, ...) is reported as not existing,
 * because its content cannot be verified; anything other than a plain
 * "not found" is additionally logged as a warning.
 *
 * @param {Array<{phase: number, name: string, files: string[]}>} [requirements]
 *   Phases to check; defaults to V2_PROOF_REQUIREMENTS.
 * @param {string} [root] - Directory the relative report paths are resolved
 *   against; defaults to ROOT.
 * @returns {{allExist: boolean, phases: Array<{phase: number, name: string,
 *   files: Array<{path: string, exists: boolean, hasTodo: boolean,
 *   fileSize: number}>}>}}
 *   `fileSize` is the on-disk size in bytes (0 for unreadable files);
 *   `hasTodo` is true when the content matches TODO/placeholder/estimated
 *   (case-insensitive).
 */
function checkProofReports(requirements = V2_PROOF_REQUIREMENTS, root = ROOT) {
  // Any of these words marks a report as unfinished.
  const placeholderPattern = /TODO|placeholder|estimated/i;
  let allExist = true;

  const phases = requirements.map((req) => {
    const files = req.files.map((relPath) => {
      const absPath = path.join(root, relPath);
      let exists = false;
      let hasTodo = false;
      let fileSize = 0;

      try {
        // Read raw bytes so the reported size is a real byte count rather
        // than the UTF-16 length of the decoded string.
        const raw = fs.readFileSync(absPath);
        exists = true;
        fileSize = raw.length;
        hasTodo = placeholderPattern.test(raw.toString('utf-8'));
      } catch (err) {
        if (err.code !== 'ENOENT') {
          console.warn(`Cannot read proof report ${relPath}: ${err.message}`);
        }
      }

      if (!exists) allExist = false;
      return { path: relPath, exists, hasTodo, fileSize };
    });

    return { phase: req.phase, name: req.name, files };
  });

  return { allExist, phases };
}
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// PROOF-02: Run npm test and verify count
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
// Number of passing tests at the v1 baseline; the v2 gate requires strictly more.
const V1_BASELINE_TEST_COUNT = 142;

/**
 * PROOF-02: run the test command and total the "ℹ pass N" / "ℹ fail N"
 * summary lines found in its output.
 *
 * The command's exit status is taken into account: when it exits non-zero but
 * no failure count could be parsed (for example a sub-script crashed before
 * printing a summary), `failed` is reported as 1 so the run cannot be mistaken
 * for a clean one.
 *
 * @param {string} [command='npm test'] - Shell command to execute.
 * @param {string} [cwd] - Working directory; defaults to ROOT.
 * @returns {{passed: number, failed: number, raw: string, timedOut: boolean,
 *   exitCode: (number|null)}}
 *   `raw` holds the last 2000 characters of output (or 'TIMED OUT');
 *   `exitCode` is null when the run timed out.
 */
function runFullTestSuite(command = 'npm test', cwd = ROOT) {
  console.log(`Running full test suite (${command})...`);
  let output = '';
  let exitCode = 0;

  try {
    output = execSync(command, {
      cwd,
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
      timeout: 180_000,
      // The default 1 MiB cap would terminate a verbose test run with ENOBUFS.
      maxBuffer: 64 * 1024 * 1024,
    });
  } catch (err) {
    if (err.code === 'ETIMEDOUT') {
      return { passed: 0, failed: 1, raw: 'TIMED OUT', timedOut: true, exitCode: null };
    }
    // A non-zero exit still carries the captured output — keep it for parsing.
    output = (err.stdout || '') + (err.stderr || '');
    exitCode = typeof err.status === 'number' ? err.status : 1;
  }

  // Sum the captured number of every match of `pattern` in the output.
  const sumCounts = (pattern) =>
    [...output.matchAll(pattern)].reduce((sum, m) => sum + Number.parseInt(m[1], 10), 0);

  const passed = sumCounts(/ℹ pass (\d+)/g);
  const parsedFailures = sumCounts(/ℹ fail (\d+)/g);
  const failed = exitCode !== 0 && parsedFailures === 0 ? 1 : parsedFailures;

  return { passed, failed, raw: output.slice(-2000), timedOut: false, exitCode };
}
|
|
142
|
+
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// Main
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
/**
 * Entry point for the Phase 12 proof gate.
 *
 * Runs PROOF-01 (proof reports present and placeholder-free) and PROOF-02
 * (test count above the v1 baseline with zero failures), prints the outcome,
 * writes proof/v2-milestone-report.json and proof/v2-milestone-report.md, and
 * exits the process with 0 when both proofs pass, 1 otherwise.
 *
 * @returns {Promise<void>} Never resolves normally — the process exits first.
 */
async function main() {
  console.log('Phase 12: Proof Gate — v2.0 Milestone Final Check\n');
  console.log('='.repeat(50));

  // One definition of "this report counts", shared by console, JSON and
  // Markdown output so they cannot drift apart.
  const fileOk = (f) => f.exists && !f.hasTodo;
  const phaseOk = (p) => p.files.every(fileOk);
  const fileState = (f) => (f.exists ? (f.hasTodo ? 'TODO found' : 'exists') : 'MISSING');

  // PROOF-01
  console.log('\nChecking proof reports (PROOF-01)...');
  const proofCheck = checkProofReports();

  for (const phase of proofCheck.phases) {
    console.log(` Phase ${phase.phase} (${phase.name}): ${phaseOk(phase) ? 'PASS' : 'FAIL'}`);
    for (const f of phase.files) {
      const indicator = ` ${fileState(f)}`;
      console.log(` ${f.path}: ${indicator} ${f.exists ? `(${f.fileSize} bytes)` : ''}`);
    }
  }

  // PROOF-02
  const testResults = runFullTestSuite();
  const timedOut = testResults.timedOut === true;
  const testCountOk = testResults.passed > V1_BASELINE_TEST_COUNT;
  const testFailOk = testResults.failed === 0;

  console.log(`\nTest Results (PROOF-02):`);
  if (timedOut) {
    // Without this, a timeout looks exactly like a run with one failing test.
    console.log(' npm test TIMED OUT — the counts below are not from a completed run');
  }
  console.log(` Passed: ${testResults.passed} (v1 baseline: ${V1_BASELINE_TEST_COUNT}, need > ${V1_BASELINE_TEST_COUNT})`);
  console.log(` Failed: ${testResults.failed}`);
  console.log(` Count check: ${testCountOk ? 'PASS' : 'FAIL'} (${testResults.passed} > ${V1_BASELINE_TEST_COUNT})`);
  console.log(` Zero failures: ${testFailOk ? 'PASS' : 'FAIL'}`);

  const proof01Passed = proofCheck.allExist && proofCheck.phases.every(phaseOk);
  const proof02Passed = testCountOk && testFailOk;
  const overallPassed = proof01Passed && proof02Passed;

  // Write reports
  const report = {
    phase: 12,
    name: 'Proof Gate',
    milestone: 'v2.0',
    requirements: ['PROOF-01', 'PROOF-02'],
    generatedAt: new Date().toISOString(),
    proofReports: {
      allExist: proofCheck.allExist,
      phases: proofCheck.phases,
      passed: proof01Passed,
    },
    testResults: {
      passed: testResults.passed,
      failed: testResults.failed,
      timedOut,
      v1Baseline: V1_BASELINE_TEST_COUNT,
      countExceedsBaseline: testCountOk,
      zeroFailures: testFailOk,
      passed: proof02Passed,
    },
    overallPassed,
  };

  ensureDir(PROOF_DIR);
  const jsonPath = path.join(PROOF_DIR, 'v2-milestone-report.json');
  const mdPath = path.join(PROOF_DIR, 'v2-milestone-report.md');

  fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2));

  const status = overallPassed ? 'PASSED' : 'FAILED';

  const phaseRows = proofCheck.phases.map((p) => {
    const fileList = p.files.map((f) => `${f.path} (${fileState(f)})`).join(', ');
    return `| ${p.phase} | ${p.name} | ${fileList} | ${phaseOk(p) ? 'PASS' : 'FAIL'} |`;
  }).join('\n');

  // Only claim completeness when the proof reports were actually verified.
  const summaryLead = proof01Passed
    ? 'All v2 phases complete:'
    : 'v2 phases in scope (not all proof reports were verified — see PROOF-01):';

  // Avoid printing e.g. "-12 above v1 baseline" when the count fell short.
  const delta = testResults.passed - V1_BASELINE_TEST_COUNT;
  const deltaText = delta >= 0 ? `${delta} above` : `${-delta} below`;

  const md = `# Phase 12: Proof Gate — v2.0 Milestone Report

**Status:** ${status}
**Generated:** ${report.generatedAt}
**Milestone:** v2.0 RLHF Bidirectional Feature Sync

## PROOF-01: All v2 Phase Proof Reports Exist

| Phase | Name | Files | Status |
|-------|------|-------|--------|
${phaseRows}

**Overall PROOF-01:** ${proof01Passed ? 'PASS' : 'FAIL'}

## PROOF-02: npm test — Count and Zero Failures

| Metric | Value | Requirement | Status |
|--------|-------|-------------|--------|
| Tests passed | ${testResults.passed} | > ${V1_BASELINE_TEST_COUNT} (v1 baseline) | ${testCountOk ? 'PASS' : 'FAIL'} |
| Tests failed | ${testResults.failed} | 0 | ${testFailOk ? 'PASS' : 'FAIL'} |
| Run completed | ${timedOut ? 'no (timed out)' : 'yes'} | yes | ${timedOut ? 'FAIL' : 'PASS'} |

**Overall PROOF-02:** ${proof02Passed ? 'PASS' : 'FAIL'}

## v2.0 Milestone Summary

${summaryLead}

| Phase | Feature | Requirements |
|-------|---------|-------------|
| 6 | Feedback Attribution | ATTR-01, ATTR-02, ATTR-03 |
| 7 | Data Quality | QUAL-01, QUAL-02, QUAL-03, QUAL-04 |
| 8 | Loop Closure | LOOP-01, LOOP-02, LOOP-03, LOOP-04, LOOP-05 |
| 9 | Intelligence | INTL-01, INTL-02, INTL-03 |
| 10 | Training Export | XPRT-01, XPRT-02, XPRT-03, XPRT-04, XPRT-05 |
| 11 | Subway Upgrades | SUBW-01, SUBW-02, SUBW-03, SUBW-04, SUBW-05 |
| 12 | Proof Gate | PROOF-01, PROOF-02 |

**Final test count:** ${testResults.passed} (${deltaText} v1 baseline of ${V1_BASELINE_TEST_COUNT})
**Test failures:** ${testResults.failed}
**v2.0 milestone status:** ${status}
`;

  fs.writeFileSync(mdPath, md);

  console.log(`\n${'='.repeat(50)}`);
  console.log(`v2.0 Milestone Status: ${status}`);
  console.log(`PROOF-01 (all proof reports): ${proof01Passed ? 'PASS' : 'FAIL'}`);
  console.log(`PROOF-02 (test count + 0 failures): ${proof02Passed ? 'PASS' : 'FAIL'}`);
  console.log(`\nFinal proof report: ${mdPath}`);

  process.exit(overallPassed ? 0 : 1);
}

// Any unexpected error (e.g. a report file that cannot be written) fails the gate.
main().catch((err) => {
  console.error('prove-v2-milestone failed:', err.message);
  process.exit(1);
});
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* prove-v3-milestone.js
|
|
4
|
+
*
|
|
5
|
+
* Phase 17: Final proof gate for the v3.0 Commercialization milestone.
|
|
6
|
+
* Verifies:
|
|
7
|
+
* PROOF-01: Dockerfile exists + /health endpoint works (start server, curl, kill)
|
|
8
|
+
* PROOF-02: billing.js exports all 5 required functions + key provision/validate round-trip
|
|
9
|
+
* PROOF-03: bin/cli.js runs `init` in tmpdir + creates config
|
|
10
|
+
* PROOF-04: npm test passes with count >= 314 and 0 failures
|
|
11
|
+
*
|
|
12
|
+
* All numbers are from actual runs — no placeholders.
|
|
13
|
+
* Writes proof/v3-milestone-report.json and proof/v3-milestone-report.md
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
'use strict';
|
|
17
|
+
|
|
18
|
+
const fs = require('fs');
|
|
19
|
+
const path = require('path');
|
|
20
|
+
const { execSync, spawnSync } = require('child_process');
|
|
21
|
+
const os = require('os');
|
|
22
|
+
|
|
23
|
+
const ROOT = path.join(__dirname, '..');
|
|
24
|
+
const PROOF_DIR = path.join(ROOT, 'proof');
|
|
25
|
+
|
|
26
|
+
/**
 * Create directory `d`, including missing parent directories, unless a
 * filesystem entry is already present at that path.
 *
 * @param {string} d - Directory path to create.
 * @returns {void}
 */
function ensureDir(d) {
  if (fs.existsSync(d)) return;
  fs.mkdirSync(d, { recursive: true });
}
|
|
29
|
+
|
|
30
|
+
/**
 * Current time as an ISO-8601 string.
 *
 * @returns {string} Result of `Date.prototype.toISOString()` for "now".
 */
function stamp() {
  const now = new Date();
  return now.toISOString();
}
|
|
33
|
+
|
|
34
|
+
// ─── Result collector ──────────────────────────────────────────────────────
|
|
35
|
+
// Every check outcome, in the order the checks ran.
const results = [];

/**
 * Store the outcome of one check and echo it to the console.
 *
 * @param {string} check - Check label, e.g. 'PROOF-01a: Dockerfile exists'.
 * @param {boolean} passed - Whether the check succeeded.
 * @param {string} detail - One-line explanation of the outcome.
 * @param {string} [evidence=''] - Supporting output; printed only for a
 *   failed check, but always stored.
 * @returns {void}
 */
function record(check, passed, detail, evidence = '') {
  const entry = { check, passed, detail, evidence, ts: stamp() };
  results.push(entry);

  const verdict = passed ? 'PASS' : 'FAIL';
  console.log(`[${verdict}] ${check}: ${detail}`);

  if (!passed && evidence) {
    console.log(` Evidence: ${evidence}`);
  }
}
|
|
43
|
+
|
|
44
|
+
// ===========================================================================
|
|
45
|
+
// PROOF-01: Dockerfile exists + health endpoint responds
|
|
46
|
+
// ===========================================================================
|
|
47
|
+
console.log('\n── PROOF-01: Dockerfile + /health endpoint ──');

const dockerfilePath = path.join(ROOT, 'Dockerfile');
const dockerfileExists = fs.existsSync(dockerfilePath);
record('PROOF-01a: Dockerfile exists', dockerfileExists, dockerfileExists ? 'Dockerfile found' : 'Dockerfile MISSING');

// Start the API server, curl /health, then kill it
let healthPassed = false;
let healthDetail = '';
let healthEvidence = '';
let serverPid = null;

const serverPath = path.join(ROOT, 'src', 'api', 'server.js');
const serverExists = fs.existsSync(serverPath);
record('PROOF-01b: src/api/server.js exists', serverExists, serverExists ? 'server.js found' : 'server.js MISSING');

if (serverExists) {
  const TEST_PORT = 13877; // unlikely to collide
  const healthUrl = `http://localhost:${TEST_PORT}/health`;
  const env = { ...process.env, PORT: String(TEST_PORT), RLHF_ALLOW_INSECURE: 'true' };
  let serverProc = null;

  try {
    // This script never yields to the event loop, so piped server output
    // would never be drained; discard it instead of letting the pipe fill up.
    serverProc = require('child_process').spawn(
      process.execPath,
      [serverPath],
      { env, detached: false, stdio: 'ignore' }
    );
    serverPid = serverProc.pid;

    // Poll /health for up to 4 seconds. `curl -f` exits non-zero on HTTP
    // errors, so the first successful probe already holds the response body.
    let rawResponse = null;
    const deadline = Date.now() + 4000;
    const sleepCell = new Int32Array(new SharedArrayBuffer(4));
    while (rawResponse === null && Date.now() < deadline) {
      try {
        rawResponse = execSync(`curl -sf ${healthUrl}`, { timeout: 1000 }).toString().trim();
      } catch (_) {
        // Not up yet — block for 200ms without spinning the CPU.
        Atomics.wait(sleepCell, 0, 0, 200);
      }
    }

    if (rawResponse !== null) {
      healthEvidence = rawResponse;
      try {
        const parsed = JSON.parse(rawResponse);
        const hasVersion = typeof parsed.version !== 'undefined';
        const hasUptime = typeof parsed.uptime !== 'undefined';
        healthPassed = hasVersion && hasUptime;
        healthDetail = healthPassed
          ? `HTTP 200, version=${parsed.version}, uptime=${parsed.uptime}`
          : `Response missing version or uptime fields: ${rawResponse}`;
      } catch (e) {
        healthDetail = `Invalid JSON from /health: ${rawResponse}`;
      }
    } else {
      healthDetail = 'Server did not start within 4 seconds';
    }
  } catch (err) {
    healthDetail = `Server start error: ${err.message}`;
  } finally {
    // Always stop the server, including when a step above threw; otherwise
    // the child would be left running on TEST_PORT.
    if (serverProc) {
      try { serverProc.kill('SIGTERM'); } catch (_) { /* already gone */ }
    }
  }
} else {
  healthDetail = 'Skipped — server.js not found';
}

record('PROOF-01c: /health returns 200 with version+uptime', healthPassed, healthDetail, healthEvidence);
|
|
117
|
+
|
|
118
|
+
// ===========================================================================
|
|
119
|
+
// PROOF-02: billing.js exports all 5 required functions + provision/validate round-trip
|
|
120
|
+
// ===========================================================================
|
|
121
|
+
console.log('\n── PROOF-02: Billing module exports + key round-trip ──');

// Functions scripts/billing.js must export for this gate to pass.
const REQUIRED_BILLING_EXPORTS = [
  'createCheckoutSession',
  'provisionApiKey',
  'validateApiKey',
  'recordUsage',
  'handleWebhook',
];

const billingPath = path.join(ROOT, 'scripts', 'billing.js');
let billingExportsPassed = false;
let billingExportsDetail = '';
let roundTripPassed = false;
let roundTripDetail = '';

if (fs.existsSync(billingPath)) {
  let billing = null;
  let billingLoaded = false;

  // Loading is guarded separately from the round-trip so a failure in the
  // round-trip cannot be misreported as a load error (and cannot overwrite
  // the result of the export check).
  try {
    billing = require(billingPath);
    billingLoaded = true;
  } catch (err) {
    billingExportsDetail = `Error loading billing.js: ${err.message}`;
    roundTripDetail = 'Skipped due to billing.js load error';
  }

  if (billingLoaded) {
    const missingExports = REQUIRED_BILLING_EXPORTS.filter(
      (fn) => !billing || typeof billing[fn] !== 'function'
    );
    billingExportsPassed = missingExports.length === 0;
    billingExportsDetail = billingExportsPassed
      ? `All 5 functions exported: ${REQUIRED_BILLING_EXPORTS.join(', ')}`
      : `Missing exports: ${missingExports.join(', ')}`;

    // Key provision + validate round-trip (runs in local mode when STRIPE_SECRET_KEY is absent)
    // NOTE(review): nothing here redirects billing.js's key store, so the
    // provisioned proof key presumably lands in the module's default store —
    // confirm against billing.js and add an override if that is not wanted.
    const previousStripeKey = process.env.STRIPE_SECRET_KEY;
    process.env.STRIPE_SECRET_KEY = ''; // force local mode
    try {
      const provisioned = billing.provisionApiKey('test-customer-proof');
      // Accept either property name for the key on the returned object.
      const apiKey = provisioned && (provisioned.apiKey || provisioned.key);
      if (apiKey) {
        const validation = billing.validateApiKey(apiKey);
        roundTripPassed = Boolean(validation) && validation.valid === true;
        roundTripDetail = roundTripPassed
          ? `Key provisioned (${apiKey.slice(0, 14)}...) and validated successfully`
          : `Key validation failed: ${JSON.stringify(validation)}`;
      } else {
        roundTripDetail = `provisionApiKey returned: ${JSON.stringify(provisioned)}`;
      }
    } catch (err) {
      roundTripPassed = false;
      roundTripDetail = `Round-trip threw: ${err.message}`;
    } finally {
      // Put the environment back so the forced local mode does not leak into
      // the CLI and npm test checks that run after this one.
      if (previousStripeKey === undefined) {
        delete process.env.STRIPE_SECRET_KEY;
      } else {
        process.env.STRIPE_SECRET_KEY = previousStripeKey;
      }
    }
  }
} else {
  billingExportsDetail = 'billing.js not found at scripts/billing.js';
  roundTripDetail = 'Skipped — billing.js not found';
}

record('PROOF-02a: billing.js exports 5 required functions', billingExportsPassed, billingExportsDetail);
record('PROOF-02b: provisionApiKey + validateApiKey round-trip', roundTripPassed, roundTripDetail);
|
|
180
|
+
|
|
181
|
+
// ===========================================================================
|
|
182
|
+
// PROOF-03: bin/cli.js runs `init` in tmpdir and creates config
|
|
183
|
+
// ===========================================================================
|
|
184
|
+
console.log('\n── PROOF-03: CLI init in tmpdir ──');

const cliPath = path.join(ROOT, 'bin', 'cli.js');
let cliPassed = false;
let cliDetail = '';

if (fs.existsSync(cliPath)) {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rlhf-proof-cli-'));
  try {
    const result = spawnSync(process.execPath, [cliPath, 'init'], {
      cwd: tmpDir,
      timeout: 15000,
      env: { ...process.env },
    });
    const stdout = (result.stdout || Buffer.alloc(0)).toString();
    const stderr = (result.stderr || Buffer.alloc(0)).toString();

    // Check if .rlhf directory and config were created
    const rlhfDir = path.join(tmpDir, '.rlhf');
    const configFile = path.join(rlhfDir, 'config.json');
    const rlhfDirExists = fs.existsSync(rlhfDir);
    const configExists = fs.existsSync(configFile);

    // Parse the config defensively: a malformed file must fail this check,
    // not throw out of the script before the report is written.
    let configKeys = null;
    let configError = '';
    if (configExists) {
      try {
        configKeys = Object.keys(JSON.parse(fs.readFileSync(configFile, 'utf8')));
      } catch (err) {
        configError = `config.json unreadable: ${err.message}`;
      }
    }

    cliPassed = rlhfDirExists && configExists && result.status === 0 && configKeys !== null;
    if (cliPassed) {
      cliDetail = `.rlhf/ created, config.json has keys: ${configKeys.join(', ')}`;
    } else {
      cliDetail = [
        `exit=${result.status}`,
        `rlhfDir=${rlhfDirExists}`,
        `config=${configExists}`,
        // Set by spawnSync when the child could not be started or timed out.
        result.error ? `spawn error: ${result.error.message}` : '',
        configError,
        stdout.trim() || stderr.trim(),
      ].filter(Boolean).join(' | ');
    }
  } catch (err) {
    cliPassed = false;
    cliDetail = `CLI check error: ${err.message}`;
  } finally {
    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch (_) { /* best-effort cleanup */ }
  }
} else {
  cliDetail = 'bin/cli.js not found';
}

record('PROOF-03: cli init creates .rlhf/ and config.json', cliPassed, cliDetail);
|
|
227
|
+
|
|
228
|
+
// ===========================================================================
|
|
229
|
+
// PROOF-04: npm test — count >= 314 and 0 failures
|
|
230
|
+
// ===========================================================================
|
|
231
|
+
console.log('\n── PROOF-04: npm test ──');

// Minimum number of passing tests this gate requires (compared with >=).
const TEST_THRESHOLD = 314;
// Outcome of the PROOF-04 check; filled in after npm test has run.
let testsPassed = false;
// Human-readable explanation passed to record() for PROOF-04.
let testsDetail = '';
// Passing / failing test totals parsed from the npm test output; both are
// also written into the JSON and Markdown reports.
let testCount = 0;
let testFailures = 0;
// Captured npm test output (stdout, plus stderr or the error message when the
// command fails).
let testOutput = '';
|
|
239
|
+
|
|
240
|
+
// npm test runs multiple sub-scripts (test:schema, test:loop, etc.) — each emits
// its own "ℹ pass N" and "ℹ fail N" summary lines. We sum all of them.
/**
 * Total the pass/fail summary lines found in test-runner output.
 *
 * A summary line is a whole line of the form "pass N" or "fail N", optionally
 * introduced by the "ℹ" marker or by "#". Matching whole lines only keeps
 * test names and log text such as "should pass 2 args" or "bypass 3" from
 * being added to the totals.
 *
 * @param {string} output - Combined output of the test run.
 * @returns {{pass: number, fail: number}} Summed counts (0 when none found).
 */
function parseTestCounts(output) {
  // Sum the numbers of every "<label> N" summary line.
  const sumSummaryLines = (label) => {
    // \u2139 is the "ℹ" info marker; \r? tolerates CRLF line endings.
    const summaryLine = new RegExp(
      `^[ \\t]*(?:[\\u2139#][ \\t]*)?${label}[ \\t]+(\\d+)[ \\t]*\\r?$`,
      'gim'
    );
    let total = 0;
    for (const m of output.matchAll(summaryLine)) {
      total += Number.parseInt(m[1], 10);
    }
    return total;
  };

  return { pass: sumSummaryLines('pass'), fail: sumSummaryLines('fail') };
}
|
|
254
|
+
|
|
255
|
+
// Also count simple "Results: N passed, M failed" format (used by some sub-scripts)
/**
 * Total every "Results: N passed, M failed" line in the output.
 *
 * @param {string} output - Combined output of the test run.
 * @returns {{pass: number, fail: number}} Summed counts (0 when none found).
 */
function parseResultsFormat(output) {
  const totals = { pass: 0, fail: 0 };
  const matches = output.matchAll(/Results:\s+(\d+)\s+passed,\s+(\d+)\s+failed/gi);
  for (const [, passedText, failedText] of matches) {
    totals.pass += parseInt(passedText, 10);
    totals.fail += parseInt(failedText, 10);
  }
  return totals;
}
|
|
265
|
+
|
|
266
|
+
// Exit status of npm test (0 on success; 1 when the failure carries no status).
let npmExitCode = 0;
// Node error code for abnormal terminations (e.g. ETIMEDOUT, ENOBUFS), if any.
let npmErrorCode = '';
try {
  // Note: do NOT set RLHF_ALLOW_INSECURE=true here — it disables auth checks and breaks auth tests
  const testEnv = { ...process.env, FORCE_COLOR: '0' };
  delete testEnv.RLHF_ALLOW_INSECURE;
  testOutput = execSync('npm test', {
    cwd: ROOT,
    timeout: 300000, // 5 minutes
    env: testEnv,
    // The default 1 MiB cap would terminate a verbose test run with ENOBUFS.
    maxBuffer: 64 * 1024 * 1024,
  }).toString();
} catch (err) {
  npmExitCode = typeof err.status === 'number' ? err.status : 1;
  npmErrorCode = typeof err.code === 'string' ? err.code : '';
  testOutput = (err.stdout ? err.stdout.toString() : '') + (err.stderr ? err.stderr.toString() : err.message);
}

{
  const nodeTestCounts = parseTestCounts(testOutput);
  const resultsFormatCounts = parseResultsFormat(testOutput);
  // Use whichever is larger (prefer node:test counts which are more complete)
  const candidateCount = Math.max(nodeTestCounts.pass, resultsFormatCounts.pass);
  testCount = candidateCount;
  // Failures: if npm exited 0, there were 0 failures regardless of parsing edge cases
  testFailures = npmExitCode === 0 ? 0 : (nodeTestCounts.fail + resultsFormatCounts.fail);

  testsPassed = testCount >= TEST_THRESHOLD && testFailures === 0 && npmExitCode === 0;
  // Name the cause when npm test was cut short rather than failing on its own.
  const abnormalExit = npmErrorCode ? ` [${npmErrorCode}]` : '';
  testsDetail = testsPassed
    ? `${testCount} tests passed, 0 failures (threshold: ${TEST_THRESHOLD}+)`
    : `npm exit=${npmExitCode}${abnormalExit}, count=${testCount}, failures=${testFailures} (need ${TEST_THRESHOLD}+ passing, 0 failures)`;
}

record(`PROOF-04: npm test >= ${TEST_THRESHOLD} passing, 0 failures`, testsPassed, testsDetail);
|
|
297
|
+
|
|
298
|
+
// ===========================================================================
|
|
299
|
+
// Summary
|
|
300
|
+
// ===========================================================================
|
|
301
|
+
|
|
302
|
+
// Tally the recorded checks: a check counts as passed when its `passed`
// value is truthy, and the gate passes only when none failed.
const passCount = results.reduce((count, r) => (r.passed ? count + 1 : count), 0);
const failCount = results.length - passCount;
const allPassed = failCount === 0;
const overallLabel = allPassed ? 'PASS' : 'FAIL';

console.log('\n── Summary ──────────────────────────────────');
console.log(`Checks: ${results.length} total | ${passCount} passed | ${failCount} failed`);
console.log(`Overall: ${overallLabel}`);

// ===========================================================================
// Write reports
// ===========================================================================

ensureDir(PROOF_DIR);

const jsonReport = {
  milestone: 'v3.0 Commercialization',
  generated: stamp(),
  overall: overallLabel,
  summary: { total: results.length, passed: passCount, failed: failCount },
  checks: results,
  testCount,
  testFailures,
};

const jsonReportPath = path.join(PROOF_DIR, 'v3-milestone-report.json');
fs.writeFileSync(jsonReportPath, JSON.stringify(jsonReport, null, 2), 'utf8');
console.log('\nWrote proof/v3-milestone-report.json');

// Markdown building blocks. Pipes inside a detail would break the table
// layout, so they are swapped for slashes in table rows only.
const verdictOf = (r) => (r.passed ? 'PASS' : 'FAIL');
const checkTableRows = results
  .map((r) => `| ${r.check} | ${verdictOf(r)} | ${r.detail.replace(/\|/g, '/')} |`)
  .join('\n');
const bulletsFor = (prefix) =>
  results
    .filter((r) => r.check.startsWith(prefix))
    .map((r) => `- **${r.check}**: ${verdictOf(r)} — ${r.detail}`)
    .join('\n');

const mdReport = `# v3.0 Milestone Proof Report

**Generated:** ${stamp()}
**Overall:** ${overallLabel}

## Summary

| Metric | Value |
|--------|-------|
| Total checks | ${results.length} |
| Passed | ${passCount} |
| Failed | ${failCount} |
| Test count | ${testCount} |
| Test failures | ${testFailures} |

## Check Results

| Check | Status | Detail |
|-------|--------|--------|
${checkTableRows}

## PROOF-01: Dockerfile + /health

${bulletsFor('PROOF-01')}

## PROOF-02: Billing Module

${bulletsFor('PROOF-02')}

## PROOF-03: CLI Init

${bulletsFor('PROOF-03')}

## PROOF-04: Test Suite

${bulletsFor('PROOF-04')}

---
*All numbers from actual runs. No placeholders.*
`;

const mdReportPath = path.join(PROOF_DIR, 'v3-milestone-report.md');
fs.writeFileSync(mdReportPath, mdReport, 'utf8');
console.log('Wrote proof/v3-milestone-report.md');

// Exit status mirrors the gate: 0 only when every check passed.
process.exit(allPassed ? 0 : 1);
|