rlhf-feedback-loop 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/adapters/README.md +8 -0
- package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
- package/adapters/chatgpt/INSTALL.md +80 -0
- package/adapters/chatgpt/openapi.yaml +292 -0
- package/adapters/claude/.mcp.json +8 -0
- package/adapters/codex/config.toml +4 -0
- package/adapters/gemini/function-declarations.json +95 -0
- package/adapters/mcp/server-stdio.js +444 -0
- package/bin/cli.js +167 -0
- package/config/mcp-allowlists.json +29 -0
- package/config/policy-bundles/constrained-v1.json +53 -0
- package/config/policy-bundles/default-v1.json +80 -0
- package/config/rubrics/default-v1.json +52 -0
- package/config/subagent-profiles.json +32 -0
- package/openapi/openapi.yaml +292 -0
- package/package.json +91 -0
- package/plugins/amp-skill/INSTALL.md +52 -0
- package/plugins/amp-skill/SKILL.md +31 -0
- package/plugins/claude-skill/INSTALL.md +55 -0
- package/plugins/claude-skill/SKILL.md +46 -0
- package/plugins/codex-profile/AGENTS.md +20 -0
- package/plugins/codex-profile/INSTALL.md +57 -0
- package/plugins/gemini-extension/INSTALL.md +74 -0
- package/plugins/gemini-extension/gemini_prompt.txt +10 -0
- package/plugins/gemini-extension/tool_contract.json +28 -0
- package/scripts/billing.js +471 -0
- package/scripts/budget-guard.js +173 -0
- package/scripts/code-reasoning.js +307 -0
- package/scripts/context-engine.js +547 -0
- package/scripts/contextfs.js +513 -0
- package/scripts/contract-audit.js +198 -0
- package/scripts/dpo-optimizer.js +208 -0
- package/scripts/export-dpo-pairs.js +316 -0
- package/scripts/export-training.js +448 -0
- package/scripts/feedback-attribution.js +313 -0
- package/scripts/feedback-inbox-read.js +162 -0
- package/scripts/feedback-loop.js +838 -0
- package/scripts/feedback-schema.js +300 -0
- package/scripts/feedback-to-memory.js +165 -0
- package/scripts/feedback-to-rules.js +109 -0
- package/scripts/generate-paperbanana-diagrams.sh +99 -0
- package/scripts/hybrid-feedback-context.js +676 -0
- package/scripts/intent-router.js +164 -0
- package/scripts/mcp-policy.js +92 -0
- package/scripts/meta-policy.js +194 -0
- package/scripts/plan-gate.js +154 -0
- package/scripts/prove-adapters.js +364 -0
- package/scripts/prove-attribution.js +364 -0
- package/scripts/prove-automation.js +393 -0
- package/scripts/prove-data-quality.js +219 -0
- package/scripts/prove-intelligence.js +256 -0
- package/scripts/prove-lancedb.js +370 -0
- package/scripts/prove-loop-closure.js +255 -0
- package/scripts/prove-rlaif.js +404 -0
- package/scripts/prove-subway-upgrades.js +250 -0
- package/scripts/prove-training-export.js +324 -0
- package/scripts/prove-v2-milestone.js +273 -0
- package/scripts/prove-v3-milestone.js +381 -0
- package/scripts/rlaif-self-audit.js +123 -0
- package/scripts/rubric-engine.js +230 -0
- package/scripts/self-heal.js +127 -0
- package/scripts/self-healing-check.js +111 -0
- package/scripts/skill-quality-tracker.js +284 -0
- package/scripts/subagent-profiles.js +79 -0
- package/scripts/sync-gh-secrets-from-env.sh +29 -0
- package/scripts/thompson-sampling.js +331 -0
- package/scripts/train_from_feedback.py +914 -0
- package/scripts/validate-feedback.js +580 -0
- package/scripts/vector-store.js +100 -0
- package/src/api/server.js +497 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* Phase 11: Subway Upgrades — Proof Gate
|
|
4
|
+
*
|
|
5
|
+
* Validates all SUBW-01 through SUBW-05 requirements.
|
|
6
|
+
* Runs Jest tests in Subway (via execSync) to produce pass/fail evidence.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* node scripts/prove-subway-upgrades.js
|
|
10
|
+
*
|
|
11
|
+
* Produces:
|
|
12
|
+
* proof/subway-upgrades/subway-upgrades-report.json
|
|
13
|
+
* proof/subway-upgrades/subway-upgrades-report.md
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const { execSync } = require('child_process');
|
|
17
|
+
const fs = require('fs');
|
|
18
|
+
const os = require('os');
|
|
19
|
+
const path = require('path');
|
|
20
|
+
|
|
21
|
+
const PROOF_DIR = path.join(__dirname, '..', 'proof', 'subway-upgrades');
|
|
22
|
+
const REPORT_JSON = path.join(PROOF_DIR, 'subway-upgrades-report.json');
|
|
23
|
+
const REPORT_MD = path.join(PROOF_DIR, 'subway-upgrades-report.md');
|
|
24
|
+
|
|
25
|
+
const SUBWAY_ROOT = path.join(__dirname, '..', '..', '..', 'Subway_RN_Demo');
|
|
26
|
+
|
|
27
|
+
/**
 * Run the Phase 11 "Subway Upgrades" proof gate.
 *
 * Executes the SUBW-01 .. SUBW-05 checks in order. Each check either returns
 * normally (pass) or throws (fail); a failing check never stops the remaining
 * checks from running. Afterwards a JSON report and a markdown report are
 * written under PROOF_DIR, and the process exits with status 1 when at least
 * one check failed.
 *
 * All Subway paths are resolved relative to SUBWAY_ROOT, and every Jest
 * invocation runs with SUBWAY_ROOT as its working directory.
 */
function run() {
  const results = { passed: 0, failed: 0, requirements: {} };

  // Path helpers for the two Subway locations the checks inspect.
  const feedbackScript = (name) => path.join(SUBWAY_ROOT, '.claude', 'scripts', 'feedback', name);
  const workflowFile = (name) => path.join(SUBWAY_ROOT, '.github', 'workflows', name);

  // Environment used by the vector-store runs; built lazily so it reflects
  // process.env at the moment the child process is spawned.
  const stubEmbedEnv = () => ({
    ...process.env,
    RLHF_VECTOR_STUB_EMBED: 'true',
    NODE_OPTIONS: '--experimental-vm-modules',
  });

  // Throw the standard "Missing: <path>" error when a file is absent.
  const assertExists = (filePath) => {
    if (!fs.existsSync(filePath)) throw new Error(`Missing: ${filePath}`);
  };

  // Drop any cached copy of a module, then load it again.
  const loadFresh = (modulePath) => {
    delete require.cache[require.resolve(modulePath)];
    return require(modulePath);
  };

  // Verify that every listed name is exported as a function, in order.
  const assertExports = (mod, names) => {
    for (const name of names) {
      if (typeof mod[name] !== 'function') throw new Error(`${name} not exported`);
    }
  };

  // Run the governance Jest config restricted to one test-path pattern.
  // execSync throws when Jest exits non-zero; the returned string holds
  // stdout only, which is additionally scanned for a "FAIL " marker.
  const runJestSuite = (pattern, env) => {
    const options = { cwd: SUBWAY_ROOT, encoding: 'utf8', stdio: 'pipe' };
    if (env) options.env = env;
    const out = execSync(
      `node --experimental-vm-modules node_modules/.bin/jest --watchman=false --config jest.governance.config.js --testPathPattern=${pattern} --forceExit`,
      options
    );
    if (out.includes('FAIL ')) throw new Error(`Jest tests failed:\n${out.slice(-300)}`);
  };

  const checks = [
    {
      id: 'SUBW-01',
      desc: 'LanceDB vector store ported to Subway — upsert + search verified by Jest (vector-store.test.js)',
      fn: () => {
        // Verify the Subway vector-store.js exists
        const vsPath = feedbackScript('vector-store.js');
        assertExists(vsPath);

        // Load and verify exports
        const vs = require(vsPath);
        assertExports(vs, ['upsertFeedback', 'searchSimilar']);
        if (vs.TABLE_NAME !== 'rlhf_memories') {
          throw new Error(`TABLE_NAME must be rlhf_memories, got ${vs.TABLE_NAME}`);
        }

        // Run Jest tests
        runJestSuite('vector-store', stubEmbedEnv());
      },
    },
    {
      id: 'SUBW-02',
      desc: 'DPO optimizer ported to Subway — buildPreferencePairs + applyDpoAdjustments + dpoLogRatio exported',
      fn: () => {
        const dpoPath = feedbackScript('dpo-optimizer.js');
        assertExists(dpoPath);

        // Clear cache for fresh load
        const dpo = loadFresh(dpoPath);
        assertExports(dpo, ['dpoLogRatio', 'buildPreferencePairs', 'applyDpoAdjustments', 'run']);

        // Verify dpoLogRatio math
        const adj = dpo.dpoLogRatio(1.0, 0.5);
        if (typeof adj !== 'number') throw new Error('dpoLogRatio must return number');
        if (adj <= 0) throw new Error('dpoLogRatio(1.0, 0.5) should be positive (chosen > rejected)');

        // Run Jest tests
        runJestSuite('dpo-optimizer');
      },
    },
    {
      id: 'SUBW-03',
      desc: 'Thompson Sampling JS module ported to Subway — updateModel updates alpha/beta posteriors',
      fn: () => {
        const tsPath = feedbackScript('thompson-sampling.js');
        assertExists(tsPath);

        const ts = loadFresh(tsPath);
        assertExports(ts, [
          'timeDecayWeight',
          'loadModel',
          'saveModel',
          'updateModel',
          'getReliability',
          'samplePosteriors',
          // Called below, so report a missing export explicitly instead of
          // surfacing a bare TypeError.
          'createInitialModel',
        ]);

        // Verify alpha update
        const model = ts.createInitialModel();
        const beforeAlpha = model.categories.testing.alpha;
        ts.updateModel(model, { signal: 'positive', timestamp: new Date().toISOString(), categories: ['testing'] });
        if (model.categories.testing.alpha <= beforeAlpha) throw new Error('alpha should increase on positive signal');

        // Run Jest tests
        runJestSuite('thompson-sampling');
      },
    },
    {
      id: 'SUBW-04',
      desc: 'Self-healing GH Action workflows exist in Subway .github/workflows/',
      fn: () => {
        const monitorPath = workflowFile('self-healing-monitor.yml');
        const autoFixPath = workflowFile('self-healing-auto-fix.yml');
        assertExists(monitorPath);
        assertExists(autoFixPath);

        // Verify key fields in monitor workflow
        const monitorContent = fs.readFileSync(monitorPath, 'utf-8');
        if (!monitorContent.includes('self-healing-check.js')) {
          throw new Error('self-healing-monitor.yml must reference self-healing-check.js');
        }
        if (!monitorContent.includes('self-heal.js')) {
          throw new Error('self-healing-monitor.yml must reference self-heal.js');
        }

        // Verify auto-fix workflow
        const autoFixContent = fs.readFileSync(autoFixPath, 'utf-8');
        if (!autoFixContent.includes('self-heal.js')) {
          throw new Error('self-healing-auto-fix.yml must reference self-heal.js');
        }
      },
    },
    {
      id: 'SUBW-05',
      desc: 'All Phase 11 Subway Jest tests pass with 0 failures (vector-store, dpo-optimizer, thompson-sampling)',
      fn: () => {
        // Use combined stderr+stdout to capture Jest output (it writes to stderr)
        let out = '';
        try {
          out = execSync(
            'npx jest --watchman=false --config jest.governance.config.js --testPathPattern="vector-store|dpo-optimizer|thompson-sampling" --forceExit 2>&1',
            {
              cwd: SUBWAY_ROOT,
              encoding: 'utf8',
              env: stubEmbedEnv(),
            }
          );
        } catch (err) {
          // execSync throws on non-zero exit — capture output from err.stdout/stderr
          out = (err.stdout || '') + (err.stderr || '');

          // Failing individual tests are reported on the "Tests:" summary line.
          const failMatch = out.match(/Tests:\s+(\d+) failed/);
          if (failMatch && parseInt(failMatch[1], 10) > 0) {
            throw new Error(`${failMatch[1]} Jest test failure(s):\n${out.slice(-500)}`);
          }

          // A suite that fails before its tests run is only visible on the
          // "Test Suites:" summary line, so check that line as well.
          const suiteFailMatch = out.match(/Test Suites:\s+(\d+) failed/);
          if (suiteFailMatch && parseInt(suiteFailMatch[1], 10) > 0) {
            throw new Error(`${suiteFailMatch[1]} Jest test suite(s) failed:\n${out.slice(-500)}`);
          }
          // Otherwise fall through and re-check the pass count below.
        }

        // Verify at least 25 tests passed
        const passMatch = out.match(/Tests:\s+.*?(\d+) passed/);
        const passCnt = passMatch ? parseInt(passMatch[1], 10) : 0;
        if (passCnt < 25) throw new Error(`Expected >= 25 tests passing, got ${passCnt}`);
      },
    },
  ];

  console.log('Phase 11: Subway Upgrades — Proof Gate\n');
  console.log('Checking requirements:\n');

  // Run every check; record pass/fail per requirement id.
  for (const check of checks) {
    try {
      check.fn();
      results.passed++;
      results.requirements[check.id] = { status: 'pass', desc: check.desc };
      console.log(`  PASS  ${check.id}: ${check.desc}`);
    } catch (err) {
      results.failed++;
      results.requirements[check.id] = {
        status: 'fail',
        desc: check.desc,
        error: err.message,
      };
      console.error(`  FAIL  ${check.id}: ${err.message}`);
    }
  }

  // Write proof artifacts
  fs.mkdirSync(PROOF_DIR, { recursive: true });

  const report = {
    phase: '11-subway-upgrades',
    generatedAt: new Date().toISOString(),
    passed: results.passed,
    failed: results.failed,
    total: checks.length,
    requirements: results.requirements,
  };

  fs.writeFileSync(REPORT_JSON, JSON.stringify(report, null, 2) + '\n');

  const md = [
    '# Phase 11: Subway Upgrades — Proof Report',
    '',
    `Generated: ${report.generatedAt}`,
    `Result: ${results.passed}/${checks.length} passed`,
    '',
    '## Requirements',
    '',
    ...Object.entries(results.requirements).map(([id, r]) => {
      const checkbox = r.status === 'pass' ? '[x]' : '[ ]';
      const errLine = r.error ? `\n  - Error: \`${r.error}\`` : '';
      return `- ${checkbox} **${id}**: ${r.desc}${errLine}`;
    }),
    '',
    '## Evidence',
    '',
    '- `Subway/.claude/scripts/feedback/vector-store.js` — LanceDB upsert + semantic search (3-level PATH from root)',
    '- `Subway/.claude/scripts/feedback/dpo-optimizer.js` — Offline batch DPO optimization (sibling requires)',
    '- `Subway/.claude/scripts/feedback/thompson-sampling.js` — Beta-Bernoulli posteriors with inline parseTimestamp',
    '- `Subway/.github/workflows/self-healing-monitor.yml` — Scheduled health check + issue creation',
    '- `Subway/.github/workflows/self-healing-auto-fix.yml` — Scheduled self-heal + remediation PR',
    '- `Subway/scripts/__tests__/vector-store.test.js` — 6 Jest tests (NODE_OPTIONS=--experimental-vm-modules)',
    '- `Subway/scripts/__tests__/dpo-optimizer.test.js` — 7 Jest tests',
    '- `Subway/scripts/__tests__/thompson-sampling.test.js` — 10 Jest tests',
    '',
  ].join('\n');

  fs.writeFileSync(REPORT_MD, md);

  console.log(`\nPhase 11 proof: ${results.passed} passed, ${results.failed} failed`);
  console.log(`Report: ${REPORT_JSON}`);

  if (results.failed > 0) process.exit(1);
}
|
|
249
|
+
|
|
250
|
+
run();
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* prove-training-export.js
|
|
4
|
+
*
|
|
5
|
+
* Smoke-test gate for Phase 10: Training Export
|
|
6
|
+
* Verifies all export formats + DPO validation gate work end-to-end.
|
|
7
|
+
* Writes machine-readable JSON + human-readable markdown to proof/.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
'use strict';
|
|
11
|
+
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const os = require('os');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const { execSync } = require('child_process');
|
|
16
|
+
|
|
17
|
+
const ROOT = path.join(__dirname, '..');
|
|
18
|
+
const PROOF_DIR = path.join(ROOT, 'proof');
|
|
19
|
+
|
|
20
|
+
/**
 * Make sure a directory exists, creating it and any missing parents.
 *
 * @param {string} d - Directory path to create when nothing exists there yet.
 * @returns {void}
 */
function ensureDir(d) {
  // Nothing to do when the path is already present on disk.
  if (fs.existsSync(d)) {
    return;
  }
  fs.mkdirSync(d, { recursive: true });
}
|
|
23
|
+
|
|
24
|
+
/**
 * Run the training-export unit test file with the Node.js test runner.
 *
 * The command runs with ROOT as its working directory and all stdio piped.
 * A non-zero exit does not propagate: the captured stdout (or stderr, or the
 * stringified error when both are empty) is returned instead.
 *
 * @returns {string} Captured output of the test run.
 */
function runTests() {
  const spawnOptions = {
    cwd: ROOT,
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  let captured;
  try {
    captured = execSync('node --test tests/training-export.test.js', spawnOptions);
  } catch (err) {
    // execSync throws on non-zero exit; fall back to whatever output it kept.
    captured = err.stdout || err.stderr || String(err);
  }
  return captured;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Extract pass/fail counts from Node.js test runner output.
 *
 * Recognises the spec reporter summary lines (`ℹ pass N` / `ℹ fail N`) and
 * also the TAP reporter summary lines (`# pass N` / `# fail N`, matched at the
 * start of a line), so the counts are found whichever of the two reporters
 * produced the output.
 *
 * @param {string} output - Raw text captured from the test run.
 * @returns {{ passed: number, failed: number }} Parsed counts; a count is 0
 *   when its summary line is not present in the output.
 */
function parseTestOutput(output) {
  const readCount = (pattern) => {
    const match = output.match(pattern);
    return match ? Number.parseInt(match[1], 10) : 0;
  };

  return {
    passed: readCount(/(?:ℹ|^#) pass (\d+)/m),
    failed: readCount(/(?:ℹ|^#) fail (\d+)/m),
  };
}
|
|
44
|
+
|
|
45
|
+
/**
 * Build a feedback-log entry for the smoke tests.
 *
 * Starts from a positive "up" entry with reward 1 and a generated id and
 * timestamp, then applies the caller's overrides on top.
 *
 * @param {object} [overrides] - Fields that replace or extend the defaults.
 * @returns {object} The assembled feedback entry.
 */
function makeFeedbackEntry(overrides) {
  const defaults = {
    id: `fb_${Date.now()}_test`,
    timestamp: new Date().toISOString(),
    signal: 'positive',
    feedback: 'up',
    reward: 1,
    context: 'Test context for smoke test',
    tags: ['testing'],
    richContext: { domain: 'testing', outcomeCategory: 'quick-success' },
  };

  // Overrides win over defaults; a missing overrides argument is ignored.
  return Object.assign(defaults, overrides);
}
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Smoke test: PyTorch JSON export (XPRT-01)
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
/**
 * Smoke test for the PyTorch JSON export (XPRT-01).
 *
 * Writes one positive and one negative feedback entry into a temporary
 * directory, calls exportPyTorchJSON on it, and checks the shape of the
 * written file. The temporary directory is always removed afterwards.
 *
 * @returns {{ passed: true, pairCount: *, format: string } |
 *           { passed: false, error: string }} Result object; never throws.
 */
function smokePyTorchExport() {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'prove-xprt1-'));
  try {
    // Reload the exporter so this smoke test starts from a fresh module.
    delete require.cache[require.resolve('./export-training.js')];
    const exporter = require('./export-training.js');

    fs.mkdirSync(path.join(tmpDir, 'training-data'), { recursive: true });

    const positive = makeFeedbackEntry({ context: 'Implemented TDD correctly' });
    const negative = makeFeedbackEntry({
      signal: 'negative',
      feedback: 'down',
      reward: -1,
      context: 'Skipped validation',
    });
    const logLines = [positive, negative].map((entry) => JSON.stringify(entry));
    fs.writeFileSync(path.join(tmpDir, 'feedback-log.jsonl'), logLines.join('\n'));

    const outPath = path.join(tmpDir, 'pytorch.json');
    const result = exporter.exportPyTorchJSON(tmpDir, outPath);

    if (!fs.existsSync(result.outputPath)) {
      throw new Error('Output file not created');
    }

    const data = JSON.parse(fs.readFileSync(result.outputPath, 'utf-8'));
    if (!data.metadata) {
      throw new Error('Missing metadata');
    }
    if (data.metadata.format !== 'pytorch-dpo') {
      throw new Error('Wrong format');
    }
    if (!Array.isArray(data.pairs)) {
      throw new Error('Missing pairs array');
    }
    if (!Array.isArray(data.sequences)) {
      throw new Error('Missing sequences array');
    }

    // Verify pair structure when pairs exist
    if (data.pairs.length > 0) {
      const firstPair = data.pairs[0];
      for (const field of ['prompt', 'chosen', 'rejected']) {
        if (!(field in firstPair)) {
          throw new Error(`pair missing ${field}`);
        }
      }
    }

    return { passed: true, pairCount: result.pairCount, format: data.metadata.format };
  } catch (err) {
    return { passed: false, error: err.message };
  } finally {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
}
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Smoke test: CSV export (XPRT-02)
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
/**
 * Smoke test for the CSV summary export (XPRT-02).
 *
 * Writes two feedback entries into a temporary directory, calls exportCSV,
 * and checks that the header row contains the required columns and that the
 * reported row count is 2. The temporary directory is always removed.
 *
 * @returns {{ passed: true, rowCount: number, headers: string[] } |
 *           { passed: false, error: string }} Result object; never throws.
 */
function smokeCsvExport() {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'prove-xprt2-'));
  try {
    // Reload the exporter so this smoke test starts from a fresh module.
    delete require.cache[require.resolve('./export-training.js')];
    const exporter = require('./export-training.js');

    fs.mkdirSync(path.join(tmpDir, 'training-data'), { recursive: true });

    const positive = makeFeedbackEntry({ context: 'Works great' });
    const negative = makeFeedbackEntry({
      signal: 'negative',
      feedback: 'down',
      reward: -1,
      context: 'Has issues',
    });
    const logLines = [positive, negative].map((entry) => JSON.stringify(entry));
    fs.writeFileSync(path.join(tmpDir, 'feedback-log.jsonl'), logLines.join('\n'));

    const outPath = path.join(tmpDir, 'summary.csv');
    const result = exporter.exportCSV(tmpDir, outPath);

    if (!fs.existsSync(result.outputPath)) {
      throw new Error('CSV not created');
    }

    // The first line of the file is treated as the header row.
    const [headerLine] = fs.readFileSync(result.outputPath, 'utf-8').split('\n');
    const headers = headerLine.split(',');

    const required = ['id', 'timestamp', 'signal', 'reward', 'context'];
    const absent = required.find((column) => !headers.includes(column));
    if (absent !== undefined) {
      throw new Error(`CSV missing column: ${absent}`);
    }
    if (result.rowCount !== 2) {
      throw new Error(`Expected 2 rows, got ${result.rowCount}`);
    }

    return { passed: true, rowCount: result.rowCount, headers };
  } catch (err) {
    return { passed: false, error: err.message };
  } finally {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// Smoke test: Action analysis (XPRT-03)
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
/**
 * Smoke test for the action analysis export (XPRT-03).
 *
 * Runs exportActionAnalysis against a temporary directory holding an empty
 * feedback log and checks that the returned report has the expected
 * top-level fields. The temporary directory is always removed.
 *
 * @returns {{ passed: true, fields: string[] } |
 *           { passed: false, error: string }} Result object; never throws.
 */
function smokeActionAnalysis() {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'prove-xprt3-'));
  try {
    // Reload the exporter so this smoke test starts from a fresh module.
    delete require.cache[require.resolve('./export-training.js')];
    const exporter = require('./export-training.js');

    fs.mkdirSync(path.join(tmpDir, 'training-data'), { recursive: true });
    fs.writeFileSync(path.join(tmpDir, 'feedback-log.jsonl'), '');

    const outPath = path.join(tmpDir, 'actions.json');
    const { report } = exporter.exportActionAnalysis(tmpDir, outPath);

    // Each field is checked in order; the first one that is absent or has
    // the wrong shape is reported.
    const isPresent = (value) => Boolean(value);
    const expectations = [
      ['summary', isPresent],
      ['actionPatterns', isPresent],
      ['topFailureModes', Array.isArray],
      ['recommendations', Array.isArray],
    ];
    for (const [field, isValid] of expectations) {
      if (!isValid(report[field])) {
        throw new Error(`Missing ${field}`);
      }
    }

    return { passed: true, fields: Object.keys(report) };
  } catch (err) {
    return { passed: false, error: err.message };
  } finally {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
}
|
|
174
|
+
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
// Smoke test: validateMemoryStructure gate (XPRT-04)
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
/**
 * Smoke test for the validateMemoryStructure gate (XPRT-04).
 *
 * Confirms that a well-formed memory entry is accepted, and that a DPO export
 * entry without a `chosen` field is rejected with an issue mentioning
 * "chosen".
 *
 * @returns {{ passed: true, validEntryAccepted: boolean,
 *             missingChosenRejected: boolean, missingChosenIssues: string[] } |
 *           { passed: false, error: string }} Result object; never throws.
 */
function smokeValidateMemoryStructure() {
  try {
    // Reload the exporter so this smoke test starts from a fresh module.
    delete require.cache[require.resolve('./export-training.js')];
    const exporter = require('./export-training.js');

    // Valid entry
    const wellFormedEntry = {
      title: 'SUCCESS: Test passed',
      content: 'The implementation was correct and tests verified.',
      category: 'learning',
      tags: ['testing'],
    };
    const valid = exporter.validateMemoryStructure(wellFormedEntry);
    if (!valid.valid) {
      throw new Error(`Valid entry rejected: ${valid.issues.join(', ')}`);
    }

    // Missing 'chosen' in DPO export
    const entryWithoutChosen = {
      title: 'PREFERENCE: Good vs bad',
      content: 'Comparison of approaches.',
      category: 'preference',
      tags: ['arch'],
      _dpoExport: true,
      prompt: 'Which approach?',
      // chosen is missing
      rejected: 'The bad approach',
    };
    const missingChosen = exporter.validateMemoryStructure(entryWithoutChosen);
    if (missingChosen.valid) {
      throw new Error('Should have rejected missing chosen field');
    }
    const mentionsChosen = missingChosen.issues.some((issue) => issue.includes('chosen'));
    if (!mentionsChosen) {
      throw new Error('Issue should mention chosen field');
    }

    return {
      passed: true,
      validEntryAccepted: valid.valid,
      missingChosenRejected: !missingChosen.valid,
      missingChosenIssues: missingChosen.issues,
    };
  } catch (err) {
    return { passed: false, error: err.message };
  }
}
|
|
218
|
+
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
// Main
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
/**
 * Entry point for the Phase 10 "Training Export" proof gate.
 *
 * Runs the unit test file and the four smoke tests, writes a JSON report and
 * a markdown report into PROOF_DIR, prints a summary, and exits the process
 * with status 0 when everything passed or 1 otherwise.
 *
 * The unit-test requirement (XPRT-05) only counts as satisfied when the test
 * output shows zero failures AND at least one passing test. Output with no
 * recognisable summary parses as 0 passed / 0 failed, which must not be
 * reported as a pass.
 *
 * @returns {Promise<void>} Does not resolve in practice because the function
 *   ends with process.exit().
 */
async function main() {
  console.log('Running Phase 10: Training Export proof gate...\n');

  const testOutput = runTests();
  const { passed: testsPassed, failed: testsFailed } = parseTestOutput(testOutput);

  // Fail closed: "no failures" is only meaningful if tests actually ran.
  const unitTestsOk = testsFailed === 0 && testsPassed > 0;

  const pytorch = smokePyTorchExport();
  const csv = smokeCsvExport();
  const actions = smokeActionAnalysis();
  const gate = smokeValidateMemoryStructure();

  const allPassed = unitTestsOk && pytorch.passed && csv.passed && actions.passed && gate.passed;

  const report = {
    phase: 10,
    name: 'Training Export',
    requirements: ['XPRT-01', 'XPRT-02', 'XPRT-03', 'XPRT-04', 'XPRT-05'],
    generatedAt: new Date().toISOString(),
    testResults: {
      passed: testsPassed,
      failed: testsFailed,
      suiteFile: 'tests/training-export.test.js',
    },
    smokeTests: {
      pytorchExport: pytorch,
      csvExport: csv,
      actionAnalysis: actions,
      validateMemoryStructure: gate,
    },
    overallPassed: allPassed,
  };

  ensureDir(PROOF_DIR);
  const jsonPath = path.join(PROOF_DIR, 'training-export-report.json');
  const mdPath = path.join(PROOF_DIR, 'training-export-report.md');

  fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2));

  const status = allPassed ? 'PASSED' : 'FAILED';
  const md = `# Phase 10: Training Export — Proof Report

**Status:** ${status}
**Generated:** ${report.generatedAt}
**Requirements:** ${report.requirements.join(', ')}

## Test Results

| Suite | Passed | Failed |
|-------|--------|--------|
| training-export.test.js | ${testsPassed} | ${testsFailed} |

## Smoke Tests

### PyTorch JSON Export (XPRT-01)
- Passed: ${pytorch.passed}
${pytorch.passed ? `- Pair count: ${pytorch.pairCount}\n- Format: ${pytorch.format}` : `- Error: ${pytorch.error}`}

### CSV Summary Export (XPRT-02)
- Passed: ${csv.passed}
${csv.passed ? `- Row count: ${csv.rowCount}\n- Headers: ${csv.headers ? csv.headers.join(', ') : 'N/A'}` : `- Error: ${csv.error}`}

### Action Analysis Report (XPRT-03)
- Passed: ${actions.passed}
${actions.passed ? `- Report fields: ${actions.fields ? actions.fields.join(', ') : 'N/A'}` : `- Error: ${actions.error}`}

### DPO Export Gate — validateMemoryStructure (XPRT-04)
- Passed: ${gate.passed}
${gate.passed ? `- Valid entry accepted: ${gate.validEntryAccepted}\n- Missing 'chosen' field rejected: ${gate.missingChosenRejected}` : `- Error: ${gate.error}`}

## Requirements Coverage

| Requirement | Description | Status |
|-------------|-------------|--------|
| XPRT-01 | PyTorch JSON export with prompt/chosen/rejected pairs | ${pytorch.passed ? 'PASS' : 'FAIL'} |
| XPRT-02 | CSV summary export with correct headers and escaping | ${csv.passed ? 'PASS' : 'FAIL'} |
| XPRT-03 | Action analysis report from feedback sequences | ${actions.passed ? 'PASS' : 'FAIL'} |
| XPRT-04 | validateMemoryStructure() gates DPO export | ${gate.passed ? 'PASS' : 'FAIL'} |
| XPRT-05 | All export features have unit tests (${testsPassed} tests, ${testsFailed} failures) | ${unitTestsOk ? 'PASS' : 'FAIL'} |

## Files Created

- \`scripts/export-training.js\` — PyTorch JSON, CSV, action analysis exports + validateMemoryStructure gate
- \`tests/training-export.test.js\` — ${testsPassed} unit tests covering all formats, gate rejection, edge cases
- \`scripts/prove-training-export.js\` — This proof gate script
`;

  fs.writeFileSync(mdPath, md);

  console.log(`Status: ${status}`);
  console.log(`Tests: ${testsPassed} passed, ${testsFailed} failed`);
  console.log(`PyTorch smoke: ${pytorch.passed ? 'PASS' : 'FAIL'}`);
  console.log(`CSV smoke: ${csv.passed ? 'PASS' : 'FAIL'}`);
  console.log(`Action analysis smoke: ${actions.passed ? 'PASS' : 'FAIL'}`);
  console.log(`validateMemoryStructure gate: ${gate.passed ? 'PASS' : 'FAIL'}`);
  console.log(`\nReport written to: ${mdPath}`);

  process.exit(allPassed ? 0 : 1);
}
|
|
320
|
+
|
|
321
|
+
// Kick off the proof gate; any rejection from main() is logged and turned
// into a non-zero exit status.
main().catch((failure) => {
  console.error('prove-training-export failed:', failure.message);
  process.exit(1);
});
|