rlhf-feedback-loop 0.6.10 → 0.6.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +120 -74
- package/adapters/README.md +3 -3
- package/adapters/amp/skills/rlhf-feedback/SKILL.md +2 -0
- package/adapters/chatgpt/INSTALL.md +6 -3
- package/adapters/chatgpt/openapi.yaml +5 -2
- package/adapters/claude/.mcp.json +3 -3
- package/adapters/codex/config.toml +3 -3
- package/adapters/gemini/function-declarations.json +2 -2
- package/adapters/mcp/server-stdio.js +19 -5
- package/bin/cli.js +295 -25
- package/openapi/openapi.yaml +5 -2
- package/package.json +25 -9
- package/scripts/a2ui-engine.js +73 -0
- package/scripts/adk-consolidator.js +267 -0
- package/scripts/billing.js +192 -681
- package/scripts/code-reasoning.js +26 -1
- package/scripts/context-engine.js +86 -4
- package/scripts/contextfs.js +130 -0
- package/scripts/disagreement-mining.js +315 -0
- package/scripts/export-kto-pairs.js +310 -0
- package/scripts/feedback-ingest-watcher.js +290 -0
- package/scripts/feedback-loop.js +153 -8
- package/scripts/feedback-quality.js +139 -0
- package/scripts/feedback-schema.js +31 -5
- package/scripts/feedback-to-memory.js +13 -1
- package/scripts/hook-auto-capture.sh +6 -0
- package/scripts/hook-stop-self-score.sh +51 -0
- package/scripts/install-mcp.js +168 -0
- package/scripts/intent-router.js +88 -0
- package/scripts/jsonl-watcher.js +151 -0
- package/scripts/local-model-profile.js +207 -0
- package/scripts/pr-manager.js +112 -0
- package/scripts/prove-adapters.js +137 -15
- package/scripts/prove-attribution.js +6 -6
- package/scripts/prove-automation.js +41 -8
- package/scripts/prove-data-quality.js +16 -8
- package/scripts/prove-intelligence.js +7 -4
- package/scripts/prove-lancedb.js +7 -7
- package/scripts/prove-local-intelligence.js +244 -0
- package/scripts/prove-loop-closure.js +16 -8
- package/scripts/prove-training-export.js +7 -4
- package/scripts/prove-workflow-contract.js +116 -0
- package/scripts/reminder-engine.js +132 -0
- package/scripts/risk-scorer.js +458 -0
- package/scripts/rlaif-self-audit.js +7 -1
- package/scripts/self-heal.js +24 -4
- package/scripts/status-dashboard.js +155 -0
- package/scripts/sync-version.js +159 -0
- package/scripts/test-coverage.js +76 -0
- package/scripts/validate-workflow-contract.js +287 -0
- package/scripts/vector-store.js +115 -17
- package/src/api/server.js +372 -25
|
@@ -15,7 +15,9 @@ const path = require('path');
|
|
|
15
15
|
const { execSync } = require('child_process');
|
|
16
16
|
|
|
17
17
|
const ROOT = path.join(__dirname, '..');
|
|
18
|
-
|
|
18
|
+
/**
 * Resolve the directory that proof artifacts are written to.
 *
 * @returns {string} The RLHF_PROOF_DIR environment value when it is set to a
 *   non-empty string, otherwise the `proof` directory under ROOT.
 */
function getProofDir() {
  const override = process.env.RLHF_PROOF_DIR;
  if (override) {
    return override;
  }
  return path.join(ROOT, 'proof');
}
|
|
19
21
|
|
|
20
22
|
function ensureDir(d) {
|
|
21
23
|
if (!fs.existsSync(d)) fs.mkdirSync(d, { recursive: true });
|
|
@@ -185,9 +187,10 @@ async function main() {
|
|
|
185
187
|
overallPassed: allPassed,
|
|
186
188
|
};
|
|
187
189
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
const
|
|
190
|
+
const proofDir = getProofDir();
|
|
191
|
+
ensureDir(proofDir);
|
|
192
|
+
const jsonPath = path.join(proofDir, 'intelligence-report.json');
|
|
193
|
+
const mdPath = path.join(proofDir, 'intelligence-report.md');
|
|
191
194
|
|
|
192
195
|
fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2));
|
|
193
196
|
|
package/scripts/prove-lancedb.js
CHANGED
|
@@ -18,7 +18,6 @@ const os = require('os');
|
|
|
18
18
|
const { execSync } = require('child_process');
|
|
19
19
|
|
|
20
20
|
const ROOT = path.join(__dirname, '..');
|
|
21
|
-
const PROOF_DIR = path.join(ROOT, 'proof');
|
|
22
21
|
const PKG = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf-8'));
|
|
23
22
|
|
|
24
23
|
function ensureDir(dirPath) {
|
|
@@ -31,7 +30,8 @@ function status(condition) {
|
|
|
31
30
|
return condition ? 'pass' : 'fail';
|
|
32
31
|
}
|
|
33
32
|
|
|
34
|
-
async function runProof() {
|
|
33
|
+
async function runProof(options = {}) {
|
|
34
|
+
const proofDir = options.proofDir || process.env.RLHF_PROOF_DIR || path.join(ROOT, 'proof');
|
|
35
35
|
const report = {
|
|
36
36
|
phase: '04-lancedb-vector-storage',
|
|
37
37
|
generated: new Date().toISOString(),
|
|
@@ -228,7 +228,7 @@ async function runProof() {
|
|
|
228
228
|
} finally {
|
|
229
229
|
// Clean up tmp dir
|
|
230
230
|
try {
|
|
231
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
231
|
+
fs.rmSync(tmpDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
|
|
232
232
|
} catch (_) {
|
|
233
233
|
// ignore cleanup errors
|
|
234
234
|
}
|
|
@@ -295,9 +295,9 @@ async function runProof() {
|
|
|
295
295
|
// ─────────────────────────────────────────────────────────────────────────
|
|
296
296
|
// Write proof artifacts
|
|
297
297
|
// ─────────────────────────────────────────────────────────────────────────
|
|
298
|
-
ensureDir(
|
|
298
|
+
ensureDir(proofDir);
|
|
299
299
|
|
|
300
|
-
const jsonPath = path.join(
|
|
300
|
+
const jsonPath = path.join(proofDir, 'lancedb-report.json');
|
|
301
301
|
fs.writeFileSync(jsonPath, `${JSON.stringify(report, null, 2)}\n`);
|
|
302
302
|
|
|
303
303
|
const mdLines = [
|
|
@@ -341,7 +341,7 @@ async function runProof() {
|
|
|
341
341
|
mdLines.push('- multi-upsert top-k includes expected record');
|
|
342
342
|
mdLines.push('');
|
|
343
343
|
|
|
344
|
-
const mdPath = path.join(
|
|
344
|
+
const mdPath = path.join(proofDir, 'lancedb-report.md');
|
|
345
345
|
fs.writeFileSync(mdPath, `${mdLines.join('\n')}\n`);
|
|
346
346
|
|
|
347
347
|
console.log(`Proof written to ${mdPath}`);
|
|
@@ -352,7 +352,7 @@ async function runProof() {
|
|
|
352
352
|
const hasFail = report.summary.failed > 0;
|
|
353
353
|
if (hasFail) {
|
|
354
354
|
process.exitCode = 1;
|
|
355
|
-
console.error(
|
|
355
|
+
console.error(`\nFAIL — one or more requirements did not pass. See ${mdPath} for details.`);
|
|
356
356
|
} else {
|
|
357
357
|
console.log('\nPASS — all requirements satisfied (warns are acceptable).');
|
|
358
358
|
}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const os = require('os');
|
|
6
|
+
const path = require('path');
|
|
7
|
+
const { execSync } = require('child_process');
|
|
8
|
+
|
|
9
|
+
const ROOT = path.join(__dirname, '..');
|
|
10
|
+
const DEFAULT_PROOF_DIR = process.env.RLHF_PROOF_DIR || path.join(ROOT, 'proof');
|
|
11
|
+
|
|
12
|
+
/**
 * Create `dirPath`, including any missing parent directories, unless
 * something already exists at that path.
 *
 * @param {string} dirPath - Directory to create.
 */
function ensureDir(dirPath) {
  const alreadyPresent = fs.existsSync(dirPath);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(dirPath, { recursive: true });
}
|
|
15
|
+
|
|
16
|
+
/**
 * Run the local-intelligence related test files with `node --test` from ROOT
 * and hand back whatever the runner printed.
 *
 * A non-zero exit from the runner is not propagated: the captured stdout is
 * returned instead (falling back to stderr, then to the stringified error).
 *
 * @returns {string} Captured test-runner output.
 */
function runTests() {
  const command =
    'node --test tests/local-model-profile.test.js tests/risk-scorer.test.js tests/vector-store.test.js tests/feedback-sequences.test.js tests/feedback-loop.test.js';
  const spawnOptions = { cwd: ROOT, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] };

  let captured;
  try {
    captured = execSync(command, spawnOptions);
  } catch (err) {
    captured = err.stdout || err.stderr || String(err);
  }
  return captured;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Extract the pass/fail totals from `node --test` output.
 *
 * Both summary formats are recognised: the spec reporter (`ℹ pass N`) and the
 * TAP reporter (`# pass N`). Without the TAP form, a run under the TAP
 * reporter would parse as 0 passed / 0 failed and hide real failures.
 *
 * @param {string} output - Raw test-runner output; other values are
 *   stringified and null/undefined is treated as empty.
 * @returns {{passed: number, failed: number}} Parsed totals; a total that
 *   cannot be found is reported as 0.
 */
function parseTestOutput(output) {
  const text = output == null ? '' : String(output);

  // First summary line wins, matching how a single String#match behaves.
  const readTotal = (label) => {
    const found = text.match(new RegExp(`(?:ℹ|#) ${label} (\\d+)`));
    return found ? Number(found[1]) : 0;
  };

  return {
    passed: readTotal('pass'),
    failed: readTotal('fail'),
  };
}
|
|
35
|
+
|
|
36
|
+
/**
 * Run the local-intelligence proof.
 *
 * Executes the related unit tests, then exercises the model-fit report, the
 * vector-store fallback path, vague-feedback clarification and the risk model
 * against a throwaway feedback directory. Writes
 * `local-intelligence-report.json` / `.md` into the proof directory and exits
 * the process with 0 when every check and test passed, 1 otherwise.
 *
 * Environment variables overridden during the run are restored to their prior
 * values afterwards, and a failure to delete the temp directory is reported
 * as a warning instead of preventing the report from being written.
 *
 * @returns {Promise<void>} Rejects if one of the exercised modules throws.
 */
async function main() {
  const output = runTests();
  const testResults = parseTestOutput(output);
  const proofDir = DEFAULT_PROOF_DIR;
  ensureDir(proofDir);

  const tmpFeedbackDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rlhf-local-intel-'));
  const report = {
    generatedAt: new Date().toISOString(),
    checks: [],
    summary: { passed: 0, failed: 0 },
    testResults,
  };

  // Record one check outcome and keep the summary counters in step.
  function addResult(id, passed, evidence) {
    report.checks.push({ id, passed, evidence });
    if (passed) report.summary.passed += 1;
    else report.summary.failed += 1;
  }

  // Snapshot the variables this run overrides so the caller's values survive.
  const overriddenEnvKeys = [
    'RLHF_FEEDBACK_DIR',
    'RLHF_MODEL_FIT_PROFILE',
    'RLHF_VECTOR_FORCE_PRIMARY_FAILURE',
    'RLHF_VECTOR_STUB_EMBED',
  ];
  const savedEnv = {};
  overriddenEnvKeys.forEach((key) => {
    savedEnv[key] = process.env[key];
  });

  try {
    // FIT-01: with 4 GiB RAM / 4 CPUs overridden, the report must exist and
    // select the 'compact' profile.
    const localModelProfile = require('./local-model-profile');
    const { writeModelFitReport } = localModelProfile;
    const { reportPath, report: modelFitReport } = writeModelFitReport(tmpFeedbackDir, {
      resolved: localModelProfile.resolveEmbeddingProfile({
        RLHF_RAM_BYTES_OVERRIDE: String(4 * 1024 ** 3),
        RLHF_CPU_COUNT_OVERRIDE: '4',
      }),
    });
    addResult(
      'FIT-01',
      fs.existsSync(reportPath) && modelFitReport.selectedProfile.id === 'compact',
      `model-fit report written; selected profile=${modelFitReport.selectedProfile.id}; maxChars=${modelFitReport.selectedProfile.maxChars}`
    );

    // FIT-02: force the primary embedding path to fail and confirm that the
    // vector store reports a fallback. The module is re-required so it picks
    // up the environment set here.
    process.env.RLHF_FEEDBACK_DIR = tmpFeedbackDir;
    process.env.RLHF_MODEL_FIT_PROFILE = 'quality';
    process.env.RLHF_VECTOR_FORCE_PRIMARY_FAILURE = 'true';
    delete process.env.RLHF_VECTOR_STUB_EMBED;
    delete require.cache[require.resolve('./vector-store')];
    const vectorStore = require('./vector-store');

    // In-memory stand-in for the LanceDB module: tables live in a Map and
    // search simply returns the first `limit` rows.
    vectorStore.setLanceLoaderForTests(async () => {
      const tables = new Map();
      return {
        connect: async () => ({
          tableNames: async () => [...tables.keys()],
          openTable: async (name) => {
            const rows = tables.get(name) || [];
            return {
              add: async (records) => {
                rows.push(...records);
                tables.set(name, rows);
              },
              search: () => ({
                limit: (limit) => ({
                  toArray: async () => rows.slice(0, limit),
                }),
              }),
            };
          },
          createTable: async (name, records) => {
            tables.set(name, [...records]);
            return {
              add: async (more) => {
                const rows = tables.get(name) || [];
                rows.push(...more);
                tables.set(name, rows);
              },
            };
          },
        }),
      };
    });

    // Deterministic 384-dimension embedding: 1 in the first slot, 0 elsewhere.
    vectorStore.setPipelineLoaderForTests(async (_task, model, opts) => async () => ({
      data: Float32Array.from({ length: 384 }, (_, index) => (index === 0 ? 1 : 0)),
      model,
      opts,
    }));
    await vectorStore.upsertFeedback({
      id: 'proof-local-intel',
      signal: 'positive',
      context: 'vector fallback proof',
      tags: ['proof'],
      timestamp: new Date().toISOString(),
    });
    const fallbackProfile = vectorStore.getLastEmbeddingProfile();
    addResult(
      'FIT-02',
      Boolean(fallbackProfile && fallbackProfile.fallbackUsed),
      `vector-store active profile=${fallbackProfile && fallbackProfile.activeProfile ? fallbackProfile.activeProfile.id : 'none'}; fallbackUsed=${fallbackProfile ? fallbackProfile.fallbackUsed : false}; reason=${fallbackProfile ? fallbackProfile.fallbackReason : 'n/a'}`
    );

    // Seed mixed feedback through a freshly loaded feedback-loop module.
    delete require.cache[require.resolve('./feedback-loop')];
    const { captureFeedback, analyzeFeedback } = require('./feedback-loop');
    captureFeedback({
      signal: 'up',
      context: 'ran tests and included logs',
      whatWorked: 'verification complete',
      tags: ['testing', 'verification'],
    });
    captureFeedback({
      signal: 'down',
      context: 'skipped tests and missing logs caused failure',
      whatWentWrong: 'verification skipped',
      whatToChange: 'always run tests',
      tags: ['debugging', 'verification'],
    });
    captureFeedback({
      signal: 'up',
      context: 'proof attached and verification complete',
      whatWorked: 'full evidence',
      tags: ['testing', 'verification'],
    });
    captureFeedback({
      signal: 'down',
      context: 'unsafe path and security risk caused rejection',
      whatWentWrong: 'unsafe path',
      whatToChange: 'validate paths',
      tags: ['security'],
    });

    // VETO-01: a bare "thumbs up" must come back as needing clarification.
    const clarification = captureFeedback({
      signal: 'up',
      context: 'thumbs up',
      tags: ['verification'],
    });
    addResult(
      'VETO-01',
      clarification.status === 'clarification_required' && clarification.needsClarification === true,
      `vague feedback status=${clarification.status}; prompt=${clarification.prompt || 'n/a'}`
    );
    captureFeedback({
      signal: 'positive',
      context: 'claimed success without logs',
      whatWorked: 'Reviewer approved despite missing logs',
      tags: ['verification'],
      rubricScores: [
        { criterion: 'verification_evidence', score: 5, judge: 'judge-a' },
        { criterion: 'verification_evidence', score: 2, judge: 'judge-b', evidence: 'missing logs' },
      ],
      guardrails: {
        testsPassed: false,
        pathSafety: true,
        budgetCompliant: true,
      },
    });
    captureFeedback({
      signal: 'down',
      context: 'regression due to skipped verification',
      whatWentWrong: 'regression shipped',
      whatToChange: 'add regression tests',
      tags: ['debugging', 'verification'],
    });

    // RISK-01 / RISK-02: after analysis a risk-model.json must exist in the
    // temp feedback dir (presumably written by analyzeFeedback — confirm in
    // feedback-loop) and boostedRisk must cover at least 6 examples.
    const riskModelPath = path.join(tmpFeedbackDir, 'risk-model.json');
    const analysis = analyzeFeedback();
    addResult(
      'RISK-01',
      fs.existsSync(riskModelPath),
      'risk-model artifact written'
    );
    addResult(
      'RISK-02',
      Boolean(analysis.boostedRisk && analysis.boostedRisk.exampleCount >= 6),
      `boostedRisk exampleCount=${analysis.boostedRisk ? analysis.boostedRisk.exampleCount : 0}; mode=${analysis.boostedRisk ? analysis.boostedRisk.mode : 'none'}; topDomain=${analysis.boostedRisk && analysis.boostedRisk.highRiskDomains[0] ? analysis.boostedRisk.highRiskDomains[0].key : 'none'}`
    );
  } finally {
    // Put the environment back exactly as it was found.
    overriddenEnvKeys.forEach((key) => {
      if (savedEnv[key] === undefined) delete process.env[key];
      else process.env[key] = savedEnv[key];
    });

    // Best-effort cleanup: retry transient failures, and never let a leftover
    // temp dir mask the proof outcome or block report writing.
    try {
      fs.rmSync(tmpFeedbackDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
    } catch (cleanupErr) {
      process.stderr.write(`warning: could not remove ${tmpFeedbackDir}: ${cleanupErr.message}\n`);
    }
  }

  const passed = report.summary.failed === 0 && report.testResults.failed === 0;
  const jsonPath = path.join(proofDir, 'local-intelligence-report.json');
  const mdPath = path.join(proofDir, 'local-intelligence-report.md');

  fs.writeFileSync(jsonPath, `${JSON.stringify(report, null, 2)}\n`);

  const lines = [
    '# Local Intelligence Proof Report',
    '',
    `Status: ${passed ? 'PASSED' : 'FAILED'}`,
    `Generated: ${report.generatedAt}`,
    '',
    '## Test Results',
    '',
    `- Passed: ${report.testResults.passed}`,
    `- Failed: ${report.testResults.failed}`,
    '',
    '## Checks',
    '',
  ];

  report.checks.forEach((check) => {
    lines.push(`- ${check.id}: ${check.passed ? 'PASS' : 'FAIL'} — ${check.evidence}`);
  });

  fs.writeFileSync(mdPath, `${lines.join('\n')}\n`);

  process.stdout.write(`Status: ${passed ? 'PASSED' : 'FAILED'}\n`);
  process.stdout.write(`JSON report: ${jsonPath}\n`);
  process.stdout.write(`Markdown report: ${mdPath}\n`);
  process.exit(passed ? 0 : 1);
}
|
|
240
|
+
|
|
241
|
+
// Script entry point: report any rejection from main() and exit with status 1.
function reportFatal(failure) {
  console.error(`prove-local-intelligence failed: ${failure.message}`);
  process.exit(1);
}

main().catch(reportFatal);
|
|
@@ -18,13 +18,21 @@ const fs = require('fs');
|
|
|
18
18
|
const os = require('os');
|
|
19
19
|
const path = require('path');
|
|
20
20
|
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
const ROOT = path.join(__dirname, '..');
|
|
22
|
+
|
|
23
|
+
/**
 * Work out where the loop-closure proof artifacts are written.
 *
 * The directory is RLHF_PROOF_DIR when that variable holds a non-empty value,
 * otherwise `proof` under ROOT. It is resolved on every call rather than once
 * at module load.
 *
 * @returns {{proofDir: string, reportJson: string, reportMd: string}}
 *   The proof directory and the JSON / Markdown report paths inside it.
 */
function resolveProofPaths() {
  const fromEnv = process.env.RLHF_PROOF_DIR;
  const proofDir = fromEnv ? fromEnv : path.join(ROOT, 'proof');

  const reportJson = path.join(proofDir, 'loop-closure-report.json');
  const reportMd = path.join(proofDir, 'loop-closure-report.md');

  return { proofDir, reportJson, reportMd };
}
|
|
24
31
|
|
|
25
32
|
function run() {
|
|
26
33
|
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rlhf-loop-proof-'));
|
|
27
34
|
const results = { passed: 0, failed: 0, requirements: {} };
|
|
35
|
+
const { proofDir, reportJson, reportMd } = resolveProofPaths();
|
|
28
36
|
|
|
29
37
|
const checks = [
|
|
30
38
|
{
|
|
@@ -168,7 +176,7 @@ function run() {
|
|
|
168
176
|
desc: 'test:loop-closure (node --test tests/loop-closure.test.js) passes with 0 failures',
|
|
169
177
|
fn: () => {
|
|
170
178
|
const out = execSync('node --test tests/loop-closure.test.js', {
|
|
171
|
-
cwd:
|
|
179
|
+
cwd: ROOT,
|
|
172
180
|
env: { ...process.env, RLHF_FEEDBACK_DIR: tmpDir },
|
|
173
181
|
encoding: 'utf8',
|
|
174
182
|
stdio: 'pipe',
|
|
@@ -207,7 +215,7 @@ function run() {
|
|
|
207
215
|
} catch {}
|
|
208
216
|
|
|
209
217
|
// Write proof artifacts
|
|
210
|
-
fs.mkdirSync(
|
|
218
|
+
fs.mkdirSync(proofDir, { recursive: true });
|
|
211
219
|
|
|
212
220
|
const report = {
|
|
213
221
|
phase: '08-loop-closure',
|
|
@@ -218,7 +226,7 @@ function run() {
|
|
|
218
226
|
requirements: results.requirements,
|
|
219
227
|
};
|
|
220
228
|
|
|
221
|
-
fs.writeFileSync(
|
|
229
|
+
fs.writeFileSync(reportJson, JSON.stringify(report, null, 2) + '\n');
|
|
222
230
|
|
|
223
231
|
const md = [
|
|
224
232
|
'# Phase 8: Loop Closure — Proof Report',
|
|
@@ -244,10 +252,10 @@ function run() {
|
|
|
244
252
|
'',
|
|
245
253
|
].join('\n');
|
|
246
254
|
|
|
247
|
-
fs.writeFileSync(
|
|
255
|
+
fs.writeFileSync(reportMd, md);
|
|
248
256
|
|
|
249
257
|
console.log(`\nPhase 8 proof: ${results.passed} passed, ${results.failed} failed`);
|
|
250
|
-
console.log(`Report: ${
|
|
258
|
+
console.log(`Report: ${reportJson}`);
|
|
251
259
|
|
|
252
260
|
if (results.failed > 0) process.exit(1);
|
|
253
261
|
}
|
|
@@ -15,7 +15,9 @@ const path = require('path');
|
|
|
15
15
|
const { execSync } = require('child_process');
|
|
16
16
|
|
|
17
17
|
const ROOT = path.join(__dirname, '..');
|
|
18
|
-
|
|
18
|
+
/**
 * Return the directory for proof reports.
 *
 * @returns {string} RLHF_PROOF_DIR from the environment when it is non-empty;
 *   otherwise the `proof` folder under ROOT.
 */
function getProofDir() {
  const configured = process.env.RLHF_PROOF_DIR;
  return configured ? configured : path.join(ROOT, 'proof');
}
|
|
19
21
|
|
|
20
22
|
function ensureDir(d) {
|
|
21
23
|
if (!fs.existsSync(d)) fs.mkdirSync(d, { recursive: true });
|
|
@@ -251,9 +253,10 @@ async function main() {
|
|
|
251
253
|
overallPassed: allPassed,
|
|
252
254
|
};
|
|
253
255
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
const
|
|
256
|
+
const proofDir = getProofDir();
|
|
257
|
+
ensureDir(proofDir);
|
|
258
|
+
const jsonPath = path.join(proofDir, 'training-export-report.json');
|
|
259
|
+
const mdPath = path.join(proofDir, 'training-export-report.md');
|
|
257
260
|
|
|
258
261
|
fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2));
|
|
259
262
|
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
const fs = require('node:fs');
|
|
3
|
+
const path = require('node:path');
|
|
4
|
+
const {
|
|
5
|
+
runWorkflowContractValidation,
|
|
6
|
+
} = require('./validate-workflow-contract');
|
|
7
|
+
|
|
8
|
+
const PROJECT_ROOT = path.join(__dirname, '..');
|
|
9
|
+
const DEFAULT_PROOF_DIR = path.join(PROJECT_ROOT, 'proof', 'workflow-contract');
|
|
10
|
+
|
|
11
|
+
/**
 * Make sure `dirPath` exists, creating it and any missing ancestors.
 * Nothing is done when the path already exists.
 *
 * @param {string} dirPath - Directory that should exist afterwards.
 */
function ensureDir(dirPath) {
  if (fs.existsSync(dirPath)) return;
  fs.mkdirSync(dirPath, { recursive: true });
}
|
|
16
|
+
|
|
17
|
+
/**
 * Render a workflow-contract proof report as Markdown.
 *
 * @param {object} report - Report with `generatedAt`, `summary.passed`,
 *   `summary.failed`, `files` (object whose values are listed), `checks`
 *   (entries with `name` and `passed`) and `issues` (list of strings).
 * @returns {string} Markdown text ending in a newline. The "Issues" section
 *   is only present when `report.issues` is non-empty.
 */
function toMarkdown(report) {
  const header = [
    '# Workflow Contract Proof Report',
    '',
    `Generated: ${report.generatedAt}`,
    '',
    `Summary: ${report.summary.passed} passed, ${report.summary.failed} failed`,
    '',
  ];
  const fileBullets = Object.values(report.files).map((filePath) => `- \`${filePath}\``);
  const checkBullets = report.checks.map(
    (check) => `- ${check.name}: ${check.passed ? 'PASS' : 'FAIL'}`
  );

  const out = [
    ...header,
    '## Validated Files',
    '',
    ...fileBullets,
    '',
    '## Checks',
    '',
    ...checkBullets,
  ];

  if (report.issues.length > 0) {
    out.push('', '## Issues', '');
    for (const issue of report.issues) {
      out.push(`- ${issue}`);
    }
  }

  return `${out.join('\n')}\n`;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Run the workflow-contract validation and package the result as a proof
 * report, optionally writing `report.json` and `report.md`.
 *
 * All four checks share the single `validation.ok` verdict, so the summary is
 * either 4 passed / 0 failed or 0 passed / 4 failed.
 *
 * @param {object} [options]
 * @param {string} [options.proofDir] - Output directory; falls back to
 *   RLHF_WORKFLOW_CONTRACT_PROOF_DIR, then DEFAULT_PROOF_DIR.
 * @param {boolean} [options.writeArtifacts] - Pass `false` to skip writing
 *   files; any other value writes them.
 * @param {string} [options.projectRoot] - Root handed to the validator;
 *   defaults to PROJECT_ROOT.
 * @returns {object} The report (`generatedAt`, `files`, `checks`, `issues`,
 *   `summary`).
 */
function runWorkflowContractProof(options = {}) {
  const proofDir = options.proofDir || process.env.RLHF_WORKFLOW_CONTRACT_PROOF_DIR || DEFAULT_PROOF_DIR;
  const writeArtifacts = options.writeArtifacts !== false;
  const validation = runWorkflowContractValidation({ projectRoot: options.projectRoot || PROJECT_ROOT });

  const verdict = validation.ok;
  const makeCheck = (name, details) => ({ name, passed: verdict, details });

  const workflow = validation.details.workflow;
  const workflowCheck = makeCheck('workflow.contract.complete', {
    headingsFound: workflow ? workflow.headingsFound : [],
    proofCommandsFound: workflow ? workflow.proofCommandsFound : [],
  });

  const issueTemplate = validation.details.issueTemplate;
  const issueTemplateCheck = makeCheck('issue.template.complete', {
    fieldIdsFound: issueTemplate ? issueTemplate.fieldIdsFound : [],
  });

  const pullRequestTemplate = validation.details.pullRequestTemplate;
  const pullRequestCheck = makeCheck('pull_request.template.complete', {
    sectionsFound: pullRequestTemplate ? pullRequestTemplate.sectionsFound : [],
  });

  const readmeCheck = makeCheck('readme.links.contracts', validation.details.readme || {});

  const report = {
    generatedAt: validation.generatedAt,
    files: validation.files,
    checks: [workflowCheck, issueTemplateCheck, pullRequestCheck, readmeCheck],
    issues: validation.issues.slice(),
    summary: {
      passed: verdict ? 4 : 0,
      failed: verdict ? 0 : 4,
    },
  };

  if (!writeArtifacts) {
    return report;
  }

  ensureDir(proofDir);
  fs.writeFileSync(path.join(proofDir, 'report.json'), JSON.stringify(report, null, 2));
  fs.writeFileSync(path.join(proofDir, 'report.md'), toMarkdown(report));
  return report;
}
|
|
101
|
+
|
|
102
|
+
// CLI entry point: run the proof, print the Markdown report, and exit with
// status 1 (report on stderr) when any check failed.
if (require.main === module) {
  const cliReport = runWorkflowContractProof();
  const hasFailures = cliReport.summary.failed > 0;
  const rendered = toMarkdown(cliReport);

  if (hasFailures) {
    console.error(rendered);
    process.exit(1);
  }

  console.log(rendered);
}
|
|
111
|
+
|
|
112
|
+
module.exports = {
|
|
113
|
+
DEFAULT_PROOF_DIR,
|
|
114
|
+
runWorkflowContractProof,
|
|
115
|
+
toMarkdown,
|
|
116
|
+
};
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
const PROJECT_ROOT = path.join(__dirname, '..');
|
|
7
|
+
const DEFAULT_STATE_PATH = path.join(PROJECT_ROOT, '.rlhf', 'reminder-state.json');
|
|
8
|
+
|
|
9
|
+
const REMINDER_TEMPLATES = {
|
|
10
|
+
guardrail_spike: 'Safety guardrails triggered {{count}} times. Re-apply rule: {{rule}}',
|
|
11
|
+
iteration_limit: 'Approaching max iterations ({{count}}/{{limit}}). Prioritize essential actions only.',
|
|
12
|
+
tool_misuse: 'Tool misuse detected {{count}} times for: {{tools}}. Verify tool schemas before calling.',
|
|
13
|
+
error_cascade: 'Repeated errors ({{count}}). Switch strategy: {{suggestion}}',
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
const DEFAULT_THRESHOLDS = {
|
|
17
|
+
guardrail_spike: 3,
|
|
18
|
+
iteration_limit: 1,
|
|
19
|
+
tool_misuse: 2,
|
|
20
|
+
error_cascade: 3,
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/**
 * Choose which reminder-state file to use.
 *
 * @param {string} [stateFile] - Explicit path; any falsy value selects the default.
 * @returns {string} `stateFile` when truthy, otherwise DEFAULT_STATE_PATH.
 */
function getStatePath(stateFile) {
  if (stateFile) {
    return stateFile;
  }
  return DEFAULT_STATE_PATH;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Load the reminder state from disk.
 *
 * A missing, unreadable or malformed file yields a fresh `{ counts: {} }`.
 * Valid JSON of the wrong shape is normalised as well, so callers can always
 * index `state.counts`: a non-object document is replaced by a fresh state,
 * and an object whose `counts` is missing or not a plain object keeps its
 * other keys but gets an empty `counts`.
 *
 * @param {string} [stateFile] - Path to the state JSON; defaults via getStatePath.
 * @returns {{counts: object}} State whose `counts` is always a plain object.
 */
function loadState(stateFile) {
  const p = getStatePath(stateFile);
  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

  try {
    if (fs.existsSync(p)) {
      const parsed = JSON.parse(fs.readFileSync(p, 'utf-8'));
      if (isRecord(parsed)) {
        return isRecord(parsed.counts) ? parsed : { ...parsed, counts: {} };
      }
      // parsed but not an object (null, array, scalar) — start fresh below
    }
  } catch {
    // corrupted or unreadable — start fresh
  }
  return { counts: {} };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Write the reminder state to disk as pretty-printed JSON, creating the
 * parent directory first when needed.
 *
 * @param {object} state - State object to serialise.
 * @param {string} [stateFile] - Destination path; defaults via getStatePath.
 */
function saveState(state, stateFile) {
  const target = getStatePath(stateFile);
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });

  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(target, serialized);
}
|
|
42
|
+
|
|
43
|
+
/**
 * Increment the event counter for a given event type and persist it.
 *
 * Only counters stored on the state itself are read. Names that collide with
 * inherited Object.prototype members (e.g. `toString`, `constructor`) start
 * from 0 instead of picking up the inherited function and producing a
 * string-concatenated "count".
 *
 * @param {string} eventType - One of the keys in REMINDER_TEMPLATES
 * @param {string} [stateFile] - Path to state JSON (default: .rlhf/reminder-state.json)
 * @returns {number} New count after incrementing
 */
function trackEvent(eventType, stateFile) {
  const state = loadState(stateFile);
  const hasOwnCount = Object.prototype.hasOwnProperty.call(state.counts, eventType);
  const previous = hasOwnCount ? state.counts[eventType] || 0 : 0;
  state.counts[eventType] = previous + 1;
  saveState(state, stateFile);
  return state.counts[eventType];
}
|
|
55
|
+
|
|
56
|
+
/**
 * Get the current event count without modifying state.
 *
 * Only counters stored on the state itself are considered, so an event name
 * that matches an inherited Object.prototype member (e.g. `toString`) reports
 * 0 rather than returning the inherited function.
 *
 * @param {string} eventType
 * @param {string} [stateFile]
 * @returns {number} Stored count, or 0 when none is recorded.
 */
function getEventCount(eventType, stateFile) {
  const { counts } = loadState(stateFile);
  if (!Object.prototype.hasOwnProperty.call(counts, eventType)) {
    return 0;
  }
  return counts[eventType] || 0;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Decide whether enough events of a type have accumulated to warrant a reminder.
 *
 * @param {string} eventType
 * @param {number} [threshold] - Used when it is a number; otherwise the limit
 *   is DEFAULT_THRESHOLDS[eventType], or 3 when that entry is missing or falsy.
 * @param {string} [stateFile]
 * @returns {boolean} True when the stored count is at or above the limit.
 */
function shouldInjectReminder(eventType, threshold, stateFile) {
  let limit = threshold;
  if (typeof limit !== 'number') {
    limit = DEFAULT_THRESHOLDS[eventType] || 3;
  }
  const seen = getEventCount(eventType, stateFile);
  return seen >= limit;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Fill the reminder template for an event type with values from `ctx`.
 *
 * Each `{{name}}` placeholder is replaced by `ctx[name]` when that value is
 * not undefined; otherwise it is emitted as `{name}` (single braces). Event
 * types without a template produce a generic "[Reminder] Event: …" line.
 *
 * @param {string} eventType
 * @param {object} ctx - Variables to substitute into {{var}} placeholders
 * @returns {string}
 */
function renderTemplate(eventType, ctx) {
  const pattern = REMINDER_TEMPLATES[eventType];
  if (!pattern) {
    return `[Reminder] Event: ${eventType}`;
  }

  const fill = (_placeholder, name) => {
    const hasValue = Boolean(ctx) && ctx[name] !== undefined;
    if (hasValue) {
      return ctx[name];
    }
    return `{${name}}`;
  };

  return pattern.replace(/\{\{(\w+)\}\}/g, fill);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build a new turns array with a system reminder appended.
 *
 * State is only read (for the current count); the input array is not
 * modified. Callers decide when to call trackEvent or resetEvent.
 *
 * @param {object[]} turns - Existing turns array
 * @param {string} eventType
 * @param {object} ctx - Template variables; `count` is always taken from the
 *   stored event count and overrides any `count` in `ctx`
 * @param {string} [stateFile]
 * @returns {object[]} Copy of `turns` followed by the reminder turn
 */
function injectReminder(turns, eventType, ctx, stateFile) {
  const occurrences = getEventCount(eventType, stateFile);
  const templateVars = Object.assign({}, ctx, { count: occurrences });
  const text = renderTemplate(eventType, templateVars);

  const reminderTurn = {
    role: 'user',
    content: `[System Reminder] ${text}`,
    injectedAt: new Date().toISOString(),
    eventType,
  };

  const extended = [...turns];
  extended.push(reminderTurn);
  return extended;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Set the stored counter for an event type back to 0 and persist the state
 * (e.g. once a reminder has been acted on).
 *
 * @param {string} eventType
 * @param {string} [stateFile]
 */
function resetEvent(eventType, stateFile) {
  const snapshot = loadState(stateFile);
  const { counts } = snapshot;
  counts[eventType] = 0;
  saveState(snapshot, stateFile);
}
|
|
121
|
+
|
|
122
|
+
module.exports = {
|
|
123
|
+
REMINDER_TEMPLATES,
|
|
124
|
+
DEFAULT_THRESHOLDS,
|
|
125
|
+
DEFAULT_STATE_PATH,
|
|
126
|
+
trackEvent,
|
|
127
|
+
getEventCount,
|
|
128
|
+
shouldInjectReminder,
|
|
129
|
+
renderTemplate,
|
|
130
|
+
injectReminder,
|
|
131
|
+
resetEvent,
|
|
132
|
+
};
|