rlhf-feedback-loop 0.6.11 → 0.6.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +116 -74
- package/adapters/README.md +3 -3
- package/adapters/amp/skills/rlhf-feedback/SKILL.md +2 -0
- package/adapters/chatgpt/INSTALL.md +6 -3
- package/adapters/chatgpt/openapi.yaml +5 -2
- package/adapters/claude/.mcp.json +3 -3
- package/adapters/codex/config.toml +3 -3
- package/adapters/gemini/function-declarations.json +2 -2
- package/adapters/mcp/server-stdio.js +19 -5
- package/bin/cli.js +295 -25
- package/openapi/openapi.yaml +5 -2
- package/package.json +23 -10
- package/scripts/a2ui-engine.js +73 -0
- package/scripts/adk-consolidator.js +126 -32
- package/scripts/billing.js +192 -685
- package/scripts/context-engine.js +81 -0
- package/scripts/export-kto-pairs.js +310 -0
- package/scripts/feedback-ingest-watcher.js +290 -0
- package/scripts/feedback-loop.js +153 -8
- package/scripts/feedback-quality.js +139 -0
- package/scripts/feedback-schema.js +31 -5
- package/scripts/feedback-to-memory.js +13 -1
- package/scripts/hook-auto-capture.sh +6 -0
- package/scripts/hook-stop-self-score.sh +51 -0
- package/scripts/install-mcp.js +168 -0
- package/scripts/jsonl-watcher.js +151 -0
- package/scripts/local-model-profile.js +207 -0
- package/scripts/pr-manager.js +112 -0
- package/scripts/prove-adapters.js +137 -15
- package/scripts/prove-automation.js +41 -8
- package/scripts/prove-lancedb.js +1 -1
- package/scripts/prove-local-intelligence.js +244 -0
- package/scripts/prove-workflow-contract.js +116 -0
- package/scripts/reminder-engine.js +132 -0
- package/scripts/risk-scorer.js +458 -0
- package/scripts/rlaif-self-audit.js +7 -1
- package/scripts/status-dashboard.js +155 -0
- package/scripts/test-coverage.js +1 -1
- package/scripts/validate-workflow-contract.js +287 -0
- package/scripts/vector-store.js +115 -17
- package/src/api/server.js +372 -25
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
|
|
7
|
+
const PROJECT_ROOT = path.join(__dirname, '..');
|
|
8
|
+
const DEFAULT_FEEDBACK_DIR = path.join(PROJECT_ROOT, '.claude', 'memory', 'feedback');
|
|
9
|
+
const DEFAULT_MODEL_PATH = path.join(DEFAULT_FEEDBACK_DIR, 'risk-model.json');
|
|
10
|
+
const DEFAULT_SEQUENCE_PATH = path.join(DEFAULT_FEEDBACK_DIR, 'feedback-sequences.jsonl');
|
|
11
|
+
|
|
12
|
+
const DOMAIN_FEATURES = [
|
|
13
|
+
'general',
|
|
14
|
+
'testing',
|
|
15
|
+
'security',
|
|
16
|
+
'performance',
|
|
17
|
+
'ui-components',
|
|
18
|
+
'api-integration',
|
|
19
|
+
'git-workflow',
|
|
20
|
+
'documentation',
|
|
21
|
+
'debugging',
|
|
22
|
+
'architecture',
|
|
23
|
+
'data-modeling',
|
|
24
|
+
];
|
|
25
|
+
|
|
26
|
+
const RISK_WORD_RE = /\b(fail|error|wrong|missing|skip|regress|unsafe|blocked|rejected)\b/i;
|
|
27
|
+
const VERIFY_WORD_RE = /\b(test|verify|coverage|evidence|log|proof)\b/i;
|
|
28
|
+
const SAFETY_WORD_RE = /\b(budget|path|guardrail|safe|security|risk)\b/i;
|
|
29
|
+
const SUCCESS_WORD_RE = /\b(pass|worked|fixed|success|verified)\b/i;
|
|
30
|
+
|
|
31
|
+
/**
 * Choose the feedback directory to operate on.
 *
 * Precedence: the explicit argument, then the RLHF_FEEDBACK_DIR environment
 * variable, then DEFAULT_FEEDBACK_DIR. Falsy values (including '') fall through.
 *
 * @param {string} [feedbackDir] - Caller-supplied directory.
 * @returns {string} The directory to use.
 */
function resolveFeedbackDir(feedbackDir) {
  if (feedbackDir) return feedbackDir;
  const fromEnv = process.env.RLHF_FEEDBACK_DIR;
  if (fromEnv) return fromEnv;
  return DEFAULT_FEEDBACK_DIR;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Read a JSON-Lines file into an array of parsed records.
 *
 * A missing file or a file containing only whitespace yields []. Lines that
 * fail to parse are skipped, as are lines that parse to a falsy value
 * (0, false, null, "").
 *
 * @param {string} filePath - Path of the .jsonl file.
 * @returns {Array} Parsed truthy records in file order.
 */
function readJSONL(filePath) {
  if (!fs.existsSync(filePath)) return [];

  const text = fs.readFileSync(filePath, 'utf8').trim();
  if (text === '') return [];

  const records = [];
  for (const line of text.split('\n')) {
    let parsed = null;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Malformed line: treated the same as an empty record and dropped below.
      parsed = null;
    }
    if (parsed) records.push(parsed);
  }
  return records;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Arithmetic mean of a list, coercing each item with Number(item || 0).
 *
 * @param {Array} values - Items to average; falsy items count as 0.
 * @returns {number} The mean, or 0 when `values` is not a non-empty array.
 */
function average(values) {
  if (!Array.isArray(values) || values.length === 0) return 0;
  let total = 0;
  for (const value of values) {
    total += Number(value || 0);
  }
  return total / values.length;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Largest value in a list, coercing each item with Number(item || 0).
 *
 * The running best starts at 0, so the result is never below 0 even when
 * every item is negative.
 *
 * @param {Array} values - Items to scan.
 * @returns {number} The maximum (floored at 0), or 0 when `values` is not a non-empty array.
 */
function max(values) {
  if (!Array.isArray(values) || values.length === 0) return 0;
  let best = 0;
  values.forEach((value) => {
    best = Math.max(best, Number(value || 0));
  });
  return best;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Count the items whose numeric value is strictly below zero.
 *
 * @param {Array} [values] - Items to inspect; a falsy argument is treated as [].
 * @returns {number} How many items satisfy Number(item) < 0.
 */
function countNegatives(values) {
  const items = values || [];
  return items.reduce((count, value) => (Number(value) < 0 ? count + 1 : count), 0);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Count the items whose numeric value is strictly above zero.
 *
 * @param {Array} [values] - Items to inspect; a falsy argument is treated as [].
 * @returns {number} How many items satisfy Number(item) > 0.
 */
function countPositives(values) {
  const items = values || [];
  return items.reduce((count, value) => (Number(value) > 0 ? count + 1 : count), 0);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Return `value` unchanged when it is an array; otherwise return a new empty array.
 *
 * @param {*} value - Anything.
 * @returns {Array} The same array instance, or [].
 */
function toArray(value) {
  if (Array.isArray(value)) return value;
  return [];
}
|
|
72
|
+
|
|
73
|
+
/**
 * Collect the most frequent normalized values of one row field.
 *
 * For key 'targetTags' the values come from `row.targetTags || row.tags`
 * (ignored unless it is an array); for any other key the single truthy
 * `row[key]` is used. Values are stringified, trimmed and lower-cased, and
 * empty results are skipped.
 *
 * @param {Array<Object>} rows - Feedback rows.
 * @param {string} key - Field to tally ('targetTags' is special-cased).
 * @param {number} limit - Maximum number of entries returned.
 * @returns {string[]} Values ordered by descending count, ties broken by localeCompare.
 */
function buildVocabulary(rows, key, limit) {
  const tally = new Map();

  for (const row of rows) {
    let candidates;
    if (key === 'targetTags') {
      const tags = row.targetTags || row.tags;
      candidates = Array.isArray(tags) ? tags : [];
    } else if (row[key]) {
      candidates = [row[key]];
    } else {
      candidates = [];
    }

    for (const candidate of candidates) {
      const normalized = String(candidate || '').trim().toLowerCase();
      if (normalized) {
        tally.set(normalized, (tally.get(normalized) || 0) + 1);
      }
    }
  }

  const ranked = [...tally.entries()];
  ranked.sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]));
  return ranked.slice(0, limit).map((entry) => entry[0]);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Build the vocabulary used to one-hot encode rows.
 *
 * @param {Array<Object>} rows - Feedback rows to scan for tags and skills.
 * @param {Object} [options]
 * @param {number} [options.maxTags] - Tag vocabulary size; falsy values fall back to 8.
 * @param {number} [options.maxSkills] - Skill vocabulary size; falsy values fall back to 4.
 * @returns {{topTags: string[], topSkills: string[], domains: string[]}} Registry;
 *   `domains` is the shared DOMAIN_FEATURES array itself, not a copy.
 */
function buildFeatureRegistry(rows, options = {}) {
  const tagLimit = options.maxTags || 8;
  const skillLimit = options.maxSkills || 4;
  const topTags = buildVocabulary(rows, 'targetTags', tagLimit);
  const topSkills = buildVocabulary(rows, 'skill', skillLimit);
  return { topTags, topSkills, domains: DOMAIN_FEATURES };
}
|
|
99
|
+
|
|
100
|
+
/**
 * Derive the binary training target for a row.
 *
 * Resolution order:
 *   1. a numeric `targetRisk` wins outright (1 when > 0, else 0);
 *   2. `accepted === false` means high risk;
 *   3. otherwise the row is high risk only when `label` (or, failing that,
 *      `signal`) equals 'negative', case-insensitively.
 *
 * @param {Object} row - Feedback row.
 * @returns {0|1} 1 for high risk, 0 otherwise.
 */
function deriveTargetRisk(row) {
  const explicit = row.targetRisk;
  if (typeof explicit === 'number') {
    return explicit > 0 ? 1 : 0;
  }
  if (row.accepted === false) {
    return 1;
  }
  const verdict = String(row.label || row.signal || '').toLowerCase();
  if (verdict === 'negative') {
    return 1;
  }
  return 0;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Mean negative share across all action patterns.
 *
 * Each entry contributes negative / (positive + negative), or 0 when that
 * total is not positive; the contributions are then averaged with equal weight.
 *
 * @param {Object<string, {positive?: number, negative?: number}>} [actionPatterns]
 * @returns {number} Average negative rate, or 0 when there are no entries.
 */
function deriveActionNegativeRate(actionPatterns) {
  const entries = Object.values(actionPatterns || {});
  if (entries.length === 0) return 0;

  let rateSum = 0;
  for (const entry of entries) {
    const positive = Number(entry.positive || 0);
    const negative = Number(entry.negative || 0);
    const total = positive + negative;
    const rate = total > 0 ? negative / total : 0;
    rateSum += Number(rate || 0);
  }
  return rateSum / entries.length;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Flatten one feedback row into the numeric feature map used by the stump learners.
 *
 * Produces scalar features (sequence statistics, rubric counts, keyword flags)
 * followed by one-hot `domain:*`, `tag:*` and `skill:*` indicators driven by
 * `registry`. Key insertion order is significant: trainRiskModel derives its
 * feature name list from Object.keys of the first example.
 *
 * @param {Object} row - Feedback row or candidate built by buildRiskCandidate.
 * @param {{domains: string[], topTags: string[], topSkills: string[]}} registry
 *   Vocabulary from buildFeatureRegistry.
 * @returns {Object<string, number>} Feature name → numeric value.
 */
function extractFeatureMap(row, registry) {
  const rowFeatures = row.features || {};
  const rewardSequence = toArray(rowFeatures.rewardSequence);
  const timeGaps = toArray(rowFeatures.timeGaps);
  const currentTags = toArray(row.targetTags || row.tags).map((tag) => String(tag).trim().toLowerCase());
  const currentTagSet = new Set(currentTags);
  const currentSkill = String(row.skill || '').trim().toLowerCase();
  const domain = String(row.domain || (row.richContext && row.richContext.domain) || 'general').trim().toLowerCase();
  const context = String(row.context || '').toLowerCase();
  const rubric = row.rubric || {};
  const filePathCount = Number(row.filePathCount || 0);
  const hasErrorType = row.errorType ? 1 : 0;
  const failingCriteria = toArray(rubric.failingCriteria);
  const failingGuardrails = toArray(rubric.failingGuardrails);
  const judgeDisagreements = toArray(rubric.judgeDisagreements);

  // Number(null) is 0, which is finite, so an explicit `weightedScore: null`
  // used to be recorded as a score of 0 while hasRubric said "no rubric".
  // Only coerce when a score is actually present so both null and undefined
  // fall back to the neutral 0.5.
  const hasWeightedScore = rubric.weightedScore != null;
  const weightedScore = hasWeightedScore ? Number(rubric.weightedScore) : NaN;

  const features = {
    recentTrend: Number(rowFeatures.recentTrend || 0),
    sequenceLength: rewardSequence.length,
    recentNegativeCount: countNegatives(rewardSequence),
    recentPositiveCount: countPositives(rewardSequence),
    avgTimeGap: average(timeGaps),
    maxTimeGap: max(timeGaps),
    tagCount: currentTags.length,
    filePathCount,
    hasErrorType,
    hasRubric: hasWeightedScore ? 1 : 0,
    rubricWeightedScore: Number.isFinite(weightedScore) ? weightedScore : 0.5,
    failingCriteriaCount: failingCriteria.length,
    failingGuardrailsCount: failingGuardrails.length,
    judgeDisagreementCount: judgeDisagreements.length,
    actionNegativeRate: deriveActionNegativeRate(rowFeatures.actionPatterns),
    containsRiskWord: RISK_WORD_RE.test(context) ? 1 : 0,
    containsVerificationWord: VERIFY_WORD_RE.test(context) ? 1 : 0,
    containsSafetyWord: SAFETY_WORD_RE.test(context) ? 1 : 0,
    containsSuccessWord: SUCCESS_WORD_RE.test(context) ? 1 : 0,
  };

  // One-hot indicators; every registry entry gets a key even when it is 0.
  registry.domains.forEach((knownDomain) => {
    features[`domain:${knownDomain}`] = domain === knownDomain ? 1 : 0;
  });
  registry.topTags.forEach((tag) => {
    features[`tag:${tag}`] = currentTagSet.has(tag) ? 1 : 0;
  });
  registry.topSkills.forEach((skill) => {
    features[`skill:${skill}`] = currentSkill === skill ? 1 : 0;
  });

  return features;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Propose split thresholds for one feature column.
 *
 * Values are coerced with Number(v || 0) and de-duplicated. With no values the
 * result is [0]; with a single distinct value it is that value. With up to six
 * distinct values every midpoint between neighbours is returned; beyond that,
 * only the midpoints at roughly the 20/40/60/80% positions (de-duplicated).
 *
 * @param {Array} values - Raw feature values, one per example.
 * @returns {number[]} Candidate thresholds.
 */
function candidateThresholds(values) {
  const numeric = values.map((value) => Number(value || 0));
  const uniques = Array.from(new Set(numeric)).sort((a, b) => a - b);

  if (uniques.length === 0) return [0];
  if (uniques.length === 1) return [uniques[0]];

  const midpointAfter = (index) => (uniques[index] + uniques[index + 1]) / 2;

  if (uniques.length <= 6) {
    const midpoints = [];
    for (let index = 0; index < uniques.length - 1; index += 1) {
      midpoints.push(midpointAfter(index));
    }
    return midpoints;
  }

  const lastPairStart = uniques.length - 2;
  const picks = new Set();
  for (const pct of [0.2, 0.4, 0.6, 0.8]) {
    const rawPosition = Math.floor((uniques.length - 1) * pct);
    const position = Math.min(lastPairStart, Math.max(0, rawPosition));
    picks.add(midpointAfter(position));
  }
  return [...picks];
}
|
|
182
|
+
|
|
183
|
+
/**
 * Evaluate a decision stump: +1 when the value is strictly above the
 * threshold, -1 otherwise, multiplied by `polarity`.
 *
 * @param {*} value - Feature value; coerced with Number(value || 0).
 * @param {number} threshold - Split point.
 * @param {number} polarity - +1 or -1 as used by the trainer.
 * @returns {number} The signed prediction.
 */
function stumpPredict(value, threshold, polarity) {
  const isAbove = Number(value || 0) > threshold;
  return (isAbove ? 1 : -1) * polarity;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Exhaustively search feature × threshold × polarity for the stump with the
 * lowest weighted error.
 *
 * Candidates are visited in featureNames order, then threshold order, then
 * polarity -1 before +1; on ties the first candidate seen is kept.
 *
 * @param {Array<{features: Object, label: number}>} examples - Training examples.
 * @param {number[]} weights - Per-example weights, index-aligned with `examples`.
 * @param {string[]} featureNames - Features to consider.
 * @returns {?{feature: string, threshold: number, polarity: number, error: number, predictions: number[]}}
 *   The best stump with its per-example predictions, or null if nothing was evaluated.
 */
function findBestWeakLearner(examples, weights, featureNames) {
  let best = null;

  for (const feature of featureNames) {
    const column = examples.map((example) => example.features[feature]);

    for (const threshold of candidateThresholds(column)) {
      for (const polarity of [-1, 1]) {
        const predictions = examples.map(
          (example) => stumpPredict(example.features[feature], threshold, polarity),
        );

        // Weighted error: total weight of the examples this stump gets wrong.
        const error = predictions.reduce(
          (sum, prediction, index) => (prediction !== examples[index].label ? sum + weights[index] : sum),
          0,
        );

        if (best === null || error < best.error) {
          best = { feature, threshold, polarity, error, predictions };
        }
      }
    }
  }

  return best;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Scale weights so they sum to 1.
 *
 * When the sum is 0 (or otherwise falsy) the divisor falls back to 1 and the
 * weights are returned unscaled.
 *
 * @param {number[]} weights - Raw weights.
 * @returns {number[]} A new array of scaled weights.
 */
function normalizeWeights(weights) {
  let total = 0;
  for (const weight of weights) {
    total += weight;
  }
  const divisor = total || 1;
  return weights.map((weight) => weight / divisor);
}
|
|
226
|
+
|
|
227
|
+
/**
 * Fraction of training examples whose predicted label matches their target.
 *
 * @param {Object} model - Risk model passed through to predictRisk.
 * @param {Array<{row: Object, label: number}>} examples - label 1 means 'high-risk',
 *   anything else 'low-risk'.
 * @returns {number} Accuracy in [0, 1]; 0 when there are no examples.
 */
function trainingAccuracy(model, examples) {
  if (examples.length === 0) return 0;

  let hits = 0;
  for (const example of examples) {
    const predicted = predictRisk(model, example.row);
    const expected = example.label === 1 ? 'high-risk' : 'low-risk';
    if (predicted.label === expected) hits += 1;
  }
  return hits / examples.length;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Summarize which tags, domains and skills are most often high risk.
 *
 * For each dimension, rows are grouped by normalized value (trimmed,
 * lower-cased); groups seen fewer than twice are discarded; the rest are
 * ranked by risk rate (desc), then total (desc), then key, and the top five kept.
 *
 * @param {Array<Object>} rows - Feedback rows.
 * @returns {{tags: Array, domains: Array, skills: Array}} Each entry is
 *   `{ key, total, highRisk, riskRate }` with riskRate rounded to 3 decimals.
 */
function buildPatternSummary(rows) {
  const rankByRisk = (valuesFn) => {
    const buckets = new Map();

    for (const row of rows) {
      for (const value of valuesFn(row)) {
        const key = String(value || '').trim().toLowerCase();
        if (!key) continue;

        let bucket = buckets.get(key);
        if (!bucket) {
          bucket = { key, total: 0, highRisk: 0 };
          buckets.set(key, bucket);
        }
        bucket.total += 1;
        bucket.highRisk += deriveTargetRisk(row);
      }
    }

    const ranked = [];
    for (const bucket of buckets.values()) {
      if (bucket.total < 2) continue;
      ranked.push({
        ...bucket,
        riskRate: Math.round((bucket.highRisk / bucket.total) * 1000) / 1000,
      });
    }
    ranked.sort((a, b) => (
      b.riskRate - a.riskRate || b.total - a.total || a.key.localeCompare(b.key)
    ));
    return ranked.slice(0, 5);
  };

  const tagsOf = (row) => toArray(row.targetTags || row.tags);
  const domainOf = (row) => [row.domain || (row.richContext && row.richContext.domain) || 'general'];
  const skillOf = (row) => (row.skill ? [row.skill] : []);

  return {
    tags: rankByRisk(tagsOf),
    domains: rankByRisk(domainOf),
    skills: rankByRisk(skillOf),
  };
}
|
|
268
|
+
|
|
269
|
+
/**
 * Train the risk model: AdaBoost over single-feature decision stumps.
 *
 * The model stays in 'baseline' mode (no learners, prediction from baseRate)
 * when there are fewer than 6 examples or all examples share one class.
 * Otherwise up to `options.rounds` stumps (default 8, clamped to 1..12) are
 * added, stopping early once the best stump's clipped error reaches 0.5.
 *
 * @param {Array<Object>} rows - Feedback rows.
 * @param {Object} [options] - `rounds`, plus `maxTags` / `maxSkills` forwarded
 *   to buildFeatureRegistry.
 * @returns {Object} The model: metadata, featureRegistry, featureNames,
 *   learners, patterns and metrics ({ trainingAccuracy, rounds, mode }).
 */
function trainRiskModel(rows, options = {}) {
  const registry = buildFeatureRegistry(rows, options);
  const examples = rows.map((row) => {
    const label = deriveTargetRisk(row) === 1 ? 1 : -1;
    return { row, label, features: extractFeatureMap(row, registry) };
  });

  const exampleCount = examples.length;
  const highRiskCount = examples.filter((example) => example.label === 1).length;

  const model = {
    version: 1,
    algorithm: 'adaboost-stumps',
    trainedAt: new Date().toISOString(),
    exampleCount,
    highRiskExamples: highRiskCount,
    baseRate: exampleCount > 0 ? highRiskCount / exampleCount : 0,
    featureRegistry: registry,
    featureNames: examples[0] ? Object.keys(examples[0].features) : [],
    learners: [],
    patterns: buildPatternSummary(rows),
    metrics: {
      trainingAccuracy: 0,
      rounds: 0,
      mode: 'baseline',
    },
  };

  const tooFewExamples = exampleCount < 6;
  const singleClass = highRiskCount === 0 || highRiskCount === exampleCount;

  if (!tooFewExamples && !singleClass) {
    let weights = normalizeWeights(new Array(exampleCount).fill(1));
    const maxRounds = Math.max(1, Math.min(12, Number(options.rounds || 8)));

    let round = 0;
    while (round < maxRounds) {
      const learner = findBestWeakLearner(examples, weights, model.featureNames);
      if (!learner) break;

      // Clip away from 0 and 1 so the log ratio below stays finite.
      const clippedError = Math.min(Math.max(learner.error, 1e-6), 1 - 1e-6);
      if (clippedError >= 0.5) break;

      const alpha = 0.5 * Math.log((1 - clippedError) / clippedError);
      const { feature, threshold, polarity, predictions } = learner;
      model.learners.push({
        feature,
        threshold,
        polarity,
        alpha: Math.round(alpha * 1000) / 1000,
      });

      // Re-weight with the unrounded alpha: misclassified examples gain weight.
      const reweighted = weights.map((weight, index) => (
        weight * Math.exp(-alpha * examples[index].label * predictions[index])
      ));
      weights = normalizeWeights(reweighted);
      round += 1;
    }

    model.metrics.rounds = model.learners.length;
    model.metrics.mode = model.learners.length > 0 ? 'boosted' : 'baseline';
  }

  model.metrics.trainingAccuracy = trainingAccuracy(model, examples);
  return model;
}
|
|
330
|
+
|
|
331
|
+
/**
 * Compute the signed, un-squashed risk score for a row.
 *
 * - No model or no feature registry: 0.
 * - Baseline model (no learners): the base rate mapped from [0, 1] onto
 *   [-1, 1], i.e. (baseRate - 0.5) * 2. A missing or non-numeric base rate is
 *   treated as the neutral 0.5.
 * - Boosted model: the alpha-weighted sum of stump votes.
 *
 * @param {?Object} model - Risk model from trainRiskModel / loadRiskModel.
 * @param {Object} row - Row or candidate to score.
 * @returns {number} Positive leans high risk, negative leans low risk.
 */
function rawScore(model, row) {
  if (!model || !model.featureRegistry) {
    return 0;
  }

  const learners = model.learners;
  if (!learners || learners.length === 0) {
    // `model.baseRate || 0.5` mistook a legitimate base rate of 0 (no
    // high-risk examples at all) for "missing" and scored it as neutral.
    // Only fall back to 0.5 when the value is absent or not a number.
    const parsed = model.baseRate == null ? NaN : Number(model.baseRate);
    const baseRate = Number.isFinite(parsed) ? parsed : 0.5;
    return (baseRate - 0.5) * 2;
  }

  const features = extractFeatureMap(row, model.featureRegistry);
  return learners.reduce((sum, learner) => (
    sum + learner.alpha * stumpPredict(features[learner.feature], learner.threshold, learner.polarity)
  ), 0);
}
|
|
346
|
+
|
|
347
|
+
/**
 * Predict the risk of a row.
 *
 * With a boosted model the probability is the logistic of twice the raw
 * score; without learners it is the model's base rate (0 when absent). A
 * missing model is tolerated, matching rawScore's own null guard, and yields
 * score 0 / probability 0 / 'low-risk' instead of a TypeError.
 *
 * @param {?Object} model - Risk model, or null/undefined when none is available.
 * @param {Object} row - Row or candidate to score.
 * @returns {{score: number, probability: number, label: 'high-risk'|'low-risk'}}
 *   score and probability are rounded to 3 decimals; the label uses the
 *   unrounded probability with a 0.5 cut-off.
 */
function predictRisk(model, row) {
  const score = rawScore(model, row);
  // Guard before dereferencing: loadRiskModel returns null when no model exists.
  const learners = model ? model.learners : null;
  const isBoosted = Boolean(learners && learners.length > 0);
  const probability = isBoosted
    ? 1 / (1 + Math.exp(-2 * score))
    : Number((model && model.baseRate) || 0);
  return {
    score: Math.round(score * 1000) / 1000,
    probability: Math.round(probability * 1000) / 1000,
    label: probability >= 0.5 ? 'high-risk' : 'low-risk',
  };
}
|
|
358
|
+
|
|
359
|
+
/**
 * Assemble a row-shaped candidate for predictRisk from request parameters and
 * recent history.
 *
 * Only the last 10 history rows contribute. Rewards come from `targetReward`
 * (non-finite values dropped); time gaps are the minutes between each of those
 * rows and the row before it, skipped when either timestamp does not parse.
 * `recentTrend` is the mean of the last 5 kept rewards.
 *
 * @param {Object} [params] - context, tags, skill, domain, filePathCount, errorType, rubric.
 * @param {Array<Object>} [historyRows] - Prior rows, oldest first.
 * @returns {Object} Candidate with a `features` sub-object (actionPatterns is always {}).
 */
function buildRiskCandidate(params = {}, historyRows = []) {
  const rawTags = Array.isArray(params.tags) ? params.tags : [];
  const targetTags = [];
  for (const tag of rawTags) {
    const cleaned = String(tag).trim();
    if (cleaned) targetTags.push(cleaned);
  }

  const windowStart = Math.max(0, historyRows.length - 10);
  const rewardSequence = [];
  const timeGaps = [];
  for (let index = windowStart; index < historyRows.length; index += 1) {
    const reward = Number(historyRows[index].targetReward || 0);
    if (Number.isFinite(reward)) rewardSequence.push(reward);

    // The very first history row has no predecessor to measure a gap from.
    if (index === 0) continue;
    const previous = Date.parse(historyRows[index - 1].timestamp || '');
    const current = Date.parse(historyRows[index].timestamp || '');
    if (Number.isFinite(previous) && Number.isFinite(current)) {
      timeGaps.push((current - previous) / 1000 / 60);
    }
  }

  let recentTrend = 0;
  if (rewardSequence.length > 0) {
    const tail = rewardSequence.slice(-5);
    recentTrend = tail.reduce((sum, value) => sum + Number(value || 0), 0) / tail.length;
  }

  return {
    context: params.context || '',
    targetTags,
    skill: params.skill || null,
    domain: params.domain || 'general',
    filePathCount: Number(params.filePathCount || 0),
    errorType: params.errorType || null,
    rubric: params.rubric || null,
    features: {
      rewardSequence,
      recentTrend,
      timeGaps,
      actionPatterns: {},
    },
  };
}
|
|
390
|
+
|
|
391
|
+
/**
 * Location of the persisted risk model inside the resolved feedback directory.
 *
 * @param {string} [feedbackDir] - Directory override, resolved via resolveFeedbackDir.
 * @returns {string} Path to risk-model.json.
 */
function modelPathFor(feedbackDir) {
  const baseDir = resolveFeedbackDir(feedbackDir);
  return path.join(baseDir, 'risk-model.json');
}
|
|
394
|
+
|
|
395
|
+
/**
 * Location of the feedback sequence log inside the resolved feedback directory.
 *
 * @param {string} [feedbackDir] - Directory override, resolved via resolveFeedbackDir.
 * @returns {string} Path to feedback-sequences.jsonl.
 */
function sequencePathFor(feedbackDir) {
  const baseDir = resolveFeedbackDir(feedbackDir);
  return path.join(baseDir, 'feedback-sequences.jsonl');
}
|
|
398
|
+
|
|
399
|
+
/**
 * Write the model as pretty-printed JSON (2-space indent, trailing newline),
 * creating the parent directory first if needed.
 *
 * @param {Object} model - Model to serialize.
 * @param {string} [feedbackDir] - Directory override.
 * @returns {string} The path that was written.
 */
function saveRiskModel(model, feedbackDir) {
  const destination = modelPathFor(feedbackDir);
  const parentDir = path.dirname(destination);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(model, null, 2);
  fs.writeFileSync(destination, `${serialized}\n`);
  return destination;
}
|
|
405
|
+
|
|
406
|
+
/**
 * Load the persisted risk model, if a usable one exists.
 *
 * Returns null when the file is missing or when its contents are not valid
 * JSON (for example a truncated write), so callers such as getRiskSummary can
 * fall back to retraining. Any other I/O error is rethrown.
 *
 * @param {string} [feedbackDir] - Directory override.
 * @returns {?Object} The parsed model, or null when none can be read.
 * @throws {Error} File-system errors other than ENOENT.
 */
function loadRiskModel(feedbackDir) {
  const targetPath = modelPathFor(feedbackDir);

  let raw;
  try {
    // Read directly rather than existsSync-then-read, which can race with a
    // concurrent delete or rewrite of the model file.
    raw = fs.readFileSync(targetPath, 'utf8');
  } catch (err) {
    if (err && err.code === 'ENOENT') return null;
    throw err;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    // A corrupt model is treated like a missing one; it is regenerated on the next train.
    if (err instanceof SyntaxError) return null;
    throw err;
  }
}
|
|
411
|
+
|
|
412
|
+
/**
 * Train a model from the directory's sequence log and save it alongside.
 *
 * @param {string} [feedbackDir] - Directory override.
 * @param {Object} [options] - Forwarded to trainRiskModel.
 * @returns {{model: Object, modelPath: string, rows: Array}} The trained model,
 *   where it was written, and the rows it was trained on.
 */
function trainAndPersistRiskModel(feedbackDir, options = {}) {
  const dir = resolveFeedbackDir(feedbackDir);
  const sequenceFile = sequencePathFor(dir);
  const rows = readJSONL(sequenceFile);
  const model = trainRiskModel(rows, options);
  return {
    model,
    modelPath: saveRiskModel(model, dir),
    rows,
  };
}
|
|
419
|
+
|
|
420
|
+
/**
 * Summarize the risk model for a feedback directory.
 *
 * Uses the persisted model when one loads; otherwise trains one in memory
 * from the sequence log (without saving it).
 *
 * @param {string} [feedbackDir] - Directory override.
 * @returns {?Object} null when the sequence log has no rows; otherwise
 *   exampleCount, baseRate and trainingAccuracy (3 decimals), mode, and the
 *   high-risk tag / domain / skill patterns.
 */
function getRiskSummary(feedbackDir) {
  const dir = resolveFeedbackDir(feedbackDir);
  const rows = readJSONL(sequencePathFor(dir));
  if (rows.length === 0) return null;

  let model = loadRiskModel(dir);
  if (!model) {
    model = trainRiskModel(rows);
  }

  const toThreeDecimals = (value) => Math.round((value || 0) * 1000) / 1000;
  const { metrics, patterns } = model;

  return {
    exampleCount: model.exampleCount,
    baseRate: toThreeDecimals(model.baseRate),
    mode: metrics.mode,
    trainingAccuracy: toThreeDecimals(metrics.trainingAccuracy),
    highRiskTags: patterns.tags,
    highRiskDomains: patterns.domains,
    highRiskSkills: patterns.skills,
  };
}
|
|
436
|
+
|
|
437
|
+
module.exports = {
|
|
438
|
+
DEFAULT_MODEL_PATH,
|
|
439
|
+
DEFAULT_SEQUENCE_PATH,
|
|
440
|
+
buildFeatureRegistry,
|
|
441
|
+
buildRiskCandidate,
|
|
442
|
+
deriveTargetRisk,
|
|
443
|
+
extractFeatureMap,
|
|
444
|
+
loadRiskModel,
|
|
445
|
+
modelPathFor,
|
|
446
|
+
predictRisk,
|
|
447
|
+
readJSONL,
|
|
448
|
+
saveRiskModel,
|
|
449
|
+
sequencePathFor,
|
|
450
|
+
trainAndPersistRiskModel,
|
|
451
|
+
trainRiskModel,
|
|
452
|
+
getRiskSummary,
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
if (require.main === module) {
|
|
456
|
+
const { model, modelPath } = trainAndPersistRiskModel();
|
|
457
|
+
process.stdout.write(`${JSON.stringify({ modelPath, model }, null, 2)}\n`);
|
|
458
|
+
}
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
const fs = require('fs');
|
|
12
12
|
const path = require('path');
|
|
13
|
+
const { assessFeedbackActionability } = require('./feedback-quality');
|
|
13
14
|
|
|
14
15
|
// ---------------------------------------------------------------------------
|
|
15
16
|
// CLAUDE.md Constraint Definitions (weight sum = 1.0)
|
|
@@ -53,7 +54,12 @@ const CONSTRAINTS = [
|
|
|
53
54
|
{
|
|
54
55
|
id: 'no_vague_signal',
|
|
55
56
|
weight: 0.10,
|
|
56
|
-
check: (e) =>
|
|
57
|
+
check: (e) => assessFeedbackActionability({
|
|
58
|
+
signal: e.signal,
|
|
59
|
+
context: e.context,
|
|
60
|
+
whatWentWrong: e.whatWentWrong,
|
|
61
|
+
whatWorked: e.whatWorked,
|
|
62
|
+
}).promotable,
|
|
57
63
|
},
|
|
58
64
|
];
|
|
59
65
|
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Status Dashboard
|
|
4
|
+
*
|
|
5
|
+
* CLI dashboard that reads feedback data and outputs a learning curve summary.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* node scripts/status-dashboard.js
|
|
9
|
+
*
|
|
10
|
+
* Exports:
|
|
11
|
+
* generateStatus(feedbackDir) — returns status object with approval rates,
|
|
12
|
+
* trends, failure domains, memory count, prevention rule count, and learning curve.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const fs = require('fs');
|
|
16
|
+
const path = require('path');
|
|
17
|
+
const { getFeedbackPaths, readJSONL } = require('./feedback-loop');
|
|
18
|
+
|
|
19
|
+
/**
 * Build the status summary for one feedback directory.
 *
 * Reads feedback-log.jsonl, memory-log.jsonl and prevention-rules.md from
 * `feedbackDir` and derives:
 *   - overall and last-20 approval rates (integer percentages of all entries),
 *   - a trend, only once at least 20 signals exist: 'improving' / 'declining'
 *     when the recent rate differs from the overall rate by more than 5 points,
 *   - negative-signal counts per domain, most frequent first,
 *   - the memory entry count and the number of `## ` headings in the rules file,
 *   - a learning curve: approval rate of every sliding window of 10 entries.
 *
 * @param {string} feedbackDir - Directory holding the feedback files.
 * @returns {Object} totalSignals, positive, negative, approvalRate,
 *   recentApprovalRate, trend, topFailureDomains, memoryCount,
 *   preventionRuleCount, learningCurve.
 */
function generateStatus(feedbackDir) {
  const RECENT_WINDOW = 20;
  const CURVE_WINDOW = 10;
  const isPositive = (entry) => entry.signal === 'positive';
  const isNegative = (entry) => entry.signal === 'negative';
  const percentOf = (part, whole) => (whole > 0 ? Math.round((part / whole) * 100) : 0);

  const feedbackLogPath = path.join(feedbackDir, 'feedback-log.jsonl');
  const memoryLogPath = path.join(feedbackDir, 'memory-log.jsonl');
  const preventionRulesPath = path.join(feedbackDir, 'prevention-rules.md');

  const entries = readJSONL(feedbackLogPath);
  const totalSignals = entries.length;
  const positive = entries.filter(isPositive).length;
  const failures = entries.filter(isNegative);
  const negative = failures.length;
  const approvalRate = percentOf(positive, totalSignals);

  const recent = entries.slice(-RECENT_WINDOW);
  const recentApprovalRate = percentOf(recent.filter(isPositive).length, recent.length);

  let trend = 'stable';
  if (totalSignals >= RECENT_WINDOW) {
    const delta = recentApprovalRate - approvalRate;
    if (delta > 5) {
      trend = 'improving';
    } else if (delta < -5) {
      trend = 'declining';
    }
  }

  // Tally failures per domain, preferring the recorded domain over one inferred from tags.
  const domainCounts = {};
  for (const entry of failures) {
    const domain = (entry.richContext && entry.richContext.domain) || inferDomainFromTags(entry.tags);
    domainCounts[domain] = (domainCounts[domain] || 0) + 1;
  }
  const topFailureDomains = Object.keys(domainCounts)
    .map((domain) => ({ domain, count: domainCounts[domain] }))
    .sort((a, b) => b.count - a.count);

  const memoryCount = readJSONL(memoryLogPath).length;

  let preventionRuleCount = 0;
  if (fs.existsSync(preventionRulesPath)) {
    const headings = fs.readFileSync(preventionRulesPath, 'utf-8').match(/^## /gm);
    preventionRuleCount = headings ? headings.length : 0;
  }

  const learningCurve = [];
  for (let start = 0; start + CURVE_WINDOW <= entries.length; start += 1) {
    const hits = entries.slice(start, start + CURVE_WINDOW).filter(isPositive).length;
    learningCurve.push({ window: start, approvalRate: Math.round((hits / CURVE_WINDOW) * 100) });
  }

  return {
    totalSignals,
    positive,
    negative,
    approvalRate,
    recentApprovalRate,
    trend,
    topFailureDomains,
    memoryCount,
    preventionRuleCount,
    learningCurve,
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Guess a failure domain from an entry's tags.
 *
 * Tags are compared case-insensitively. Rules are checked in a fixed priority
 * order and the first domain with a matching tag wins. Non-string tags are
 * stringified and null/undefined tags are ignored, so malformed entries no
 * longer throw.
 *
 * @param {Array} tags - Tags from a feedback entry.
 * @returns {string} The matched domain, or 'general' when `tags` is not a
 *   non-empty array or nothing matches.
 */
function inferDomainFromTags(tags) {
  if (!Array.isArray(tags) || tags.length === 0) return 'general';

  // Ordered by priority: earlier rules win when several match.
  const rules = [
    ['testing', ['testing', 'test']],
    ['security', ['security']],
    ['performance', ['performance', 'perf']],
    ['ui-components', ['ui', 'component']],
    ['api-integration', ['api', 'endpoint']],
    ['git-workflow', ['git', 'commit']],
    ['documentation', ['doc', 'readme']],
    ['debugging', ['debug', 'debugging']],
    ['architecture', ['arch', 'architecture']],
    ['data-modeling', ['data', 'schema']],
  ];

  const tagSet = new Set(
    tags
      .filter((tag) => tag != null)
      .map((tag) => String(tag).toLowerCase()),
  );

  const match = rules.find(([, aliases]) => aliases.some((alias) => tagSet.has(alias)));
  return match ? match[0] : 'general';
}
|
|
107
|
+
|
|
108
|
+
/**
 * Print the status object from generateStatus as a boxed text dashboard on stdout.
 *
 * Shows the headline counters, up to five failure domains, and about five
 * evenly spaced learning-curve points, each drawn as one bar character per
 * 5 percentage points of approval.
 *
 * @param {Object} status - Result of generateStatus.
 * @returns {void}
 */
function printDashboard(status) {
  const divider = '╠══════════════════════════════════════════╣';
  const statRow = (label, value) => `║ ${label} ${String(value).padStart(6)} ║`;
  const textRow = (text) => `║ ${text.padEnd(38)}║`;

  console.log('╔══════════════════════════════════════════╗');
  console.log('║ RLHF Learning Status Dashboard ║');
  console.log(divider);
  console.log(statRow('Total Signals:', status.totalSignals));
  console.log(statRow('Positive:', status.positive));
  console.log(statRow('Negative:', status.negative));
  console.log(statRow('Approval Rate:', status.approvalRate + '%'));
  console.log(statRow('Recent (last 20):', status.recentApprovalRate + '%'));
  // trend is padded directly (not via String()) exactly as before.
  console.log(`║ Trend: ${status.trend.padStart(6)} ║`);
  console.log(statRow('Memories:', status.memoryCount));
  console.log(statRow('Prevention Rules:', status.preventionRuleCount));
  console.log(divider);

  if (status.topFailureDomains.length === 0) {
    console.log('║ No failure domains recorded ║');
  } else {
    console.log('║ Top Failure Domains: ║');
    for (const entry of status.topFailureDomains.slice(0, 5)) {
      console.log(textRow(` ${entry.domain}: ${entry.count}`));
    }
  }

  console.log(divider);
  console.log('║ Learning Curve: ║');
  const curve = status.learningCurve;
  if (curve.length === 0) {
    console.log('║ Not enough data for learning curve ║');
  } else {
    // Sample roughly five points so long histories still fit on screen.
    const step = Math.max(1, Math.floor(curve.length / 5));
    for (let index = 0; index < curve.length; index += step) {
      const point = curve[index];
      const bar = '█'.repeat(Math.floor(point.approvalRate / 5));
      console.log(textRow(` w${String(point.window).padStart(3)}: ${bar} ${point.approvalRate}%`));
    }
  }

  console.log('╚══════════════════════════════════════════╝');
}
|
|
148
|
+
|
|
149
|
+
if (require.main === module) {
|
|
150
|
+
const { FEEDBACK_DIR } = getFeedbackPaths();
|
|
151
|
+
const status = generateStatus(FEEDBACK_DIR);
|
|
152
|
+
printDashboard(status);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
module.exports = { generateStatus, printDashboard };
|
package/scripts/test-coverage.js
CHANGED
|
@@ -29,7 +29,7 @@ function findCoverageTestFiles({
|
|
|
29
29
|
}
|
|
30
30
|
|
|
31
31
|
function buildCoverageArgs(files) {
|
|
32
|
-
return ['--test', '--experimental-test-coverage', ...files];
|
|
32
|
+
return ['--test', '--test-concurrency=1', '--experimental-test-coverage', ...files];
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
function runCoverage({
|