rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +26 -0
  2. package/LICENSE +21 -0
  3. package/README.md +308 -0
  4. package/adapters/README.md +8 -0
  5. package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
  6. package/adapters/chatgpt/INSTALL.md +80 -0
  7. package/adapters/chatgpt/openapi.yaml +292 -0
  8. package/adapters/claude/.mcp.json +8 -0
  9. package/adapters/codex/config.toml +4 -0
  10. package/adapters/gemini/function-declarations.json +95 -0
  11. package/adapters/mcp/server-stdio.js +444 -0
  12. package/bin/cli.js +167 -0
  13. package/config/mcp-allowlists.json +29 -0
  14. package/config/policy-bundles/constrained-v1.json +53 -0
  15. package/config/policy-bundles/default-v1.json +80 -0
  16. package/config/rubrics/default-v1.json +52 -0
  17. package/config/subagent-profiles.json +32 -0
  18. package/openapi/openapi.yaml +292 -0
  19. package/package.json +91 -0
  20. package/plugins/amp-skill/INSTALL.md +52 -0
  21. package/plugins/amp-skill/SKILL.md +31 -0
  22. package/plugins/claude-skill/INSTALL.md +55 -0
  23. package/plugins/claude-skill/SKILL.md +46 -0
  24. package/plugins/codex-profile/AGENTS.md +20 -0
  25. package/plugins/codex-profile/INSTALL.md +57 -0
  26. package/plugins/gemini-extension/INSTALL.md +74 -0
  27. package/plugins/gemini-extension/gemini_prompt.txt +10 -0
  28. package/plugins/gemini-extension/tool_contract.json +28 -0
  29. package/scripts/billing.js +471 -0
  30. package/scripts/budget-guard.js +173 -0
  31. package/scripts/code-reasoning.js +307 -0
  32. package/scripts/context-engine.js +547 -0
  33. package/scripts/contextfs.js +513 -0
  34. package/scripts/contract-audit.js +198 -0
  35. package/scripts/dpo-optimizer.js +208 -0
  36. package/scripts/export-dpo-pairs.js +316 -0
  37. package/scripts/export-training.js +448 -0
  38. package/scripts/feedback-attribution.js +313 -0
  39. package/scripts/feedback-inbox-read.js +162 -0
  40. package/scripts/feedback-loop.js +838 -0
  41. package/scripts/feedback-schema.js +300 -0
  42. package/scripts/feedback-to-memory.js +165 -0
  43. package/scripts/feedback-to-rules.js +109 -0
  44. package/scripts/generate-paperbanana-diagrams.sh +99 -0
  45. package/scripts/hybrid-feedback-context.js +676 -0
  46. package/scripts/intent-router.js +164 -0
  47. package/scripts/mcp-policy.js +92 -0
  48. package/scripts/meta-policy.js +194 -0
  49. package/scripts/plan-gate.js +154 -0
  50. package/scripts/prove-adapters.js +364 -0
  51. package/scripts/prove-attribution.js +364 -0
  52. package/scripts/prove-automation.js +393 -0
  53. package/scripts/prove-data-quality.js +219 -0
  54. package/scripts/prove-intelligence.js +256 -0
  55. package/scripts/prove-lancedb.js +370 -0
  56. package/scripts/prove-loop-closure.js +255 -0
  57. package/scripts/prove-rlaif.js +404 -0
  58. package/scripts/prove-subway-upgrades.js +250 -0
  59. package/scripts/prove-training-export.js +324 -0
  60. package/scripts/prove-v2-milestone.js +273 -0
  61. package/scripts/prove-v3-milestone.js +381 -0
  62. package/scripts/rlaif-self-audit.js +123 -0
  63. package/scripts/rubric-engine.js +230 -0
  64. package/scripts/self-heal.js +127 -0
  65. package/scripts/self-healing-check.js +111 -0
  66. package/scripts/skill-quality-tracker.js +284 -0
  67. package/scripts/subagent-profiles.js +79 -0
  68. package/scripts/sync-gh-secrets-from-env.sh +29 -0
  69. package/scripts/thompson-sampling.js +331 -0
  70. package/scripts/train_from_feedback.py +914 -0
  71. package/scripts/validate-feedback.js +580 -0
  72. package/scripts/vector-store.js +100 -0
  73. package/src/api/server.js +497 -0
@@ -0,0 +1,300 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Feedback Schema Validator
4
+ *
5
+ * Implements three reliability patterns:
6
+ * 1. Typed schemas — enforce structure on every feedback memory
7
+ * 2. Action schemas — discriminated union of allowed feedback actions
8
+ * 3. Validation at boundaries — reject bad data before storage
9
+ */
10
+
11
+ const GENERIC_TAGS = new Set(['feedback', 'positive', 'negative']);
12
+ const MIN_CONTENT_LENGTH = 20;
13
+ const VALID_TITLE_PREFIXES = ['SUCCESS:', 'MISTAKE:', 'LEARNING:', 'PREFERENCE:'];
14
+ const VALID_CATEGORIES = new Set(['error', 'learning', 'preference']);
15
+
16
/**
 * Validate a feedback memory object before it is stored.
 *
 * Checks performed:
 *  - title: non-empty string starting with one of VALID_TITLE_PREFIXES and
 *    carrying at least 5 characters after the prefix
 *  - content: string of at least MIN_CONTENT_LENGTH characters
 *  - category: member of VALID_CATEGORIES
 *  - tags: non-empty array holding at least one non-generic string tag
 *  - title/category consistency (MISTAKE <-> "error")
 *  - rubricSummary (optional): object with a 0..1 weightedScore and array
 *    failingCriteria / failingGuardrails
 *
 * Never throws: malformed input (null, primitives, arrays, non-string
 * titles) is reported through `issues` instead of crashing the caller.
 *
 * @param {object} memory - Candidate memory object.
 * @returns {{valid: boolean, issues: string[]}} Validation verdict and the
 *   list of human-readable problems (empty when valid).
 */
function validateFeedbackMemory(memory) {
  // Boundary guard: property access on null/undefined would throw.
  if (memory === null || typeof memory !== 'object' || Array.isArray(memory)) {
    return { valid: false, issues: ['memory: must be an object'] };
  }

  const issues = [];
  const hasStringTitle = typeof memory.title === 'string' && memory.title.length > 0;

  if (!hasStringTitle) {
    issues.push('title: required string');
  } else {
    const hasPrefix = VALID_TITLE_PREFIXES.some((p) => memory.title.startsWith(p));
    if (!hasPrefix) {
      issues.push(`title: must start with one of ${VALID_TITLE_PREFIXES.join(', ')}`);
    }
    const afterPrefix = memory.title.replace(/^(SUCCESS|MISTAKE|LEARNING|PREFERENCE):\s*/, '');
    if (afterPrefix.length < 5) {
      issues.push('title: description after prefix too short (min 5 chars)');
    }
  }

  if (!memory.content || typeof memory.content !== 'string') {
    issues.push('content: required string');
  } else if (memory.content.length < MIN_CONTENT_LENGTH) {
    issues.push(`content: too short (${memory.content.length} chars, min ${MIN_CONTENT_LENGTH})`);
  }

  if (!memory.category) {
    issues.push('category: required');
  } else if (!VALID_CATEGORIES.has(memory.category)) {
    issues.push(`category: must be one of ${[...VALID_CATEGORIES].join(', ')} (got "${memory.category}")`);
  }

  if (!Array.isArray(memory.tags) || memory.tags.length === 0) {
    issues.push('tags: at least 1 tag required');
  } else {
    // Only non-empty strings can serve as domain tags.
    const domainTags = memory.tags.filter(
      (t) => typeof t === 'string' && t.length > 0 && !GENERIC_TAGS.has(t),
    );
    if (domainTags.length === 0) {
      issues.push('tags: at least 1 non-generic tag required');
    }
  }

  // Use the string-checked title here: a truthy non-string title has no
  // startsWith method and would otherwise throw.
  if (hasStringTitle && memory.category) {
    const titleIsError = memory.title.startsWith('MISTAKE:');
    const titleIsSuccess = memory.title.startsWith('SUCCESS:') || memory.title.startsWith('LEARNING:');
    if (titleIsError && memory.category !== 'error') {
      issues.push('consistency: MISTAKE title should have category "error"');
    }
    if (titleIsSuccess && memory.category === 'error') {
      issues.push('consistency: SUCCESS/LEARNING title should not have category "error"');
    }
  }

  if (memory.rubricSummary != null) {
    if (typeof memory.rubricSummary !== 'object') {
      issues.push('rubricSummary: must be an object when provided');
    } else {
      const weightedScore = Number(memory.rubricSummary.weightedScore);
      if (!Number.isFinite(weightedScore) || weightedScore < 0 || weightedScore > 1) {
        issues.push('rubricSummary.weightedScore: must be a number between 0 and 1');
      }
      if (!Array.isArray(memory.rubricSummary.failingCriteria)) {
        issues.push('rubricSummary.failingCriteria: must be an array');
      }
      if (!Array.isArray(memory.rubricSummary.failingGuardrails)) {
        issues.push('rubricSummary.failingGuardrails: must be an array');
      }
    }
  }

  return { valid: issues.length === 0, issues };
}
83
+
84
/**
 * Map raw feedback parameters onto one of the allowed feedback actions.
 *
 * Possible results:
 *  - `{ type: 'no-action', reason }` when there is nothing actionable, the
 *    signal is unknown, or a rubric evaluation blocks positive promotion
 *  - `{ type: 'store-mistake', memory }` for actionable negative feedback
 *  - `{ type: 'store-learning', memory }` for actionable positive feedback
 *
 * Robustness: a missing/null `params`, non-array `tags`, and non-array rubric
 * lists are tolerated (treated as empty) instead of throwing. Titles are
 * collapsed to a single line and tags are de-duplicated.
 *
 * @param {object} [params]
 * @param {string} [params.signal] - 'positive' or 'negative'.
 * @param {string} [params.context] - What the agent was doing.
 * @param {string} [params.whatWentWrong] - Negative: what failed.
 * @param {string} [params.whatToChange] - Negative: how to avoid it.
 * @param {string} [params.whatWorked] - Positive: the pattern to repeat.
 * @param {string[]} [params.tags] - Tags; generic ones are filtered out and re-added.
 * @param {object} [params.rubricEvaluation] - Optional rubric result
 *   (rubricId, weightedScore, failingCriteria, failingGuardrails,
 *   judgeDisagreements, blockReasons, promotionEligible).
 * @returns {{type: string, reason?: string, memory?: object}}
 */
function resolveFeedbackAction(params = {}) {
  const {
    signal,
    context,
    whatWentWrong,
    whatToChange,
    whatWorked,
    tags,
    rubricEvaluation,
  } = params || {};

  // Treat anything that is not an array as "no items".
  const asList = (value) => (Array.isArray(value) ? value : []);
  // Single-line, at most 60 characters, safe for non-string input.
  const toTitleText = (text) => String(text).replace(/\s+/g, ' ').trim().slice(0, 60);

  if (!context && !whatWentWrong && !whatWorked) {
    return { type: 'no-action', reason: 'No context provided — cannot create actionable memory' };
  }

  const domainTags = asList(tags).filter((t) => !GENERIC_TAGS.has(t));
  const rubricSummary = rubricEvaluation
    ? {
      rubricId: rubricEvaluation.rubricId,
      weightedScore: rubricEvaluation.weightedScore,
      failingCriteria: asList(rubricEvaluation.failingCriteria),
      failingGuardrails: asList(rubricEvaluation.failingGuardrails),
      judgeDisagreements: asList(rubricEvaluation.judgeDisagreements),
      blockReasons: asList(rubricEvaluation.blockReasons),
    }
    : null;
  const rubricFailureTags = rubricSummary
    ? rubricSummary.failingCriteria.map((criterion) => `rubric-${criterion}`)
    : [];

  if (signal === 'negative') {
    // Reached with only whatWorked set: nothing describes the failure.
    if (!whatWentWrong && !context) {
      return { type: 'no-action', reason: 'Negative feedback without context — cannot determine what went wrong' };
    }

    const content = [
      whatWentWrong ? `What went wrong: ${whatWentWrong}` : `Context: ${context}`,
      whatToChange ? `How to avoid: ${whatToChange}` : 'Action needed: investigate and prevent recurrence',
    ].join('\n');
    const rubricLines = [];
    if (rubricSummary) {
      rubricLines.push(`Rubric weighted score: ${rubricSummary.weightedScore}`);
      if (rubricSummary.failingCriteria.length > 0) {
        rubricLines.push(`Rubric failing criteria: ${rubricSummary.failingCriteria.join(', ')}`);
      }
      if (rubricSummary.failingGuardrails.length > 0) {
        rubricLines.push(`Guardrails failed: ${rubricSummary.failingGuardrails.join(', ')}`);
      }
      if (rubricSummary.judgeDisagreements.length > 0) {
        rubricLines.push('Judge disagreement detected; require manual review');
      }
    }

    const description = toTitleText(whatWentWrong || context || '');

    return {
      type: 'store-mistake',
      memory: {
        title: `MISTAKE: ${description}`,
        content: rubricLines.length > 0 ? `${content}\n${rubricLines.join('\n')}` : content,
        category: 'error',
        importance: 'high',
        tags: [...new Set(['feedback', 'negative', ...domainTags, ...rubricFailureTags])],
        rubricSummary,
      },
    };
  }

  if (signal === 'positive') {
    // Rubric gate: a failing evaluation blocks promotion of the learning.
    if (rubricEvaluation && !rubricEvaluation.promotionEligible) {
      const blockReasons = asList(rubricEvaluation.blockReasons);
      const reasons = blockReasons.length > 0
        ? blockReasons.join('; ')
        : 'rubric gate did not pass';
      return { type: 'no-action', reason: `Rubric gate prevented promotion: ${reasons}` };
    }

    if (!whatWorked && !context) {
      return { type: 'no-action', reason: 'Positive feedback without context — cannot determine what worked' };
    }

    const content = whatWorked ? `What worked: ${whatWorked}` : `Approach: ${context}`;
    const rubricLines = [];
    if (rubricSummary) {
      rubricLines.push(`Rubric weighted score: ${rubricSummary.weightedScore}`);
      rubricLines.push(`Rubric criteria passed with no blocking guardrails.`);
    }
    const description = toTitleText(whatWorked || context || '');

    return {
      type: 'store-learning',
      memory: {
        title: `SUCCESS: ${description}`,
        content: rubricLines.length > 0 ? `${content}\n${rubricLines.join('\n')}` : content,
        category: 'learning',
        importance: 'normal',
        tags: [...new Set(['feedback', 'positive', ...domainTags])],
        rubricSummary,
      },
    };
  }

  return { type: 'no-action', reason: `Unknown signal: ${signal}` };
}
187
+
188
/**
 * Final gate before storage: run schema validation on a memory object.
 *
 * @param {object} memory - Memory candidate to validate.
 * @returns {{ok: true, memory: object}|{ok: false, issues: string[]}}
 *   The same memory object when it validates, otherwise the issue list.
 */
function prepareForStorage(memory) {
  const { valid, issues } = validateFeedbackMemory(memory);
  return valid ? { ok: true, memory } : { ok: false, issues };
}
195
+
196
/**
 * parseTimestamp — Parse a timestamp into a Date object.
 *
 * Accepts ISO 8601 strings in the forms:
 *   - Z-suffix:   "2026-03-04T12:00:00.000Z"
 *   - no suffix:  "2026-03-04T12:00:00"
 *   - UTC offset: "2026-03-04T12:00:00+05:00"
 * Surrounding whitespace is ignored. A Date instance is also accepted and is
 * copied directly, so its millisecond precision is preserved (round-tripping
 * it through String() would drop the milliseconds).
 *
 * Returns null (not an Invalid Date) for null, undefined, or unparseable input.
 *
 * NOTE: Do NOT change how timestamps are WRITTEN — new Date().toISOString()
 * already produces ISO 8601 with a Z suffix. This helper is for READING only.
 * Python's train_from_feedback.py strips Z with .replace("Z","") before
 * fromisoformat(); that is safe because Node always writes the Z suffix.
 *
 * @param {string|Date|null|undefined} ts - Timestamp to parse.
 * @returns {Date|null} A new Date, or null when the input cannot be parsed.
 */
function parseTimestamp(ts) {
  if (ts == null) return null;
  const parsed = ts instanceof Date
    ? new Date(ts.getTime())
    : new Date(String(ts).trim());
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
213
+
214
/**
 * Built-in smoke tests for the schema module (run with `--test`).
 * Prints one PASS/FAIL line per check plus a summary, then exits the process
 * with status 1 if any check failed and 0 otherwise.
 */
function runTests() {
  const counts = { passed: 0, failed: 0 };

  // Minimal assertion helper: tallies the outcome and logs it.
  const assert = (condition, name) => {
    if (!condition) {
      counts.failed += 1;
      console.log(` FAIL ${name}`);
      return;
    }
    counts.passed += 1;
    console.log(` PASS ${name}`);
  };

  console.log('\nfeedback-schema.js tests\n');

  // A well-formed error memory validates.
  const validErrorMemory = {
    title: 'MISTAKE: Did not verify before claiming fixed',
    content: 'Always run tests and show evidence before claiming the work is complete.',
    category: 'error',
    tags: ['feedback', 'negative', 'verification'],
  };
  assert(validateFeedbackMemory(validErrorMemory).valid, 'valid error memory passes');

  // Content below the minimum length is rejected.
  const tooShort = {
    title: 'MISTAKE: Bad fix regression',
    content: 'thumbs down',
    category: 'error',
    tags: ['verification'],
  };
  assert(!validateFeedbackMemory(tooShort).valid, 'short content fails');

  // A bare signal carries nothing actionable.
  const bareNegative = resolveFeedbackAction({ signal: 'negative' });
  assert(bareNegative.type === 'no-action', 'bare negative feedback becomes no-action');

  // Fully described negative feedback yields a storable mistake.
  const negativeAction = resolveFeedbackAction({
    signal: 'negative',
    context: 'Pushed code with no tests',
    whatWentWrong: 'Claimed fixed without test output',
    whatToChange: 'Always run tests first',
    tags: ['testing', 'verification'],
  });
  assert(negativeAction.type === 'store-mistake', 'negative feedback creates store-mistake action');

  const storage = prepareForStorage(negativeAction.memory);
  assert(storage.ok, 'store-mistake memory passes storage validation');

  // Described positive feedback yields a learning.
  const positiveAction = resolveFeedbackAction({
    signal: 'positive',
    whatWorked: 'Ran tests and included output before final response',
    tags: ['testing', 'verification'],
  });
  assert(positiveAction.type === 'store-learning', 'positive feedback creates store-learning action');

  // A rubric evaluation that is not promotion-eligible blocks the learning.
  const gatedPositive = resolveFeedbackAction({
    signal: 'positive',
    whatWorked: 'Looked correct',
    tags: ['testing'],
    rubricEvaluation: {
      promotionEligible: false,
      blockReasons: ['failed_guardrails:testsPassed'],
      failingCriteria: [],
      failingGuardrails: ['testsPassed'],
      weightedScore: 0.82,
      rubricId: 'default-v1',
    },
  });
  assert(gatedPositive.type === 'no-action', 'rubric gate blocks unsafe positive promotion');

  console.log(`\nResults: ${counts.passed} passed, ${counts.failed} failed\n`);
  process.exit(counts.failed > 0 ? 1 : 0);
}
286
+
287
// Public API of the schema module: validators, the action resolver, the
// timestamp reader, and the constants they are built on.
const schemaApi = {
  validateFeedbackMemory,
  resolveFeedbackAction,
  prepareForStorage,
  parseTimestamp,
  GENERIC_TAGS,
  MIN_CONTENT_LENGTH,
  VALID_TITLE_PREFIXES,
  VALID_CATEGORIES,
};

module.exports = schemaApi;

// Run the built-in tests only when executed directly with --test.
const invokedDirectly = require.main === module;
if (invokedDirectly && process.argv.includes('--test')) {
  runTests();
}
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Feedback → Memory Bridge
4
+ *
5
+ * Converts raw feedback params into a schema-validated memory object
6
+ * suitable for mcp__memory__remember. This is the validation boundary
7
+ * between external feedback signals and MCP memory storage.
8
+ *
9
+ * Usage:
10
+ * echo '{"signal":"negative","context":"...","whatWentWrong":"...","tags":["testing"]}' | node scripts/feedback-to-memory.js
11
+ * node scripts/feedback-to-memory.js --test
12
+ *
13
+ * Input (stdin JSON):
14
+ * signal: "positive" | "negative"
15
+ * context: string — what the agent was doing
16
+ * whatWentWrong: string — for negative: what failed
17
+ * whatToChange: string — for negative: how to avoid
18
+ * whatWorked: string — for positive: the pattern to repeat
19
+ * tags: string[] — domain tags (at least 1 non-generic)
20
+ *
21
+ * Output (stdout JSON):
22
+ * { ok: true, actionType: "store-mistake" | "store-learning", memory: { title, content, category, importance, tags, rubricSummary } }
23
+ * { ok: false, reason: string, issues?: string[] }
24
+ */
25
+ 'use strict';
26
+
27
+ const { resolveFeedbackAction, prepareForStorage } = require('./feedback-schema');
28
+
29
/**
 * Convert raw feedback params into a schema-validated memory object.
 *
 * Resolves the params to an action via resolveFeedbackAction, then runs the
 * resulting memory through prepareForStorage. Never throws on malformed
 * input: a null / non-object `params` is reported as a failed result.
 *
 * An optional `rubricEvaluation` is forwarded to the resolver so that its
 * rubric promotion gate also applies to feedback arriving through this bridge.
 *
 * @param {object} params - Feedback params (signal, context, whatWentWrong,
 *   whatToChange, whatWorked, tags, and optionally rubricEvaluation).
 * @returns {{ok: true, actionType: string, memory: object}
 *   |{ok: false, reason: string, issues?: string[]}}
 */
function convertFeedbackToMemory(params) {
  // JSON input such as `null`, `42` or `[]` parses fine but is not a params object.
  if (params === null || typeof params !== 'object' || Array.isArray(params)) {
    return { ok: false, reason: 'Invalid input: expected a JSON object of feedback params' };
  }

  const action = resolveFeedbackAction({
    signal: params.signal,
    context: params.context || '',
    whatWentWrong: params.whatWentWrong,
    whatToChange: params.whatToChange,
    whatWorked: params.whatWorked,
    tags: params.tags || [],
    rubricEvaluation: params.rubricEvaluation,
  });

  if (!action || action.type === 'no-action') {
    return { ok: false, reason: action ? action.reason : 'Unknown action resolution failure' };
  }

  const prep = prepareForStorage(action.memory);
  if (!prep.ok) {
    return { ok: false, reason: `Schema validation failed: ${prep.issues.join('; ')}`, issues: prep.issues };
  }

  return { ok: true, actionType: action.type, memory: prep.memory };
}
50
+
51
+ // ---------------------------------------------------------------------------
52
+ // CLI: stdin mode
53
+ // ---------------------------------------------------------------------------
54
+
55
/**
 * CLI stdin mode: read one JSON document from stdin, convert it to a memory,
 * and print the result as pretty-printed JSON on stdout.
 *
 * Exit status: 0 when the conversion succeeded, 2 when the feedback was
 * rejected, 1 when the input is not valid JSON or the conversion threw.
 *
 * The status is set through process.exitCode rather than process.exit() so
 * the process ends only after the stdout write has been flushed. JSON parse
 * failures and conversion failures are reported with distinct reasons.
 */
function runStdin() {
  let input = '';
  process.stdin.setEncoding('utf-8');
  process.stdin.on('data', (chunk) => { input += chunk; });
  process.stdin.on('end', () => {
    // Print a result object and record the exit status without forcing exit.
    const emit = (result, code) => {
      process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
      process.exitCode = code;
    };

    let params;
    try {
      params = JSON.parse(input.trim());
    } catch (err) {
      emit({ ok: false, reason: `Parse error: ${err.message}` }, 1);
      return;
    }

    try {
      const result = convertFeedbackToMemory(params);
      emit(result, result.ok ? 0 : 2);
    } catch (err) {
      emit({ ok: false, reason: `Conversion error: ${err.message}` }, 1);
    }
  });
}
71
+
72
+ // ---------------------------------------------------------------------------
73
+ // Built-in Tests
74
+ // ---------------------------------------------------------------------------
75
+
76
/**
 * Built-in tests for the feedback-to-memory bridge (run with `--test`).
 * Logs one line per check plus a summary banner, then exits with status 1
 * when any check failed and 0 otherwise.
 */
function runTests() {
  const counts = { passed: 0, failed: 0 };

  // Tally and report a single check.
  const assert = (condition, name) => {
    if (!condition) {
      counts.failed += 1;
      console.log(` ❌ ${name}`);
      return;
    }
    counts.passed += 1;
    console.log(` ✅ ${name}`);
  };

  console.log('\n🧪 feedback-to-memory.js — Tests\n');

  // Fully described negative feedback becomes a MISTAKE memory.
  const negative = convertFeedbackToMemory({
    signal: 'negative',
    context: 'Agent claimed fix without test evidence',
    whatWentWrong: 'No tests were run before claiming the bug was fixed',
    whatToChange: 'Always run tests and show output before claiming done',
    tags: ['verification', 'testing'],
  });
  assert(negative.ok === true, 'valid negative → ok');
  assert(negative.actionType === 'store-mistake', 'negative → store-mistake');
  assert(negative.memory.title.startsWith('MISTAKE:'), 'negative → MISTAKE: prefix');
  assert(negative.memory.category === 'error', 'negative → error category');
  assert(negative.memory.tags.includes('verification'), 'preserves domain tags');

  // Described positive feedback becomes a SUCCESS memory.
  const positive = convertFeedbackToMemory({
    signal: 'positive',
    whatWorked: 'Built schema-validated feedback system with prevention rules',
    tags: ['architecture', 'rlhf'],
  });
  assert(positive.ok === true, 'valid positive → ok');
  assert(positive.actionType === 'store-learning', 'positive → store-learning');
  assert(positive.memory.title.startsWith('SUCCESS:'), 'positive → SUCCESS: prefix');
  assert(positive.memory.category === 'learning', 'positive → learning category');

  // Signals without any description are rejected.
  const bareDown = convertFeedbackToMemory({ signal: 'negative' });
  assert(bareDown.ok === false, 'bare negative → rejected');
  assert(
    bareDown.reason.includes('No context') || bareDown.reason.includes('cannot'),
    'reports missing context',
  );

  const bareThumbsUp = convertFeedbackToMemory({ signal: 'positive' });
  assert(bareThumbsUp.ok === false, 'bare positive → rejected');

  // Signals outside positive/negative are rejected.
  const unknownSignal = convertFeedbackToMemory({ signal: 'maybe', context: 'test' });
  assert(unknownSignal.ok === false, 'unknown signal → rejected');

  // Context alone is enough for either polarity.
  const contextOnlyNegative = convertFeedbackToMemory({
    signal: 'negative',
    context: 'Showed fake RLHF statistics panel to user',
    tags: ['rlhf'],
  });
  assert(contextOnlyNegative.ok === true, 'context-only negative → ok');

  const contextOnlyPositive = convertFeedbackToMemory({
    signal: 'positive',
    context: 'Ran full test suite and showed green output before responding',
    tags: ['verification'],
  });
  assert(contextOnlyPositive.ok === true, 'context-only positive → ok');

  const banner = '═'.repeat(50);
  console.log(`\n${banner}`);
  console.log(`Results: ${counts.passed} passed, ${counts.failed} failed, ${counts.passed + counts.failed} total`);
  console.log(`${banner}\n`);

  process.exit(counts.failed > 0 ? 1 : 0);
}
152
+
153
+ // ---------------------------------------------------------------------------
154
+ // Exports & main
155
+ // ---------------------------------------------------------------------------
156
+
157
module.exports = { convertFeedbackToMemory };

// Direct execution: `--test` runs the built-in tests, otherwise read stdin.
if (require.main === module) {
  const entryPoint = process.argv.includes('--test') ? runTests : runStdin;
  entryPoint();
}
+ }
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+
6
+ const DEFAULT_LOG = path.join(__dirname, '..', '.claude', 'memory', 'feedback', 'feedback-log.jsonl');
7
+ const NEG = new Set(['negative', 'negative_strong', 'down', 'thumbs_down']);
8
+ const POS = new Set(['positive', 'positive_strong', 'up', 'thumbs_up']);
9
+
10
/**
 * Read a JSONL feedback log and return its records.
 *
 * Blank lines and lines that are not valid JSON are skipped. Lines that parse
 * to something other than a plain object (null, numbers, strings, arrays) are
 * skipped as well: they are not feedback records, and a `null` entry would
 * crash any later property read on it.
 *
 * @param {string} filePath - Path to the JSONL log.
 * @returns {object[]} Parsed records in file order; [] when the file is missing.
 */
function parseFeedbackFile(filePath) {
  if (!fs.existsSync(filePath)) return [];
  const entries = [];
  for (const line of fs.readFileSync(filePath, 'utf8').split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      continue; // malformed line: skip, best-effort parsing
    }
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      entries.push(parsed);
    }
  }
  return entries;
}
20
+
21
/**
 * Classify a feedback entry as 'negative' or 'positive'.
 *
 * Reads `entry.signal`, falling back to `entry.feedback`, and matches the
 * trimmed, lower-cased value against the NEG / POS sets. Non-object entries
 * and non-string signal values are treated as unclassifiable instead of
 * throwing.
 *
 * @param {object} entry - One feedback log record.
 * @returns {'negative'|'positive'|null} null when the signal is missing or unknown.
 */
function classifySignal(entry) {
  if (entry === null || typeof entry !== 'object') return null;
  // First non-empty string among signal / feedback, in that priority order.
  const raw = [entry.signal, entry.feedback].find((v) => typeof v === 'string' && v.length > 0);
  if (raw === undefined) return null;
  const sig = raw.trim().toLowerCase();
  if (NEG.has(sig)) return 'negative';
  if (POS.has(sig)) return 'positive';
  return null;
}
27
+
28
/**
 * Normalize a feedback context string into a grouping key.
 *
 * Replaces `/Users/<name>` home prefixes with `~`, removes `:<digits>`
 * suffixes (e.g. line numbers), lower-cases and trims. Anything that is not a
 * string (null, undefined, objects, numbers) yields '' instead of throwing.
 *
 * @param {string} ctx - Raw context text.
 * @returns {string} Normalized key; '' for missing or non-string input.
 */
function normalize(ctx) {
  if (typeof ctx !== 'string') return '';
  return ctx
    .replace(/\/Users\/[^\s/]+/g, '~')
    .replace(/:[0-9]+/g, '')
    .toLowerCase()
    .trim();
}
31
+
32
/**
 * Aggregate feedback entries into a summary report.
 *
 * Counts positive/negative signals overall and per category, counts negative
 * signals per tool, and groups negative entries by normalized context to find
 * issues that occurred at least twice.
 *
 * Accumulators are Maps keyed by log data, so keys such as "constructor" or
 * "__proto__" are counted like any other key; the report still exposes plain
 * objects. Non-object entries and non-string contexts are skipped rather than
 * crashing the analysis. The boilerplate "CRITICAL ERROR - User frustrated: "
 * prefix is stripped before the suggested rule is truncated to 80 characters.
 *
 * @param {object[]} entries - Parsed feedback log records.
 * @returns {{generatedAt: string, totalFeedback: number, negativeCount: number,
 *   positiveCount: number, negativeRate: string, recurringIssues: object[],
 *   categoryBreakdown: object, topTools: object}}
 */
function analyze(entries) {
  let positiveCount = 0;
  let negativeCount = 0;
  const categories = new Map();
  const toolBuckets = new Map();
  const contextCounts = new Map();

  for (const e of entries) {
    if (e === null || typeof e !== 'object') continue;
    const cls = classifySignal(e);
    if (!cls) continue;
    if (cls === 'positive') {
      positiveCount++;
    } else {
      negativeCount++;
    }

    const cat = String(e.task_category || e.category || 'uncategorized');
    if (!categories.has(cat)) categories.set(cat, { positive: 0, negative: 0, total: 0 });
    const bucket = categories.get(cat);
    bucket[cls]++;
    bucket.total++;

    if (cls === 'negative') {
      const tool = String(e.tool_name || 'unknown');
      toolBuckets.set(tool, (toolBuckets.get(tool) || 0) + 1);

      const rawContext = typeof e.context === 'string' ? e.context : '';
      const key = normalize(rawContext);
      // Very short contexts are too generic to group on.
      if (key.length > 10) {
        if (!contextCounts.has(key)) contextCounts.set(key, { raw: rawContext, count: 0, tool });
        contextCounts.get(key).count++;
      }
    }
  }

  const total = positiveCount + negativeCount;
  const recurringIssues = [...contextCounts.values()]
    .filter((v) => v.count >= 2)
    .sort((a, b) => b.count - a.count)
    .map((v) => ({
      pattern: v.raw.slice(0, 120),
      count: v.count,
      severity: v.count >= 4 ? 'critical' : v.count >= 3 ? 'high' : 'medium',
      suggestedRule: `NEVER ${v.raw.replace(/CRITICAL ERROR - User frustrated: /i, '').slice(0, 80)}`,
    }));

  return {
    generatedAt: new Date().toISOString(),
    totalFeedback: total,
    negativeCount,
    positiveCount,
    negativeRate: total ? `${((negativeCount / total) * 100).toFixed(1)}%` : '0%',
    recurringIssues,
    categoryBreakdown: Object.fromEntries(categories),
    topTools: Object.fromEntries(toolBuckets),
  };
}
81
+
82
/**
 * Render an analysis report as a plain-text list of suggested rules.
 *
 * @param {object} report - Report with generatedAt, negativeRate,
 *   negativeCount, totalFeedback and recurringIssues.
 * @returns {string} Newline-joined text: a commented header followed by one
 *   bullet per recurring issue, or a single "no issues" bullet.
 */
function toRules(report) {
  const { generatedAt, negativeRate, negativeCount, totalFeedback, recurringIssues } = report;

  const header = [
    '# Suggested Rules from Feedback Analysis',
    `# Generated: ${generatedAt}`,
    '',
    `# Negative rate: ${negativeRate} (${negativeCount}/${totalFeedback})`,
    '',
  ];

  const bullets = recurringIssues.map(
    ({ severity, count, suggestedRule }) => `- [${severity.toUpperCase()}] (${count}x) ${suggestedRule}`,
  );
  if (bullets.length === 0) {
    bullets.push('- No recurring issues detected.');
  }

  return [...header, ...bullets].join('\n');
}
92
+
93
// CLI entry point: analyze the log given as the first non-flag argument (or
// the default log) and print either suggested rules (--rules) or the JSON
// report. Failures are reported as warnings and the exit status is always 0.
if (require.main === module) {
  const cliArgs = process.argv;
  try {
    const firstArg = cliArgs[2];
    const logPath = firstArg && !firstArg.startsWith('--') ? firstArg : DEFAULT_LOG;
    const report = analyze(parseFeedbackFile(logPath));
    const output = cliArgs.includes('--rules')
      ? toRules(report)
      : JSON.stringify(report, null, 2);
    console.log(output);
  } catch (err) {
    console.error('Warning:', err.message);
  }
  process.exit(0);
}

module.exports = { parseFeedbackFile, classifySignal, analyze, toRules, normalize };
@@ -0,0 +1,99 @@
1
#!/usr/bin/env bash
# Generate the project's architecture diagrams with the paperbanana CLI.
# This section resolves the repository root, loads environment files,
# validates the API key, and sets the tunable PB_* defaults.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT_DIR"

# Optional paperbanana-specific settings.
if [[ -f .env.paperbanana ]]; then
  # shellcheck source=/dev/null
  source .env.paperbanana
fi

if [[ ! -f .env ]]; then
  echo "Missing .env. Add GEMINI_API_KEY first." >&2
  exit 1
fi

# shellcheck source=/dev/null
source .env

if [[ -z "${GOOGLE_API_KEY:-}" && -z "${GEMINI_API_KEY:-}" ]]; then
  echo "GEMINI_API_KEY or GOOGLE_API_KEY is required in .env" >&2
  exit 1
fi

# Prefer explicit GOOGLE_API_KEY since paperbanana/google-genai prioritizes it.
# Always export: a plain KEY=value line in .env only sets a shell variable,
# which child processes would not inherit.
export GOOGLE_API_KEY="${GOOGLE_API_KEY:-${GEMINI_API_KEY:-}}"

# Tunables; each can be overridden from the environment or the .env files.
PB_ESTIMATE_PER_DIAGRAM="${PB_ESTIMATE_PER_DIAGRAM:-0.80}"
PB_MAX_ITERATIONS="${PB_MAX_ITERATIONS:-1}"
PB_VLM_MODEL="${PB_VLM_MODEL:-gemini-2.5-flash}"
PB_IMAGE_MODEL="${PB_IMAGE_MODEL:-gemini-3-pro-image-preview}"
35
# budget_check ESTIMATE_USD
# Abort with status 3 when adding ESTIMATE_USD to the spend reported by
# scripts/budget-guard would exceed the budget. An estimate that is empty,
# non-numeric, or negative is also rejected: it would otherwise turn into NaN
# and make the over-budget comparison silently pass.
budget_check() {
  local estimate="$1"
  node -e '
    const { getBudgetStatus } = require("./scripts/budget-guard");
    const rawEstimate = String(process.argv[1] ?? "");
    const est = Number(rawEstimate);
    if (rawEstimate.trim() === "" || !Number.isFinite(est) || est < 0) {
      console.error(`Blocked: invalid cost estimate "${rawEstimate}"`);
      process.exit(3);
    }
    const s = getBudgetStatus();
    const projected = s.totalUsd + est;
    if (projected > s.budgetUsd) {
      console.error(`Blocked: projected spend ${projected.toFixed(2)} exceeds budget ${s.budgetUsd.toFixed(2)} USD`);
      process.exit(3);
    }
  ' "$estimate"
}
48
+
49
# generate_diagram SLUG CAPTION NOTE
# Budget-check, render docs/diagrams/SLUG.txt into docs/diagrams/SLUG.png with
# paperbanana, then record the estimated spend under NOTE.
generate_diagram() {
  local slug="$1"
  local caption="$2"
  local note="$3"

  budget_check "$PB_ESTIMATE_PER_DIAGRAM"
  paperbanana generate \
    --input "docs/diagrams/${slug}.txt" \
    --caption "$caption" \
    --vlm-provider gemini \
    --vlm-model "$PB_VLM_MODEL" \
    --image-provider google_imagen \
    --image-model "$PB_IMAGE_MODEL" \
    --iterations "$PB_MAX_ITERATIONS" \
    --output "docs/diagrams/${slug}.png"
  node scripts/budget-guard.js --add="$PB_ESTIMATE_PER_DIAGRAM" --source=paperbanana --note="$note"
}

mkdir -p docs/diagrams

# Prompt text for the architecture overview diagram.
cat > docs/diagrams/rlhf-architecture.txt <<'TXT'
The system starts when a user gives explicit thumbs up/down feedback. A capture layer enriches the signal with context and tags.
An action resolver maps the signal to store-learning, store-mistake, or no-action.
A schema validator enforces strict structure before memory promotion.
Valid records are stored in a local memory log categorized as error or learning.
An analytics layer computes quality trends and recurrence patterns.
A prevention-rule engine converts repeated mistakes into hard guardrails.
A DPO export layer pairs learning and error memories into prompt/chosen/rejected JSONL.
All channels (ChatGPT Actions, Claude MCP, Codex MCP, Gemini tools, Amp skills) route through one shared API and policy core.
TXT

# Prompt text for the plugin topology diagram.
cat > docs/diagrams/plugin-topology.txt <<'TXT'
Show a central RLHF Feedback API with five adapters around it.
Adapter 1: ChatGPT via GPT Actions OpenAPI.
Adapter 2: Claude via local MCP server and .mcp.json.
Adapter 3: Codex via MCP server config.toml.
Adapter 4: Gemini via function-calling tool declarations.
Adapter 5: Amp via skills template.
Include bidirectional arrows from each adapter to the API.
Under the API place three internal modules: schema validation, prevention rules, and DPO export.
Add a budget guard module enforcing a strict monthly cost cap of 10 USD.
TXT

generate_diagram "rlhf-architecture" \
  "RLHF Feedback Loop architecture for AI coding agents with schema gate, memory store, prevention rules, and DPO export" \
  "architecture-overview"

generate_diagram "plugin-topology" \
  "Go-to-market plugin topology: ChatGPT Actions, Claude MCP, Codex MCP, Gemini function calling, and Amp skills through one RLHF API core" \
  "plugin-topology"

echo "Generated diagrams:"
ls -la docs/diagrams/*.png