rlhf-feedback-loop 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/CHANGELOG.md +26 -0
  2. package/LICENSE +21 -0
  3. package/README.md +308 -0
  4. package/adapters/README.md +8 -0
  5. package/adapters/amp/skills/rlhf-feedback/SKILL.md +20 -0
  6. package/adapters/chatgpt/INSTALL.md +80 -0
  7. package/adapters/chatgpt/openapi.yaml +292 -0
  8. package/adapters/claude/.mcp.json +8 -0
  9. package/adapters/codex/config.toml +4 -0
  10. package/adapters/gemini/function-declarations.json +95 -0
  11. package/adapters/mcp/server-stdio.js +444 -0
  12. package/bin/cli.js +167 -0
  13. package/config/mcp-allowlists.json +29 -0
  14. package/config/policy-bundles/constrained-v1.json +53 -0
  15. package/config/policy-bundles/default-v1.json +80 -0
  16. package/config/rubrics/default-v1.json +52 -0
  17. package/config/subagent-profiles.json +32 -0
  18. package/openapi/openapi.yaml +292 -0
  19. package/package.json +91 -0
  20. package/plugins/amp-skill/INSTALL.md +52 -0
  21. package/plugins/amp-skill/SKILL.md +31 -0
  22. package/plugins/claude-skill/INSTALL.md +55 -0
  23. package/plugins/claude-skill/SKILL.md +46 -0
  24. package/plugins/codex-profile/AGENTS.md +20 -0
  25. package/plugins/codex-profile/INSTALL.md +57 -0
  26. package/plugins/gemini-extension/INSTALL.md +74 -0
  27. package/plugins/gemini-extension/gemini_prompt.txt +10 -0
  28. package/plugins/gemini-extension/tool_contract.json +28 -0
  29. package/scripts/billing.js +471 -0
  30. package/scripts/budget-guard.js +173 -0
  31. package/scripts/code-reasoning.js +307 -0
  32. package/scripts/context-engine.js +547 -0
  33. package/scripts/contextfs.js +513 -0
  34. package/scripts/contract-audit.js +198 -0
  35. package/scripts/dpo-optimizer.js +208 -0
  36. package/scripts/export-dpo-pairs.js +316 -0
  37. package/scripts/export-training.js +448 -0
  38. package/scripts/feedback-attribution.js +313 -0
  39. package/scripts/feedback-inbox-read.js +162 -0
  40. package/scripts/feedback-loop.js +838 -0
  41. package/scripts/feedback-schema.js +300 -0
  42. package/scripts/feedback-to-memory.js +165 -0
  43. package/scripts/feedback-to-rules.js +109 -0
  44. package/scripts/generate-paperbanana-diagrams.sh +99 -0
  45. package/scripts/hybrid-feedback-context.js +676 -0
  46. package/scripts/intent-router.js +164 -0
  47. package/scripts/mcp-policy.js +92 -0
  48. package/scripts/meta-policy.js +194 -0
  49. package/scripts/plan-gate.js +154 -0
  50. package/scripts/prove-adapters.js +364 -0
  51. package/scripts/prove-attribution.js +364 -0
  52. package/scripts/prove-automation.js +393 -0
  53. package/scripts/prove-data-quality.js +219 -0
  54. package/scripts/prove-intelligence.js +256 -0
  55. package/scripts/prove-lancedb.js +370 -0
  56. package/scripts/prove-loop-closure.js +255 -0
  57. package/scripts/prove-rlaif.js +404 -0
  58. package/scripts/prove-subway-upgrades.js +250 -0
  59. package/scripts/prove-training-export.js +324 -0
  60. package/scripts/prove-v2-milestone.js +273 -0
  61. package/scripts/prove-v3-milestone.js +381 -0
  62. package/scripts/rlaif-self-audit.js +123 -0
  63. package/scripts/rubric-engine.js +230 -0
  64. package/scripts/self-heal.js +127 -0
  65. package/scripts/self-healing-check.js +111 -0
  66. package/scripts/skill-quality-tracker.js +284 -0
  67. package/scripts/subagent-profiles.js +79 -0
  68. package/scripts/sync-gh-secrets-from-env.sh +29 -0
  69. package/scripts/thompson-sampling.js +331 -0
  70. package/scripts/train_from_feedback.py +914 -0
  71. package/scripts/validate-feedback.js +580 -0
  72. package/scripts/vector-store.js +100 -0
  73. package/src/api/server.js +497 -0
@@ -0,0 +1,292 @@
1
+ openapi: 3.1.0
2
+ info:
3
+ title: RLHF Feedback Loop API
4
+ version: 1.1.0
5
+ description: |
6
+ Production API for feedback capture, schema-validated memory promotion,
7
+ prevention rule generation, and DPO export.
8
+ servers:
9
+ - url: http://localhost:8787
10
+ security:
11
+ - bearerAuth: []
12
+ components:
13
+ securitySchemes:
14
+ bearerAuth:
15
+ type: http
16
+ scheme: bearer
17
+ bearerFormat: API Key
18
+ schemas:
19
+ RubricScore:
20
+ type: object
21
+ required: [criterion, score]
22
+ properties:
23
+ criterion:
24
+ type: string
25
+ score:
26
+ type: number
27
+ minimum: 1
28
+ maximum: 5
29
+ evidence:
30
+ type: string
31
+ judge:
32
+ type: string
33
+ CaptureFeedbackRequest:
34
+ type: object
35
+ required: [signal, context]
36
+ properties:
37
+ signal:
38
+ type: string
39
+ enum: [up, down, positive, negative]
40
+ context:
41
+ type: string
42
+ whatWentWrong:
43
+ type: string
44
+ whatToChange:
45
+ type: string
46
+ whatWorked:
47
+ type: string
48
+ rubricScores:
49
+ type: array
50
+ items:
51
+ $ref: '#/components/schemas/RubricScore'
52
+ guardrails:
53
+ type: object
54
+ properties:
55
+ testsPassed:
56
+ type: boolean
57
+ pathSafety:
58
+ type: boolean
59
+ budgetCompliant:
60
+ type: boolean
61
+ tags:
62
+ oneOf:
63
+ - type: array
64
+ items:
65
+ type: string
66
+ - type: string
67
+ skill:
68
+ type: string
69
+ IntentPlanRequest:
70
+ type: object
71
+ required: [intentId]
72
+ properties:
73
+ intentId:
74
+ type: string
75
+ context:
76
+ type: string
77
+ mcpProfile:
78
+ type: string
79
+ bundleId:
80
+ type: string
81
+ approved:
82
+ type: boolean
83
+ paths:
84
+ /healthz:
85
+ get:
86
+ operationId: healthz
87
+ responses:
88
+ '200':
89
+ description: Service health
90
+ '401':
91
+ description: Unauthorized
92
+ /v1/feedback/capture:
93
+ post:
94
+ operationId: captureFeedback
95
+ requestBody:
96
+ required: true
97
+ content:
98
+ application/json:
99
+ schema:
100
+ $ref: '#/components/schemas/CaptureFeedbackRequest'
101
+ responses:
102
+ '200':
103
+ description: Feedback accepted and promoted to memory
104
+ '422':
105
+ description: Feedback recorded but rejected for memory promotion
106
+ '401':
107
+ description: Unauthorized
108
+ /v1/feedback/stats:
109
+ get:
110
+ operationId: getFeedbackStats
111
+ responses:
112
+ '200':
113
+ description: Aggregated feedback statistics
114
+ '401':
115
+ description: Unauthorized
116
+ /v1/intents/catalog:
117
+ get:
118
+ operationId: listIntentCatalog
119
+ parameters:
120
+ - in: query
121
+ name: mcpProfile
122
+ schema:
123
+ type: string
124
+ - in: query
125
+ name: bundleId
126
+ schema:
127
+ type: string
128
+ responses:
129
+ '200':
130
+ description: Intent catalog with risk and checkpoint metadata
131
+ '401':
132
+ description: Unauthorized
133
+ /v1/intents/plan:
134
+ post:
135
+ operationId: planIntent
136
+ requestBody:
137
+ required: true
138
+ content:
139
+ application/json:
140
+ schema:
141
+ $ref: '#/components/schemas/IntentPlanRequest'
142
+ responses:
143
+ '200':
144
+ description: Policy-scoped intent execution plan
145
+ '400':
146
+ description: Invalid intent request
147
+ '401':
148
+ description: Unauthorized
149
+ /v1/feedback/summary:
150
+ get:
151
+ operationId: getFeedbackSummary
152
+ parameters:
153
+ - in: query
154
+ name: recent
155
+ schema:
156
+ type: integer
157
+ default: 20
158
+ responses:
159
+ '200':
160
+ description: Feedback summary text
161
+ '401':
162
+ description: Unauthorized
163
+ /v1/feedback/rules:
164
+ post:
165
+ operationId: generatePreventionRules
166
+ requestBody:
167
+ required: false
168
+ content:
169
+ application/json:
170
+ schema:
171
+ type: object
172
+ properties:
173
+ minOccurrences:
174
+ type: integer
175
+ default: 2
176
+ outputPath:
177
+ type: string
178
+ responses:
179
+ '200':
180
+ description: Prevention rules generated
181
+ '401':
182
+ description: Unauthorized
183
+ /v1/dpo/export:
184
+ post:
185
+ operationId: exportDpoPairs
186
+ requestBody:
187
+ required: false
188
+ content:
189
+ application/json:
190
+ schema:
191
+ type: object
192
+ properties:
193
+ inputPath:
194
+ type: string
195
+ memoryLogPath:
196
+ type: string
197
+ outputPath:
198
+ type: string
199
+ responses:
200
+ '200':
201
+ description: DPO export completed
202
+ '401':
203
+ description: Unauthorized
204
+ /v1/context/construct:
205
+ post:
206
+ operationId: constructContextPack
207
+ requestBody:
208
+ required: false
209
+ content:
210
+ application/json:
211
+ schema:
212
+ type: object
213
+ properties:
214
+ query:
215
+ type: string
216
+ maxItems:
217
+ type: integer
218
+ default: 8
219
+ maxChars:
220
+ type: integer
221
+ default: 6000
222
+ namespaces:
223
+ type: array
224
+ items:
225
+ type: string
226
+ enum:
227
+ - raw_history
228
+ - memory/error
229
+ - memory/learning
230
+ - rules
231
+ - tools
232
+ - provenance
233
+ responses:
234
+ '200':
235
+ description: Context pack created
236
+ '400':
237
+ description: Invalid namespace selection
238
+ '401':
239
+ description: Unauthorized
240
+ /v1/context/evaluate:
241
+ post:
242
+ operationId: evaluateContextPack
243
+ requestBody:
244
+ required: true
245
+ content:
246
+ application/json:
247
+ schema:
248
+ type: object
249
+ required: [packId, outcome]
250
+ properties:
251
+ packId:
252
+ type: string
253
+ outcome:
254
+ type: string
255
+ signal:
256
+ type: string
257
+ notes:
258
+ type: string
259
+ rubricScores:
260
+ type: array
261
+ items:
262
+ $ref: '#/components/schemas/RubricScore'
263
+ guardrails:
264
+ type: object
265
+ properties:
266
+ testsPassed:
267
+ type: boolean
268
+ pathSafety:
269
+ type: boolean
270
+ budgetCompliant:
271
+ type: boolean
272
+ responses:
273
+ '200':
274
+ description: Evaluation recorded
275
+ '400':
276
+ description: Invalid rubric payload
277
+ '401':
278
+ description: Unauthorized
279
+ /v1/context/provenance:
280
+ get:
281
+ operationId: getContextProvenance
282
+ parameters:
283
+ - in: query
284
+ name: limit
285
+ schema:
286
+ type: integer
287
+ default: 50
288
+ responses:
289
+ '200':
290
+ description: Recent provenance events
291
+ '401':
292
+ description: Unauthorized
@@ -0,0 +1,8 @@
1
+ {
2
+ "mcpServers": {
3
+ "rlhf-feedback-loop": {
4
+ "command": "node",
5
+ "args": ["adapters/mcp/server-stdio.js"]
6
+ }
7
+ }
8
+ }
@@ -0,0 +1,4 @@
1
+ # Codex MCP profile (copy into ~/.codex/config.toml or merge section)
2
+ [mcp_servers.rlhf_feedback_loop]
3
+ command = "node"
4
+ args = ["adapters/mcp/server-stdio.js"]
@@ -0,0 +1,95 @@
1
+ {
2
+ "tools": [
3
+ {
4
+ "name": "capture_feedback",
5
+ "description": "Capture thumbs-up/down feedback and promote actionable memories",
6
+ "parameters": {
7
+ "type": "object",
8
+ "properties": {
9
+ "signal": { "type": "string", "enum": ["up", "down"] },
10
+ "context": { "type": "string" },
11
+ "whatWentWrong": { "type": "string" },
12
+ "whatToChange": { "type": "string" },
13
+ "whatWorked": { "type": "string" },
14
+ "rubricScores": {
15
+ "type": "array",
16
+ "items": {
17
+ "type": "object",
18
+ "properties": {
19
+ "criterion": { "type": "string" },
20
+ "score": { "type": "number" },
21
+ "evidence": { "type": "string" },
22
+ "judge": { "type": "string" }
23
+ }
24
+ }
25
+ },
26
+ "guardrails": {
27
+ "type": "object",
28
+ "properties": {
29
+ "testsPassed": { "type": "boolean" },
30
+ "pathSafety": { "type": "boolean" },
31
+ "budgetCompliant": { "type": "boolean" }
32
+ }
33
+ },
34
+ "tags": {
35
+ "type": "array",
36
+ "items": { "type": "string" }
37
+ },
38
+ "skill": { "type": "string" }
39
+ },
40
+ "required": ["signal", "context"]
41
+ },
42
+ "http": {
43
+ "method": "POST",
44
+ "path": "/v1/feedback/capture"
45
+ }
46
+ },
47
+ {
48
+ "name": "feedback_summary",
49
+ "description": "Get a compact summary of recent feedback performance",
50
+ "parameters": {
51
+ "type": "object",
52
+ "properties": {
53
+ "recent": { "type": "integer" }
54
+ }
55
+ },
56
+ "http": {
57
+ "method": "GET",
58
+ "path": "/v1/feedback/summary"
59
+ }
60
+ },
61
+ {
62
+ "name": "prevention_rules",
63
+ "description": "Generate prevention rules from repeated mistakes",
64
+ "parameters": {
65
+ "type": "object",
66
+ "properties": {
67
+ "minOccurrences": { "type": "integer" }
68
+ }
69
+ },
70
+ "http": {
71
+ "method": "POST",
72
+ "path": "/v1/feedback/rules"
73
+ }
74
+ },
75
+ {
76
+ "name": "plan_intent",
77
+ "description": "Generate a policy-aware execution plan with checkpoint requirements",
78
+ "parameters": {
79
+ "type": "object",
80
+ "properties": {
81
+ "intentId": { "type": "string" },
82
+ "context": { "type": "string" },
83
+ "mcpProfile": { "type": "string" },
84
+ "bundleId": { "type": "string" },
85
+ "approved": { "type": "boolean" }
86
+ },
87
+ "required": ["intentId"]
88
+ },
89
+ "http": {
90
+ "method": "POST",
91
+ "path": "/v1/intents/plan"
92
+ }
93
+ }
94
+ ]
95
+ }