bmad-plus 0.9.2 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ /**
2
+ * BMAD+ `mem` Command — portable Karpathy memory loop (Pillar 3)
3
+ *
4
+ * Exposes tools/cli/lib/memory-journal.js through the CLI so recall / write /
5
+ * reinforce behave IDENTICALLY no matter which agent CLI is driving
6
+ * (claude-code, gemini-cli, codex-cli, cursor, opencode, aider, antigravity):
7
+ *
8
+ * bmad-plus mem recall <query...> — ranked retrieval over journal + memory files
9
+ * bmad-plus mem write — append a structured event (--agent --cli --model --task --outcome)
10
+ * bmad-plus mem reinforce — apply a reward to a pattern score; any resulting
11
+ * promotion is persisted as PROPOSED, never applied
12
+ *
13
+ * NOTE: registered as `mem` (not `memory`) — `memory` is already taken by the
14
+ * brain scanner in tools/cli/commands/memory.js and must not be disturbed.
15
+ *
16
+ * Design rules (mirroring memory-journal.js):
17
+ * - The clock is read ONCE inside the action (new Date().toISOString()) and
18
+ * injected into every library call. Nothing reads the clock at import time.
19
+ * - The run* handlers take an injected `now` + `log`, so tests drive them
20
+ * deterministically against a tmp dir (tests/unit/memory-journal-cmd.test.js).
21
+ * - Output goes through plain log lines (+ --json for machine consumption) so
22
+ * ANY driving CLI can parse results — no interactive prompts, no TTY needs.
23
+ * - Governance guard: reinforce may PROPOSE a pattern promotion when the
24
+ * posterior mean crosses the threshold, but the record is always written
25
+ * with status PROPOSED (memory-journal.appendPromotion forces it anyway).
26
+ *
27
+ * Author: Laurent Rochetta
28
+ */
29
+
30
+ 'use strict';
31
+
32
+ const path = require('node:path');
33
+ const fs = require('node:fs');
34
+ const mj = require('../lib/memory-journal');
35
+
36
+ // ── Pattern score store ──────────────────────────────────────────────────────
37
+ // Lives next to the journal in the north-star scope (.bmad/memory/). Keyed by
38
+ // patternId (the `### heading` in patterns.md). This file is CLI-owned state;
39
+ // memory-journal.js stays a pure library and never touches it.
40
+
41
+ const SCORES_RELPATH = path.join('.bmad', 'memory', 'pattern-scores.json');
42
+
43
+ // Promotion proposal thresholds (candidate → validated). Tuned conservatively:
44
+ // the posterior mean is decayed-Bayesian (memory-journal.updatePatternScore),
45
+ // so 0.7 over >= 3 updates means a genuinely recent, repeated success signal.
46
+ const PROMOTION_MEAN_THRESHOLD = 0.7;
47
+ const PROMOTION_MIN_UPDATES = 3;
48
+
49
/**
 * Build the location of the pattern score store for a project.
 *
 * @param {string} baseDir - Directory the store path is resolved against.
 * @returns {string} `baseDir` joined with SCORES_RELPATH.
 */
function scoresPath(baseDir) {
  const storeFile = path.join(baseDir, SCORES_RELPATH);
  return storeFile;
}
52
+
53
/**
 * Load the pattern score store (a JSON object keyed by pattern id).
 *
 * Best-effort by design: a missing, unreadable, or malformed store yields an
 * empty map so reinforcement restarts from priors instead of blocking the loop.
 *
 * @param {string} baseDir - Project directory containing the store.
 * @returns {object} Parsed score map, or `{}` when no usable store exists.
 */
function readScores(baseDir) {
  const file = scoresPath(baseDir);
  if (!fs.existsSync(file)) return {};
  try {
    const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
    // Only a plain JSON object is a valid store. An array also has
    // typeof 'object', but string keys assigned to it are dropped by
    // JSON.stringify, so accepting one would silently lose every score.
    const isScoreMap = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isScoreMap ? parsed : {};
  } catch {
    // Corrupt store never blocks the loop — reinforcement restarts from priors.
    return {};
  }
}
64
+
65
/**
 * Persist the pattern score map as pretty-printed JSON (2-space indent,
 * trailing newline), creating the parent directory when it does not exist.
 *
 * @param {string} baseDir - Project directory containing the store.
 * @param {object} scores - Score map to serialize.
 * @returns {void}
 */
function writeScores(baseDir, scores) {
  const target = scoresPath(baseDir);
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(scores, null, 2) + '\n';
  fs.writeFileSync(target, serialized, 'utf8');
}
70
+
71
+ // ── Flag parsing helpers ─────────────────────────────────────────────────────
72
+
73
/**
 * Map CLI flags to the memory-journal signals shape { evalScore, acceptance, ci }.
 *
 * Only flags that were actually supplied (not undefined/null) produce a key.
 * Keyword values are matched case-insensitively and ignoring surrounding
 * whitespace, consistently for both `--accept` and `--ci`; anything else is
 * converted with Number() and passed through as-is (a non-numeric value
 * therefore yields NaN).
 *
 * @param {object} opts - Parsed CLI options; reads `eval`, `accept`, `ci`.
 * @returns {{evalScore?: number, acceptance?: boolean|number, ci?: 'pass'|'fail'|number}}
 */
function parseSignals(opts) {
  const signals = {};
  if (opts.eval !== undefined && opts.eval !== null) {
    signals.evalScore = Number(opts.eval);
  }
  if (opts.accept !== undefined && opts.accept !== null) {
    const raw = String(opts.accept).trim().toLowerCase();
    if (raw === 'true' || raw === 'yes') signals.acceptance = true;
    else if (raw === 'false' || raw === 'no') signals.acceptance = false;
    else signals.acceptance = Number(opts.accept);
  }
  if (opts.ci !== undefined && opts.ci !== null) {
    // Normalize before comparing so `--ci PASS` is not coerced to NaN.
    const raw = String(opts.ci).trim().toLowerCase();
    signals.ci = raw === 'pass' || raw === 'fail' ? raw : Number(opts.ci);
  }
  return signals;
}
90
+
91
/**
 * Collapse text onto one line and cap its length for log output.
 *
 * All whitespace runs become a single space and the ends are trimmed. When the
 * result is longer than `max` UTF-16 code units it is cut and suffixed with an
 * ellipsis so the returned string is at most `max` units long (for max >= 1).
 *
 * @param {*} text - Value to render; coerced with String().
 * @param {number} [max=120] - Maximum length of the returned string.
 * @returns {string} The single-line, possibly shortened text.
 */
function truncate(text, max = 120) {
  const oneLine = String(text).replace(/\s+/g, ' ').trim();
  if (oneLine.length <= max) return oneLine;

  // Clamp so a max of 0 (or less) cannot turn into a negative slice end,
  // which would keep almost the whole string instead of shortening it.
  let cut = Math.max(0, max - 1);

  // Never end on the first half of a surrogate pair: a lone high surrogate
  // renders as a replacement character and is invalid in UTF-8 output.
  if (cut > 0) {
    const lastUnit = oneLine.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  }

  return `${oneLine.slice(0, cut)}…`;
}
95
+
96
+ // ── Action handlers (dependency-injected, unit-testable) ─────────────────────
97
+
98
/**
 * `mem recall <query...>` handler: ask memory-journal for ranked matches and
 * report them through the injected logger.
 *
 * With `json` set, a single JSON document `{ query, scope, count, results }`
 * is logged. Otherwise a summary line is logged, followed by two lines per
 * match (score + origin, then a shortened text preview).
 *
 * @param {string} query - Search text forwarded to memory-journal.recall.
 * @param {object} opts - { baseDir, scope, portfolioDir, limit, now, json, log }
 *   `now` is the INJECTED clock; this function never reads the clock itself.
 * @returns {object[]} Whatever memory-journal.recall returned, unmodified.
 */
function runRecall(query, opts = {}) {
  const {
    baseDir = process.cwd(),
    scope = 'project',
    portfolioDir = null,
    limit = 8,
    now = null,
    json = false,
    log = console.log,
  } = opts;

  const ranked = mj.recall(query, { baseDir, scope, portfolioDir, limit, now });
  const total = ranked.length;

  if (json) {
    const payload = { query, scope, count: total, results: ranked };
    log(JSON.stringify(payload, null, 2));
    return ranked;
  }

  if (total === 0) {
    log(`mem recall: no matches for "${query}" (scope: ${scope})`);
    return ranked;
  }

  log(`mem recall: ${total} match(es) for "${query}" (scope: ${scope})`);
  ranked.forEach((entry, index) => {
    // Events are addressed as source#ref, notes as source § ref.
    let origin;
    if (entry.kind === 'event') {
      origin = `event ${entry.source}#${entry.ref}`;
    } else {
      origin = `note ${entry.source} § ${entry.ref}`;
    }
    const rank = index + 1;
    log(` ${rank}. [${entry.score.toFixed(3)}] ${origin}`);
    log(` ${truncate(entry.text)}`);
  });
  return ranked;
}
128
+
129
/**
 * `mem write` handler: append one structured event to the journal via
 * memory-journal.appendEvent and report what was stored.
 *
 * @param {object} opts - { baseDir, agent, cli, model, task, outcome, signals, now, json, log }
 *   `now` is the INJECTED clock and becomes the event `ts`.
 * @returns {object} The event object returned by memory-journal.appendEvent.
 */
function runWrite(opts = {}) {
  const {
    baseDir = process.cwd(),
    agent,
    cli = null,
    model = null,
    task = '',
    outcome = undefined,
    signals = {},
    now,
    json = false,
    log = console.log,
  } = opts;

  const draft = { ts: now, agent, cli, model, task, outcome, signals };
  const event = mj.appendEvent(baseDir, draft);

  if (json) {
    log(JSON.stringify({ written: event }, null, 2));
    return event;
  }

  // Human-readable form: unset optional fields are shown as '-'.
  const cliLabel = event.cli || '-';
  const modelLabel = event.model || '-';
  const outcomeLabel = event.outcome || '-';
  log(`mem write: event ${event.id} appended (${event.ts})`);
  log(` agent=${event.agent} cli=${cliLabel} model=${modelLabel} outcome=${outcomeLabel}`);
  if (event.task) {
    log(` task: ${truncate(event.task)}`);
  }
  return event;
}
158
+
159
/**
 * `mem reinforce` — fold reward signals into a pattern's score.
 *
 * Reads the CLI-owned pattern-scores.json, computes a reward from `signals`
 * (memory-journal.computeReward), derives the next score from the previous one
 * (memory-journal.updatePatternScore) and writes the store back. When the new
 * mean reaches PROMOTION_MEAN_THRESHOLD with at least PROMOTION_MIN_UPDATES
 * updates, a promotion is proposed and appended through memory-journal; this
 * function never applies a promotion itself.
 *
 * Pattern ids are free-form headings, so the store is only ever accessed
 * through OWN properties: an id such as `constructor` or `toString` must not
 * pick up an inherited Object.prototype member as its "previous" score, and
 * `__proto__` must be stored as a regular key rather than replacing the
 * prototype (which would drop the update from the serialized store).
 *
 * @param {object} opts - { baseDir, patternId, signals, evidence, now, json, log }
 *   `now` is the INJECTED clock, forwarded as `ts` to the library calls.
 * @returns {{patternId:string, reward:number, previous:object|null, next:object, promotion:object|null}}
 * @throws {TypeError} When `patternId` is missing, not a string, or blank.
 */
function runReinforce(opts = {}) {
  const { baseDir = process.cwd(), patternId, signals = {}, evidence = [], now, json = false, log = console.log } = opts;

  if (typeof patternId !== 'string' || patternId.trim().length === 0) {
    throw new TypeError('mem reinforce: --pattern <id> is required (the pattern heading in patterns.md)');
  }

  const reward = mj.computeReward(signals);
  const scores = readScores(baseDir);

  // Own-property lookup only — see the note on free-form pattern ids above.
  const hasStoredScore = Object.prototype.hasOwnProperty.call(scores, patternId);
  const previous = hasStoredScore ? scores[patternId] || null : null;
  const next = mj.updatePatternScore(previous, reward, { ts: now });

  // defineProperty (not plain assignment) so that the key `__proto__` becomes
  // an ordinary enumerable entry that JSON.stringify will persist.
  Object.defineProperty(scores, patternId, {
    value: next,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  writeScores(baseDir, scores);

  // Governance: promotion is only ever PROPOSED here. appendPromotion() forces
  // status PROPOSED on disk regardless, and applying requires
  // assertPromotionApplicable() to pass with a human/Shield approval.
  let promotion = null;
  if (next.mean >= PROMOTION_MEAN_THRESHOLD && next.updates >= PROMOTION_MIN_UPDATES) {
    promotion = mj.appendPromotion(
      baseDir,
      mj.proposePromotion({
        patternId,
        ts: now,
        reason: `posterior mean ${next.mean.toFixed(3)} >= ${PROMOTION_MEAN_THRESHOLD} over ${next.updates} update(s)`,
        evidence,
        score: next,
      })
    );
  }

  const result = { patternId, reward, previous, next, promotion };

  if (json) {
    log(JSON.stringify(result, null, 2));
    return result;
  }

  // A pattern without a stored score starts from the library's initial Elo.
  const prevElo = previous ? previous.elo : mj.INITIAL_PATTERN_SCORE.elo;
  log(`mem reinforce: pattern "${patternId}"`);
  log(` reward ${reward.toFixed(3)} (from ${JSON.stringify(signals)})`);
  log(` elo ${prevElo.toFixed(1)} -> ${next.elo.toFixed(1)}`);
  log(` mean ${next.mean.toFixed(3)} (alpha=${next.alpha.toFixed(2)}, beta=${next.beta.toFixed(2)}, updates=${next.updates})`);
  if (promotion) {
    log(` promotion PROPOSED (${promotion.id}): ${promotion.fromStatus} -> ${promotion.toStatus}`);
    log(` awaiting human/Shield approval — never auto-applied (governance guard)`);
  } else {
    log(` promotion: none proposed (needs mean >= ${PROMOTION_MEAN_THRESHOLD} and >= ${PROMOTION_MIN_UPDATES} updates)`);
  }
  return result;
}
221
+
222
+ // ── Commander wiring ─────────────────────────────────────────────────────────
223
+
224
+ module.exports = {
225
+ command: 'mem <action> [query...]',
226
+ description: 'Karpathy memory loop — recall | write | reinforce (portable across agent CLIs)',
227
+ options: [
228
+ ['-d, --directory <path>', 'Project directory (default: current directory)'],
229
+ ['--scope <scope>', 'Recall scope: project | portfolio', 'project'],
230
+ ['--portfolio <path>', 'Portfolio brain directory (used with --scope portfolio)'],
231
+ ['--limit <n>', 'Max recall results', '8'],
232
+ ['--agent <name>', 'Agent that produced the event (write)'],
233
+ ['--cli <name>', 'Driving CLI: claude-code, gemini-cli, codex-cli, cursor, opencode, aider, antigravity'],
234
+ ['--model <id>', 'Model id used (model-agnostic: claude/gpt/gemini/local)'],
235
+ ['--task <text>', 'What was attempted (write)'],
236
+ ['--outcome <outcome>', 'success | failure | partial | abandoned (write)'],
237
+ ['--pattern <id>', 'Pattern id to reinforce (its heading in patterns.md)'],
238
+ ['--eval <score>', 'Eval suite score in [0,1] (write/reinforce signal)'],
239
+ ['--accept <bool>', 'User acceptance: true | false | [0,1] (write/reinforce signal)'],
240
+ ['--ci <result>', 'CI outcome: pass | fail | [0,1] (write/reinforce signal)'],
241
+ ['--evidence <ids>', 'Comma-separated journal event ids backing a reinforcement'],
242
+ ['--json', 'Machine-readable JSON output'],
243
+ ],
244
+ subcommands: {
245
+ recall: 'Ranked retrieval over the journal + memory files',
246
+ write: 'Append a structured event to .bmad/memory/journal.ndjson',
247
+ reinforce: 'Apply a reward to a pattern score (promotions PROPOSED only)',
248
+ },
249
+
250
+ action: async (action, query, options = {}) => {
251
+ // Clock is read HERE, at call time, then injected everywhere below —
252
+ // memory-journal.js never reads it (see its determinism contract).
253
+ const now = new Date().toISOString();
254
+ const baseDir = path.resolve(options.directory || process.cwd());
255
+ const json = Boolean(options.json);
256
+ const queryText = Array.isArray(query) ? query.join(' ') : query || '';
257
+
258
+ try {
259
+ if (action === 'recall') {
260
+ runRecall(queryText, {
261
+ baseDir,
262
+ scope: options.scope || 'project',
263
+ portfolioDir: options.portfolio || null,
264
+ limit: Number(options.limit || 8),
265
+ now,
266
+ json,
267
+ });
268
+ } else if (action === 'write') {
269
+ runWrite({
270
+ baseDir,
271
+ agent: options.agent,
272
+ cli: options.cli || null,
273
+ model: options.model || null,
274
+ task: options.task || '',
275
+ outcome: options.outcome,
276
+ signals: parseSignals(options),
277
+ now,
278
+ json,
279
+ });
280
+ } else if (action === 'reinforce') {
281
+ runReinforce({
282
+ baseDir,
283
+ patternId: options.pattern,
284
+ signals: parseSignals(options),
285
+ evidence: options.evidence ? String(options.evidence).split(',').map(s => s.trim()).filter(Boolean) : [],
286
+ now,
287
+ json,
288
+ });
289
+ } else {
290
+ console.error(`mem: unknown action '${action}' — expected recall | write | reinforce`);
291
+ process.exitCode = 1;
292
+ }
293
+ } catch (err) {
294
+ console.error(`mem ${action}: ${err.message}`);
295
+ process.exitCode = 1;
296
+ }
297
+ },
298
+
299
+ // Exported for tests + future MCP wrapper (memory.recall / memory.write tools)
300
+ _internal: {
301
+ runRecall,
302
+ runWrite,
303
+ runReinforce,
304
+ parseSignals,
305
+ readScores,
306
+ writeScores,
307
+ SCORES_RELPATH,
308
+ PROMOTION_MEAN_THRESHOLD,
309
+ PROMOTION_MIN_UPDATES,
310
+ },
311
+ };
@@ -0,0 +1,125 @@
1
+ # memory-journal.js — Karpathy Learning Layer core (Pillar 3)
2
+
3
+ Portable data structures + helpers for the BMAD+ **memory → reward → reinforcement** loop.
4
+ Prompt-level learning only: scores steer retrieval and pattern promotion — there is
5
+ **no base-model fine-tuning** (see `audit/2026-07-01/north-star/registry.yaml` →
6
+ `memory.reward_signal.applies_to`).
7
+
8
+ Builds **on top of** the existing `pack-memory` (Zecher, Karpathy guardrails G1–G4,
9
+ `decisions/lessons/patterns/context.md` templates). It never modifies those files or
10
+ `tools/cli/lib/memory-init.js` — it adds a structured, machine-readable layer beside them.
11
+
12
+ ```
13
+ .bmad/memory/journal.ndjson ← structured event log (this module, north-star scope)
14
+ .bmad/memory/promotions.ndjson ← governance queue (always PROPOSED)
15
+ .agents/memory/*.md ← human memory (pack-memory, current layout) — READ ONLY here
16
+ .bmad/memory/*.md ← human memory (north-star layout) — READ ONLY here
17
+ ```
18
+
19
+ ## Hard rules baked into the module
20
+
21
+ | Rule | Enforcement |
22
+ |---|---|
23
+ | No hidden clock / randomness | `ts` is a **required, caller-injected** field; ids are content hashes (sha256). The module never calls `Date.now()` or `Math.random()` — at import or runtime. |
24
+ | Node stdlib only | `fs`, `path`, `crypto`. No network, no native deps → runs identically under every CLI. |
25
+ | Journal is append-only, corruption-tolerant | `readJournal`/`readPromotions` skip torn lines instead of throwing (concurrent CLIs may write). |
26
+ | Promotions are never auto-applied | `proposePromotion()` only emits `status: 'PROPOSED'`; `appendPromotion()` **forces** `PROPOSED` + clears approval fields on disk even for tampered records; `assertPromotionApplicable()` throws unless `status === 'APPROVED'` **and** `approvedBy` names a human/Shield reviewer. |
27
+
28
+ ## API
29
+
30
+ ### 1. Journal
31
+
32
+ ```js
33
+ const mj = require('./memory-journal');
34
+
35
+ mj.appendEvent(projectDir, {
36
+ ts: new Date().toISOString(), // REQUIRED — injected by the caller
37
+ agent: 'forge', // REQUIRED
38
+ cli: 'claude-code', // claude-code | gemini-cli | antigravity | cursor | codex-cli | opencode | aider
39
+ model: 'claude', // model-agnostic by contract (claude/gpt/gemini/local)
40
+ task: 'refactor postgres pooling',
41
+ outcome: 'success', // success | failure | partial | abandoned
42
+ signals: { evalScore: 0.9, acceptance: true, ci: 'pass' },
43
+ artifactHashes: ['abc123'], // traceability to produced artifacts
44
+ });
45
+
46
+ mj.readJournal(projectDir); // → events[], oldest first, corrupt lines skipped
47
+ ```
48
+
49
+ ### 2. Recall (lexical first cut + vector-backend seam)
50
+
51
+ ```js
52
+ mj.recall('postgres pooling', {
53
+ baseDir: projectDir,
54
+ scope: 'project', // or 'portfolio' + portfolioDir: 'D:/travail/DEV/_brain'
55
+ limit: 8,
56
+ now: new Date().toISOString(), // optional injected clock → recency decay on events
57
+ halfLifeDays: 30,
58
+ });
59
+ // → [{ score, kind: 'event'|'note', source, ref, text, event? }] ranked desc
60
+ ```
61
+
62
+ Sources merged: `journal.ndjson` events + `### `-sectioned entries from
63
+ `decisions.md` / `lessons.md` / `patterns.md` in **both** `.bmad/memory/` (north-star)
64
+ and `.agents/memory/` (current pack-memory layout), plus `<portfolioDir>/memory/*.md`
65
+ when `scope: 'portfolio'`.
66
+
67
+ **ChromaDB seam** — pass `backend: { search(query, opts) }` and ranking is delegated
68
+ wholesale to it. The intended production backend is the existing RAG stack
69
+ (`mcp-server/rag.py`: ChromaDB + SentenceTransformers — `registry.yaml → memory.index`).
70
+ Backends must return the same entry shape as the lexical fallback, so callers never
71
+ know which engine served them. The lexical scorer is the zero-dependency fallback for
72
+ machines without Python provisioned.
73
+
74
+ ### 3. Reward + pattern score
75
+
76
+ ```js
77
+ const reward = mj.computeReward({ evalScore: 0.8, acceptance: true, ci: 'fail' });
78
+ // weights eval 0.5 / acceptance 0.3 / ci 0.2 (registry.yaml → memory.reward_signal.inputs)
79
+ // missing signals renormalize the remaining weights; result always in [0, 1]
80
+
81
+ let score = mj.INITIAL_PATTERN_SCORE; // { elo: 1200, alpha: 1, beta: 1, mean: 0.5, ... }
82
+ score = mj.updatePatternScore(score, reward, { ts: eventTs });
83
+ ```
84
+
85
+ Two complementary estimators per pattern:
86
+
87
+ - **Elo** (`k=32`, baseline 1200): `elo' = elo + K·(reward − expected)` — fast-moving,
88
+ ordinal, used to **rank** patterns in recall. Fresh pattern + reward 1 → 1216.
89
+ - **Decayed Bayesian (Beta)**: evidence decays multiplicatively toward the uniform
90
+ prior (1,1) — per-update (`decay=0.98`) and time-based (`halfLifeDays=90`, only when
91
+ `ts` is injected) — so `mean = α/(α+β)` tracks the **recent** success rate, used for
92
+ **promotion thresholds** (`candidate → validated → deprecated` in `patterns.md`).
93
+
94
+ Pure function: never mutates input, never reads the clock.
95
+
96
+ ### 4. Governance guard
97
+
98
+ ```js
99
+ const p = mj.proposePromotion({ patternId: 'chromadb batch ingestion', ts, reason: 'mean 0.82 / 12 events', evidence: [eventIds] });
100
+ mj.appendPromotion(projectDir, p); // persisted as PROPOSED, always
101
+ // ... a human / Shield reviewer flips it to APPROVED with approvedBy elsewhere ...
102
+ mj.assertPromotionApplicable(approved); // the gate every apply path MUST call
103
+ ```
104
+
105
+ Governed by **Shield** (`registry.yaml → memory.reward_signal.governed_by`): bounded
106
+ self-modification, versioned (append-only ndjson) and reversible (a promotion record
107
+ never rewrites history; a rollback is just another proposal).
108
+
109
+ ## Testing
110
+
111
+ ```
112
+ npx jest tests/unit/memory-journal.test.js
113
+ ```
114
+
115
+ 32 tests: append/recall round-trip on a tmp dir, corrupt-line tolerance, recency decay
116
+ with injected clock, portfolio scope, backend-seam delegation, exact Elo/Beta math,
117
+ purity, and the anti-tamper governance guard.
118
+
119
+ ## Future wiring (done by the orchestrator, not this module)
120
+
121
+ - **MCP tools** `memory.write` / `memory.recall` in `mcp-server/` — thin wrappers over
122
+ this ndjson contract so every MCP-capable CLI shares one memory (Pillar 5).
123
+ - **CLI commands** `bmad-plus mem recall|write|reinforce` — registered as `mem`, not `memory`, because `memory` is already taken by the brain scanner in `tools/cli/commands/memory.js`.
124
+ - **Zecher consolidation**: pack-memory's archivist reads `journal.ndjson` during
125
+ session consolidation and proposes pattern promotions via `proposePromotion()`.
Binary file