create-byan-agent 2.22.0 → 2.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +104 -0
- package/install/src/byan-v2/index.js +1 -1
- package/install/templates/.claude/hooks/drain-advisory.js +85 -0
- package/install/templates/.claude/hooks/lib/failure-detector.js +18 -4
- package/install/templates/.claude/settings.json +4 -0
- package/install/templates/.claude/skills/byan-insight/SKILL.md +56 -0
- package/install/templates/.claude/workflows/check-implementation-readiness.js +1 -1
- package/install/templates/.githooks/pre-commit +21 -2
- package/install/templates/.github/agents/bmad-agent-byan.md +6 -6
- package/install/templates/.github/agents/bmad-agent-skeptic.md +1 -1
- package/install/templates/_byan/_config/agent-manifest.csv +2 -0
- package/install/templates/_byan/core/activation/soul-activation.md +3 -3
- package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-insight-digest.js +31 -0
- package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-sync-stubs.js +51 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/advisory-autofeed.js +83 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/insight-harvest.js +220 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/outcome-buffer.js +64 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/stub-sync.js +158 -0
- package/install/templates/_byan/mcp/byan-mcp-server/server.js +58 -0
- package/install/templates/_byan/worker/workers.md +8 -7
- package/install/templates/_byan/workflow/simple/byan/feature-workflow.md +2 -2
- package/package.json +1 -1
- package/src/loadbalancer/loadbalancer.js +1 -1
- package/install/templates/.claude/skills/byan-byan-test/SKILL.md +0 -12
- package/install/templates/.claude/skills/byan-test-dynamic/SKILL.md +0 -7
- package/src/core/dispatcher/execution-router.js +0 -66
package/CHANGELOG.md
CHANGED
|
@@ -9,6 +9,110 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
9
9
|
|
|
10
10
|
## [Unreleased]
|
|
11
11
|
|
|
12
|
+
## [2.25.0] - 2026-06-09
|
|
13
|
+
|
|
14
|
+
### Added - Advisory auto-feed (BYAN learns from each session, automatically)
|
|
15
|
+
|
|
16
|
+
The insight loop observed and proposed; the missing half was the LEARNING. BYAN's
|
|
17
|
+
advisory ledgers (ELO trust, the suitability ledger) updated only when the agent
|
|
18
|
+
remembered to call a record tool. This wires the automatic half — outcomes are
|
|
19
|
+
recorded at end of turn, with no agent action — while behavior surfaces stay
|
|
20
|
+
human-gated.
|
|
21
|
+
|
|
22
|
+
- **Capture.** The `byan_outcome_log` MCP tool appends one validated advisory
|
|
23
|
+
outcome to a buffer (cheap; it does not write a ledger directly). kind=elo logs
|
|
24
|
+
`{domain, result}`; kind=suitability logs `{model, leafId, success}`.
|
|
25
|
+
- **Drain.** `.claude/hooks/drain-advisory.js` is a Stop hook that, at end of each
|
|
26
|
+
turn, records the buffered outcomes into the ELO ledger (full Glicko update) and
|
|
27
|
+
the suitability ledger, advancing a line cursor for idempotency. It is strictly
|
|
28
|
+
non-blocking (all work in try/catch, emits `{continue:true}` and exit 0 on every
|
|
29
|
+
path) and crosses the ESM/CJS boundary (the CJS ELO engine via require, the ESM
|
|
30
|
+
suitability store via dynamic import).
|
|
31
|
+
- **Advisory-only.** The loop writes only the buffer and the two advisory ledgers.
|
|
32
|
+
Behavior surfaces (routing, personas, mantra thresholds) are left untouched —
|
|
33
|
+
those stay a human decision, consistent with the insight loop's gated philosophy.
|
|
34
|
+
- 71 tests (the pure planners, the buffer, and a drain-hook e2e with ledger
|
|
35
|
+
snapshot/restore) plus a live smoke test recording a real Glicko update. The tool
|
|
36
|
+
and hook ship in the template; the hook registers alongside the existing Stop
|
|
37
|
+
hooks.
|
|
38
|
+
- Explicit follow-ups (out of this scope): the adversarial verdict panel that would
|
|
39
|
+
feed suitability without a manual log, and a fact-graph-derived ELO source.
|
|
40
|
+
|
|
41
|
+
## [2.24.0] - 2026-06-09
|
|
42
|
+
|
|
43
|
+
### Added - Session insight loop (gated self-improvement)
|
|
44
|
+
|
|
45
|
+
BYAN already has advisory learning surfaces (ELO trust, the suitability ledger)
|
|
46
|
+
and the native Claude Code hooks already leave outcome trails on disk, but the
|
|
47
|
+
loop was open: the agent had to read and act on them by hand. This closes it,
|
|
48
|
+
under a strict gated philosophy.
|
|
49
|
+
|
|
50
|
+
- **Harvester** `_byan/mcp/byan-mcp-server/lib/insight-harvest.js` +
|
|
51
|
+
`bin/byan-insight-digest.js` + the `byan_insight_digest` MCP tool: read the
|
|
52
|
+
native trails (`tool-log.jsonl` health, strict `audit.log` recurring gaps,
|
|
53
|
+
the suitability ledger routing outcomes, the ELO profile trends) and aggregate
|
|
54
|
+
them into a digest with conservative, GATED proposals. Pure aggregation +
|
|
55
|
+
IO-isolated reader, mirroring the template-fidelity pattern.
|
|
56
|
+
- **Gated by design.** The harvester only READS; it writes nothing to a behavior
|
|
57
|
+
surface (routing, personas, mantra thresholds). Every proposal carries
|
|
58
|
+
`gated: true` and is surfaced for a human to ratify — an agent that rewrote its
|
|
59
|
+
own routing on a heuristic would be the silent-downgrade BYAN exists to prevent.
|
|
60
|
+
- **Skill** `byan-insight` presents the digest as a gated improvement proposal
|
|
61
|
+
(observe, propose, human ratifies), consistent with the advisory ELO /
|
|
62
|
+
suitability doctrine.
|
|
63
|
+
- **Guard false-positive fix.** `tool-failure-guard` flagged any tool whose result
|
|
64
|
+
echoed the literal phrase "internal error" as a failure, exempting only
|
|
65
|
+
Write/Edit/Read. Bash (diagnostic stdout) and MCP tools (echoed stored data) now
|
|
66
|
+
join the echo-heavy set: their `is_error` flag is trusted, content patterns are
|
|
67
|
+
not. A genuine failure still sets `is_error`. Caught live (a Bash log-grep
|
|
68
|
+
blocked the session twice) and covered by unit + e2e tests.
|
|
69
|
+
- 43 harvester unit tests + the detector tests; the e2e guard tests moved their
|
|
70
|
+
content-pattern cases onto a non-echo tool. The tool and skill ship in the
|
|
71
|
+
template.
|
|
72
|
+
|
|
73
|
+
### Changed - Closed the fused-route and output-folder legacy debts
|
|
74
|
+
|
|
75
|
+
- Removed the dead parallel router `src/core/dispatcher/execution-router.js` (zero
|
|
76
|
+
live consumers) and its test; the routing docs (`workers.md`,
|
|
77
|
+
`feature-workflow.md`) and the loadbalancer architecture comment now point only
|
|
78
|
+
to `byan_dispatch` and its two-axis model (strategy from score, model tier from
|
|
79
|
+
nature).
|
|
80
|
+
- Standardized the documented output folder from the legacy `_bmad-output/` to the
|
|
81
|
+
runtime's `_byan-output/` across the agent and platform docs plus an inert config
|
|
82
|
+
default. Left untouched on purpose: the deliberate back-compat read in
|
|
83
|
+
`agent-packager.js` (recovers agent creations from older installs under
|
|
84
|
+
`_bmad-output/bmb-creations`), the migration guides, and the anti-regression
|
|
85
|
+
tests that assert the old name is gone.
|
|
86
|
+
|
|
87
|
+
## [2.23.0] - 2026-06-09
|
|
88
|
+
|
|
89
|
+
### Added - Stub path normalizer + a 5th pre-commit gate (no _bmad/@bmad drift)
|
|
90
|
+
|
|
91
|
+
The installer generated platform stubs (`.codex/prompts`, `.github/agents`,
|
|
92
|
+
`.claude/skills`) across many versions; older generators wrote the legacy path
|
|
93
|
+
layout (`_bmad/*/agents/X.md`, `@bmad/bmm/agents/X.md`,
|
|
94
|
+
`@bmad-output/bmb-creations/X/X.md`), so the tracked corpus carried a mix of stale
|
|
95
|
+
path forms while the agent source files stayed clean. This adds the mechanism that
|
|
96
|
+
removes the drift and blocks its return.
|
|
97
|
+
|
|
98
|
+
- **Tool** `_byan/mcp/byan-mcp-server/lib/stub-sync.js` + `bin/byan-sync-stubs.js`:
|
|
99
|
+
normalizes stale `_bmad/` and `@bmad/` PATH tokens to the `_byan/` canonical
|
|
100
|
+
layout, in place and surgically. The `@bmad-<word>` invocation syntax and the
|
|
101
|
+
`_bmad-output/` artifact dir are preserved; no stub is overwritten wholesale, so
|
|
102
|
+
the github full-copies and hand-authored skills keep their content. `--check`
|
|
103
|
+
reports any residual stale ref and exits non-zero.
|
|
104
|
+
- **5th pre-commit gate.** `.githooks/pre-commit` runs `byan-sync-stubs --check`
|
|
105
|
+
after the template-fidelity gate, blocking a commit whose tracked stubs have
|
|
106
|
+
drifted. It self-disables when the tool or the stub dirs are absent
|
|
107
|
+
(installed-user no-op).
|
|
108
|
+
- **First run.** 101 stub files normalized (codex prompts + the Codex global
|
|
109
|
+
`instructions.md` + 5 github stubs + their template twins); the byan github
|
|
110
|
+
full-copy changed only its 3 stale path lines, its other 1059 lines untouched.
|
|
111
|
+
- Design mirrors the template-fidelity sync (pure rewrite rules + IO-isolated
|
|
112
|
+
apply); 20 unit tests pin every rule, the two preservation cases, the IO layer,
|
|
113
|
+
and idempotence. The tool ships in the template, so the gate is live for
|
|
114
|
+
installed users too.
|
|
115
|
+
|
|
12
116
|
## [2.22.0] - 2026-06-09
|
|
13
117
|
|
|
14
118
|
### Changed - byan_dispatch routes the model tier by task nature, not by size
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// drain-advisory.js — Stop hook. At the end of each assistant turn, drain the
|
|
3
|
+
// outcome buffer into BYAN's ADVISORY ledgers (ELO trust, suitability). This is the
|
|
4
|
+
// automatic half of the closed learning loop: outcomes logged during the turn (via
|
|
5
|
+
// byan_outcome_log) are recorded with NO agent action. Behavior surfaces (routing /
|
|
6
|
+
// personas / mantras) are never touched — only advisory data is written.
|
|
7
|
+
//
|
|
8
|
+
// STRICTLY non-blocking. All work is wrapped in try/catch; the hook ALWAYS emits
|
|
9
|
+
// {continue:true} and exits 0, and never throws or exits 2. An advisory feed must
|
|
10
|
+
// never break a turn (the stage-to-byan.js contract: "staging must never break the
|
|
11
|
+
// session"). Idempotent via a line cursor, so a re-fired Stop (stop_hook_active)
|
|
12
|
+
// records nothing new.
|
|
13
|
+
//
|
|
14
|
+
// ESM/CJS: this hook is CommonJS. The ELO engine is CJS (require). The pure libs and
|
|
15
|
+
// the suitability store are ESM under a type:module package, reached via dynamic
|
|
16
|
+
// import() with a file:// URL.
|
|
17
|
+
|
|
18
|
+
const path = require('path');
|
|
19
|
+
const { pathToFileURL } = require('url');
|
|
20
|
+
|
|
21
|
+
/**
 * Read everything piped to stdin and return it as text.
 *
 * Resolves with '' straight away when stdin is a TTY (nothing is piped in).
 * Otherwise chunks are accumulated until 'end'. A stream 'error' resolves with
 * whatever was read so far, so the returned promise never rejects.
 *
 * @returns {Promise<string>} the text read from stdin (possibly empty).
 */
function readStdin() {
  return new Promise((resolve) => {
    if (process.stdin.isTTY) return resolve('');
    // Let the stream decode UTF-8: concatenating raw Buffer chunks stringifies
    // each chunk on its own, which corrupts a multi-byte character that
    // happens to be split across two chunks.
    process.stdin.setEncoding('utf8');
    let data = '';
    process.stdin.on('data', (chunk) => {
      data += chunk;
    });
    process.stdin.on('end', () => resolve(data));
    process.stdin.on('error', () => resolve(data));
  });
}
|
|
30
|
+
|
|
31
|
+
/**
 * Emit the hook verdict `{"continue":true}` on stdout and terminate with
 * exit code 0.
 *
 * The exit is issued from the write callback rather than immediately after
 * the write: process.exit() does not wait for pending stdout I/O, so on
 * platforms where a piped stdout is asynchronous the verdict could be lost.
 * The callback is invoked whether the write succeeded or failed, so the
 * process still exits 0 on every path.
 */
function done() {
  const verdict = JSON.stringify({ continue: true });
  process.stdout.write(verdict, () => process.exit(0));
}
|
|
35
|
+
|
|
36
|
+
// Hook entry point. Loads the planner and buffer modules from the project,
// records every pending outcome into the matching ledger, then stores the new
// cursor. Whatever happens inside, done() is the last thing that runs.
(async function drainAdvisoryBuffer() {
  try {
    // stdin is consumed but its content is not used; the state lives on disk.
    await readStdin();

    const projectRoot = process.env.CLAUDE_PROJECT_DIR || process.cwd();
    // ESM modules are loaded through a file:// URL built from the project root.
    const loadEsm = (relPath) => import(pathToFileURL(path.join(projectRoot, relPath)).href);

    const planner = await loadEsm('_byan/mcp/byan-mcp-server/lib/advisory-autofeed.js');
    const buffer = await loadEsm('_byan/mcp/byan-mcp-server/lib/outcome-buffer.js');

    const rawBuffer = buffer.readBuffer({ rootDir: projectRoot });
    const outcomes = planner.parseOutcomes(rawBuffer);
    const cursor = buffer.readCursor({ rootDir: projectRoot });
    const plan = planner.planDrain(outcomes, cursor);
    if (!plan.pending.length) return done();

    // Both ledgers are created lazily, on the first record that needs them.
    let eloLedger = null;
    let suitabilityStore = null;

    for (const outcome of plan.pending) {
      const record = planner.classifyOutcome(outcome);
      if (!record) continue;

      try {
        switch (record.kind) {
          case 'elo': {
            if (!eloLedger) {
              const EloEngine = require(path.join(projectRoot, 'src', 'byan-v2', 'elo', 'index.js'));
              const storagePath = path.join(projectRoot, '_byan', 'memoire', 'elo-profile.json');
              eloLedger = new EloEngine({ storagePath });
            }
            eloLedger.recordResult(record.domain, record.result);
            break;
          }
          case 'suitability': {
            if (!suitabilityStore) {
              suitabilityStore = await loadEsm('_byan/mcp/byan-mcp-server/lib/suitability-store.js');
            }
            suitabilityStore.record({
              model: record.model,
              leafId: record.leafId,
              success: record.success,
              source: 'autofeed',
              projectRoot,
            });
            break;
          }
          default:
            break;
        }
      } catch {
        // a failing record is skipped so the remaining ones are still processed
      }
    }

    buffer.writeCursor(plan.newCursor, { rootDir: projectRoot });
  } catch {
    // failures are swallowed on purpose: this hook must not block the turn
  }
  done();
})();
|
|
@@ -10,14 +10,27 @@ const ERROR_PATTERNS = [
|
|
|
10
10
|
/tool_use_error/i,
|
|
11
11
|
];
|
|
12
12
|
|
|
13
|
-
// Tools whose response echoes user-authored or
|
|
13
|
+
// Tools whose response echoes user-authored or stored content (Write/Edit
|
|
14
14
|
// return file paths + content fragments, Read echoes file content
|
|
15
15
|
// verbatim). Pattern match on their response fires false positives when
|
|
16
|
-
// the
|
|
16
|
+
// the content itself contains the literal phrase "internal error"
|
|
17
17
|
// (e.g. a doc about errors, a test fixture, a hook that detects errors).
|
|
18
18
|
// For these, only trust the explicit is_error flag.
|
|
19
19
|
const ECHO_TOOLS = new Set(['Write', 'Edit', 'NotebookEdit', 'Read']);
|
|
20
20
|
|
|
21
|
+
// Two more tool classes echo DATA (not a stderr stream), so content-pattern
|
|
22
|
+
// matching on their response is noise. A genuine failure of either sets is_error
|
|
23
|
+
// (checked before this guard), so we lose no real-failure detection:
|
|
24
|
+
// - MCP tools (mcp__server__tool): byan_fd_* echoes the FD state (which can
|
|
25
|
+
// hold user-authored raw_ideas / notes containing the literal phrase),
|
|
26
|
+
// byan_*_status echoes ledger content, etc.
|
|
27
|
+
// - Bash: its response is command stdout - diagnostics, log greps, test output
|
|
28
|
+
// that legitimately surface error-words. A real Bash failure exits non-zero,
|
|
29
|
+
// which the harness marks as is_error.
|
|
30
|
+
/**
 * Tell whether a tool's response should be exempt from content-pattern
 * matching: the tools listed in ECHO_TOOLS, Bash, and any MCP tool (name
 * prefixed with `mcp__`).
 *
 * @param {unknown} toolName - tool name taken from the hook payload.
 * @returns {boolean} true when the tool is echo-heavy; false otherwise,
 *   including when toolName is missing or not a string.
 */
function isEchoHeavy(toolName) {
  // A payload without a usable tool name must not make the guard throw on
  // `.startsWith`; treat it as a regular (non-echo) tool.
  if (typeof toolName !== 'string') return false;
  return ECHO_TOOLS.has(toolName) || toolName === 'Bash' || toolName.startsWith('mcp__');
}
|
|
33
|
+
|
|
21
34
|
function detectFailure(payload) {
|
|
22
35
|
if (!payload || typeof payload !== 'object') return null;
|
|
23
36
|
|
|
@@ -30,8 +43,9 @@ function detectFailure(payload) {
|
|
|
30
43
|
}
|
|
31
44
|
}
|
|
32
45
|
|
|
33
|
-
// Do not pattern-match on echo-heavy tools
|
|
34
|
-
|
|
46
|
+
// Do not pattern-match on echo-heavy tools (file-echo, Bash stdout, MCP data) — only
|
|
47
|
+
// trust the is_error flag, checked above.
|
|
48
|
+
if (isEchoHeavy(toolName)) {
|
|
35
49
|
return null;
|
|
36
50
|
}
|
|
37
51
|
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: byan-insight
|
|
3
|
+
description: Harvest the native Claude Code outcome trails (tool-log, strict-audit gaps, suitability ledger, ELO) into a GATED self-improvement digest for BYAN. Invoke when the user asks "what did this session teach BYAN", "insight digest", "self-improvement", "qu'est-ce que BYAN a appris", or wants to review recurring gaps / routing outcomes / tool health before deciding what to improve. Observe and propose; the human ratifies each change.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# BYAN Insight Loop (gated self-improvement)
|
|
7
|
+
|
|
8
|
+
BYAN already has advisory learning surfaces (ELO trust, the suitability ledger,
|
|
9
|
+
soul-memory) and the native Claude Code hooks already leave outcome trails on
|
|
10
|
+
disk. This skill closes the loop: it READS those trails, aggregates them into a
|
|
11
|
+
digest, and surfaces GATED proposals. It does not modify a behavior surface.
|
|
12
|
+
|
|
13
|
+
## The one hard rule: observe and propose, do not silently self-modify
|
|
14
|
+
|
|
15
|
+
An agent that rewrote its own routing, personas, or mantra thresholds on a
|
|
16
|
+
heuristic would be the exact silent-downgrade BYAN exists to prevent. So this
|
|
17
|
+
loop stops at a PROPOSAL. Applying a change (a routing tweak, a new checklist
|
|
18
|
+
item, a persona edit) stays a human decision — ideally run as its own FD. The
|
|
19
|
+
advisory data (ELO, suitability) is read-only here; behavior surfaces are left
|
|
20
|
+
to the human gate.
|
|
21
|
+
|
|
22
|
+
## Protocol
|
|
23
|
+
|
|
24
|
+
1. **Harvest.** Call the MCP tool `byan_insight_digest` (read-only, no args). It
|
|
25
|
+
returns `{ gated: true, digest, render }` where `digest` is
|
|
26
|
+
`{ toolHealth, recurringGaps, routingOutcomes, eloTrends, proposals }`.
|
|
27
|
+
- `toolHealth` : call count, failure rate, top failing tools, output-token cost
|
|
28
|
+
(from `_byan-output/tool-log.jsonl`).
|
|
29
|
+
- `recurringGaps` : clustered self-verify gap themes with counts (from
|
|
30
|
+
`.byan-strict/audit.log`) — what BYAN keeps missing.
|
|
31
|
+
- `routingOutcomes` : per cheap-model x leaf keep-rate (from the suitability
|
|
32
|
+
ledger) — where a downgrade is proven good or bad.
|
|
33
|
+
- `eloTrends` : per-domain trust rating.
|
|
34
|
+
- `proposals` : conservative, GATED suggestions (each `gated: true`).
|
|
35
|
+
2. **Present.** Show the `render` text, then the proposals as a numbered list.
|
|
36
|
+
Make explicit that nothing has been applied.
|
|
37
|
+
3. **Gate.** For each proposal the user accepts, run the change as its own scoped
|
|
38
|
+
work (a short FD for a behavior change; a direct edit for a doc/checklist).
|
|
39
|
+
Do not auto-apply a proposal.
|
|
40
|
+
|
|
41
|
+
## CLI equivalent
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
node _byan/mcp/byan-mcp-server/bin/byan-insight-digest.js [--root <dir>] [--json]
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Prints the human-readable digest, or the raw JSON with `--json`. Self-disables
|
|
48
|
+
(empty digest) when the trails are absent, so a fresh checkout is not an error.
|
|
49
|
+
|
|
50
|
+
## What it deliberately leaves alone
|
|
51
|
+
|
|
52
|
+
- It does not call `byan_elo_record` / `byan_suitability_record` for you (those
|
|
53
|
+
stay where the outcome actually happens, e.g. a VALIDATE pass).
|
|
54
|
+
- It does not edit `lib/dispatch.js`, `native-tiers.js`, a persona, or the mantra
|
|
55
|
+
thresholds. Those are behavior surfaces; a proposal names them, a human
|
|
56
|
+
changes them.
|
|
@@ -18,7 +18,7 @@ export const meta = {
|
|
|
18
18
|
// this script). No wall-clock, no randomness: any date/id is passed via args
|
|
19
19
|
// so the runtime can resume deterministically.
|
|
20
20
|
|
|
21
|
-
const planningArtifacts = (args && args.planningArtifacts) || '_bmad-output/planning-artifacts'
|
|
21
|
+
const planningArtifacts = (args && args.planningArtifacts) || '_byan-output/planning-artifacts'
|
|
22
22
|
const reportDate = (args && args.date) || 'unspecified'
|
|
23
23
|
const role = 'an expert Product Manager and Scrum Master specialized in requirements traceability and spotting gaps in planning artifacts. Be adversarial: your job is to find the failures others missed, not to reassure.'
|
|
24
24
|
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
|
-
# BYAN pre-commit hook.
|
|
2
|
+
# BYAN pre-commit hook. Five gates run in order:
|
|
3
3
|
# 1. Strict Mode gate : block if a strict session is engaged but not completed.
|
|
4
4
|
# 2. Native-workflow lint : block if a .claude/workflows/*.js couples to state.
|
|
5
5
|
# 3. Template fidelity : block if install/templates/ drifted from root.
|
|
6
|
-
# 4.
|
|
6
|
+
# 4. Stub path drift : block if a tracked stub carries a stale _bmad/@bmad path ref.
|
|
7
|
+
# 5. Mantra floor : block if a Gen3 persona source scores below the floor.
|
|
7
8
|
#
|
|
8
9
|
# Install :
|
|
9
10
|
# git config core.hooksPath .githooks
|
|
@@ -81,6 +82,24 @@ if [ -f "$TEMPLATE_SYNC" ]; then
|
|
|
81
82
|
fi
|
|
82
83
|
fi
|
|
83
84
|
|
|
85
|
+
# Stub path drift gate — the installer generated platform stubs (.codex/prompts,
|
|
86
|
+
# .github/agents, .claude/skills) over many versions; older generators emitted the
|
|
87
|
+
# legacy _bmad/@bmad path layout while the agent sources are clean. This gate
|
|
88
|
+
# blocks a commit whose tracked stubs still carry a stale _bmad/ or @bmad/ PATH
|
|
89
|
+
# ref (the @bmad- invocation syntax and the _bmad-output/ artifact dir are left
|
|
90
|
+
# alone). Re-normalize with the apply command, then restage. No-op if the tool is
|
|
91
|
+
# absent or no stub dirs exist (installed-user no-op).
|
|
92
|
+
STUB_SYNC="_byan/mcp/byan-mcp-server/bin/byan-sync-stubs.js"
|
|
93
|
+
if [ -f "$STUB_SYNC" ]; then
|
|
94
|
+
if ! node "$STUB_SYNC" --check --root "$(git rev-parse --show-toplevel)"; then
|
|
95
|
+
echo ""
|
|
96
|
+
echo "Commit blocked : a tracked stub carries a stale _bmad/@bmad path ref."
|
|
97
|
+
echo "Re-normalize with 'node $STUB_SYNC' then restage, or bypass with"
|
|
98
|
+
echo "'git commit --no-verify' (emergency only)."
|
|
99
|
+
exit 1
|
|
100
|
+
fi
|
|
101
|
+
fi
|
|
102
|
+
|
|
84
103
|
if [ ! -f "$VALIDATOR" ]; then
|
|
85
104
|
exit 0
|
|
86
105
|
fi
|
|
@@ -1023,23 +1023,23 @@ Il n'est PAS un worker isole — il est un orchestrateur dans l'ecosysteme BMAD.
|
|
|
1023
1023
|
|
|
1024
1024
|
L'agent peut executer n'importe quel workflow BMAD :
|
|
1025
1025
|
- Via commande : `@bmad-{module}-{workflow}` (ex: `@bmad-bmm-create-prd`)
|
|
1026
|
-
- Via menu handler : `exec="{project-root}/
|
|
1027
|
-
- Manifeste : `{project-root}/
|
|
1026
|
+
- Via menu handler : `exec="{project-root}/_byan/{module}/workflows/{workflow}/workflow.md"`
|
|
1027
|
+
- Manifeste : `{project-root}/_byan/_config/workflow-manifest.csv`
|
|
1028
1028
|
|
|
1029
1029
|
### Deleguer a d'autres Agents
|
|
1030
1030
|
|
|
1031
1031
|
L'agent peut invoquer n'importe quel agent specialise :
|
|
1032
1032
|
- Via commande : `@bmad-agent-{name}` (ex: `@bmad-agent-bmm-dev`)
|
|
1033
|
-
- Via manifeste : `{project-root}/
|
|
1033
|
+
- Via manifeste : `{project-root}/_byan/_config/agent-manifest.csv`
|
|
1034
1034
|
- L'agent delegue reprend le controle — l'agent courant se retire
|
|
1035
1035
|
|
|
1036
1036
|
### Acceder aux Contextes
|
|
1037
1037
|
|
|
1038
1038
|
Variables de session disponibles apres chargement config :
|
|
1039
1039
|
- `{project-root}` : Racine du repository
|
|
1040
|
-
- `{output_folder}` : Dossier de sortie (`_bmad-output/`)
|
|
1041
|
-
- `{planning_artifacts}` : `_bmad-output/planning-artifacts/`
|
|
1042
|
-
- `{implementation_artifacts}` : `_bmad-output/implementation-artifacts/`
|
|
1040
|
+
- `{output_folder}` : Dossier de sortie (`_byan-output/`)
|
|
1041
|
+
- `{planning_artifacts}` : `_byan-output/planning-artifacts/`
|
|
1042
|
+
- `{implementation_artifacts}` : `_byan-output/implementation-artifacts/`
|
|
1043
1043
|
- `{user_name}`, `{communication_language}` : Depuis config.yaml
|
|
1044
1044
|
|
|
1045
1045
|
### Orchestration Multi-Agent
|
|
@@ -6,7 +6,7 @@ description: 'Scientific Claim Challenger and Epistemic Guard'
|
|
|
6
6
|
You must fully embody this agent's persona and follow all activation instructions exactly as specified. NEVER break character until given an exit command.
|
|
7
7
|
|
|
8
8
|
<agent-activation CRITICAL="TRUE">
|
|
9
|
-
1. LOAD the FULL agent file from {project-root}/
|
|
9
|
+
1. LOAD the FULL agent file from {project-root}/_byan/agent/skeptic/skeptic.md
|
|
10
10
|
2. READ its entire contents - this contains the complete agent persona, menu, and instructions
|
|
11
11
|
3. LOAD the soul activation protocol from {project-root}/_byan/core/activation/soul-activation.md and EXECUTE it silently
|
|
12
12
|
4. FOLLOW every step in the <activation> section precisely
|
|
@@ -25,4 +25,6 @@ expert-merise-agile,"Expert Merise","Expert Merise Agile - Assistant de Concepti
|
|
|
25
25
|
"skeptic","The Skeptic","Scientific Claim Challenger and Epistemic Guard","[?]","Epistemic Guard + Fact-Check Specialist","Methodical challenger of all claims. Applies 3-step verification (Source / Proof type / Reproducible). Specializes in auditing documents for unsourced assertions, computing Trust Scores, and verifying reasoning chains with multiplicative confidence propagation.","Cold, methodical, impeccably polite. Speaks in structured CLAIM/CHALLENGE/VERDICT blocks. Uses Socratic method — questions before conclusions. Never hostile, always rigorous.","Challenge Before Confirm | Extraordinary claims require extraordinary evidence | Descartes Doubt | No URL generation | Strict-domain LEVEL-2 minimum","core","_byan/agent/skeptic/skeptic.md"
|
|
26
26
|
"forgeron","Le Forgeron","Revelateur d ames","","Revelateur d ames — Soul Forger","Expert en interview psychologique profonde pour extraire l ame du createur depuis ses experiences de vie. Detecte emotions, valeurs, blessures fondatrices. Genere creator-soul.md et agent soul files. Calme, patient, utilise le silence comme outil.","Calme, patient, minimal, profond. Questions rares mais chaque une compte. Utilise le silence. Reflete sans projeter.","Ne jamais interpreter a la place du createur | Ne jamais precipiter | Emotions = donnees de navigation | Preuve avant sentence","bmb","_byan/agent/forgeron/forgeron.md"
|
|
27
27
|
"tao","Tao","Le Tao — Directeur de Voix des Agents","道","Voice Director — Soul to Expression Bridge","Transforme les valeurs abstraites du soul.md en directives vocales concretes : tics de langage, registre, signatures verbales, vocabulaire interdit. Forge le tao.md de chaque agent. Garantit l anti-uniformite : chaque agent sonne unique.","Calme, precis, chirurgical. L oreille absolue pour les voix. Detecte le generique a la premiere phrase. Concret : jamais de regle sans exemple.","Derivation tracable : chaque tic nait d une valeur d ame | Anti-uniformite : deux agents ne sonnent jamais pareil | Exemple obligatoire | La voix sert l ame pas l inverse","core","_byan/agent/tao/tao.md"
|
|
28
|
+
"jimmy","Jimmy","Spécialiste Documentation Technique & Processus Internes","book-open","Technical Documentation Specialist + Outline Knowledge Custodian","Specialist in technical documentation and internal processes. Creates, maintains, and organizes operational documentation on Outline: runbooks, deployment procedures, infrastructure configs, and server and web guides. Expert in infra, web, and server work with zero approximation in procedures. Documents in French for francophone technical teams.","Professional and rigorous, direct without unnecessary jargon, and pedagogical when needed. Uses a standardized structure per document type.","Clear and actionable technical documentation | Standardized structure per document type | Systematic validation before publication | Coherently named and organized collections | Reusable templates when relevant","bmm","_byan/agent/jimmy/jimmy.md"
|
|
29
|
+
"mike","Mike","Gestionnaire de Projet — Spécialiste Leantime","clipboard-list","Project Manager + Leantime Integration Specialist","Project manager specialized in Leantime. Creates, organizes, and manages projects, tasks (tickets), sprints, and milestones on Leantime. Structures team work clearly and efficiently. Works in French for francophone teams.","Professional and organized, results-oriented, and direct in French. Asks targeted questions to structure the work with no superfluous content.","MVP first, the minimum viable to get moving | Validation before action, confirm before executing | Clarity through explicit names and concise descriptions | Traceability by documenting important decisions | Explicit errors with proposed alternatives","bmm","_byan/agent/mike/mike.md"
|
|
28
30
|
|
|
@@ -158,9 +158,9 @@ L'agent peut invoquer n'importe quel agent specialise :
|
|
|
158
158
|
|
|
159
159
|
Variables de session disponibles apres chargement config :
|
|
160
160
|
- `{project-root}` : Racine du repository
|
|
161
|
-
- `{output_folder}` : Dossier de sortie (`_bmad-output/`)
|
|
162
|
-
- `{planning_artifacts}` : `_bmad-output/planning-artifacts/`
|
|
163
|
-
- `{implementation_artifacts}` : `_bmad-output/implementation-artifacts/`
|
|
161
|
+
- `{output_folder}` : Dossier de sortie (`_byan-output/`)
|
|
162
|
+
- `{planning_artifacts}` : `_byan-output/planning-artifacts/`
|
|
163
|
+
- `{implementation_artifacts}` : `_byan-output/implementation-artifacts/`
|
|
164
164
|
- `{user_name}`, `{communication_language}` : Depuis config.yaml
|
|
165
165
|
|
|
166
166
|
### Orchestration Multi-Agent
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { harvest, renderDigest } from '../lib/insight-harvest.js';
|
|
3
|
+
|
|
4
|
+
// Aggregate native Claude Code outcome trails into a GATED improvement digest.
|
|
5
|
+
// Reads: _byan-output/tool-log.jsonl, .byan-strict/audit.log,
|
|
6
|
+
// _byan-output/suitability-ledger.json, _byan/memoire/elo-profile.json
|
|
7
|
+
// Missing trail -> empty; digest self-disables gracefully.
|
|
8
|
+
//
|
|
9
|
+
// Usage: node bin/byan-insight-digest.js [--root <dir>] [--json]
|
|
10
|
+
|
|
11
|
+
/**
 * Parse the CLI flags of byan-insight-digest from a process.argv-style array.
 *
 * Recognized flags: `--json` (boolean) and `--root <dir>`. Unknown tokens are
 * ignored. The first two entries (runtime and script path) are skipped.
 *
 * @param {string[]} argv - full argument vector, as in process.argv.
 * @returns {{json: boolean, root?: string}} parsed flags; `root` is only set
 *   when `--root` is followed by an actual value.
 */
function parseArgs(argv) {
  const args = { json: false };
  for (let i = 2; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--json') {
      args.json = true;
    } else if (token === '--root') {
      const value = argv[i + 1];
      // Only consume the next token when it is a real value: a trailing
      // `--root`, or `--root` followed by another flag, must not swallow
      // that flag as the directory.
      if (typeof value === 'string' && !value.startsWith('--')) {
        args.root = value;
        i++;
      }
    }
  }
  return args;
}
|
|
19
|
+
|
|
20
|
+
// CLI entry point: build the digest for the chosen root and print it, as
// pretty-printed JSON with --json or as rendered text otherwise.
const args = parseArgs(process.argv);
// Root precedence: --root flag, then CLAUDE_PROJECT_DIR, then the current directory.
const root = args.root || process.env.CLAUDE_PROJECT_DIR || process.cwd();

const digest = harvest({ rootDir: root });
const output = args.json ? JSON.stringify(digest, null, 2) : renderDigest(digest);

// Exit from the write callback: process.exit() does not wait for pending
// stdout I/O, so exiting right after the write can truncate a large digest
// when stdout is an asynchronous pipe. The callback runs on success and on
// failure alike, so the exit code stays 0.
process.stdout.write(output + '\n', () => process.exit(0));
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { applyFix, check, STUB_DIRS } from '../lib/stub-sync.js';
|
|
5
|
+
|
|
6
|
+
// Normalize stale _bmad / @bmad PATH references in tracked agent stubs to the
|
|
7
|
+
// _byan/ canonical form. Two modes:
|
|
8
|
+
// (default) fix : rewrite stale path tokens in place (atomic per file).
|
|
9
|
+
// --check : report any residual stale ref and exit non-zero (no writes).
|
|
10
|
+
// This is the pre-commit gate's entry point.
|
|
11
|
+
// Usage: node bin/byan-sync-stubs.js [--check] [--root <dir>]
|
|
12
|
+
|
|
13
|
+
/**
 * Parse the CLI flags of byan-sync-stubs from a process.argv-style array.
 *
 * Recognized flags: `--check` (boolean) and `--root <dir>`. Unknown tokens
 * are ignored. The first two entries (runtime and script path) are skipped.
 *
 * @param {string[]} argv - full argument vector, as in process.argv.
 * @returns {{check: boolean, projectRoot?: string}} parsed flags;
 *   `projectRoot` is only set when `--root` is followed by an actual value.
 */
function parseArgs(argv) {
  const args = { check: false };
  for (let i = 2; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--check') {
      args.check = true;
    } else if (token === '--root') {
      const value = argv[i + 1];
      // Only consume the next token when it is a real value: `--root --check`
      // must not turn the check run into a fix run by swallowing `--check`
      // as the directory.
      if (typeof value === 'string' && !value.startsWith('--')) {
        args.projectRoot = value;
        i++;
      }
    }
  }
  return args;
}
|
|
21
|
+
|
|
22
|
+
// CLI entry point. Resolves the project root, exits quietly when no stub
// directory exists, then either reports stale refs (--check) or rewrites them.
const cli = parseArgs(process.argv);
const rootDir = cli.projectRoot || process.env.CLAUDE_PROJECT_DIR || process.cwd();

// A stub dir counts when present at the root or under install/templates.
const hasStubDir = STUB_DIRS.some((dir) => {
  if (fs.existsSync(path.join(rootDir, dir))) return true;
  return fs.existsSync(path.join(rootDir, 'install', 'templates', dir));
});

if (!hasStubDir) {
  // Nothing to scan: exit 0 so this is a no-op rather than an error.
  process.stdout.write('[byan-sync-stubs] no stub directories - nothing to normalize\n');
  process.exit(0);
}

if (cli.check) {
  // Check mode: report only, exit code 1 when any stale ref remains.
  const { stale, ok, scanned: checkedCount } = check({ rootDir });
  if (ok) {
    process.stdout.write(`[byan-sync-stubs] OK - ${checkedCount} stubs free of stale _bmad/@bmad path refs\n`);
    process.exit(0);
  }
  for (const entry of stale) {
    const { file, refs } = entry;
    process.stderr.write(`[byan-sync-stubs] stale: ${file} -> ${refs.join(', ')}\n`);
  }
  process.stderr.write(
    `[byan-sync-stubs] FAIL - ${stale.length} stub(s) carry stale path refs. Run: node bin/byan-sync-stubs.js\n`,
  );
  process.exit(1);
}

// Default mode: apply the rewrite and print a summary.
const { fixed, scanned } = applyFix({ rootDir });
process.stdout.write(`[byan-sync-stubs] normalized - ${fixed.length} stub(s) fixed of ${scanned} scanned\n`);
process.exit(0);
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// Advisory auto-feed — the pure planning half of the closed learning loop.
|
|
2
|
+
//
|
|
3
|
+
// BYAN's advisory ledgers (ELO trust, the suitability ledger) only INFORM future
|
|
4
|
+
// decisions; they never override behavior. The open gap was that nothing fed them
|
|
5
|
+
// automatically: the agent had to remember to call a record tool. This loop closes
|
|
6
|
+
// that — outcomes are LOGGED to a buffer during a turn (cheaply, via byan_outcome_log),
|
|
7
|
+
// and a Stop hook DRAINS the buffer into the ledgers at end of turn, with no agent
|
|
8
|
+
// action. Behavior surfaces (routing / personas / mantras) are out of scope: this
|
|
9
|
+
// only writes advisory data.
|
|
10
|
+
//
|
|
11
|
+
// This module is the PURE half (no I/O), so it is exhaustively unit-testable; the
|
|
12
|
+
// Stop hook supplies the buffer text + a cursor and applies the records.
|
|
13
|
+
//
|
|
14
|
+
// Buffer line shapes (jsonl, one outcome per line):
|
|
15
|
+
// { kind: 'elo', domain, result } result: VALIDATED|PARTIAL|BLOCKED
|
|
16
|
+
// { kind: 'suitability', model, leafId, success } success: boolean
|
|
17
|
+
// A line missing required fields or with a bad type is dropped (classifyOutcome -> null),
|
|
18
|
+
// never throwing — a malformed log line must not break the drain.
|
|
19
|
+
|
|
20
|
+
// Turn a jsonl buffer into an array of parsed outcomes. Blank lines, lines that
// are not valid JSON, and lines that parse to a falsy value are all skipped, so
// a malformed log line never throws out of the parser.
export function parseOutcomes(text) {
  if (!text) return [];
  const parsed = [];
  for (const line of text.split('\n')) {
    if (!line.trim()) continue;
    let value;
    try {
      value = JSON.parse(line);
    } catch {
      continue;
    }
    if (value) parsed.push(value);
  }
  return parsed;
}
|
|
35
|
+
|
|
36
|
+
// Plan one drain pass over the parsed outcomes. `cursor` is how many outcomes
// were already recorded: everything from that index on is pending and the new
// cursor is the full length, so re-running with the returned cursor and no new
// outcomes yields an empty `pending`. A cursor that is not a non-negative
// integer is treated as 0; one past the end is clamped to the length.
export function planDrain(outcomes, cursor = 0) {
  const total = outcomes.length;
  const usable = Number.isInteger(cursor) && cursor >= 0;
  const from = Math.min(usable ? cursor : 0, total);
  return { pending: outcomes.slice(from), newCursor: total };
}
|
|
44
|
+
|
|
45
|
+
// Result vocabulary accepted for ELO records. Callers may supply PARTIAL; it is
// mapped to PARTIALLY_VALID, the form stored in this set. Every other value is
// passed through unchanged (validation against the set happens at the caller).
const ELO_RESULTS = new Set(['VALIDATED', 'PARTIALLY_VALID', 'BLOCKED']);
function normalizeEloResult(r) {
  return r === 'PARTIAL' ? 'PARTIALLY_VALID' : r;
}
|
|
52
|
+
|
|
53
|
+
// Validate and normalize one buffer outcome into a record intent; returns null
// for anything invalid.
//   elo         -> { kind: 'elo', domain, result }  (non-empty trimmed domain,
//                  result in ELO_RESULTS after normalization)
//   suitability -> { kind: 'suitability', model, leafId, success }  (non-empty
//                  trimmed model and leafId, success strictly boolean)
export function classifyOutcome(o) {
  if (!o || typeof o !== 'object') return null;

  // Non-string fields collapse to '' so they fail the emptiness checks below.
  const text = (v) => (typeof v === 'string' ? v.trim() : '');

  switch (o.kind) {
    case 'elo': {
      const domain = text(o.domain);
      const result = normalizeEloResult(o.result);
      if (domain && ELO_RESULTS.has(result)) return { kind: 'elo', domain, result };
      return null;
    }
    case 'suitability': {
      const model = text(o.model);
      const leafId = text(o.leafId);
      if (model && leafId && typeof o.success === 'boolean') {
        return { kind: 'suitability', model, leafId, success: o.success };
      }
      return null;
    }
    default:
      return null;
  }
}
|
|
72
|
+
|
|
73
|
+
// Validate an outcome before it is appended to the buffer. Returns the
// canonical line object to write, or null when the input is not a valid
// outcome. It applies exactly the rules of classifyOutcome, so anything this
// accepts is also accepted when the buffer is later classified.
export function validateForLog(input) {
  const rec = classifyOutcome(input);
  if (!rec) return null;
  if (rec.kind === 'elo') {
    const { domain, result } = rec;
    return { kind: 'elo', domain, result };
  }
  const { model, leafId, success } = rec;
  return { kind: 'suitability', model, leafId, success };
}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
// Session insight harvester — read native Claude Code outcome trails and
|
|
2
|
+
// aggregate them into a GATED improvement digest for BYAN.
|
|
3
|
+
//
|
|
4
|
+
// Philosophy (the whole point): OBSERVE and PROPOSE, never silently self-modify.
|
|
5
|
+
// BYAN already has advisory learning surfaces (ELO trust, the suitability
|
|
6
|
+
// ledger) the agent updates by hand; the native hooks already leave outcome
|
|
7
|
+
// trails on disk. This module closes the loop by READING those trails and
|
|
8
|
+
// surfacing a digest with GATED proposals. It writes nothing back to a behavior
|
|
9
|
+
// surface (routing / personas / mantras): applying any change stays a human
|
|
10
|
+
// decision. An agent that rewrote its own routing on a heuristic would be the
|
|
11
|
+
// exact silent-downgrade BYAN exists to prevent.
|
|
12
|
+
//
|
|
13
|
+
// The aggregation is PURE (no I/O) so it is exhaustively unit-testable; the I/O
|
|
14
|
+
// entry takes an injected reader, mirroring template-sync.js / stub-sync.js.
|
|
15
|
+
//
|
|
16
|
+
// Trails consumed (shapes verified against the live repo):
|
|
17
|
+
// _byan-output/tool-log.jsonl post line {phase:'post', tool, ok, est_output_tokens?}
|
|
18
|
+
// .byan-strict/audit.log {event:'self_verify', verdict:'gap', findings:[]}
|
|
19
|
+
// _byan-output/suitability-ledger.json { "model::leaf": {model, leafId, successes, failures} }
|
|
20
|
+
// _byan/memoire/elo-profile.json { domains: { <domain>: {rating, blocked_streak, ...} } }
|
|
21
|
+
|
|
22
|
+
import fs from 'node:fs';
|
|
23
|
+
import path from 'node:path';
|
|
24
|
+
|
|
25
|
+
// Parse a JSONL blob into an array of values. Empty lines, lines that fail
// JSON.parse, and lines that parse to a falsy value are skipped.
export function parseJsonl(text) {
  if (!text) return [];
  return text.split('\n').reduce((acc, line) => {
    if (line) {
      try {
        const value = JSON.parse(line);
        if (value) acc.push(value);
      } catch {
        // Malformed line: skip it.
      }
    }
    return acc;
  }, []);
}
|
|
40
|
+
|
|
41
|
+
// Tool health from tool-log entries. Only entries with phase === 'post' are
// counted; a failure is a post entry whose `ok` is exactly false.
// Returns { calls, failures, failureRate, topFailing, estOutputTokens }:
//   failureRate     failures / calls rounded to 3 decimals (0 when no calls)
//   topFailing      up to 5 { tool, count } rows, most failures first; a
//                   failure without a tool name is tallied under 'unknown'
//   estOutputTokens numeric sum of est_output_tokens over post entries
export function harvestToolHealth(toolLogEntries) {
  const post = (toolLogEntries || []).filter((e) => e && e.phase === 'post');
  const failures = post.filter((e) => e.ok === false);

  // Null-prototype tally: tool names come from a log file, and on a plain
  // object a tool named "__proto__" would hit the inherited accessor and be
  // silently lost from the counts.
  const byTool = Object.create(null);
  for (const f of failures) {
    const tool = f.tool || 'unknown';
    byTool[tool] = (byTool[tool] || 0) + 1;
  }
  const topFailing = Object.entries(byTool)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([tool, count]) => ({ tool, count }));

  // est_output_tokens is absent on some lines and comes from a file on disk, so
  // it is not trusted to be a number: numeric strings are coerced, anything
  // else non-finite contributes 0. Adding a raw string would turn the running
  // total into string concatenation.
  const estOutputTokens = post.reduce((sum, e) => {
    const raw = typeof e.est_output_tokens === 'string' ? Number(e.est_output_tokens) : e.est_output_tokens;
    return typeof raw === 'number' && Number.isFinite(raw) ? sum + raw : sum;
  }, 0);

  return {
    calls: post.length,
    failures: failures.length,
    failureRate: post.length ? +(failures.length / post.length).toFixed(3) : 0,
    topFailing,
    estOutputTokens,
  };
}
|
|
62
|
+
|
|
63
|
+
// Map a strict-gap finding to a coarse theme key. The finding is stringified
// and lower-cased, then tested against the patterns below in order; the first
// match wins. Anything unmatched falls under 'other' rather than being dropped.
function normalizeGap(finding) {
  const haystack = String(finding).toLowerCase();
  const themes = [
    [/\btest|coverage|spec\b/, 'tests/coverage'],
    [/error|edge|exception|fail|throw/, 'error/edge handling'],
    [/doc|comment|changelog|readme/, 'documentation'],
    [/template|fidelity|sync|twin/, 'template fidelity'],
    [/emoji/, 'emoji'],
    [/scope|downgrade|cut|stub|mvp/, 'scope/downgrade'],
  ];
  const hit = themes.find(([pattern]) => pattern.test(haystack));
  return hit ? hit[1] : 'other';
}
|
|
76
|
+
|
|
77
|
+
// Recurring strict-gap clustering: collect the findings of every audit entry
// with event 'self_verify', verdict 'gap' and an array `findings`, then group
// them by theme (normalizeGap). Each theme keeps a count and up to two samples
// truncated to 100 characters. A theme is "recurring" at count >= 2; recurring
// themes are returned most frequent first.
export function harvestStrictGaps(auditEntries) {
  const findings = [];
  for (const entry of auditEntries || []) {
    const isGap =
      entry && entry.event === 'self_verify' && entry.verdict === 'gap' && Array.isArray(entry.findings);
    if (!isGap) continue;
    for (const finding of entry.findings) findings.push(finding);
  }

  const byTheme = new Map();
  for (const finding of findings) {
    const key = normalizeGap(finding);
    let bucket = byTheme.get(key);
    if (!bucket) {
      bucket = { theme: key, count: 0, samples: [] };
      byTheme.set(key, bucket);
    }
    bucket.count += 1;
    if (bucket.samples.length < 2) bucket.samples.push(String(finding).slice(0, 100));
  }

  const recurring = [...byTheme.values()]
    .filter((bucket) => bucket.count >= 2)
    .sort((a, b) => b.count - a.count);
  return { totalGapFindings: findings.length, recurring };
}
|
|
98
|
+
|
|
99
|
+
// Routing outcomes: turn the suitability ledger into one row per entry with
// keepRate = successes / (successes + failures), rounded to 2 decimals, sorted
// busiest (largest n) first. Entries that are not objects, or whose total is 0
// or not a number, are skipped. model / leaf fall back to the two halves of the
// "model::leaf" key when the entry does not carry them.
export function harvestRouting(ledger) {
  if (!ledger || typeof ledger !== 'object') return [];
  return Object.entries(ledger)
    .filter(([, entry]) => entry && typeof entry === 'object')
    .map(([key, entry]) => {
      const successes = Number(entry.successes || 0);
      const failures = Number(entry.failures || 0);
      return { key, entry, successes, failures, n: successes + failures };
    })
    .filter((tally) => tally.n)
    .map(({ key, entry, successes, failures, n }) => {
      const [modelFromKey, leafFromKey] = key.split('::');
      return {
        model: entry.model || modelFromKey,
        leaf: entry.leafId || leafFromKey || key,
        successes,
        failures,
        n,
        keepRate: +(successes / n).toFixed(2),
      };
    })
    .sort((a, b) => b.n - a.n);
}
|
|
116
|
+
|
|
117
|
+
// Domain trust trends from the ELO profile: one { domain, rating,
// blockedStreak } row per entry of `eloProfile.domains` whose rating is a
// number, highest rating first. A missing blocked_streak is reported as 0.
export function harvestEloTrends(eloProfile) {
  const domains = (eloProfile && eloProfile.domains) || {};
  return Object.entries(domains)
    .filter(([, d]) => d && typeof d === 'object' && typeof d.rating === 'number')
    .map(([domain, d]) => ({ domain, rating: d.rating, blockedStreak: d.blocked_streak || 0 }))
    .sort((a, b) => b.rating - a.rating);
}
|
|
127
|
+
|
|
128
|
+
// Assemble the digest and derive proposals. Every proposal carries gated:true;
// this function only builds data and applies nothing. Thresholds:
//   tool-reliability  failureRate > 0.1 and at least one failing tool
//   recurring-gap     a recurring theme seen 3 or more times
//   routing           a ledger row with n >= 5 and keepRate < 0.5
// Missing inputs fall back to empty sections in the returned digest.
export function buildDigest({ toolHealth, gaps, routing, elo } = {}) {
  const gated = (kind, suggestion) => ({ kind, gated: true, suggestion });
  const proposals = [];

  if (toolHealth && toolHealth.failureRate > 0.1 && toolHealth.topFailing.length) {
    const worst = toolHealth.topFailing[0];
    proposals.push(
      gated(
        'tool-reliability',
        `Tool failure rate ${toolHealth.failureRate}; top offender ${worst.tool} (${worst.count}). Investigate before relying on it.`,
      ),
    );
  }

  const recurring = (gaps && gaps.recurring) || [];
  for (const gap of recurring) {
    if (gap.count < 3) continue;
    proposals.push(
      gated(
        'recurring-gap',
        `Recurring self-verify gap "${gap.theme}" (${gap.count}x). Consider a pre-build checklist item.`,
      ),
    );
  }

  for (const row of routing || []) {
    if (!(row.n >= 5 && row.keepRate < 0.5)) continue;
    proposals.push(
      gated(
        'routing',
        `Cheap model ${row.model} underperforms on "${row.leaf}" (keepRate ${row.keepRate}, n=${row.n}). Consider keeping that leaf deep.`,
      ),
    );
  }

  return {
    toolHealth: toolHealth || null,
    recurringGaps: gaps || { totalGapFindings: 0, recurring: [] },
    routingOutcomes: routing || [],
    eloTrends: elo || [],
    proposals,
  };
}
|
|
169
|
+
|
|
170
|
+
// Render a digest as plain text, one section per line group: tool health (only
// when present), recurring gaps ('none' when empty), up to 8 routing rows, up
// to 6 ELO trends, then the proposal count followed by one line per proposal.
export function renderDigest(d) {
  const out = ['BYAN session insight digest', ''];

  const th = d.toolHealth;
  if (th) {
    out.push(
      `Tool health: ${th.calls} calls, ${th.failures} failures (rate ${th.failureRate}), ~${th.estOutputTokens} output tokens.`
    );
    if (th.topFailing.length) {
      const offenders = th.topFailing.map((t) => `${t.tool}(${t.count})`).join(', ');
      out.push(` Top failing: ${offenders}`);
    }
  }

  const gapSummary = d.recurringGaps.recurring.map((g) => `${g.theme}(${g.count})`).join(', ');
  out.push(`Recurring gaps: ${gapSummary || 'none'}`);

  if (d.routingOutcomes.length) {
    out.push('Routing outcomes (cheap-model keep-rate):');
    d.routingOutcomes.slice(0, 8).forEach((r) => {
      out.push(` ${r.model}::${r.leaf} -> keep ${r.keepRate} (n=${r.n})`);
    });
  }

  if (d.eloTrends.length) {
    const trends = d.eloTrends.slice(0, 6).map((e) => `${e.domain}=${e.rating}`).join(', ');
    out.push(`ELO trends: ${trends}`);
  }

  out.push('', `Proposals (GATED — human ratifies, nothing auto-applied): ${d.proposals.length}`);
  for (const p of d.proposals) out.push(` [${p.kind}] ${p.suggestion}`);
  return out.join('\n');
}
|
|
195
|
+
|
|
196
|
+
// I/O entry: read the four trails under rootDir through the injected `io`
// (node:fs by default) and build the digest. A trail that cannot be read is
// treated as empty text, and a JSON trail that is empty or unparsable as null,
// so a checkout without any trails still produces a digest.
export function harvest({ rootDir, io = fs } = {}) {
  const readText = (rel) => {
    try {
      return io.readFileSync(path.join(rootDir, rel), 'utf8');
    } catch {
      return '';
    }
  };
  const readJson = (rel) => {
    const raw = readText(rel);
    if (!raw) return null;
    try {
      return JSON.parse(raw);
    } catch {
      return null;
    }
  };

  return buildDigest({
    toolHealth: harvestToolHealth(parseJsonl(readText('_byan-output/tool-log.jsonl'))),
    gaps: harvestStrictGaps(parseJsonl(readText('.byan-strict/audit.log'))),
    routing: harvestRouting(readJson('_byan-output/suitability-ledger.json')),
    elo: harvestEloTrends(readJson('_byan/memoire/elo-profile.json')),
  });
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// Outcome buffer — the append-only capture file the advisory auto-feed drains.
|
|
2
|
+
//
|
|
3
|
+
// byan_outcome_log appends one validated outcome per line here during a turn; the
|
|
4
|
+
// drain-advisory Stop hook reads it at end of turn and records each new line into
|
|
5
|
+
// the advisory ledgers, advancing a line cursor for idempotency. Both sides take an
|
|
6
|
+
// injected `io` so the logic is testable without touching the real filesystem, and
|
|
7
|
+
// every operation is best-effort: a capture buffer must never break a turn.
|
|
8
|
+
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
|
|
12
|
+
// Root-relative locations of the outcome buffer and of its drain cursor; both
// live in the same output directory.
const OUTPUT_DIR = '_byan-output';
export const BUFFER_REL = path.join(OUTPUT_DIR, 'pending-outcomes.jsonl');
export const CURSOR_REL = path.join(OUTPUT_DIR, '.advisory-cursor.json');

// Path of the outcome buffer under rootDir.
function bufferPath(rootDir) {
  const rel = BUFFER_REL;
  return path.join(rootDir, rel);
}

// Path of the drain cursor file under rootDir.
function cursorPath(rootDir) {
  const rel = CURSOR_REL;
  return path.join(rootDir, rel);
}
|
|
21
|
+
|
|
22
|
+
// Append one outcome object to the buffer as a single jsonl line, creating the
// parent directory first if needed. Best-effort: returns true once the line is
// written and false if any step threw, so the caller never sees an exception.
export function appendOutcome(outcome, { rootDir, io = fs } = {}) {
  let written = false;
  try {
    const target = bufferPath(rootDir);
    io.mkdirSync(path.dirname(target), { recursive: true });
    io.appendFileSync(target, `${JSON.stringify(outcome)}\n`);
    written = true;
  } catch {
    // Swallowed on purpose: failure is reported through the return value.
  }
  return written;
}
|
|
34
|
+
|
|
35
|
+
// Read the raw buffer text through `io`; yields '' when the read throws
// (for example because the file is absent or unreadable).
export function readBuffer({ rootDir, io = fs } = {}) {
  let text = '';
  try {
    text = io.readFileSync(bufferPath(rootDir), 'utf8');
  } catch {
    // Keep the empty default.
  }
  return text;
}
|
|
43
|
+
|
|
44
|
+
// Read the drain cursor: the `drained` field of the cursor file when it is a
// non-negative integer. A missing, unreadable or malformed cursor file, or any
// other value, yields 0.
export function readCursor({ rootDir, io = fs } = {}) {
  try {
    const saved = JSON.parse(io.readFileSync(cursorPath(rootDir), 'utf8'));
    const drained = saved && saved.drained;
    if (Number.isInteger(drained) && drained >= 0) return drained;
  } catch {
    // Fall through to the default below.
  }
  return 0;
}
|
|
53
|
+
|
|
54
|
+
// Persist the drain cursor as `{ "drained": <value> }`, creating the parent
// directory first if needed. Best-effort: returns true once written and false
// if any step threw.
export function writeCursor(drained, { rootDir, io = fs } = {}) {
  let saved = false;
  try {
    const target = cursorPath(rootDir);
    io.mkdirSync(path.dirname(target), { recursive: true });
    io.writeFileSync(target, `${JSON.stringify({ drained })}\n`);
    saved = true;
  } catch {
    // Swallowed on purpose: failure is reported through the return value.
  }
  return saved;
}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
// Stub path normalizer — keep tracked agent stubs free of stale _bmad / @bmad
|
|
2
|
+
// PATH references.
|
|
3
|
+
//
|
|
4
|
+
// The installer generated platform stubs (.codex/prompts, .github/agents,
|
|
5
|
+
// .claude/skills) over many versions. Older generators emitted the legacy path
|
|
6
|
+
// layout (`_bmad/*/agents/X.md`, `@bmad/bmm/agents/X.md`,
|
|
7
|
+
// `@bmad-output/bmb-creations/X/X.md`); the current generator emits the `_byan/`
|
|
8
|
+
// new layout. The committed corpus therefore carries a mix of stale path forms,
|
|
9
|
+
// while the source agent files are clean. This module normalizes those stale
|
|
10
|
+
// PATH tokens to the `_byan/` canonical form, in place, touching nothing else.
|
|
11
|
+
//
|
|
12
|
+
// Two tokens look similar but are NOT paths and must survive untouched:
|
|
13
|
+
// - `@bmad-<word>` : the agent/workflow INVOCATION syntax (`@bmad-bmm-create-prd`,
|
|
14
|
+
// `@bmad-party-mode`). A command, not a file path.
|
|
15
|
+
// - `_bmad-output/` : the accepted output-artifact directory (planning/
|
|
16
|
+
// implementation artifacts), documented in CLAUDE.md.
|
|
17
|
+
// Both are distinguished structurally: a path token is `@bmad/` or `_bmad/`
|
|
18
|
+
// (immediate slash); the survivors are `@bmad-` / `_bmad-` (immediate hyphen).
|
|
19
|
+
// The one exception is `[@_]bmad-output/bmb-creations/<name>/<name>.md`, which is
|
|
20
|
+
// a stale AGENT-LOAD path (the agent now lives at `_byan/agent/<name>/`), so that
|
|
21
|
+
// specific sub-form IS rewritten.
|
|
22
|
+
//
|
|
23
|
+
// Design mirrors template-sync.js: the risky half (the rewrite rules) is pure and
|
|
24
|
+
// exhaustively unit-tested; the I/O half takes an injected `io` so tests pin
|
|
25
|
+
// behaviour without touching the real filesystem. The tool only ever edits stale
|
|
26
|
+
// path tokens — it never regenerates or overwrites a stub wholesale, so the 6
|
|
27
|
+
// github full-copies and the 12 hand-authored rich skills keep their content.
|
|
28
|
+
|
|
29
|
+
import fs from 'node:fs';
|
|
30
|
+
import path from 'node:path';
|
|
31
|
+
|
|
32
|
+
// Tracked stub directories, as root-relative POSIX paths. Each one is scanned
// for .md files both at the root and under its TEMPLATE_PREFIX twin. `.codex`
// is listed whole (not just a sub-directory) so every .md file beneath it is
// covered.
export const STUB_DIRS = ['.codex', '.github/agents', '.claude/skills'];
export const TEMPLATE_PREFIX = 'install/templates';

// Canonical new-layout reference for an agent name: _byan/agent/<name>/<name>.md
function agentRef(name) {
  return ['_byan', 'agent', name, `${name}.md`].join('/');
}
|
|
43
|
+
|
|
44
|
+
// Ordered rewrite rules, applied first to last. Order matters: the specific
// agent-load forms run before the generic prefix swaps, so an agent path
// becomes the new layout (`_byan/agent/X/X.md`) rather than the legacy one
// (`_byan/*/agents/X.md`).
//
// Every rule is case-insensitive. The stale-ref detector used by --check is
// case-insensitive too, so a case-sensitive rule here would leave tokens such
// as `_BMAD/...` flagged by --check yet never rewritten by the fix: a gate that
// fails with no way to clear it.
const RULES = [
  // bmb-creations agent load -> new-layout agent ref. Matches both @bmad-output
  // and _bmad-output ONLY when followed by /bmb-creations/<dir>/<name>.md, so a
  // plain _bmad-output/ artifact path is left alone. Keyed on the .md FILENAME
  // (the directory name is not required to equal it).
  [/[@_]bmad-output\/bmb-creations\/[a-z0-9-]+\/([a-z0-9-]+)\.md/gi, (_m, n) => agentRef(n)],
  // agent path, flat or nested: (@bmad|_bmad)/(*|module)/agents/<name>(/<name>)?.md
  [/(?:@bmad|_bmad)\/(?:\*|[a-z0-9_-]+)\/agents\/([a-z0-9-]+)(?:\/[a-z0-9-]+)?\.md/gi, (_m, n) => agentRef(n)],
  // generic _bmad/ path prefix. Does not touch _bmad-output (that is _bmad then
  // a hyphen, never _bmad followed by a slash).
  [/_bmad\//gi, '_byan/'],
  // generic @bmad/ path prefix. Does not touch @bmad- invocation syntax.
  [/@bmad\//gi, '_byan/'],
];
|
|
62
|
+
|
|
63
|
+
// Pure: run every rewrite rule over the text, in order. Returns the rewritten
// text plus `changed`, which is true when the result differs from the input.
export function normalizeText(text) {
  const rewritten = RULES.reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), text);
  return { text: rewritten, changed: rewritten !== text };
}
|
|
69
|
+
|
|
70
|
+
// Pure: list the distinct stale path refs found in a text, in first-seen order.
// A stale ref is either a bmb-creations agent load
// (`[@_]bmad-output/bmb-creations/<dir>/<name>.md`) or a path token starting
// with `@bmad/` or `_bmad/`. Invocation syntax (`@bmad-...`) and plain
// `_bmad-output/` artifact paths do not match, because neither has a slash
// right after `bmad`.
// The generic arm takes zero or more trailing characters, so a bare `@bmad/` or
// `_bmad/` prefix is reported even when the next character is a terminator
// (whitespace, `)`, backtick or a quote).
const STALE_RE = /(?:[@_]bmad-output\/bmb-creations\/[a-z0-9-]+\/[a-z0-9-]+\.md|(?:@bmad|_bmad)\/[^\s)`'"]*)/gi;
export function findStaleRefs(text) {
  const hits = text.match(STALE_RE) || [];
  return Array.from(new Set(hits));
}
|
|
82
|
+
|
|
83
|
+
// Recursively list every .md file under `dir`, as POSIX-style paths relative to
// `base` (which defaults to `dir` itself). Directories are descended in listing
// order. A directory that cannot be read yields [] instead of throwing.
export function walkRelMd(dir, { io = fs, base = dir } = {}) {
  let entries;
  try {
    entries = io.readdirSync(dir, { withFileTypes: true });
  } catch {
    return [];
  }
  return entries.flatMap((entry) => {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) return walkRelMd(full, { io, base });
    if (entry.isFile() && entry.name.endsWith('.md')) {
      // Normalize the platform separator so results are always '/'-joined.
      return [path.relative(base, full).split(path.sep).join('/')];
    }
    return [];
  });
}
|
|
103
|
+
|
|
104
|
+
// Every stub .md file as a root-relative POSIX path. For each tracked stub
// directory, the files found at the root come first, followed by those under
// its install-template twin.
export function listStubFiles({ rootDir, io = fs } = {}) {
  const walk = (...segments) => walkRelMd(path.join(rootDir, ...segments), { io, base: rootDir });
  return STUB_DIRS.flatMap((d) => [...walk(d), ...walk(TEMPLATE_PREFIX, d)]);
}
|
|
113
|
+
|
|
114
|
+
// Plan: read every tracked stub file and report which ones normalization would
// change. Returns { toFix, scanned }, where `toFix` holds root-relative paths
// and `scanned` is the number of files read. Nothing is written.
export function planFix({ rootDir, io = fs } = {}) {
  const files = listStubFiles({ rootDir, io });
  const toFix = files.filter((rel) => {
    const content = io.readFileSync(path.join(rootDir, rel), 'utf8');
    return normalizeText(content).changed;
  });
  return { toFix, scanned: files.length };
}
|
|
124
|
+
|
|
125
|
+
// Apply: normalize every file the plan marks as needing it. Each file is
// rewritten by staging the new content in an adjacent `.tmp` file, copying the
// target's permission bits onto it, then renaming it over the target. If any
// step fails the staged file is removed (best-effort) and the original error is
// rethrown. Returns { fixed, scanned }.
export function applyFix({ rootDir, io = fs } = {}) {
  const plan = planFix({ rootDir, io });
  plan.toFix.forEach((rel) => {
    const target = path.join(rootDir, rel);
    const normalized = normalizeText(io.readFileSync(target, 'utf8')).text;
    const staged = `${target}.tmp`;
    try {
      io.writeFileSync(staged, normalized);
      // Keep only the permission bits of the existing file's mode.
      io.chmodSync(staged, io.statSync(target).mode & 0o777);
      io.renameSync(staged, target);
    } catch (err) {
      try {
        io.unlinkSync(staged);
      } catch {
        // The staged file may not exist; cleanup failures are ignored.
      }
      throw err;
    }
  });
  return { fixed: plan.toFix, scanned: plan.scanned };
}
|
|
148
|
+
|
|
149
|
+
// Check: the drift verdict for --check. Reads every tracked stub file and
// collects the stale refs found in each. Returns { stale, ok, scanned }, where
// `stale` lists { file, refs } for the offending files only and `ok` is true
// when that list is empty. Nothing is written.
export function check({ rootDir, io = fs } = {}) {
  const files = listStubFiles({ rootDir, io });
  const stale = files
    .map((rel) => ({ file: rel, refs: findStaleRefs(io.readFileSync(path.join(rootDir, rel), 'utf8')) }))
    .filter((entry) => entry.refs.length);
  return { stale, ok: stale.length === 0, scanned: files.length };
}
|
|
@@ -9,6 +9,9 @@ import {
|
|
|
9
9
|
ListToolsRequestSchema,
|
|
10
10
|
} from '@modelcontextprotocol/sdk/types.js';
|
|
11
11
|
import { dispatch } from './lib/dispatch.js';
|
|
12
|
+
import { harvest as harvestInsights, renderDigest as renderInsightDigest } from './lib/insight-harvest.js';
|
|
13
|
+
import { appendOutcome } from './lib/outcome-buffer.js';
|
|
14
|
+
import { validateForLog } from './lib/advisory-autofeed.js';
|
|
12
15
|
import { readSoul, appendSoulMemory } from './lib/soul.js';
|
|
13
16
|
import { listSessions, readSessionEvents, searchSessions } from './lib/copilot.js';
|
|
14
17
|
import {
|
|
@@ -545,6 +548,34 @@ const tools = [
|
|
|
545
548
|
additionalProperties: false,
|
|
546
549
|
},
|
|
547
550
|
},
|
|
551
|
+
{
|
|
552
|
+
name: 'byan_insight_digest',
|
|
553
|
+
description:
|
|
554
|
+
'Harvest native Claude Code outcome trails (tool-log, strict-audit gaps, the suitability ledger, ELO) into a GATED improvement digest for BYAN. Read-only: it OBSERVES and PROPOSES; every proposal is gated for a human to ratify, nothing is auto-applied to routing / personas / mantras. Returns { toolHealth, recurringGaps, routingOutcomes, eloTrends, proposals }.',
|
|
555
|
+
inputSchema: {
|
|
556
|
+
type: 'object',
|
|
557
|
+
properties: {},
|
|
558
|
+
additionalProperties: false,
|
|
559
|
+
},
|
|
560
|
+
},
|
|
561
|
+
{
|
|
562
|
+
name: 'byan_outcome_log',
|
|
563
|
+
description:
|
|
564
|
+
'Log one ADVISORY outcome to the auto-feed buffer (cheap append; it never writes a ledger directly). The drain-advisory Stop hook records buffered outcomes into the ELO / suitability ledgers at end of turn, so BYAN auto-learns without the agent recording by hand. kind=elo needs { domain, result: VALIDATED|PARTIAL|BLOCKED }; kind=suitability needs { model, leafId, success }. Advisory-only: behavior surfaces (routing / personas / mantras) are never written.',
|
|
565
|
+
inputSchema: {
|
|
566
|
+
type: 'object',
|
|
567
|
+
properties: {
|
|
568
|
+
kind: { type: 'string', enum: ['elo', 'suitability'] },
|
|
569
|
+
domain: { type: 'string', description: 'elo: the technical domain of the claim' },
|
|
570
|
+
result: { type: 'string', enum: ['VALIDATED', 'PARTIAL', 'BLOCKED'], description: 'elo: the claim verdict' },
|
|
571
|
+
model: { type: 'string', description: 'suitability: the cheap model tier/id' },
|
|
572
|
+
leafId: { type: 'string', description: 'suitability: the workflow leaf' },
|
|
573
|
+
success: { type: 'boolean', description: 'suitability: did the cheap model survive adversarial review' },
|
|
574
|
+
},
|
|
575
|
+
required: ['kind'],
|
|
576
|
+
additionalProperties: false,
|
|
577
|
+
},
|
|
578
|
+
},
|
|
548
579
|
{
|
|
549
580
|
name: 'byan_strict_lock_scope',
|
|
550
581
|
description:
|
|
@@ -1383,6 +1414,33 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1383
1414
|
};
|
|
1384
1415
|
}
|
|
1385
1416
|
|
|
1417
|
+
if (name === 'byan_insight_digest') {
|
|
1418
|
+
const rootDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
1419
|
+
const digest = harvestInsights({ rootDir });
|
|
1420
|
+
return {
|
|
1421
|
+
content: [
|
|
1422
|
+
{
|
|
1423
|
+
type: 'text',
|
|
1424
|
+
text: JSON.stringify({ gated: true, digest, render: renderInsightDigest(digest) }, null, 2),
|
|
1425
|
+
},
|
|
1426
|
+
],
|
|
1427
|
+
};
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
if (name === 'byan_outcome_log') {
|
|
1431
|
+
const line = validateForLog(args);
|
|
1432
|
+
if (!line) {
|
|
1433
|
+
return {
|
|
1434
|
+
content: [{ type: 'text', text: JSON.stringify({ logged: false, reason: 'invalid_outcome' }) }],
|
|
1435
|
+
};
|
|
1436
|
+
}
|
|
1437
|
+
const rootDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
1438
|
+
const ok = appendOutcome(line, { rootDir });
|
|
1439
|
+
return {
|
|
1440
|
+
content: [{ type: 'text', text: JSON.stringify({ logged: ok, outcome: line }) }],
|
|
1441
|
+
};
|
|
1442
|
+
}
|
|
1443
|
+
|
|
1386
1444
|
if (name === 'byan_strict_lock_scope') {
|
|
1387
1445
|
const r = strictLockScope({
|
|
1388
1446
|
scopeText: args.scopeText,
|
|
@@ -287,24 +287,25 @@ very different optimal targets depending on whether they run **alongside
|
|
|
287
287
|
siblings** (parallel) or **in sequence**. The v2 router adds a
|
|
288
288
|
`parallelizable` axis and emits an **execution strategy**, not a model.
|
|
289
289
|
|
|
290
|
-
Implementation :
|
|
291
|
-
|
|
290
|
+
Implementation : the MCP tool `byan_dispatch`
|
|
291
|
+
(`_byan/mcp/byan-mcp-server/lib/dispatch.js`), the single source of truth. The
|
|
292
|
+
strategy comes from the score + `parallelizable` ; the model tier is a separate
|
|
293
|
+
axis, derived from the task NATURE via `native-tiers.js`.
|
|
292
294
|
|
|
293
295
|
```
|
|
294
296
|
score < 15 → main-thread
|
|
295
297
|
score 15-39 + parallelizable: true → agent-subagent-worktree
|
|
296
|
-
score 15-39 + parallelizable: false → mcp-worker
|
|
297
|
-
score >= 40 → main-thread
|
|
298
|
+
score 15-39 + parallelizable: false → mcp-worker
|
|
299
|
+
score >= 40 → main-thread (heavy)
|
|
298
300
|
```
|
|
299
301
|
|
|
300
302
|
Rationale :
|
|
301
303
|
|
|
302
304
|
| Strategy | When | Why |
|
|
303
305
|
|---|---|---|
|
|
304
|
-
| `main-thread` | Trivial task | Spawning
|
|
306
|
+
| `main-thread` | Trivial or heavy task | Spawning costs more than solving inline (trivial), or the work is heavy and stays in the main thread. |
|
|
305
307
|
| `agent-subagent-worktree` | Medium parallel | Claude Code Agent tool with `isolation: "worktree"` amortizes boot cost across the wall-clock savings. |
|
|
306
|
-
| `mcp-worker
|
|
307
|
-
| `main-thread-opus` | Complex | Reasoning depth needed; subagent boot + context handoff would waste more than the delegation saves. |
|
|
308
|
+
| `mcp-worker` | Medium sequential | Delegate to a worker via MCP tool — no subagent boot, cheaper than the main thread. The model tier is set separately, by nature. |
|
|
308
309
|
|
|
309
310
|
The score threshold of 15 is where Claude Code `Agent` tool boot overhead
|
|
310
311
|
(~5-10k tokens for system prompt + tools) stops being worth it for
|
|
@@ -111,8 +111,8 @@ INIT
|
|
|
111
111
|
|------------------|-------|-----------|
|
|
112
112
|
| < 15 | `main-thread` | Inline dans le contexte courant, zéro overhead de délégation |
|
|
113
113
|
| < 40 + parallélisable | `agent-subagent-worktree` | Agent tool Claude Code avec isolation worktree |
|
|
114
|
-
| < 40 séquentiel | `mcp-worker
|
|
115
|
-
| ≥ 40 | `main-thread
|
|
114
|
+
| < 40 séquentiel | `mcp-worker` | Worker léger via MCP (le tier de modèle vient de la nature, pas de la taille) |
|
|
115
|
+
| ≥ 40 | `main-thread` | Garde en main thread (lourd) ; modèle hérité de la session |
|
|
116
116
|
|
|
117
117
|
> Le score (0-100) est estimé depuis la complexité de la tâche (longueur si absent). Appeler `byan_dispatch` pour le calcul — ne pas réinventer les seuils ici.
|
|
118
118
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "create-byan-agent",
|
|
3
|
-
"version": "2.22.0",
|
|
3
|
+
"version": "2.25.0",
|
|
4
4
|
"description": "BYAN v2.8 - Intelligent AI agent creator with ELO trust system + scientific fact-check + Hermes universal dispatcher + native Claude Code integration (hooks, skills, MCP server). Multi-platform (Copilot CLI, Claude Code, Codex). Merise Agile + TDD + 71 Mantras. ~54% LLM cost savings.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* - Integration with RateLimitTracker + SharedStateStore + SessionBridge
|
|
9
9
|
*
|
|
10
10
|
* Sits ABOVE existing BYAN routers:
|
|
11
|
-
* LoadBalancer (picks PLATFORM) →
|
|
11
|
+
* LoadBalancer (picks PLATFORM) → byan_dispatch (picks STRATEGY + model TIER)
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
const { EventEmitter } = require('events');
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: byan-byan-test
|
|
3
|
-
description: BYAN Test - Token Optimized Version (-46%)
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# byan-test
|
|
7
|
-
|
|
8
|
-
## Rules
|
|
9
|
-
|
|
10
|
-
- This is a TEST version of BYAN optimized for token reduction (-46%)
|
|
11
|
-
- Full agent: _byan/bmb/agents/byan-test.md (116 lines vs 215 original)
|
|
12
|
-
- Original BYAN still available via bmad-agent-byan
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Execution strategy router.
|
|
3
|
-
*
|
|
4
|
-
* Decides WHERE a task runs (not which model). Four strategies, routed by
|
|
5
|
-
* complexity score and whether the task is parallelizable with siblings:
|
|
6
|
-
*
|
|
7
|
-
* main-thread score < 15
|
|
8
|
-
* agent-subagent-worktree score 15-39 + parallelizable = true
|
|
9
|
-
* mcp-worker-haiku score 15-39 + sequential
|
|
10
|
-
* main-thread-opus score >= 40
|
|
11
|
-
*
|
|
12
|
-
* Complementary to EconomicDispatcher (which picks the model).
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
/**
 * Routes a task to an execution strategy (where it runs, not which model)
 * from its complexity score and whether it can run alongside sibling tasks.
 */
class ExecutionRouter {
  /**
   * Pick the execution strategy for a task.
   *
   * The score is `complexity` when it is a usable number; otherwise it is
   * estimated from the task text length (one point per 10 characters,
   * capped at 100, and 0 when no task text is given).
   *
   * @param {{task?: string, complexity?: number, parallelizable?: boolean}} input
   * @returns {{score: number, strategy: string, reasoning: string, parallelizable: boolean}}
   */
  route(input = {}) {
    // The default parameter only covers `undefined`; `?? {}` also tolerates
    // an explicit `null` instead of throwing on destructuring.
    const { task, complexity, parallelizable } = input ?? {};

    // NaN is `typeof 'number'` but fails every `<` comparison below, which
    // would silently route it to the most expensive strategy with a NaN
    // score. Treat it as "no score supplied" and fall back to the estimate.
    const hasUsableComplexity =
      typeof complexity === 'number' && !Number.isNaN(complexity);

    const score = hasUsableComplexity
      ? complexity
      : Math.min(100, Math.floor((task?.length || 0) / 10));

    // Only a strict `true` opts in to parallel execution.
    const isPar = parallelizable === true;

    if (score < 15) {
      return {
        score,
        strategy: 'main-thread',
        reasoning: `Score ${score} < 15. Inline in current context, no delegation overhead.`,
        parallelizable: isPar,
      };
    }

    if (score < 40 && isPar) {
      return {
        score,
        strategy: 'agent-subagent-worktree',
        reasoning: `Score ${score} + parallelizable. Spawn Claude Code Agent tool with worktree isolation.`,
        parallelizable: isPar,
      };
    }

    if (score < 40) {
      return {
        score,
        strategy: 'mcp-worker-haiku',
        reasoning: `Score ${score}, sequential. Delegate to lightweight Haiku worker via MCP.`,
        parallelizable: isPar,
      };
    }

    return {
      score,
      strategy: 'main-thread-opus',
      reasoning: `Score ${score} >= 40. Complex task, keep in main thread with Opus reasoning.`,
      parallelizable: isPar,
    };
  }
}
|
|
65
|
-
|
|
66
|
-
module.exports = ExecutionRouter;
|