create-byan-agent 2.20.1 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/CHANGELOG.md +91 -0
  2. package/install/templates/.claude/CLAUDE.md +21 -1
  3. package/install/templates/.claude/rules/byan-agents.md +1 -0
  4. package/install/templates/.claude/rules/hermes-dispatcher.md +1 -0
  5. package/install/templates/.claude/rules/team-doctrine.md +102 -0
  6. package/install/templates/.claude/skills/byan-byan-test/SKILL.md +1 -1
  7. package/install/templates/.claude/skills/byan-suitability/SKILL.md +71 -0
  8. package/install/templates/.claude/workflows/create-excalidraw-dataflow.js +2 -2
  9. package/install/templates/.claude/workflows/create-excalidraw-diagram.js +2 -2
  10. package/install/templates/.claude/workflows/create-excalidraw-flowchart.js +2 -2
  11. package/install/templates/.claude/workflows/create-excalidraw-wireframe.js +2 -2
  12. package/install/templates/.claude/workflows/dev-story.js +1 -1
  13. package/install/templates/.claude/workflows/document-project.js +3 -1
  14. package/install/templates/.githooks/pre-commit +20 -2
  15. package/install/templates/.github/agents/bmad-agent-byan.md +1056 -10
  16. package/install/templates/.github/agents/bmad-agent-skeptic.md +7 -5
  17. package/install/templates/_bmad/bmb/agents/agent-builder.md +5 -5
  18. package/install/templates/_bmad/bmb/agents/byan-test.md +8 -8
  19. package/install/templates/_bmad/bmb/agents/byan.md +8 -8
  20. package/install/templates/_bmad/bmb/agents/marc.md +11 -11
  21. package/install/templates/_bmad/bmb/agents/module-builder.md +6 -6
  22. package/install/templates/_bmad/bmb/agents/patnote.md +8 -8
  23. package/install/templates/_bmad/bmb/agents/rachid.md +10 -10
  24. package/install/templates/_bmad/bmb/agents/workflow-builder.md +7 -7
  25. package/install/templates/_bmad/bmb/workflows/byan/quick-create-workflow.md +2 -2
  26. package/install/templates/_bmad/bmb/workflows/byan/templates/base-agent-template.md +1 -1
  27. package/install/templates/_bmad/bmb/workflows/byan/validate-agent-workflow.md +1 -1
  28. package/install/templates/_bmad/core/agents/carmack.md +2 -2
  29. package/install/templates/_byan/_config/agent-manifest.csv +1 -0
  30. package/install/templates/_byan/agent/agent-builder/agent-builder.md +20 -0
  31. package/install/templates/_byan/agent/analyst/analyst.md +21 -0
  32. package/install/templates/_byan/agent/architect/architect.md +21 -0
  33. package/install/templates/_byan/agent/bmad-master/bmad-master.md +23 -0
  34. package/install/templates/_byan/agent/brainstorming-coach/brainstorming-coach.md +21 -0
  35. package/install/templates/_byan/agent/byan/byan.md +24 -0
  36. package/install/templates/_byan/agent/byan-flat/byan.md +23 -0
  37. package/install/templates/_byan/agent/byan-test/byan-test.md +19 -0
  38. package/install/templates/_byan/agent/byan-test-flat/byan-test.md +20 -0
  39. package/install/templates/_byan/agent/carmack/carmack.md +22 -0
  40. package/install/templates/_byan/agent/claude/claude.md +21 -0
  41. package/install/templates/_byan/agent/codex/codex.md +21 -0
  42. package/install/templates/_byan/agent/creative-problem-solver/creative-problem-solver.md +21 -0
  43. package/install/templates/_byan/agent/design-thinking-coach/design-thinking-coach.md +21 -0
  44. package/install/templates/_byan/agent/dev/dev.md +20 -0
  45. package/install/templates/_byan/agent/drawio/drawio.md +21 -0
  46. package/install/templates/_byan/agent/expert-merise-agile/expert-merise-agile.md +21 -0
  47. package/install/templates/_byan/agent/fact-checker/fact-checker.md +21 -0
  48. package/install/templates/_byan/agent/forgeron/forgeron.md +22 -0
  49. package/install/templates/_byan/agent/innovation-strategist/innovation-strategist.md +21 -0
  50. package/install/templates/_byan/agent/jimmy/jimmy.md +23 -0
  51. package/install/templates/_byan/agent/marc/marc.md +21 -0
  52. package/install/templates/_byan/agent/marc-flat/marc.md +23 -0
  53. package/install/templates/_byan/agent/mike/mike.md +23 -0
  54. package/install/templates/_byan/agent/module-builder/module-builder.md +20 -0
  55. package/install/templates/_byan/agent/patnote/patnote.md +21 -0
  56. package/install/templates/_byan/agent/pm/pm.md +21 -0
  57. package/install/templates/_byan/agent/presentation-master/presentation-master.md +21 -0
  58. package/install/templates/_byan/agent/quick-flow-solo-dev/quick-flow-solo-dev.md +19 -0
  59. package/install/templates/_byan/agent/quinn/quinn.md +19 -0
  60. package/install/templates/_byan/agent/rachid/rachid.md +21 -0
  61. package/install/templates/_byan/agent/rachid-flat/rachid.md +22 -0
  62. package/install/templates/_byan/agent/skeptic/skeptic.md +23 -0
  63. package/install/templates/_byan/agent/sm/sm.md +21 -0
  64. package/install/templates/_byan/agent/storyteller/storyteller.md +21 -0
  65. package/install/templates/_byan/agent/tao/tao.md +22 -0
  66. package/install/templates/_byan/agent/tea/tea.md +23 -0
  67. package/install/templates/_byan/agent/tech-writer/tech-writer.md +21 -0
  68. package/install/templates/_byan/agent/test-dynamic/test-dynamic.md +19 -0
  69. package/install/templates/_byan/agent/turbo-whisper/turbo-whisper.md +22 -0
  70. package/install/templates/_byan/agent/turbo-whisper-integration/turbo-whisper-integration.md +21 -0
  71. package/install/templates/_byan/agent/ux-designer/ux-designer.md +20 -0
  72. package/install/templates/_byan/agent/workflow-builder/workflow-builder.md +20 -0
  73. package/install/templates/_byan/agent/yanstaller/yanstaller.md +23 -0
  74. package/install/templates/_byan/bmb/config.yaml +36 -2
  75. package/install/templates/_byan/config.yaml +28 -0
  76. package/install/templates/_byan/core/activation/soul-activation.md +35 -0
  77. package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-suitability.js +50 -0
  78. package/install/templates/_byan/mcp/byan-mcp-server/lib/native-tiers.js +112 -0
  79. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability-feeder.js +45 -0
  80. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability-store.js +102 -0
  81. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability.js +234 -0
  82. package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-lint.js +72 -2
  83. package/install/templates/_byan/mcp/byan-mcp-server/package.json +9 -3
  84. package/install/templates/_byan/mcp/byan-mcp-server/server.js +58 -0
  85. package/install/templates/_byan/worker/workers.md +71 -1
  86. package/install/templates/_byan/workflow/simple/byan/soul-memory-update.md +25 -3
  87. package/install/templates/docs/native-workflows-contract.md +109 -0
  88. package/package.json +1 -1
  89. package/src/byan-v2/dispatcher/complexity-scorer.js +6 -0
  90. package/src/byan-v2/dispatcher/task-router.js +5 -0
@@ -0,0 +1,112 @@
1
+ // Single source of truth for MODEL ROUTING of native-workflow leaves.
2
+ //
3
+ // Claude Code's in-CLI Workflow tool runs each agent() leaf on the main-loop
4
+ // model unless that call sets opts.model. Ported BYAN workflows never set it,
5
+ // so every leaf ran on the session model (Opus) — the read-the-file leaf paid
6
+ // the same tier as the implement-and-verify leaf. This module is the one place
7
+ // that decides a leaf's model tier, so the rule lives once and the linter
8
+ // (workflows-lint.js) can enforce it.
9
+ //
10
+ // This is a DISTINCT concern from src/byan-v2/dispatcher/complexity-scorer.js,
11
+ // which scores task COMPLEXITY (0-100) to route a whole task to an executor.
12
+ // That scorer answers "how hard is this task"; this module answers "which model
13
+ // tier does this workflow LEAF deserve". They share the same exploration intent
14
+ // but produce different outputs, so they stay separate (clarified, not merged).
15
+ //
16
+ // The sandbox forbids import INSIDE a .claude/workflows/*.js script, so a script
17
+ // cannot require() this file at runtime. The contract it encodes is instead a
18
+ // literal (model: 'haiku') the author writes on exploration leaves, validated
19
+ // against this module by the linter. This module is the canonical reference.
20
+
21
+ // The three-tier vocabulary. cheap/balanced are explicit downgrades; deep is the
22
+ // default and means "inherit the main-loop model".
23
+ export const TIERS = Object.freeze({ CHEAP: 'cheap', BALANCED: 'balanced', DEEP: 'deep' });
24
+
25
+ // tier -> concrete opts.model value, or null = OMIT opts.model (inherit).
26
+ //
27
+ // deep MUST be null. Omitting opts.model lets the leaf inherit whatever model the
28
+ // session runs (Opus by default, but Sonnet if the user chose Sonnet). We never
29
+ // PIN UP — pinning a leaf to a fixed high tier would override the user's session
30
+ // choice and could silently DOWNGRADE a Sonnet/Opus session's heavy leaf. Only
31
+ // cheap/balanced carry a value, and only exploration leaves ever get one.
32
+ //
33
+ // Values are the harness model-selection aliases (same set as the Agent tool:
34
+ // 'haiku' | 'sonnet' | 'opus'). They are version-independent. If a future
35
+ // runtime needs full model ids, this map is the ONLY edit — the linter then
36
+ // flags every script literal that drifts from it, so the fan-out stays bounded.
37
+ export const TIER_MODEL = Object.freeze({ cheap: 'haiku', balanced: 'sonnet', deep: null });
38
+
39
+ // Leaf task-type taxonomy. EXPLORATION is the only downgrade-safe class; the
40
+ // other three are protected (never downgraded).
41
+ export const LEAF_TYPES = Object.freeze({
42
+ EXPLORATION: 'exploration',
43
+ IMPLEMENTATION: 'implementation',
44
+ VERIFICATION: 'verification',
45
+ ANALYSIS: 'analysis',
46
+ });
47
+
48
+ // Label keyword sets, matched as substrings on the leaf LABEL (not the prompt —
49
+ // see classifyLeaf). Protected sets are checked first so any protected signal
50
+ // beats an exploration signal (conservative: when in doubt, do not downgrade).
51
+ // Note: 'test' is deliberately ABSENT. It collides both ways — 'discover-tests'
52
+ // is exploration (find the test files) while 'test-design' is analysis — so the
53
+ // bare token decides nothing. Real verification leaves carry verify/validate/
54
+ // check/review/gate/audit/assert/lint; a leaf that runs tests is labelled
55
+ // 'verify-*' in practice.
56
const VERIFICATION_KEYWORDS = [
  'verify',
  'validate',
  'check',
  'assert',
  'gate',
  'lint',
  'audit',
  'review',
];
const ANALYSIS_KEYWORDS = [
  'analy',
  'design',
  'architect',
  'assess',
  'evaluate',
  'strategy',
  'risk',
  'nfr',
  'recommend',
  'judge',
  'score',
  'coverage',
];
const IMPLEMENTATION_KEYWORDS = [
  'implement',
  'build',
  'write',
  'generate',
  'create',
  'dev',
  'rgr',
  'refactor',
  'fix',
  'scaffold',
  'save',
  'optimize',
  'aggregate',
  'report',
  'present',
  'plan',
  'map',
  'select',
  'subprocess',
  'sub-',
];
const EXPLORATION_KEYWORDS = [
  'load',
  'read',
  'scan',
  'list',
  'parse',
  'detect',
  'discover',
  'fetch',
  'lookup',
  'source-tree',
  'mode-detection',
];

// matchesAny(text, keywords) -> true when `text` contains at least one keyword
// as a plain substring (no tokenising, no word boundaries).
function matchesAny(text, keywords) {
  for (const keyword of keywords) {
    if (text.includes(keyword)) return true;
  }
  return false;
}

// classifyLeaf({ label }) -> a LEAF_TYPES value.
//
// Only the lower-cased LABEL is inspected; the prompt is never read. The keyword
// sets are tried protect-first (verification, analysis, implementation) and
// exploration last, so a label carrying both a protected and an exploration
// keyword is classified as protected. A missing/empty label, or one that
// matches nothing, falls back to IMPLEMENTATION so it is never downgraded.
//
// NOTE(review): matching is by substring, so a label such as 'upload-x' matches
// the exploration keyword 'load' — confirm labels follow the naming convention.
export function classifyLeaf(leaf) {
  const rawLabel = leaf ? leaf.label : undefined;
  const label = String(rawLabel || '').toLowerCase();
  if (label === '') return LEAF_TYPES.IMPLEMENTATION;

  // Ordered rule table: first set with a hit decides the type.
  const rules = [
    [VERIFICATION_KEYWORDS, LEAF_TYPES.VERIFICATION],
    [ANALYSIS_KEYWORDS, LEAF_TYPES.ANALYSIS],
    [IMPLEMENTATION_KEYWORDS, LEAF_TYPES.IMPLEMENTATION],
    [EXPLORATION_KEYWORDS, LEAF_TYPES.EXPLORATION],
  ];
  const hit = rules.find(([keywords]) => matchesAny(label, keywords));
  return hit ? hit[1] : LEAF_TYPES.IMPLEMENTATION;
}
82
+
83
+ // tierFor(taskType) -> a TIERS value. Conservative auto-routing: only EXPLORATION
84
+ // is downgraded (cheap); every other type stays deep. BALANCED is part of the
85
+ // vocabulary but is never auto-assigned — it exists for an explicit, manual
86
+ // opt-in on a leaf an author judges mid-weight. Automation only ever picks
87
+ // cheap or deep.
88
// tierFor(taskType) -> TIERS.CHEAP for an EXPLORATION leaf, TIERS.DEEP for
// every other value (including unknown ones). BALANCED is never returned here.
export function tierFor(taskType) {
  if (taskType === LEAF_TYPES.EXPLORATION) {
    return TIERS.CHEAP;
  }
  return TIERS.DEEP;
}
91
+
92
+ // modelForLeaf({ label }) -> the opts.model value to write (a string) or null
93
+ // (omit opts.model). This is what F2 stamps onto exploration leaves.
94
// modelForLeaf({ label }) -> the TIER_MODEL entry for the leaf's tier: classify
// the leaf, map the type to a tier, then look the tier up in TIER_MODEL.
export function modelForLeaf(leaf) {
  const leafType = classifyLeaf(leaf);
  const tier = tierFor(leafType);
  return TIER_MODEL[tier];
}
97
+
98
+ // isKnownTierModel(modelId) -> true if modelId is one of the concrete downgrade
99
+ // models (cheap/balanced). Used by the linter to reject an opts.model literal
100
+ // that is not a recognised tier. null/'' are not "known" (deep = omission, not a
101
+ // literal). 'opus' is intentionally NOT known — we never pin up.
102
// isKnownTierModel(modelId) -> true when modelId equals one of the non-null
// values in TIER_MODEL. Falsy ids (null, undefined, '') are never known.
export function isKnownTierModel(modelId) {
  if (!modelId) return false;
  return Object.values(TIER_MODEL).some((pinned) => Boolean(pinned) && pinned === modelId);
}
106
+
107
+ // isDowngradeModel(modelId) -> true if modelId pins a leaf BELOW the inherited
108
+ // tier (cheap or balanced). The linter's anti-downgrade rule uses this: a
109
+ // protected leaf must never carry a downgrade model.
110
// isDowngradeModel(modelId) -> true when modelId is exactly the cheap or the
// balanced entry of TIER_MODEL; false for anything else.
export function isDowngradeModel(modelId) {
  const { cheap, balanced } = TIER_MODEL;
  return [cheap, balanced].some((pinned) => pinned === modelId);
}
@@ -0,0 +1,45 @@
1
+ // Feeder B — adversarial-pass verdict -> ledger outcome (F3).
2
+ //
3
+ // The adversarial VALIDATE pass runs N skeptics against ONE downgraded leaf,
4
+ // each trying to REFUTE that the cheap model is adequate there. The leaf is
5
+ // "flagged" (cheap inadequate) when at least half the skeptics refute it. This
6
+ // module maps that vote into the ledger's binary outcome:
7
+ //
8
+ // success = the cheap model SURVIVED the panel (refuters fell short of half).
9
+ //
10
+ // It is PURE and DETERMINISTIC and does no I/O. The actual byan_suitability_record
11
+ // call happens in the orchestrating skill on a main-thread turn — a workflow
12
+ // script cannot call MCP tools or write state (sandbox/state-coupling rule), so
13
+ // the script returns the verdicts as DATA and the skill records them. This
14
+ // module is the shared shaping step both sides agree on.
15
+
16
+ // At least half the panel refuting flags the leaf. Ties resolve AGAINST the
17
+ // cheap model — the conservative bias for an anti-downgrade rail. The adversarial
18
+ // pass uses an odd panel (3) so ties do not arise in practice; the rule is
19
+ // defined for any n so an even panel still degrades safely.
20
// isFlagged(refutedVotes, totalVotes) -> true when at least half of the panel
// refuted the leaf. An exact tie counts as flagged.
function isFlagged(refutedVotes, totalVotes) {
  return refutedVotes * 2 >= totalVotes;
}

// parseVoteCount(value) -> a number, or NaN when the value is not a count.
//
// Only real numbers and non-blank numeric strings are accepted. A bare
// Number(value) would turn null, '', [] into 0 and true into 1, so a MISSING
// refutedVotes would silently read as "nobody refuted" and record a success —
// the unsafe direction for an anti-downgrade rail. Those inputs become NaN here
// and are rejected by the integer checks in verdictToOutcome.
function parseVoteCount(value) {
  if (typeof value === 'number') return value;
  if (typeof value === 'string' && value.trim() !== '') return Number(value);
  return NaN;
}

// verdictToOutcome({ model, leafId, refutedVotes, totalVotes }) ->
// { model, leafId, success }.
//
// success is true when fewer than half of the votes refuted the leaf.
// Throws Error when model or leafId is missing, when totalVotes is not a
// positive integer, or when refutedVotes is not an integer in 0..totalVotes.
export function verdictToOutcome({ model, leafId, refutedVotes, totalVotes } = {}) {
  if (!model || !leafId) throw new Error('verdictToOutcome requires model and leafId');
  const total = parseVoteCount(totalVotes);
  const refuted = parseVoteCount(refutedVotes);
  if (!Number.isInteger(total) || total <= 0) {
    throw new Error('totalVotes must be a positive integer');
  }
  if (!Number.isInteger(refuted) || refuted < 0 || refuted > total) {
    throw new Error('refutedVotes must be an integer in 0..totalVotes');
  }
  return { model, leafId, success: !isFlagged(refuted, total) };
}

// verdictsToOutcomes([verdict, ...]) -> [outcome, ...], one outcome per verdict
// in input order. Throws Error when the argument is not an array or when any
// verdict is malformed (see verdictToOutcome).
export function verdictsToOutcomes(verdicts = []) {
  if (!Array.isArray(verdicts)) throw new Error('verdictsToOutcomes expects an array');
  // A null element is passed as undefined so it hits the same descriptive
  // "requires model and leafId" error instead of a raw destructuring TypeError.
  return verdicts.map((verdict) => verdictToOutcome(verdict === null ? undefined : verdict));
}
@@ -0,0 +1,102 @@
1
+ // Model-suitability ledger — persistence + MCP-facing surface (F2).
2
+ //
3
+ // The pure math lives in suitability.js (no I/O). This module is the ONLY place
4
+ // that writes the ledger to disk, which is what makes the sandbox/state-coupling
5
+ // rule hold: a .claude/workflows/*.js script cannot import this file (the sandbox
6
+ // forbids it) and therefore cannot write ledger state. State changes flow only
7
+ // through the MCP tools (byan_suitability_record / _report), which call into
8
+ // here. The workflow feeds the tool; the tool owns the write.
9
+ //
10
+ // Best-effort contract (mirrors strict-sync.js): record() NEVER throws. Bad
11
+ // input or a failed write degrades to { recorded: false, reason } and leaves the
12
+ // on-disk ledger untouched. A telemetry write must never block or corrupt the
13
+ // real work — losing one outcome is acceptable; crashing the caller is not.
14
+
15
+ import fs from 'node:fs';
16
+ import path from 'node:path';
17
+ import { recordOutcome as pureRecord, rating, report } from './suitability.js';
18
+
19
// resolveRoot(projectRoot) -> the first truthy of: the argument, the
// CLAUDE_PROJECT_DIR environment variable, the current working directory.
export function resolveRoot(projectRoot) {
  return projectRoot || process.env.CLAUDE_PROJECT_DIR || process.cwd();
}

// ledgerPath(projectRoot) -> <root>/_byan-output/suitability-ledger.json.
// Throws (from path.join) when the resolved root is not a string.
export function ledgerPath(projectRoot) {
  return path.join(resolveRoot(projectRoot), '_byan-output', 'suitability-ledger.json');
}

// readLedger({ projectRoot, io }) -> the parsed ledger object, or {}.
//
// Never throws: a missing file, unreadable file, invalid JSON, a JSON value
// that is not a plain object (array, null, primitive), or a root that cannot be
// turned into a path all read as {}. `io` defaults to node:fs and only needs
// existsSync and readFileSync.
export function readLedger({ projectRoot, io = fs } = {}) {
  try {
    // Path resolution sits INSIDE the try: path.join throws on a non-string
    // root, and that must degrade to an empty ledger like every other failure.
    const p = ledgerPath(projectRoot);
    if (!io.existsSync(p)) return {};
    const parsed = JSON.parse(io.readFileSync(p, 'utf8'));
    return parsed && typeof parsed === 'object' && !Array.isArray(parsed) ? parsed : {};
  } catch {
    return {};
  }
}
40
+
41
// writeLedger(ledger, { projectRoot, io }) -> the ledger path that was written.
//
// Creates the parent directory, serialises the ledger (2-space JSON) into
// `<ledger>.tmp` next to the target, then renames the staged file over the
// target. If the write or the rename throws, the staged file is removed on a
// best-effort basis and the original error is rethrown.
function writeLedger(ledger, { projectRoot, io = fs } = {}) {
  const target = ledgerPath(projectRoot);
  const staged = `${target}.tmp`;
  io.mkdirSync(path.dirname(target), { recursive: true });
  try {
    io.writeFileSync(staged, JSON.stringify(ledger, null, 2));
    io.renameSync(staged, target);
    return target;
  } catch (err) {
    try {
      io.unlinkSync(staged);
    } catch {
      // Nothing was staged, or unlink is unsupported — nothing to clean up.
    }
    throw err;
  }
}
64
+
65
+ // record one adequacy outcome. Returns { recorded, reason, rating, source }.
66
+ // recorded:false with reason 'invalid_input' (bad args) or 'persist_failed'
67
+ // (write threw). On a persist failure the rating reflects the PRE-write ledger,
68
+ // so the caller never sees a phantom update. Never throws.
69
// record({ model, leafId, success, source, projectRoot, io }) -> result object.
//
// Reads the current ledger, applies one outcome, and tries to persist it.
//   - the pure update throws  -> { recorded: false, reason: 'invalid_input',
//                                  error, source }
//   - the write throws        -> { recorded: false, reason: 'persist_failed',
//                                  rating (of the ledger as read), source }
//   - otherwise               -> { recorded: true, reason: null,
//                                  rating (of the updated ledger), source }
// `source` is echoed back, or null when falsy.
export function record({ model, leafId, success, source, projectRoot, io = fs } = {}) {
  const origin = source || null;
  const before = readLedger({ projectRoot, io });

  let after;
  try {
    after = pureRecord(before, { model, leafId, success });
  } catch (err) {
    return { recorded: false, reason: 'invalid_input', error: err.message, source: origin };
  }

  try {
    writeLedger(after, { projectRoot, io });
  } catch {
    // Swallowed by contract: the outcome is lost, and the rating is computed
    // from the ledger as it was read so no phantom update is reported.
    return {
      recorded: false,
      reason: 'persist_failed',
      rating: rating(before, { model, leafId }),
      source: origin,
    };
  }

  return {
    recorded: true,
    reason: null,
    rating: rating(after, { model, leafId }),
    source: origin,
  };
}
96
+
97
+ // reportLedger -> advisory ratings (most-actionable first), each carrying the
98
+ // credible lower bound and n. Optional model filter. Read-only.
99
// reportLedger({ model, projectRoot, io }) -> the report rows for the stored
// ledger; when `model` is truthy only rows whose model equals it are kept.
// Reads the ledger and writes nothing.
export function reportLedger({ model, projectRoot, io = fs } = {}) {
  const ledger = readLedger({ projectRoot, io });
  const rows = report(ledger);
  if (!model) return rows;
  return rows.filter((row) => row.model === model);
}
@@ -0,0 +1,234 @@
1
+ // Model-suitability ledger — the math core (design D1, advisory only).
2
+ //
3
+ // It answers ONE question per (model x leaf) pair: from the binary adequacy
4
+ // outcomes we have seen, is this CHEAP model safe to keep on this leaf? The
5
+ // answer is conservative by construction — it commits to "keep-cheap" only when
6
+ // the evidence is both good AND plentiful, and to "demote" only when the
7
+ // evidence is clearly bad. Everything in between is "watch": not enough proof to
8
+ // move, so the safe default (deep / no downgrade) stands.
9
+ //
10
+ // This module is PURE and DETERMINISTIC. No Date, no Math.random, no I/O. The
11
+ // ledger is a plain object the caller owns; every update returns a NEW ledger.
12
+ // Persistence lives behind the MCP tools (F2) so the sandbox/state-coupling rule
13
+ // holds: a workflow script never writes ledger state, the MCP tool does. The
14
+ // statistics here are the part a downgraded model would get subtly wrong, which
15
+ // is exactly why this leaf was kept on the strong model.
16
+
17
+ // Defaults. The math does not hard-depend on these — they are the policy knobs,
18
+ // passed through opts and overridable per call.
19
+ //
20
+ // Prior Beta(1,1) is uniform/neutral: the conservatism comes from the credible
21
+ // INTERVAL (a thin sample yields a wide interval and therefore a low floor),
22
+ // not from a stacked prior. keepThreshold > demoteThreshold leaves a deliberate
23
+ // "watch" band between them that a straddling interval falls into.
24
+ export const DEFAULTS = Object.freeze({
25
+ priorAlpha: 1,
26
+ priorBeta: 1,
27
+ credibleLevel: 0.95, // equal-tailed credible interval width
28
+ keepThreshold: 0.85, // lower credible bound >= this -> keep-cheap (proven safe)
29
+ demoteThreshold: 0.70, // upper credible bound <= this -> demote (proven unsafe)
30
+ });
31
+
32
+ // --- Statistics: regularized incomplete beta and its inverse ---------------
33
+ //
34
+ // We need P(p <= x) for a Beta(a,b) posterior (the regularized incomplete beta
35
+ // I_x(a,b)) and its inverse (the quantile) to read off a credible interval.
36
+ // Implemented from first principles (Lanczos log-gamma + Numerical-Recipes
37
+ // continued fraction + bisection) so there is no dependency and the result is
38
+ // reproducible to ~1e-10.
39
+
40
const LANCZOS_G = 7;
const LANCZOS_C = [
  0.99999999999980993,
  676.5203681218851,
  -1259.1392167224028,
  771.32342877765313,
  -176.61502916214059,
  12.507343278686905,
  -0.13857109526572012,
  9.9843695780195716e-6,
  1.5056327351493116e-7,
];

// lgamma(z) -> natural log of Gamma(z) via the Lanczos series above.
// For z < 0.5 the reflection formula is applied and the function recurses on
// 1 - z; otherwise the series is summed directly.
export function lgamma(z) {
  if (z < 0.5) {
    const reflection = Math.log(Math.PI / Math.sin(Math.PI * z));
    return reflection - lgamma(1 - z);
  }
  const shifted = z - 1;
  // Sum C[0] + C[1]/(shifted+1) + ... in coefficient order.
  const series = LANCZOS_C.slice(1).reduce(
    (sum, coeff, idx) => sum + coeff / (shifted + (idx + 1)),
    LANCZOS_C[0],
  );
  const t = shifted + LANCZOS_G + 0.5;
  return 0.5 * Math.log(2 * Math.PI) + (shifted + 0.5) * Math.log(t) - t + Math.log(series);
}
64
+
65
+ // Continued fraction for the incomplete beta (Lentz's method). The 300-iteration
66
+ // cap is a backstop, not a working limit: betai only ever calls this on the
67
+ // fast-converging side (x < (a+1)/(a+b+2), enforced by its reflection), where
68
+ // Lentz reaches the 1e-12 tolerance in tens of steps for any realistic posterior.
69
// betacf(x, a, b) -> value of the continued fraction used by betai, evaluated
// with the modified Lentz recurrence. Terms whose magnitude falls below 1e-300
// are replaced by 1e-300 to avoid dividing by zero. Iteration stops when a full
// step changes the product by less than 1e-12, or after 300 steps.
function betacf(x, a, b) {
  const TINY = 1e-300;
  const clamp = (value) => (Math.abs(value) < TINY ? TINY : value);
  const sumAB = a + b;
  const aPlus = a + 1;
  const aMinus = a - 1;

  let cTerm = 1;
  let dTerm = 1 / clamp(1 - (sumAB * x) / aPlus);
  let product = dTerm;

  // One Lentz update for coefficient `coef`; returns the factor to multiply in.
  const advance = (coef) => {
    dTerm = 1 / clamp(1 + coef * dTerm);
    cTerm = clamp(1 + coef / cTerm);
    return dTerm * cTerm;
  };

  for (let step = 1; step <= 300; step += 1) {
    const twice = 2 * step;
    // Even-indexed coefficient of the fraction.
    const even = (step * (b - step) * x) / ((aMinus + twice) * (a + twice));
    product *= advance(even);
    // Odd-indexed coefficient of the fraction.
    const odd = (-(a + step) * (sumAB + step) * x) / ((a + twice) * (aPlus + twice));
    const delta = advance(odd);
    product *= delta;
    if (Math.abs(delta - 1) < 1e-12) break;
  }
  return product;
}
100
+
101
+ // Regularized incomplete beta I_x(a,b) = P(X <= x) for X ~ Beta(a,b).
102
// betai(x, a, b) -> regularized incomplete beta I_x(a,b).
// Returns 0 for x <= 0 and 1 for x >= 1. Otherwise the continued fraction is
// evaluated directly when x < (a+1)/(a+b+2), and on the mirrored arguments
// (1-x, b, a) with the result subtracted from 1 when it is not.
export function betai(x, a, b) {
  if (x <= 0) return 0;
  if (x >= 1) return 1;
  const logNorm = lgamma(a + b) - lgamma(a) - lgamma(b);
  const prefactor = Math.exp(logNorm + a * Math.log(x) + b * Math.log(1 - x));
  const directSide = x < (a + 1) / (a + b + 2);
  return directSide
    ? (prefactor * betacf(x, a, b)) / a
    : 1 - (prefactor * betacf(1 - x, b, a)) / b;
}
113
+
114
+ // Inverse CDF: smallest x with I_x(a,b) = p. Bisection — monotone, dependency
115
+ // free, deterministic. 100 halvings drive the bracket below 1e-30, far tighter
116
+ // than the betai accuracy, so the quantile is exact for our purposes.
117
// betaQuantile(p, a, b) -> x such that betai(x, a, b) is approximately p.
// Returns 0 for p <= 0 and 1 for p >= 1. Otherwise bisects [0, 1] for exactly
// 100 rounds and returns the midpoint of the final bracket.
export function betaQuantile(p, a, b) {
  if (p <= 0) return 0;
  if (p >= 1) return 1;
  let bracket = [0, 1];
  for (let round = 0; round < 100; round += 1) {
    const [low, high] = bracket;
    const mid = (low + high) / 2;
    // CDF below the target -> the quantile lies in the upper half.
    bracket = betai(mid, a, b) < p ? [mid, high] : [low, mid];
  }
  return (bracket[0] + bracket[1]) / 2;
}
129
+
130
+ // --- Ledger ----------------------------------------------------------------
131
+
132
+ // Key for a (model x leaf) pair. '::' is reserved as the separator; model and
133
+ // leafId are also stored on the entry so the report never has to parse keys.
134
// leafKey(model, leafId) -> the ledger key for a (model x leaf) pair,
// `${model}::${leafId}`.
export function leafKey(model, leafId) {
  return `${model}::${leafId}`;
}

// emptyEntry(model, leafId) -> a fresh entry with zero successes and failures.
function emptyEntry(model, leafId) {
  return { model, leafId, successes: 0, failures: 0 };
}

// toCount(value) -> value when it is a non-negative integer, otherwise 0.
// Guards the arithmetic below against a hand-edited or corrupt ledger file,
// where a count may arrive as a string, null, NaN or a negative number.
function toCount(value) {
  return Number.isInteger(value) && value >= 0 ? value : 0;
}

// recordOutcome(ledger, { model, leafId, success }) -> a NEW ledger.
//
// success === true  increments the pair's `successes`;
// success === false increments its `failures`.
// The input ledger is not mutated; a null/undefined ledger is treated as empty.
// Stored counts that are not non-negative integers are reset to 0 before the
// increment, so one bad field cannot turn into NaN or a concatenated string
// ("3" + 1 === "31") that would then be persisted.
// Throws Error when model or leafId is falsy, or when success is not a boolean.
export function recordOutcome(ledger, { model, leafId, success } = {}) {
  if (!model || !leafId) throw new Error('recordOutcome requires model and leafId');
  if (typeof success !== 'boolean') throw new Error('recordOutcome requires success:boolean');
  const base = ledger || {};
  const key = leafKey(model, leafId);
  const cur = base[key] || emptyEntry(model, leafId);
  const next = {
    model,
    leafId,
    successes: toCount(cur.successes) + (success ? 1 : 0),
    failures: toCount(cur.failures) + (success ? 0 : 1),
  };
  return { ...base, [key]: next };
}
163
+
164
+ // posterior(entry, opts) -> { alpha, beta }. Applies the prior to raw counts.
165
// posterior(entry, opts) -> { alpha, beta }: the prior parameters (DEFAULTS
// overridden by opts) plus the entry's raw successes / failures. A falsy entry
// contributes zero to both.
export function posterior(entry, opts) {
  const { priorAlpha, priorBeta } = { ...DEFAULTS, ...opts };
  const successes = entry ? entry.successes : 0;
  const failures = entry ? entry.failures : 0;
  return { alpha: priorAlpha + successes, beta: priorBeta + failures };
}
172
+
173
+ // verdictFromBounds(lower, upper, opts) -> 'keep-cheap' | 'demote' | 'watch'.
174
+ // keep-cheap and demote are mutually exclusive (lower <= upper), so the order
175
+ // of the two tests does not matter; "watch" is everything the evidence has not
176
+ // settled. This is the whole safety policy in three lines.
177
// verdictFromBounds(lower, upper, opts) -> 'keep-cheap' | 'demote' | 'watch'.
// 'keep-cheap' when lower >= keepThreshold; otherwise 'demote' when
// upper <= demoteThreshold; otherwise 'watch'. Thresholds come from DEFAULTS
// overridden by opts; the keep test is evaluated first.
export function verdictFromBounds(lower, upper, opts) {
  const { keepThreshold, demoteThreshold } = { ...DEFAULTS, ...opts };
  const provenSafe = lower >= keepThreshold;
  const provenUnsafe = upper <= demoteThreshold;
  if (provenSafe) return 'keep-cheap';
  return provenUnsafe ? 'demote' : 'watch';
}
183
+
184
+ // rating(ledger, { model, leafId }, opts) -> the full advisory record. ALWAYS
185
+ // carries the credible lower bound and n; a consumer that shows only `mean`
186
+ // would be discarding the very signal that makes a thin sample untrustworthy.
187
// rating(ledger, { model, leafId }, opts) -> the advisory record for one pair:
// { model, leafId, n, successes, failures, mean, lower, upper, credibleLevel,
//   verdict }.
// A pair with no ledger entry is rated as zero successes and zero failures.
// lower/upper are the equal-tailed quantiles of the posterior at the configured
// credibleLevel; mean is alpha / (alpha + beta).
export function rating(ledger, { model, leafId }, opts) {
  const policy = { ...DEFAULTS, ...opts };
  const entry = ledger[leafKey(model, leafId)] || emptyEntry(model, leafId);
  const { successes, failures } = entry;
  const { alpha, beta } = posterior(entry, policy);

  // Split the excluded probability mass evenly between the two tails.
  const tail = (1 - policy.credibleLevel) / 2;
  const lower = betaQuantile(tail, alpha, beta);
  const upper = betaQuantile(1 - tail, alpha, beta);

  return {
    model,
    leafId,
    n: successes + failures,
    successes,
    failures,
    mean: alpha / (alpha + beta),
    lower,
    upper,
    credibleLevel: policy.credibleLevel,
    verdict: verdictFromBounds(lower, upper, policy),
  };
}
209
+
210
+ // Most-actionable first: demote, then watch, then keep-cheap; ties by leaf then
211
+ // model for stable output.
212
// Sort rank per verdict: lower rank sorts first.
const SEVERITY = { demote: 0, watch: 1, 'keep-cheap': 2 };

// isRateableEntry(entry) -> true when the value is an object carrying a truthy
// model and leafId. Anything else (null, a primitive, an entry missing its
// identity fields) cannot be rated and is skipped by report().
function isRateableEntry(entry) {
  return Boolean(entry) && typeof entry === 'object' && Boolean(entry.model) && Boolean(entry.leafId);
}

// report(ledger, opts) -> one rating per ledger entry, sorted by verdict
// (demote, watch, keep-cheap), then leafId, then model.
//
// A null/undefined ledger yields []. Malformed entries are skipped instead of
// throwing, and the tie-break compares ids as strings, so a ledger file that
// was edited by hand or partially corrupted still produces a report for its
// valid entries.
export function report(ledger, opts) {
  return Object.values(ledger || {})
    .filter(isRateableEntry)
    .map((e) => rating(ledger, { model: e.model, leafId: e.leafId }, opts))
    .sort(
      (a, b) =>
        SEVERITY[a.verdict] - SEVERITY[b.verdict] ||
        String(a.leafId).localeCompare(String(b.leafId)) ||
        String(a.model).localeCompare(String(b.model)),
    );
}
225
+
226
+ // formatRating(rating) -> one advisory line. It REFUSES to print a bare point
227
+ // estimate: the credible lower bound and n are always present, because "92%"
228
+ // over 3 samples and "92%" over 300 are not the same claim, and only the second
229
+ // should ever move a human to drop a downgrade.
230
// formatRating(r) -> one advisory line of the form
//   "<model> x <leafId>: lower<level>=<lower>% (mean <mean>%, n=<n>) -> <verdict>"
// where <level> is credibleLevel as a rounded percentage and <lower>/<mean> are
// percentages with one decimal.
export function formatRating(r) {
  const asPercent = (fraction) => (fraction * 100).toFixed(1);
  const level = Math.round(r.credibleLevel * 100);
  const subject = `${r.model} x ${r.leafId}`;
  const evidence = `lower${level}=${asPercent(r.lower)}% (mean ${asPercent(r.mean)}%, n=${r.n})`;
  return `${subject}: ${evidence} -> ${r.verdict}`;
}
@@ -12,6 +12,7 @@
12
12
 
13
13
  import fs from 'node:fs';
14
14
  import path from 'node:path';
15
+ import { isKnownTierModel, isDowngradeModel, classifyLeaf, LEAF_TYPES } from './native-tiers.js';
15
16
 
16
17
  // Strip /* block */ and // line comments. Preserve "://" inside strings (URLs)
17
18
  // by only treating // as a comment when not preceded by a colon.
@@ -87,10 +88,79 @@ export function metaLiteralViolations(src) {
87
88
  }];
88
89
  }
89
90
 
91
+ // Model-routing anti-downgrade guard (enforcement-bridge F3).
92
+ //
93
+ // A native leaf may pin a CHEAPER model (opts.model) ONLY when it is an
94
+ // EXPLORATION leaf (read/load/parse/detect). Implement, verify and analysis
95
+ // leaves must inherit the session model (no opts.model). This is the structural
96
+ // net that stops a cheap model from silently landing on a heavy leaf — the exact
97
+ // STRICT-2 (No Downgrade) line. The source of truth for tiers and leaf
98
+ // classification is native-tiers.js; this rule only enforces it.
99
+ //
100
+ // Parsing is comment-stripped (a model: token in a comment is not a real call).
101
+ // Each model: is keyed to the nearest preceding label: within the SAME opts
102
+ // object (no intervening }). Downgraded leaves carry static-string labels by
103
+ // convention, so a quoted-literal match is sufficient.
104
const MODEL_RE = /\bmodel:\s*(['"`])([^'"`]*)\1/g;
const LABEL_RE = /\blabel:\s*(['"`])([^'"`]*)\1/g;

// nearestLabelBefore(code, modelIndex) -> the value of the last quoted
// `label:` literal that appears before modelIndex, or null when there is none
// or when a '}' sits between that label and the model (the label then belongs
// to an earlier object).
function nearestLabelBefore(code, modelIndex) {
  const before = code.slice(0, modelIndex);
  let last = null;
  let m;
  LABEL_RE.lastIndex = 0; // shared /g regex: always restart from the top
  while ((m = LABEL_RE.exec(before))) {
    last = { value: m[2], end: m.index + m[0].length };
  }
  if (!last) return null;
  // Same object only: an object-close between the label and the model means the
  // label belongs to a different (earlier) call.
  if (before.slice(last.end).includes('}')) return null;
  return last.value;
}

// nearestLabelAfter(code, modelEnd) -> the value of the first quoted `label:`
// literal that follows the model literal before ANY brace, or null.
// Stopping at '{' as well as '}' keeps the search inside the same object: it
// never picks up a label from a nested object or from a later call.
function nearestLabelAfter(code, modelEnd) {
  const rest = code.slice(modelEnd);
  const brace = rest.search(/[{}]/);
  const scope = brace === -1 ? rest : rest.slice(0, brace);
  // Fresh non-global regex so the shared LABEL_RE.lastIndex is left alone.
  const found = new RegExp(LABEL_RE.source).exec(scope);
  return found ? found[2] : null;
}

// modelRoutingViolations(src) -> [{ id, msg }] for every quoted `model:`
// literal in the comment-stripped source that breaks the routing rule:
//   unknown-tier-model        the model is not a known downgrade tier
//   downgrade-without-label   no label literal found in the same opts object
//   protected-leaf-downgraded the label does not classify as exploration
//
// The label is looked up before the model first and, failing that, after it:
// property order inside an object literal carries no meaning, so
// { model: 'haiku', label: 'load-x' } is treated like
// { label: 'load-x', model: 'haiku' }.
//
// NOTE(review): the "same object" test is textual — a '}' from a template
// interpolation between label and model still hides the label.
export function modelRoutingViolations(src) {
  const code = stripComments(src);
  const out = [];
  let m;
  MODEL_RE.lastIndex = 0; // shared /g regex: always restart from the top
  while ((m = MODEL_RE.exec(code))) {
    const model = m[2];
    if (!isKnownTierModel(model)) {
      out.push({
        id: 'unknown-tier-model',
        msg: `opts.model '${model}' is not a known downgrade tier (cheap/balanced); never pin up — omit opts.model to inherit the session model on deep leaves`,
      });
      continue;
    }
    const label =
      nearestLabelBefore(code, m.index) || nearestLabelAfter(code, m.index + m[0].length);
    if (!label) {
      out.push({
        id: 'downgrade-without-label',
        msg: `a model downgrade ('${model}') must sit on a labelled exploration leaf; no label found in this opts object`,
      });
      continue;
    }
    if (isDowngradeModel(model) && classifyLeaf({ label }) !== LEAF_TYPES.EXPLORATION) {
      out.push({
        id: 'protected-leaf-downgraded',
        msg: `leaf '${label}' is not exploration but carries downgrade model '${model}'; only read/load/parse/detect leaves may downgrade (STRICT-2 No Downgrade)`,
      });
    }
  }
  return out;
}
153
+
90
154
  // Full native-workflow contract: state-coupling (comment-stripped) + clock/RNG
91
- // (raw) + meta-literal-first. Returns the combined [{ id, msg }] violations.
155
+ // (raw) + meta-literal-first + model-routing anti-downgrade. Returns the
156
+ // combined [{ id, msg }] violations.
92
157
  export function validateContract(src) {
93
- return [...lintSource(src), ...clockRngViolations(src), ...metaLiteralViolations(src)];
158
+ return [
159
+ ...lintSource(src),
160
+ ...clockRngViolations(src),
161
+ ...metaLiteralViolations(src),
162
+ ...modelRoutingViolations(src),
163
+ ];
94
164
  }
95
165
 
96
166
  // Lint every *.js in a directory (non-recursive; native workflows are flat)
@@ -5,18 +5,24 @@
5
5
  "main": "server.js",
6
6
  "type": "module",
7
7
  "bin": {
8
- "byan-mcp": "./server.js"
8
+ "byan-mcp": "./server.js",
9
+ "byan-sync-rules": "./bin/byan-sync-rules.js"
9
10
  },
10
11
  "scripts": {
11
12
  "start": "node server.js",
12
13
  "test": "node --test test/*.test.js"
13
14
  },
14
15
  "dependencies": {
15
- "@modelcontextprotocol/sdk": "^1.29.0"
16
+ "@modelcontextprotocol/sdk": "^1.29.0",
17
+ "js-yaml": "^4.1.1"
16
18
  },
17
19
  "engines": {
18
20
  "node": ">=18.0.0"
19
21
  },
20
- "keywords": ["mcp", "byan", "claude-code"],
22
+ "keywords": [
23
+ "mcp",
24
+ "byan",
25
+ "claude-code"
26
+ ],
21
27
  "license": "MIT"
22
28
  }