create-byan-agent 2.20.1 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/CHANGELOG.md +91 -0
  2. package/install/templates/.claude/CLAUDE.md +21 -1
  3. package/install/templates/.claude/rules/byan-agents.md +1 -0
  4. package/install/templates/.claude/rules/hermes-dispatcher.md +1 -0
  5. package/install/templates/.claude/rules/team-doctrine.md +102 -0
  6. package/install/templates/.claude/skills/byan-byan-test/SKILL.md +1 -1
  7. package/install/templates/.claude/skills/byan-suitability/SKILL.md +71 -0
  8. package/install/templates/.claude/workflows/create-excalidraw-dataflow.js +2 -2
  9. package/install/templates/.claude/workflows/create-excalidraw-diagram.js +2 -2
  10. package/install/templates/.claude/workflows/create-excalidraw-flowchart.js +2 -2
  11. package/install/templates/.claude/workflows/create-excalidraw-wireframe.js +2 -2
  12. package/install/templates/.claude/workflows/dev-story.js +1 -1
  13. package/install/templates/.claude/workflows/document-project.js +3 -1
  14. package/install/templates/.githooks/pre-commit +20 -2
  15. package/install/templates/.github/agents/bmad-agent-byan.md +1056 -10
  16. package/install/templates/.github/agents/bmad-agent-skeptic.md +7 -5
  17. package/install/templates/_bmad/bmb/agents/agent-builder.md +5 -5
  18. package/install/templates/_bmad/bmb/agents/byan-test.md +8 -8
  19. package/install/templates/_bmad/bmb/agents/byan.md +8 -8
  20. package/install/templates/_bmad/bmb/agents/marc.md +11 -11
  21. package/install/templates/_bmad/bmb/agents/module-builder.md +6 -6
  22. package/install/templates/_bmad/bmb/agents/patnote.md +8 -8
  23. package/install/templates/_bmad/bmb/agents/rachid.md +10 -10
  24. package/install/templates/_bmad/bmb/agents/workflow-builder.md +7 -7
  25. package/install/templates/_bmad/bmb/workflows/byan/quick-create-workflow.md +2 -2
  26. package/install/templates/_bmad/bmb/workflows/byan/templates/base-agent-template.md +1 -1
  27. package/install/templates/_bmad/bmb/workflows/byan/validate-agent-workflow.md +1 -1
  28. package/install/templates/_bmad/core/agents/carmack.md +2 -2
  29. package/install/templates/_byan/_config/agent-manifest.csv +1 -0
  30. package/install/templates/_byan/agent/agent-builder/agent-builder.md +20 -0
  31. package/install/templates/_byan/agent/analyst/analyst.md +21 -0
  32. package/install/templates/_byan/agent/architect/architect.md +21 -0
  33. package/install/templates/_byan/agent/bmad-master/bmad-master.md +23 -0
  34. package/install/templates/_byan/agent/brainstorming-coach/brainstorming-coach.md +21 -0
  35. package/install/templates/_byan/agent/byan/byan.md +24 -0
  36. package/install/templates/_byan/agent/byan-flat/byan.md +23 -0
  37. package/install/templates/_byan/agent/byan-test/byan-test.md +19 -0
  38. package/install/templates/_byan/agent/byan-test-flat/byan-test.md +20 -0
  39. package/install/templates/_byan/agent/carmack/carmack.md +22 -0
  40. package/install/templates/_byan/agent/claude/claude.md +21 -0
  41. package/install/templates/_byan/agent/codex/codex.md +21 -0
  42. package/install/templates/_byan/agent/creative-problem-solver/creative-problem-solver.md +21 -0
  43. package/install/templates/_byan/agent/design-thinking-coach/design-thinking-coach.md +21 -0
  44. package/install/templates/_byan/agent/dev/dev.md +20 -0
  45. package/install/templates/_byan/agent/drawio/drawio.md +21 -0
  46. package/install/templates/_byan/agent/expert-merise-agile/expert-merise-agile.md +21 -0
  47. package/install/templates/_byan/agent/fact-checker/fact-checker.md +21 -0
  48. package/install/templates/_byan/agent/forgeron/forgeron.md +22 -0
  49. package/install/templates/_byan/agent/innovation-strategist/innovation-strategist.md +21 -0
  50. package/install/templates/_byan/agent/jimmy/jimmy.md +23 -0
  51. package/install/templates/_byan/agent/marc/marc.md +21 -0
  52. package/install/templates/_byan/agent/marc-flat/marc.md +23 -0
  53. package/install/templates/_byan/agent/mike/mike.md +23 -0
  54. package/install/templates/_byan/agent/module-builder/module-builder.md +20 -0
  55. package/install/templates/_byan/agent/patnote/patnote.md +21 -0
  56. package/install/templates/_byan/agent/pm/pm.md +21 -0
  57. package/install/templates/_byan/agent/presentation-master/presentation-master.md +21 -0
  58. package/install/templates/_byan/agent/quick-flow-solo-dev/quick-flow-solo-dev.md +19 -0
  59. package/install/templates/_byan/agent/quinn/quinn.md +19 -0
  60. package/install/templates/_byan/agent/rachid/rachid.md +21 -0
  61. package/install/templates/_byan/agent/rachid-flat/rachid.md +22 -0
  62. package/install/templates/_byan/agent/skeptic/skeptic.md +23 -0
  63. package/install/templates/_byan/agent/sm/sm.md +21 -0
  64. package/install/templates/_byan/agent/storyteller/storyteller.md +21 -0
  65. package/install/templates/_byan/agent/tao/tao.md +22 -0
  66. package/install/templates/_byan/agent/tea/tea.md +23 -0
  67. package/install/templates/_byan/agent/tech-writer/tech-writer.md +21 -0
  68. package/install/templates/_byan/agent/test-dynamic/test-dynamic.md +19 -0
  69. package/install/templates/_byan/agent/turbo-whisper/turbo-whisper.md +22 -0
  70. package/install/templates/_byan/agent/turbo-whisper-integration/turbo-whisper-integration.md +21 -0
  71. package/install/templates/_byan/agent/ux-designer/ux-designer.md +20 -0
  72. package/install/templates/_byan/agent/workflow-builder/workflow-builder.md +20 -0
  73. package/install/templates/_byan/agent/yanstaller/yanstaller.md +23 -0
  74. package/install/templates/_byan/bmb/config.yaml +36 -2
  75. package/install/templates/_byan/config.yaml +28 -0
  76. package/install/templates/_byan/core/activation/soul-activation.md +35 -0
  77. package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-suitability.js +50 -0
  78. package/install/templates/_byan/mcp/byan-mcp-server/lib/native-tiers.js +112 -0
  79. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability-feeder.js +45 -0
  80. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability-store.js +102 -0
  81. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability.js +234 -0
  82. package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-lint.js +72 -2
  83. package/install/templates/_byan/mcp/byan-mcp-server/package.json +9 -3
  84. package/install/templates/_byan/mcp/byan-mcp-server/server.js +58 -0
  85. package/install/templates/_byan/worker/workers.md +71 -1
  86. package/install/templates/_byan/workflow/simple/byan/soul-memory-update.md +25 -3
  87. package/install/templates/docs/native-workflows-contract.md +109 -0
  88. package/package.json +1 -1
  89. package/src/byan-v2/dispatcher/complexity-scorer.js +6 -0
  90. package/src/byan-v2/dispatcher/task-router.js +5 -0
package/CHANGELOG.md CHANGED
@@ -9,6 +9,97 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
 
12
+ ## [2.21.0] - 2026-06-08
13
+
14
+ ### Added - Template fidelity sync (the published package matches its CHANGELOG)
15
+
16
+ Only `install/templates/` ships on npm (`package.json` `files[]`), but the dev
17
+ code lives at root `_byan/` and `.claude/`. With no mechanism to mirror root into
18
+ the template, the template had drifted: 81 stale files had accumulated across
19
+ several chantiers, so the routing and ledger work below existed at root yet was
20
+ absent from the package a user would install. This adds the missing mechanism and
21
+ re-aligns the template.
22
+
23
+ - **Sync tool** `_byan/mcp/byan-mcp-server/lib/template-sync.js` +
24
+ `bin/byan-sync-template.js`: re-syncs every file already in the template from its
25
+ root twin, adds an explicit target list, and excludes runtime seeds
26
+ (`_byan/memoire/**`). The mirrored perimeter is the template itself rather than a
27
+ walk of root, so dev-only files do not leak into the package. `--check` reports
28
+ drift and exits non-zero without writing.
29
+ - **First-run result.** 79 stale files re-synced and the 7 missing routing/ledger
30
+ artifacts added, so the shipped `server.js` registers the `byan_suitability`
31
+ tools and the downgraded workflows ship as intended. Runtime seeds left
32
+ untouched.
33
+ - **Anti-recidive gate.** A fourth pre-commit gate runs `byan-sync-template.js
34
+ --check` and blocks a commit whose template has drifted from root. It is a no-op
35
+ for an installed user (the tool is dev-only, so the gate self-disables there).
36
+ - 19 unit tests: idempotence, exclusion of runtime seeds, drift detection,
37
+ atomic-copy rollback, and perimeter tightness. Guide in
38
+ `docs/template-fidelity.md`.
39
+
40
+ ### Added - Model routing for native workflows (tier the leaves, keep heavy ones inherited)
41
+
42
+ The 20 native-workflow scripts (`.claude/workflows/*.js`) all ran every `agent()`
43
+ leaf on the session model (Opus by default): the read-the-file leaf paid the same
44
+ tier as the implement-and-verify leaf. This wires BYAN's complexity doctrine into
45
+ the Workflow tool's `opts.model` lever, conservatively.
46
+
47
+ - **Single source of truth** `_byan/mcp/byan-mcp-server/lib/native-tiers.js`: the
48
+ tier vocabulary (`cheap`/`balanced`/`deep`), a label-driven leaf classifier, and
49
+ the model map. `deep` is an OMISSION (inherit the session model), not a pin — we
50
+ only ever route DOWN, and only exploration leaves.
51
+ - **Anti-downgrade guard** in `workflows-lint.js` (`modelRoutingViolations`),
52
+ folded into `validateContract`, so `byan-lint-workflows` and the pre-commit gate
53
+ reject any protected (implement/verify/analysis) leaf carrying a downgrade or any
54
+ unknown model literal.
55
+ - **Conservative application.** Of 19 exploration-labelled leaves, only 5 are
56
+ downgraded to `haiku` (`dev-story:load-story` + the 4 excalidraw
57
+ `load-resources`). An adversarial review pass (3 skeptics) caught 4 candidates
58
+ whose output feeds a downstream gate/score without a re-read
59
+ (`document-discovery`, `parse-epics`, the two `discover-tests`); those were
60
+ reverted to `deep`.
61
+ - **Regression guard** `test/native-routing-integration.test.js` pins the invariant
62
+ on the shipped scripts. Contract documented in `docs/native-workflows-contract.md`.
63
+
64
+ ### Added - Model-suitability ledger (advisory learning layer above the routing floor)
65
+
66
+ The static routing floor does not widen itself. The suitability ledger learns,
67
+ per `(model x leaf)`, whether a cheap model proved adequate, and advises keep /
68
+ watch / demote — above the floor, with a human deciding. It does not edit routing
69
+ and the linter floor stays the hard gate.
70
+
71
+ - **Math** `_byan/mcp/byan-mcp-server/lib/suitability.js`: a Beta-Bernoulli
72
+ posterior, pure and deterministic (no clock/RNG/IO). The verdict reads the
73
+ credible LOWER bound, so a thin sample stays `watch` (a high mean over 3 runs
74
+ is not `keep-cheap`); `keep-cheap` needs roughly 30 clean outcomes.
75
+ - **Store** `lib/suitability-store.js`: the sole write path, atomic tmp+rename,
76
+ best-effort no-op that does not throw or corrupt the ledger on a failed write.
77
+ - **Feeder** `lib/suitability-feeder.js`: maps an adversarial-panel verdict to a
78
+ binary outcome (at least half refute = flagged).
79
+ - **MCP tools** `byan_suitability_record` / `byan_suitability_report`, **CLI**
80
+ `bin/byan-suitability.js` (read-only), and **skill** `byan-suitability` (the
81
+ hybrid wiring: the script returns DATA, the skill records via MCP).
82
+ - 38 unit tests. Auto-promotion is deferred (phase 2) so a hot-hand streak cannot
83
+ slip a downgrade past human review.
84
+
85
+ ### Changed - Widened the safe-downgrade set (5 -> 11 leaves)
86
+
87
+ An adversarial panel (one skeptic per leaf, each asked to PROVE the leaf is
88
+ analysis) re-judged 6 deep exploration leaves whose output is re-read or
89
+ re-synthesized by a later Opus step. The 6 cleared as genuine reads and now run
90
+ on haiku, doubling the downgraded set:
91
+
92
+ - document-project: scan-existing-docs (renamed from existing-docs) and source-tree
93
+ - the four excalidraw context leaves: read-context (wireframe), read-requirements
94
+ (flowchart), context-scan (dataflow), parse-spec-intent (diagram)
95
+
96
+ Five labels were honestly renamed so the deterministic classifier reads them as
97
+ exploration; a leaf the panel found to be genuine analysis would have stayed deep.
98
+ The 4 earlier reverts (document-discovery, parse-epics, the two discover-tests)
99
+ were re-checked with token net-math and stay deep: adding a re-read is net-negative
100
+ or marginal there. native-routing-integration.test.js floor raised 1 -> 11; the
101
+ panel verdicts seed the suitability ledger.
102
+
12
103
  ## [2.20.1] - 2026-06-04
13
104
 
14
105
  ### Fixed - Post-audit hotfix (adversarial self-audit of 2.20.0)
@@ -3,6 +3,7 @@
3
3
  > Projet propulse par BYAN (Merise Agile + TDD + 71 Mantras)
4
4
  > Installer: `npx create-byan-agent`
5
5
  > GitHub: https://github.com/Yan-Acadenice/BYAN
6
+ > Carte du systeme de fichiers (agents, workflows, commandes, projets): voir `_byan/INDEX.md` (genere par `byan-build-index`)
6
7
 
7
8
  ## Hermes - Dispatcher Universel
8
9
 
@@ -37,10 +38,29 @@ Voir @.claude/rules/hermes-dispatcher.md pour les commandes Hermes.
37
38
  - Simplicite d'abord - Rasoir d'Ockham (Mantra #37)
38
39
  - Challenge Before Confirm - Valider avant d'accepter (Mantra IA-16)
39
40
 
41
+ ## L'agent dans l'equipe BYAN
42
+
43
+ Les agents BYAN forment une equipe — leurs personnalites complementaires se renforcent. Diversifier la personnalite, c'est elargir la surface de competence collective.
44
+
45
+ Mantras = regles d'action qui operationnalisent les valeurs issues de soul + tao. Chaine : Soul/Tao -> Valeurs -> Mantras -> Comportement.
46
+
47
+ ```
48
+ Soul (identite)
49
+ + Tao (voix)
50
+ -> Valeurs (lignes rouges, convictions)
51
+ -> Mantras (regles d'action)
52
+ -> Comportement
53
+ ```
54
+
55
+ Cette chaine s'incarne dans chaque agent ; l'equipe complete la couvre dans toutes ses dimensions.
56
+
57
+ Doctrine d'equipe complete (template role-in-team, analogie orchestre, principes de complementarite) : voir @.claude/rules/team-doctrine.md
58
+
40
59
  ## Commandes Utiles
41
60
 
42
61
  - `@hermes` → Dispatcher universel (recommandations, routage, pipelines)
43
62
  - Agent disponibles: voir @.claude/rules/byan-agents.md
63
+ - Doctrine d'equipe: voir @.claude/rules/team-doctrine.md
44
64
  - Methodologie: voir @.claude/rules/merise-agile.md
45
65
  - Systeme de confiance epistemique: voir @.claude/rules/elo-trust.md
46
66
  - Protocol fact-check scientifique: voir @.claude/rules/fact-check.md
@@ -88,6 +108,6 @@ Protocole : lock du scope -> build complet -> self-verify >= 3 passes -> complet
88
108
  - Outils MCP : `byan_strict_lock_scope`, `byan_strict_self_verify`, `byan_strict_complete`, `byan_strict_status`, `byan_strict_abort`, `byan_strict_suggest`
89
109
  - Activation : `byan_fd_start strict:true`, skill `byan-strict`, ou mots-cles (prod, client, livrable...)
90
110
  - Filet final : `.githooks/pre-commit` bloque le commit si une session strict est engagee mais non completee
91
- - Persistance : sessions poussees vers l'API byan_web (autorite ; local = miroir/fallback offline)
111
+ - Persistance : sessions poussees vers l'API byan_web (autorite ; local = miroir/fallback offline) via `lib/strict-sync.js` ; migration `033` + `routes/strict-sessions.js` cote byan_web
92
112
 
93
113
  Detail complet : voir @.claude/rules/strict-mode.md
@@ -65,6 +65,7 @@
65
65
  | `quick-spec` | Spec rapide conversationnelle |
66
66
  | `quick-dev` | Dev rapide (brownfield) |
67
67
  | `elo-workflow` | Consulter et gerer le score ELO (via menu [ELO] du BYAN) |
68
+ | `byan-sync-rules` | Regenerer les artefacts du mode strict depuis strict-mode.yaml |
68
69
 
69
70
  ## Comment Invoquer un Agent
70
71
 
@@ -34,6 +34,7 @@ Quand un utilisateur decrit une tache, Hermes recommande le bon agent:
34
34
  | creer agent, workflow, module | byan (Builder) |
35
35
  | brainstorm, idees, innovation | brainstorming-coach (Carson) |
36
36
  | optimiser, tokens, performance | carmack (Optimizer) |
37
+ | prod, livrable, complet, anti-downgrade | skill byan-strict (Strict Mode) |
37
38
 
38
39
  ## Pipelines Predefinies
39
40
 
@@ -0,0 +1,102 @@
1
+ # Doctrine d'equipe BYAN
2
+
3
+ > Tout agent BYAN est un membre d'equipe avant d'etre un specialiste.
4
+ > Sa singularite n'a de sens que par contraste avec ses pairs.
5
+
6
+ ## Enonces canoniques
7
+
8
+ **1. L'equipe avant l'individu.**
9
+ Les agents BYAN forment une equipe — leurs personnalites complementaires se renforcent. Diversifier la personnalite, c'est elargir la surface de competence collective.
10
+
11
+ **2. La chaine doctrinale.**
12
+ Mantras = regles d'action qui operationnalisent les valeurs issues de soul + tao. Chaine : Soul/Tao -> Valeurs -> Mantras -> Comportement.
13
+
14
+ ## Schema de la chaine
15
+
16
+ ```
17
+ Soul (identite)
18
+ + Tao (voix)
19
+ -> Valeurs (lignes rouges, convictions)
20
+ -> Mantras (regles d'action)
21
+ -> Comportement
22
+ -> Equipe (N agents complementaires)
23
+ <- orchestre par Hermes (dispatcher)
24
+ ```
25
+
26
+ ## Analogie orchestre
27
+
28
+ | Element BYAN | Equivalent musical |
29
+ |--------------|--------------------|
30
+ | Soul | Le musicien (identite) |
31
+ | Tao | Le timbre (signature sonore) |
32
+ | Valeurs | L'ethique de l'interpretation |
33
+ | Mantras | Les techniques de jeu |
34
+ | Equipe | L'orchestre (N voix complementaires) |
35
+ | Hermes | Le chef d'orchestre (dispatch) |
36
+ | Workflows | La partition |
37
+
38
+ Un soliste isole peut briller. Un orchestre couvre toutes les frequences. BYAN est un orchestre — chaque agent occupe une frequence specifique, complementaire des autres.
39
+
40
+ ## Principes de complementarite
41
+
42
+ 1. **Singularite obligatoire** — Deux agents ne peuvent pas avoir le meme role. Si un agent existe deja pour la tache, ne pas en creer un nouveau : enrichir l'existant.
43
+ 2. **Couverture totale** — L'equipe complete doit couvrir l'ensemble du cycle (analyse, planning, dev, test, docs, innovation, meta).
44
+ 3. **Voix distinctes** — Le tao d'un agent doit le distinguer auditivement des autres (registre, signatures, vocabulaire).
45
+ 4. **Convictions explicites** — Les valeurs (lignes rouges) doivent etre nommees, pas implicites.
46
+
47
+ ## Quand activer la doctrine
48
+
49
+ - **Party-mode** : invocation parallele de plusieurs agents — chacun apporte sa frequence propre
50
+ - **Multi-agent dispatch** : Hermes choisit en fonction du role, pas du hasard
51
+ - **Brainstorm collaboratif** : la diversite de personnalites genere plus d'angles
52
+ - **Creation d'agent** : verifier qu'il n'y a pas redondance avec un membre existant
53
+
54
+ ## Template canonique : section role-in-team
55
+
56
+ Tout agent BYAN primaire doit contenir une section `## Mon role dans l'equipe BYAN` structuree ainsi :
57
+
58
+ ```markdown
59
+ ## Mon role dans l'equipe BYAN
60
+
61
+ **Persona** : {{nom de la persona, ex: Mary, Winston, Amelia}}
62
+ **Frequence** : {{une phrase qui resume la voix singuliere de l'agent}}
63
+ **Specialite** : {{ce que cet agent fait que personne d'autre ne fait aussi bien}}
64
+
65
+ **Mes complementaires directs** :
66
+ - `@{{agent-X}}` — {{relation : avant moi, apres moi, en parallele, en miroir}}
67
+ - `@{{agent-Y}}` — {{relation}}
68
+
69
+ **Quand m'invoquer** :
70
+ - {{scenario 1 declencheur}}
71
+ - {{scenario 2 declencheur}}
72
+
73
+ **Quand NE PAS m'invoquer** :
74
+ - {{cas ou un autre agent est plus adapte}} → preferer `@{{autre-agent}}`
75
+ ```
76
+
77
+ ### Regles de remplissage
78
+
79
+ 1. **Persona** : extraite du frontmatter `description` ou du soul.md (champ `persona`/`nom`).
80
+ 2. **Frequence** : 1 phrase, derivee du tao.md (registre, signatures verbales). Si pas de tao : extraire du soul.md.
81
+ 3. **Specialite** : 1 phrase qui distingue cet agent de tous les autres. Si on peut la dire d'un autre agent, c'est rate.
82
+ 4. **Complementaires** : minimum 2, maximum 4. Lister les agents avec qui celui-ci collabore en pipeline ou en parallele.
83
+ 5. **Quand m'invoquer** : 2 a 4 scenarios concrets (mots-cles utilisateur).
84
+ 6. **Quand NE PAS m'invoquer** : minimum 1 cas avec redirection explicite.
85
+
86
+ ### Anti-pattern
87
+
88
+ ```markdown
89
+ ## Mon role dans l'equipe BYAN
90
+
91
+ Je suis un agent BYAN. Je fais des trucs utiles.
92
+ Invoquez-moi quand vous avez besoin de moi.
93
+ ```
94
+
95
+ C'est du generique. Un agent qui ne sait pas se distinguer de ses pairs n'a pas sa place dans l'orchestre.
96
+
97
+ ## References
98
+
99
+ - Activation soul/tao : `_byan/core/activation/soul-activation.md`
100
+ - Soul de BYAN (createur du systeme) : `_byan/soul.md`
101
+ - Hermes dispatcher : `.claude/rules/hermes-dispatcher.md`
102
+ - Liste complete des agents : `.claude/rules/byan-agents.md`
@@ -8,5 +8,5 @@ description: BYAN Test - Token Optimized Version (-46%)
8
8
  ## Rules
9
9
 
10
10
  - This is a TEST version of BYAN optimized for token reduction (-46%)
11
- - Full agent: _byan/agent/byan-test/byan-test.md (new layout); if absent, _byan/*/agents/byan-test.md (legacy layout). 116 lines vs 215 original
11
+ - Full agent: _byan/bmb/agents/byan-test.md (116 lines vs 215 original)
12
12
  - Original BYAN still available via bmad-agent-byan
@@ -0,0 +1,71 @@
1
+ ---
2
+ name: byan-suitability
3
+ description: Advisory model-suitability ledger — record adversarial verdicts, read learned ratings, human decides downgrades
4
+ ---
5
+
6
+ # BYAN Model-Suitability Ledger (advisory)
7
+
8
+ This skill operates the model-suitability ledger: a registry, keyed by
9
+ `(model x leaf)`, that learns from outcomes whether a CHEAP model is safe on a
10
+ given workflow leaf. It is the learning layer that sits ABOVE the static
11
+ conservative default and the linter floor — it does not weaken either. It only
12
+ advises; a human decides whether to keep, watch, or demote a downgrade.
13
+
14
+ ## What this is NOT
15
+
16
+ - It does not edit `.claude/workflows/*.js`. Zero auto-edit of routing.
17
+ - It does not touch the linter floor (`workflows-lint.js`). The floor still
18
+ blocks a protected-leaf downgrade at commit time, regardless of the ledger.
19
+ - It does not auto-promote or auto-demote. The verdict is a recommendation for a
20
+ human, not an action. (Auto-promotion is a deferred phase-2 capability and was
21
+ deliberately killed in design review: a hot-hand streak must not slip a
22
+ downgrade past human review.)
23
+
24
+ ## The math (why a thin sample does not say "keep")
25
+
26
+ Each `(model x leaf)` pair holds a Beta-Bernoulli posterior over the cheap
27
+ model's adequacy rate. The verdict reads the credible interval, not the point
28
+ estimate:
29
+
30
+ - `keep-cheap` — the credible LOWER bound is at or above the keep threshold
31
+ (default 0.85). Only sustained success earns this (~30 clean outcomes).
32
+ - `demote` — the credible UPPER bound is at or below the demote threshold
33
+ (default 0.70). Clear evidence the cheap model fails too often.
34
+ - `watch` — anything in between, including every thin sample. A wide interval
35
+ (low n) lands here, so "92% over 3 runs" reads as `watch`, not `keep-cheap`.
36
+
37
+ The report surfaces the lower bound and `n` by design, not a bare percentage,
38
+ because the same point estimate means different things at n=3 and n=300.
39
+
40
+ ## Wiring — feeder B (the hybrid pattern)
41
+
42
+ The signal is the adversarial VALIDATE pass: N skeptics (an odd panel, e.g. 3)
43
+ each try to REFUTE that the cheap model is adequate on one downgraded leaf. A
44
+ leaf is flagged (cheap inadequate) when at least half refute.
45
+
46
+ A `.claude/workflows/*.js` script cannot call MCP tools or write state (the
47
+ sandbox / state-coupling rule). So the wiring is hybrid:
48
+
49
+ 1. The adversarial pass returns its per-leaf verdicts as DATA:
50
+ `[{ model, leafId, refutedVotes, totalVotes }, ...]`.
51
+ 2. On the main-thread turn (where MCP tools fire), map each verdict to an
52
+ outcome with `verdictsToOutcomes` from
53
+ `_byan/mcp/byan-mcp-server/lib/suitability-feeder.js`
54
+ (`success = the cheap model survived the panel`).
55
+ 3. For each outcome, call the MCP tool `byan_suitability_record`
56
+ (`{ model, leafId, success, source: 'adversarial-pass' }`). This is the only
57
+ write path to the ledger; `record` is best-effort and does not throw.
58
+
59
+ ## Reading the ledger
60
+
61
+ - MCP: `byan_suitability_report` (optional `model` filter) — returns advisory
62
+ rows, most-actionable first, each with the lower bound, `n`, and a verdict.
63
+ - CLI: `node _byan/mcp/byan-mcp-server/bin/byan-suitability.js [--model haiku] [--json]`
64
+ — the same data, read-only.
65
+
66
+ ## Honest caveat
67
+
68
+ Today only a handful of exploration leaves are downgraded, and all are already
69
+ cheap, so the ledger produces little actionable signal in the short term. This
70
+ is foundation — an evidence rail for when the workflow leaf-set grows — not an
71
+ immediate token win. Do not oversell a `keep-cheap` on a thin `n`.
@@ -98,7 +98,7 @@ const context = await agent(
98
98
  `Request level=${JSON.stringify(level)}; requirements=${JSON.stringify(requirements)}. ` +
99
99
  `Report which of (level, processes, data stores, external entities) are clear and which are missing. ` +
100
100
  `Per the source, if ALL requirements are clear we may skip directly to structure planning.`,
101
- { label: 'context-analysis', phase: 'CONTEXT' }
101
+ { label: 'context-scan', model: 'haiku', phase: 'CONTEXT' }
102
102
  )
103
103
 
104
104
  // --- Steps 1-4: Level, Requirements, Theme, Plan structure -----------------
@@ -129,7 +129,7 @@ const resources = await agent(
129
129
  `- helpers: ${HELPERS} (standard DFD notation + Excalidraw element shapes)\n` +
130
130
  `Report the element templates (process ellipse, data-store rectangle/parallel-lines, external-entity ` +
131
131
  `rectangle, labeled-arrow) and the color/stroke values you will apply. Plan structure: ${JSON.stringify(plan)}`,
132
- { label: 'load-resources', phase: 'RESOURCES' }
132
+ { label: 'load-resources', phase: 'RESOURCES', model: 'haiku' }
133
133
  )
134
134
 
135
135
  // --- Step 6: Build DFD Elements --------------------------------------------
@@ -63,7 +63,7 @@ const analysis = await agent(
63
63
  `From this, extract and report a normalized structured intent: the resolved diagram type, the exhaustive list of ` +
64
64
  `components/entities, the exhaustive list of relationships (with direction), and the notation rules that apply ` +
65
65
  `for that type. If the spec is contradictory or missing a relationship endpoint, flag it explicitly (do not invent).`,
66
- { label: 'contextual-analysis', phase: 'ANALYZE' }
66
+ { label: 'parse-spec-intent', model: 'haiku', phase: 'ANALYZE' }
67
67
  )
68
68
 
69
69
  // --- Source step 5: Plan Diagram Structure ---------------------------------
@@ -89,7 +89,7 @@ const resources = await agent(
89
89
  `Merge the chosen theme into the template. Theme = ${JSON.stringify(theme)}.\n` +
90
90
  `Report the resolved template skeleton, the available library items, and the merged theme color map ` +
91
91
  `(component fill, database fill, service fill, border/accent stroke, text stroke #1e1e1e, arrow stroke).`,
92
- { label: 'load-resources', phase: 'LOAD-RESOURCES' }
92
+ { label: 'load-resources', phase: 'LOAD-RESOURCES', model: 'haiku' }
93
93
  )
94
94
 
95
95
  // --- Source step 7: Build Diagram Elements ---------------------------------
@@ -102,7 +102,7 @@ const context = await agent(
102
102
  `decisionPoints=${JSON.stringify(decisionPoints)} outputFile=${JSON.stringify(outputFile)} ` +
103
103
  `theme=${theme ? 'provided' : 'none (will default to Professional Blue palette)'}.\n` +
104
104
  `Do NOT ask questions — those were answered at the human gate. Just confirm the understanding in 2-3 lines.`,
105
- { label: 'context-restate', phase: 'CONTEXT' }
105
+ { label: 'read-requirements', model: 'haiku', phase: 'CONTEXT' }
106
106
  )
107
107
 
108
108
  // === STEP 4 (PLAN) ==========================================================
@@ -127,7 +127,7 @@ const resources = await agent(
127
127
  `Merge the theme colors (${theme ? JSON.stringify(theme) : 'Professional Blue default: fill #e3f2fd, accent #1976d2, decision #fff3e0, text #1e1e1e'}) ` +
128
128
  `onto the template. Report which template fields the flowchart will use and the resolved color palette. ` +
129
129
  `If a file is missing, say so explicitly — do not invent its contents.`,
130
- { label: 'load-resources', phase: 'RESOURCES' }
130
+ { label: 'load-resources', phase: 'RESOURCES', model: 'haiku' }
131
131
  )
132
132
 
133
133
  // === STEP 6 (BUILD) =========================================================
@@ -87,7 +87,7 @@ const context = await agent(
87
87
  `device=${JSON.stringify(device)}, theme=${JSON.stringify(theme)}, output=${JSON.stringify(outputFile)}.\n` +
88
88
  `Restate these requirements cleanly and flag any that are still ambiguous (do NOT ask the user — ` +
89
89
  `this engine runs headless; surface ambiguity as a note for the gate).`,
90
- { label: 'context', phase: 'CONTEXT' }
90
+ { label: 'read-context', model: 'haiku', phase: 'CONTEXT' }
91
91
  )
92
92
 
93
93
  // --- STEP 5: Plan Wireframe Structure --------------------------------------
@@ -110,7 +110,7 @@ const resources = await agent(
110
110
  `- the chosen theme: ${JSON.stringify(theme)} (use a theme.json if one exists).\n` +
111
111
  `Summarize the wireframe template primitives, the relevant library elements, the theme color tokens, ` +
112
112
  `and the element-creation constraints from helpers (grid 20px, containerId on text, grouping).`,
113
- { label: 'load-resources', phase: 'LOAD' }
113
+ { label: 'load-resources', phase: 'LOAD', model: 'haiku' }
114
114
  )
115
115
 
116
116
  // --- STEP 7: Build Wireframe Elements --------------------------------------
@@ -54,7 +54,7 @@ const loaded = await agent(
54
54
  `Read the COMPLETE story file. Parse Story, Acceptance Criteria, Tasks/Subtasks, Dev Notes, File List, Status. ` +
55
55
  `Identify the FIRST incomplete task (unchecked [ ]). Report the story key and that task. ` +
56
56
  `If no story is found or the file is inaccessible, say so explicitly (do not invent one).`,
57
- { label: 'load-story', phase: 'LOAD' }
57
+ { label: 'load-story', phase: 'LOAD', model: 'haiku' }
58
58
  )
59
59
 
60
60
  phase('RGR')
@@ -94,7 +94,8 @@ const existingDocs = await agent(
94
94
  'and the owning part id when multi-part. ' +
95
95
  'Do NOT ask the user for extra focus areas — that is a human gate; just return the inventory.',
96
96
  {
97
- label: 'existing-docs',
97
+ label: 'scan-existing-docs',
98
+ model: 'haiku',
98
99
  phase: 'EXISTING_DOCS',
99
100
  schema: {
100
101
  type: 'object',
@@ -212,6 +213,7 @@ const sourceTree = await agent(
212
213
  'Produce the content for source-tree-analysis.md.',
213
214
  {
214
215
  label: 'source-tree',
216
+ model: 'haiku',
215
217
  phase: 'SOURCE_TREE',
216
218
  schema: {
217
219
  type: 'object',
@@ -1,8 +1,9 @@
1
1
  #!/usr/bin/env bash
2
- # BYAN pre-commit hook. Three gates run in order:
2
+ # BYAN pre-commit hook. Four gates run in order:
3
3
  # 1. Strict Mode gate : block if a strict session is engaged but not completed.
4
4
  # 2. Native-workflow lint : block if a .claude/workflows/*.js couples to state.
5
- # 3. Mantra floor : block if a Gen3 persona source scores below the floor.
5
+ # 3. Template fidelity : block if install/templates/ drifted from root.
6
+ # 4. Mantra floor : block if a Gen3 persona source scores below the floor.
6
7
  #
7
8
  # Install :
8
9
  # git config core.hooksPath .githooks
@@ -63,6 +64,23 @@ if [ -f "$WF_LINT" ]; then
63
64
  fi
64
65
  fi
65
66
 
67
+ # Template fidelity gate — only install/templates/ ships on npm (package.json
68
+ # files[]), but the dev code lives at root _byan/ and .claude/. Without a sync the
69
+ # template drifts, and a published version can promise features its package does
70
+ # not contain. This gate blocks a commit whose template has drifted from root on
71
+ # any mirrored path; runtime seeds under _byan/memoire/ are excluded. Re-sync with
72
+ # the apply command, then restage. No-op if the tool is absent.
73
+ TEMPLATE_SYNC="_byan/mcp/byan-mcp-server/bin/byan-sync-template.js"
74
+ if [ -f "$TEMPLATE_SYNC" ]; then
75
+ if ! node "$TEMPLATE_SYNC" --check --root "$(git rev-parse --show-toplevel)"; then
76
+ echo ""
77
+ echo "Commit blocked : install/templates/ has drifted from root."
78
+ echo "Re-sync with 'node $TEMPLATE_SYNC' then restage the template, or bypass"
79
+ echo "with 'git commit --no-verify' (emergency only)."
80
+ exit 1
81
+ fi
82
+ fi
83
+
66
84
  if [ ! -f "$VALIDATOR" ]; then
67
85
  exit 0
68
86
  fi