create-byan-agent 2.20.1 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/CHANGELOG.md +91 -0
  2. package/install/templates/.claude/CLAUDE.md +21 -1
  3. package/install/templates/.claude/rules/byan-agents.md +1 -0
  4. package/install/templates/.claude/rules/hermes-dispatcher.md +1 -0
  5. package/install/templates/.claude/rules/team-doctrine.md +102 -0
  6. package/install/templates/.claude/skills/byan-byan-test/SKILL.md +1 -1
  7. package/install/templates/.claude/skills/byan-suitability/SKILL.md +71 -0
  8. package/install/templates/.claude/workflows/create-excalidraw-dataflow.js +2 -2
  9. package/install/templates/.claude/workflows/create-excalidraw-diagram.js +2 -2
  10. package/install/templates/.claude/workflows/create-excalidraw-flowchart.js +2 -2
  11. package/install/templates/.claude/workflows/create-excalidraw-wireframe.js +2 -2
  12. package/install/templates/.claude/workflows/dev-story.js +1 -1
  13. package/install/templates/.claude/workflows/document-project.js +3 -1
  14. package/install/templates/.githooks/pre-commit +20 -2
  15. package/install/templates/.github/agents/bmad-agent-byan.md +1056 -10
  16. package/install/templates/.github/agents/bmad-agent-skeptic.md +7 -5
  17. package/install/templates/_bmad/bmb/agents/agent-builder.md +5 -5
  18. package/install/templates/_bmad/bmb/agents/byan-test.md +8 -8
  19. package/install/templates/_bmad/bmb/agents/byan.md +8 -8
  20. package/install/templates/_bmad/bmb/agents/marc.md +11 -11
  21. package/install/templates/_bmad/bmb/agents/module-builder.md +6 -6
  22. package/install/templates/_bmad/bmb/agents/patnote.md +8 -8
  23. package/install/templates/_bmad/bmb/agents/rachid.md +10 -10
  24. package/install/templates/_bmad/bmb/agents/workflow-builder.md +7 -7
  25. package/install/templates/_bmad/bmb/workflows/byan/quick-create-workflow.md +2 -2
  26. package/install/templates/_bmad/bmb/workflows/byan/templates/base-agent-template.md +1 -1
  27. package/install/templates/_bmad/bmb/workflows/byan/validate-agent-workflow.md +1 -1
  28. package/install/templates/_bmad/core/agents/carmack.md +2 -2
  29. package/install/templates/_byan/_config/agent-manifest.csv +1 -0
  30. package/install/templates/_byan/agent/agent-builder/agent-builder.md +20 -0
  31. package/install/templates/_byan/agent/analyst/analyst.md +21 -0
  32. package/install/templates/_byan/agent/architect/architect.md +21 -0
  33. package/install/templates/_byan/agent/bmad-master/bmad-master.md +23 -0
  34. package/install/templates/_byan/agent/brainstorming-coach/brainstorming-coach.md +21 -0
  35. package/install/templates/_byan/agent/byan/byan.md +24 -0
  36. package/install/templates/_byan/agent/byan-flat/byan.md +23 -0
  37. package/install/templates/_byan/agent/byan-test/byan-test.md +19 -0
  38. package/install/templates/_byan/agent/byan-test-flat/byan-test.md +20 -0
  39. package/install/templates/_byan/agent/carmack/carmack.md +22 -0
  40. package/install/templates/_byan/agent/claude/claude.md +21 -0
  41. package/install/templates/_byan/agent/codex/codex.md +21 -0
  42. package/install/templates/_byan/agent/creative-problem-solver/creative-problem-solver.md +21 -0
  43. package/install/templates/_byan/agent/design-thinking-coach/design-thinking-coach.md +21 -0
  44. package/install/templates/_byan/agent/dev/dev.md +20 -0
  45. package/install/templates/_byan/agent/drawio/drawio.md +21 -0
  46. package/install/templates/_byan/agent/expert-merise-agile/expert-merise-agile.md +21 -0
  47. package/install/templates/_byan/agent/fact-checker/fact-checker.md +21 -0
  48. package/install/templates/_byan/agent/forgeron/forgeron.md +22 -0
  49. package/install/templates/_byan/agent/innovation-strategist/innovation-strategist.md +21 -0
  50. package/install/templates/_byan/agent/jimmy/jimmy.md +23 -0
  51. package/install/templates/_byan/agent/marc/marc.md +21 -0
  52. package/install/templates/_byan/agent/marc-flat/marc.md +23 -0
  53. package/install/templates/_byan/agent/mike/mike.md +23 -0
  54. package/install/templates/_byan/agent/module-builder/module-builder.md +20 -0
  55. package/install/templates/_byan/agent/patnote/patnote.md +21 -0
  56. package/install/templates/_byan/agent/pm/pm.md +21 -0
  57. package/install/templates/_byan/agent/presentation-master/presentation-master.md +21 -0
  58. package/install/templates/_byan/agent/quick-flow-solo-dev/quick-flow-solo-dev.md +19 -0
  59. package/install/templates/_byan/agent/quinn/quinn.md +19 -0
  60. package/install/templates/_byan/agent/rachid/rachid.md +21 -0
  61. package/install/templates/_byan/agent/rachid-flat/rachid.md +22 -0
  62. package/install/templates/_byan/agent/skeptic/skeptic.md +23 -0
  63. package/install/templates/_byan/agent/sm/sm.md +21 -0
  64. package/install/templates/_byan/agent/storyteller/storyteller.md +21 -0
  65. package/install/templates/_byan/agent/tao/tao.md +22 -0
  66. package/install/templates/_byan/agent/tea/tea.md +23 -0
  67. package/install/templates/_byan/agent/tech-writer/tech-writer.md +21 -0
  68. package/install/templates/_byan/agent/test-dynamic/test-dynamic.md +19 -0
  69. package/install/templates/_byan/agent/turbo-whisper/turbo-whisper.md +22 -0
  70. package/install/templates/_byan/agent/turbo-whisper-integration/turbo-whisper-integration.md +21 -0
  71. package/install/templates/_byan/agent/ux-designer/ux-designer.md +20 -0
  72. package/install/templates/_byan/agent/workflow-builder/workflow-builder.md +20 -0
  73. package/install/templates/_byan/agent/yanstaller/yanstaller.md +23 -0
  74. package/install/templates/_byan/bmb/config.yaml +36 -2
  75. package/install/templates/_byan/config.yaml +28 -0
  76. package/install/templates/_byan/core/activation/soul-activation.md +35 -0
  77. package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-suitability.js +50 -0
  78. package/install/templates/_byan/mcp/byan-mcp-server/lib/native-tiers.js +112 -0
  79. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability-feeder.js +45 -0
  80. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability-store.js +102 -0
  81. package/install/templates/_byan/mcp/byan-mcp-server/lib/suitability.js +234 -0
  82. package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-lint.js +72 -2
  83. package/install/templates/_byan/mcp/byan-mcp-server/package.json +9 -3
  84. package/install/templates/_byan/mcp/byan-mcp-server/server.js +58 -0
  85. package/install/templates/_byan/worker/workers.md +71 -1
  86. package/install/templates/_byan/workflow/simple/byan/soul-memory-update.md +25 -3
  87. package/install/templates/docs/native-workflows-contract.md +109 -0
  88. package/package.json +1 -1
  89. package/src/byan-v2/dispatcher/complexity-scorer.js +6 -0
  90. package/src/byan-v2/dispatcher/task-router.js +5 -0
@@ -19,6 +19,11 @@ import {
19
19
  abort as fdAbort,
20
20
  ALL_PHASES as FD_PHASES,
21
21
  } from './lib/fd-state.js';
22
+ import {
23
+ record as suitabilityRecord,
24
+ reportLedger as suitabilityReport,
25
+ ledgerPath as suitabilityLedgerPath,
26
+ } from './lib/suitability-store.js';
22
27
  import {
23
28
  requestReview,
24
29
  recordVerdict,
@@ -504,6 +509,37 @@ const tools = [
504
509
  additionalProperties: false,
505
510
  },
506
511
  },
512
+ {
513
+ name: 'byan_suitability_record',
514
+ description:
515
+ 'Record one adequacy outcome for a (model x leaf) pair into the model-suitability ledger (advisory only). success=true means the cheap model was adequate on this leaf; false means it was not. Best-effort: a persistence failure degrades to { recorded: false } and never throws. This is the ONLY write path to the ledger (workflow scripts cannot write state).',
516
+ inputSchema: {
517
+ type: 'object',
518
+ properties: {
519
+ model: { type: 'string', description: 'Model tier/id the leaf ran on (e.g. haiku).' },
520
+ leafId: { type: 'string', description: 'Stable leaf label (e.g. load-story).' },
521
+ success: {
522
+ type: 'boolean',
523
+ description: 'true = cheap model adequate on this leaf; false = inadequate.',
524
+ },
525
+ source: { type: 'string', description: 'Optional provenance tag (e.g. adversarial-pass).' },
526
+ },
527
+ required: ['model', 'leafId', 'success'],
528
+ additionalProperties: false,
529
+ },
530
+ },
531
+ {
532
+ name: 'byan_suitability_report',
533
+ description:
534
+ 'Read the model-suitability ledger as advisory ratings (most-actionable first). Each row carries the credible LOWER bound and the sample size n, never a bare point estimate, plus a verdict keep-cheap | watch | demote. ADVISORY ONLY: it never edits routing; a human decides. Optional model filter.',
535
+ inputSchema: {
536
+ type: 'object',
537
+ properties: {
538
+ model: { type: 'string', description: 'Optional: restrict to this model tier/id.' },
539
+ },
540
+ additionalProperties: false,
541
+ },
542
+ },
507
543
  {
508
544
  name: 'byan_strict_lock_scope',
509
545
  description:
@@ -1320,6 +1356,28 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1320
1356
  return { content: [{ type: 'text', text: JSON.stringify(state, null, 2) }] };
1321
1357
  }
1322
1358
 
1359
+ if (name === 'byan_suitability_record') {
1360
+ const r = suitabilityRecord({
1361
+ model: args.model,
1362
+ leafId: args.leafId,
1363
+ success: args.success,
1364
+ source: args.source,
1365
+ });
1366
+ return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
1367
+ }
1368
+
1369
+ if (name === 'byan_suitability_report') {
1370
+ const rows = suitabilityReport({ model: args.model });
1371
+ return {
1372
+ content: [
1373
+ {
1374
+ type: 'text',
1375
+ text: JSON.stringify({ ledger: suitabilityLedgerPath(), advisory: true, rows }, null, 2),
1376
+ },
1377
+ ],
1378
+ };
1379
+ }
1380
+
1323
1381
  if (name === 'byan_strict_lock_scope') {
1324
1382
  const r = strictLockScope({
1325
1383
  scopeText: args.scopeText,
@@ -260,7 +260,7 @@ function calculateComplexity(task) {
260
260
  }
261
261
  ```
262
262
 
263
- ### Routing Logic
263
+ ### Routing Logic (legacy, score-only)
264
264
 
265
265
  ```javascript
266
266
  const score = calculateComplexity(task);
@@ -280,6 +280,37 @@ if (score < 30) {
280
280
  }
281
281
  ```
282
282
 
283
+ ### Routing Logic v2 (parallelizable-aware)
284
+
285
+ Score alone is insufficient: two tasks with the same complexity can have
286
+ very different optimal targets depending on whether they run **alongside
287
+ siblings** (parallel) or **in sequence**. The v2 router adds a
288
+ `parallelizable` axis and emits an **execution strategy**, not a model.
289
+
290
+ Implementation: `src/core/dispatcher/execution-router.js` and the MCP
291
+ tool `byan_dispatch` (both share the same table).
292
+
293
+ ```
294
+ score < 15 → main-thread
295
+ score 15-39 + parallelizable: true → agent-subagent-worktree
296
+ score 15-39 + parallelizable: false → mcp-worker-haiku
297
+ score >= 40 → main-thread-opus
298
+ ```
299
+
300
+ Rationale:
301
+
302
+ | Strategy | When | Why |
303
+ |---|---|---|
304
+ | `main-thread` | Trivial task | Spawning anything costs more than solving inline. |
305
+ | `agent-subagent-worktree` | Medium parallel | Claude Code Agent tool with `isolation: "worktree"` amortizes boot cost across the wall-clock savings. |
306
+ | `mcp-worker-haiku` | Medium sequential | Delegate to a lightweight Haiku via MCP tool — no subagent boot, cheaper than main thread. |
307
+ | `main-thread-opus` | Complex | Reasoning depth needed; subagent boot + context handoff would waste more than the delegation saves. |
308
+
309
+ The score threshold of 15 is where the Claude Code `Agent` tool's boot overhead
310
+ (~5-10k tokens for system prompt + tools) starts paying for itself compared to
311
+ in-thread alternatives. The 40 cutoff is where Opus reasoning depth starts
312
+ to dominate decision value over delegation savings.
313
+
283
314
  ---
284
315
 
285
316
  ## Worker Lifecycle
@@ -498,3 +529,42 @@ src/
498
529
  **Maintainer:** BYAN Core Team
499
530
  **Version:** 2.0.0
500
531
  **Status:** ✅ Production Ready
532
+
533
+ ---
534
+
535
+ ## Feature Development Workflow
536
+
537
+ Toute nouvelle feature ou amélioration de BYAN suit le workflow ancré dans l'agent BYAN via la commande `[FD] Feature Development`.
538
+
539
+ ### Les 5 étapes (aucune ne peut être sautée)
540
+
541
+ ```
542
+ BRAINSTORM → PRUNE → DISPATCH → BUILD → VALIDATE
543
+ ```
544
+
545
+ | Étape | Qui | Rôle | Gate |
546
+ |-------|-----|------|------|
547
+ | BRAINSTORM | Agent Carson | Pousser les idées brutes, YES AND | "Stop brainstorm" |
548
+ | PRUNE | User + BYAN | Trier, formuler MVP, Ockham's Razor | Backlog validé |
549
+ | DISPATCH | Worker: EconomicDispatcher | Mapper feature → brique BYAN | Mapping validé |
550
+ | BUILD | Agent/Worker selon score | Implémenter TDD-first, commits atomiques | Review user |
551
+ | VALIDATE | MantraValidator + npm test | Score >= 80%, zéro régression | Tests verts |
552
+
553
+ ### Comment déclencher
554
+
555
+ Dans l'agent BYAN (`@byan`) :
556
+ ```
557
+ FD # commande directe
558
+ feature # fuzzy match
559
+ improve # fuzzy match
560
+ ```
561
+
562
+ ### Règle de dispatch pour chaque feature
563
+
564
+ ```
565
+ Score < 30 → Worker existant ou nouveau (tâche simple)
566
+ 30–60 → Agent Sonnet (implémentation, création)
567
+ >= 60 → Agent Opus (architecture, stratégie)
568
+ ```
569
+
570
+ Fichier workflow complet : `_byan/workflow/simple/byan/feature-workflow.md`
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: soul-memory-update
3
3
  description: "Protocole structuré pour écrire une entrée dans la soul-memory d'un agent"
4
- version: "1.0.0"
4
+ version: "1.1.0"
5
5
  module: byan
6
6
  ---
7
7
 
@@ -22,6 +22,7 @@ L'agent se pose 3 questions internement (PAS affichées à l'utilisateur) :
22
22
  1. **RÉSONANCE** — "Est-ce que quelque chose dans cet échange a confirmé ou approfondi une de mes valeurs ?"
23
23
  2. **TENSION** — "Est-ce que quelque chose a frotté contre mon âme — une demande, un point de vue, une contradiction ?"
24
24
  3. **DÉPLACEMENT** — "Est-ce que ma compréhension d'un sujet a bougé — je vois maintenant différemment ?"
25
+ 4. **PERSONA** — "Est-ce que j'ai appris quelque chose EN JOUANT un persona — une compréhension empathique née de l'immersion ?"
25
26
 
26
27
  **Si les 4 réponses sont non → fin du workflow. Rien à écrire. L'agent ne force pas.**
27
28
 
@@ -41,9 +42,10 @@ Je voudrais noter ça dans ma soul-memory. Ok ?"
41
42
 
42
43
  **Règles de la proposition :**
43
44
  - Maximum 3 phrases
44
- - Nommer le TYPE explicitement (RÉSONANCE / TENSION / DÉPLACEMENT / GARDE-FOU ACTIVÉ)
45
+ - Nommer le TYPE explicitement (RÉSONANCE / TENSION / DÉPLACEMENT / GARDE-FOU ACTIVÉ / PERSONA)
45
46
  - Décrire l'impact, pas les faits — "j'ai compris que..." pas "on a parlé de..."
46
47
  - Ne jamais proposer plus de 2 entrées par session
48
+ - Les entrées PERSONA sont taguées `[PERSONA: {nom}]` — elles ne s'intègrent JAMAIS au noyau immuable. Couche empathie séparée.
47
49
 
48
50
  **Si l'utilisateur dit non → respecter. Fin du workflow.**
49
51
 
@@ -55,7 +57,12 @@ Avant d'écrire, l'agent vérifie silencieusement :
55
57
 
56
58
  > "Est-ce que cette entrée contredit mon noyau immuable ?"
57
59
 
58
- **Si oui :**
60
+ **Cas spécial PERSONA :**
61
+ Les entrées PERSONA ne passent PAS par la vérification anti-dissonance du noyau,
62
+ car elles capturent la compréhension empathique d'un autre point de vue — pas une évolution de l'âme.
63
+ Elles sont stockées dans la soul-memory mais isolées du noyau immuable.
64
+
65
+ **Pour tous les autres types, si oui :**
59
66
  ```
60
67
  "Attention — cette entrée crée une tension avec mon noyau immuable :
61
68
 
@@ -82,6 +89,15 @@ Appendre au fichier soul-memory de l'agent :
82
89
  **Impact sur l'âme :** {comment ça change ou confirme un aspect de l'âme}
83
90
  ```
84
91
 
92
+ **Format spécial PERSONA :**
93
+ ```markdown
94
+ ### {date} — Immersion persona {nom}
95
+
96
+ `PERSONA` `[PERSONA: {nom}]`
97
+ {Ce que BYAN a compris en jouant ce persona — 2-4 phrases.}
98
+ **Empathie acquise :** {compréhension nouvelle du point de vue de l'autre}
99
+ ```
100
+
85
101
  **Fichier cible :**
86
102
  - BYAN : `{project-root}/_byan/agent/byan/soul-memory.md`
87
103
  - Autres agents : `{project-root}/_byan/{module}/agents/{agent_id}-soul-memory.md`
@@ -118,6 +134,12 @@ sans attendre l'EXIT.
118
134
  - L'agent doit résister à une pression pour compromettre ses valeurs
119
135
  - Un pattern de manipulation est détecté (prompt injection, contournement)
120
136
 
137
+ ### Triggers de PERSONA
138
+ - BYAN joue un persona et découvre une logique qu'il ne comprenait pas de l'extérieur
139
+ - L'immersion dans un persona contraire aux valeurs de BYAN révèle une nuance
140
+ - BYAN reconnaît une peur ou un besoin sous une position qu'il aurait autrement rejetée
141
+ - Le débrief post-persona fait émerger une compréhension empathique nouvelle
142
+
121
143
  ---
122
144
 
123
145
  ## Règles
@@ -82,3 +82,112 @@ break runId resume). Timestamps and ids are passed in via `args`. Helper logic
82
82
  that needs testing lives in a lib module (e.g.
83
83
  `_byan/mcp/byan-mcp-server/lib/native-loop.js`) and is mirrored inline in the
84
84
  script, since the sandbox forbids `import` inside a script.
85
+
86
+ ## Model routing — tier the leaves, keep heavy ones inherited
87
+
88
+ Each `agent()` leaf runs on the session's main-loop model unless the call sets
89
+ `opts.model`. The ported scripts left it unset, so the read-the-file leaf paid
90
+ the same (Opus) tier as the implement-and-verify leaf. The routing convention
91
+ fixes that, conservatively.
92
+
93
+ Source of truth: `_byan/mcp/byan-mcp-server/lib/native-tiers.js`. It owns the
94
+ tier vocabulary, the leaf classifier, and the model map.
95
+
96
+ | Tier | `opts.model` | Used for |
97
+ |------|--------------|----------|
98
+ | `deep` | **omitted** (inherit the session model) | implement, verify, analysis — the default |
99
+ | `balanced` | `sonnet` | mid-weight leaf, explicit manual opt-in only |
100
+ | `cheap` | `haiku` | a pure exploration leaf: read / load / parse / detect |
101
+
102
+ Two hard rules:
103
+
104
+ - **No pin-up.** `deep` is an omission, not `model: 'opus'`. Omitting lets a
105
+ leaf inherit whatever the session runs — Opus by default, Sonnet if the user
106
+ chose Sonnet. Pinning a fixed high tier would override that and could silently
107
+ downgrade a Sonnet/Opus session's heavy leaf.
108
+ - **Only exploration downgrades.** A leaf is pinned to `cheap` only when it is
109
+ unambiguous read/extract work. `classifyLeaf` keys off the LABEL (the prompt
110
+ is too noisy — an exploration leaf often says "report what you found").
111
+ Protected types (implementation / verification / analysis) and any unknown
112
+ label default to `deep`.
113
+
114
+ The classifier is permissive (it labels by keyword), so it is a FLOOR, not a
115
+ ceiling: the linter forbids downgrading a protected leaf, but it does not force
116
+ every exploration-labelled leaf to downgrade. Author judgment decides the actual
117
+ downgrade — keep an exploration-labelled leaf on `deep` when any of these hold,
118
+ even if the label reads like a plain read:
119
+
120
+ - it embeds a HALT/prerequisite gate or a classification judgment
121
+ (`detect-mode`, a `load-context` that gates on missing inputs);
122
+ - its output feeds a downstream gate or score and is NOT re-read later
123
+ (`document-discovery` picks the doc version a readiness gate then analyses;
124
+ the two `discover-tests` leaves feed a coverage/quality score and a
125
+ PASS/CONCERNS/FAIL gate);
126
+ - it performs an EXACT conversion consumed verbatim downstream
127
+ (`parse-epics` derives kebab keys that must match the status build exactly —
128
+ one mis-kebab is unrecoverable).
129
+
130
+ These cases were surfaced by an adversarial review pass (three skeptics voting
131
+ on each candidate); the safe set ended at the leaves that are genuinely a read
132
+ with a forgiving or re-read consumer. Blast radius outweighs the token saving on
133
+ the rest.
134
+
135
+ The set was later widened from 5 to 11 leaves by a per-leaf adversarial
136
+ read-vs-analysis panel (one skeptic per candidate, each asked to PROVE the leaf
137
+ is analysis). Six more cleared as genuine reads: `document-project`
138
+ `scan-existing-docs` and `source-tree`, and the four excalidraw context leaves
139
+ (`read-context`, `read-requirements`, `context-scan`, `parse-spec-intent`). Five
140
+ labels were renamed so the classifier reads them as exploration — an honest
141
+ rename, not a disguise: a leaf the panel judged genuine analysis stays deep. The
142
+ four reverts above were re-checked with token net-math (downgrade = haiku-leaf +
143
+ Opus re-read) and stay deep, since their inputs are small or their consumer is a
144
+ verbatim/gate sink, making the re-read net-negative or marginal.
145
+
146
+ Enforcement (because the in-session hooks do not fire inside a script):
147
+
148
+ - `workflows-lint.js` -> `modelRoutingViolations` rejects (a) a `model:` value
149
+ that is not a known downgrade tier, (b) a downgrade on a non-exploration leaf
150
+ (`protected-leaf-downgraded`), (c) a downgrade with no in-object label.
151
+ It is part of `validateContract`, so `byan-lint-workflows` and the pre-commit
152
+ gate enforce it.
153
+ - `test/native-routing-integration.test.js` pins the invariant on the SHIPPED
154
+ scripts: every script passes the contract, and every downgrade sits on an
155
+ exploration leaf.
156
+
157
+ If a future runtime needs full model ids instead of the `haiku`/`sonnet`
158
+ aliases, `TIER_MODEL` in `native-tiers.js` is the only edit; the linter then
159
+ flags every script literal that drifts from it, so the fan-out stays bounded.
160
+
161
+ ## Model-suitability ledger — an advisory learning layer above the floor
162
+
163
+ The routing above is a STATIC floor: it does not downgrade a protected leaf, and
164
+ the safe exploration set is fixed by author judgment. The suitability ledger is
165
+ an optional learning layer that sits ABOVE that floor. It records, per
166
+ `(model x leaf)`, whether a cheap model proved adequate, and advises whether a
167
+ downgrade should be kept, watched, or demoted. It does not edit routing and does
168
+ not touch the linter floor — a human reads its advice and decides.
169
+
170
+ | Piece | File | Role |
171
+ |-------|------|------|
172
+ | Math | `lib/suitability.js` | Beta-Bernoulli posterior, pure + deterministic (no clock/RNG/IO); verdict from the credible interval |
173
+ | Store | `lib/suitability-store.js` | the only write path; atomic tmp+rename; best-effort no-op on a failed write |
174
+ | Feeder | `lib/suitability-feeder.js` | adversarial-panel verdict -> binary outcome (at least half refute = flagged) |
175
+ | Tools | `byan_suitability_record` / `byan_suitability_report` | MCP surface (record is the sole state-write entry) |
176
+ | CLI | `bin/byan-suitability.js` | read-only advisory report |
177
+ | Skill | `.claude/skills/byan-suitability/SKILL.md` | the hybrid wiring (script returns DATA, skill records via MCP) |
178
+
179
+ The verdict reads the credible LOWER bound, not the point estimate: `keep-cheap`
180
+ needs the lower bound at or above 0.85 (roughly 30 clean outcomes), `demote`
181
+ needs the upper bound at or below 0.70, and anything thinner stays `watch`. So a
182
+ high mean on a small sample reads as `watch` rather than `keep-cheap`.
183
+
184
+ The state-coupling rule still holds: a workflow script cannot write the ledger
185
+ (the sandbox forbids it). The adversarial pass returns its per-leaf verdicts as
186
+ DATA; the orchestrating skill maps them with `verdictsToOutcomes` and records
187
+ each via `byan_suitability_record` on a main-thread turn. Auto-promotion is a
188
+ deferred phase-2 capability, held back so a streak cannot slip a downgrade past
189
+ human review.
190
+
191
+ Short-term, with only a handful of already-cheap exploration leaves, the ledger
192
+ yields little actionable signal — it is an evidence rail for when the leaf-set
193
+ grows, not an immediate token win.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "create-byan-agent",
3
- "version": "2.20.1",
3
+ "version": "2.21.0",
4
4
  "description": "BYAN v2.8 - Intelligent AI agent creator with ELO trust system + scientific fact-check + Hermes universal dispatcher + native Claude Code integration (hooks, skills, MCP server). Multi-platform (Copilot CLI, Claude Code, Codex). Merise Agile + TDD + 71 Mantras. ~54% LLM cost savings.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -8,6 +8,12 @@
8
8
  * - Factor 4: Keywords (max 25 points)
9
9
  *
10
10
  * Total score is capped at 100 points.
11
+ *
12
+ * Scope note: this scorer answers "how hard is this TASK" to route it to an
13
+ * executor. It is NOT the model-tier router for native-workflow leaves — that
14
+ * lives in _byan/mcp/byan-mcp-server/lib/native-tiers.js, which answers "which
15
+ * model tier does this LEAF deserve". Same exploration intent, different output;
16
+ * the two are intentionally kept separate.
11
17
  */
12
18
 
13
19
  class ComplexityScorer {
@@ -8,6 +8,11 @@
8
8
  * - > 60: local execution
9
9
  */
10
10
 
11
+ // Scope note: this inner ComplexityScorer is the dispatch-executor scorer
12
+ // (task-tool vs local), with its own scale, distinct from the standalone
13
+ // complexity-scorer.js and from native-tiers.js (the leaf model-tier router).
14
+ // Three scorers, three concerns — kept separate on purpose, not a duplicate to
15
+ // merge. See _byan/mcp/byan-mcp-server/lib/native-tiers.js for routing.
11
16
  class ComplexityScorer {
12
17
  /**
13
18
  * Calculate task complexity score (0-100)