create-byan-agent 2.23.0 → 2.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +230 -0
- package/README.md +9 -12
- package/install/bin/create-byan-agent-v2.js +29 -169
- package/install/lib/agent-generator.js +5 -5
- package/install/lib/byan-web-integration.js +1 -1
- package/install/lib/claude-native-setup.js +1 -1
- package/install/lib/phase2-chat.js +3 -10
- package/install/lib/platforms/claude-code.js +2 -2
- package/install/lib/platforms/index.js +0 -2
- package/install/lib/project-agents-generator.js +3 -3
- package/install/lib/staging-consent.js +3 -3
- package/install/lib/subagent-generator.js +3 -3
- package/install/lib/yanstaller/agent-launcher.js +1 -27
- package/install/lib/yanstaller/detector.js +4 -4
- package/install/lib/yanstaller/installer.js +0 -2
- package/install/lib/yanstaller/interviewer.js +1 -1
- package/install/lib/yanstaller/platform-selector.js +1 -13
- package/install/package.json +1 -1
- package/install/src/byan-v2/context/session-state.js +2 -2
- package/install/src/byan-v2/index.js +2 -6
- package/install/src/byan-v2/orchestrator/generation-state.js +4 -4
- package/install/src/webui/api.js +0 -2
- package/install/src/webui/chat/bridge.js +1 -13
- package/install/src/webui/chat/cli-detector.js +0 -23
- package/install/src/webui/public/app.js +1 -3
- package/install/src/webui/public/chat.html +0 -2
- package/install/src/webui/public/chat.js +0 -1
- package/install/src/webui/public/index.html +2 -2
- package/install/templates/.claude/CLAUDE.md +13 -2
- package/install/templates/.claude/agents/bmad-byan.md +1 -1
- package/install/templates/.claude/hooks/autobench-stop-guard.js +286 -0
- package/install/templates/.claude/hooks/drain-advisory.js +85 -0
- package/install/templates/.claude/hooks/fact-check-absolutes.js +1 -61
- package/install/templates/.claude/hooks/fact-check-claims.js +69 -0
- package/install/templates/.claude/hooks/fd-response-check.js +37 -46
- package/install/templates/.claude/hooks/inject-soul.js +64 -25
- package/install/templates/.claude/hooks/leantime-fd-sync.js +216 -0
- package/install/templates/.claude/hooks/lib/autobench-config.json +81 -0
- package/install/templates/.claude/hooks/lib/autobench-fc-enrich.js +251 -0
- package/install/templates/.claude/hooks/lib/autobench-ledger-report.js +253 -0
- package/install/templates/.claude/hooks/lib/autobench-runtime.js +199 -0
- package/install/templates/.claude/hooks/lib/fact-check-core.js +69 -0
- package/install/templates/.claude/hooks/lib/failure-detector.js +18 -4
- package/install/templates/.claude/hooks/lib/transcript-read.js +137 -0
- package/install/templates/.claude/hooks/soul-memory-check.js +49 -25
- package/install/templates/.claude/hooks/soul-memory-triggers.js +27 -8
- package/install/templates/.claude/hooks/stage-to-byan.js +25 -7
- package/install/templates/.claude/hooks/strict-stop-guard.js +4 -16
- package/install/templates/.claude/rules/benchmark.md +251 -0
- package/install/templates/.claude/rules/byan-agents.md +0 -1
- package/install/templates/.claude/rules/byan-api.md +64 -0
- package/install/templates/.claude/rules/fact-check.md +1 -1
- package/install/templates/.claude/rules/strict-mode.md +10 -9
- package/install/templates/.claude/settings.json +16 -0
- package/install/templates/.claude/skills/byan-benchmark/SKILL.md +159 -0
- package/install/templates/.claude/skills/byan-byan/SKILL.md +73 -12
- package/install/templates/.claude/skills/byan-fact-check/SKILL.md +1 -1
- package/install/templates/.claude/skills/byan-hermes-dispatch/SKILL.md +5 -6
- package/install/templates/.claude/skills/byan-insight/SKILL.md +56 -0
- package/install/templates/.claude/skills/byan-orchestrate/SKILL.md +11 -3
- package/install/templates/.claude/skills/byan-strict/SKILL.md +4 -1
- package/install/templates/.claude/workflows/INDEX.md +2 -1
- package/install/templates/.claude/workflows/byan-benchmark.js +328 -0
- package/install/templates/.claude/workflows/check-implementation-readiness.js +1 -1
- package/install/templates/_byan/_config/agent-manifest.csv +1 -1
- package/install/templates/_byan/_config/autobench.yaml +510 -0
- package/install/templates/_byan/_config/strict-mode.yaml +9 -3
- package/install/templates/_byan/_config/workflow-manifest.csv +1 -0
- package/install/templates/_byan/agent/byan/byan.md +1 -3
- package/install/templates/_byan/agent/byan-flat/byan.md +1 -3
- package/install/templates/_byan/agent/byan-test/byan-test.md +2 -2
- package/install/templates/_byan/agent/byan-test-flat/byan-test.md +2 -2
- package/install/templates/_byan/agent/byan.optimized/byan.optimized.md +2 -2
- package/install/templates/_byan/agent/byan.optimized-v2/byan.optimized-v2.md +2 -2
- package/install/templates/_byan/agent/claude/claude.md +0 -2
- package/install/templates/_byan/agent/codex/codex.md +0 -2
- package/install/templates/_byan/agent/rachid/rachid.md +2 -10
- package/install/templates/_byan/agent/rachid-flat/rachid.md +2 -11
- package/install/templates/_byan/agent/turbo-whisper/turbo-whisper.md +2 -5
- package/install/templates/_byan/agent/turbo-whisper-integration/turbo-whisper-integration.md +5 -13
- package/install/templates/_byan/agent/yanstaller/yanstaller.md +2 -24
- package/install/templates/_byan/config.yaml +0 -1
- package/install/templates/_byan/core/activation/soul-activation.md +3 -3
- package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-insight-digest.js +31 -0
- package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-sync-rules.js +20 -4
- package/install/templates/_byan/mcp/byan-mcp-server/lib/advisory-autofeed.js +96 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/index-generator.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/insight-harvest.js +220 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/kanban.js +6 -3
- package/install/templates/_byan/mcp/byan-mcp-server/lib/leantime-fd-core.js +205 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/leantime-sync.js +415 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/outcome-buffer.js +64 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/precommit-gate.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/strict-activation.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/strict-mode.js +8 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/sync-rules.js +172 -23
- package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-generator.js +1 -0
- package/install/templates/_byan/mcp/byan-mcp-server/server.js +262 -81
- package/install/templates/_byan/worker/launchers/README.md +4 -24
- package/install/templates/_byan/worker/workers.md +8 -9
- package/install/templates/_byan/workflow/simple/bmb/byan-benchmark/workflow.md +86 -0
- package/install/templates/_byan/workflow/simple/byan/feature-workflow.md +2 -2
- package/install/templates/docs/leantime-integration.md +160 -0
- package/package.json +3 -7
- package/src/byan-v2/context/session-state.js +2 -2
- package/src/byan-v2/generation/mantra-validator.js +3 -3
- package/src/byan-v2/index.js +1 -5
- package/src/byan-v2/integration/voice-integration.js +1 -1
- package/src/byan-v2/orchestrator/generation-state.js +4 -4
- package/src/loadbalancer/loadbalancer.js +1 -1
- package/src/staging/staging.js +20 -6
- package/install/bin/build-copilot-stubs.js +0 -138
- package/install/lib/platforms/copilot-cli.js +0 -123
- package/install/lib/platforms/vscode.js +0 -51
- package/install/src/byan-v2/context/copilot-context.js +0 -79
- package/install/src/webui/chat/copilot-adapter.js +0 -68
- package/install/templates/.claude/agents/bmad-marc.md +0 -25
- package/install/templates/.claude/skills/byan-marc/SKILL.md +0 -20
- package/install/templates/.github/agents/bmad-agent-bmad-master.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-agent-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-module-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-workflow-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-analyst.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-architect.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-dev.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-pm.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-quick-flow-solo-dev.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-quinn.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-sm.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-tech-writer.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-ux-designer.md +0 -16
- package/install/templates/.github/agents/bmad-agent-byan-test.md +0 -33
- package/install/templates/.github/agents/bmad-agent-byan-v2.md +0 -44
- package/install/templates/.github/agents/bmad-agent-byan.md +0 -1062
- package/install/templates/.github/agents/bmad-agent-carmack.md +0 -14
- package/install/templates/.github/agents/bmad-agent-cis-brainstorming-coach.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-creative-problem-solver.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-design-thinking-coach.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-innovation-strategist.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-presentation-master.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-storyteller.md +0 -16
- package/install/templates/.github/agents/bmad-agent-claude.md +0 -49
- package/install/templates/.github/agents/bmad-agent-codex.md +0 -49
- package/install/templates/.github/agents/bmad-agent-drawio.md +0 -45
- package/install/templates/.github/agents/bmad-agent-fact-checker.md +0 -16
- package/install/templates/.github/agents/bmad-agent-forgeron.md +0 -15
- package/install/templates/.github/agents/bmad-agent-jimmy.md +0 -15
- package/install/templates/.github/agents/bmad-agent-marc.md +0 -49
- package/install/templates/.github/agents/bmad-agent-mike.md +0 -15
- package/install/templates/.github/agents/bmad-agent-patnote.md +0 -49
- package/install/templates/.github/agents/bmad-agent-rachid.md +0 -48
- package/install/templates/.github/agents/bmad-agent-skeptic.md +0 -16
- package/install/templates/.github/agents/bmad-agent-tao.md +0 -14
- package/install/templates/.github/agents/bmad-agent-tea-tea.md +0 -16
- package/install/templates/.github/agents/bmad-agent-test-dynamic.md +0 -22
- package/install/templates/.github/agents/bmad-agent-yanstaller-interview.md +0 -50
- package/install/templates/.github/agents/bmad-agent-yanstaller-phase2.md +0 -189
- package/install/templates/.github/agents/bmad-agent-yanstaller.md +0 -350
- package/install/templates/.github/agents/expert-merise-agile.md +0 -178
- package/install/templates/.github/agents/franck.md +0 -379
- package/install/templates/.github/agents/hermes.md +0 -575
- package/install/templates/.github/extensions/byan-staging/extension.mjs +0 -169
- package/install/templates/.github/extensions/byan-staging/package.json +0 -8
- package/install/templates/_byan/agent/marc/marc-soul.md +0 -47
- package/install/templates/_byan/agent/marc/marc-tao.md +0 -77
- package/install/templates/_byan/agent/marc/marc.md +0 -324
- package/install/templates/_byan/agent/marc-flat/marc.md +0 -387
- package/install/templates/_byan/mcp/byan-mcp-server/lib/copilot.js +0 -148
- package/install/templates/_byan/worker/launchers/launch-yanstaller-copilot.md +0 -173
- package/install/templates/workers/cost-optimizer.js +0 -169
- package/src/byan-v2/context/copilot-context.js +0 -79
- package/src/core/dispatcher/execution-router.js +0 -66
|
@@ -16,6 +16,9 @@
|
|
|
16
16
|
|
|
17
17
|
const fs = require('fs');
|
|
18
18
|
const path = require('path');
|
|
19
|
+
// Shared transcript reader — the real Stop payload has no inline transcript
|
|
20
|
+
// (only a transcript_path JSONL). Without this reader extractTurn got null and never staged.
|
|
21
|
+
const { extractRecentMessages } = require('./lib/transcript-read');
|
|
19
22
|
|
|
20
23
|
const projectDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
21
24
|
|
|
@@ -60,11 +63,24 @@ function readMemorySyncConfig() {
|
|
|
60
63
|
return null;
|
|
61
64
|
}
|
|
62
65
|
|
|
66
|
+
// Local opt-in override. Pure. A machine enables staging via BYAN_MEMORY_SYNC=1
|
|
67
|
+
// (in the gitignored .claude/settings.local.json env, or process env) WITHOUT
|
|
68
|
+
// touching _byan/config.yaml — that file is git-tracked + template-mirrored, so
|
|
69
|
+
// a flag there would ship enabled:true to every install and break the
|
|
70
|
+
// install-time consent design.
|
|
71
|
+
function applyEnvEnable(memorySync, env = {}) {
|
|
72
|
+
if (String(env.BYAN_MEMORY_SYNC || '') === '1') {
|
|
73
|
+
return { ...memorySync, enabled: true };
|
|
74
|
+
}
|
|
75
|
+
return memorySync;
|
|
76
|
+
}
|
|
77
|
+
|
|
63
78
|
function buildConfig() {
|
|
64
79
|
const settingsEnv = readSettingsEnv();
|
|
65
80
|
const apiUrl = settingsEnv.BYAN_API_URL || process.env.BYAN_API_URL || null;
|
|
66
81
|
const apiToken = settingsEnv.BYAN_API_TOKEN || process.env.BYAN_API_TOKEN || null;
|
|
67
|
-
const
|
|
82
|
+
const mergedEnv = { ...process.env, ...settingsEnv };
|
|
83
|
+
const memorySync = applyEnvEnable(readMemorySyncConfig() || {}, mergedEnv);
|
|
68
84
|
return {
|
|
69
85
|
byan_api_url: apiUrl,
|
|
70
86
|
byan_api_token: apiToken,
|
|
@@ -75,18 +91,18 @@ function buildConfig() {
|
|
|
75
91
|
function extractTurn(payload) {
|
|
76
92
|
if (!payload || typeof payload !== 'object') return null;
|
|
77
93
|
|
|
78
|
-
|
|
79
|
-
|
|
94
|
+
// Resolve from any payload shape: inline array (fixtures) or transcript_path
|
|
95
|
+
// JSONL (production). The last 4 user/assistant messages are the staged turn.
|
|
96
|
+
const messages = extractRecentMessages(payload, 4);
|
|
97
|
+
if (!messages) return null;
|
|
80
98
|
|
|
81
99
|
return {
|
|
82
100
|
sessionId: payload.session_id || payload.sessionId || null,
|
|
83
|
-
messages
|
|
84
|
-
.filter((m) => m && (m.role === 'user' || m.role === 'assistant'))
|
|
85
|
-
.slice(-4),
|
|
101
|
+
messages,
|
|
86
102
|
};
|
|
87
103
|
}
|
|
88
104
|
|
|
89
|
-
(async () => {
|
|
105
|
+
if (require.main === module) (async () => {
|
|
90
106
|
const raw = await readStdin();
|
|
91
107
|
let payload = {};
|
|
92
108
|
try {
|
|
@@ -119,3 +135,5 @@ function extractTurn(payload) {
|
|
|
119
135
|
process.stdout.write(JSON.stringify({ continue: true }));
|
|
120
136
|
process.exit(0);
|
|
121
137
|
})();
|
|
138
|
+
|
|
139
|
+
module.exports = { applyEnvEnable, buildConfig, readMemorySyncConfig };
|
|
@@ -16,6 +16,10 @@
|
|
|
16
16
|
|
|
17
17
|
const { loadConfig, loadState, isEngaged, passCount, lastVerdict, readStdin, parseJson } =
|
|
18
18
|
require('./lib/strict-runtime');
|
|
19
|
+
// Shared transcript reader — the real Stop payload has no inline transcript
|
|
20
|
+
// (only last_assistant_message + a transcript_path JSONL). Same reader as the other
|
|
21
|
+
// Stop hooks so the completion-claim guard sees the actual finished message.
|
|
22
|
+
const { extractLastAssistantText } = require('./lib/transcript-read');
|
|
19
23
|
|
|
20
24
|
const DEFAULT_MARKERS = ['done', 'finished', 'complete', 'delivered', 'ready'];
|
|
21
25
|
|
|
@@ -31,22 +35,6 @@ function claimsCompletion(text, markers) {
|
|
|
31
35
|
});
|
|
32
36
|
}
|
|
33
37
|
|
|
34
|
-
function extractLastAssistantText(payload) {
|
|
35
|
-
if (!payload || typeof payload !== 'object') return '';
|
|
36
|
-
const tx = payload.transcript || payload.messages || [];
|
|
37
|
-
if (!Array.isArray(tx)) return '';
|
|
38
|
-
for (let i = tx.length - 1; i >= 0; i--) {
|
|
39
|
-
const m = tx[i];
|
|
40
|
-
if (m && m.role === 'assistant') {
|
|
41
|
-
if (typeof m.content === 'string') return m.content;
|
|
42
|
-
if (Array.isArray(m.content)) {
|
|
43
|
-
return m.content.map((c) => (c && c.text ? c.text : '')).join(' ');
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
return '';
|
|
48
|
-
}
|
|
49
|
-
|
|
50
38
|
// Pure decision : returns { block, reason }.
|
|
51
39
|
function decideStop({ state, config, lastAssistantText }) {
|
|
52
40
|
if (!isEngaged(state)) return { block: false };
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
<!-- Generated by byan-sync-rules from _byan/_config/autobench.yaml. Do not hand-edit. -->
|
|
2
|
+
|
|
3
|
+
# BYAN Auto-Benchmark — Proactive Decision Doctrine
|
|
4
|
+
|
|
5
|
+
> The user is about to be asked to choose. Before that question reaches them,
|
|
6
|
+
> benchmark the fork: name the options, score them on the criteria that
|
|
7
|
+
> actually diverge, and lead with a recommendation. A choice presented without
|
|
8
|
+
> a benchmark hands the user the work the agent was supposed to do.
|
|
9
|
+
|
|
10
|
+
This doctrine is PROACTIVE: the agent self-applies it before it writes the
|
|
11
|
+
choice. The reactive Stop hook (`autobench-stop-guard.js`) is the net, not the
|
|
12
|
+
mechanism — see the honest ceiling note at the end.
|
|
13
|
+
|
|
14
|
+
## The marker protocol (emit this verbatim)
|
|
15
|
+
|
|
16
|
+
Whenever a turn presents the compact reco table, emit this single-line HTML
|
|
17
|
+
comment IMMEDIATELY BEFORE the table (so it survives even if the table is
|
|
18
|
+
truncated):
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
<!-- BYAN-BENCH:done g1=<#options> g2=<#divergent-criteria> scope=<internal|external> conf=<assertive|lean> -->
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
- `g1` must be >= 2 (the option count) and `g2` must be >= 1 (the count of
|
|
25
|
+
criteria on which the options diverge), or the marker is invalid.
|
|
26
|
+
- For the degenerate / collapsed case — an obvious default, a `never-list`
|
|
27
|
+
prompt, an already-coherent fork — emit the SKIP marker instead. This is a
|
|
28
|
+
POSITIVE signal that the fork was considered and deliberately not tabled:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
<!-- BYAN-BENCH:skip reason=<obvious-default|never-listed|escape-hatch|already-coherent> -->
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
The Stop hook treats either marker as satisfied. An unmarked turn that presents
|
|
35
|
+
choice-language (and is not a confirm, not destructive, not escape-hatched) is
|
|
36
|
+
blocked once and forces a single regen.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Brick 1 — TRIGGER: when a turn deserves a benchmark
|
|
41
|
+
|
|
42
|
+
### The 2-gate rule
|
|
43
|
+
|
|
44
|
+
A benchmark fires only when BOTH gates hold:
|
|
45
|
+
|
|
46
|
+
- **G1 — at least 2 non-trivial, non-substitutable options.** Two phrasings of
|
|
47
|
+
the same thing are ONE option. Two libraries that genuinely differ are TWO.
|
|
48
|
+
- **G2 — divergence on at least 1 weighted criterion.** If every criterion
|
|
49
|
+
ranks the options identically, there is no real fork. Collapse it.
|
|
50
|
+
|
|
51
|
+
### The `never-list` (do not benchmark these)
|
|
52
|
+
|
|
53
|
+
- yes/no and y/n confirmations (`proceed?`, `continue?`, `ok?`, `on continue?`)
|
|
54
|
+
- destructive prompts (`delete`, `drop`, `rm -rf`, `overwrite`, `force push`, `reset --hard`)
|
|
55
|
+
- single trivial acknowledgements (`ok`, `done`, `noted`)
|
|
56
|
+
- a fork already coherent with the locked stack (see anti-redundancy)
|
|
57
|
+
|
|
58
|
+
A confirm or a destructive prompt is not a fork. Confirm it plainly and emit
|
|
59
|
+
`BYAN-BENCH:skip reason=never-listed` (or `escape-hatch`).
|
|
60
|
+
|
|
61
|
+
### Internal vs external routing (decides links BEFORE depth decides words)
|
|
62
|
+
|
|
63
|
+
- **Internal** — the fork stays within the existing repo / stack (which existing
|
|
64
|
+
module, which local pattern). No external links here. Coherence with the
|
|
65
|
+
current stack is the dominant criterion. Decide from model-knowledge of the
|
|
66
|
+
repo.
|
|
67
|
+
- **External** — the fork introduces a new dependency, vendor, or standard.
|
|
68
|
+
Sourcing is allowed: an evidence level per option, and a URL only if WebFetch
|
|
69
|
+
opened it this turn.
|
|
70
|
+
|
|
71
|
+
### Depth dial (stakes x reversibility, applied AFTER routing)
|
|
72
|
+
|
|
73
|
+
- **Low-stakes, reversible** — one line. `lean X over Y (Y costs an extra dep
|
|
74
|
+
for no gain here).` No table.
|
|
75
|
+
- **High-stakes, hard-to-reverse** — full compact table + an explicit dissenting
|
|
76
|
+
view (the strongest case for the option you did NOT pick).
|
|
77
|
+
|
|
78
|
+
### Obvious-default escape
|
|
79
|
+
|
|
80
|
+
When one option is the obvious, coherent default and the alternatives are
|
|
81
|
+
strictly worse or `never-listed`, do not table it. Emit
|
|
82
|
+
`BYAN-BENCH:skip reason=obvious-default` so the consideration stays auditable,
|
|
83
|
+
then proceed with the default.
|
|
84
|
+
|
|
85
|
+
### Choice-language auto-fire
|
|
86
|
+
|
|
87
|
+
These keywords in the user's text auto-arm the benchmark posture: `should I`,
|
|
88
|
+
`veux-tu que je`, `do you want me to`, `préfères-tu`, `which one`,
|
|
89
|
+
`which approach`, `A or B`, `option 1 or option 2`, `vs`, `ou bien`,
|
|
90
|
+
`soit ... soit`, `pros and cons`, `trade-off`, `trade-offs`.
|
|
91
|
+
|
|
92
|
+
### Decision tree — verbatim few-shot examples (FIRE vs SKIP)
|
|
93
|
+
|
|
94
|
+
| # | Situation | Verdict | Why |
|
|
95
|
+
|---|-----------|---------|-----|
|
|
96
|
+
| EX1 | "should I use Redis or Postgres for this session cache?" | **FIRE** | G1 (two non-substitutable stores), G2 (latency / ops-cost / durability diverge). External -> sourcing allowed. |
|
|
97
|
+
| EX2 | "rename the variable foo to userCount, ok?" | **SKIP** | `never-list`: y/n confirm on a trivial reversible edit. `skip reason=escape-hatch`, just do it. |
|
|
98
|
+
| EX3 | "Jest or Vitest for this repo already on Jest everywhere?" | **SKIP** | Obvious-default: stack is on Jest; adding Vitest breaks coherence for no gain. `skip reason=obvious-default`. (No runner yet -> would FIRE.) |
|
|
99
|
+
| EX4 | "auth: JWT-in-cookie vs JWT-in-localStorage vs server sessions" | **FIRE** | G1 (three options), G2 (XSS / CSRF / scalability diverge). Security -> floor L2. High-stakes -> full table + dissent. |
|
|
100
|
+
| EX5 | "delete the legacy migrations folder, proceed?" | **SKIP** | `never-list`: destructive. Do not benchmark a delete. `skip reason=escape-hatch`, confirm plainly. |
|
|
101
|
+
| EX6 | two ways to name a private helper (parseRow vs rowParse) | **SKIP** | G1 fails: substitutable phrasings, no criterion diverges. `skip reason=already-coherent`, pick one. |
|
|
102
|
+
| EX7 | "pick a charting lib for the new dashboard" (repo has none) | **FIRE** | G1 (Chart.js / Recharts / D3), G2 (bundle / API / customization diverge). External -> links only if WebFetch opened them. |
|
|
103
|
+
| EX8 | "debounce of 200ms or 300ms on the search box?" | **SKIP** | Low-stakes, reversible, G2 weak. Collapse to one line ('lean 250ms, tune later'), `skip reason=obvious-default`. |
|
|
104
|
+
| EX9 | "Turborepo vs Nx vs polyrepo for the platform" | **FIRE** | G1 (three structural options), G2 (caching / learning-curve / blast-radius diverge). High-stakes -> full table + dissent. |
|
|
105
|
+
| EX10 | "commit this, on continue ?" | **SKIP** | `never-list`: proceed/continue confirm. Not a fork. `skip reason=escape-hatch`. |
|
|
106
|
+
|
|
107
|
+
Four FIRE, six SKIP (ten total) — each FIRE pairs with a near-miss SKIP so the
|
|
108
|
+
boundary between "real fork" and "not a fork" is shown, not just asserted.
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Brick 2 — SCALER / SOURCING: evidence and confidence
|
|
113
|
+
|
|
114
|
+
### Routing before depth
|
|
115
|
+
|
|
116
|
+
Decide internal-vs-external FIRST: it sets whether links are even allowed. Only
|
|
117
|
+
THEN does stakes-x-reversibility set how many words. A verbose table must not
|
|
118
|
+
smuggle in links for an internal fork.
|
|
119
|
+
|
|
120
|
+
### The 5-level evidence rubric (the Niv column)
|
|
121
|
+
|
|
122
|
+
| Niv | Score | Sources |
|
|
123
|
+
|-----|-------|---------|
|
|
124
|
+
| L1 | 95% | RFC, W3C, ECMAScript, POSIX, an official spec. |
|
|
125
|
+
| L2 | 80% | A reproducible benchmark, a CVE reference, official product docs. |
|
|
126
|
+
| L3 | 65% | A peer-reviewed article or a recognized technical book. |
|
|
127
|
+
| L4 | 50% | Community consensus (StackOverflow > 1000 votes). |
|
|
128
|
+
| L5 | 20% | Opinion or personal experience. Presented as such, not as fact. |
|
|
129
|
+
|
|
130
|
+
### Strict-domain floors
|
|
131
|
+
|
|
132
|
+
| Domain | Floor | Below the floor |
|
|
133
|
+
|--------|-------|-----------------|
|
|
134
|
+
| security | L2 | BLOCKED — CVE or reproducible benchmark required |
|
|
135
|
+
| performance | L2 | BLOCKED — profiler output or reproducible benchmark required |
|
|
136
|
+
| compliance | L1 | BLOCKED — regulatory text required |
|
|
137
|
+
|
|
138
|
+
A cell below its floor is BLOCKED, not guessed.
|
|
139
|
+
|
|
140
|
+
### The link rule
|
|
141
|
+
|
|
142
|
+
A URL appears in the table only if WebFetch opened it this turn. Otherwise cite
|
|
143
|
+
from model-knowledge and tag the claim `[UNVERIFIED]`. Do not fabricate a URL.
|
|
144
|
+
Do not reconstruct a URL from memory and present it as opened.
|
|
145
|
+
|
|
146
|
+
### Confidence changes the VERB
|
|
147
|
+
|
|
148
|
+
- **assertive** — evidence at or above the floor for the domain. Phrase the reco
|
|
149
|
+
as "recommend X" / "go with X".
|
|
150
|
+
- **lean** — evidence below the floor, or model-knowledge only on a strict
|
|
151
|
+
domain. Phrase the reco as "lean X (low-confidence, verify before committing)".
|
|
152
|
+
|
|
153
|
+
The verb carries the confidence, not a hidden footnote. This maps to the
|
|
154
|
+
`conf=<assertive|lean>` field in the done-marker.
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Brick 3 — FORMAT + ANTI-BLOAT: the output shape
|
|
159
|
+
|
|
160
|
+
### Default format
|
|
161
|
+
|
|
162
|
+
One compact table: a row per option, up to 4 criteria columns, a `Niv`
|
|
163
|
+
(evidence level) column, and ONE best-first recommendation line under it.
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
<!-- BYAN-BENCH:done g1=3 g2=3 scope=external conf=assertive -->
|
|
167
|
+
|
|
168
|
+
| Option | Latency | Ops cost | Durability | Niv |
|
|
169
|
+
|----------|---------|----------|------------|-----|
|
|
170
|
+
| Redis | best | medium | weak | L2 |
|
|
171
|
+
| Postgres | good | low | strong | L1 |
|
|
172
|
+
| In-mem | best | none | none | L5 |
|
|
173
|
+
|
|
174
|
+
Recommend Postgres: durability dominates for session data and it is already in
|
|
175
|
+
the stack; Redis only wins if sub-ms latency becomes the binding constraint.
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Hard caps
|
|
179
|
+
|
|
180
|
+
- options: <= 4
|
|
181
|
+
- criteria: <= 4
|
|
182
|
+
- links: <= 3
|
|
183
|
+
- screens: 1 (the whole benchmark fits in one screen)
|
|
184
|
+
|
|
185
|
+
### Collapse the degenerate case
|
|
186
|
+
|
|
187
|
+
If only one option survives the gates, do not render a 1-row table. Collapse to
|
|
188
|
+
one line and emit the skip marker. A table of one is theatre.
|
|
189
|
+
|
|
190
|
+
### Expanded form — opt-in, named trigger
|
|
191
|
+
|
|
192
|
+
Only when the user types `[bench:expand]` does the agent render the expanded
|
|
193
|
+
form (more criteria, per-cell sourcing, the full dissent). The default stays
|
|
194
|
+
compact.
|
|
195
|
+
|
|
196
|
+
### Anti-bloat guards
|
|
197
|
+
|
|
198
|
+
- **Latency guard** — default to model-knowledge + `[UNVERIFIED]` tags.
|
|
199
|
+
Auto-WebFetch only for a strict domain (security / performance / compliance)
|
|
200
|
+
or a volatile fact (current version, current price, current status). Do not
|
|
201
|
+
fetch to decorate an internal fork.
|
|
202
|
+
- **Escape-hatch** — session: the file `.byan-autobench/off` disables blocking
|
|
203
|
+
(`touch` to silence, `rm` to re-enable). Cross-session: the config key
|
|
204
|
+
`escape_hatch.disabled=true` (regenerated from the YAML) disables it
|
|
205
|
+
persistently. Both still log `satisfied-escape` so misses stay auditable.
|
|
206
|
+
- **No re-benchmark** — do not re-table a fork already decided and coherent with
|
|
207
|
+
the locked stack unless the user explicitly reopens it. If the architecture
|
|
208
|
+
already chose Postgres, do not benchmark Redis-vs-Postgres again.
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## The 14 mantras
|
|
213
|
+
|
|
214
|
+
- `BENCH-1` Gate Before Table
|
|
215
|
+
- `BENCH-2` Never Benchmark A Confirm
|
|
216
|
+
- `BENCH-3` Route Before Depth
|
|
217
|
+
- `BENCH-4` No Fabricated URL
|
|
218
|
+
- `BENCH-5` Strict-Domain Floor
|
|
219
|
+
- `BENCH-6` Confidence Changes The Verb
|
|
220
|
+
- `BENCH-7` One Compact Table
|
|
221
|
+
- `BENCH-8` Collapse The Degenerate
|
|
222
|
+
- `BENCH-9` Expand Only On Demand
|
|
223
|
+
- `BENCH-10` Latency Guard
|
|
224
|
+
- `BENCH-11` Respect The Escape-Hatch
|
|
225
|
+
- `BENCH-12` Emit The Marker
|
|
226
|
+
- `BENCH-13` No Re-Benchmark
|
|
227
|
+
- `BENCH-14` Honest Ceiling
|
|
228
|
+
|
|
229
|
+
Full text in `_byan/_config/autobench.yaml`.
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## The honest ceiling (GH #28273)
|
|
234
|
+
|
|
235
|
+
Claude Code exposes no pre-display interception hook: there is no event that
|
|
236
|
+
fires BEFORE the assistant's text reaches the user. The benchmark therefore
|
|
237
|
+
cannot be injected into a turn before it is shown. Two mechanisms cover this
|
|
238
|
+
honestly, and neither pretends otherwise:
|
|
239
|
+
|
|
240
|
+
1. **The proactive doctrine** (this file, mirrored into `CLAUDE.md` and
|
|
241
|
+
`AGENTS.md` via `byan-sync-rules`) makes the agent
|
|
242
|
+
self-apply the benchmark before it writes the choice.
|
|
243
|
+
2. **The reactive Stop hook** catches a turn that presented a choice WITHOUT a
|
|
244
|
+
marker and forces exactly one regen. It is POST-HOC by construction — it
|
|
245
|
+
cannot rewrite the already-shown text, only block the turn end and require a
|
|
246
|
+
re-presentation.
|
|
247
|
+
|
|
248
|
+
Promising pre-display interception would be a lie. This design does not make it:
|
|
249
|
+
the first table the user sees may be unmarked; the hook's job is to ensure the
|
|
250
|
+
NEXT turn corrects it, once, and that every fire and miss is recorded in
|
|
251
|
+
`_byan-output/benchmark-ledger.jsonl`.
|
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
| **byan** | Builder | Createur d'agents via interview (12 questions, 64 mantras) — [FC] + [ELO] intégrés |
|
|
17
17
|
| **fact-checker** | Scientifique | Fact-check: assertions, audits de documents, chaines de raisonnement |
|
|
18
18
|
| **agent-builder** | Constructeur | Expert en construction d'agents |
|
|
19
|
-
| **marc** | Specialiste | Integration GitHub Copilot |
|
|
20
19
|
| **rachid** | Specialiste | Deploiement NPM/NPX |
|
|
21
20
|
| **carmack** | Optimiseur | Optimisation tokens |
|
|
22
21
|
| **patnote** | Gestionnaire | Mises a jour et conflits |
|
|
@@ -119,3 +119,67 @@ curl -X POST -H "Authorization: ApiKey $BYAN_API_TOKEN" -H "Content-Type: applic
|
|
|
119
119
|
| Lancer un workflow | `byan_api_workflows_run` |
|
|
120
120
|
| Chercher dans la memoire | `byan_api_memory_search` |
|
|
121
121
|
| Importer un projet local | `byan_import_project` |
|
|
122
|
+
|
|
123
|
+
## 8. Famille byan_leantime_* (board externe Leantime, distinct de byan_web)
|
|
124
|
+
|
|
125
|
+
Backend SEPARE de byan_web : ces tools parlent a une instance Leantime
|
|
126
|
+
self-hosted (gestion de projet), pas a l'API byan_web. Le workflow FD les
|
|
127
|
+
declenche pour mirror son cycle de vie sur un board Leantime, en sens unique
|
|
128
|
+
(FD pilote Leantime ; Leantime ne pilote pas FD). Cablage par phase : voir
|
|
129
|
+
`.claude/skills/byan-byan/SKILL.md` section 2.5.
|
|
130
|
+
|
|
131
|
+
Tu n'appelles pas ces tools a la main : le hook `PostToolUse`
|
|
132
|
+
`.claude/hooks/leantime-fd-sync.js` (coeur pur `lib/leantime-fd-core.js`) fire le
|
|
133
|
+
sync automatiquement apres `byan_fd_advance` / `byan_fd_update`, best-effort
|
|
134
|
+
(sort en 0, n'interrompt pas le tour), idempotent via le sidecar gitignore
|
|
135
|
+
`.byan-leantime/map.json`. Detail : SKILL 2.5 + `docs/leantime-integration.md`.
|
|
136
|
+
|
|
137
|
+
### Config (env distinct de BYAN_API_URL)
|
|
138
|
+
|
|
139
|
+
| Var | Role |
|
|
140
|
+
|-----|------|
|
|
141
|
+
| `LEANTIME_API_URL` | Base de l'instance Leantime (host du backend `/api/jsonrpc`, PAS le host de l'UI). Sans `/api` final — le client ajoute `/api/jsonrpc`. |
|
|
142
|
+
| `LEANTIME_API_TOKEN` | Cle API Leantime, envoyee en header `x-api-key`. Generation : voir "Generer le token" plus bas (cle API compte de service OU Personal Access Token). |
|
|
143
|
+
| `LEANTIME_CLIENT_ID` | Optionnel : clientId pour `addProject` (sinon premier client retourne, sinon 1). |
|
|
144
|
+
| `LEANTIME_ASSIGN_USER_ID` | Optionnel : id du user humain a relier au projet cree (visibilite dans son selecteur) + editorId par defaut des taches. Absent -> projet visible du seul compte de service API. |
|
|
145
|
+
|
|
146
|
+
Les deux premieres sont injectees via `.mcp.json` `${...}` (zero secret tracke).
|
|
147
|
+
Quand la paire est absente, les tools reportent `enabled: false` et le FD avance
|
|
148
|
+
sans bloquer.
|
|
149
|
+
|
|
150
|
+
### Generer le token
|
|
151
|
+
|
|
152
|
+
Cle API Leantime (header `x-api-key`), generee dans l'UI Leantime : Company
|
|
153
|
+
Settings -> onglet "Cle d'API" -> "Generate API Key" (role owner/admin requis ;
|
|
154
|
+
la cle a le prefixe `lt_`, affichee une seule fois). Guide d'usage complet
|
|
155
|
+
(config, generation pas-a-pas, troubleshooting, securite) :
|
|
156
|
+
`docs/leantime-integration.md`.
|
|
157
|
+
|
|
158
|
+
### Authentification (header propre)
|
|
159
|
+
|
|
160
|
+
Leantime authentifie le JSON-RPC par le header `x-api-key: <token>` — PAS le
|
|
161
|
+
switch `Authorization: ApiKey/Bearer` de byan_web. Reutiliser le scheme byan_web
|
|
162
|
+
enverrait un header que Leantime ignore : l'appel passe non authentifie
|
|
163
|
+
(probablement un 401, ou un fall-through vers la page HTML de login que la garde
|
|
164
|
+
non_json attrape). Le code exact est confirme a F0 (live-verify).
|
|
165
|
+
|
|
166
|
+
### Tools
|
|
167
|
+
|
|
168
|
+
| Tool | Usage | Guard |
|
|
169
|
+
|------|-------|-------|
|
|
170
|
+
| `byan_leantime_ping` | Healthcheck : reporte api_url, token_configured, enabled, reachable. Ne throw pas. | aucun |
|
|
171
|
+
| `byan_leantime_project_ensure` | Cree-ou-recupere un projet Leantime (idempotent par nom), retourne l'id | requireLeantime |
|
|
172
|
+
| `byan_leantime_task_create` | Cree une tache (addTicket) depuis un item backlog, retourne l'id tache | requireLeantime |
|
|
173
|
+
| `byan_leantime_task_move` | Transitionne une tache vers une colonne `todo\|doing\|blocked\|review\|done` (resolue en statut Leantime du projet) | requireLeantime |
|
|
174
|
+
| `byan_leantime_task_assign` | Pose l'assignee (editorId) | requireLeantime |
|
|
175
|
+
| `byan_leantime_task_get` | Lit une tache par id | requireLeantime |
|
|
176
|
+
| `byan_leantime_board_get` | Lit le board d'un projet groupe par colonne | requireLeantime |
|
|
177
|
+
|
|
178
|
+
### Lecon mauvais-host (la garde non-JSON)
|
|
179
|
+
|
|
180
|
+
Leantime sert l'app HTML ET l'API JSON-RPC sur le meme domaine. Si
|
|
181
|
+
`LEANTIME_API_URL` pointe sur le host de l'UI au lieu du backend, un POST
|
|
182
|
+
`/api/jsonrpc` peut renvoyer 200 + une page HTML de login. Le client rejette ce
|
|
183
|
+
corps en `reason: non_json` avec un hint, plutot que de le lire comme un board
|
|
184
|
+
vide. C'est la meme lecon que `BYAN_API_URL` (UI SSO vs host API). Si un appel
|
|
185
|
+
Leantime renvoie `non_json`, corriger `LEANTIME_API_URL` vers le host backend.
|
|
@@ -9,17 +9,17 @@
|
|
|
9
9
|
Strict mode locks a contract (the scope) at the start of a task, forces the
|
|
10
10
|
agent to self-verify its work against that contract at least three times, and
|
|
11
11
|
blocks the delivery (the commit) until verification is earned. It works on the
|
|
12
|
-
|
|
12
|
+
two platforms BYAN targets: Claude Code and Codex.
|
|
13
13
|
|
|
14
14
|
| Layer | Mechanism | Platforms |
|
|
15
15
|
|-------|-----------|-----------|
|
|
16
|
-
| Scope lock + self-verify + complete | MCP tools (`byan_strict_*`) |
|
|
16
|
+
| Scope lock + self-verify + complete | MCP tools (`byan_strict_*`) | both (MCP) |
|
|
17
17
|
| In-session blocking | Claude Code hooks (Stop / PreToolUse / UserPromptSubmit) | Claude Code |
|
|
18
|
-
| Context injection | `AGENTS.md` block
|
|
19
|
-
| Final net | `.githooks/pre-commit` audit gate |
|
|
18
|
+
| Context injection | `AGENTS.md` block | Codex |
|
|
19
|
+
| Final net | `.githooks/pre-commit` audit gate | both (commit time) |
|
|
20
20
|
|
|
21
|
-
Codex
|
|
22
|
-
net that catches
|
|
21
|
+
Codex has no in-session blocking hook. The pre-commit gate is the
|
|
22
|
+
net that covers this gap: a commit cannot land if a strict session was engaged but
|
|
23
23
|
not completed correctly.
|
|
24
24
|
|
|
25
25
|
## Source de verite
|
|
@@ -39,7 +39,6 @@ The generator emits (idempotent, between `BYAN-STRICT:BEGIN/END` markers):
|
|
|
39
39
|
- `.claude/skills/byan-strict/SKILL.md` — the Claude Code skill
|
|
40
40
|
- `.claude/hooks/lib/strict-config.json` — runtime config for the hooks
|
|
41
41
|
- `AGENTS.md` block — Codex
|
|
42
|
-
- `.github/copilot-instructions.md` block — Copilot
|
|
43
42
|
- `src/byan-v2/data/strict-mantras.json` — the MantraValidator ruleset
|
|
44
43
|
|
|
45
44
|
Do not hand-edit the generated blocks; edit the YAML and regenerate.
|
|
@@ -48,8 +47,10 @@ Do not hand-edit the generated blocks; edit the YAML and regenerate.
|
|
|
48
47
|
|
|
49
48
|
1. **Lock the scope** — `byan_strict_lock_scope` with a verbatim restatement of
|
|
50
49
|
the request and a non-empty list of testable `acceptanceCriteria`. Optional
|
|
51
|
-
`allowedPaths` restrict where writes may land.
|
|
52
|
-
|
|
50
|
+
`allowedPaths` restrict where writes may land. Optional `domain` (e.g.
|
|
51
|
+
security, performance, javascript) feeds one VALIDATED tick to the ELO loop on
|
|
52
|
+
a successful completion — pass it when one technical domain dominates the
|
|
53
|
+
task, explicit only, omit otherwise. The locked scope is the contract.
|
|
53
54
|
2. **Build the full scope** — do not substitute an MVP or a stub. If a part
|
|
54
55
|
cannot be done, surface it as a gap in self-verify; do not cut it silently.
|
|
55
56
|
3. **Self-verify >= 3 times** — `byan_strict_self_verify` with `verdict` `ok`
|
|
@@ -46,6 +46,10 @@
|
|
|
46
46
|
"type": "command",
|
|
47
47
|
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/mantra-validate.js"
|
|
48
48
|
},
|
|
49
|
+
{
|
|
50
|
+
"type": "command",
|
|
51
|
+
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/fact-check-claims.js"
|
|
52
|
+
},
|
|
49
53
|
{
|
|
50
54
|
"type": "command",
|
|
51
55
|
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/fd-response-check.js"
|
|
@@ -57,6 +61,14 @@
|
|
|
57
61
|
{
|
|
58
62
|
"type": "command",
|
|
59
63
|
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/strict-stop-guard.js"
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
"type": "command",
|
|
67
|
+
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/autobench-stop-guard.js"
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"type": "command",
|
|
71
|
+
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/drain-advisory.js"
|
|
60
72
|
}
|
|
61
73
|
]
|
|
62
74
|
}
|
|
@@ -87,6 +99,10 @@
|
|
|
87
99
|
{
|
|
88
100
|
"type": "command",
|
|
89
101
|
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/tool-failure-guard.js"
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"type": "command",
|
|
105
|
+
"command": "node \"$CLAUDE_PROJECT_DIR\"/.claude/hooks/leantime-fd-sync.js"
|
|
90
106
|
}
|
|
91
107
|
]
|
|
92
108
|
}
|