create-byan-agent 2.25.0 → 2.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +155 -0
- package/README.md +9 -12
- package/install/bin/create-byan-agent-v2.js +29 -169
- package/install/lib/agent-generator.js +5 -5
- package/install/lib/byan-web-integration.js +1 -1
- package/install/lib/claude-native-setup.js +1 -1
- package/install/lib/phase2-chat.js +3 -10
- package/install/lib/platforms/claude-code.js +2 -2
- package/install/lib/platforms/index.js +0 -2
- package/install/lib/project-agents-generator.js +3 -3
- package/install/lib/staging-consent.js +3 -3
- package/install/lib/subagent-generator.js +3 -3
- package/install/lib/yanstaller/agent-launcher.js +1 -27
- package/install/lib/yanstaller/detector.js +4 -4
- package/install/lib/yanstaller/installer.js +0 -2
- package/install/lib/yanstaller/interviewer.js +1 -1
- package/install/lib/yanstaller/platform-selector.js +1 -13
- package/install/package.json +1 -1
- package/install/src/byan-v2/context/session-state.js +2 -2
- package/install/src/byan-v2/index.js +1 -5
- package/install/src/byan-v2/orchestrator/generation-state.js +4 -4
- package/install/src/webui/api.js +0 -2
- package/install/src/webui/chat/bridge.js +1 -13
- package/install/src/webui/chat/cli-detector.js +0 -23
- package/install/src/webui/public/app.js +1 -3
- package/install/src/webui/public/chat.html +0 -2
- package/install/src/webui/public/chat.js +0 -1
- package/install/src/webui/public/index.html +2 -2
- package/install/templates/.claude/CLAUDE.md +13 -2
- package/install/templates/.claude/agents/bmad-byan.md +1 -1
- package/install/templates/.claude/hooks/autobench-stop-guard.js +286 -0
- package/install/templates/.claude/hooks/fact-check-absolutes.js +1 -61
- package/install/templates/.claude/hooks/fact-check-claims.js +69 -0
- package/install/templates/.claude/hooks/fd-response-check.js +37 -46
- package/install/templates/.claude/hooks/inject-soul.js +64 -25
- package/install/templates/.claude/hooks/leantime-fd-sync.js +216 -0
- package/install/templates/.claude/hooks/lib/autobench-config.json +81 -0
- package/install/templates/.claude/hooks/lib/autobench-fc-enrich.js +251 -0
- package/install/templates/.claude/hooks/lib/autobench-ledger-report.js +253 -0
- package/install/templates/.claude/hooks/lib/autobench-runtime.js +199 -0
- package/install/templates/.claude/hooks/lib/fact-check-core.js +69 -0
- package/install/templates/.claude/hooks/lib/transcript-read.js +137 -0
- package/install/templates/.claude/hooks/soul-memory-check.js +49 -25
- package/install/templates/.claude/hooks/soul-memory-triggers.js +27 -8
- package/install/templates/.claude/hooks/stage-to-byan.js +25 -7
- package/install/templates/.claude/hooks/strict-stop-guard.js +4 -16
- package/install/templates/.claude/rules/benchmark.md +251 -0
- package/install/templates/.claude/rules/byan-agents.md +0 -1
- package/install/templates/.claude/rules/byan-api.md +64 -0
- package/install/templates/.claude/rules/fact-check.md +1 -1
- package/install/templates/.claude/rules/strict-mode.md +10 -9
- package/install/templates/.claude/settings.json +12 -0
- package/install/templates/.claude/skills/byan-benchmark/SKILL.md +159 -0
- package/install/templates/.claude/skills/byan-byan/SKILL.md +73 -12
- package/install/templates/.claude/skills/byan-fact-check/SKILL.md +1 -1
- package/install/templates/.claude/skills/byan-hermes-dispatch/SKILL.md +5 -6
- package/install/templates/.claude/skills/byan-orchestrate/SKILL.md +11 -3
- package/install/templates/.claude/skills/byan-strict/SKILL.md +4 -1
- package/install/templates/.claude/workflows/INDEX.md +2 -1
- package/install/templates/.claude/workflows/byan-benchmark.js +328 -0
- package/install/templates/_byan/_config/agent-manifest.csv +1 -1
- package/install/templates/_byan/_config/autobench.yaml +510 -0
- package/install/templates/_byan/_config/strict-mode.yaml +9 -3
- package/install/templates/_byan/_config/workflow-manifest.csv +1 -0
- package/install/templates/_byan/agent/byan/byan.md +1 -3
- package/install/templates/_byan/agent/byan-flat/byan.md +1 -3
- package/install/templates/_byan/agent/byan-test/byan-test.md +2 -2
- package/install/templates/_byan/agent/byan-test-flat/byan-test.md +2 -2
- package/install/templates/_byan/agent/byan.optimized/byan.optimized.md +2 -2
- package/install/templates/_byan/agent/byan.optimized-v2/byan.optimized-v2.md +2 -2
- package/install/templates/_byan/agent/claude/claude.md +0 -2
- package/install/templates/_byan/agent/codex/codex.md +0 -2
- package/install/templates/_byan/agent/rachid/rachid.md +2 -10
- package/install/templates/_byan/agent/rachid-flat/rachid.md +2 -11
- package/install/templates/_byan/agent/turbo-whisper/turbo-whisper.md +2 -5
- package/install/templates/_byan/agent/turbo-whisper-integration/turbo-whisper-integration.md +5 -13
- package/install/templates/_byan/agent/yanstaller/yanstaller.md +2 -24
- package/install/templates/_byan/config.yaml +0 -1
- package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-sync-rules.js +20 -4
- package/install/templates/_byan/mcp/byan-mcp-server/lib/advisory-autofeed.js +13 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/index-generator.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/kanban.js +6 -3
- package/install/templates/_byan/mcp/byan-mcp-server/lib/leantime-fd-core.js +205 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/leantime-sync.js +415 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/precommit-gate.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/strict-activation.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/strict-mode.js +8 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/sync-rules.js +172 -23
- package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-generator.js +1 -0
- package/install/templates/_byan/mcp/byan-mcp-server/server.js +205 -82
- package/install/templates/_byan/worker/launchers/README.md +4 -24
- package/install/templates/_byan/worker/workers.md +0 -2
- package/install/templates/_byan/workflow/simple/bmb/byan-benchmark/workflow.md +86 -0
- package/install/templates/docs/leantime-integration.md +160 -0
- package/package.json +3 -7
- package/src/byan-v2/context/session-state.js +2 -2
- package/src/byan-v2/generation/mantra-validator.js +3 -3
- package/src/byan-v2/index.js +1 -5
- package/src/byan-v2/integration/voice-integration.js +1 -1
- package/src/byan-v2/orchestrator/generation-state.js +4 -4
- package/src/staging/staging.js +20 -6
- package/install/bin/build-copilot-stubs.js +0 -138
- package/install/lib/platforms/copilot-cli.js +0 -123
- package/install/lib/platforms/vscode.js +0 -51
- package/install/src/byan-v2/context/copilot-context.js +0 -79
- package/install/src/webui/chat/copilot-adapter.js +0 -68
- package/install/templates/.claude/agents/bmad-marc.md +0 -25
- package/install/templates/.claude/skills/byan-marc/SKILL.md +0 -20
- package/install/templates/.github/agents/bmad-agent-bmad-master.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-agent-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-module-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-workflow-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-analyst.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-architect.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-dev.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-pm.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-quick-flow-solo-dev.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-quinn.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-sm.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-tech-writer.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-ux-designer.md +0 -16
- package/install/templates/.github/agents/bmad-agent-byan-test.md +0 -33
- package/install/templates/.github/agents/bmad-agent-byan-v2.md +0 -44
- package/install/templates/.github/agents/bmad-agent-byan.md +0 -1062
- package/install/templates/.github/agents/bmad-agent-carmack.md +0 -14
- package/install/templates/.github/agents/bmad-agent-cis-brainstorming-coach.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-creative-problem-solver.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-design-thinking-coach.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-innovation-strategist.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-presentation-master.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-storyteller.md +0 -16
- package/install/templates/.github/agents/bmad-agent-claude.md +0 -49
- package/install/templates/.github/agents/bmad-agent-codex.md +0 -49
- package/install/templates/.github/agents/bmad-agent-drawio.md +0 -45
- package/install/templates/.github/agents/bmad-agent-fact-checker.md +0 -16
- package/install/templates/.github/agents/bmad-agent-forgeron.md +0 -15
- package/install/templates/.github/agents/bmad-agent-jimmy.md +0 -15
- package/install/templates/.github/agents/bmad-agent-marc.md +0 -49
- package/install/templates/.github/agents/bmad-agent-mike.md +0 -15
- package/install/templates/.github/agents/bmad-agent-patnote.md +0 -49
- package/install/templates/.github/agents/bmad-agent-rachid.md +0 -48
- package/install/templates/.github/agents/bmad-agent-skeptic.md +0 -16
- package/install/templates/.github/agents/bmad-agent-tao.md +0 -14
- package/install/templates/.github/agents/bmad-agent-tea-tea.md +0 -16
- package/install/templates/.github/agents/bmad-agent-test-dynamic.md +0 -22
- package/install/templates/.github/agents/bmad-agent-yanstaller-interview.md +0 -50
- package/install/templates/.github/agents/bmad-agent-yanstaller-phase2.md +0 -189
- package/install/templates/.github/agents/bmad-agent-yanstaller.md +0 -350
- package/install/templates/.github/agents/expert-merise-agile.md +0 -178
- package/install/templates/.github/agents/franck.md +0 -379
- package/install/templates/.github/agents/hermes.md +0 -575
- package/install/templates/.github/extensions/byan-staging/extension.mjs +0 -169
- package/install/templates/.github/extensions/byan-staging/package.json +0 -8
- package/install/templates/_byan/agent/marc/marc-soul.md +0 -47
- package/install/templates/_byan/agent/marc/marc-tao.md +0 -77
- package/install/templates/_byan/agent/marc/marc.md +0 -324
- package/install/templates/_byan/agent/marc-flat/marc.md +0 -387
- package/install/templates/_byan/mcp/byan-mcp-server/lib/copilot.js +0 -148
- package/install/templates/_byan/worker/launchers/launch-yanstaller-copilot.md +0 -173
- package/install/templates/workers/cost-optimizer.js +0 -169
- package/src/byan-v2/context/copilot-context.js +0 -79
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Stop hook — BYAN Auto-Benchmark end-of-turn guard.
|
|
4
|
+
*
|
|
5
|
+
* Goal : when the assistant's finished message presents a CHOICE between
|
|
6
|
+
* options (a fork) but did NOT emit a BYAN-BENCH marker, the agent skipped the
|
|
7
|
+
* benchmark doctrine. The hook blocks ONCE to force exactly one regeneration in
|
|
8
|
+
* which the agent re-presents the fork as the compact benchmark table and emits
|
|
9
|
+
* the marker. It never loops : a block-token keyed on the message content makes
|
|
10
|
+
* the second pass non-blocking by construction.
|
|
11
|
+
*
|
|
12
|
+
* Shipped DISARMED (approach C) : the hook observes and ledgers every turn but
|
|
13
|
+
* does not block until explicitly armed (enforcement.armed in the config —
|
|
14
|
+
* config-only, set via the YAML + byan-sync-rules, no loose flag file). Day one
|
|
15
|
+
* is zero noise / zero latency; the net is pre-built but inert until the user
|
|
16
|
+
* opts in. A disarmed turn that WOULD have fired is recorded as
|
|
17
|
+
* observed-disarmed-fork so arming later is an informed decision.
|
|
18
|
+
*
|
|
19
|
+
* Detection is ARTIFACT-primary with a regex fallback. The decision order :
|
|
20
|
+
* 1. MARKER : a BYAN-BENCH:done|skip marker -> satisfied (the agent already
|
|
21
|
+
* benchmarked, or deliberately skipped a degenerate fork).
|
|
22
|
+
* 2. NEVER : y/n confirms and destructive prompts are exempt — we never
|
|
23
|
+
* benchmark "proceed?" or "rm -rf".
|
|
24
|
+
* 3. ESCAPE : the session flag (.byan-autobench/off) or the cross-session
|
|
25
|
+
* config opt-out suppresses all blocking.
|
|
26
|
+
* 4. DISARMED : not armed -> observe + ledger, never block (the default).
|
|
27
|
+
* 5. BLOCKED : a block-token for this turn already exists -> the regen pass,
|
|
28
|
+
* do not block again.
|
|
29
|
+
* 6. FORK : a fork is present when an AskUserQuestion tool_use artifact is
|
|
30
|
+
* in the finished turn (the unambiguous, primary signal). The
|
|
31
|
+
* choice-language regex is only a LAST-RESORT fallback for
|
|
32
|
+
* inline-prose forks that never called the tool.
|
|
33
|
+
*
|
|
34
|
+
* Honest ceiling (GH #28273) : the Stop hook is REACTIVE. It cannot intercept
|
|
35
|
+
* before the message is displayed; it can only force a regeneration after the
|
|
36
|
+
* fact. The proactive half is the doctrine the agent self-applies. This hook is
|
|
37
|
+
* the safety net, not a pre-display filter.
|
|
38
|
+
*
|
|
39
|
+
* Non-blocking on any IO/parse error : the hook never traps a turn it cannot
|
|
40
|
+
* read. Every invocation appends one fire/miss line to the ledger.
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
'use strict';
|
|
44
|
+
|
|
45
|
+
const {
|
|
46
|
+
loadAutobenchConfig,
|
|
47
|
+
escapeHatchActive,
|
|
48
|
+
isArmed,
|
|
49
|
+
readBlockToken,
|
|
50
|
+
writeBlockToken,
|
|
51
|
+
appendLedger,
|
|
52
|
+
extractLastAssistantText,
|
|
53
|
+
extractLastAssistantContent,
|
|
54
|
+
hasChoiceArtifact,
|
|
55
|
+
turnHash,
|
|
56
|
+
readStdin,
|
|
57
|
+
parseJson,
|
|
58
|
+
} = require('./lib/autobench-runtime');
|
|
59
|
+
|
|
60
|
+
/**
 * Build a RegExp from a `{ source, flags }` config entry.
 *
 * A missing entry, a non-string `source`, or a pattern/flag combination the
 * RegExp constructor rejects all yield `null`, so one bad config entry never
 * throws out of the hook.
 *
 * @param {{source?: string, flags?: string}|null|undefined} spec
 * @returns {RegExp|null} the compiled pattern, or null when it cannot be built.
 */
function compileRegex(spec) {
  const usable = Boolean(spec) && typeof spec.source === 'string';
  if (!usable) {
    return null;
  }

  let compiled = null;
  try {
    // Missing flags fall back to the empty flag string.
    compiled = new RegExp(spec.source, spec.flags || '');
  } catch {
    compiled = null;
  }
  return compiled;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Count how many times `re` matches inside `text`.
 *
 * Counting needs a global pattern, so a non-global `re` is cloned with the
 * `g` flag appended; the caller's regex object is never modified.
 *
 * @param {string} text - the text to scan.
 * @param {RegExp|null} re - the pattern; a falsy value counts as zero matches.
 * @returns {number} the number of matches (0 when there are none).
 */
function countMatches(text, re) {
  if (!re) {
    return 0;
  }

  let globalRe = re;
  if (!re.flags.includes('g')) {
    globalRe = new RegExp(re.source, re.flags + 'g');
  }

  const found = text.match(globalRe);
  if (!found) {
    return 0;
  }
  return found.length;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Tell whether at least one `{ source, flags }` entry in `specs` matches `text`.
 *
 * Entries that fail to compile are ignored; a non-array `specs` never matches.
 *
 * @param {string} text - the text to test.
 * @param {Array<{source: string, flags?: string}>} specs - pattern entries.
 * @returns {boolean} true as soon as one compiled pattern matches.
 */
function anyRegexMatches(text, specs) {
  if (!Array.isArray(specs)) {
    return false;
  }
  for (const entry of specs) {
    const pattern = compileRegex(entry);
    if (pattern && pattern.test(text)) {
      return true;
    }
  }
  return false;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Evaluate the choice-language signals with their per-signal thresholds.
 *
 * A signal fires when :
 *   - `min_matches` is a number -> the pattern matches at least that many times;
 *   - else `requires_candidates` is a number -> the pattern matches AND at least
 *     that many `candidate_token` matches co-occur in the text;
 *   - otherwise -> a plain match.
 *
 * A `choice_language` value that is missing or not an array yields false
 * instead of throwing, mirroring the Array.isArray guard in anyRegexMatches :
 * a malformed config must not abort the decision.
 *
 * @param {string} text - the assistant prose to inspect.
 * @param {object|null|undefined} config - reads `choice_language` (array of
 *   `{ source, flags, min_matches?, requires_candidates? }`) and
 *   `candidate_token` (`{ source, flags }`).
 * @returns {boolean} true when at least one signal fires.
 */
function hasChoiceLanguage(text, config) {
  const specs = config && config.choice_language;
  if (!Array.isArray(specs)) return false;

  // Candidate tokens are counted once and shared by every signal that needs them.
  const candidateRe = compileRegex(config.candidate_token);
  const candidateCount = candidateRe ? countMatches(text, candidateRe) : 0;

  return specs.some((spec) => {
    const re = compileRegex(spec);
    if (!re) return false;

    if (typeof spec.min_matches === 'number') {
      return countMatches(text, re) >= spec.min_matches;
    }
    if (typeof spec.requires_candidates === 'number') {
      return re.test(text) && candidateCount >= spec.requires_candidates;
    }
    return re.test(text);
  });
}
|
|
109
|
+
|
|
110
|
+
/**
 * Tell whether `text` matches the `marker_patterns.any` pattern from the config.
 *
 * @param {string} text - the assistant prose to inspect.
 * @param {object|null|undefined} config - reads `marker_patterns.any`.
 * @returns {boolean} false when the pattern is absent or cannot be compiled.
 */
function hasMarker(text, config) {
  const patterns = config && config.marker_patterns;
  const anyMarker = compileRegex(patterns && patterns.any);
  if (!anyMarker) {
    return false;
  }
  return anyMarker.test(text);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Tell whether `text` matches any pattern of the config's `never_list`.
 *
 * @param {string} text - the assistant prose to inspect.
 * @param {object|null|undefined} config - reads `never_list`.
 * @returns {boolean} the result of anyRegexMatches over the never-list.
 */
function hasNeverListed(text, config) {
  const neverList = config ? config.never_list : config;
  return anyRegexMatches(text, neverList);
}
|
|
119
|
+
|
|
120
|
+
/**
 * Parse the marker fields (g1 / g2 / scope) for the ledger.
 *
 * Best-effort : a marker without fields still satisfies; the fields only
 * enrich the audit trail. Each field pattern comes from `config.marker_fields`
 * and its first capture group carries the value.
 *
 * A field pattern configured with the `g` flag is matched through a non-global
 * clone : String.prototype.match with a global regex returns the list of full
 * matches and no capture groups, which would make index 1 the SECOND match (or
 * undefined) instead of the captured value.
 *
 * @param {string} text - the assistant prose containing the marker.
 * @param {object|null|undefined} config - reads `marker_fields.{g1,g2,scope}`.
 * @returns {{g1?: number, g2?: number, scope?: string}} only the fields whose
 *   pattern matched; g1/g2 are converted with Number().
 */
function readMarkerFields(text, config) {
  const fields = (config && config.marker_fields) || {};

  // First match of a field pattern with capture groups intact, or null.
  const firstMatch = (spec) => {
    const re = compileRegex(spec);
    if (!re) return null;
    const single = re.global ? new RegExp(re.source, re.flags.replace('g', '')) : re;
    return text.match(single);
  };

  const out = {};
  const g1 = firstMatch(fields.g1);
  if (g1) out.g1 = Number(g1[1]);
  const g2 = firstMatch(fields.g2);
  if (g2) out.g2 = Number(g2[1]);
  const scope = firstMatch(fields.scope);
  if (scope) out.scope = scope[1];
  return out;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Classify the marker found in `text`.
 *
 * The `done` pattern is checked before the `skip` pattern, so a text matching
 * both is reported as 'done'.
 *
 * @param {string} text - the assistant prose to inspect.
 * @param {object|null|undefined} config - reads `marker_patterns.done` / `.skip`.
 * @returns {'done'|'skip'|null} the marker kind, or null when neither matches.
 */
function markerKind(text, config) {
  const patterns = (config && config.marker_patterns) || {};
  for (const kind of ['done', 'skip']) {
    const re = compileRegex(patterns[kind]);
    if (re && re.test(text)) {
      return kind;
    }
  }
  return null;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Pure end-of-turn decision : no IO and no clock, so it can be unit-tested
 * directly.
 *
 * A fork is present when the caller reports an AskUserQuestion artifact
 * (`artifact === true`, the primary signal) or, as a fallback, when the
 * choice-language patterns match the prose. The checks run in this order and
 * the first one that applies decides the ledger event :
 *   marker present      -> 'satisfied-marker' / 'satisfied-skip' (+ g1/g2/scope)
 *   never-list hit      -> 'satisfied-never'
 *   escape hatch active -> 'satisfied-escape'
 *   not armed           -> 'observed-disarmed-fork' / 'observed-disarmed'
 *   already blocked     -> 'satisfied-already-blocked'
 *   no fork             -> 'no-choice'
 *   otherwise           -> 'fired-block' (the only blocking outcome)
 *
 * @param {object} input
 * @param {string} input.lastAssistantText - the finished prose ('' when falsy).
 * @param {boolean} input.artifact - true when the turn holds a choice artifact.
 * @param {boolean} input.armed - enforcement gate; only `true` arms the hook.
 * @param {object} input.config - patterns and banners.
 * @param {boolean} input.escapeHatch - truthy suppresses blocking.
 * @param {boolean} input.blocked - truthy when this turn was already blocked.
 * @returns {{block: boolean, reason?: string, ledger: object}} `reason` is set
 *   only when `block` is true; `ledger` is the audit record.
 */
function decideBench({ lastAssistantText, artifact, armed, config, escapeHatch, blocked }) {
  const prose = lastAssistantText || '';

  const marker = hasMarker(prose, config);
  const neverHit = hasNeverListed(prose, config);
  const choiceLang = hasChoiceLanguage(prose, config);
  const hasArtifact = artifact === true;
  const armedOn = armed === true;
  const fork = hasArtifact || choiceLang;

  // Which signal identified the fork : the structural artifact wins over the
  // lexical fallback; null when neither is present.
  let detection = null;
  if (hasArtifact) {
    detection = 'artifact';
  } else if (choiceLang) {
    detection = 'regex-fallback';
  }

  const ledger = { neverHit, choiceLang, artifact: hasArtifact, marker, detection, armed: armedOn };

  // Every non-blocking outcome only differs by its ledger event.
  const pass = (event) => {
    ledger.event = event;
    return { block: false, ledger };
  };

  if (marker) {
    const kind = markerKind(prose, config);
    Object.assign(ledger, readMarkerFields(prose, config));
    return pass(kind === 'skip' ? 'satisfied-skip' : 'satisfied-marker');
  }
  if (neverHit) {
    return pass('satisfied-never');
  }
  if (escapeHatch) {
    return pass('satisfied-escape');
  }
  if (!armedOn) {
    // Disarmed never blocks, but records whether a fork would have fired.
    return pass(fork ? 'observed-disarmed-fork' : 'observed-disarmed');
  }
  if (blocked) {
    // Regeneration pass : this turn was already blocked once.
    return pass('satisfied-already-blocked');
  }
  if (!fork) {
    return pass('no-choice');
  }

  // Armed, unmarked fork : block, using the configured banner when present.
  ledger.event = 'fired-block';
  const reason =
    (config && config.banners && config.banners.stop_block) ||
    'Auto-benchmark: you presented a choice without a BYAN-BENCH marker. Re-present it as the compact benchmark table and emit the marker.';
  return { block: true, reason, ledger };
}
|
|
212
|
+
|
|
213
|
+
// CLI entry point : runs only when the file is executed directly as a hook,
// not when it is required (e.g. by unit tests importing decideBench).
if (require.main === module) {
  (async () => {
    // Wrap everything : the hook must NEVER trap a turn it cannot read.
    try {
      const config = loadAutobenchConfig();
      const payload = parseJson(await readStdin());
      const lastAssistantText = extractLastAssistantText(payload);
      // ARTIFACT-primary signal : read the RAW last-assistant content (the block
      // array extractLastAssistantText flattens away) so the structural
      // AskUserQuestion tool_use is visible to the decision.
      const artifact = hasChoiceArtifact(extractLastAssistantContent(payload));

      const hash = turnHash(lastAssistantText);
      const escapeHatch = escapeHatchActive(config);
      const armed = isArmed(config);
      // Loop-guard : the content-hash block token is primary; stop_hook_active is
      // an additional belt from the runtime (a prior Stop hook already blocked
      // this turn), so we never depend on it alone.
      const blocked = readBlockToken(hash) || payload.stop_hook_active === true;

      const decision = decideBench({ lastAssistantText, artifact, armed, config, escapeHatch, blocked });

      // Audit trail : one JSONL line per invocation. turnHash is content-only;
      // any timestamp/session comes from the environment, kept out of the pure
      // decision so it stays deterministic.
      appendLedger(
        {
          turnHash: hash,
          event: decision.ledger.event,
          g1: decision.ledger.g1,
          g2: decision.ledger.g2,
          scope: decision.ledger.scope,
          neverHit: decision.ledger.neverHit,
          choiceLang: decision.ledger.choiceLang,
          artifact: decision.ledger.artifact,
          detection: decision.ledger.detection,
          armed: decision.ledger.armed,
          marker: decision.ledger.marker,
          ts: process.env.BYAN_HOOK_TS || undefined,
          session: process.env.CLAUDE_SESSION_ID || undefined,
        },
        config
      );

      if (!decision.block) {
        process.stdout.write(JSON.stringify({ continue: true }));
        process.exit(0);
      }

      // Block-once : write the token BEFORE emitting the block so the
      // regenerated turn (same content if the agent fails to fix) is exempt.
      writeBlockToken(hash);
      process.stdout.write(
        JSON.stringify({ decision: 'block', reason: decision.reason, systemMessage: decision.reason })
      );
      // With exit code 2 the Claude Code hook contract feeds STDERR back to the
      // model and does not parse stdout JSON (JSON is only read on exit 0), so
      // the reason is mirrored on stderr; without it the regeneration would be
      // forced with no instruction. The stdout JSON is kept for any consumer
      // that already reads it. NOTE(review): confirm against the hooks reference.
      process.stderr.write(`${decision.reason}\n`);
      process.exit(2);
    } catch {
      // Last-resort net : on any unexpected failure, let the turn end.
      process.stdout.write(JSON.stringify({ continue: true }));
      process.exit(0);
    }
  })();
}
|
|
276
|
+
|
|
277
|
+
module.exports = {
|
|
278
|
+
decideBench,
|
|
279
|
+
hasChoiceLanguage,
|
|
280
|
+
hasMarker,
|
|
281
|
+
hasNeverListed,
|
|
282
|
+
markerKind,
|
|
283
|
+
readMarkerFields,
|
|
284
|
+
compileRegex,
|
|
285
|
+
countMatches,
|
|
286
|
+
};
|
|
@@ -16,33 +16,8 @@
|
|
|
16
16
|
* (not documentation).
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
-
const fs = require('fs');
|
|
20
19
|
const path = require('path');
|
|
21
|
-
|
|
22
|
-
const ABSOLUTES = [
|
|
23
|
-
/\btoujours\b/i,
|
|
24
|
-
/\bjamais\b/i,
|
|
25
|
-
/\bforc[eé]ment\b/i,
|
|
26
|
-
/\bobviously\b/i,
|
|
27
|
-
/\balways\b/i,
|
|
28
|
-
/\bnever\b/i,
|
|
29
|
-
/\bclearly\b/i,
|
|
30
|
-
/\bundoubtedly\b/i,
|
|
31
|
-
/\bfaster than\b/i,
|
|
32
|
-
/\bbetter than\b/i,
|
|
33
|
-
/\bplus rapide que\b/i,
|
|
34
|
-
/\bmeilleur que\b/i,
|
|
35
|
-
];
|
|
36
|
-
|
|
37
|
-
const SOURCE_MARKERS = [
|
|
38
|
-
/\bRFC\s*\d+/i,
|
|
39
|
-
/\bCVE-\d{4}-\d+/i,
|
|
40
|
-
/https?:\/\//,
|
|
41
|
-
/\[CLAIM\s+L[1-5]\]/i,
|
|
42
|
-
/\[FACT\s+USER-VERIFIED/i,
|
|
43
|
-
/\bsource\s*:/i,
|
|
44
|
-
/_byan\/knowledge\/sources\.md/,
|
|
45
|
-
];
|
|
20
|
+
const { stripNonClaimZones, findUnsourced } = require('./lib/fact-check-core');
|
|
46
21
|
|
|
47
22
|
const DOC_EXTS = ['.md', '.mdx', '.rst', '.txt'];
|
|
48
23
|
|
|
@@ -65,24 +40,6 @@ function isExemptPath(filePath) {
|
|
|
65
40
|
return EXEMPT_PATH_PATTERNS.some((re) => re.test(filePath));
|
|
66
41
|
}
|
|
67
42
|
|
|
68
|
-
// Strip content that cannot be a claim :
|
|
69
|
-
// - fenced code blocks ``` ... ```
|
|
70
|
-
// - inline backticks `...`
|
|
71
|
-
// - block quotes (lines starting with >)
|
|
72
|
-
// - regex / array syntax that contains the word as a token
|
|
73
|
-
function stripNonClaimZones(text) {
|
|
74
|
-
if (!text) return '';
|
|
75
|
-
return text
|
|
76
|
-
// Fenced code blocks
|
|
77
|
-
.replace(/```[\s\S]*?```/g, '')
|
|
78
|
-
// Inline code
|
|
79
|
-
.replace(/`[^`\n]+`/g, '')
|
|
80
|
-
// Markdown block quotes
|
|
81
|
-
.replace(/^> .*$/gm, '')
|
|
82
|
-
// Lines that look like list of patterns (e.g. "- toujours")
|
|
83
|
-
.replace(/^[\s-]*['"]?\b(toujours|jamais|forc[eé]ment|obviously|always|never|clearly|undoubtedly)\b['"]?/gim, '');
|
|
84
|
-
}
|
|
85
|
-
|
|
86
43
|
function readStdin() {
|
|
87
44
|
return new Promise((resolve) => {
|
|
88
45
|
if (process.stdin.isTTY) return resolve('');
|
|
@@ -107,23 +64,6 @@ function extractText(toolName, input) {
|
|
|
107
64
|
return '';
|
|
108
65
|
}
|
|
109
66
|
|
|
110
|
-
function findUnsourced(text) {
|
|
111
|
-
if (!text) return null;
|
|
112
|
-
for (const re of ABSOLUTES) {
|
|
113
|
-
const match = text.match(re);
|
|
114
|
-
if (!match) continue;
|
|
115
|
-
const idx = match.index || 0;
|
|
116
|
-
const windowStart = Math.max(0, idx - 240);
|
|
117
|
-
const windowEnd = Math.min(text.length, idx + match[0].length + 240);
|
|
118
|
-
const ctx = text.slice(windowStart, windowEnd);
|
|
119
|
-
const hasSource = SOURCE_MARKERS.some((sm) => sm.test(ctx));
|
|
120
|
-
if (!hasSource) {
|
|
121
|
-
return { absolute: match[0], context: text.slice(Math.max(0, idx - 80), idx + 80) };
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
return null;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
67
|
(async () => {
|
|
128
68
|
const raw = await readStdin();
|
|
129
69
|
let payload = {};
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Stop hook — fact-check conversation claims (non-blocking).
|
|
4
|
+
*
|
|
5
|
+
* The PreToolUse twin (fact-check-absolutes.js) only fires when the agent
|
|
6
|
+
* WRITES an unsourced absolute into a doc file. This hook covers the other,
|
|
7
|
+
* more frequent surface : an unsourced absolute spoken in the assistant's
|
|
8
|
+
* final turn text. It NUDGES (systemMessage, continue) and never blocks —
|
|
9
|
+
* spoken claims are noisier than written docs, so a block would trap
|
|
10
|
+
* legitimate hypotheses/quotes. Same detection engine (fact-check-core).
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
'use strict';
|
|
14
|
+
|
|
15
|
+
const { stripNonClaimZones, findUnsourced } = require('./lib/fact-check-core');
|
|
16
|
+
const { extractLastAssistantText } = require('./lib/transcript-read');
|
|
17
|
+
|
|
18
|
+
/**
 * Read the whole of stdin as a UTF-8 string.
 *
 * Resolves with '' when stdin is a TTY (no piped payload). Never rejects : a
 * stream error resolves with whatever was read so far, so the hook can still
 * degrade to "continue".
 *
 * @returns {Promise<string>} the piped payload text.
 */
function readStdin() {
  return new Promise((resolve) => {
    if (process.stdin.isTTY) return resolve('');
    // Decode at the stream level : without an encoding each chunk arrives as a
    // Buffer and is stringified on its own, which corrupts a multi-byte UTF-8
    // character (e.g. an accented letter) split across two chunks.
    process.stdin.setEncoding('utf8');
    let data = '';
    process.stdin.on('data', (c) => (data += c));
    process.stdin.on('end', () => resolve(data));
    process.stdin.on('error', () => resolve(data));
  });
}
|
|
27
|
+
|
|
28
|
+
/**
 * Pure decision, no IO : should this turn receive a fact-check nudge?
 *
 * The text is first passed through stripNonClaimZones, then findUnsourced
 * looks for a hit in what remains.
 *
 * @param {object} input
 * @param {string} input.lastAssistantText - final turn text ('' when falsy).
 * @returns {{nudge: false}|{nudge: true, absolute: *, context: *}} the hit's
 *   `absolute` and `context` are passed through when a nudge is due.
 */
function decideClaim({ lastAssistantText }) {
  const claimText = stripNonClaimZones(lastAssistantText || '');
  const unsourced = findUnsourced(claimText);
  return unsourced
    ? { nudge: true, absolute: unsourced.absolute, context: unsourced.context }
    : { nudge: false };
}
|
|
35
|
+
|
|
36
|
+
/**
 * Format the three-line advisory shown when an unsourced absolute is found.
 *
 * @param {{absolute: string, context: string}} hit - the detected absolute and
 *   the surrounding text excerpt.
 * @returns {string} headline, context excerpt and advice, joined by newlines.
 */
function nudgeMessage(hit) {
  const headline = `BYAN fact-check : unsourced absolute "${hit.absolute}" in this turn.`;
  const excerpt = `Context : ...${hit.context}...`;
  const advice =
    'Consider a source (RFC, CVE, URL, [CLAIM L<n>]) or hedge ("often", "in my tests", "tends to"). Advisory — not blocking.';
  return `${headline}\n${excerpt}\n${advice}`;
}
|
|
43
|
+
|
|
44
|
+
// CLI entry point : runs only when the file is executed directly as a hook.
// Every failure path degrades to a plain { continue: true } : this hook only
// nudges and never blocks.
if (require.main === module) {
  (async () => {
    // Unreadable or unparsable stdin is treated as an empty payload.
    let payload;
    try {
      const raw = await readStdin();
      payload = raw ? JSON.parse(raw) : {};
    } catch {
      payload = {};
    }

    // A failure while extracting or analysing the turn means "no nudge".
    let decision;
    try {
      decision = decideClaim({ lastAssistantText: extractLastAssistantText(payload) });
    } catch {
      decision = { nudge: false };
    }

    const response = decision.nudge
      ? { systemMessage: nudgeMessage(decision), continue: true }
      : { continue: true };
    process.stdout.write(JSON.stringify(response));
  })();
}
|
|
68
|
+
|
|
69
|
+
module.exports = { decideClaim, nudgeMessage };
|
|
@@ -15,6 +15,10 @@
|
|
|
15
15
|
|
|
16
16
|
const fs = require('fs');
|
|
17
17
|
const path = require('path');
|
|
18
|
+
// Shared transcript reader — the real Stop payload has no inline transcript
|
|
19
|
+
// (last_assistant_message + transcript_path JSONL). Without it this hook read an
|
|
20
|
+
// empty turn and never enforced the [FD:PHASE] header live.
|
|
21
|
+
const { extractLastAssistantText } = require('./lib/transcript-read');
|
|
18
22
|
|
|
19
23
|
const projectDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
20
24
|
const statePath = path.join(projectDir, '_byan-output', 'fd-state.json');
|
|
@@ -38,55 +42,42 @@ function readState() {
|
|
|
38
42
|
}
|
|
39
43
|
}
|
|
40
44
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
if (!Array.isArray(tx)) return '';
|
|
45
|
-
for (let i = tx.length - 1; i >= 0; i--) {
|
|
46
|
-
const m = tx[i];
|
|
47
|
-
if (m && m.role === 'assistant') {
|
|
48
|
-
if (typeof m.content === 'string') return m.content;
|
|
49
|
-
if (Array.isArray(m.content)) {
|
|
50
|
-
return m.content
|
|
51
|
-
.map((c) => (typeof c === 'object' && c.text ? c.text : ''))
|
|
52
|
-
.join(' ');
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
return '';
|
|
57
|
-
}
|
|
45
|
+
// Pure decision : returns { block, reason? }. No IO — unit-testable.
//
// Parameters (single options object):
//   state             - FD state object, or a falsy value when no FD state is available.
//                       Only `state.phase` is read.
//   lastAssistantText - text of the last assistant turn; anything that is not a
//                       non-empty string is treated as "turn could not be read".
//
// Returns { block: false } when the turn is allowed, or { block: true, reason }
// when FD is active and the turn lacks the "[FD:<phase>]" header.
function decideFdResponse({ state, lastAssistantText } = {}) {
  const phase = state ? state.phase : undefined;

  // No state, or a state without a usable phase name : there is no header we
  // could legitimately demand (it would be "[FD:undefined]"), so never block.
  if (typeof phase !== 'string' || phase === '') return { block: false };

  // Terminal phases no longer require the header.
  if (['COMPLETED', 'ABORTED'].includes(phase)) return { block: false };

  const expected = `[FD:${phase}]`;
  // Only a real string can be inspected; other values count as an unreadable turn.
  const text = typeof lastAssistantText === 'string' ? lastAssistantText : '';

  // Empty text (cannot read the turn) degrades to allow — never trap a turn we
  // cannot inspect. A present header satisfies.
  if (text === '' || text.includes(expected)) return { block: false };

  const reason = `FD active (phase=${phase}) but your last response did not include the required header "${expected}". Reformulate your answer starting with ${expected} to confirm you are operating in the correct phase. If you wanted to exit or change phase, call byan_fd_advance first.`;
  return { block: true, reason };
}
|
|
73
58
|
|
|
74
|
-
|
|
75
|
-
|
|
59
|
+
// CLI entry point: runs only when this hook file is executed directly, so the
// pure decision function can be required by tests without touching stdin/stdout.
if (require.main === module) {
  // Write the hook output, then exit once the streams have accepted it.
  // Calling process.exit() right after write() can drop output when stdout or
  // stderr is an asynchronous pipe, so the exit happens in the write callback.
  // process.exitCode is set first as a fallback if a callback never fires.
  const finish = (code, stdoutText, stderrText) => {
    process.exitCode = code;
    const writeStdout = () => process.stdout.write(stdoutText, () => process.exit(code));
    if (stderrText) {
      process.stderr.write(stderrText, writeStdout);
    } else {
      writeStdout();
    }
  };

  (async () => {
    let decision = { block: false };
    try {
      const state = readState();
      const raw = await readStdin();
      let payload = {};
      try {
        payload = raw ? JSON.parse(raw) : {};
      } catch {
        // Malformed payload : treat as empty, the decision degrades to allow.
        payload = {};
      }
      decision = decideFdResponse({ state, lastAssistantText: extractLastAssistantText(payload) });
    } catch {
      // Any internal failure degrades to allow — never trap a turn because the
      // hook itself could not run.
      decision = { block: false };
    }

    if (!decision.block) {
      finish(0, JSON.stringify({ continue: true }));
      return;
    }

    // Exit code 2 is the blocking signal. Per the Claude Code hooks reference,
    // JSON on stdout is only processed on exit 0 and stderr is what gets fed
    // back to the model on exit 2, so the reason is also written to stderr.
    // The stdout JSON is kept unchanged for consumers that read it.
    finish(
      2,
      JSON.stringify({ decision: 'block', reason: decision.reason, systemMessage: decision.reason }),
      `${decision.reason}\n`
    );
  })();
}
|
|
83
82
|
|
|
84
|
-
|
|
85
|
-
JSON.stringify({
|
|
86
|
-
decision: 'block',
|
|
87
|
-
reason,
|
|
88
|
-
systemMessage: reason,
|
|
89
|
-
})
|
|
90
|
-
);
|
|
91
|
-
process.exit(2);
|
|
92
|
-
})();
|
|
83
|
+
module.exports = { decideFdResponse, extractLastAssistantText };
|
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* SessionStart hook — loads BYAN soul
|
|
4
|
-
*
|
|
3
|
+
* SessionStart hook — loads BYAN soul + soul-memory and injects them into
|
|
4
|
+
* the session's initial context via additionalContext. Tao is intentionally
|
|
5
|
+
* NOT bundled here: inject-tao.js injects the full tao on every
|
|
6
|
+
* UserPromptSubmit (including the first), so duplicating it at SessionStart
|
|
7
|
+
* would double-spend ~15 KB per session for no gain.
|
|
8
|
+
*
|
|
9
|
+
* Also resets the per-session mid-session-nudge one-shot marker so the
|
|
10
|
+
* soul-memory-triggers nudge is per-session (not per-lifetime). Without
|
|
11
|
+
* this reset the one-shot marker, once written, silences the nudge forever.
|
|
5
12
|
*
|
|
6
13
|
* Safe: missing files are skipped silently, script always exits 0.
|
|
7
14
|
*/
|
|
@@ -9,44 +16,76 @@
|
|
|
9
16
|
const fs = require('fs');
|
|
10
17
|
const path = require('path');
|
|
11
18
|
|
|
12
|
-
const projectDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
13
|
-
|
|
14
19
|
// Resolve the path of a BYAN soul file (`<label>.md`) inside a project.
// Gen3 layouts keep these files under _byan/agent/byan/, Gen2 layouts keep them
// directly under _byan/. The Gen3 path wins when it exists on disk; otherwise
// the Gen2 path is returned, whether or not that file exists. Resolved inline
// so the hook never depends on a require that could fail.
function soulFile(projectDir, label) {
  const fileName = `${label}.md`;
  const gen3Path = path.join(projectDir, '_byan', 'agent', 'byan', fileName);
  if (fs.existsSync(gen3Path)) {
    return gen3Path;
  }
  return path.join(projectDir, '_byan', fileName);
}
|
|
22
27
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
// Path of the one-shot ".soul-memory-nudge-sent" marker for a project.
// Uses the same directory resolution as soul-memory-triggers.js: the Gen3
// _byan/memoire/ directory when it exists, otherwise the Gen2 _byan/_memory/
// directory. Kept in sync by hand (hooks avoid shared requires).
function nudgeMarkerPath(projectDir) {
  const [gen3Dir, gen2Dir] = ['memoire', '_memory'].map((dirName) =>
    path.join(projectDir, '_byan', dirName)
  );
  let markerDir = gen2Dir;
  if (fs.existsSync(gen3Dir)) {
    markerDir = gen3Dir;
  }
  return path.join(markerDir, '.soul-memory-nudge-sent');
}
|
|
28
37
|
|
|
29
|
-
|
|
30
|
-
|
|
38
|
+
// Delete the one-shot nudge marker at session start so the mid-session
// soul-memory nudge can fire once per session instead of once per lifetime.
// Returns true when the removal call completed (a missing marker is not an
// error because of force: true), false when it threw.
function resetNudgeMarker(projectDir) {
  let cleared = true;
  try {
    fs.rmSync(nudgeMarkerPath(projectDir), { force: true });
  } catch {
    cleared = false;
  }
  return cleared;
}
|
|
42
48
|
|
|
43
|
-
|
|
44
|
-
|
|
49
|
+
// Build the text injected as additionalContext at session start.
// Reads the "soul" and "soul-memory" files resolved by soulFile(); each
// existing, non-empty file becomes one section headed by its upper-cased label
// and its path relative to projectDir. Returns '' when no section was produced.
function buildAdditionalContext(projectDir) {
  const sources = ['soul', 'soul-memory'].map((label) => ({
    label,
    file: soulFile(projectDir, label),
  }));

  // Render one section, or null when the file is missing, empty or unreadable.
  const renderSection = ({ label, file }) => {
    try {
      if (!fs.existsSync(file)) {
        return null;
      }
      const body = fs.readFileSync(file, 'utf8').trim();
      if (body.length === 0) {
        return null;
      }
      return `=== BYAN ${label.toUpperCase()} (${path.relative(projectDir, file)}) ===\n${body}`;
    } catch {
      // Ignore read errors — hook must never block session start.
      return null;
    }
  };

  const sections = sources.map(renderSection).filter((section) => section !== null);
  if (sections.length === 0) {
    return '';
  }
  return `BYAN Soul System (loaded at session start):\n\n${sections.join('\n\n')}`;
}
|
|
47
75
|
|
|
48
|
-
if (
|
|
49
|
-
process.
|
|
50
|
-
|
|
51
|
-
|
|
76
|
+
// CLI entry point: runs only when this hook file is executed directly.
// Resets the per-session nudge marker, then prints the SessionStart hook
// output: the soul context when there is any, otherwise an empty JSON object.
if (require.main === module) {
  const root = process.env.CLAUDE_PROJECT_DIR || process.cwd();
  resetNudgeMarker(root);

  const soulContext = buildAdditionalContext(root);
  const output = soulContext
    ? JSON.stringify({
        hookSpecificOutput: { hookEventName: 'SessionStart', additionalContext: soulContext },
      })
    : '{}';
  process.stdout.write(output);
}
|
|
90
|
+
|
|
91
|
+
module.exports = { soulFile, nudgeMarkerPath, resetNudgeMarker, buildAdditionalContext };
|