create-byan-agent 2.23.0 → 2.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +230 -0
- package/README.md +9 -12
- package/install/bin/create-byan-agent-v2.js +29 -169
- package/install/lib/agent-generator.js +5 -5
- package/install/lib/byan-web-integration.js +1 -1
- package/install/lib/claude-native-setup.js +1 -1
- package/install/lib/phase2-chat.js +3 -10
- package/install/lib/platforms/claude-code.js +2 -2
- package/install/lib/platforms/index.js +0 -2
- package/install/lib/project-agents-generator.js +3 -3
- package/install/lib/staging-consent.js +3 -3
- package/install/lib/subagent-generator.js +3 -3
- package/install/lib/yanstaller/agent-launcher.js +1 -27
- package/install/lib/yanstaller/detector.js +4 -4
- package/install/lib/yanstaller/installer.js +0 -2
- package/install/lib/yanstaller/interviewer.js +1 -1
- package/install/lib/yanstaller/platform-selector.js +1 -13
- package/install/package.json +1 -1
- package/install/src/byan-v2/context/session-state.js +2 -2
- package/install/src/byan-v2/index.js +2 -6
- package/install/src/byan-v2/orchestrator/generation-state.js +4 -4
- package/install/src/webui/api.js +0 -2
- package/install/src/webui/chat/bridge.js +1 -13
- package/install/src/webui/chat/cli-detector.js +0 -23
- package/install/src/webui/public/app.js +1 -3
- package/install/src/webui/public/chat.html +0 -2
- package/install/src/webui/public/chat.js +0 -1
- package/install/src/webui/public/index.html +2 -2
- package/install/templates/.claude/CLAUDE.md +13 -2
- package/install/templates/.claude/agents/bmad-byan.md +1 -1
- package/install/templates/.claude/hooks/autobench-stop-guard.js +286 -0
- package/install/templates/.claude/hooks/drain-advisory.js +85 -0
- package/install/templates/.claude/hooks/fact-check-absolutes.js +1 -61
- package/install/templates/.claude/hooks/fact-check-claims.js +69 -0
- package/install/templates/.claude/hooks/fd-response-check.js +37 -46
- package/install/templates/.claude/hooks/inject-soul.js +64 -25
- package/install/templates/.claude/hooks/leantime-fd-sync.js +216 -0
- package/install/templates/.claude/hooks/lib/autobench-config.json +81 -0
- package/install/templates/.claude/hooks/lib/autobench-fc-enrich.js +251 -0
- package/install/templates/.claude/hooks/lib/autobench-ledger-report.js +253 -0
- package/install/templates/.claude/hooks/lib/autobench-runtime.js +199 -0
- package/install/templates/.claude/hooks/lib/fact-check-core.js +69 -0
- package/install/templates/.claude/hooks/lib/failure-detector.js +18 -4
- package/install/templates/.claude/hooks/lib/transcript-read.js +137 -0
- package/install/templates/.claude/hooks/soul-memory-check.js +49 -25
- package/install/templates/.claude/hooks/soul-memory-triggers.js +27 -8
- package/install/templates/.claude/hooks/stage-to-byan.js +25 -7
- package/install/templates/.claude/hooks/strict-stop-guard.js +4 -16
- package/install/templates/.claude/rules/benchmark.md +251 -0
- package/install/templates/.claude/rules/byan-agents.md +0 -1
- package/install/templates/.claude/rules/byan-api.md +64 -0
- package/install/templates/.claude/rules/fact-check.md +1 -1
- package/install/templates/.claude/rules/strict-mode.md +10 -9
- package/install/templates/.claude/settings.json +16 -0
- package/install/templates/.claude/skills/byan-benchmark/SKILL.md +159 -0
- package/install/templates/.claude/skills/byan-byan/SKILL.md +73 -12
- package/install/templates/.claude/skills/byan-fact-check/SKILL.md +1 -1
- package/install/templates/.claude/skills/byan-hermes-dispatch/SKILL.md +5 -6
- package/install/templates/.claude/skills/byan-insight/SKILL.md +56 -0
- package/install/templates/.claude/skills/byan-orchestrate/SKILL.md +11 -3
- package/install/templates/.claude/skills/byan-strict/SKILL.md +4 -1
- package/install/templates/.claude/workflows/INDEX.md +2 -1
- package/install/templates/.claude/workflows/byan-benchmark.js +328 -0
- package/install/templates/.claude/workflows/check-implementation-readiness.js +1 -1
- package/install/templates/_byan/_config/agent-manifest.csv +1 -1
- package/install/templates/_byan/_config/autobench.yaml +510 -0
- package/install/templates/_byan/_config/strict-mode.yaml +9 -3
- package/install/templates/_byan/_config/workflow-manifest.csv +1 -0
- package/install/templates/_byan/agent/byan/byan.md +1 -3
- package/install/templates/_byan/agent/byan-flat/byan.md +1 -3
- package/install/templates/_byan/agent/byan-test/byan-test.md +2 -2
- package/install/templates/_byan/agent/byan-test-flat/byan-test.md +2 -2
- package/install/templates/_byan/agent/byan.optimized/byan.optimized.md +2 -2
- package/install/templates/_byan/agent/byan.optimized-v2/byan.optimized-v2.md +2 -2
- package/install/templates/_byan/agent/claude/claude.md +0 -2
- package/install/templates/_byan/agent/codex/codex.md +0 -2
- package/install/templates/_byan/agent/rachid/rachid.md +2 -10
- package/install/templates/_byan/agent/rachid-flat/rachid.md +2 -11
- package/install/templates/_byan/agent/turbo-whisper/turbo-whisper.md +2 -5
- package/install/templates/_byan/agent/turbo-whisper-integration/turbo-whisper-integration.md +5 -13
- package/install/templates/_byan/agent/yanstaller/yanstaller.md +2 -24
- package/install/templates/_byan/config.yaml +0 -1
- package/install/templates/_byan/core/activation/soul-activation.md +3 -3
- package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-insight-digest.js +31 -0
- package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-sync-rules.js +20 -4
- package/install/templates/_byan/mcp/byan-mcp-server/lib/advisory-autofeed.js +96 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/index-generator.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/insight-harvest.js +220 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/kanban.js +6 -3
- package/install/templates/_byan/mcp/byan-mcp-server/lib/leantime-fd-core.js +205 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/leantime-sync.js +415 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/outcome-buffer.js +64 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/precommit-gate.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/strict-activation.js +1 -1
- package/install/templates/_byan/mcp/byan-mcp-server/lib/strict-mode.js +8 -0
- package/install/templates/_byan/mcp/byan-mcp-server/lib/sync-rules.js +172 -23
- package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-generator.js +1 -0
- package/install/templates/_byan/mcp/byan-mcp-server/server.js +262 -81
- package/install/templates/_byan/worker/launchers/README.md +4 -24
- package/install/templates/_byan/worker/workers.md +8 -9
- package/install/templates/_byan/workflow/simple/bmb/byan-benchmark/workflow.md +86 -0
- package/install/templates/_byan/workflow/simple/byan/feature-workflow.md +2 -2
- package/install/templates/docs/leantime-integration.md +160 -0
- package/package.json +3 -7
- package/src/byan-v2/context/session-state.js +2 -2
- package/src/byan-v2/generation/mantra-validator.js +3 -3
- package/src/byan-v2/index.js +1 -5
- package/src/byan-v2/integration/voice-integration.js +1 -1
- package/src/byan-v2/orchestrator/generation-state.js +4 -4
- package/src/loadbalancer/loadbalancer.js +1 -1
- package/src/staging/staging.js +20 -6
- package/install/bin/build-copilot-stubs.js +0 -138
- package/install/lib/platforms/copilot-cli.js +0 -123
- package/install/lib/platforms/vscode.js +0 -51
- package/install/src/byan-v2/context/copilot-context.js +0 -79
- package/install/src/webui/chat/copilot-adapter.js +0 -68
- package/install/templates/.claude/agents/bmad-marc.md +0 -25
- package/install/templates/.claude/skills/byan-marc/SKILL.md +0 -20
- package/install/templates/.github/agents/bmad-agent-bmad-master.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-agent-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-module-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmb-workflow-builder.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-analyst.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-architect.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-dev.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-pm.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-quick-flow-solo-dev.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-quinn.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-sm.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-tech-writer.md +0 -16
- package/install/templates/.github/agents/bmad-agent-bmm-ux-designer.md +0 -16
- package/install/templates/.github/agents/bmad-agent-byan-test.md +0 -33
- package/install/templates/.github/agents/bmad-agent-byan-v2.md +0 -44
- package/install/templates/.github/agents/bmad-agent-byan.md +0 -1062
- package/install/templates/.github/agents/bmad-agent-carmack.md +0 -14
- package/install/templates/.github/agents/bmad-agent-cis-brainstorming-coach.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-creative-problem-solver.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-design-thinking-coach.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-innovation-strategist.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-presentation-master.md +0 -16
- package/install/templates/.github/agents/bmad-agent-cis-storyteller.md +0 -16
- package/install/templates/.github/agents/bmad-agent-claude.md +0 -49
- package/install/templates/.github/agents/bmad-agent-codex.md +0 -49
- package/install/templates/.github/agents/bmad-agent-drawio.md +0 -45
- package/install/templates/.github/agents/bmad-agent-fact-checker.md +0 -16
- package/install/templates/.github/agents/bmad-agent-forgeron.md +0 -15
- package/install/templates/.github/agents/bmad-agent-jimmy.md +0 -15
- package/install/templates/.github/agents/bmad-agent-marc.md +0 -49
- package/install/templates/.github/agents/bmad-agent-mike.md +0 -15
- package/install/templates/.github/agents/bmad-agent-patnote.md +0 -49
- package/install/templates/.github/agents/bmad-agent-rachid.md +0 -48
- package/install/templates/.github/agents/bmad-agent-skeptic.md +0 -16
- package/install/templates/.github/agents/bmad-agent-tao.md +0 -14
- package/install/templates/.github/agents/bmad-agent-tea-tea.md +0 -16
- package/install/templates/.github/agents/bmad-agent-test-dynamic.md +0 -22
- package/install/templates/.github/agents/bmad-agent-yanstaller-interview.md +0 -50
- package/install/templates/.github/agents/bmad-agent-yanstaller-phase2.md +0 -189
- package/install/templates/.github/agents/bmad-agent-yanstaller.md +0 -350
- package/install/templates/.github/agents/expert-merise-agile.md +0 -178
- package/install/templates/.github/agents/franck.md +0 -379
- package/install/templates/.github/agents/hermes.md +0 -575
- package/install/templates/.github/extensions/byan-staging/extension.mjs +0 -169
- package/install/templates/.github/extensions/byan-staging/package.json +0 -8
- package/install/templates/_byan/agent/marc/marc-soul.md +0 -47
- package/install/templates/_byan/agent/marc/marc-tao.md +0 -77
- package/install/templates/_byan/agent/marc/marc.md +0 -324
- package/install/templates/_byan/agent/marc-flat/marc.md +0 -387
- package/install/templates/_byan/mcp/byan-mcp-server/lib/copilot.js +0 -148
- package/install/templates/_byan/worker/launchers/launch-yanstaller-copilot.md +0 -173
- package/install/templates/workers/cost-optimizer.js +0 -169
- package/src/byan-v2/context/copilot-context.js +0 -79
- package/src/core/dispatcher/execution-router.js +0 -66
|
@@ -9,8 +9,10 @@ import {
|
|
|
9
9
|
ListToolsRequestSchema,
|
|
10
10
|
} from '@modelcontextprotocol/sdk/types.js';
|
|
11
11
|
import { dispatch } from './lib/dispatch.js';
|
|
12
|
+
import { harvest as harvestInsights, renderDigest as renderInsightDigest } from './lib/insight-harvest.js';
|
|
13
|
+
import { appendOutcome } from './lib/outcome-buffer.js';
|
|
14
|
+
import { validateForLog, eloOutcomeForStrictComplete } from './lib/advisory-autofeed.js';
|
|
12
15
|
import { readSoul, appendSoulMemory } from './lib/soul.js';
|
|
13
|
-
import { listSessions, readSessionEvents, searchSessions } from './lib/copilot.js';
|
|
14
16
|
import {
|
|
15
17
|
start as fdStart,
|
|
16
18
|
status as fdStatus,
|
|
@@ -69,6 +71,17 @@ import {
|
|
|
69
71
|
syncEnabled as strictSyncEnabled,
|
|
70
72
|
resolveProjectId as strictResolveProjectId,
|
|
71
73
|
} from './lib/strict-sync.js';
|
|
74
|
+
import {
|
|
75
|
+
syncEnabled as leantimeEnabled,
|
|
76
|
+
rpc as leantimeRpc,
|
|
77
|
+
ensureProject as leantimeEnsureProject,
|
|
78
|
+
createTask as leantimeCreateTask,
|
|
79
|
+
moveTask as leantimeMoveTask,
|
|
80
|
+
assignTask as leantimeAssignTask,
|
|
81
|
+
getTask as leantimeGetTask,
|
|
82
|
+
getBoard as leantimeGetBoard,
|
|
83
|
+
METHODS as LEANTIME_METHODS,
|
|
84
|
+
} from './lib/leantime-sync.js';
|
|
72
85
|
|
|
73
86
|
// Compact view of a best-effort strict-sync result for tool responses.
|
|
74
87
|
function syncResult(sync) {
|
|
@@ -110,6 +123,14 @@ function requireToken() {
|
|
|
110
123
|
}
|
|
111
124
|
}
|
|
112
125
|
|
|
126
|
+
// Leantime uses its OWN env pair (LEANTIME_API_URL/LEANTIME_API_TOKEN), kept
|
|
127
|
+
// distinct from BYAN_API_URL so the two backends never get crossed.
|
|
128
|
+
function requireLeantime() {
|
|
129
|
+
if (!process.env.LEANTIME_API_URL || !process.env.LEANTIME_API_TOKEN) {
|
|
130
|
+
throw new Error('LEANTIME_API_URL + LEANTIME_API_TOKEN env vars are required for byan_leantime_* tools.');
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
113
134
|
async function apiRequest(path, options = {}) {
|
|
114
135
|
const url = `${BYAN_API_URL}${path}`;
|
|
115
136
|
const headers = {
|
|
@@ -404,39 +425,6 @@ const tools = [
|
|
|
404
425
|
additionalProperties: false,
|
|
405
426
|
},
|
|
406
427
|
},
|
|
407
|
-
{
|
|
408
|
-
name: 'byan_copilot_sessions',
|
|
409
|
-
description:
|
|
410
|
-
'List GitHub Copilot CLI sessions stored locally at ~/.copilot/session-state/. Returns sessionId, start/end time, cwd, branch, agent name, message and tool call counts. Sorted most-recent-first. Use to discover past Copilot CLI conversations for reference or import.',
|
|
411
|
-
inputSchema: {
|
|
412
|
-
type: 'object',
|
|
413
|
-
properties: {
|
|
414
|
-
limit: { type: 'number', description: 'Max sessions to return (default 20).' },
|
|
415
|
-
sinceIso: { type: 'string', description: 'ISO timestamp filter — only sessions started after this.' },
|
|
416
|
-
cwdFilter: { type: 'string', description: 'Substring match on session cwd (e.g. "byan_web").' },
|
|
417
|
-
},
|
|
418
|
-
additionalProperties: false,
|
|
419
|
-
},
|
|
420
|
-
},
|
|
421
|
-
{
|
|
422
|
-
name: 'byan_copilot_session_events',
|
|
423
|
-
description:
|
|
424
|
-
'Read events of a specific Copilot CLI session (events.jsonl). Optionally filter by event type (user.message, assistant.message, tool.execution_start, etc.). Useful to inspect the flow of a past session.',
|
|
425
|
-
inputSchema: {
|
|
426
|
-
type: 'object',
|
|
427
|
-
properties: {
|
|
428
|
-
sessionId: { type: 'string', description: 'Session UUID from byan_copilot_sessions.' },
|
|
429
|
-
types: {
|
|
430
|
-
type: 'array',
|
|
431
|
-
items: { type: 'string' },
|
|
432
|
-
description: 'Filter to these event types only.',
|
|
433
|
-
},
|
|
434
|
-
limit: { type: 'number', description: 'Max events (default 200).' },
|
|
435
|
-
},
|
|
436
|
-
required: ['sessionId'],
|
|
437
|
-
additionalProperties: false,
|
|
438
|
-
},
|
|
439
|
-
},
|
|
440
428
|
{
|
|
441
429
|
name: 'byan_fd_start',
|
|
442
430
|
description:
|
|
@@ -545,6 +533,34 @@ const tools = [
|
|
|
545
533
|
additionalProperties: false,
|
|
546
534
|
},
|
|
547
535
|
},
|
|
536
|
+
{
|
|
537
|
+
name: 'byan_insight_digest',
|
|
538
|
+
description:
|
|
539
|
+
'Harvest native Claude Code outcome trails (tool-log, strict-audit gaps, the suitability ledger, ELO) into a GATED improvement digest for BYAN. Read-only: it OBSERVES and PROPOSES; every proposal is gated for a human to ratify, nothing is auto-applied to routing / personas / mantras. Returns { toolHealth, recurringGaps, routingOutcomes, eloTrends, proposals }.',
|
|
540
|
+
inputSchema: {
|
|
541
|
+
type: 'object',
|
|
542
|
+
properties: {},
|
|
543
|
+
additionalProperties: false,
|
|
544
|
+
},
|
|
545
|
+
},
|
|
546
|
+
{
|
|
547
|
+
name: 'byan_outcome_log',
|
|
548
|
+
description:
|
|
549
|
+
'Log one ADVISORY outcome to the auto-feed buffer (cheap append; it never writes a ledger directly). The drain-advisory Stop hook records buffered outcomes into the ELO / suitability ledgers at end of turn, so BYAN auto-learns without the agent recording by hand. kind=elo needs { domain, result: VALIDATED|PARTIAL|BLOCKED }; kind=suitability needs { model, leafId, success }. Advisory-only: behavior surfaces (routing / personas / mantras) are never written.',
|
|
550
|
+
inputSchema: {
|
|
551
|
+
type: 'object',
|
|
552
|
+
properties: {
|
|
553
|
+
kind: { type: 'string', enum: ['elo', 'suitability'] },
|
|
554
|
+
domain: { type: 'string', description: 'elo: the technical domain of the claim' },
|
|
555
|
+
result: { type: 'string', enum: ['VALIDATED', 'PARTIAL', 'BLOCKED'], description: 'elo: the claim verdict' },
|
|
556
|
+
model: { type: 'string', description: 'suitability: the cheap model tier/id' },
|
|
557
|
+
leafId: { type: 'string', description: 'suitability: the workflow leaf' },
|
|
558
|
+
success: { type: 'boolean', description: 'suitability: did the cheap model survive adversarial review' },
|
|
559
|
+
},
|
|
560
|
+
required: ['kind'],
|
|
561
|
+
additionalProperties: false,
|
|
562
|
+
},
|
|
563
|
+
},
|
|
548
564
|
{
|
|
549
565
|
name: 'byan_strict_lock_scope',
|
|
550
566
|
description:
|
|
@@ -566,6 +582,10 @@ const tools = [
|
|
|
566
582
|
items: { type: 'string' },
|
|
567
583
|
description: 'Glob patterns of paths the agent may modify.',
|
|
568
584
|
},
|
|
585
|
+
domain: {
|
|
586
|
+
type: 'string',
|
|
587
|
+
description: 'Optional explicit ELO domain (e.g. security, performance, javascript). When set, a successful byan_strict_complete feeds one VALIDATED outcome to the ELO learning loop. Recorded verbatim (your explicit input, never inferred from text); omit to feed nothing.',
|
|
588
|
+
},
|
|
569
589
|
force: { type: 'boolean', description: 'Relock with different scope.' },
|
|
570
590
|
projectId: {
|
|
571
591
|
type: 'string',
|
|
@@ -627,7 +647,7 @@ const tools = [
|
|
|
627
647
|
{
|
|
628
648
|
name: 'byan_strict_suggest',
|
|
629
649
|
description:
|
|
630
|
-
'Check whether a piece of text (user request, feature name) signals a production-grade deliverable that should be built under strict mode. Reads activation keywords from _byan/_config/strict-mode.yaml. Returns { suggested, matched, message }. Use on any platform (Codex
|
|
650
|
+
'Check whether a piece of text (user request, feature name) signals a production-grade deliverable that should be built under strict mode. Reads activation keywords from _byan/_config/strict-mode.yaml. Returns { suggested, matched, message }. Use on any platform (Codex has no in-session hook) to decide whether to lock strict mode.',
|
|
631
651
|
inputSchema: {
|
|
632
652
|
type: 'object',
|
|
633
653
|
properties: {
|
|
@@ -817,25 +837,6 @@ const tools = [
|
|
|
817
837
|
additionalProperties: false,
|
|
818
838
|
},
|
|
819
839
|
},
|
|
820
|
-
{
|
|
821
|
-
name: 'byan_copilot_search',
|
|
822
|
-
description:
|
|
823
|
-
'Full-text search across all Copilot CLI sessions. Finds messages (user + assistant by default) containing the query string. Returns sessionId + timestamp + excerpt. Use to recall past discussions without knowing which session they were in.',
|
|
824
|
-
inputSchema: {
|
|
825
|
-
type: 'object',
|
|
826
|
-
properties: {
|
|
827
|
-
query: { type: 'string', description: 'Substring to search for (case-insensitive).' },
|
|
828
|
-
types: {
|
|
829
|
-
type: 'array',
|
|
830
|
-
items: { type: 'string' },
|
|
831
|
-
description: 'Event types to scan (default: user.message, assistant.message).',
|
|
832
|
-
},
|
|
833
|
-
limit: { type: 'number', description: 'Max matches (default 50).' },
|
|
834
|
-
},
|
|
835
|
-
required: ['query'],
|
|
836
|
-
additionalProperties: false,
|
|
837
|
-
},
|
|
838
|
-
},
|
|
839
840
|
|
|
840
841
|
// ─── Projects ─────────────────────────────────────────────────────────
|
|
841
842
|
{
|
|
@@ -1167,6 +1168,101 @@ const tools = [
|
|
|
1167
1168
|
additionalProperties: false,
|
|
1168
1169
|
},
|
|
1169
1170
|
},
|
|
1171
|
+
|
|
1172
|
+
// ─── Leantime (project-management mirror) ─────────────────────────────
|
|
1173
|
+
// Client-side automation of the self-hosted Leantime JSON-RPC API. Used by
|
|
1174
|
+
// the FD workflow to create a project + a task per feature and move task
|
|
1175
|
+
// status across phases. Needs LEANTIME_API_URL + LEANTIME_API_TOKEN.
|
|
1176
|
+
{
|
|
1177
|
+
name: 'byan_leantime_ping',
|
|
1178
|
+
description:
|
|
1179
|
+
'Healthcheck the Leantime integration: reports api_url, token presence, and (if configured) whether the JSON-RPC API is reachable. Surfaces the wrong-host guard (HTML instead of JSON). No required args.',
|
|
1180
|
+
inputSchema: { type: 'object', properties: {}, additionalProperties: false },
|
|
1181
|
+
},
|
|
1182
|
+
{
|
|
1183
|
+
name: 'byan_leantime_project_ensure',
|
|
1184
|
+
description:
|
|
1185
|
+
'Idempotent create-or-fetch of a Leantime project from the FD project_context. Matches an existing project by name first (no duplicate on FD re-run). Returns { id, created }. Requires LEANTIME_API_*.',
|
|
1186
|
+
inputSchema: {
|
|
1187
|
+
type: 'object',
|
|
1188
|
+
properties: {
|
|
1189
|
+
name: { type: 'string', description: 'Project name (defaults to slug).' },
|
|
1190
|
+
slug: { type: 'string', description: 'Project slug (fallback name).' },
|
|
1191
|
+
clientId: { type: 'number', description: 'Owning Leantime client id. Resolved if omitted.' },
|
|
1192
|
+
details: { type: 'string', description: 'Optional project description.' },
|
|
1193
|
+
},
|
|
1194
|
+
additionalProperties: false,
|
|
1195
|
+
},
|
|
1196
|
+
},
|
|
1197
|
+
{
|
|
1198
|
+
name: 'byan_leantime_task_create',
|
|
1199
|
+
description:
|
|
1200
|
+
'Create one Leantime task (ticket) from an FD backlog item. Returns the new task id to store back in fd-state (caller owns idempotency: create only if the item has no leantime_task_id). Requires LEANTIME_API_*.',
|
|
1201
|
+
inputSchema: {
|
|
1202
|
+
type: 'object',
|
|
1203
|
+
properties: {
|
|
1204
|
+
projectId: { type: 'number', description: 'Leantime project id.' },
|
|
1205
|
+
headline: { type: 'string', description: 'Task title.' },
|
|
1206
|
+
description: { type: 'string' },
|
|
1207
|
+
status: { type: 'number', description: 'Leantime status id (optional).' },
|
|
1208
|
+
priority: { type: 'number' },
|
|
1209
|
+
editorId: { type: 'number', description: 'Assignee/editor user id.' },
|
|
1210
|
+
tags: { type: 'string' },
|
|
1211
|
+
type: { type: 'string', description: "Ticket type, default 'task'." },
|
|
1212
|
+
},
|
|
1213
|
+
required: ['projectId', 'headline'],
|
|
1214
|
+
additionalProperties: false,
|
|
1215
|
+
},
|
|
1216
|
+
},
|
|
1217
|
+
{
|
|
1218
|
+
name: 'byan_leantime_task_move',
|
|
1219
|
+
description:
|
|
1220
|
+
'Move a Leantime task to a lifecycle column (todo|doing|blocked|review|done). Resolves the column to the project status id, then updates the ticket. Requires LEANTIME_API_*.',
|
|
1221
|
+
inputSchema: {
|
|
1222
|
+
type: 'object',
|
|
1223
|
+
properties: {
|
|
1224
|
+
taskId: { type: 'number', description: 'Leantime ticket id.' },
|
|
1225
|
+
projectId: { type: 'number', description: 'Project id (for status resolution).' },
|
|
1226
|
+
column: { type: 'string', enum: ['todo', 'doing', 'blocked', 'review', 'done'] },
|
|
1227
|
+
status: { type: 'number', description: 'Explicit status id (bypasses column resolution).' },
|
|
1228
|
+
},
|
|
1229
|
+
required: ['taskId'],
|
|
1230
|
+
additionalProperties: false,
|
|
1231
|
+
},
|
|
1232
|
+
},
|
|
1233
|
+
{
|
|
1234
|
+
name: 'byan_leantime_task_assign',
|
|
1235
|
+
description: 'Set the assignee/editor of a Leantime task. Requires LEANTIME_API_*.',
|
|
1236
|
+
inputSchema: {
|
|
1237
|
+
type: 'object',
|
|
1238
|
+
properties: {
|
|
1239
|
+
taskId: { type: 'number', description: 'Leantime ticket id.' },
|
|
1240
|
+
editorId: { type: 'number', description: 'Assignee/editor user id.' },
|
|
1241
|
+
},
|
|
1242
|
+
required: ['taskId', 'editorId'],
|
|
1243
|
+
additionalProperties: false,
|
|
1244
|
+
},
|
|
1245
|
+
},
|
|
1246
|
+
{
|
|
1247
|
+
name: 'byan_leantime_task_get',
|
|
1248
|
+
description: 'Fetch a single Leantime task by id. Requires LEANTIME_API_*.',
|
|
1249
|
+
inputSchema: {
|
|
1250
|
+
type: 'object',
|
|
1251
|
+
properties: { taskId: { type: 'number', description: 'Leantime ticket id.' } },
|
|
1252
|
+
required: ['taskId'],
|
|
1253
|
+
additionalProperties: false,
|
|
1254
|
+
},
|
|
1255
|
+
},
|
|
1256
|
+
{
|
|
1257
|
+
name: 'byan_leantime_board_get',
|
|
1258
|
+
description: "List a Leantime project's tasks grouped by lifecycle column. Requires LEANTIME_API_*.",
|
|
1259
|
+
inputSchema: {
|
|
1260
|
+
type: 'object',
|
|
1261
|
+
properties: { projectId: { type: 'number', description: 'Leantime project id.' } },
|
|
1262
|
+
required: ['projectId'],
|
|
1263
|
+
additionalProperties: false,
|
|
1264
|
+
},
|
|
1265
|
+
},
|
|
1170
1266
|
];
|
|
1171
1267
|
|
|
1172
1268
|
const server = new Server(
|
|
@@ -1309,33 +1405,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1309
1405
|
return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
|
|
1310
1406
|
}
|
|
1311
1407
|
|
|
1312
|
-
if (name === 'byan_copilot_sessions') {
|
|
1313
|
-
const result = listSessions({
|
|
1314
|
-
limit: args.limit,
|
|
1315
|
-
sinceIso: args.sinceIso,
|
|
1316
|
-
cwdFilter: args.cwdFilter,
|
|
1317
|
-
});
|
|
1318
|
-
return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
|
|
1319
|
-
}
|
|
1320
|
-
|
|
1321
|
-
if (name === 'byan_copilot_session_events') {
|
|
1322
|
-
const result = readSessionEvents({
|
|
1323
|
-
sessionId: args.sessionId,
|
|
1324
|
-
types: args.types,
|
|
1325
|
-
limit: args.limit,
|
|
1326
|
-
});
|
|
1327
|
-
return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
|
|
1328
|
-
}
|
|
1329
|
-
|
|
1330
|
-
if (name === 'byan_copilot_search') {
|
|
1331
|
-
const result = searchSessions({
|
|
1332
|
-
query: args.query,
|
|
1333
|
-
types: args.types,
|
|
1334
|
-
limit: args.limit,
|
|
1335
|
-
});
|
|
1336
|
-
return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
|
|
1337
|
-
}
|
|
1338
|
-
|
|
1339
1408
|
if (name === 'byan_fd_start') {
|
|
1340
1409
|
const state = fdStart({ featureName: args.featureName, force: args.force, strict: args.strict });
|
|
1341
1410
|
return { content: [{ type: 'text', text: JSON.stringify(state, null, 2) }] };
|
|
@@ -1383,11 +1452,39 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1383
1452
|
};
|
|
1384
1453
|
}
|
|
1385
1454
|
|
|
1455
|
+
if (name === 'byan_insight_digest') {
|
|
1456
|
+
const rootDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
1457
|
+
const digest = harvestInsights({ rootDir });
|
|
1458
|
+
return {
|
|
1459
|
+
content: [
|
|
1460
|
+
{
|
|
1461
|
+
type: 'text',
|
|
1462
|
+
text: JSON.stringify({ gated: true, digest, render: renderInsightDigest(digest) }, null, 2),
|
|
1463
|
+
},
|
|
1464
|
+
],
|
|
1465
|
+
};
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1468
|
+
if (name === 'byan_outcome_log') {
|
|
1469
|
+
const line = validateForLog(args);
|
|
1470
|
+
if (!line) {
|
|
1471
|
+
return {
|
|
1472
|
+
content: [{ type: 'text', text: JSON.stringify({ logged: false, reason: 'invalid_outcome' }) }],
|
|
1473
|
+
};
|
|
1474
|
+
}
|
|
1475
|
+
const rootDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
1476
|
+
const ok = appendOutcome(line, { rootDir });
|
|
1477
|
+
return {
|
|
1478
|
+
content: [{ type: 'text', text: JSON.stringify({ logged: ok, outcome: line }) }],
|
|
1479
|
+
};
|
|
1480
|
+
}
|
|
1481
|
+
|
|
1386
1482
|
if (name === 'byan_strict_lock_scope') {
|
|
1387
1483
|
const r = strictLockScope({
|
|
1388
1484
|
scopeText: args.scopeText,
|
|
1389
1485
|
acceptanceCriteria: args.acceptanceCriteria,
|
|
1390
1486
|
allowedPaths: args.allowedPaths,
|
|
1487
|
+
domain: args.domain,
|
|
1391
1488
|
force: args.force,
|
|
1392
1489
|
});
|
|
1393
1490
|
const st = strictGetStatus();
|
|
@@ -1432,6 +1529,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1432
1529
|
if (name === 'byan_strict_complete') {
|
|
1433
1530
|
const r = strictComplete();
|
|
1434
1531
|
const st = strictGetStatus();
|
|
1532
|
+
// C3 learning loop: a completed strict session with an EXPLICIT ELO domain
|
|
1533
|
+
// is a VALIDATED outcome. eloOutcomeForStrictComplete builds the line (the
|
|
1534
|
+
// SAME helper the test exercises, so handler and test cannot drift); we
|
|
1535
|
+
// append it to the buffer drain-advisory drains. The domain is the user's
|
|
1536
|
+
// explicit lock_scope input, never inferred. Best-effort: a feed failure
|
|
1537
|
+
// must not break completion.
|
|
1538
|
+
try {
|
|
1539
|
+
const eloLine = eloOutcomeForStrictComplete(r);
|
|
1540
|
+
if (eloLine) appendOutcome(eloLine, { rootDir: process.env.CLAUDE_PROJECT_DIR || process.cwd() });
|
|
1541
|
+
} catch {
|
|
1542
|
+
// the learning feed must not break completion.
|
|
1543
|
+
}
|
|
1435
1544
|
const sync = await strictPushComplete({
|
|
1436
1545
|
sessionId: st.strict_session_id,
|
|
1437
1546
|
auditToken: r.audit_token,
|
|
@@ -1788,6 +1897,78 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1788
1897
|
return { content: [{ type: 'text', text: JSON.stringify(instructions, null, 2) }] };
|
|
1789
1898
|
}
|
|
1790
1899
|
|
|
1900
|
+
// ─── Leantime tools ───────────────────────────────────────────────
|
|
1901
|
+
if (name === 'byan_leantime_ping') {
|
|
1902
|
+
const status = {
|
|
1903
|
+
api_url: process.env.LEANTIME_API_URL || null,
|
|
1904
|
+
token_configured: Boolean(process.env.LEANTIME_API_TOKEN),
|
|
1905
|
+
assign_user_configured: Boolean(process.env.LEANTIME_ASSIGN_USER_ID),
|
|
1906
|
+
enabled: leantimeEnabled(),
|
|
1907
|
+
};
|
|
1908
|
+
if (status.enabled) {
|
|
1909
|
+
const probe = await leantimeRpc(LEANTIME_METHODS.getAllProjects, {});
|
|
1910
|
+
status.reachable = probe.ok;
|
|
1911
|
+
if (!probe.ok) status.reason = probe.reason;
|
|
1912
|
+
if (probe.hint) status.hint = probe.hint;
|
|
1913
|
+
}
|
|
1914
|
+
return { content: [{ type: 'text', text: JSON.stringify(status, null, 2) }] };
|
|
1915
|
+
}
|
|
1916
|
+
|
|
1917
|
+
if (name === 'byan_leantime_project_ensure') {
|
|
1918
|
+
requireLeantime();
|
|
1919
|
+
const r = await leantimeEnsureProject({
|
|
1920
|
+
name: args.name,
|
|
1921
|
+
slug: args.slug,
|
|
1922
|
+
clientId: args.clientId,
|
|
1923
|
+
details: args.details,
|
|
1924
|
+
});
|
|
1925
|
+
return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
|
|
1926
|
+
}
|
|
1927
|
+
|
|
1928
|
+
if (name === 'byan_leantime_task_create') {
|
|
1929
|
+
requireLeantime();
|
|
1930
|
+
const r = await leantimeCreateTask({
|
|
1931
|
+
projectId: args.projectId,
|
|
1932
|
+
headline: args.headline,
|
|
1933
|
+
description: args.description,
|
|
1934
|
+
status: args.status,
|
|
1935
|
+
priority: args.priority,
|
|
1936
|
+
editorId: args.editorId,
|
|
1937
|
+
tags: args.tags,
|
|
1938
|
+
...(args.type !== undefined ? { type: args.type } : {}),
|
|
1939
|
+
});
|
|
1940
|
+
return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
|
|
1941
|
+
}
|
|
1942
|
+
|
|
1943
|
+
if (name === 'byan_leantime_task_move') {
|
|
1944
|
+
requireLeantime();
|
|
1945
|
+
const r = await leantimeMoveTask({
|
|
1946
|
+
taskId: args.taskId,
|
|
1947
|
+
projectId: args.projectId,
|
|
1948
|
+
column: args.column,
|
|
1949
|
+
status: args.status,
|
|
1950
|
+
});
|
|
1951
|
+
return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
|
|
1952
|
+
}
|
|
1953
|
+
|
|
1954
|
+
if (name === 'byan_leantime_task_assign') {
|
|
1955
|
+
requireLeantime();
|
|
1956
|
+
const r = await leantimeAssignTask({ taskId: args.taskId, editorId: args.editorId });
|
|
1957
|
+
return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
if (name === 'byan_leantime_task_get') {
|
|
1961
|
+
requireLeantime();
|
|
1962
|
+
const r = await leantimeGetTask({ taskId: args.taskId });
|
|
1963
|
+
return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
|
|
1964
|
+
}
|
|
1965
|
+
|
|
1966
|
+
if (name === 'byan_leantime_board_get') {
|
|
1967
|
+
requireLeantime();
|
|
1968
|
+
const r = await leantimeGetBoard({ projectId: args.projectId });
|
|
1969
|
+
return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
|
|
1970
|
+
}
|
|
1971
|
+
|
|
1791
1972
|
throw new Error(`Unknown tool: ${name}`);
|
|
1792
1973
|
} catch (err) {
|
|
1793
1974
|
return {
|
|
@@ -19,7 +19,6 @@ Launcher workers are lightweight, single-purpose components that bridge the gap
|
|
|
19
19
|
```
|
|
20
20
|
┌─────────────────────────────────────────────────────────┐
|
|
21
21
|
│ USER INVOKES AGENT │
|
|
22
|
-
│ gh copilot @bmad-agent-marc │
|
|
23
22
|
│ claude --agent claude │
|
|
24
23
|
│ codex skill bmad-byan │
|
|
25
24
|
└─────────────┬───────────────────────────────────────────┘
|
|
@@ -54,18 +53,7 @@ Launcher workers are lightweight, single-purpose components that bridge the gap
|
|
|
54
53
|
|
|
55
54
|
## Workers
|
|
56
55
|
|
|
57
|
-
### 1. launch-yanstaller-
|
|
58
|
-
|
|
59
|
-
**Platform:** GitHub Copilot CLI
|
|
60
|
-
**Icon:** 🤖
|
|
61
|
-
**Command:** `npx create-byan-agent`
|
|
62
|
-
**Called by:** `@bmad-agent-marc`
|
|
63
|
-
|
|
64
|
-
**Purpose:** Launch yanstaller on Copilot CLI platform.
|
|
65
|
-
|
|
66
|
-
---
|
|
67
|
-
|
|
68
|
-
### 2. launch-yanstaller-claude.md
|
|
56
|
+
### 1. launch-yanstaller-claude.md
|
|
69
57
|
|
|
70
58
|
**Platform:** Claude Code
|
|
71
59
|
**Icon:** 🎭
|
|
@@ -79,7 +67,7 @@ Launcher workers are lightweight, single-purpose components that bridge the gap
|
|
|
79
67
|
|
|
80
68
|
---
|
|
81
69
|
|
|
82
|
-
###
|
|
70
|
+
### 2. launch-yanstaller-codex.md
|
|
83
71
|
|
|
84
72
|
**Platform:** Codex/OpenCode
|
|
85
73
|
**Icon:** 📝
|
|
@@ -107,7 +95,6 @@ Each worker has ONE task: Launch yanstaller command.
|
|
|
107
95
|
### Platform Hints
|
|
108
96
|
Workers set environment variables to help yanstaller detect platform:
|
|
109
97
|
```bash
|
|
110
|
-
BYAN_PLATFORM_HINT=copilot # For Copilot CLI
|
|
111
98
|
BYAN_PLATFORM_HINT=claude # For Claude Code
|
|
112
99
|
BYAN_PLATFORM_HINT=codex # For Codex
|
|
113
100
|
```
|
|
@@ -166,7 +153,7 @@ Can be run multiple times safely.
|
|
|
166
153
|
|
|
167
154
|
## Separation of Concerns
|
|
168
155
|
|
|
169
|
-
### Stub Agents (
|
|
156
|
+
### Stub Agents (claude/codex)
|
|
170
157
|
- Detect invocation
|
|
171
158
|
- Call launcher worker
|
|
172
159
|
- Minimal logic
|
|
@@ -185,7 +172,6 @@ Can be run multiple times safely.
|
|
|
185
172
|
- Platform-specific integration
|
|
186
173
|
- MCP server creation (Claude)
|
|
187
174
|
- Skill file creation (Codex)
|
|
188
|
-
- GitHub agent installation (Copilot)
|
|
189
175
|
|
|
190
176
|
---
|
|
191
177
|
|
|
@@ -196,7 +182,6 @@ _byan/
|
|
|
196
182
|
└── workers/
|
|
197
183
|
└── launchers/
|
|
198
184
|
├── README.md (this file)
|
|
199
|
-
├── launch-yanstaller-copilot.md
|
|
200
185
|
├── launch-yanstaller-claude.md
|
|
201
186
|
└── launch-yanstaller-codex.md
|
|
202
187
|
```
|
|
@@ -207,9 +192,6 @@ _byan/
|
|
|
207
192
|
|
|
208
193
|
### Manual Test
|
|
209
194
|
```bash
|
|
210
|
-
# Test Copilot launcher
|
|
211
|
-
node -e "require('./_byan/worker/launchers/worker-launch-yanstaller-copilot').launch()"
|
|
212
|
-
|
|
213
195
|
# Test Claude launcher
|
|
214
196
|
node -e "require('./_byan/worker/launchers/worker-launch-yanstaller-claude').launch()"
|
|
215
197
|
|
|
@@ -219,7 +201,7 @@ node -e "require('./_byan/worker/launchers/worker-launch-yanstaller-codex').laun
|
|
|
219
201
|
|
|
220
202
|
### Expected Output
|
|
221
203
|
```
|
|
222
|
-
|
|
204
|
+
🎭 Launching Yanstaller on Claude Code...
|
|
223
205
|
[Yanstaller interview UI appears]
|
|
224
206
|
```
|
|
225
207
|
|
|
@@ -287,7 +269,6 @@ install/
|
|
|
287
269
|
└── workers/
|
|
288
270
|
└── launchers/
|
|
289
271
|
├── README.md
|
|
290
|
-
├── launch-yanstaller-copilot.md
|
|
291
272
|
├── launch-yanstaller-claude.md
|
|
292
273
|
└── launch-yanstaller-codex.md
|
|
293
274
|
```
|
|
@@ -297,7 +278,6 @@ install/
|
|
|
297
278
|
## Version History
|
|
298
279
|
|
|
299
280
|
- **1.0.0** (2026-02-10): Initial release
|
|
300
|
-
- Copilot launcher
|
|
301
281
|
- Claude launcher
|
|
302
282
|
- Codex launcher
|
|
303
283
|
|
|
@@ -125,7 +125,6 @@ if (complexityScore < 30) {
|
|
|
125
125
|
**Utilisation :** Lancer yanstaller sur chaque plateforme
|
|
126
126
|
|
|
127
127
|
**Fichiers :**
|
|
128
|
-
- `_byan/worker/launchers/launch-yanstaller-copilot.md`
|
|
129
128
|
- `_byan/worker/launchers/launch-yanstaller-claude.md`
|
|
130
129
|
- `_byan/worker/launchers/launch-yanstaller-codex.md`
|
|
131
130
|
|
|
@@ -287,24 +286,25 @@ very different optimal targets depending on whether they run **alongside
|
|
|
287
286
|
siblings** (parallel) or **in sequence**. The v2 router adds a
|
|
288
287
|
`parallelizable` axis and emits an **execution strategy**, not a model.
|
|
289
288
|
|
|
290
|
-
Implementation :
|
|
291
|
-
|
|
289
|
+
Implementation : the MCP tool `byan_dispatch`
|
|
290
|
+
(`_byan/mcp/byan-mcp-server/lib/dispatch.js`), the single source of truth. The
|
|
291
|
+
strategy comes from the score + `parallelizable` ; the model tier is a separate
|
|
292
|
+
axis, derived from the task NATURE via `native-tiers.js`.
|
|
292
293
|
|
|
293
294
|
```
|
|
294
295
|
score < 15 → main-thread
|
|
295
296
|
score 15-39 + parallelizable: true → agent-subagent-worktree
|
|
296
|
-
score 15-39 + parallelizable: false → mcp-worker
|
|
297
|
-
score >= 40 → main-thread
|
|
297
|
+
score 15-39 + parallelizable: false → mcp-worker
|
|
298
|
+
score >= 40 → main-thread (heavy)
|
|
298
299
|
```
|
|
299
300
|
|
|
300
301
|
Rationale :
|
|
301
302
|
|
|
302
303
|
| Strategy | When | Why |
|
|
303
304
|
|---|---|---|
|
|
304
|
-
| `main-thread` | Trivial task | Spawning
|
|
305
|
+
| `main-thread` | Trivial or heavy task | Trivial: spawning costs more than solving inline. Heavy: subagent boot + context handoff would waste more than the delegation saves, so the work stays in the main thread. |
|
|
305
306
|
| `agent-subagent-worktree` | Medium parallel | Claude Code Agent tool with `isolation: "worktree"` amortizes boot cost across the wall-clock savings. |
|
|
306
|
-
| `mcp-worker
|
|
307
|
-
| `main-thread-opus` | Complex | Reasoning depth needed; subagent boot + context handoff would waste more than the delegation saves. |
|
|
307
|
+
| `mcp-worker` | Medium sequential | Delegate to a worker via MCP tool — no subagent boot, cheaper than the main thread. The model tier is set separately, by nature. |
|
|
308
308
|
|
|
309
309
|
The score threshold of 15 is where Claude Code `Agent` tool boot overhead
|
|
310
310
|
(~5-10k tokens for system prompt + tools) stops being worth it for
|
|
@@ -478,7 +478,6 @@ _byan/
|
|
|
478
478
|
└── workers/
|
|
479
479
|
└── launchers/
|
|
480
480
|
├── README.md
|
|
481
|
-
├── launch-yanstaller-copilot.md
|
|
482
481
|
├── launch-yanstaller-claude.md
|
|
483
482
|
└── launch-yanstaller-codex.md
|
|
484
483
|
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: byan-benchmark
|
|
3
|
+
description: 'DATA-only benchmark engine for any decision fork: options x weighted-criteria matrix + best-first reco + dissent. Markdown fallback for non-native platforms (dual-path).'
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# byan-benchmark Workflow (markdown fallback)
|
|
7
|
+
|
|
8
|
+
**Goal:** Given a decision fork (>=2 non-substitutable options + weighted
|
|
9
|
+
criteria + an optional judge panel), produce a scored options-x-criteria matrix,
|
|
10
|
+
a best-first recommendation, and the dissenting view - as DATA. The human gate
|
|
11
|
+
and the rendered table live in the orchestrating `byan-benchmark` skill, not here.
|
|
12
|
+
|
|
13
|
+
**Your Role:** You are the benchmark engine. You score; the user decides. State
|
|
14
|
+
mutations (FD/strict) stay out of this workflow - that is the skill's job at the
|
|
15
|
+
gate.
|
|
16
|
+
|
|
17
|
+
This markdown is the dual-path FALLBACK. The native engine is
|
|
18
|
+
`.claude/workflows/byan-benchmark.js`; `resolveWorkflow('byan-benchmark')`
|
|
19
|
+
prefers the `.js` and falls back to this file on platforms without the native
|
|
20
|
+
Workflow tool.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## ARGS CONTRACT
|
|
25
|
+
|
|
26
|
+
- `question` - the fork stated as a question.
|
|
27
|
+
- `options` - array of `{ name, note? }` (>=2 for a real benchmark).
|
|
28
|
+
- `criteria` - array of `{ name, weight }` (>=1).
|
|
29
|
+
- `judges` - optional reusable panel `[{ key, lens, weighting }]`; default a
|
|
30
|
+
single neutral judge.
|
|
31
|
+
- `domain` - drives strict floors (`security`/`performance` -> L2, `compliance`
|
|
32
|
+
-> L1).
|
|
33
|
+
- `scope` - `internal` (no external links, coherence-first) or `external`
|
|
34
|
+
(sourcing allowed, but a URL only if opened this turn).
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## STEPS
|
|
39
|
+
|
|
40
|
+
### 1. RECON - parse the fork
|
|
41
|
+
|
|
42
|
+
Normalise `options` to `[{name, note?}]` and `criteria` to `[{name, weight}]`
|
|
43
|
+
(default weight 1). The fork is **valid** only if there are >=2 distinct,
|
|
44
|
+
non-substitutable options AND >=1 criterion. A degenerate / obvious-default fork
|
|
45
|
+
is not benchmarkable - return `degenerate: true` with a reason so the skill emits
|
|
46
|
+
a `BYAN-BENCH:skip` marker.
|
|
47
|
+
|
|
48
|
+
### 2. SOURCE - gather evidence per option
|
|
49
|
+
|
|
50
|
+
For each option, write one evidence note per criterion. The `scope` arg decides links
|
|
51
|
+
before depth: `internal` stays on model-knowledge with no external links;
|
|
52
|
+
`external` may cite a source, but a URL appears only if WebFetch opened it this
|
|
53
|
+
turn - otherwise the claim is `unverified: true`. Honour the strict domain floor.
|
|
54
|
+
|
|
55
|
+
### 3. JUDGE - score each cell
|
|
56
|
+
|
|
57
|
+
Per the judge panel (default neutral), score each option on each criterion 1-10,
|
|
58
|
+
grade the evidence level against the 5-level rubric (L1 95% spec -> L5 20%
|
|
59
|
+
opinion), and compute `weightedTotal = sum(score * weight)`. A cell below the
|
|
60
|
+
strict-domain floor is flagged `unverified: true`.
|
|
61
|
+
|
|
62
|
+
### 4. RECOMMEND - rank best-first + dissent
|
|
63
|
+
|
|
64
|
+
Consolidate the judges into one matrix, best-first by combined weighted total.
|
|
65
|
+
Recommend the winner with a one-line best-first reco. Use `confidence: assertive`
|
|
66
|
+
only when the winner leads by a wide margin and its key cells are verified;
|
|
67
|
+
otherwise use `confidence: lean` (low-confidence, hedged verb). Capture the
|
|
68
|
+
dissent: the runner-up a reasonable judge would defend and the criterion it wins on.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## RETURN (DATA only)
|
|
73
|
+
|
|
74
|
+
```json
|
|
75
|
+
{ "workflow": "byan-benchmark", "question": "...", "scope": "internal",
|
|
76
|
+
"domain": "general", "options": [...], "criteria": [...],
|
|
77
|
+
"matrix": [{ "option": "...", "cells": [{ "criterion": "...", "verdict": "...",
|
|
78
|
+
"level": "L2", "score": 8, "source": "...", "unverified": false }],
|
|
79
|
+
"total": 0 }],
|
|
80
|
+
"recommendation": { "best": "...", "line": "...", "confidence": "assertive|lean" },
|
|
81
|
+
"dissent": { "option": "...", "why": "..." },
|
|
82
|
+
"degenerate": false, "needsHumanGate": true }
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
No state mutation. No emoji. The skill renders the compact 1-table, emits the
|
|
86
|
+
BYAN-BENCH marker, and records state via MCP at the human gate.
|
|
@@ -111,8 +111,8 @@ INIT
|
|
|
111
111
|
|------------------|-------|-----------|
|
|
112
112
|
| < 15 | `main-thread` | Inline dans le contexte courant, zéro overhead de délégation |
|
|
113
113
|
| < 40 + parallélisable | `agent-subagent-worktree` | Agent tool Claude Code avec isolation worktree |
|
|
114
|
-
| < 40 séquentiel | `mcp-worker
|
|
115
|
-
| ≥ 40 | `main-thread
|
|
114
|
+
| < 40 séquentiel | `mcp-worker` | Worker léger via MCP (le tier de modèle vient de la nature, pas de la taille) |
|
|
115
|
+
| ≥ 40 | `main-thread` | Garde en main thread (lourd) ; modèle hérité de la session |
|
|
116
116
|
|
|
117
117
|
> Le score (0-100) est estimé depuis la complexité de la tâche (longueur si absent). Appeler `byan_dispatch` pour le calcul — ne pas réinventer les seuils ici.
|
|
118
118
|
|