npm - create-byan-agent - Versions diffs - 2.23.0 → 2.26.0 - Mend

create-byan-agent 2.23.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (172) hide show

package/install/templates/_byan/mcp/byan-mcp-server/server.js CHANGED Viewed

@@ -9,8 +9,10 @@ import {
   ListToolsRequestSchema,
 } from '@modelcontextprotocol/sdk/types.js';
 import { dispatch } from './lib/dispatch.js';
+import { harvest as harvestInsights, renderDigest as renderInsightDigest } from './lib/insight-harvest.js';
+import { appendOutcome } from './lib/outcome-buffer.js';
+import { validateForLog, eloOutcomeForStrictComplete } from './lib/advisory-autofeed.js';
 import { readSoul, appendSoulMemory } from './lib/soul.js';
-import { listSessions, readSessionEvents, searchSessions } from './lib/copilot.js';
 import {
   start as fdStart,
   status as fdStatus,
@@ -69,6 +71,17 @@ import {
   syncEnabled as strictSyncEnabled,
   resolveProjectId as strictResolveProjectId,
 } from './lib/strict-sync.js';
+import {
+  syncEnabled as leantimeEnabled,
+  rpc as leantimeRpc,
+  ensureProject as leantimeEnsureProject,
+  createTask as leantimeCreateTask,
+  moveTask as leantimeMoveTask,
+  assignTask as leantimeAssignTask,
+  getTask as leantimeGetTask,
+  getBoard as leantimeGetBoard,
+  METHODS as LEANTIME_METHODS,
+} from './lib/leantime-sync.js';
 // Compact view of a best-effort strict-sync result for tool responses.
 function syncResult(sync) {
@@ -110,6 +123,14 @@ function requireToken() {
   }
 }
+// Leantime uses its OWN env pair (LEANTIME_API_URL/LEANTIME_API_TOKEN), kept
+// distinct from BYAN_API_URL so the two backends never get crossed.
+function requireLeantime() { // guard: returns nothing, throws an Error when the Leantime env pair is incomplete
+  if (!process.env.LEANTIME_API_URL || !process.env.LEANTIME_API_TOKEN) { // an unset variable and an empty string both count as missing
+    throw new Error('LEANTIME_API_URL + LEANTIME_API_TOKEN env vars are required for byan_leantime_* tools.');
+  }
+}
 async function apiRequest(path, options = {}) {
   const url = `${BYAN_API_URL}${path}`;
   const headers = {
@@ -404,39 +425,6 @@ const tools = [
       additionalProperties: false,
     },
   },
-  {
-    name: 'byan_copilot_sessions',
-    description:
-      'List GitHub Copilot CLI sessions stored locally at ~/.copilot/session-state/. Returns sessionId, start/end time, cwd, branch, agent name, message and tool call counts. Sorted most-recent-first. Use to discover past Copilot CLI conversations for reference or import.',
-    inputSchema: {
-      type: 'object',
-      properties: {
-        limit: { type: 'number', description: 'Max sessions to return (default 20).' },
-        sinceIso: { type: 'string', description: 'ISO timestamp filter — only sessions started after this.' },
-        cwdFilter: { type: 'string', description: 'Substring match on session cwd (e.g. "byan_web").' },
-      },
-      additionalProperties: false,
-    },
-  },
-  {
-    name: 'byan_copilot_session_events',
-    description:
-      'Read events of a specific Copilot CLI session (events.jsonl). Optionally filter by event type (user.message, assistant.message, tool.execution_start, etc.). Useful to inspect the flow of a past session.',
-    inputSchema: {
-      type: 'object',
-      properties: {
-        sessionId: { type: 'string', description: 'Session UUID from byan_copilot_sessions.' },
-        types: {
-          type: 'array',
-          items: { type: 'string' },
-          description: 'Filter to these event types only.',
-        },
-        limit: { type: 'number', description: 'Max events (default 200).' },
-      },
-      required: ['sessionId'],
-      additionalProperties: false,
-    },
-  },
   {
     name: 'byan_fd_start',
     description:
@@ -545,6 +533,34 @@ const tools = [
       additionalProperties: false,
     },
   },
+  {
+    name: 'byan_insight_digest',
+    description:
+      'Harvest native Claude Code outcome trails (tool-log, strict-audit gaps, the suitability ledger, ELO) into a GATED improvement digest for BYAN. Read-only: it OBSERVES and PROPOSES; every proposal is gated for a human to ratify, nothing is auto-applied to routing / personas / mantras. Returns { toolHealth, recurringGaps, routingOutcomes, eloTrends, proposals }.',
+    inputSchema: {
+      type: 'object',
+      properties: {},
+      additionalProperties: false,
+    },
+  },
+  {
+    name: 'byan_outcome_log',
+    description:
+      'Log one ADVISORY outcome to the auto-feed buffer (cheap append; it never writes a ledger directly). The drain-advisory Stop hook records buffered outcomes into the ELO / suitability ledgers at end of turn, so BYAN auto-learns without the agent recording by hand. kind=elo needs { domain, result: VALIDATED|PARTIAL|BLOCKED }; kind=suitability needs { model, leafId, success }. Advisory-only: behavior surfaces (routing / personas / mantras) are never written.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        kind: { type: 'string', enum: ['elo', 'suitability'] },
+        domain: { type: 'string', description: 'elo: the technical domain of the claim' },
+        result: { type: 'string', enum: ['VALIDATED', 'PARTIAL', 'BLOCKED'], description: 'elo: the claim verdict' },
+        model: { type: 'string', description: 'suitability: the cheap model tier/id' },
+        leafId: { type: 'string', description: 'suitability: the workflow leaf' },
+        success: { type: 'boolean', description: 'suitability: did the cheap model survive adversarial review' },
+      },
+      required: ['kind'],
+      additionalProperties: false,
+    },
+  },
   {
     name: 'byan_strict_lock_scope',
     description:
@@ -566,6 +582,10 @@ const tools = [
           items: { type: 'string' },
           description: 'Glob patterns of paths the agent may modify.',
         },
+        domain: {
+          type: 'string',
+          description: 'Optional explicit ELO domain (e.g. security, performance, javascript). When set, a successful byan_strict_complete feeds one VALIDATED outcome to the ELO learning loop. Recorded verbatim (your explicit input, never inferred from text); omit to feed nothing.',
+        },
         force: { type: 'boolean', description: 'Relock with different scope.' },
         projectId: {
           type: 'string',
@@ -627,7 +647,7 @@ const tools = [
   {
     name: 'byan_strict_suggest',
     description:
-      'Check whether a piece of text (user request, feature name) signals a production-grade deliverable that should be built under strict mode. Reads activation keywords from _byan/_config/strict-mode.yaml. Returns { suggested, matched, message }. Use on any platform (Codex/Copilot have no in-session hook) to decide whether to lock strict mode.',
+      'Check whether a piece of text (user request, feature name) signals a production-grade deliverable that should be built under strict mode. Reads activation keywords from _byan/_config/strict-mode.yaml. Returns { suggested, matched, message }. Use on any platform (Codex has no in-session hook) to decide whether to lock strict mode.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -817,25 +837,6 @@ const tools = [
       additionalProperties: false,
     },
   },
-  {
-    name: 'byan_copilot_search',
-    description:
-      'Full-text search across all Copilot CLI sessions. Finds messages (user + assistant by default) containing the query string. Returns sessionId + timestamp + excerpt. Use to recall past discussions without knowing which session they were in.',
-    inputSchema: {
-      type: 'object',
-      properties: {
-        query: { type: 'string', description: 'Substring to search for (case-insensitive).' },
-        types: {
-          type: 'array',
-          items: { type: 'string' },
-          description: 'Event types to scan (default: user.message, assistant.message).',
-        },
-        limit: { type: 'number', description: 'Max matches (default 50).' },
-      },
-      required: ['query'],
-      additionalProperties: false,
-    },
-  },
   // ─── Projects ─────────────────────────────────────────────────────────
   {
@@ -1167,6 +1168,101 @@ const tools = [
       additionalProperties: false,
     },
   },
+  // ─── Leantime (project-management mirror) ─────────────────────────────
+  // Client-side automation of the self-hosted Leantime JSON-RPC API. Used by
+  // the FD workflow to create a project + a task per feature and move task
+  // status across phases. Needs LEANTIME_API_URL + LEANTIME_API_TOKEN.
+  {
+    name: 'byan_leantime_ping',
+    description:
+      'Healthcheck the Leantime integration: reports api_url, token presence, and (if configured) whether the JSON-RPC API is reachable. Surfaces the wrong-host guard (HTML instead of JSON). No required args.',
+    inputSchema: { type: 'object', properties: {}, additionalProperties: false },
+  },
+  {
+    name: 'byan_leantime_project_ensure',
+    description:
+      'Idempotent create-or-fetch of a Leantime project from the FD project_context. Matches an existing project by name first (no duplicate on FD re-run). Returns { id, created }. Requires LEANTIME_API_*.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        name: { type: 'string', description: 'Project name (defaults to slug).' },
+        slug: { type: 'string', description: 'Project slug (fallback name).' },
+        clientId: { type: 'number', description: 'Owning Leantime client id. Resolved if omitted.' },
+        details: { type: 'string', description: 'Optional project description.' },
+      },
+      additionalProperties: false,
+    },
+  },
+  {
+    name: 'byan_leantime_task_create',
+    description:
+      'Create one Leantime task (ticket) from an FD backlog item. Returns the new task id to store back in fd-state (caller owns idempotency: create only if the item has no leantime_task_id). Requires LEANTIME_API_*.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        projectId: { type: 'number', description: 'Leantime project id.' },
+        headline: { type: 'string', description: 'Task title.' },
+        description: { type: 'string' },
+        status: { type: 'number', description: 'Leantime status id (optional).' },
+        priority: { type: 'number' },
+        editorId: { type: 'number', description: 'Assignee/editor user id.' },
+        tags: { type: 'string' },
+        type: { type: 'string', description: "Ticket type, default 'task'." },
+      },
+      required: ['projectId', 'headline'],
+      additionalProperties: false,
+    },
+  },
+  {
+    name: 'byan_leantime_task_move',
+    description:
+      'Move a Leantime task to a lifecycle column (todo|doing|blocked|review|done). Resolves the column to the project status id, then updates the ticket. Requires LEANTIME_API_*.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        taskId: { type: 'number', description: 'Leantime ticket id.' },
+        projectId: { type: 'number', description: 'Project id (for status resolution).' },
+        column: { type: 'string', enum: ['todo', 'doing', 'blocked', 'review', 'done'] },
+        status: { type: 'number', description: 'Explicit status id (bypasses column resolution).' },
+      },
+      required: ['taskId'],
+      additionalProperties: false,
+    },
+  },
+  {
+    name: 'byan_leantime_task_assign',
+    description: 'Set the assignee/editor of a Leantime task. Requires LEANTIME_API_*.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        taskId: { type: 'number', description: 'Leantime ticket id.' },
+        editorId: { type: 'number', description: 'Assignee/editor user id.' },
+      },
+      required: ['taskId', 'editorId'],
+      additionalProperties: false,
+    },
+  },
+  {
+    name: 'byan_leantime_task_get',
+    description: 'Fetch a single Leantime task by id. Requires LEANTIME_API_*.',
+    inputSchema: {
+      type: 'object',
+      properties: { taskId: { type: 'number', description: 'Leantime ticket id.' } },
+      required: ['taskId'],
+      additionalProperties: false,
+    },
+  },
+  {
+    name: 'byan_leantime_board_get',
+    description: "List a Leantime project's tasks grouped by lifecycle column. Requires LEANTIME_API_*.",
+    inputSchema: {
+      type: 'object',
+      properties: { projectId: { type: 'number', description: 'Leantime project id.' } },
+      required: ['projectId'],
+      additionalProperties: false,
+    },
+  },
 ];
 const server = new Server(
@@ -1309,33 +1405,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
     }
-    if (name === 'byan_copilot_sessions') {
-      const result = listSessions({
-        limit: args.limit,
-        sinceIso: args.sinceIso,
-        cwdFilter: args.cwdFilter,
-      });
-      return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
-    }
-    if (name === 'byan_copilot_session_events') {
-      const result = readSessionEvents({
-        sessionId: args.sessionId,
-        types: args.types,
-        limit: args.limit,
-      });
-      return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
-    }
-    if (name === 'byan_copilot_search') {
-      const result = searchSessions({
-        query: args.query,
-        types: args.types,
-        limit: args.limit,
-      });
-      return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
-    }
     if (name === 'byan_fd_start') {
       const state = fdStart({ featureName: args.featureName, force: args.force, strict: args.strict });
       return { content: [{ type: 'text', text: JSON.stringify(state, null, 2) }] };
@@ -1383,11 +1452,39 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       };
     }
+    if (name === 'byan_insight_digest') {
+      const rootDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
+      const digest = harvestInsights({ rootDir });
+      return {
+        content: [
+          {
+            type: 'text',
+            text: JSON.stringify({ gated: true, digest, render: renderInsightDigest(digest) }, null, 2),
+          },
+        ],
+      };
+    }
+    if (name === 'byan_outcome_log') {
+      const line = validateForLog(args);
+      if (!line) {
+        return {
+          content: [{ type: 'text', text: JSON.stringify({ logged: false, reason: 'invalid_outcome' }) }],
+        };
+      }
+      const rootDir = process.env.CLAUDE_PROJECT_DIR || process.cwd();
+      const ok = appendOutcome(line, { rootDir });
+      return {
+        content: [{ type: 'text', text: JSON.stringify({ logged: ok, outcome: line }) }],
+      };
+    }
     if (name === 'byan_strict_lock_scope') {
       const r = strictLockScope({
         scopeText: args.scopeText,
         acceptanceCriteria: args.acceptanceCriteria,
         allowedPaths: args.allowedPaths,
+        domain: args.domain,
         force: args.force,
       });
       const st = strictGetStatus();
@@ -1432,6 +1529,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
     if (name === 'byan_strict_complete') {
       const r = strictComplete();
       const st = strictGetStatus();
+      // C3 learning loop: a completed strict session with an EXPLICIT ELO domain
+      // is a VALIDATED outcome. eloOutcomeForStrictComplete builds the line (the
+      // SAME helper the test exercises, so handler and test cannot drift); we
+      // append it to the buffer drain-advisory drains. The domain is the user's
+      // explicit lock_scope input, never inferred. Best-effort: a feed failure
+      // must not break completion.
+      try {
+        const eloLine = eloOutcomeForStrictComplete(r);
+        if (eloLine) appendOutcome(eloLine, { rootDir: process.env.CLAUDE_PROJECT_DIR || process.cwd() });
+      } catch {
+        // the learning feed must not break completion.
+      }
       const sync = await strictPushComplete({
         sessionId: st.strict_session_id,
         auditToken: r.audit_token,
@@ -1788,6 +1897,78 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
       return { content: [{ type: 'text', text: JSON.stringify(instructions, null, 2) }] };
     }
+    // ─── Leantime tools ───────────────────────────────────────────────
+    if (name === 'byan_leantime_ping') {
+      const status = {
+        api_url: process.env.LEANTIME_API_URL || null,
+        token_configured: Boolean(process.env.LEANTIME_API_TOKEN),
+        assign_user_configured: Boolean(process.env.LEANTIME_ASSIGN_USER_ID),
+        enabled: leantimeEnabled(),
+      };
+      if (status.enabled) {
+        const probe = await leantimeRpc(LEANTIME_METHODS.getAllProjects, {});
+        status.reachable = probe.ok;
+        if (!probe.ok) status.reason = probe.reason;
+        if (probe.hint) status.hint = probe.hint;
+      }
+      return { content: [{ type: 'text', text: JSON.stringify(status, null, 2) }] };
+    }
+    if (name === 'byan_leantime_project_ensure') {
+      requireLeantime();
+      const r = await leantimeEnsureProject({
+        name: args.name,
+        slug: args.slug,
+        clientId: args.clientId,
+        details: args.details,
+      });
+      return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
+    }
+    if (name === 'byan_leantime_task_create') {
+      requireLeantime();
+      const r = await leantimeCreateTask({
+        projectId: args.projectId,
+        headline: args.headline,
+        description: args.description,
+        status: args.status,
+        priority: args.priority,
+        editorId: args.editorId,
+        tags: args.tags,
+        ...(args.type !== undefined ? { type: args.type } : {}),
+      });
+      return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
+    }
+    if (name === 'byan_leantime_task_move') {
+      requireLeantime();
+      const r = await leantimeMoveTask({
+        taskId: args.taskId,
+        projectId: args.projectId,
+        column: args.column,
+        status: args.status,
+      });
+      return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
+    }
+    if (name === 'byan_leantime_task_assign') {
+      requireLeantime();
+      const r = await leantimeAssignTask({ taskId: args.taskId, editorId: args.editorId });
+      return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
+    }
+    if (name === 'byan_leantime_task_get') {
+      requireLeantime();
+      const r = await leantimeGetTask({ taskId: args.taskId });
+      return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
+    }
+    if (name === 'byan_leantime_board_get') {
+      requireLeantime();
+      const r = await leantimeGetBoard({ projectId: args.projectId });
+      return { content: [{ type: 'text', text: JSON.stringify(r, null, 2) }] };
+    }
     throw new Error(`Unknown tool: ${name}`);
   } catch (err) {
     return {

package/install/templates/_byan/worker/launchers/README.md CHANGED Viewed

@@ -19,7 +19,6 @@ Launcher workers are lightweight, single-purpose components that bridge the gap
 ```
 ┌─────────────────────────────────────────────────────────┐
 │                  USER INVOKES AGENT                     │
-│  gh copilot @bmad-agent-marc                            │
 │  claude --agent claude                                  │
 │  codex skill bmad-byan                                  │
 └─────────────┬───────────────────────────────────────────┘
@@ -54,18 +53,7 @@ Launcher workers are lightweight, single-purpose components that bridge the gap
 ## Workers
-### 1. launch-yanstaller-copilot.md
-**Platform:** GitHub Copilot CLI
-**Icon:** 🤖
-**Command:** `npx create-byan-agent`
-**Called by:** `@bmad-agent-marc`
-**Purpose:** Launch yanstaller on Copilot CLI platform.
----
-### 2. launch-yanstaller-claude.md
+### 1. launch-yanstaller-claude.md
 **Platform:** Claude Code
 **Icon:** 🎭
@@ -79,7 +67,7 @@ Launcher workers are lightweight, single-purpose components that bridge the gap
 ---
-### 3. launch-yanstaller-codex.md
+### 2. launch-yanstaller-codex.md
 **Platform:** Codex/OpenCode
 **Icon:** 📝
@@ -107,7 +95,6 @@ Each worker has ONE task: Launch yanstaller command.
 ### Platform Hints
 Workers set environment variables to help yanstaller detect platform:
 ```bash
-BYAN_PLATFORM_HINT=copilot  # For Copilot CLI
 BYAN_PLATFORM_HINT=claude   # For Claude Code
 BYAN_PLATFORM_HINT=codex    # For Codex
 ```
@@ -166,7 +153,7 @@ Can be run multiple times safely.
 ## Separation of Concerns
-### Stub Agents (marc/claude/codex)
+### Stub Agents (claude/codex)
 - Detect invocation
 - Call launcher worker
 - Minimal logic
@@ -185,7 +172,6 @@ Can be run multiple times safely.
 - Platform-specific integration
 - MCP server creation (Claude)
 - Skill file creation (Codex)
-- GitHub agent installation (Copilot)
 ---
@@ -196,7 +182,6 @@ _byan/
 └── workers/
     └── launchers/
         ├── README.md (this file)
-        ├── launch-yanstaller-copilot.md
         ├── launch-yanstaller-claude.md
         └── launch-yanstaller-codex.md
 ```
@@ -207,9 +192,6 @@ _byan/
 ### Manual Test
 ```bash
-# Test Copilot launcher
-node -e "require('./_byan/worker/launchers/worker-launch-yanstaller-copilot').launch()"
 # Test Claude launcher
 node -e "require('./_byan/worker/launchers/worker-launch-yanstaller-claude').launch()"
@@ -219,7 +201,7 @@ node -e "require('./_byan/worker/launchers/worker-launch-yanstaller-codex').laun
 ### Expected Output
 ```
-🤖 Launching Yanstaller on Copilot CLI...
+🎭 Launching Yanstaller on Claude Code...
 [Yanstaller interview UI appears]
 ```
@@ -287,7 +269,6 @@ install/
         └── workers/
             └── launchers/
                 ├── README.md
-                ├── launch-yanstaller-copilot.md
                 ├── launch-yanstaller-claude.md
                 └── launch-yanstaller-codex.md
 ```
@@ -297,7 +278,6 @@ install/
 ## Version History
 - **1.0.0** (2026-02-10): Initial release
-  - Copilot launcher
   - Claude launcher
   - Codex launcher

package/install/templates/_byan/worker/workers.md CHANGED Viewed

@@ -125,7 +125,6 @@ if (complexityScore < 30) {
 **Utilisation :** Lancer yanstaller sur chaque plateforme
 **Fichiers :**
-- `_byan/worker/launchers/launch-yanstaller-copilot.md`
 - `_byan/worker/launchers/launch-yanstaller-claude.md`
 - `_byan/worker/launchers/launch-yanstaller-codex.md`
@@ -287,24 +286,25 @@ very different optimal targets depending on whether they run **alongside
 siblings** (parallel) or **in sequence**. The v2 router adds a
 `parallelizable` axis and emits an **execution strategy**, not a model.
-Implementation : `src/core/dispatcher/execution-router.js` and the MCP
-tool `byan_dispatch` (both share the same table).
+Implementation : the MCP tool `byan_dispatch`
+(`_byan/mcp/byan-mcp-server/lib/dispatch.js`), the single source of truth. The
+strategy comes from the score + `parallelizable` ; the model tier is a separate
+axis, derived from the task NATURE via `native-tiers.js`.
 ```
 score < 15                           → main-thread
 score 15-39 + parallelizable: true   → agent-subagent-worktree
-score 15-39 + parallelizable: false  → mcp-worker-haiku
-score >= 40                          → main-thread-opus
+score 15-39 + parallelizable: false  → mcp-worker
+score >= 40                          → main-thread (heavy)
 ```
 Rationale :
 | Strategy | When | Why |
 |---|---|---|
-| `main-thread` | Trivial task | Spawning anything costs more than solving inline. |
+| `main-thread` | Trivial or heavy task | Spawning costs more than solving inline (trivial), or the work is heavy and stays in the main thread. |
 | `agent-subagent-worktree` | Medium parallel | Claude Code Agent tool with `isolation: "worktree"` amortizes boot cost across the wall-clock savings. |
-| `mcp-worker-haiku` | Medium sequential | Delegate to a lightweight Haiku via MCP tool — no subagent boot, cheaper than main thread. |
-| `main-thread-opus` | Complex | Reasoning depth needed; subagent boot + context handoff would waste more than the delegation saves. |
+| `mcp-worker` | Medium sequential | Delegate to a worker via MCP tool — no subagent boot, cheaper than the main thread. The model tier is set separately, by nature. |
 The score threshold of 15 is where Claude Code `Agent` tool boot overhead
 (~5-10k tokens for system prompt + tools) stops being worth it for
@@ -478,7 +478,6 @@ _byan/
 └── workers/
     └── launchers/
         ├── README.md
-        ├── launch-yanstaller-copilot.md
         ├── launch-yanstaller-claude.md
         └── launch-yanstaller-codex.md

package/install/templates/_byan/workflow/simple/bmb/byan-benchmark/workflow.md ADDED Viewed

@@ -0,0 +1,86 @@
+---
+name: byan-benchmark
+description: 'DATA-only benchmark engine for any decision fork: options x weighted-criteria matrix + best-first reco + dissent. Markdown fallback for non-native platforms (dual-path).'
+---
+# byan-benchmark Workflow (markdown fallback)
+**Goal:** Given a decision fork (>=2 non-substitutable options + weighted
+criteria + an optional judge panel), produce a scored options-x-criteria matrix,
+a best-first recommendation, and the dissenting view - as DATA. The human gate
+and the rendered table live in the orchestrating `byan-benchmark` skill, not here.
+**Your Role:** You are the benchmark engine. You score; the user decides. State
+mutations (FD/strict) stay out of this workflow - that is the skill's job at the
+gate.
+This markdown is the dual-path FALLBACK. The native engine is
+`.claude/workflows/byan-benchmark.js`; `resolveWorkflow('byan-benchmark')`
+prefers the `.js` and falls back to this file on platforms without the native
+Workflow tool.
+---
+## ARGS CONTRACT
+- `question` - the fork stated as a question.
+- `options` - array of `{ name, note? }` (>=2 for a real benchmark).
+- `criteria` - array of `{ name, weight }` (>=1).
+- `judges` - optional reusable panel `[{ key, lens, weighting }]`; defaults to
+  a single neutral judge.
+- `domain` - drives strict floors (`security`/`performance` -> L2, `compliance`
+  -> L1).
+- `scope` - `internal` (no external links, coherence-first) or `external`
+  (sourcing allowed, but a URL only if opened this turn).
+---
+## STEPS
+### 1. RECON - parse the fork
+Normalise `options` to `[{name, note?}]` and `criteria` to `[{name, weight}]`
+(default weight 1). The fork is **valid** only if there are >=2 distinct,
+non-substitutable options AND >=1 criterion. A degenerate / obvious-default fork
+is not benchmarkable - return `degenerate: true` with a reason so the skill emits
+a `BYAN-BENCH:skip` marker.
+### 2. SOURCE - gather evidence per option
+For each option, write one evidence note per criterion. Routing decides links
+before depth: `internal` stays on model-knowledge with no external links;
+`external` may cite a source, but a URL appears only if WebFetch opened it this
+turn - otherwise the claim is `unverified: true`. Honour the strict domain floor.
+### 3. JUDGE - score each cell
+Per the judge panel (default neutral), score each option on each criterion 1-10,
+grade the evidence level against the 5-level rubric (L1 95% spec -> L5 20%
+opinion), and compute `weightedTotal = sum(score * weight)` per option (the
+row's `total` in RETURN). A cell below the strict-domain floor is flagged
+`unverified: true`.
+### 4. RECOMMEND - rank best-first + dissent
+Consolidate the judges into one matrix, best-first by combined weighted total.
+Recommend the winner with a one-line best-first reco. Use `confidence: assertive`
+only when the winner leads by a wide margin and its key cells are verified;
+otherwise use `confidence: lean` (low-confidence, hedged verb). Capture the
+dissent: the runner-up a reasonable judge would defend and the criterion it wins on.
+---
+## RETURN (DATA only)
+```json
+{ "workflow": "byan-benchmark", "question": "...", "scope": "internal",
+  "domain": "general", "options": [...], "criteria": [...],
+  "matrix": [{ "option": "...", "cells": [{ "criterion": "...", "verdict": "...",
+               "level": "L2", "score": 8, "source": "...", "unverified": false }],
+             "total": 0 }],
+  "recommendation": { "best": "...", "line": "...", "confidence": "assertive|lean" },
+  "dissent": { "option": "...", "why": "..." },
+  "degenerate": false, "needsHumanGate": true }
+```
+No state mutation. No emoji. The skill renders the compact 1-table, emits the
+BYAN-BENCH marker, and records state via MCP at the human gate.

package/install/templates/_byan/workflow/simple/byan/feature-workflow.md CHANGED Viewed

@@ -111,8 +111,8 @@ INIT
 |------------------|-------|-----------|
 | < 15 | `main-thread` | Inline dans le contexte courant, zéro overhead de délégation |
 | < 40 + parallélisable | `agent-subagent-worktree` | Agent tool Claude Code avec isolation worktree |
-| < 40 séquentiel | `mcp-worker-haiku` | Worker Haiku léger via MCP |
-| ≥ 40 | `main-thread-opus` | Garde en main thread, raisonnement Opus |
+| < 40 séquentiel | `mcp-worker` | Worker léger via MCP (le tier de modèle vient de la nature, pas de la taille) |
+| ≥ 40 | `main-thread` | Garde en main thread (lourd) ; modèle hérité de la session |
 > Le score (0-100) est estimé depuis la complexité de la tâche (longueur si absent). Appeler `byan_dispatch` pour le calcul — ne pas réinventer les seuils ici.