npm - create-byan-agent - Versions diffs - 2.23.0 → 2.26.0 - Mend

create-byan-agent 2.23.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (172) hide show

package/install/templates/_byan/mcp/byan-mcp-server/lib/insight-harvest.js ADDED Viewed

@@ -0,0 +1,220 @@
+// Session insight harvester — read native Claude Code outcome trails and
+// aggregate them into a GATED improvement digest for BYAN.
+//
+// Philosophy (the whole point): OBSERVE and PROPOSE, never silently self-modify.
+// BYAN already has advisory learning surfaces (ELO trust, the suitability
+// ledger) the agent updates by hand; the native hooks already leave outcome
+// trails on disk. This module closes the loop by READING those trails and
+// surfacing a digest with GATED proposals. It writes nothing back to a behavior
+// surface (routing / personas / mantras): applying any change stays a human
+// decision. An agent that rewrote its own routing on a heuristic would be the
+// exact silent-downgrade BYAN exists to prevent.
+//
+// The aggregation is PURE (no I/O) so it is exhaustively unit-testable; the I/O
+// entry takes an injected reader, mirroring template-sync.js / stub-sync.js.
+//
+// Trails consumed (shapes verified against the live repo):
+//   _byan-output/tool-log.jsonl   post line {phase:'post', tool, ok, est_output_tokens?}
+//   .byan-strict/audit.log        {event:'self_verify', verdict:'gap', findings:[]}
+//   _byan-output/suitability-ledger.json  { "model::leaf": {model, leafId, successes, failures} }
+//   _byan/memoire/elo-profile.json        { domains: { <domain>: {rating, blocked_streak, ...} } }
+import fs from 'node:fs';
+import path from 'node:path';
+// Parse a JSONL blob: one JSON value per '\n'-separated line.
+// Empty lines and lines that fail JSON.parse are skipped; parsed values that
+// are falsy (null, 0, false, "") are dropped as well. Returns [] when text is
+// empty or missing.
+export function parseJsonl(text) {
+  if (!text) return [];
+  const parsed = [];
+  for (const line of text.split('\n')) {
+    if (!line) continue;
+    let value;
+    try {
+      value = JSON.parse(line);
+    } catch {
+      continue; // malformed line: skip it, keep the rest of the trail
+    }
+    if (value) parsed.push(value);
+  }
+  return parsed;
+}
+// Tool health from tool-log.jsonl post lines: call count, failure rate, the top
+// failing tools, and an output-token cost proxy. est_output_tokens is absent on
+// older lines (added later by the hook), so it defaults to 0.
+//
+//   toolLogEntries: parsed tool-log lines of any phase; only phase === 'post'
+//                   lines are counted. A missing or non-array value yields an
+//                   all-zero result.
+//   returns: { calls, failures, failureRate, topFailing: [{tool, count}], estOutputTokens }
+//            failureRate is rounded to 3 decimals; topFailing holds at most 5
+//            tools, most failures first.
+export function harvestToolHealth(toolLogEntries) {
+  const entries = Array.isArray(toolLogEntries) ? toolLogEntries : [];
+  const post = entries.filter((e) => e && e.phase === 'post');
+  const failures = post.filter((e) => e.ok === false);
+  // A Map rather than a plain object: tool names come straight from the log,
+  // and a name such as 'toString' or 'constructor' would otherwise read an
+  // inherited property and corrupt the count.
+  const byTool = new Map();
+  for (const f of failures) {
+    const tool = String(f.tool || 'unknown');
+    byTool.set(tool, (byTool.get(tool) || 0) + 1);
+  }
+  const topFailing = [...byTool.entries()]
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, 5)
+    .map(([tool, count]) => ({ tool, count }));
+  // Only finite numbers count toward the cost proxy; a string or otherwise
+  // malformed value would turn the sum into string concatenation or NaN.
+  const estOutputTokens = post.reduce(
+    (sum, e) => sum + (Number.isFinite(e.est_output_tokens) ? e.est_output_tokens : 0),
+    0
+  );
+  return {
+    calls: post.length,
+    failures: failures.length,
+    failureRate: post.length ? +(failures.length / post.length).toFixed(3) : 0,
+    topFailing,
+    estOutputTokens,
+  };
+}
+// Map a strict-gap finding to a coarse theme key. The finding is stringified
+// and lower-cased, then tested against the rules below in order; the first
+// pattern that matches decides the theme. A finding that matches nothing is
+// reported as 'other' rather than dropped.
+function normalizeGap(finding) {
+  const text = String(finding).toLowerCase();
+  // Order matters: earlier rules win when several patterns match.
+  const rules = [
+    [/\btest|coverage|spec\b/, 'tests/coverage'],
+    [/error|edge|exception|fail|throw/, 'error/edge handling'],
+    [/doc|comment|changelog|readme/, 'documentation'],
+    [/template|fidelity|sync|twin/, 'template fidelity'],
+    [/emoji/, 'emoji'],
+    [/scope|downgrade|cut|stub|mvp/, 'scope/downgrade'],
+  ];
+  const hit = rules.find(([pattern]) => pattern.test(text));
+  return hit ? hit[1] : 'other';
+}
+// Recurring strict-gap clustering (L3). Collects the findings of every audit
+// entry that is a self_verify event with verdict 'gap' and an array of
+// findings, buckets them by normalizeGap theme, and keeps up to two samples per
+// theme (each cut to 100 characters).
+//
+//   returns: { totalGapFindings, recurring } where recurring lists the themes
+//            seen at least twice, most frequent first.
+export function harvestStrictGaps(auditEntries) {
+  const findings = [];
+  for (const entry of auditEntries || []) {
+    const isGap =
+      entry && entry.event === 'self_verify' && entry.verdict === 'gap' && Array.isArray(entry.findings);
+    if (!isGap) continue;
+    for (const finding of entry.findings) findings.push(finding);
+  }
+  const buckets = new Map();
+  for (const finding of findings) {
+    const theme = normalizeGap(finding);
+    let bucket = buckets.get(theme);
+    if (!bucket) {
+      bucket = { theme, count: 0, samples: [] };
+      buckets.set(theme, bucket);
+    }
+    bucket.count += 1;
+    if (bucket.samples.length < 2) bucket.samples.push(String(finding).slice(0, 100));
+  }
+  const recurring = [...buckets.values()]
+    .filter((bucket) => bucket.count >= 2)
+    .sort((a, b) => b.count - a.count);
+  return { totalGapFindings: findings.length, recurring };
+}
+// Routing outcomes (L1): turn the suitability ledger into one row per
+// "model::leaf" entry, busiest first. keepRate = successes / (successes +
+// failures), rounded to 2 decimals. Entries that are not objects, or that have
+// no recorded outcomes, are left out. model / leaf fall back to the two halves
+// of the ledger key when the entry does not carry them.
+export function harvestRouting(ledger) {
+  if (!ledger || typeof ledger !== 'object') return [];
+  const rows = [];
+  Object.entries(ledger).forEach(([key, stats]) => {
+    if (!stats || typeof stats !== 'object') return;
+    const successes = Number(stats.successes || 0);
+    const failures = Number(stats.failures || 0);
+    const n = successes + failures;
+    if (!n) return;
+    const keyParts = key.split('::');
+    rows.push({
+      model: stats.model || keyParts[0],
+      leaf: stats.leafId || keyParts[1] || key,
+      successes,
+      failures,
+      n,
+      keepRate: +(successes / n).toFixed(2),
+    });
+  });
+  rows.sort((a, b) => b.n - a.n);
+  return rows;
+}
+// Domain trust trends from the ELO profile: one { domain, rating,
+// blockedStreak } row per domain whose entry is an object with a numeric
+// rating, highest rating first. blockedStreak defaults to 0 when the entry has
+// no blocked_streak.
+export function harvestEloTrends(eloProfile) {
+  const domains = (eloProfile && eloProfile.domains) || {};
+  return Object.entries(domains)
+    .filter(([, entry]) => entry && typeof entry === 'object' && typeof entry.rating === 'number')
+    .map(([domain, entry]) => ({
+      domain,
+      rating: entry.rating,
+      blockedStreak: entry.blocked_streak || 0,
+    }))
+    .sort((a, b) => b.rating - a.rating);
+}
+// Assemble the digest and derive the GATED proposals. Every proposal carries
+// gated:true — it is a suggestion for a human to ratify, and nothing here
+// applies it. Thresholds, deliberately conservative:
+//   tool-reliability  failureRate > 0.1 and at least one failing tool
+//   recurring-gap     a recurring theme seen 3 times or more
+//   routing           n >= 5 and keepRate < 0.5
+// Missing inputs fall back to empty sections in the returned digest.
+export function buildDigest({ toolHealth, gaps, routing, elo } = {}) {
+  const propose = (kind, suggestion) => ({ kind, gated: true, suggestion });
+  const proposals = [];
+
+  if (toolHealth && toolHealth.failureRate > 0.1 && toolHealth.topFailing.length) {
+    const worst = toolHealth.topFailing[0];
+    proposals.push(
+      propose(
+        'tool-reliability',
+        `Tool failure rate ${toolHealth.failureRate}; top offender ${worst.tool} (${worst.count}). Investigate before relying on it.`
+      )
+    );
+  }
+
+  for (const gap of (gaps && gaps.recurring) || []) {
+    if (gap.count < 3) continue;
+    proposals.push(
+      propose(
+        'recurring-gap',
+        `Recurring self-verify gap "${gap.theme}" (${gap.count}x). Consider a pre-build checklist item.`
+      )
+    );
+  }
+
+  for (const row of routing || []) {
+    if (row.n < 5 || !(row.keepRate < 0.5)) continue;
+    proposals.push(
+      propose(
+        'routing',
+        `Cheap model ${row.model} underperforms on "${row.leaf}" (keepRate ${row.keepRate}, n=${row.n}). Consider keeping that leaf deep.`
+      )
+    );
+  }
+
+  return {
+    toolHealth: toolHealth || null,
+    recurringGaps: gaps || { totalGapFindings: 0, recurring: [] },
+    routingOutcomes: routing || [],
+    eloTrends: elo || [],
+    proposals,
+  };
+}
+// Render a digest (the shape buildDigest returns) as plain text for the CLI and
+// the skill. Sections, in order: tool health (only when present), recurring
+// gaps, the first 8 routing rows, the first 6 ELO domains, then the gated
+// proposals. Returns the lines joined with '\n'.
+export function renderDigest(d) {
+  const out = ['BYAN session insight digest', ''];
+
+  const health = d.toolHealth;
+  if (health) {
+    out.push(
+      `Tool health: ${health.calls} calls, ${health.failures} failures (rate ${health.failureRate}), ~${health.estOutputTokens} output tokens.`
+    );
+    if (health.topFailing.length) {
+      const offenders = health.topFailing.map((t) => `${t.tool}(${t.count})`).join(', ');
+      out.push(`  Top failing: ${offenders}`);
+    }
+  }
+
+  const gapSummary = d.recurringGaps.recurring.map((g) => `${g.theme}(${g.count})`).join(', ');
+  out.push(`Recurring gaps: ${gapSummary || 'none'}`);
+
+  if (d.routingOutcomes.length) {
+    out.push('Routing outcomes (cheap-model keep-rate):');
+    const shown = d.routingOutcomes.slice(0, 8);
+    out.push(...shown.map((r) => `  ${r.model}::${r.leaf} -> keep ${r.keepRate} (n=${r.n})`));
+  }
+
+  if (d.eloTrends.length) {
+    const trends = d.eloTrends.slice(0, 6).map((e) => `${e.domain}=${e.rating}`);
+    out.push(`ELO trends: ${trends.join(', ')}`);
+  }
+
+  out.push('', `Proposals (GATED — human ratifies, nothing auto-applied): ${d.proposals.length}`);
+  out.push(...d.proposals.map((p) => `  [${p.kind}] ${p.suggestion}`));
+  return out.join('\n');
+}
+// I/O entry: read the four trails under rootDir through the injected reader
+// (defaults to node:fs) and build the digest. Any read or JSON parse failure is
+// treated as an empty trail, so a fresh checkout with no trails still yields a
+// (empty) digest instead of throwing.
+export function harvest({ rootDir, io = fs } = {}) {
+  // Best-effort text read: '' when the file cannot be read.
+  function readText(rel) {
+    try {
+      return io.readFileSync(path.join(rootDir, rel), 'utf8');
+    } catch {
+      return '';
+    }
+  }
+  // Best-effort JSON read: null when the file is missing, empty, or invalid.
+  function readJson(rel) {
+    const raw = readText(rel);
+    if (!raw) return null;
+    try {
+      return JSON.parse(raw);
+    } catch {
+      return null;
+    }
+  }
+
+  const toolLog = parseJsonl(readText('_byan-output/tool-log.jsonl'));
+  const auditLog = parseJsonl(readText('.byan-strict/audit.log'));
+  const ledger = readJson('_byan-output/suitability-ledger.json');
+  const eloProfile = readJson('_byan/memoire/elo-profile.json');
+
+  return buildDigest({
+    toolHealth: harvestToolHealth(toolLog),
+    gaps: harvestStrictGaps(auditLog),
+    routing: harvestRouting(ledger),
+    elo: harvestEloTrends(eloProfile),
+  });
+}

package/install/templates/_byan/mcp/byan-mcp-server/lib/kanban.js CHANGED Viewed

@@ -9,9 +9,12 @@
  * Stand-up : _byan-output/party-mode-sessions/<session_id>/standup.jsonl
  *   entries : { agent, timestamp, did, blockers, next }
  *
- * Hermes watches stand-ups : an agent with 2+ consecutive "blocked"
- * reports in the stand-up stream is flagged and their card is moved to
- * `blocked` column in the kanban.
+ * Wiring : the byan-orchestrate skill posts one stand-up per role at the
+ * aggregate step (byan_standup_post) and calls byan_standup_blocked to surface
+ * stuck roles. In the single-pass aggregate it uses minStreak:1 (one stand-up
+ * per role, so a 2-in-a-row streak is unreachable); a non-ok role's card is
+ * moved to the `blocked` column at the same step (byan_kanban_move). A
+ * longer-lived session that posts repeatedly uses the default minStreak:2.
  */
 import fs from 'node:fs';

package/install/templates/_byan/mcp/byan-mcp-server/lib/leantime-fd-core.js ADDED Viewed

@@ -0,0 +1,205 @@
+// Pure decision core for the FD -> Leantime auto-sync hook.
+//
+// WHY a separate pure module: a Claude Code hook is an I/O shell (stdin payload,
+// network, disk) that is awkward to unit-test. The risky logic — which Leantime
+// calls a phase transition implies, and the idempotence that stops duplicates —
+// lives here with ZERO I/O, so every transition is testable as a data
+// transform. The shell (.claude/hooks/leantime-fd-sync.js) feeds this the parsed
+// fd-state + the sidecar map and executes the returned intents against
+// lib/leantime-sync.js.
+//
+// State-coupling: this module reads fd-state echoed by the MCP tool; it does not
+// read or write fd-state.json. The Leantime id mapping is the caller's sidecar.
+// The two FD MCP tools whose result carries the post-transition fd-state.
+export const FD_ADVANCE = 'byan_fd_advance';
+export const FD_UPDATE = 'byan_fd_update';
+// Classify a tool name as an FD 'advance' or 'update' call; returns null for
+// any other tool or for a non-string name. Matching is by suffix, so the bare
+// name and any prefixed form such as mcp__<server>__byan_fd_advance both
+// qualify, and it keeps working if the server key is renamed. The comparison
+// is case-sensitive.
+export function fdToolKind(toolName) {
+  if (typeof toolName !== 'string') return null;
+  // A suffix test also covers the exact-name and `__`-prefixed forms.
+  if (toolName.endsWith(FD_ADVANCE)) return 'advance';
+  if (toolName.endsWith(FD_UPDATE)) return 'update';
+  return null;
+}
+// Recover the fd-state object from what the MCP tool returned. Three input
+// shapes are accepted:
+//   - an MCP envelope { content: [{ type:'text', text:'<json>' }] } — the first
+//     part with a string `text` is taken as the JSON payload;
+//   - a raw JSON string;
+//   - an already-parsed object.
+// Returns the state object when the result is an object with a string `phase`,
+// otherwise null (empty input, invalid JSON, or an unrecognized shape).
+export function parseFdState(toolResponse) {
+  if (!toolResponse) return null;
+
+  let payload = toolResponse;
+  if (typeof payload === 'object' && Array.isArray(payload.content)) {
+    const firstText = payload.content.find((part) => part && typeof part.text === 'string');
+    // No text part: fall through and judge the envelope object itself.
+    if (firstText) payload = firstText.text;
+  }
+
+  if (typeof payload === 'string') {
+    try {
+      payload = JSON.parse(payload);
+    } catch {
+      return null;
+    }
+  }
+
+  const looksLikeState =
+    Boolean(payload) && typeof payload === 'object' && typeof payload.phase === 'string';
+  return looksLikeState ? payload : null;
+}
+// FD phase order. Used to gate task creation to DISPATCH-onward (a feature's
+// task carries its assigned specialist, set at DISPATCH) and to recognize
+// terminal phases.
+const PHASE_RANK = {
+  DISCOVERY: 0,
+  BRAINSTORM: 1,
+  PRUNE: 2,
+  DISPATCH: 3, // decideActions creates tasks from this rank onward
+  BUILD: 4,
+  REVIEW: 5,
+  VALIDATE: 6,
+  REFACTOR: 7,
+  DOC: 8,
+  COMPLETED: 9,
+  ABORTED: 9, // same rank as COMPLETED
+};
+// Status of the most recent review finding that has a string `status`, scanning
+// state.review_findings from the end; null when there is none (or the field is
+// not an array).
+function lastReviewStatus(state) {
+  const findings = Array.isArray(state.review_findings) ? state.review_findings : [];
+  let index = findings.length;
+  while (index-- > 0) {
+    const entry = findings[index];
+    if (entry && typeof entry.status === 'string') return entry.status;
+  }
+  return null;
+}
+// The board column the mapped tasks should reflect for the current fd-state.
+// Board-wide (not per-feature): a BYAN FD builds its backlog together, and the
+// per-item backlog status is agent-maintained and can lag a phase. Mirrors the
+// SKILL section 2.5 fire points. Returns a column name, or null for "no move".
+export function columnForState(state) {
+  const { phase } = state;
+
+  // ABORTED: leave the board verbatim — the diagnostic of where it died.
+  if (phase === 'ABORTED') return null;
+  if (phase === 'COMPLETED') return 'done';
+  // DOC: validated, awaiting the COMPLETED sweep to done.
+  if (phase === 'DOC') return 'review';
+  if (phase === 'BUILD') return 'doing';
+  if (phase === 'REFACTOR') return 'blocked';
+
+  if (phase === 'VALIDATE') {
+    const verdict = state.validate_verdict;
+    const status = verdict ? verdict.status : undefined;
+    if (status === 'KO') return 'blocked';
+    return status === 'OK' ? 'review' : 'doing';
+  }
+
+  if (phase === 'REVIEW') {
+    const status = lastReviewStatus(state);
+    if (status === 'needs-rework') return 'blocked';
+    return status === 'ready-for-validate' ? 'review' : 'doing';
+  }
+
+  // DISCOVERY, BRAINSTORM, PRUNE, DISPATCH (and any unrecognized phase)
+  return 'todo';
+}
+// Decide the ordered Leantime intents for one hook fire. Reconcile-from-state:
+// each fire (re)ensures the project exists, ensures every backlog task exists
+// (DISPATCH onward), and drives all mapped tasks to the column the current
+// fd-state implies. Idempotence is the sidecar's job — project_ensure/task_create
+// are emitted only when the id is absent, so a dropped call last fire is retried
+// and a duplicate is not created.
+//
+//   args: { toolName, state, sidecar, assignUserConfigured }
+//   sidecar: { projectId?, tasks?: { <backlogId>: taskId }, lastColumn?, moveFailed? }
+//            for THIS fd_id; a null or non-object sidecar is treated as empty
+//   returns: { skip: reason, intents: [] } when nothing applies, otherwise
+//            { intents: [...], column }
+//
+// Intent ops (the shell maps each to a leantime-sync call):
+//   { op:'project_ensure', name, slug, details }
+//   { op:'assign_user' }                              // only if configured (shell sequences it after project_ensure)
+//   { op:'task_create', backlogId, headline }
+//   { op:'task_move', backlogId, column }
+export function decideActions({ toolName, state, sidecar = {}, assignUserConfigured = false } = {}) {
+  const kind = fdToolKind(toolName);
+  if (!kind) return { skip: 'not-fd-tool', intents: [] };
+  if (!state || typeof state.phase !== 'string') return { skip: 'no-state', intents: [] };
+  const projectName =
+    (state.project_context && state.project_context.name) || state.feature_name || null;
+  if (!projectName) return { skip: 'no-project-name', intents: [] };
+
+  // The parameter default only covers `undefined`; a sidecar that was read as
+  // null (or anything that is not an object) must not crash the hook.
+  const side = sidecar && typeof sidecar === 'object' ? sidecar : {};
+  const tasks = side.tasks && typeof side.tasks === 'object' ? side.tasks : {};
+  // Own-property check: a backlog id such as 'constructor' must not be mistaken
+  // for an already-mapped task through the prototype chain.
+  const isMapped = (id) => Object.prototype.hasOwnProperty.call(tasks, id) && Boolean(tasks[id]);
+
+  const intents = [];
+  // 1. Ensure the project. Emitted only when the sidecar has no projectId yet.
+  if (!side.projectId) {
+    intents.push({
+      op: 'project_ensure',
+      name: projectName,
+      slug: (state.project_context && state.project_context.slug) || undefined,
+      details: `BYAN FD ${state.fd_id || ''} — auto-synced board.`.trim(),
+    });
+    if (assignUserConfigured) intents.push({ op: 'assign_user' });
+  }
+
+  const rank = PHASE_RANK[state.phase] ?? 0;
+  const backlog = Array.isArray(state.backlog) ? state.backlog : [];
+  // Ids are tracked as strings so they compare equal to the sidecar's object
+  // keys (a numeric backlog id would otherwise look "unseen" in the sweep).
+  const createdIds = new Set();
+  // 2. Create a task per backlog item once the FD has reached DISPATCH (the task
+  //    then carries the dispatched specialist). Skipped items are not created,
+  //    and an id that appears twice in the backlog is created only once.
+  if (rank >= PHASE_RANK.DISPATCH && state.phase !== 'ABORTED') {
+    for (const item of backlog) {
+      if (!item || !item.id || item.status === 'skipped') continue;
+      const id = String(item.id);
+      if (isMapped(id) || createdIds.has(id)) continue;
+      intents.push({ op: 'task_create', backlogId: item.id, headline: item.title || item.id });
+      createdIds.add(id);
+    }
+  }
+
+  // 3. Move every task (already-mapped + just-created) to the column the current
+  //    state implies. ABORTED yields no column -> no move. To bound RPC volume
+  //    (byan_fd_update fires several times per phase), moves are emitted only when
+  //    the target column changed since the last applied fire (sidecar.lastColumn),
+  //    OR a task was just created, OR the prior fire left a move unsynced
+  //    (sidecar.moveFailed) so a dropped move self-heals on the next event.
+  const column = columnForState(state);
+  const moveNeeded =
+    column && (column !== side.lastColumn || createdIds.size > 0 || side.moveFailed === true);
+  if (moveNeeded) {
+    const seen = new Set();
+    for (const item of backlog) {
+      if (!item || !item.id || item.status === 'skipped') continue;
+      const id = String(item.id);
+      if (seen.has(id)) continue;
+      // a just-created task is moved in the same fire; an already-mapped one is
+      // reconciled to the current column (self-heals a dropped earlier move).
+      if (isMapped(id) || createdIds.has(id)) {
+        intents.push({ op: 'task_move', backlogId: item.id, column });
+        seen.add(id);
+      }
+    }
+    // tasks in the sidecar that are no longer in the backlog still get swept to a
+    // terminal column on COMPLETED, so the board does not strand a renamed item.
+    if (state.phase === 'COMPLETED') {
+      for (const backlogId of Object.keys(tasks)) {
+        if (seen.has(backlogId)) continue;
+        intents.push({ op: 'task_move', backlogId, column });
+        seen.add(backlogId);
+      }
+    }
+  }
+  // `column` is the current target; the shell persists it as sidecar.lastColumn so
+  // the next fire can skip a redundant move sweep.
+  return { intents, column };
+}