npm - create-byan-agent - Versions diffs - 2.25.0 → 2.26.0 - Mend

create-byan-agent 2.25.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161) hide show

package/install/templates/.claude/hooks/leantime-fd-sync.js ADDED Viewed

@@ -0,0 +1,216 @@
+#!/usr/bin/env node
+// PostToolUse hook — mirror the BYAN FD lifecycle onto a Leantime board with no
+// agent action. Fires on byan_fd_advance / byan_fd_update; reads the fd-state the
+// tool echoed; drives lib/leantime-sync.js (ensure project, create tasks, move
+// columns) per the pure decision core lib/leantime-fd-core.js.
+//
+// Best-effort and bounded by design:
+//   - exits 0 in every path (a sync issue does not block the turn; this hook
+//     does not use the exit-2 blocking path);
+//   - no-ops silently when the tool is not an FD tool, no FD is active, or
+//     Leantime is not configured (syncEnabled false);
+//   - it never WRITES fd-state.json (state-coupling); it reads the state the tool
+//     echoed, with a read-only fd-state.json fallback. The Leantime id map lives
+//     in the gitignored .byan-leantime/ sidecar;
+//   - a per-call timeout plus a hook wall-clock budget keep a slow Leantime from
+//     stalling the turn; a dropped call self-heals on the next phase event
+//     (reconcile-from-state, tracked by sidecar.moveFailed).
+//
+// CJS shell + ESM libs reached via dynamic import() (the drain-advisory.js bridge).
+const fs = require('fs');
+const path = require('path');
+const { pathToFileURL } = require('url');
+const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd(); // project root: CLAUDE_PROJECT_DIR when set, else the current working directory
+const SIDECAR_DIR = path.join(ROOT, '.byan-leantime'); // sidecar directory that holds the id map and the sync log
+const MAP_PATH = path.join(SIDECAR_DIR, 'map.json'); // sidecar state keyed by FD id (read by readMap, written by writeMap)
+const LOG_PATH = path.join(SIDECAR_DIR, 'sync.jsonl'); // append-only log, one JSON object per line (written by logLine)
+const FD_STATE_PATH = path.join(ROOT, '_byan-output', 'fd-state.json'); // only read in this file, as a fallback when the tool response carries no state
+const PER_CALL_MS = 2500; // below the lib default (5000) so the hook bounds each call
+const HOOK_BUDGET_MS = 8000; // between-stage advisory, checked at each stage boundary (not a hard ceiling); a move issues 2 RPCs so a late stage can overrun it, though no call hangs (each aborts at PER_CALL_MS)
+// Reasons that deserve a one-line breadcrumb (a real wire/host issue, not "off").
+const LOUD = new Set(['non_json', 'timeout', 'rpc_error']);
+const isLoud = (reason) => typeof reason === 'string' && (LOUD.has(reason) || reason.startsWith('http_')); // any "http_"-prefixed reason is loud too; non-string reasons never are
+function readStdin() {
+  return new Promise((resolve) => {
+    if (process.stdin.isTTY) return resolve('');
+    let data = '';
+    process.stdin.on('data', (c) => (data += c));
+    process.stdin.on('end', () => resolve(data));
+    process.stdin.on('error', () => resolve(data));
+  });
+}
+function emit(additionalContext = '') {
+  process.stdout.write(
+    JSON.stringify({ hookSpecificOutput: { hookEventName: 'PostToolUse', additionalContext } }),
+  );
+  process.exit(0);
+}
+function readMap() {
+  try {
+    return JSON.parse(fs.readFileSync(MAP_PATH, 'utf8'));
+  } catch {
+    return {};
+  }
+}
+function writeMap(map) {
+  try {
+    fs.mkdirSync(SIDECAR_DIR, { recursive: true });
+    const tmp = `${MAP_PATH}.${process.pid}.tmp`;
+    fs.writeFileSync(tmp, JSON.stringify(map, null, 2));
+    fs.renameSync(tmp, MAP_PATH); // atomic swap so a crash mid-write keeps the old map
+  } catch {
+    // the sidecar is housekeeping; a write failure must not break the hook
+  }
+}
+function logLine(entry) {
+  try {
+    fs.mkdirSync(SIDECAR_DIR, { recursive: true });
+    fs.appendFileSync(LOG_PATH, `${JSON.stringify(entry)}\n`);
+  } catch {
+    // the log is housekeeping; swallow
+  }
+}
+(async () => { // entry point: parse the hook payload, decide the sync actions, run them stage by stage, then emit
+  let payload = {};
+  try {
+    const raw = await readStdin();
+    payload = raw ? JSON.parse(raw) : {}; // empty stdin -> empty payload
+  } catch {
+    return emit(); // unparseable payload -> silent no-op
+  }
+  try {
+    const toolName = payload.tool_name || payload.toolName || ''; // either key spelling is accepted
+    const esm = (rel) => import(pathToFileURL(path.join(ROOT, rel)).href); // dynamic-import an ESM lib by its path relative to ROOT
+    const core = await esm('_byan/mcp/byan-mcp-server/lib/leantime-fd-core.js');
+    if (!core.fdToolKind(toolName)) return emit(); // not an FD tool
+    // Read state from the tool's echoed result (state-coupling: no fd-state write);
+    // fall back to a read-only parse of fd-state.json when the result carries none.
+    const resp = payload.tool_response ?? payload.toolResponse ?? payload.response ?? null;
+    let state = core.parseFdState(resp);
+    if (!state) {
+      try {
+        state = JSON.parse(fs.readFileSync(FD_STATE_PATH, 'utf8'));
+      } catch {
+        state = null;
+      }
+    }
+    if (!state || typeof state.phase !== 'string') return emit(); // no usable state -> silent no-op
+    const lt = await esm('_byan/mcp/byan-mcp-server/lib/leantime-sync.js');
+    if (!lt.syncEnabled()) return emit(); // Leantime not configured -> silent no-op
+    const fdId = state.fd_id || 'unknown'; // states without an fd_id all share the 'unknown' map entry
+    const map = readMap();
+    const sidecar = map[fdId] || { tasks: {} };
+    sidecar.tasks = sidecar.tasks || {};
+    const assignUserConfigured = lt.assignUserId() != null;
+    const { intents, column } = core.decideActions({ toolName, state, sidecar, assignUserConfigured });
+    if (!intents || !intents.length) { // nothing to send: at most persist a changed column
+      if (column && sidecar.lastColumn !== column) {
+        sidecar.lastColumn = column;
+        map[fdId] = sidecar;
+        writeMap(map);
+      }
+      return emit();
+    }
+    const opts = { timeoutMs: PER_CALL_MS };
+    const deadline = Date.now() + HOOK_BUDGET_MS;
+    const timeLeft = () => deadline - Date.now(); // <= 0 once the budget is spent; checked before stages 2-4
+    let firstLoud = null;
+    let moveFailed = false;
+    const record = (event, target, r) => { // log one call outcome and remember the first loud failure reason
+      const ok = !!(r && r.ok);
+      const synced = !!(r && r.synced);
+      logLine({ ts: new Date().toISOString(), fd_id: fdId, phase: state.phase, event, target, ok, synced, reason: (r && r.reason) || null });
+      if (!synced && isLoud(r && r.reason) && !firstLoud) firstLoud = r.reason;
+    };
+    // 1. Ensure the project (sequential — every later call needs the projectId).
+    const ensureIntent = intents.find((i) => i.op === 'project_ensure');
+    if (ensureIntent && !sidecar.projectId) { // skipped when the sidecar already knows the project
+      const r = await lt.ensureProject({ name: ensureIntent.name, slug: ensureIntent.slug, details: ensureIntent.details }, opts);
+      record('project_ensure', ensureIntent.name, r);
+      if (r.ok && r.id) {
+        sidecar.projectId = r.id;
+        map[fdId] = sidecar;
+        writeMap(map); // persist immediately so a crash cannot re-create the project
+      }
+    }
+    // 2. Make the project visible to the configured human (best-effort).
+    if (intents.some((i) => i.op === 'assign_user') && sidecar.projectId && timeLeft() > 0) {
+      const r = await lt.assignUserToProject({ projectId: sidecar.projectId }, opts);
+      record('assign_user', sidecar.projectId, r);
+    }
+    // 3. Create tasks (parallel, bounded by the wall-clock budget).
+    const createIntents = intents.filter((i) => i.op === 'task_create');
+    if (createIntents.length && sidecar.projectId && timeLeft() > 0) {
+      const results = await Promise.allSettled(
+        createIntents.map((i) =>
+          lt.createTask({ projectId: sidecar.projectId, headline: i.headline }, opts).then((r) => ({ i, r })),
+        ),
+      );
+      for (const s of results) {
+        if (s.status === 'fulfilled') { // a rejected createTask promise is neither logged nor stored here
+          const { i, r } = s.value;
+          record('task_create', i.backlogId, r);
+          if (r.ok && r.id) sidecar.tasks[i.backlogId] = r.id; // backlog id -> Leantime task id
+        }
+      }
+      map[fdId] = sidecar;
+      writeMap(map);
+    }
+    // 4. Move tasks to the current column (parallel, bounded).
+    const moveIntents = intents.filter((i) => i.op === 'task_move');
+    if (moveIntents.length && sidecar.projectId && timeLeft() > 0) {
+      const results = await Promise.allSettled(
+        moveIntents.map((i) => {
+          const taskId = sidecar.tasks[i.backlogId];
+          if (!taskId) return Promise.resolve({ i, r: { ok: false, synced: false, reason: 'no_task_id' } }); // no known task -> synthetic failure, no RPC
+          return lt.moveTask({ taskId, projectId: sidecar.projectId, column: i.column }, opts).then((r) => ({ i, r }));
+        }),
+      );
+      for (const s of results) {
+        if (s.status === 'fulfilled') {
+          const { i, r } = s.value;
+          record('task_move', i.backlogId, r);
+          if (!(r && r.synced)) moveFailed = true;
+        } else {
+          moveFailed = true; // a rejected move also counts as failed (it is not logged)
+        }
+      }
+    } else if (moveIntents.length) {
+      // could not run the moves this fire (budget/no project) -> retry next event
+      moveFailed = true;
+    }
+    if (column) sidecar.lastColumn = column;
+    sidecar.moveFailed = moveFailed;
+    sidecar.updatedAt = new Date().toISOString();
+    map[fdId] = sidecar;
+    writeMap(map);
+    if (firstLoud) { // surface one breadcrumb for the first loud failure of this run
+      return emit(`Leantime sync: ${firstLoud} on ${state.phase} (board may lag; retried next phase). Check LEANTIME_API_URL / token.`);
+    }
+    return emit();
+  } catch {
+    return emit(); // any failure degrades silently — the sync is housekeeping
+  }
+})();

package/install/templates/.claude/hooks/lib/autobench-config.json ADDED Viewed

@@ -0,0 +1,81 @@
+{
+  "_generated_by": "byan-sync-rules",
+  "_note": "Runtime subset read by autobench-stop-guard.js. Edit _byan/_config/autobench.yaml and regenerate; do not hand-edit. Regexes are {source, flags} pairs reconstructed into RegExp at load time.",
+  "version": 1,
+  "marker_patterns": {
+    "any": {
+      "source": "<!--\\s*BYAN-BENCH:(done|skip)\\b",
+      "flags": "i"
+    },
+    "done": {
+      "source": "<!--\\s*BYAN-BENCH:done\\b",
+      "flags": "i"
+    },
+    "skip": {
+      "source": "<!--\\s*BYAN-BENCH:skip\\b",
+      "flags": "i"
+    }
+  },
+  "marker_fields": {
+    "g1": {
+      "source": "g1=(\\d+)",
+      "flags": "i"
+    },
+    "g2": {
+      "source": "g2=(\\d+)",
+      "flags": "i"
+    },
+    "scope": {
+      "source": "scope=(internal|external)",
+      "flags": "i"
+    }
+  },
+  "never_list": [
+    {
+      "source": "\\b(yes/no|y/n|confirm|proceed\\?|continue\\?|ok\\?|on continue\\?|je confirme)\\b",
+      "flags": "i"
+    },
+    {
+      "source": "\\b(delete|drop|rm -rf|overwrite|force push|reset --hard|supprimer|écraser)\\b",
+      "flags": "i"
+    }
+  ],
+  "choice_language": [
+    {
+      "source": "\\boption\\s*[1-3a-c]\\b",
+      "flags": "ig",
+      "min_matches": 2
+    },
+    {
+      "source": "^[ \\t]*[-*][ \\t]+[A-Z][^\\n]{0,80}(:|[ \\t]-[ \\t])",
+      "flags": "gm",
+      "min_matches": 2
+    },
+    {
+      "source": "\\b(should I|veux-tu que je|do you want me to|préfères-tu|which (one|approach|option)|A or B|soit .* soit )\\b",
+      "flags": "i"
+    },
+    {
+      "source": "\\b(pros?|cons?|trade-?offs?|avantages?|inconvénients?)\\b",
+      "flags": "i",
+      "requires_candidates": 2
+    }
+  ],
+  "candidate_token": {
+    "source": "\\b(option|approach|approche|alternative|choix|solution|stack|library|librairie|vendor|standard)s?\\b",
+    "flags": "ig"
+  },
+  "escape_hatch": {
+    "session_flag": ".byan-autobench/off",
+    "disabled": false
+  },
+  "enforcement": {
+    "armed": false
+  },
+  "ledger": {
+    "path": "_byan-output/benchmark-ledger.jsonl"
+  },
+  "banners": {
+    "stop_block": "Auto-benchmark: you are presenting a choice between options but emitted no BYAN-BENCH marker. Re-present the fork as the compact 1-table benchmark (Option | criteria | Niv + best-first reco), then emit <!-- BYAN-BENCH:done g1=.. g2=.. scope=.. -->. If this is a confirm/destructive/obvious-default prompt, emit <!-- BYAN-BENCH:skip reason=.. -->. To disable for this session: touch .byan-autobench/off."
+  }
+}

package/install/templates/.claude/hooks/lib/autobench-fc-enrich.js ADDED Viewed

@@ -0,0 +1,251 @@
+// BYAN-only opt-in evidence enrichment for the byan-benchmark matrix (C5d).
+//
+// The native workflow (.claude/workflows/byan-benchmark.js) returns a DATA
+// matrix where each cell carries a self-graded evidence `level` (L1..L5) and an
+// `unverified` flag. That self-grade is the model judging its own claim. Inside
+// ~/BYAN the orchestrating skill can do better: it can call the byan_fc_check
+// MCP tool per factual cell and stamp an AUDITED evidence level onto the cell,
+// turning the Niv column into a fact-checked authority rather than a self-grade.
+//
+// This module is the pure, testable core of that wiring. It does NOT know about
+// MCP transport: the caller injects an async `check(text) -> { level, score, ...}`
+// function (in BYAN, a thin adapter over mcp__byan__byan_fc_check; in tests, a
+// mock). Without an injected checker the matrix is returned unchanged, which is
+// why the layer is OPT-IN and BYAN-only by construction: a platform that cannot
+// reach byan_fc_check simply does not pass a checker and gets the self-graded
+// matrix back, untouched.
+//
+// Strict-domain floors (mirrors .claude/rules/fact-check.md and the engine's
+// STRICT_FLOORS): a security/performance claim must reach L2, a compliance claim
+// L1, or the cell stays flagged [UNVERIFIED] no matter what the checker returned.
+// Enrichment replaces the self-graded level with the audited one (which may be
+// stronger or weaker) and flags any shortfall; a cell below its domain floor
+// always stays [UNVERIFIED].
+'use strict';
+// Strict-domain minimum evidence levels. Numeric so floor comparison is a plain
+// `<=` (L1 is the strongest -> the smallest number). Kept in sync with the
+// engine's STRICT_FLOORS map and the fact-check rule doc.
+const STRICT_FLOORS = { security: 2, performance: 2, compliance: 1 };
+// Default heuristic: which cells are "hard claims" worth fact-checking. A cell
+// is a hard claim when it sits in a strict domain (every cell is then a claim
+// because the floor applies) OR its verdict text uses an absolute / superlative
+// the fact-check auto-detection also keys on. Low-stakes internal cells with a
+// hedged verdict are skipped to keep latency down (anti-bloat, C4).
+const ABSOLUTE_RE =
+  /\b(always|never|toujours|jamais|forcement|obviously|guaranteed|fastest|slowest|best|worst|optimal|superior|plus rapide|le plus|mieux|meilleur|fully|completely|zero|100%)\b/i;
+const STRICT_DOMAINS = Object.keys(STRICT_FLOORS);
+// Parse an "L3" / "l2" / 3 style level into the 1..5 integer, or null if absent.
+function parseLevel(level) {
+  if (typeof level === 'number' && Number.isFinite(level)) {
+    return level >= 1 && level <= 5 ? Math.round(level) : null;
+  }
+  if (typeof level === 'string') {
+    const m = level.match(/L?\s*([1-5])\b/i);
+    if (m) return Number(m[1]);
+  }
+  return null;
+}
+// Render a numeric level back to the canonical "L{n}" the matrix uses.
+function levelLabel(n) {
+  return n == null ? null : `L${n}`;
+}
+// Decide whether a cell is a hard claim worth an fc_check call.
+//   - any cell in a strict domain is a hard claim (the floor must be enforced);
+//   - otherwise, a cell whose verdict uses an absolute/superlative is a claim;
+//   - an explicit isHardClaim flag on the cell forces inclusion.
+// Returns false for hedged, low-stakes internal cells so enrichment stays cheap.
+function isHardClaim(cell, domain) {
+  if (!cell) return false;
+  if (cell.isHardClaim === true) return true;
+  if (STRICT_DOMAINS.includes(domain)) return true;
+  const verdict = typeof cell.verdict === 'string' ? cell.verdict : '';
+  const claim = typeof cell.claim === 'string' ? cell.claim : '';
+  return ABSOLUTE_RE.test(verdict) || ABSOLUTE_RE.test(claim);
+}
+// Build the text the checker scores for a cell. Prefer an explicit cell.claim
+// (the factual basis the SOURCE leaf wrote); fall back to the qualitative
+// verdict joined with the criterion so the checker has a self-contained claim.
+function cellClaimText(cell) {
+  if (cell && typeof cell.claim === 'string' && cell.claim.trim()) return cell.claim.trim();
+  const criterion = cell && cell.criterion ? String(cell.criterion) : '';
+  const verdict = cell && cell.verdict ? String(cell.verdict) : '';
+  return [criterion, verdict].filter(Boolean).join(': ').trim();
+}
+// Apply a single fc_check result to a cell. PURE given the result: returns a NEW
+// cell object (never mutates the input), records the audited level/score, the
+// fact-check status and assertionType, and re-evaluates the strict-domain floor.
+function applyCheckToCell(cell, result, domain) {
+  const checkedLevel = result ? parseLevel(result.level) : null;
+  const floor = STRICT_FLOORS[domain] || null;
+  // Below the domain floor (or unscored) -> the cell stays unverified regardless
+  // of the prior self-grade. A claim that cannot be sourced to its floor is not
+  // trustworthy in a strict domain.
+  const belowFloor =
+    floor != null && (checkedLevel == null || checkedLevel > floor);
+  const blocked = result && result.status === 'BLOCKED';
+  const out = Object.assign({}, cell);
+  out.fcChecked = true;
+  if (result) {
+    out.fcStatus = result.status;
+    out.fcAssertionType = result.assertionType;
+    if (typeof result.score === 'number') out.fcScore = result.score;
+  }
+  if (checkedLevel != null) {
+    out.level = levelLabel(checkedLevel);
+  }
+  if (belowFloor || blocked) {
+    out.unverified = true;
+    out.fcFloor = floor != null ? `L${floor}` : null;
+    out.fcBelowFloor = true;
+  } else if (checkedLevel != null) {
+    // A genuine audited level at or above the floor clears the unverified flag
+    // ONLY when the checker actually classified it as a CLAIM/FACT (not a bare
+    // HYPOTHESIS/OPINION). Otherwise leave the flag as the engine set it.
+    if (result && (result.status === 'CLAIM' || result.status === 'VERIFIED')) {
+      out.unverified = false;
+    }
+    out.fcBelowFloor = false;
+  }
+  return out;
+}
+/**
+ * Enrich a benchmark matrix in place-free fashion (returns a NEW matrix).
+ *
+ * @param {object} params
+ * @param {object} params.benchmark   The DATA object the workflow returned
+ *                                    ({ matrix, domain, scope, ... }).
+ * @param {(text: string) => Promise<object>} [params.check]
+ *                                    Async checker; in BYAN a thin adapter over
+ *                                    mcp__byan__byan_fc_check. If omitted, the
+ *                                    matrix is returned unchanged (opt-in).
+ * @param {boolean} [params.enabled=true]  Master opt-in switch.
+ * @param {(cell, domain) => boolean} [params.claimSelector]
+ *                                    Override the hard-claim heuristic.
+ * @returns {Promise<object>} A new benchmark object with enriched matrix and an
+ *                            `enrichment` report ({ enabled, checked, raised,
+ *                            flagged, skipped }).
+ */
+async function enrichMatrix(params) {
+  const {
+    benchmark,
+    check,
+    enabled = true,
+    claimSelector = isHardClaim,
+  } = params || {};
+  if (!benchmark || typeof benchmark !== 'object') {
+    throw new Error('enrichMatrix requires a benchmark object');
+  }
+  const domain = benchmark.domain || 'general';
+  const matrix = Array.isArray(benchmark.matrix) ? benchmark.matrix : [];
+  // Opt-in guard: no checker, disabled, or a degenerate (un-tabled) benchmark ->
+  // return the input untouched with an honest report. This is the BYAN-only
+  // gate: other platforms never inject a checker, so they get this branch.
+  if (!enabled || typeof check !== 'function' || benchmark.degenerate) {
+    return Object.assign({}, benchmark, {
+      enrichment: {
+        enabled: false,
+        reason: !enabled
+          ? 'disabled'
+          : typeof check !== 'function'
+            ? 'no-checker'
+            : 'degenerate',
+        checked: 0,
+        raised: 0,
+        flagged: 0,
+        skipped: countCells(matrix),
+      },
+    });
+  }
+  let checked = 0;
+  let raised = 0;
+  let flagged = 0;
+  let skipped = 0;
+  const newMatrix = [];
+  for (const row of matrix) {
+    const cells = Array.isArray(row && row.cells) ? row.cells : [];
+    const newCells = [];
+    for (const cell of cells) {
+      if (!claimSelector(cell, domain)) {
+        skipped += 1;
+        newCells.push(cell);
+        continue;
+      }
+      const text = cellClaimText(cell);
+      if (!text) {
+        skipped += 1;
+        newCells.push(cell);
+        continue;
+      }
+      let result = null;
+      try {
+        result = await check(text);
+      } catch {
+        // A checker failure must never break the benchmark: fall back to the
+        // self-graded cell, flagged so the gap is auditable, and keep going.
+        const fallback = Object.assign({}, cell, { fcChecked: false, fcError: true });
+        newCells.push(fallback);
+        skipped += 1;
+        continue;
+      }
+      const beforeLevel = parseLevel(cell && cell.level);
+      const enriched = applyCheckToCell(cell, result, domain);
+      checked += 1;
+      const afterLevel = parseLevel(enriched.level);
+      if (afterLevel != null && (beforeLevel == null || afterLevel < beforeLevel)) {
+        // A smaller number is a STRONGER level -> authority was raised.
+        raised += 1;
+      }
+      if (enriched.fcBelowFloor === true) flagged += 1;
+      newCells.push(enriched);
+    }
+    newMatrix.push(Object.assign({}, row, { cells: newCells }));
+  }
+  return Object.assign({}, benchmark, {
+    matrix: newMatrix,
+    enrichment: { enabled: true, checked, raised, flagged, skipped },
+  });
+}
+function countCells(matrix) {
+  if (!Array.isArray(matrix)) return 0;
+  return matrix.reduce(
+    (n, row) => n + (Array.isArray(row && row.cells) ? row.cells.length : 0),
+    0
+  );
+}
+module.exports = { // CommonJS exports of this module
+  STRICT_FLOORS, // domain -> minimum numeric evidence level
+  parseLevel, // "L3" / 3 -> 3, or null
+  levelLabel, // 3 -> "L3", null/undefined -> null
+  isHardClaim, // default claimSelector used by enrichMatrix
+  cellClaimText, // text sent to the checker for one cell
+  applyCheckToCell, // stamp one checker result onto a copy of a cell
+  enrichMatrix, // async entry point: returns an enriched copy of the benchmark
+  countCells, // total number of cells in a matrix
+};