npm - create-byan-agent - Versions diffs - 2.23.0 → 2.26.0 - Mend

create-byan-agent 2.23.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (172) hide show

package/install/templates/_byan/mcp/byan-mcp-server/lib/sync-rules.js CHANGED Viewed

@@ -9,7 +9,6 @@ import yaml from 'js-yaml';
 //   - .claude/skills/byan-strict/SKILL.md            (owned, full-file)
 //   - .claude/hooks/lib/strict-config.json           (owned, full-file)
 //   - AGENTS.md                                       (upsert block, Codex)
-//   - .github/copilot-instructions.md                 (upsert block, Copilot)
 //
 // Owned files are rewritten wholesale (they carry a generated-by header).
 // Shared files get a block upserted between BYAN-STRICT markers, leaving the
@@ -19,6 +18,10 @@ const BEGIN = 'BYAN-STRICT:BEGIN';
 const END = 'BYAN-STRICT:END'; // closing marker text of the strict block (default `markers.end` of upsertBlock)
 const DEFAULT_CONFIG_REL = path.join('_byan', '_config', 'strict-mode.yaml'); // relative path of the strict-mode YAML
+const AUTOBENCH_BEGIN = 'BYAN-AUTOBENCH:BEGIN'; // opening marker text of the auto-benchmark block
+const AUTOBENCH_END = 'BYAN-AUTOBENCH:END'; // closing marker text of the auto-benchmark block
+const AUTOBENCH_CONFIG_REL = path.join('_byan', '_config', 'autobench.yaml'); // joined onto the project root by loadAutobenchConfig
 export function resolveRoot(projectRoot) { // Resolves the project root directory to work against.
   return projectRoot || process.env.CLAUDE_PROJECT_DIR || process.cwd(); // first truthy wins: explicit argument, then the CLAUDE_PROJECT_DIR env var, then the current working directory
 }
@@ -39,6 +42,31 @@ export function loadConfig({ projectRoot, configPath } = {}) {
   return cfg;
 }
+// Loads and validates the auto-benchmark source of truth (a YAML file). Mirrors loadConfig. `configPath`, when given,
+// overrides the default location (AUTOBENCH_CONFIG_REL under resolveRoot(projectRoot)). Returns the parsed object as-is.
+// Throws an Error on a missing file, a non-object parse, a missing or empty `mantras` array, or a missing/non-object
+// `trigger` / `scaler` / `format` doctrine brick, so a malformed YAML fails the generator loudly. `hooks` is NOT validated here.
+export function loadAutobenchConfig({ projectRoot, configPath } = {}) {
+  const root = resolveRoot(projectRoot);
+  const file = configPath || path.join(root, AUTOBENCH_CONFIG_REL);
+  if (!fs.existsSync(file)) {
+    throw new Error(`autobench config not found at ${file}`);
+  }
+  const cfg = yaml.load(fs.readFileSync(file, 'utf8'));
+  if (!cfg || typeof cfg !== 'object') { // rejects empty documents and scalars; an array passes here and is caught by the mantras check below
+    throw new Error(`autobench config at ${file} did not parse to an object`);
+  }
+  if (!Array.isArray(cfg.mantras) || cfg.mantras.length === 0) {
+    throw new Error('autobench config must define a non-empty mantras list');
+  }
+  for (const brick of ['trigger', 'scaler', 'format']) { // each doctrine brick must be a non-null object
+    if (!cfg[brick] || typeof cfg[brick] !== 'object') {
+      throw new Error(`autobench config must define the '${brick}' brick`);
+    }
+  }
+  return cfg;
+}
 // ---------------------------------------------------------------------------
 // Renderers — pure functions config -> string.
 // ---------------------------------------------------------------------------
@@ -46,6 +74,9 @@ export function loadConfig({ projectRoot, configPath } = {}) {
 const GENERATED_NOTE =
   'Generated by byan-sync-rules from _byan/_config/strict-mode.yaml. Do not hand-edit.'; // default `note` of upsertBlock, embedded in the begin-marker comment
+const AUTOBENCH_GENERATED_NOTE =
+  'Generated by byan-sync-rules from _byan/_config/autobench.yaml. Do not hand-edit.'; // passed as `note` by syncAutobench for the auto-benchmark block
 export function renderStrictConfig(cfg) {
   return {
     _generated_by: 'byan-sync-rules',
@@ -125,7 +156,10 @@ complete. Downgrading the scope is the failure this mode exists to prevent.
 1. **Lock the scope** with \`byan_strict_lock_scope\` before building. Provide a
    verbatim restatement of the request and testable \`acceptanceCriteria\`. The
-   locked scope is the contract.
+   locked scope is the contract. When one technical domain clearly dominates the
+   task, also pass \`domain\` (e.g. security, performance, javascript) — a
+   successful completion then feeds one VALIDATED tick to the ELO loop. Explicit
+   only; omit when no single domain is clear.
 2. **Build the full scope.** Do not substitute an MVP, a stub, or a simplified
    version. If a part cannot be done, surface it as a gap — do not cut silently.
 3. **Self-verify at least ${cfg.self_verify.min_passes} times** with
@@ -160,19 +194,84 @@ Hard mantras:
 ${mantraLines(cfg)}`;
 }
-export function renderCopilotBlock(cfg) {
-  // Copilot has no blocking mechanism; this is injection-only guidance.
-  return `## BYAN Strict Mode
-${cfg.injection.context_banner.trim()}
-Use the \`byan\` MCP strict tools to lock scope, self-verify (>= ${cfg.self_verify.min_passes} passes),
-and complete. The pre-commit gate is the final net: a commit without a fresh,
-matching audit token is rejected.
+// Maps the enriched cfg.hooks section to the EXACT runtime shape read by
+// autobench-stop-guard.js. Every key and every {source, flags} pair structure
+// must stay in sync with what compileRegex / hasChoiceLanguage / hasMarker /
+// hasNeverListed / readMarkerFields / escapeHatchActive / ledgerPath consume.
+// Pure: returns a fresh plain object and never mutates cfg. Every cfg.hooks sub-key dereferenced below is required — a missing `hooks`, `marker_patterns`, `marker_fields`, `escape_hatch`, `never_list`, `choice_language` or `candidate_token` throws a TypeError; only `enforcement` is optional.
+// This is the function that closes the single-source-of-truth gap: before it
+// existed, autobench-config.json was hand-authored and the YAML toggle
+// (escape_hatch.disabled) was never propagated to the runtime file.
+export function renderAutobenchConfig(cfg) {
+  const h = cfg.hooks;
+  const mp = h.marker_patterns;
+  const mf = h.marker_fields;
+  const eh = h.escape_hatch;
-Hard mantras:
+  return {
+    _generated_by: 'byan-sync-rules',
+    _note:
+      'Runtime subset read by autobench-stop-guard.js. Edit _byan/_config/autobench.yaml and regenerate; do not hand-edit. Regexes are {source, flags} pairs reconstructed into RegExp at load time.',
+    version: cfg.version,
+    marker_patterns: {
+      any: { source: mp.any.source, flags: mp.any.flags },
+      done: { source: mp.done.source, flags: mp.done.flags },
+      skip: { source: mp.skip.source, flags: mp.skip.flags },
+    },
+    marker_fields: {
+      g1: { source: mf.g1.source, flags: mf.g1.flags },
+      g2: { source: mf.g2.source, flags: mf.g2.flags },
+      scope: { source: mf.scope.source, flags: mf.scope.flags },
+    },
+    never_list: h.never_list.map((entry) => ({ source: entry.source, flags: entry.flags })),
+    choice_language: h.choice_language.map((entry) => {
+      const out = { source: entry.source, flags: entry.flags };
+      // Preserve optional threshold fields only when present; omitting them
+      // keeps the config lean and matches the runtime's typeof checks.
+      if (typeof entry.min_matches === 'number') out.min_matches = entry.min_matches;
+      if (typeof entry.requires_candidates === 'number') out.requires_candidates = entry.requires_candidates;
+      return out;
+    }),
+    candidate_token: { source: h.candidate_token.source, flags: h.candidate_token.flags },
+    escape_hatch: {
+      // Both escape-hatch fields (session_flag and disabled) are copied through verbatim from the YAML.
+      // NOTE(review): an earlier note here said session_flag is not stored in config.json because the runtime
+      // hardcodes the path, yet it is emitted below — confirm whether autobench-runtime.js reads it from this file.
+      session_flag: eh.session_flag,
+      disabled: eh.disabled,
+    },
+    enforcement: {
+      // Disarmed-by-default (approach C): the Stop hook observes and ledgers but
+      // does not block until armed. Arming is config-only (armed: true in the
+      // YAML); there is no loose flag file, so a stray file cannot silently arm a
+      // machine. armed is true only for a strict boolean `true`; a missing enforcement section yields false, keeping an older source inert.
+      armed: Boolean(h.enforcement && h.enforcement.armed === true),
+    },
+    ledger: {
+      path: h.ledger_path,
+    },
+    banners: {
+      stop_block: h.stop_block,
+    },
+  };
+}
-${mantraLines(cfg)}`;
+// Renders the lean auto-benchmark pointer block as a Markdown string. The same
+// block is shared by both platform files (CLAUDE.md / AGENTS.md): it names the
+// feature, states the marker one-liner the agent must emit, and points to the
+// full doctrine. Kept short on purpose — CLAUDE.md stays lean via pointers, and
+// the full rule lives in .claude/rules/benchmark.md (owned, authored elsewhere).
+export function renderAutobenchPointerBlock(cfg) {
+  const name = (cfg.name || 'BYAN Auto-Benchmark').trim(); // heading text: cfg.name when truthy, else the default title; cfg.name is the only field read from cfg
+  return `## ${name}
+Before asking the user to choose between options, benchmark the fork: render
+ONE compact table (Option | <= 4 criteria | Niv + a best-first reco line) when
+both gates hold (>= 2 non-substitutable options diverging on >= 1 weighted
+criterion). Emit the marker verbatim before the table:
+\`<!-- BYAN-BENCH:done g1=<#options> g2=<#divergent-criteria> scope=<internal|external> conf=<assertive|lean> -->\`.
+A confirm, a destructive prompt, or an obvious default is not a fork — emit
+\`<!-- BYAN-BENCH:skip reason=.. -->\` instead. Full doctrine: see @.claude/rules/benchmark.md`;
 }
 // ---------------------------------------------------------------------------
@@ -191,11 +290,29 @@ function writeIfChanged(filePath, content) {
   return existing === null ? 'created' : 'updated';
 }
+// Escapes a marker string for safe embedding in a RegExp source: the input is coerced with String() and every regex metacharacter in it gets a backslash prefix.
+// Markers are authored as plain identifiers today, but a literal-safe regex keeps the
+// generalized signature robust if a future marker carries a regex metacharacter.
+function escapeRegex(s) {
+  return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
 // Insert or replace a block delimited by HTML-comment markers. Preserves
 // everything outside the markers. If the file does not exist, creates it with
 // just the block.
-export function upsertBlock({ filePath, block }) {
-  const wrapped = `<!-- ${BEGIN} (${GENERATED_NOTE}) -->\n${block}\n<!-- ${END} -->`;
+//
+// markers defaults to the module-level STRICT pair so every existing strict
+// callsite keeps working unchanged. Passing a distinct pair (e.g. the
+// AUTOBENCH markers) scopes the replace-regex to that pair only, so a STRICT
+// block and an AUTOBENCH block coexist in one file without clobbering each
+// other.
+export function upsertBlock({
+  filePath,
+  block,
+  markers = { begin: BEGIN, end: END },
+  note = GENERATED_NOTE,
+}) {
+  const wrapped = `<!-- ${markers.begin} (${note}) -->\n${block}\n<!-- ${markers.end} -->`;
   const existing = fs.existsSync(filePath) ? fs.readFileSync(filePath, 'utf8') : null;
   if (existing === null) {
@@ -205,7 +322,7 @@ export function upsertBlock({ filePath, block }) {
   }
   const re = new RegExp(
-    `<!-- ${BEGIN}[\\s\\S]*?${END} -->`,
+    `<!-- ${escapeRegex(markers.begin)}[\\s\\S]*?${escapeRegex(markers.end)} -->`,
     'm'
   );
   let next;
@@ -216,7 +333,7 @@ export function upsertBlock({ filePath, block }) {
   }
   if (next === existing) return 'unchanged';
   fs.writeFileSync(filePath, next);
-  return existing.includes(BEGIN) ? 'updated' : 'appended';
+  return existing.includes(markers.begin) ? 'updated' : 'appended';
 }
 // ---------------------------------------------------------------------------
@@ -241,12 +358,6 @@ export function syncRules({ projectRoot, configPath } = {}) {
   const agentsPath = path.join(root, 'AGENTS.md');
   report['AGENTS.md'] = upsertBlock({ filePath: agentsPath, block: renderAgentsBlock(cfg) });
-  const copilotPath = path.join(root, '.github', 'copilot-instructions.md');
-  report['.github/copilot-instructions.md'] = upsertBlock({
-    filePath: copilotPath,
-    block: renderCopilotBlock(cfg),
-  });
   const mantrasPath = path.join(root, 'src', 'byan-v2', 'data', 'strict-mantras.json');
   if (fs.existsSync(path.dirname(mantrasPath))) {
     report['src/byan-v2/data/strict-mantras.json'] = writeIfChanged(
@@ -258,4 +369,42 @@ export function syncRules({ projectRoot, configPath } = {}) {
   return report;
 }
+// Auto-benchmark orchestrator. Kept SEPARATE from syncRules so the strict
+// generator stays untouched and independently testable. Upserts the lean
+// pointer block into the three platform files and writes the runtime config
+// that the Stop hook reads. The config write closes the single-source-of-truth
+// gap: escape_hatch.disabled toggled in the YAML now reaches the runtime file.
+// Returns a {file: action} report like syncRules.
+export function syncAutobench({ projectRoot, configPath } = {}) {
+  const root = resolveRoot(projectRoot);
+  const cfg = loadAutobenchConfig({ projectRoot: root, configPath });
+  const report = {};
+  const block = renderAutobenchPointerBlock(cfg);
+  const markers = { begin: AUTOBENCH_BEGIN, end: AUTOBENCH_END };
+  // CLAUDE.md uses the lean pointer convention (.claude/CLAUDE.md holds the
+  // other rule pointers: strict, fact-check, ELO). AGENTS.md (Codex) is the
+  // other cross-platform mechanism target.
+  const targets = {
+    '.claude/CLAUDE.md': path.join(root, '.claude', 'CLAUDE.md'),
+    'AGENTS.md': path.join(root, 'AGENTS.md'),
+  };
+  for (const [rel, filePath] of Object.entries(targets)) {
+    report[rel] = upsertBlock({ filePath, block, markers, note: AUTOBENCH_GENERATED_NOTE });
+  }
+  // Write the runtime config that autobench-stop-guard.js reads. Mirrors the
+  // strict pattern (syncRules writes strict-config.json from strict-mode.yaml).
+  const autobenchCfgPath = path.join(root, '.claude', 'hooks', 'lib', 'autobench-config.json');
+  report['.claude/hooks/lib/autobench-config.json'] = writeIfChanged(
+    autobenchCfgPath,
+    JSON.stringify(renderAutobenchConfig(cfg), null, 2) + '\n'
+  );
+  return report;
+}
 export const MARKERS = { BEGIN, END }; // public handle on the strict begin/end marker strings
+export const AUTOBENCH_MARKERS = { BEGIN: AUTOBENCH_BEGIN, END: AUTOBENCH_END }; // same shape as MARKERS, for the auto-benchmark block

package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-generator.js CHANGED Viewed

@@ -47,6 +47,7 @@ export const PORTABLE = {
     'create-excalidraw-dataflow',
     'create-excalidraw-flowchart',
     'create-excalidraw-wireframe',
+    'byan-benchmark',
   ],
 };