npm - slice-tournament-zoo - Versions diffs - 0.5.6 - Mend

slice-tournament-zoo 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/LICENSE +202 -0
package/README.md +357 -0
package/bin/stz.mjs +15 -0
package/package.json +35 -0
package/src/README.md +19 -0
package/src/bridge.ts +950 -0
package/src/budget.ts +78 -0
package/src/cli.ts +126 -0
package/src/cost-tracker.ts +59 -0
package/src/escalation.ts +89 -0
package/src/eval-runner.ts +220 -0
package/src/grpo.ts +54 -0
package/src/hack-detector.ts +124 -0
package/src/index.ts +17 -0
package/src/merge.ts +245 -0
package/src/mock/README.md +40 -0
package/src/mock/interfaces.ts +114 -0
package/src/mock/mock.ts +223 -0
package/src/mock/orchestrator.ts +457 -0
package/src/pressure.ts +81 -0
package/src/project.ts +335 -0
package/src/seal.ts +182 -0
package/src/selection.ts +128 -0
package/src/specdiff.ts +141 -0
package/src/state.ts +95 -0
package/src/taxonomy.ts +161 -0
package/src/types.ts +305 -0

package/src/selection.ts ADDED Viewed

@@ -0,0 +1,128 @@
+/**
+ * Hybrid selection (F7): eval-gate then judge ranking.
+ *
+ *  Stage 1 (gate): a specimen must pass the sealed held-out suite AND carry no
+ *                  hack findings (F10/L3). Failures are eliminated.
+ *  Stage 2 (rank):  pairwise comparisons across passers, V=8 votes per pair
+ *                   (RTV default). Ranking is by win-count (the plain,
+ *                   deterministic aggregation — deliberately not clever).
+ *
+ * GRPO group-relative advantage (F8) is computed over the scalar eval reward
+ * (`evalReward`) of every specimen in the group — gate-eliminated ones
+ * included — and is reported alongside the ranking. Pairwise votes decide the
+ * ranking; the same eval reward breaks ranking ties and feeds the pressure-log
+ * weighting, so those two share one number.
+ */
+import type {
+  Advantage,
+  EvalResult,
+  Judgment,
+  PairwiseVote,
+  SpecimenId,
+} from "./types.js";
+import { groupRelativeAdvantage } from "./grpo.js";
+/**
+ * Stage 1 gate: split `results` into passers and eliminated specimens.
+ *
+ * A result with any hack finding is eliminated first (reason lists every
+ * finding's `pattern`); otherwise a result whose `passedGate` is falsy is
+ * eliminated with its `testPassRate` to two decimals. Input order is kept.
+ *
+ * @param results Eval results, one per specimen.
+ * @returns Ids that passed, plus each eliminated id with a human-readable reason.
+ */
+export function evalGate(results: EvalResult[]): {
+  passers: SpecimenId[];
+  eliminated: { specimen: SpecimenId; reason: string }[];
+} {
+  const passers: SpecimenId[] = [];
+  const eliminated: { specimen: SpecimenId; reason: string }[] = [];
+  results.forEach((result) => {
+    const { specimen } = result;
+    // Hack findings outrank the gate verdict: a "passing" cheater is still out.
+    if (result.hackFindings.length > 0) {
+      const patterns = result.hackFindings.map((finding) => finding.pattern).join(", ");
+      eliminated.push({ specimen, reason: `hack-pattern: ${patterns}` });
+      return;
+    }
+    if (result.passedGate) {
+      passers.push(specimen);
+      return;
+    }
+    const rate = result.testPassRate.toFixed(2);
+    eliminated.push({ specimen, reason: `gate-fail: testPassRate=${rate}` });
+  });
+  return { passers, eliminated };
+}
+/**
+ * Count wins per specimen from a list of pairwise votes.
+ *
+ * Both contestants of every vote are present in the result (at 0 if they never
+ * won). The `winner` field is counted as-is; it is not checked against `a`/`b`.
+ *
+ * @param votes Pairwise votes to aggregate.
+ * @returns Map from specimen id to number of votes won.
+ */
+export function tallyVotes(votes: PairwiseVote[]): Map<SpecimenId, number> {
+  const tally = new Map<SpecimenId, number>();
+  const seed = (id: SpecimenId): void => {
+    if (!tally.has(id)) tally.set(id, 0);
+  };
+  for (const { a, b, winner } of votes) {
+    seed(a);
+    seed(b);
+    tally.set(winner, (tally.get(winner) ?? 0) + 1);
+  }
+  return tally;
+}
+/**
+ * Stage 2: order passers best-first.
+ *
+ * Sort keys, in priority order: pairwise win-count (higher first), then
+ * `rewardOf` (higher first), then specimen id ascending — so the result is
+ * fully deterministic (N6). A passer that appears in no vote counts as 0 wins.
+ *
+ * @param passers Specimen ids that cleared the gate; not mutated.
+ * @param votes Pairwise votes to tally.
+ * @param rewardOf Scalar reward lookup used as the first tie-break.
+ * @returns A new array of the passers in ranked order.
+ */
+export function rankByVotes(
+  passers: SpecimenId[],
+  votes: PairwiseVote[],
+  rewardOf: (s: SpecimenId) => number,
+): SpecimenId[] {
+  const wins = tallyVotes(votes);
+  const ranked = passers.slice();
+  ranked.sort((a, b) => {
+    const byWins = (wins.get(b) ?? 0) - (wins.get(a) ?? 0);
+    if (byWins !== 0) return byWins;
+    const byReward = rewardOf(b) - rewardOf(a);
+    if (byReward !== 0) return byReward;
+    if (a < b) return -1;
+    return a > b ? 1 : 0;
+  });
+  return ranked;
+}
+/**
+ * Scalar reward for a specimen: 0.5 × test pass rate + 0.25 × coverage +
+ * 0.25 × mutation kill rate, where kill rate is `1 − mutationScore`
+ * (i.e. `mutationScore` is read as a survival rate — NOTE(review): confirm
+ * against the EvalResult definition). Feeds GRPO and ranking tie-breaks.
+ *
+ * Each component is clamped to [0,1] and non-finite values count as 0, so the
+ * result is always a finite number in [0,1]. Without this a NaN component
+ * would make the ranking comparator return NaN and the sort order undefined.
+ *
+ * @param r Eval result to score.
+ * @returns Reward in [0,1].
+ */
+export function evalReward(r: EvalResult): number {
+  const unit = (x: number): number =>
+    Number.isFinite(x) ? Math.min(1, Math.max(0, x)) : 0;
+  // Clamp after inverting so a missing/NaN mutation score earns no kill credit.
+  const killRate = unit(1 - r.mutationScore);
+  return 0.5 * unit(r.testPassRate) + 0.25 * unit(r.coverage) + 0.25 * killRate;
+}
+/**
+ * Full two-stage selection producing a Judgment (F7 + F8).
+ *
+ * Runs the eval gate, ranks the passers by votes (eval reward as tie-break),
+ * and computes group-relative advantages over every input result. The winner
+ * is the top-ranked passer, or `null` when nobody passed.
+ *
+ * NOTE(review): the reward fed to GRPO here is `evalReward` only; votes affect
+ * the ranking but are not blended into the reward.
+ *
+ * @param results Eval results for the whole specimen group.
+ * @param votes Pairwise judge votes; passed through into the judgment.
+ * @returns The judgment plus the gate's elimination list.
+ */
+export function select(
+  results: EvalResult[],
+  votes: PairwiseVote[],
+): { judgment: Judgment; eliminated: { specimen: SpecimenId; reason: string }[] } {
+  const gate = evalGate(results);
+  const rewardByName = new Map<SpecimenId, number>();
+  for (const result of results) {
+    rewardByName.set(result.specimen, evalReward(result));
+  }
+  const rewardOf = (s: SpecimenId): number => rewardByName.get(s) ?? 0;
+  const ranking = rankByVotes(gate.passers, votes, rewardOf);
+  // Advantage spans the WHOLE group (F8), gate-eliminated specimens included,
+  // so the pressure log can weight which losers' diffs are most informative (F9).
+  const groupRewards = results.map(({ specimen }) => ({
+    specimen,
+    reward: rewardOf(specimen),
+  }));
+  const advantages: Advantage[] = groupRelativeAdvantage(groupRewards);
+  const judgment: Judgment = { ranking, winner: ranking[0] ?? null, advantages, votes };
+  return { judgment, eliminated: gate.eliminated };
+}
+/**
+ * Round-robin schedule: every unordered pair of passers exactly once, each
+ * pair to be voted V times by the judge. Pairs come out in index order
+ * (earlier element first), so the schedule is deterministic.
+ *
+ * @param passers Specimen ids to pair up.
+ * @returns n·(n−1)/2 pairs; empty for fewer than two passers.
+ */
+export function pairings(passers: SpecimenId[]): [SpecimenId, SpecimenId][] {
+  const schedule: [SpecimenId, SpecimenId][] = [];
+  for (const [index, first] of passers.entries()) {
+    for (const second of passers.slice(index + 1)) {
+      schedule.push([first, second]);
+    }
+  }
+  return schedule;
+}

package/src/specdiff.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * Intent-spec / as-built-spec diff (F13) — the canonical audit artifact.
+ *
+ * The planner produces an upfront intent spec; the documenter produces an
+ * as-built spec from the winning merged code + traces. Their diff is committed
+ * as `slice-NN/spec-diff.md`.
+ *
+ * Claims are matched by a stable KEY, not by wording. A claim's key is its
+ * explicit `id` when present, else its normalized text. This makes the legacy
+ * string-claim path a special case (key derived from text → content matching,
+ * case/whitespace-insensitive) while the id-keyed path lets the documenter
+ * reword a claim freely and still match the intent claim it adjudicates. An
+ * id-keyed as-built claim carries a `satisfied` verdict; `satisfied: false`
+ * means the intent claim was genuinely NOT delivered (it lands in `missing`,
+ * never silently in `kept`).
+ */
+/**
+ * One claim: a bare string (legacy/positional) or a keyed, adjudicated claim.
+ *
+ * For the object form: a non-blank `id` is the match key; display text falls
+ * back `text` → `evidence` → `id`; `satisfied: false` marks the claim as not
+ * delivered (an absent `satisfied` is treated as delivered).
+ */
+export type Claim =
+  | string
+  | { id?: string; text?: string; evidence?: string; satisfied?: boolean };
+/** A spec (intent or as-built): an ordered list of claims. */
+export interface Spec {
+  claims: Claim[];
+}
+/** Result of comparing an intent spec to an as-built spec; entries are display texts. */
+export interface SpecDiff {
+  /** In intent but not as-built — promised, not delivered (or not documented). */
+  missing: string[];
+  /** In as-built but not intent — delivered beyond the plan (scope creep / extras). */
+  added: string[];
+  /** Present in both — delivered as planned. */
+  kept: string[];
+}
+/** Normalize text for matching: trim, lower-case, collapse whitespace runs to one space. */
+function norm(s: string): string {
+  const lowered = s.trim().toLowerCase();
+  return lowered.replace(/\s+/g, " ");
+}
+/**
+ * Display text for a claim — what shows in the spec-diff markdown.
+ * Strings are returned as-is; objects fall back `text` → `evidence` → `id`,
+ * then a fixed placeholder. Only null/undefined fields are skipped.
+ */
+function claimText(c: Claim): string {
+  if (typeof c !== "string") {
+    const { text, evidence, id } = c;
+    return text ?? evidence ?? id ?? "(unspecified claim)";
+  }
+  return c;
+}
+/**
+ * Stable match key for a claim: `id:<trimmed id>` when the claim carries a
+ * non-blank string id, otherwise `t:<normalized display text>`.
+ */
+function claimKey(c: Claim): string {
+  const id = typeof c === "string" ? undefined : c.id;
+  if (typeof id === "string") {
+    const trimmed = id.trim();
+    if (trimmed !== "") return `id:${trimmed}`;
+  }
+  return `t:${norm(claimText(c))}`;
+}
+/**
+ * Whether an as-built claim delivers its intent claim. String claims always
+ * do; object claims do unless `satisfied` is exactly `false`.
+ */
+function isSatisfied(c: Claim): boolean {
+  if (typeof c === "string") return true;
+  return c.satisfied !== false;
+}
+/**
+ * Compare an intent spec against an as-built spec by claim key.
+ *
+ * Each intent claim lands in `kept` when an as-built claim shares its key and
+ * is satisfied, else in `missing`. As-built claims whose key matches no intent
+ * claim land in `added`. When several as-built claims share a key, the last
+ * one wins. All entries are display texts, in input order.
+ *
+ * @param intent Planner's upfront spec.
+ * @param asBuilt Documenter's spec of what was actually built.
+ * @returns The three-way diff.
+ */
+export function diffSpecs(intent: Spec, asBuilt: Spec): SpecDiff {
+  const builtIndex = new Map<string, Claim>();
+  asBuilt.claims.forEach((claim) => builtIndex.set(claimKey(claim), claim));
+  const missing: string[] = [];
+  const kept: string[] = [];
+  const intentKeys = new Set<string>();
+  for (const claim of intent.claims) {
+    const key = claimKey(claim);
+    intentKeys.add(key);
+    const match = builtIndex.get(key);
+    const bucket = match && isSatisfied(match) ? kept : missing;
+    bucket.push(claimText(claim));
+  }
+  // A key that matches an intent claim is never "added" — even when it is
+  // satisfied:false (that is a genuine miss, already counted above), so one
+  // claim is never reported as both missing and added.
+  const added: string[] = [];
+  for (const claim of asBuilt.claims) {
+    if (!intentKeys.has(claimKey(claim))) added.push(claimText(claim));
+  }
+  return { missing, added, kept };
+}
+/**
+ * Render the spec-diff as the markdown body of spec-diff.md (F13).
+ *
+ * Layout: title, a one-line description, then three sections (kept, missing,
+ * added). Each section has a heading with its item count and either a bullet
+ * list or `_none_`. Blocks are separated by blank lines; output ends in "\n".
+ *
+ * @param sliceId Slice identifier shown in the title.
+ * @param diff Diff to render.
+ * @returns Markdown text.
+ */
+export function renderSpecDiff(sliceId: string, diff: SpecDiff): string {
+  const renderSection = (title: string, items: string[]): string => {
+    const heading = `## ${title} (${items.length})\n`;
+    if (items.length === 0) return heading + "_none_";
+    return heading + items.map((item) => `- ${item}`).join("\n");
+  };
+  const blocks = [
+    `# Spec diff — ${sliceId}`,
+    "Canonical audit record: intent spec vs. as-built spec.",
+    renderSection("✅ Delivered as planned", diff.kept),
+    renderSection("⚠️ Planned but missing", diff.missing),
+    renderSection("➕ Built beyond plan", diff.added),
+  ];
+  return blocks.join("\n\n") + "\n";
+}
+/** A slice is faithfully built when the diff's `missing` list is empty. */
+export function isFaithful(diff: SpecDiff): boolean {
+  const unmet = diff.missing.length;
+  return unmet === 0;
+}
+/**
+ * Intent claim ids that the as-built spec failed to account for cleanly — used
+ * by `finalize` to warn when the documenter mis-keyed a verdict (a mis-keyed id
+ * shows up as a false `missing` here). Returns intent ids with no satisfied
+ * as-built claim at their key; an empty array means every id-keyed intent
+ * claim was adjudicated.
+ *
+ * Only id-keyed intent claims are considered, using the same rule as
+ * `claimKey`: the id must be a non-blank string. String claims and claims with
+ * a blank or non-string id are matched by text and are skipped here.
+ *
+ * @param intent Planner's upfront spec.
+ * @param asBuilt Documenter's as-built spec.
+ * @returns The (untrimmed) ids of unmatched or unsatisfied intent claims.
+ */
+export function unmatchedIntentIds(intent: Spec, asBuilt: Spec): string[] {
+  const builtByKey = new Map<string, Claim>();
+  for (const c of asBuilt.claims) builtByKey.set(claimKey(c), c);
+  const ids: string[] = [];
+  for (const c of intent.claims) {
+    // Must agree with claimKey on what "id-keyed" means, otherwise a blank id
+    // is keyed by text yet reported here as a (blank) missing id.
+    if (typeof c === "string" || typeof c.id !== "string" || c.id.trim() === "") continue;
+    const b = builtByKey.get(claimKey(c));
+    if (!b || !isSatisfied(b)) ids.push(c.id);
+  }
+  return ids;
+}
+/**
+ * As-built claim ids that claim to satisfy an intent claim but reference an id
+ * not present in the intent spec — a documenter mis-key. Extras (ids the
+ * documenter coined for behaviour beyond intent) are expected and excluded by
+ * convention: an id is treated as an extra when it is absent from intent AND
+ * the claim does not assert `satisfied`. Anything else absent-from-intent but
+ * asserting `satisfied: true` is a likely mis-key and is surfaced.
+ *
+ * Only id-keyed as-built claims are considered, using the same rule as
+ * `claimKey`: the id must be a non-blank string.
+ *
+ * @param intent Planner's upfront spec.
+ * @param asBuilt Documenter's as-built spec.
+ * @returns The (untrimmed) ids of likely mis-keyed as-built claims.
+ */
+export function mismatchedAsBuiltIds(intent: Spec, asBuilt: Spec): string[] {
+  const intendedKeys = new Set(intent.claims.map(claimKey));
+  const out: string[] = [];
+  for (const c of asBuilt.claims) {
+    // Blank / non-string ids are text-keyed by claimKey, so they cannot be a mis-keyed id.
+    if (typeof c === "string" || typeof c.id !== "string" || c.id.trim() === "") continue;
+    if (intendedKeys.has(claimKey(c))) continue; // matched an intent claim
+    if (c.satisfied === true) out.push(c.id); // asserts it satisfied something, but matches no intent id
+  }
+  return out;
+}

package/src/state.ts ADDED Viewed

@@ -0,0 +1,95 @@
+/**
+ * Per-slice state store (F16, N1). git is the artifact store; `state.json`
+ * tracks current phase, active specimens, escalation, budget, and an
+ * append-only event sequence. Crash recovery resumes from `state.json` + the
+ * last commit on the slice branch.
+ *
+ * Lives at `.stz/40-slices/<sliceId>/state.json`. (The §3 taxonomy lists
+ * state.json under 90-audit; we keep a per-slice copy beside the slice so a
+ * slice is self-contained, matching F16 "state.json per slice".)
+ */
+import { mkdir, readFile, rename, writeFile } from "node:fs/promises";
+import { existsSync } from "node:fs";
+import { join, dirname } from "node:path";
+import { PHASES, type Phase, type PhaseStatus, type SliceState } from "./types.js";
+import { STZ_DIR } from "./taxonomy.js";
+import { allocateBudget } from "./budget.js";
+/** Path of a slice's state file: `<root>/<STZ_DIR>/40-slices/<sliceId>/state.json`. */
+export function statePath(root: string, sliceId: string): string {
+  const segments = [root, STZ_DIR, "40-slices", sliceId, "state.json"];
+  return join(...segments);
+}
+/**
+ * Build the initial state for a new slice: every phase "pending", current
+ * phase set to the first entry of PHASES, normal escalation, zeroed counters,
+ * no events, and a budget from `allocateBudget(complexity, poolRemaining)`.
+ *
+ * @param sliceId Slice identifier.
+ * @param complexity Passed to `allocateBudget`; defaults to 1.
+ * @param poolRemaining Passed to `allocateBudget`; defaults to 5,000,000.
+ * @returns A new SliceState.
+ */
+export function freshState(sliceId: string, complexity = 1, poolRemaining = 5_000_000): SliceState {
+  const phaseStatus = {} as Record<Phase, PhaseStatus>;
+  for (const phase of PHASES) {
+    phaseStatus[phase] = "pending";
+  }
+  const budget = allocateBudget(complexity, poolRemaining);
+  return {
+    schemaVersion: 1,
+    sliceId,
+    currentPhase: PHASES[0],
+    phaseStatus,
+    escalation: "normal",
+    retryCount: 0,
+    replanCount: 0,
+    activeSpecimens: [],
+    budget,
+    events: [],
+    callCount: 0,
+    failureReport: null,
+  };
+}
+/**
+ * Append a structured event (N1 replay spine). The event's `seq` is the
+ * number of events already recorded. Mutates `state` and returns it.
+ *
+ * @param state Slice state to append to.
+ * @param phase Phase the event belongs to, or "lifecycle".
+ * @param kind Short event kind.
+ * @param detail Free-form detail text.
+ * @returns The same `state` object.
+ */
+export function appendEvent(
+  state: SliceState,
+  phase: Phase | "lifecycle",
+  kind: string,
+  detail: string,
+): SliceState {
+  const seq = state.events.length;
+  state.events.push({ seq, phase, kind, detail });
+  return state;
+}
+/**
+ * Record a phase's new status and log it as a `phase-<status>` event.
+ * Entering "running" also makes that phase the current phase. Mutates and
+ * returns `state`.
+ */
+export function setPhaseStatus(
+  state: SliceState,
+  phase: Phase,
+  status: PhaseStatus,
+): SliceState {
+  state.phaseStatus[phase] = status;
+  if (status === "running") {
+    state.currentPhase = phase;
+  }
+  const kind = `phase-${status}`;
+  const detail = `${phase} → ${status}`;
+  return appendEvent(state, phase, kind, detail);
+}
+/**
+ * Persist a slice's state as pretty-printed JSON at its `statePath`, creating
+ * parent directories as needed.
+ *
+ * The file is written to a sibling temp file and then renamed into place, so
+ * a crash mid-write cannot leave a truncated `state.json` — which matters
+ * because this file is what crash recovery reads.
+ *
+ * @param root Project root.
+ * @param state State to save; `state.sliceId` selects the file.
+ */
+export async function saveState(root: string, state: SliceState): Promise<void> {
+  const p = statePath(root, state.sliceId);
+  // Per-process temp name so two writers never share a half-written file.
+  const tmp = `${p}.${process.pid}.tmp`;
+  await mkdir(dirname(p), { recursive: true });
+  await writeFile(tmp, JSON.stringify(state, null, 2) + "\n", "utf8");
+  await rename(tmp, p);
+}
+/**
+ * Read and parse a slice's `state.json`.
+ *
+ * @param root Project root.
+ * @param sliceId Slice whose state to load.
+ * @returns The parsed state. Only the top-level shape is checked (must be a
+ *   JSON object); individual fields are not validated.
+ * @throws If the file cannot be read, is not valid JSON, or is not a JSON object.
+ */
+export async function loadState(root: string, sliceId: string): Promise<SliceState> {
+  const p = statePath(root, sliceId);
+  const raw = await readFile(p, "utf8");
+  const parsed: unknown = JSON.parse(raw);
+  // Fail here with the path, rather than later on a confusing property access.
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+    throw new Error(`invalid slice state at ${p}: expected a JSON object`);
+  }
+  return parsed as SliceState;
+}
+/** Whether a state file exists on disk for the given slice (synchronous check). */
+export function stateExists(root: string, sliceId: string): boolean {
+  const p = statePath(root, sliceId);
+  return existsSync(p);
+}
+/**
+ * Crash recovery (F16): pick the phase to resume from.
+ *
+ * A halted slice resumes nowhere. Otherwise the first phase (in PHASES order)
+ * still marked "running" was interrupted and is re-entered; failing that, the
+ * first phase marked "pending" or "failed". Phases in any other status are
+ * skipped.
+ *
+ * @param state Slice state loaded from disk.
+ * @returns The phase to resume, or null when halted or nothing is left to run.
+ */
+export function resumePhase(state: SliceState): Phase | null {
+  if (state.escalation === "halted") return null;
+  let firstUnfinished: Phase | null = null;
+  for (const phase of PHASES) {
+    const status = state.phaseStatus[phase];
+    // An interrupted phase always wins, wherever it sits in the order.
+    if (status === "running") return phase;
+    if (firstUnfinished === null && (status === "pending" || status === "failed")) {
+      firstUnfinished = phase;
+    }
+  }
+  return firstUnfinished;
+}
+/** A slice is complete when every phase in PHASES has status "done". */
+export function isComplete(state: SliceState): boolean {
+  return !PHASES.some((phase) => state.phaseStatus[phase] !== "done");
+}

package/src/taxonomy.ts ADDED Viewed

@@ -0,0 +1,161 @@
+/**
+ * The `.stz/` markdown taxonomy (§3 Data & Vector Store) — primary data store.
+ *
+ * Tiered tree:
+ *   00-intent/    10-research/   20-standards/   30-tests/
+ *   40-slices/    50-pressure/   90-audit/
+ *
+ * Every file carries YAML frontmatter with a ~200-token `summary` field for
+ * progressive disclosure (N2): phase agents load summaries by default and fetch
+ * full bodies only on named-anchor reference.
+ *
+ * Dependency-light by design (N10 "minimal toolchain"): a tiny hand-rolled
+ * frontmatter (de)serializer rather than a YAML lib. The supported subset is
+ * scalars + string arrays, which is all the schema uses.
+ */
+import { mkdir, writeFile, readFile } from "node:fs/promises";
+import { existsSync } from "node:fs";
+import { join, dirname } from "node:path";
+/** Name of the taxonomy root directory, joined under a project root. */
+export const STZ_DIR = ".stz";
+/**
+ * Every directory of the tier tree, relative to `STZ_DIR`. Parents are listed
+ * before their children; `scaffold` creates them in this order.
+ */
+export const TIERS = [
+  "00-intent",
+  "10-research",
+  "10-research/external",
+  "10-research/internal",
+  "10-research/spikes",
+  "20-standards",
+  "20-standards/architecture-decisions",
+  "30-tests",
+  "30-tests/held-out",
+  "40-slices",
+  "50-pressure",
+  "90-audit",
+  "90-audit/calls",
+] as const;
+/**
+ * Frontmatter of a taxonomy doc: a required `summary` plus arbitrary extra
+ * keys. The (de)serializer below handles scalars and string arrays only.
+ */
+export interface Frontmatter {
+  summary: string;
+  [key: string]: unknown;
+}
+/** A parsed markdown file: its frontmatter and the markdown body after it. */
+export interface MarkdownDoc {
+  frontmatter: Frontmatter;
+  body: string;
+}
+// ── frontmatter (de)serialization ─────────────────────────────────────────
+/**
+ * Text that follows `key:` for one frontmatter value. Scalars become
+ * ` <scalar>`; an empty array becomes `[]` (no leading space); a non-empty
+ * array becomes a newline followed by one `  - <scalar>` line per item.
+ */
+function serializeValue(v: unknown): string {
+  if (!Array.isArray(v)) return ` ${scalar(v)}`;
+  if (v.length === 0) return "[]";
+  const items = v.map((item) => `  - ${scalar(item)}`);
+  return "\n" + items.join("\n");
+}
+/**
+ * Serialize one scalar for the hand-rolled frontmatter format.
+ *
+ * Non-strings are emitted via `String(v)`. Strings are emitted bare when that
+ * is unambiguous and JSON-quoted otherwise, so that parsing gives back the
+ * same string. A string is quoted when it:
+ *  - is empty, has leading/trailing whitespace, or contains `:`, `#`, `\n`, `\r`;
+ *  - starts with `"` (the parser would try to JSON-decode it);
+ *  - is `true`, `false` or `[]` (the parser would read a boolean / empty array);
+ *  - looks numeric to `Number()` (the parser would read a number).
+ *
+ * @param v Value to serialize.
+ * @returns Frontmatter text for the value.
+ */
+function scalar(v: unknown): string {
+  if (typeof v !== "string") return String(v);
+  const needsQuoting =
+    v === "" ||
+    /[:#\n\r]|^\s|\s$/.test(v) ||
+    v.startsWith('"') ||
+    v === "true" ||
+    v === "false" ||
+    v === "[]" ||
+    // Same numeric test the parser applies, so "42" or "0x10" stay strings.
+    !Number.isNaN(Number(v));
+  return needsQuoting ? JSON.stringify(v) : v;
+}
+/**
+ * Serialize frontmatter to a `---`-fenced block: one `key:<value>` entry per
+ * own enumerable property, in property order, with no trailing newline.
+ */
+export function serializeFrontmatter(fm: Frontmatter): string {
+  const entries = Object.entries(fm).map(
+    ([key, value]) => `${key}:${serializeValue(value)}`,
+  );
+  return ["---", ...entries, "---"].join("\n");
+}
+/**
+ * Serialize a whole doc: frontmatter block, a blank line, then the body with
+ * trailing whitespace trimmed and exactly one final newline.
+ */
+export function serializeDoc(doc: MarkdownDoc): string {
+  const header = serializeFrontmatter(doc.frontmatter);
+  const body = doc.body.trimEnd();
+  return header + "\n\n" + body + "\n";
+}
+/**
+ * Parse a markdown file with optional `---`-fenced frontmatter.
+ *
+ * A leading byte-order mark is ignored and CRLF line endings are treated as
+ * LF, so docs saved by Windows editors still have their frontmatter
+ * recognised. Without a frontmatter block the result is an empty `summary`
+ * and the input returned untouched as the body.
+ *
+ * Supported frontmatter lines: `key: scalar`, `key:[]` / `key: []`, and
+ * `key:` followed by `- item` lines (read as a string array). Lines that do
+ * not look like `key: …` are ignored. Scalars are decoded by `unscalar`, so
+ * unquoted numbers and booleans come back typed.
+ *
+ * @param raw File contents.
+ * @returns Frontmatter (always with a `summary` key) and the body with
+ *   leading blank lines removed.
+ */
+export function parseDoc(raw: string): MarkdownDoc {
+  // The fence regex needs `---\n` at offset 0, so strip BOM / CR first.
+  const text = raw.replace(/^\uFEFF/, "").replace(/\r\n/g, "\n");
+  const m = text.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
+  if (!m) return { frontmatter: { summary: "" }, body: raw };
+  const [, fmBlock, body] = m;
+  const fm: Frontmatter = { summary: "" };
+  const lines = (fmBlock ?? "").split("\n");
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i]!;
+    const kv = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/);
+    if (!kv) continue;
+    const key = kv[1]!;
+    const rest = kv[2]!;
+    if (rest === "" && lines[i + 1]?.match(/^\s*-\s+/)) {
+      // string array: consume every following `- item` line
+      const arr: string[] = [];
+      while (lines[i + 1]?.match(/^\s*-\s+/)) {
+        arr.push(String(unscalar(lines[++i]!.replace(/^\s*-\s+/, ""))));
+      }
+      fm[key] = arr;
+    } else if (rest === "[]") {
+      fm[key] = [];
+    } else {
+      fm[key] = unscalar(rest);
+    }
+  }
+  return { frontmatter: fm, body: (body ?? "").replace(/^\n+/, "") };
+}
+/**
+ * Decode one frontmatter scalar. Text starting with `"` is JSON-decoded
+ * (returned unchanged if that fails); `true` / `false` become booleans;
+ * non-empty text that `Number()` accepts becomes a number; anything else is
+ * returned as-is.
+ */
+function unscalar(s: string): string | number | boolean {
+  if (s.startsWith('"')) {
+    try {
+      return JSON.parse(s) as string;
+    } catch {
+      return s;
+    }
+  }
+  switch (s) {
+    case "true":
+      return true;
+    case "false":
+      return false;
+    case "":
+      return s;
+  }
+  const asNumber = Number(s);
+  return Number.isNaN(asNumber) ? s : asNumber;
+}
+// ── filesystem operations ─────────────────────────────────────────────────
+/**
+ * Create the full `.stz/` tier tree under `root`. Idempotent: directories
+ * that already exist are left alone.
+ *
+ * @param root Project root.
+ * @returns The tiers (relative names from TIERS) that were created by this call.
+ */
+export async function scaffold(root: string): Promise<string[]> {
+  const base = join(root, STZ_DIR);
+  const created: string[] = [];
+  for (const tier of TIERS) {
+    const dir = join(base, tier);
+    if (existsSync(dir)) continue;
+    await mkdir(dir, { recursive: true });
+    created.push(tier);
+  }
+  return created;
+}
+/**
+ * Write a markdown doc under `.stz/<relPath>`, creating parent directories
+ * first. The doc is serialized with `serializeDoc` and written as UTF-8.
+ */
+export async function writeDoc(
+  root: string,
+  relPath: string,
+  doc: MarkdownDoc,
+): Promise<void> {
+  const target = join(root, STZ_DIR, relPath);
+  const parent = dirname(target);
+  await mkdir(parent, { recursive: true });
+  const content = serializeDoc(doc);
+  await writeFile(target, content, "utf8");
+}
+/** Read `.stz/<relPath>` as UTF-8 and parse it with `parseDoc`. */
+export async function readDoc(root: string, relPath: string): Promise<MarkdownDoc> {
+  const source = join(root, STZ_DIR, relPath);
+  return parseDoc(await readFile(source, "utf8"));
+}
+/** Resolve a path relative to the `.stz/` directory under `root`. */
+export function stzPath(root: string, relPath: string): string {
+  const segments = [root, STZ_DIR, relPath];
+  return join(...segments);
+}