npm - cclaw-cli - Versions diffs - 0.49.0 → 0.51.1 - Mend

cclaw-cli 0.49.0 → 0.51.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183) hide show

package/README.md +57 -84
package/dist/artifact-linter.d.ts +4 -0
package/dist/artifact-linter.js +24 -3
package/dist/cli.d.ts +1 -19
package/dist/cli.js +49 -491
package/dist/constants.d.ts +2 -13
package/dist/constants.js +1 -43
package/dist/content/closeout-guidance.d.ts +14 -0
package/dist/content/closeout-guidance.js +42 -0
package/dist/content/core-agents.js +55 -17
package/dist/content/decision-protocol.d.ts +12 -0
package/dist/content/decision-protocol.js +20 -0
package/dist/content/diff-command.d.ts +1 -2
package/dist/content/diff-command.js +8 -94
package/dist/content/examples.d.ts +4 -10
package/dist/content/examples.js +10 -20
package/dist/content/hook-events.js +2 -2
package/dist/content/hook-inline-snippets.d.ts +5 -2
package/dist/content/hook-inline-snippets.js +33 -1
package/dist/content/hook-manifest.d.ts +3 -4
package/dist/content/hook-manifest.js +11 -12
package/dist/content/hooks.js +44 -21
package/dist/content/ideate-command.d.ts +2 -0
package/dist/content/ideate-command.js +34 -25
package/dist/content/iron-laws.d.ts +5 -5
package/dist/content/iron-laws.js +5 -5
package/dist/content/language-policy.d.ts +2 -0
package/dist/content/language-policy.js +13 -0
package/dist/content/learnings.d.ts +3 -4
package/dist/content/learnings.js +26 -50
package/dist/content/meta-skill.js +33 -22
package/dist/content/next-command.js +41 -38
package/dist/content/node-hooks.js +17 -345
package/dist/content/opencode-plugin.js +5 -103
package/dist/content/research-playbooks.js +14 -14
package/dist/content/review-loop.d.ts +2 -0
package/dist/content/review-loop.js +8 -0
package/dist/content/session-hooks.js +15 -47
package/dist/content/skills.d.ts +0 -5
package/dist/content/skills.js +55 -128
package/dist/content/stage-common-guidance.d.ts +0 -1
package/dist/content/stage-common-guidance.js +17 -14
package/dist/content/stage-schema.d.ts +26 -1
package/dist/content/stage-schema.js +121 -40
package/dist/content/stages/_lint-metadata/index.js +9 -15
package/dist/content/stages/brainstorm.js +22 -43
package/dist/content/stages/design.js +37 -57
package/dist/content/stages/plan.js +22 -13
package/dist/content/stages/review.js +24 -27
package/dist/content/stages/scope.js +34 -46
package/dist/content/stages/ship.js +7 -4
package/dist/content/stages/spec.js +20 -9
package/dist/content/stages/tdd.js +64 -44
package/dist/content/start-command.js +13 -12
package/dist/content/status-command.d.ts +2 -7
package/dist/content/status-command.js +19 -146
package/dist/content/subagents.d.ts +0 -5
package/dist/content/subagents.js +51 -28
package/dist/content/templates.d.ts +1 -1
package/dist/content/templates.js +126 -135
package/dist/content/track-render-context.d.ts +17 -0
package/dist/content/track-render-context.js +44 -0
package/dist/content/tree-command.d.ts +1 -2
package/dist/content/tree-command.js +4 -87
package/dist/content/utility-skills.d.ts +2 -29
package/dist/content/utility-skills.js +2 -1534
package/dist/content/view-command.js +31 -11
package/dist/delegation.d.ts +1 -1
package/dist/delegation.js +5 -15
package/dist/doctor-registry.js +20 -21
package/dist/doctor.js +88 -344
package/dist/flow-state.d.ts +3 -0
package/dist/flow-state.js +2 -0
package/dist/harness-adapters.d.ts +1 -1
package/dist/harness-adapters.js +51 -58
package/dist/install.js +128 -358
package/dist/internal/advance-stage.js +3 -9
package/dist/internal/compound-readiness.d.ts +1 -1
package/dist/internal/compound-readiness.js +1 -1
package/dist/internal/tdd-loop-status.d.ts +1 -1
package/dist/internal/tdd-loop-status.js +1 -1
package/dist/knowledge-store.d.ts +16 -10
package/dist/knowledge-store.js +51 -15
package/dist/policy.js +16 -105
package/dist/run-archive.d.ts +4 -6
package/dist/run-archive.js +15 -20
package/dist/run-persistence.d.ts +2 -2
package/dist/run-persistence.js +3 -9
package/package.json +1 -2
package/dist/content/archive-command.d.ts +0 -2
package/dist/content/archive-command.js +0 -124
package/dist/content/compound-command.d.ts +0 -5
package/dist/content/compound-command.js +0 -193
package/dist/content/contexts.d.ts +0 -18
package/dist/content/contexts.js +0 -24
package/dist/content/contracts.d.ts +0 -2
package/dist/content/contracts.js +0 -51
package/dist/content/doctor-references.d.ts +0 -2
package/dist/content/doctor-references.js +0 -150
package/dist/content/eval-scaffold.d.ts +0 -15
package/dist/content/eval-scaffold.js +0 -370
package/dist/content/feature-command.d.ts +0 -2
package/dist/content/feature-command.js +0 -123
package/dist/content/flow-map.d.ts +0 -23
package/dist/content/flow-map.js +0 -134
package/dist/content/harness-doc.d.ts +0 -2
package/dist/content/harness-doc.js +0 -202
package/dist/content/harness-playbooks.d.ts +0 -24
package/dist/content/harness-playbooks.js +0 -393
package/dist/content/harness-tool-refs.d.ts +0 -20
package/dist/content/harness-tool-refs.js +0 -268
package/dist/content/ops-command.d.ts +0 -2
package/dist/content/ops-command.js +0 -71
package/dist/content/protocols.d.ts +0 -7
package/dist/content/protocols.js +0 -215
package/dist/content/retro-command.d.ts +0 -2
package/dist/content/retro-command.js +0 -165
package/dist/content/rewind-command.d.ts +0 -2
package/dist/content/rewind-command.js +0 -106
package/dist/content/tdd-log-command.d.ts +0 -2
package/dist/content/tdd-log-command.js +0 -85
package/dist/eval/agents/single-shot.d.ts +0 -27
package/dist/eval/agents/single-shot.js +0 -79
package/dist/eval/agents/with-tools.d.ts +0 -44
package/dist/eval/agents/with-tools.js +0 -261
package/dist/eval/agents/workflow.d.ts +0 -31
package/dist/eval/agents/workflow.js +0 -155
package/dist/eval/baseline.d.ts +0 -38
package/dist/eval/baseline.js +0 -282
package/dist/eval/config-loader.d.ts +0 -14
package/dist/eval/config-loader.js +0 -395
package/dist/eval/corpus.d.ts +0 -30
package/dist/eval/corpus.js +0 -330
package/dist/eval/cost-guard.d.ts +0 -102
package/dist/eval/cost-guard.js +0 -190
package/dist/eval/diff.d.ts +0 -64
package/dist/eval/diff.js +0 -323
package/dist/eval/llm-client.d.ts +0 -176
package/dist/eval/llm-client.js +0 -267
package/dist/eval/mode.d.ts +0 -28
package/dist/eval/mode.js +0 -61
package/dist/eval/progress.d.ts +0 -83
package/dist/eval/progress.js +0 -59
package/dist/eval/report.d.ts +0 -11
package/dist/eval/report.js +0 -181
package/dist/eval/rubric-loader.d.ts +0 -20
package/dist/eval/rubric-loader.js +0 -143
package/dist/eval/runner.d.ts +0 -81
package/dist/eval/runner.js +0 -746
package/dist/eval/runs.d.ts +0 -41
package/dist/eval/runs.js +0 -114
package/dist/eval/sandbox.d.ts +0 -38
package/dist/eval/sandbox.js +0 -137
package/dist/eval/tools/glob.d.ts +0 -2
package/dist/eval/tools/glob.js +0 -163
package/dist/eval/tools/grep.d.ts +0 -2
package/dist/eval/tools/grep.js +0 -152
package/dist/eval/tools/index.d.ts +0 -7
package/dist/eval/tools/index.js +0 -35
package/dist/eval/tools/read.d.ts +0 -2
package/dist/eval/tools/read.js +0 -122
package/dist/eval/tools/types.d.ts +0 -49
package/dist/eval/tools/types.js +0 -41
package/dist/eval/tools/write.d.ts +0 -2
package/dist/eval/tools/write.js +0 -92
package/dist/eval/types.d.ts +0 -561
package/dist/eval/types.js +0 -47
package/dist/eval/verifiers/judge.d.ts +0 -40
package/dist/eval/verifiers/judge.js +0 -256
package/dist/eval/verifiers/rules.d.ts +0 -24
package/dist/eval/verifiers/rules.js +0 -218
package/dist/eval/verifiers/structural.d.ts +0 -14
package/dist/eval/verifiers/structural.js +0 -171
package/dist/eval/verifiers/traceability.d.ts +0 -23
package/dist/eval/verifiers/traceability.js +0 -84
package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
package/dist/eval/verifiers/workflow-consistency.js +0 -225
package/dist/eval/workflow-corpus.d.ts +0 -7
package/dist/eval/workflow-corpus.js +0 -207
package/dist/feature-system.d.ts +0 -42
package/dist/feature-system.js +0 -432
package/dist/internal/knowledge-digest.d.ts +0 -7
package/dist/internal/knowledge-digest.js +0 -93

package/dist/eval/verifiers/judge.js DELETED Viewed

@@ -1,256 +0,0 @@
-/**
- * LLM judge verifier — Step 3.
- *
- * Given an artifact and the stage's rubric, runs N judge samples (default
- * median-of-3) against the configured LLM, aggregates the per-check
- * scores, and returns one VerifierResult per rubric check plus one
- * aggregate result covering the whole stage.
- *
- * Deterministic pieces (JSON parsing, aggregation, scoring) are kept pure
- * so unit tests inject a stub EvalLlmClient and assert on the aggregate
- * math without touching the network.
- */
-import { EvalLlmError } from "../llm-client.js";
-import { computeUsageUsd } from "../cost-guard.js";
-const SCALE_MIN = 1;
-const SCALE_MAX = 5;
-const SYSTEM_PREAMBLE = `You are a strict reviewer for software engineering artifacts. ` +
-    `You will receive a rubric and an artifact. ` +
-    `Score each rubric check on an integer 1..5 scale, where:\n` +
-    `  1 = does not meet the bar at all\n` +
-    `  2 = barely meets the bar, major gaps\n` +
-    `  3 = partially meets the bar, noticeable gaps\n` +
-    `  4 = mostly meets the bar, small gaps\n` +
-    `  5 = fully meets the bar\n` +
-    `Respond with JSON only (no prose, no markdown fences). ` +
-    `Shape: {"scores": {"<check-id>": 1..5, ...}, "rationales": {"<check-id>": "one sentence", ...}}. ` +
-    `Include every check id in both maps. Use integer scores only.`;
-function median(values) {
-    if (values.length === 0)
-        return 0;
-    const sorted = [...values].sort((a, b) => a - b);
-    const mid = Math.floor(sorted.length / 2);
-    if (sorted.length % 2 === 1)
-        return sorted[mid];
-    return ((sorted[mid - 1] + sorted[mid]) / 2);
-}
-function mean(values) {
-    if (values.length === 0)
-        return 0;
-    return values.reduce((acc, v) => acc + v, 0) / values.length;
-}
-function clampScore(raw) {
-    if (typeof raw !== "number" || !Number.isFinite(raw))
-        return undefined;
-    const clamped = Math.round(Math.min(Math.max(raw, SCALE_MIN), SCALE_MAX));
-    return clamped;
-}
-function stripFences(raw) {
-    const trimmed = raw.trim();
-    if (!trimmed.startsWith("```"))
-        return trimmed;
-    return trimmed.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
-}
-/**
- * Parse one judge response into a JudgeSample. The parser is intentionally
- * forgiving with rationales (missing -> empty string) but strict with
- * scores: missing or non-numeric entries are dropped and the coverage
- * flag on the aggregate flips to false.
- */
-export function parseJudgeResponse(content, rubric) {
-    let parsed;
-    try {
-        parsed = JSON.parse(stripFences(content));
-    }
-    catch (err) {
-        throw new Error(`Judge response was not valid JSON: ${err instanceof Error ? err.message : String(err)}`);
-    }
-    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
-        throw new Error("Judge response must be a JSON object with scores/rationales maps.");
-    }
-    const rawScores = parsed.scores;
-    const rawRationales = parsed.rationales;
-    if (!rawScores || typeof rawScores !== "object" || Array.isArray(rawScores)) {
-        throw new Error('Judge response missing "scores" object.');
-    }
-    const scores = {};
-    const rationales = {};
-    for (const check of rubric.checks) {
-        const rawScore = rawScores[check.id];
-        const clamped = clampScore(rawScore);
-        if (clamped !== undefined)
-            scores[check.id] = clamped;
-        let rationale = "";
-        if (rawRationales && typeof rawRationales === "object" && !Array.isArray(rawRationales)) {
-            const raw = rawRationales[check.id];
-            if (typeof raw === "string")
-                rationale = raw.trim();
-        }
-        rationales[check.id] = rationale;
-    }
-    return { scores, rationales };
-}
-function aggregateSamples(rubric, samples) {
-    return rubric.checks.map((check) => {
-        const values = [];
-        let covered = true;
-        for (const sample of samples) {
-            const value = sample.scores[check.id];
-            if (typeof value === "number")
-                values.push(value);
-            else
-                covered = false;
-        }
-        return {
-            checkId: check.id,
-            samples: values,
-            median: median(values),
-            mean: Number(mean(values).toFixed(4)),
-            coverage: covered && samples.length > 0
-        };
-    });
-}
-function buildMessages(artifact, rubric) {
-    const rubricLines = rubric.checks.map((check) => {
-        const scale = check.scale ? ` (${check.scale})` : "";
-        const critical = check.critical ? " [critical]" : "";
-        return `- ${check.id}${critical}: ${check.prompt}${scale}`;
-    });
-    const userContent = [
-        `Rubric (stage=${rubric.stage}, rubric=${rubric.id}):`,
-        ...rubricLines,
-        ``,
-        `Artifact:`,
-        `"""`,
-        artifact,
-        `"""`,
-        ``,
-        `Return JSON only.`
-    ].join("\n");
-    return [
-        { role: "system", content: SYSTEM_PREAMBLE },
-        { role: "user", content: userContent }
-    ];
-}
-function sumUsage(usages) {
-    let promptTokens = 0;
-    let completionTokens = 0;
-    let totalTokens = 0;
-    for (const u of usages) {
-        promptTokens += u.promptTokens;
-        completionTokens += u.completionTokens;
-        totalTokens += u.totalTokens;
-    }
-    return { promptTokens, completionTokens, totalTokens };
-}
-/** Run the judge against an artifact and return per-sample + aggregate data. */
-export async function runJudge(options) {
-    const { artifact, rubric, config, client, caseHint, baseSeed } = options;
-    const rawSamples = caseHint?.samples ?? config.judgeSamples ?? 3;
-    if (!Number.isInteger(rawSamples) || rawSamples < 1) {
-        throw new Error(`Invalid judge sample count: ${rawSamples}. Use a positive integer (1, 3, 5).`);
-    }
-    if (rawSamples % 2 === 0) {
-        throw new Error(`Judge sample count must be odd (so a true median exists), got: ${rawSamples}.`);
-    }
-    const started = Date.now();
-    const model = config.judgeModel ?? config.model;
-    const temperature = config.judgeTemperature ?? 0;
-    const messages = buildMessages(artifact, rubric);
-    const samples = [];
-    const usages = [];
-    for (let i = 0; i < rawSamples; i += 1) {
-        let response;
-        try {
-            response = await client.chat({
-                model,
-                messages,
-                temperature,
-                responseFormatJson: true,
-                ...(baseSeed !== undefined ? { seed: baseSeed + i } : {}),
-                timeoutMs: config.timeoutMs
-            });
-        }
-        catch (err) {
-            if (err instanceof EvalLlmError)
-                throw err;
-            throw err;
-        }
-        usages.push(response.usage);
-        samples.push(parseJudgeResponse(response.content, rubric));
-    }
-    const aggregates = aggregateSamples(rubric, samples);
-    const usage = sumUsage(usages);
-    const usageUsd = computeUsageUsd(model, usage, { tokenPricing: config.tokenPricing });
-    return {
-        rubricId: rubric.id,
-        samples,
-        aggregates,
-        usageUsd,
-        durationMs: Date.now() - started
-    };
-}
-function verifierIdFor(check) {
-    return `judge:${check.id}`;
-}
-/**
- * Convert a JudgeInvocation into VerifierResult[] for the runner. One
- * result per rubric check (score 0..1 normalized from the 1..5 median) +
- * one "coverage" result that flips to `ok:false` when any sample failed
- * to emit a score for a check.
- */
-export function judgeResultsToVerifiers(rubric, invocation, config, caseHint) {
-    const out = [];
-    const failIfCriticalBelow = config.regression.failIfCriticalBelow;
-    for (const aggregate of invocation.aggregates) {
-        const check = rubric.checks.find((c) => c.id === aggregate.checkId);
-        if (!check)
-            continue;
-        const normalized = (aggregate.median - SCALE_MIN) / (SCALE_MAX - SCALE_MIN);
-        const caseMinimum = caseHint?.minimumScores?.[check.id];
-        const criticalFloor = check.critical ? failIfCriticalBelow : undefined;
-        const floors = [];
-        if (typeof caseMinimum === "number")
-            floors.push(caseMinimum);
-        if (typeof criticalFloor === "number")
-            floors.push(criticalFloor);
-        const floor = floors.length > 0 ? Math.max(...floors) : undefined;
-        const ok = !aggregate.coverage
-            ? false
-            : floor === undefined || aggregate.median >= floor;
-        out.push({
-            kind: "judge",
-            id: verifierIdFor(check),
-            ok,
-            score: Number(Math.max(0, Math.min(1, normalized)).toFixed(4)),
-            message: ok
-                ? `median=${aggregate.median.toFixed(2)} across ${aggregate.samples.length} sample(s)`
-                : aggregate.coverage
-                    ? `median=${aggregate.median.toFixed(2)} below floor=${floor?.toFixed(2) ?? "n/a"}`
-                    : `judge did not score every sample (${aggregate.samples.length}/${invocation.samples.length}); treated as failing`,
-            details: {
-                median: aggregate.median,
-                mean: aggregate.mean,
-                samples: aggregate.samples,
-                coverage: aggregate.coverage,
-                critical: check.critical === true,
-                caseMinimum: caseMinimum ?? null,
-                criticalFloor: criticalFloor ?? null
-            }
-        });
-    }
-    const required = caseHint?.requiredChecks ?? [];
-    const covered = new Set(rubric.checks.map((c) => c.id));
-    const missingRequired = required.filter((id) => !covered.has(id));
-    if (missingRequired.length > 0) {
-        out.push({
-            kind: "judge",
-            id: "judge:required-checks",
-            ok: false,
-            score: 0,
-            message: `Rubric is missing required check id(s): ${missingRequired.join(", ")}`,
-            details: { missing: missingRequired, rubricId: rubric.id }
-        });
-    }
-    return out;
-}

package/dist/eval/verifiers/rules.d.ts DELETED Viewed

@@ -1,24 +0,0 @@
-/**
- * Rule-based verifier: deterministic, zero-LLM checks that are richer than
- * structural heading/length assertions. Each rule produces exactly one
- * `VerifierResult` so baselines diff at the check level, and authoring a
- * rule sideways in YAML never silently skips.
- *
- * Semantics:
- *
- * - All substring matching is case-insensitive. Regex matching uses the
- *   flags declared on the rule (default `"i"`).
- * - Rules operate on the artifact BODY (frontmatter stripped), mirroring
- *   the structural verifier so min/max counts and length checks agree on
- *   what "body" means.
- * - `uniqueBulletsInSection` scans every section (heading, case-insensitive
- *   substring match) and flags duplicate top-level bullets ("- item"). The
- *   search stops at the next heading of equal or lower depth.
- */
-import type { RulesExpected, VerifierResult } from "../types.js";
-/**
- * Run every configured rule check against the artifact body. Returns `[]`
- * when `expected` is undefined or empty so the runner can distinguish
- * "no rules declared" from "all rules passed".
- */
-export declare function verifyRules(artifact: string, expected: RulesExpected | undefined): VerifierResult[];

package/dist/eval/verifiers/rules.js DELETED Viewed

@@ -1,218 +0,0 @@
-import { splitFrontmatter } from "./structural.js";
-function slugify(input) {
-    return (input
-        .toLowerCase()
-        .replace(/[^a-z0-9]+/g, "-")
-        .replace(/(^-|-$)/g, "")
-        .slice(0, 64) || "rule");
-}
-function result(id, ok, message, details) {
-    return {
-        kind: "rules",
-        id,
-        ok,
-        score: ok ? 1 : 0,
-        message,
-        ...(details !== undefined ? { details } : {})
-    };
-}
-function countOccurrences(haystack, needle) {
-    if (needle.length === 0)
-        return 0;
-    let index = 0;
-    let count = 0;
-    while (true) {
-        const at = haystack.indexOf(needle, index);
-        if (at < 0)
-            return count;
-        count += 1;
-        index = at + needle.length;
-    }
-}
-function compileRegex(rule) {
-    const flags = rule.flags ?? "i";
-    try {
-        return new RegExp(rule.pattern, flags);
-    }
-    catch (err) {
-        throw new Error(`Invalid regex for rule "${rule.description ?? rule.pattern}" ` +
-            `(pattern=${JSON.stringify(rule.pattern)}, flags=${JSON.stringify(flags)}): ` +
-            (err instanceof Error ? err.message : String(err)));
-    }
-}
-function ruleLabel(rule) {
-    return rule.description?.trim() || rule.pattern;
-}
-function checkMustContain(needles, body) {
-    const bodyLower = body.toLowerCase();
-    return needles.map((needle) => {
-        const found = bodyLower.includes(needle.toLowerCase());
-        return result(`rules:contains:${slugify(needle)}`, found, found
-            ? `Required phrase "${needle}" present.`
-            : `Required phrase "${needle}" missing from body.`, { phrase: needle });
-    });
-}
-function checkMustNotContain(needles, body) {
-    const bodyLower = body.toLowerCase();
-    return needles.map((needle) => {
-        const lowered = needle.toLowerCase();
-        const occurrences = countOccurrences(bodyLower, lowered);
-        const ok = occurrences === 0;
-        return result(`rules:not-contains:${slugify(needle)}`, ok, ok
-            ? `Forbidden phrase "${needle}" absent (as required).`
-            : `Forbidden phrase "${needle}" appears ${occurrences} time(s).`, { phrase: needle, occurrences });
-    });
-}
-function checkRegexRequired(rules, body) {
-    return rules.map((rule) => {
-        const label = ruleLabel(rule);
-        const regex = compileRegex(rule);
-        const matches = body.match(new RegExp(regex.source, withGlobal(regex.flags)));
-        const count = matches ? matches.length : 0;
-        const ok = count > 0;
-        return result(`rules:regex-required:${slugify(label)}`, ok, ok
-            ? `Required pattern /${rule.pattern}/ matched ${count} time(s).`
-            : `Required pattern /${rule.pattern}/ did not match.`, { pattern: rule.pattern, flags: rule.flags ?? "i", matches: count });
-    });
-}
-function checkRegexForbidden(rules, body) {
-    return rules.map((rule) => {
-        const label = ruleLabel(rule);
-        const regex = compileRegex(rule);
-        const matches = body.match(new RegExp(regex.source, withGlobal(regex.flags)));
-        const count = matches ? matches.length : 0;
-        const ok = count === 0;
-        return result(`rules:regex-forbidden:${slugify(label)}`, ok, ok
-            ? `Forbidden pattern /${rule.pattern}/ absent.`
-            : `Forbidden pattern /${rule.pattern}/ matched ${count} time(s).`, { pattern: rule.pattern, flags: rule.flags ?? "i", matches: count });
-    });
-}
-function withGlobal(flags) {
-    return flags.includes("g") ? flags : `${flags}g`;
-}
-function checkMinOccurrences(bounds, body) {
-    const bodyLower = body.toLowerCase();
-    return Object.entries(bounds).map(([needle, min]) => {
-        const occurrences = countOccurrences(bodyLower, needle.toLowerCase());
-        const ok = occurrences >= min;
-        return result(`rules:min-occurrences:${slugify(needle)}`, ok, ok
-            ? `Phrase "${needle}" appears ${occurrences} time(s) (>= ${min}).`
-            : `Phrase "${needle}" appears ${occurrences} time(s); expected at least ${min}.`, { phrase: needle, occurrences, min });
-    });
-}
-function checkMaxOccurrences(bounds, body) {
-    const bodyLower = body.toLowerCase();
-    return Object.entries(bounds).map(([needle, max]) => {
-        const occurrences = countOccurrences(bodyLower, needle.toLowerCase());
-        const ok = occurrences <= max;
-        return result(`rules:max-occurrences:${slugify(needle)}`, ok, ok
-            ? `Phrase "${needle}" appears ${occurrences} time(s) (<= ${max}).`
-            : `Phrase "${needle}" appears ${occurrences} time(s); expected at most ${max}.`, { phrase: needle, occurrences, max });
-    });
-}
-function sliceBySection(body) {
-    const lines = body.split(/\r?\n/);
-    const slices = [];
-    let current = null;
-    for (const rawLine of lines) {
-        const line = rawLine.trimStart();
-        const match = line.match(/^(#{1,6})\s+(.+?)\s*$/);
-        if (match) {
-            if (current) {
-                slices.push({
-                    heading: current.heading,
-                    depth: current.depth,
-                    body: current.body.join("\n")
-                });
-            }
-            current = { heading: match[2].trim(), depth: match[1].length, body: [] };
-        }
-        else if (current) {
-            current.body.push(rawLine);
-        }
-    }
-    if (current) {
-        slices.push({
-            heading: current.heading,
-            depth: current.depth,
-            body: current.body.join("\n")
-        });
-    }
-    return slices;
-}
-function extractTopLevelBullets(sectionBody) {
-    const bullets = [];
-    for (const rawLine of sectionBody.split(/\r?\n/)) {
-        const line = rawLine.replace(/\s+$/, "");
-        const leading = line.match(/^(\s*)[-*]\s+(.+)$/);
-        if (!leading)
-            continue;
-        if (leading[1].length > 0)
-            continue;
-        bullets.push(leading[2].trim());
-    }
-    return bullets;
-}
-function checkUniqueBulletsInSection(sections, body) {
-    const slices = sliceBySection(body);
-    return sections.map((needle) => {
-        const lowerNeedle = needle.toLowerCase();
-        const slice = slices.find((s) => s.heading.toLowerCase().includes(lowerNeedle));
-        if (!slice) {
-            return result(`rules:unique-in-section:${slugify(needle)}`, false, `Section matching "${needle}" not found; cannot check uniqueness.`, { section: needle, found: false });
-        }
-        const bullets = extractTopLevelBullets(slice.body);
-        const seen = new Map();
-        for (const bullet of bullets) {
-            const key = bullet.toLowerCase();
-            seen.set(key, (seen.get(key) ?? 0) + 1);
-        }
-        const duplicates = [...seen.entries()]
-            .filter(([, count]) => count > 1)
-            .map(([entry, count]) => ({ entry, count }));
-        const ok = duplicates.length === 0;
-        return result(`rules:unique-in-section:${slugify(needle)}`, ok, ok
-            ? `Section "${slice.heading}" has ${bullets.length} unique bullet(s).`
-            : `Section "${slice.heading}" has duplicate bullet(s): ${duplicates
-                .map((d) => `"${d.entry}" x${d.count}`)
-                .join(", ")}.`, {
-            section: slice.heading,
-            bullets: bullets.length,
-            duplicates
-        });
-    });
-}
-/**
- * Run every configured rule check against the artifact body. Returns `[]`
- * when `expected` is undefined or empty so the runner can distinguish
- * "no rules declared" from "all rules passed".
- */
-export function verifyRules(artifact, expected) {
-    if (!expected)
-        return [];
-    const split = splitFrontmatter(artifact);
-    const body = split.body;
-    const results = [];
-    if (expected.mustContain?.length) {
-        results.push(...checkMustContain(expected.mustContain, body));
-    }
-    if (expected.mustNotContain?.length) {
-        results.push(...checkMustNotContain(expected.mustNotContain, body));
-    }
-    if (expected.regexRequired?.length) {
-        results.push(...checkRegexRequired(expected.regexRequired, body));
-    }
-    if (expected.regexForbidden?.length) {
-        results.push(...checkRegexForbidden(expected.regexForbidden, body));
-    }
-    if (expected.minOccurrences && Object.keys(expected.minOccurrences).length) {
-        results.push(...checkMinOccurrences(expected.minOccurrences, body));
-    }
-    if (expected.maxOccurrences && Object.keys(expected.maxOccurrences).length) {
-        results.push(...checkMaxOccurrences(expected.maxOccurrences, body));
-    }
-    if (expected.uniqueBulletsInSection?.length) {
-        results.push(...checkUniqueBulletsInSection(expected.uniqueBulletsInSection, body));
-    }
-    return results;
-}

package/dist/eval/verifiers/structural.d.ts DELETED Viewed

@@ -1,14 +0,0 @@
-import type { StructuralExpected, VerifierResult } from "../types.js";
-export interface ArtifactSplit {
-    hasFrontmatter: boolean;
-    frontmatterRaw: string;
-    frontmatterParsed?: Record<string, unknown>;
-    body: string;
-}
-export declare function splitFrontmatter(artifact: string): ArtifactSplit;
-/**
- * Run every configured structural check against the artifact text.
- * Returns [] when `expected` is undefined/empty so the runner can treat
- * "no structural expectations" as "no verifier results" rather than "pass".
- */
-export declare function verifyStructural(artifact: string, expected: StructuralExpected | undefined): VerifierResult[];

package/dist/eval/verifiers/structural.js DELETED Viewed

@@ -1,171 +0,0 @@
-/**
- * Structural verifier: deterministic, zero-LLM checks against a
- * single markdown artifact. Each structural expectation produces one
- * `VerifierResult` so baselines diff cleanly at the check level rather than
- * lumping everything into a single boolean.
- *
- * Design notes:
- *
- * - All pattern matching is case-insensitive. Authoring a check as
- *   `"Directions"` matches `## Directions` and `### directions-suggested`.
- * - Frontmatter detection is permissive: it must start at byte 0 with `---\n`
- *   and close on a subsequent `---` line. Anything else is treated as "no
- *   frontmatter", which fails every `requiredFrontmatterKeys` entry
- *   deterministically.
- * - `minLines`/`maxLines` intentionally exclude frontmatter so a rewrite that
- *   adds metadata does not accidentally drop the body below the floor.
- * - Scoring: each check scores 0 or 1. The case `passed` becomes the AND of
- *   all individual `ok` flags. This keeps the structural verifier
- *   deterministic; the 0..1 rubric scale shows up later in the LLM judge.
- */
-import { parse as parseYaml } from "yaml";
-const FRONTMATTER_OPEN = /^---\r?\n/;
-const FRONTMATTER_CLOSE = /\r?\n---\r?(?:\n|$)/;
-function slugify(input) {
-    return input
-        .toLowerCase()
-        .replace(/[^a-z0-9]+/g, "-")
-        .replace(/(^-|-$)/g, "")
-        .slice(0, 64);
-}
-export function splitFrontmatter(artifact) {
-    if (!FRONTMATTER_OPEN.test(artifact)) {
-        return { hasFrontmatter: false, frontmatterRaw: "", body: artifact };
-    }
-    const afterOpen = artifact.replace(FRONTMATTER_OPEN, "");
-    const closeMatch = afterOpen.match(FRONTMATTER_CLOSE);
-    if (!closeMatch || closeMatch.index === undefined) {
-        return { hasFrontmatter: false, frontmatterRaw: "", body: artifact };
-    }
-    const frontmatterRaw = afterOpen.slice(0, closeMatch.index);
-    const body = afterOpen.slice(closeMatch.index + closeMatch[0].length);
-    let frontmatterParsed;
-    try {
-        const parsed = parseYaml(frontmatterRaw);
-        if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
-            frontmatterParsed = parsed;
-        }
-    }
-    catch {
-        frontmatterParsed = undefined;
-    }
-    return {
-        hasFrontmatter: true,
-        frontmatterRaw,
-        frontmatterParsed,
-        body
-    };
-}
-function extractHeadingLines(body) {
-    return body
-        .split(/\r?\n/)
-        .map((line) => line.trimStart())
-        .filter((line) => /^#{1,6}\s+\S/.test(line));
-}
-function result(id, ok, message, details) {
-    return {
-        kind: "structural",
-        id,
-        ok,
-        score: ok ? 1 : 0,
-        message,
-        ...(details !== undefined ? { details } : {})
-    };
-}
-function checkRequiredSections(sections, body) {
-    const headings = extractHeadingLines(body).map((line) => line.toLowerCase());
-    return sections.map((section) => {
-        const needle = section.toLowerCase().trim();
-        const found = headings.some((heading) => heading.includes(needle));
-        return result(`structural:section:${slugify(section)}`, found, found
-            ? `Section matching "${section}" present.`
-            : `No heading contains "${section}".`, { pattern: section, searchedHeadings: headings.length });
-    });
-}
-function checkForbiddenPatterns(patterns, body) {
-    const bodyLower = body.toLowerCase();
-    return patterns.map((pattern) => {
-        const needle = pattern.toLowerCase();
-        const hits = countOccurrences(bodyLower, needle);
-        const ok = hits === 0;
-        return result(`structural:forbidden:${slugify(pattern)}`, ok, ok
-            ? `Pattern "${pattern}" absent (as required).`
-            : `Pattern "${pattern}" appears ${hits} time(s); remove.`, { pattern, occurrences: hits });
-    });
-}
-function countOccurrences(haystack, needle) {
-    if (needle.length === 0)
-        return 0;
-    let index = 0;
-    let count = 0;
-    while (true) {
-        const at = haystack.indexOf(needle, index);
-        if (at < 0)
-            return count;
-        count += 1;
-        index = at + needle.length;
-    }
-}
-function checkLengthBounds(expected, body) {
-    const results = [];
-    const lineCount = body.length === 0 ? 0 : body.split(/\r?\n/).length;
-    const charCount = body.length;
-    if (expected.minLines !== undefined || expected.maxLines !== undefined) {
-        const min = expected.minLines;
-        const max = expected.maxLines;
-        const withinMin = min === undefined || lineCount >= min;
-        const withinMax = max === undefined || lineCount <= max;
-        const ok = withinMin && withinMax;
-        results.push(result("structural:length:lines", ok, ok
-            ? `Body has ${lineCount} line(s), within bounds.`
-            : buildOutOfRangeMessage("line", lineCount, min, max), { lineCount, minLines: min, maxLines: max }));
-    }
-    if (expected.minChars !== undefined || expected.maxChars !== undefined) {
-        const min = expected.minChars;
-        const max = expected.maxChars;
-        const withinMin = min === undefined || charCount >= min;
-        const withinMax = max === undefined || charCount <= max;
-        const ok = withinMin && withinMax;
-        results.push(result("structural:length:chars", ok, ok
-            ? `Body has ${charCount} char(s), within bounds.`
-            : buildOutOfRangeMessage("char", charCount, min, max), { charCount, minChars: min, maxChars: max }));
-    }
-    return results;
-}
-function buildOutOfRangeMessage(unit, actual, min, max) {
-    const lo = min === undefined ? "0" : String(min);
-    const hi = max === undefined ? "∞" : String(max);
-    return `Body has ${actual} ${unit}(s); expected ${lo}..${hi}.`;
-}
-function checkFrontmatterKeys(keys, split) {
-    if (!split.hasFrontmatter || !split.frontmatterParsed) {
-        return keys.map((key) => result(`structural:frontmatter:${slugify(key)}`, false, `Frontmatter key "${key}" missing (no parseable frontmatter).`, { key, frontmatterPresent: split.hasFrontmatter }));
-    }
-    const present = new Set(Object.keys(split.frontmatterParsed));
-    return keys.map((key) => {
-        const ok = present.has(key);
-        return result(`structural:frontmatter:${slugify(key)}`, ok, ok ? `Frontmatter key "${key}" present.` : `Frontmatter key "${key}" missing.`, { key });
-    });
-}
-/**
- * Run every configured structural check against the artifact text.
- * Returns [] when `expected` is undefined/empty so the runner can treat
- * "no structural expectations" as "no verifier results" rather than "pass".
- */
-export function verifyStructural(artifact, expected) {
-    if (!expected)
-        return [];
-    const split = splitFrontmatter(artifact);
-    const results = [];
-    if (expected.requiredSections?.length) {
-        results.push(...checkRequiredSections(expected.requiredSections, split.body));
-    }
-    if (expected.forbiddenPatterns?.length) {
-        results.push(...checkForbiddenPatterns(expected.forbiddenPatterns, split.body));
-    }
-    results.push(...checkLengthBounds(expected, split.body));
-    if (expected.requiredFrontmatterKeys?.length) {
-        results.push(...checkFrontmatterKeys(expected.requiredFrontmatterKeys, split));
-    }
-    return results;
-}