npm - @interf/compiler - Versions diffs - 0.4.1 → 0.5.0 - Mend

@interf/compiler 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (160)

package/README.md +71 -66
package/builtin-workflows/interf/README.md +6 -6
package/builtin-workflows/interf/compile/stages/shape/SKILL.md +7 -7
package/builtin-workflows/interf/compile/stages/structure/SKILL.md +2 -2
package/builtin-workflows/interf/compile/stages/summarize/SKILL.md +1 -1
package/builtin-workflows/interf/{workspace.schema.json → compiled.schema.json} +5 -5
package/builtin-workflows/interf/improve/SKILL.md +3 -3
package/builtin-workflows/interf/use/query/SKILL.md +2 -2
package/builtin-workflows/interf/workflow.json +42 -31
package/dist/commands/check-draft.d.ts +19 -0
package/dist/commands/check-draft.js +110 -0
package/dist/commands/compile-controller.d.ts +4 -4
package/dist/commands/compile-controller.js +117 -81
package/dist/commands/compile.d.ts +5 -5
package/dist/commands/compile.js +61 -62
package/dist/commands/compiled-flow.d.ts +23 -0
package/dist/commands/compiled-flow.js +112 -0
package/dist/commands/create-workflow-wizard.d.ts +3 -3
package/dist/commands/create-workflow-wizard.js +11 -11
package/dist/commands/create.d.ts +2 -2
package/dist/commands/create.js +50 -57
package/dist/commands/default.js +2 -2
package/dist/commands/executor-flow.d.ts +20 -1
package/dist/commands/executor-flow.js +67 -7
package/dist/commands/init.js +242 -289
package/dist/commands/list.js +14 -10
package/dist/commands/reset.js +6 -6
package/dist/commands/source-config-wizard.d.ts +12 -8
package/dist/commands/source-config-wizard.js +356 -119
package/dist/commands/status.js +49 -26
package/dist/commands/test-flow.d.ts +23 -10
package/dist/commands/test-flow.js +278 -58
package/dist/commands/test.d.ts +7 -1
package/dist/commands/test.js +264 -65
package/dist/commands/verify.js +23 -14
package/dist/index.d.ts +7 -7
package/dist/index.js +4 -4
package/dist/lib/agent-args.js +2 -1
package/dist/lib/agent-constants.js +1 -1
package/dist/lib/agent-render.js +4 -4
package/dist/lib/agent-shells.d.ts +8 -8
package/dist/lib/agent-shells.js +231 -142
package/dist/lib/compiled-compile.d.ts +52 -0
package/dist/lib/compiled-compile.js +274 -0
package/dist/lib/compiled-home.d.ts +5 -0
package/dist/lib/compiled-home.js +32 -0
package/dist/lib/compiled-layout.d.ts +2 -0
package/dist/lib/compiled-layout.js +60 -0
package/dist/lib/compiled-paths.d.ts +41 -0
package/dist/lib/compiled-paths.js +111 -0
package/dist/lib/{workspace-raw.d.ts → compiled-raw.d.ts} +8 -7
package/dist/lib/{workspace-raw.js → compiled-raw.js} +16 -14
package/dist/lib/compiled-reset.d.ts +1 -0
package/dist/lib/compiled-reset.js +44 -0
package/dist/lib/compiled-schema.d.ts +27 -0
package/dist/lib/compiled-schema.js +110 -0
package/dist/lib/config.d.ts +0 -1
package/dist/lib/config.js +0 -1
package/dist/lib/discovery.d.ts +1 -1
package/dist/lib/discovery.js +3 -3
package/dist/lib/interf-bootstrap.d.ts +1 -1
package/dist/lib/interf-bootstrap.js +4 -4
package/dist/lib/interf-detect.d.ts +10 -10
package/dist/lib/interf-detect.js +78 -56
package/dist/lib/interf-scaffold.d.ts +2 -2
package/dist/lib/interf-scaffold.js +90 -57
package/dist/lib/interf-workflow-package.d.ts +3 -3
package/dist/lib/interf-workflow-package.js +30 -30
package/dist/lib/interf.d.ts +5 -5
package/dist/lib/interf.js +4 -4
package/dist/lib/local-workflows.d.ts +4 -4
package/dist/lib/local-workflows.js +35 -70
package/dist/lib/obsidian.d.ts +1 -1
package/dist/lib/parse.js +92 -1
package/dist/lib/project-paths.d.ts +13 -0
package/dist/lib/project-paths.js +29 -0
package/dist/lib/runtime-acceptance.d.ts +7 -1
package/dist/lib/runtime-acceptance.js +194 -59
package/dist/lib/runtime-contracts.d.ts +2 -4
package/dist/lib/runtime-contracts.js +17 -161
package/dist/lib/runtime-inventory.d.ts +7 -0
package/dist/lib/runtime-inventory.js +29 -0
package/dist/lib/runtime-paths.js +5 -5
package/dist/lib/runtime-prompt.js +9 -6
package/dist/lib/runtime-reconcile.d.ts +2 -3
package/dist/lib/runtime-reconcile.js +92 -171
package/dist/lib/runtime-runs.js +30 -39
package/dist/lib/runtime-types.d.ts +10 -19
package/dist/lib/runtime.d.ts +2 -2
package/dist/lib/runtime.js +1 -1
package/dist/lib/schema.d.ts +163 -140
package/dist/lib/schema.js +163 -124
package/dist/lib/source-config.d.ts +24 -20
package/dist/lib/source-config.js +154 -116
package/dist/lib/state-artifacts.d.ts +5 -5
package/dist/lib/state-artifacts.js +8 -8
package/dist/lib/state-health.d.ts +4 -4
package/dist/lib/state-health.js +108 -126
package/dist/lib/state-io.d.ts +8 -8
package/dist/lib/state-io.js +77 -50
package/dist/lib/state-paths.js +5 -5
package/dist/lib/state-view.d.ts +4 -4
package/dist/lib/state-view.js +52 -55
package/dist/lib/state.d.ts +5 -5
package/dist/lib/state.js +4 -4
package/dist/lib/summarize-plan.d.ts +3 -2
package/dist/lib/summarize-plan.js +18 -16
package/dist/lib/test-execution.js +9 -9
package/dist/lib/test-matrices.d.ts +3 -3
package/dist/lib/test-matrices.js +6 -6
package/dist/lib/test-paths.d.ts +4 -4
package/dist/lib/test-paths.js +16 -10
package/dist/lib/test-sandbox.d.ts +1 -1
package/dist/lib/test-sandbox.js +38 -31
package/dist/lib/test-targets.d.ts +2 -2
package/dist/lib/test-targets.js +11 -11
package/dist/lib/test-types.d.ts +1 -1
package/dist/lib/test.d.ts +1 -1
package/dist/lib/test.js +1 -1
package/dist/lib/util.d.ts +2 -0
package/dist/lib/util.js +14 -1
package/dist/lib/validate-compiled.d.ts +27 -0
package/dist/lib/validate-compiled.js +236 -0
package/dist/lib/validate-helpers.d.ts +0 -8
package/dist/lib/validate-helpers.js +0 -30
package/dist/lib/validate.d.ts +4 -4
package/dist/lib/validate.js +49 -15
package/dist/lib/workflow-abi.d.ts +37 -46
package/dist/lib/workflow-abi.js +51 -76
package/dist/lib/workflow-definitions.d.ts +11 -11
package/dist/lib/workflow-definitions.js +36 -53
package/dist/lib/workflow-helpers.d.ts +2 -3
package/dist/lib/workflow-helpers.js +9 -13
package/dist/lib/workflow-improvement.d.ts +3 -3
package/dist/lib/workflow-improvement.js +48 -48
package/dist/lib/workflow-review-paths.d.ts +3 -3
package/dist/lib/workflow-review-paths.js +11 -11
package/dist/lib/workflow-stage-runner.d.ts +1 -1
package/dist/lib/workflow-stage-runner.js +8 -8
package/dist/lib/workflows.d.ts +9 -9
package/dist/lib/workflows.js +15 -17
package/package.json +10 -9
package/dist/commands/workspace-flow.d.ts +0 -23
package/dist/commands/workspace-flow.js +0 -109
package/dist/lib/registry.d.ts +0 -16
package/dist/lib/registry.js +0 -65
package/dist/lib/validate-workspace.d.ts +0 -121
package/dist/lib/validate-workspace.js +0 -407
package/dist/lib/workspace-compile.d.ts +0 -54
package/dist/lib/workspace-compile.js +0 -476
package/dist/lib/workspace-home.d.ts +0 -5
package/dist/lib/workspace-home.js +0 -32
package/dist/lib/workspace-layout.d.ts +0 -2
package/dist/lib/workspace-layout.js +0 -60
package/dist/lib/workspace-paths.d.ts +0 -41
package/dist/lib/workspace-paths.js +0 -107
package/dist/lib/workspace-reset.d.ts +0 -1
package/dist/lib/workspace-reset.js +0 -43
package/dist/lib/workspace-schema.d.ts +0 -17
package/dist/lib/workspace-schema.js +0 -74

package/dist/commands/status.js CHANGED

@@ -1,8 +1,8 @@
 import chalk from "chalk";
 import * as p from "@clack/prompts";
-import { detectInterf, listWorkspacesForSourceFolder, } from "../lib/interf.js";
-import { computeWorkspaceHealth } from "../lib/state.js";
-import { listWorkspaceEntries } from "../lib/registry.js";
+import { detectInterf, listCompiledDatasetsForSourceFolder, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
+import { computeCompiledHealth } from "../lib/state.js";
+import { readSavedTestComparison, printSavedTestComparisonState } from "./test-flow.js";
 function statusColor(status) {
     switch (status) {
         case "compiled":
@@ -17,54 +17,77 @@ function statusColor(status) {
 }
 export const statusCommand = {
     command: "status",
-    describe: "Show deterministic workspace health",
+    describe: "Show deterministic health for a compiled dataset",
     handler: async () => {
-        let workspacePath = null;
+        let compiledPath = null;
         const detected = detectInterf(process.cwd());
         if (detected) {
-            workspacePath = detected.path;
+            compiledPath = detected.path;
         }
         else {
-            const local = listWorkspacesForSourceFolder(process.cwd()).map(({ path, config }) => ({
+            const sourcePath = process.cwd();
+            const local = listCompiledDatasetsForSourceFolder(sourcePath).map(({ path, config }) => ({
                 path,
                 name: config.name,
             }));
-            const all = local.length > 0 ? local : listWorkspaceEntries().map((entry) => ({
-                path: entry.path,
-                name: entry.name,
-            }));
-            if (all.length === 0) {
+            if (local.length === 0) {
                 process.exitCode = 1;
-                console.log(chalk.red("  No workspaces found."));
+                console.log(chalk.red("  No compiled datasets found."));
+                console.log(chalk.dim("  Run `interf`, save truth checks, and compile a dataset first."));
                 return;
             }
-            if (all.length === 1) {
-                workspacePath = all[0].path;
+            if (local.length === 1) {
+                compiledPath = local[0].path;
             }
             else {
                 const selected = await p.select({
-                    message: "Which workspace?",
-                    options: all.map((entry) => ({ value: entry.path, label: entry.name })),
+                    message: "Which dataset?",
+                    options: local.map((entry) => ({ value: entry.path, label: entry.name })),
                 });
                 if (p.isCancel(selected))
                     return;
-                workspacePath = selected;
+                compiledPath = selected;
             }
         }
-        const health = computeWorkspaceHealth(workspacePath);
+        const health = computeCompiledHealth(compiledPath);
         const color = statusColor(health.status);
         console.log();
         console.log(color(`  ${health.target_name}`));
         console.log(chalk.dim(`  status: ${health.status}`));
         console.log(chalk.dim(`  stage: ${health.stage}`));
         console.log(chalk.dim(`  ${health.summary}`));
+        const compiledConfig = readInterfConfig(compiledPath);
+        const sourcePath = resolveSourceControlPath(compiledPath);
+        const latestComparison = compiledConfig
+            ? readSavedTestComparison(sourcePath, compiledConfig.name)
+            : null;
+        if (latestComparison) {
+            printSavedTestComparisonState(latestComparison);
+        }
+        else {
+            console.log();
+            console.log(chalk.dim("  No saved test result yet. Run `interf test` to measure files as-is and the compiled dataset."));
+        }
         console.log();
-        console.log(chalk.dim(`  source coverage: ${health.metrics.source_covered}/${health.metrics.source_total}`));
-        console.log(chalk.dim(`  summarized: ${health.metrics.summarized}`));
-        console.log(chalk.dim(`  compiled: ${health.metrics.compiled}`));
-        console.log(chalk.dim(`  entities: ${health.metrics.entities}`));
-        console.log(chalk.dim(`  claims: ${health.metrics.claims}`));
-        console.log(chalk.dim(`  warnings: ${health.metrics.warnings}`));
-        console.log(chalk.dim(`  errors: ${health.metrics.errors}`));
+        const metricOrder = [
+            "source_total",
+            "stage_total",
+            "completed_stages",
+            "warnings",
+            "errors",
+        ];
+        const printed = new Set();
+        for (const key of metricOrder) {
+            const value = health.metrics[key];
+            if (typeof value !== "number")
+                continue;
+            printed.add(key);
+            console.log(chalk.dim(`  ${key}: ${value}`));
+        }
+        for (const [key, value] of Object.entries(health.metrics)) {
+            if (printed.has(key))
+                continue;
+            console.log(chalk.dim(`  ${key}: ${value}`));
+        }
     },
 };

package/dist/commands/test-flow.d.ts CHANGED

@@ -1,36 +1,49 @@
 import { type TestTargetCandidate, type TestTargetResult } from "../lib/test.js";
 import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
 import type { TestSandboxRetentionMode } from "../lib/test-sandbox.js";
-import type { SourceWorkspaceConfig, TestRunMode } from "../lib/schema.js";
+import type { SourceDatasetConfig, TestRunComparison, TestRunMode } from "../lib/schema.js";
 export interface SavedTestOutcome {
     runPath: string;
+    displayRunPath?: string;
     target: TestTargetCandidate;
     result: TestTargetResult;
 }
+export interface AgentTestMatrixRow {
+    agentLabel: string;
+    rawOutcome?: SavedTestOutcome | null;
+    compiledOutcome?: SavedTestOutcome | null;
+}
 export declare function questionPassRate(outcome: SavedTestOutcome): number;
+export declare function readSavedTestComparison(projectPath: string, datasetName: string): TestRunComparison | null;
+export declare function printSavedTestComparisonState(payload: TestRunComparison, comparisonRunPath?: string | null): void;
+export declare function printAgentTestMatrix(rows: AgentTestMatrixRow[]): void;
+export declare function printAgentTestFailures(rows: AgentTestMatrixRow[]): void;
 export declare function printSavedTestOutcome(prefix: string, outcome: SavedTestOutcome): void;
-export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, workspaceOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
+export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
+export declare function printSavedTestComparisonSummary(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
 export declare function saveTestComparisonRun(options: {
     sourcePath: string;
-    workspacePath: string;
-    workspaceName: string;
+    compiledPath: string | null;
+    compiledName: string;
+    checksFingerprint: string;
     mode: TestRunMode;
     rawOutcome: SavedTestOutcome | null;
-    workspaceOutcome: SavedTestOutcome | null;
+    compiledOutcome: SavedTestOutcome | null;
 }): string;
 export declare function runSavedRawTest(options: {
     sourcePath: string;
-    workspaceConfig: SourceWorkspaceConfig;
-    workspacePath?: string | null;
+    datasetConfig: SourceDatasetConfig;
     executor?: WorkflowExecutor | null;
     executionProfile?: WorkflowExecutionProfile;
     preserveSandboxes?: TestSandboxRetentionMode;
+    runSuffix?: string | null;
 }): Promise<SavedTestOutcome | null>;
-export declare function runSavedWorkspaceTest(options: {
+export declare function runSavedCompiledTest(options: {
     sourcePath: string;
-    workspaceConfig: SourceWorkspaceConfig;
+    datasetConfig: SourceDatasetConfig;
     executor?: WorkflowExecutor | null;
     executionProfile?: WorkflowExecutionProfile;
-    workspacePath?: string | null;
+    compiledPath?: string | null;
     preserveSandboxes?: TestSandboxRetentionMode;
+    runSuffix?: string | null;
 }): Promise<SavedTestOutcome | null>;

package/dist/commands/test-flow.js CHANGED

@@ -1,21 +1,26 @@
 import chalk from "chalk";
-import { mkdirSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
-import { createRawTestTarget, createWorkspaceTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
-import { resolveWorkspaceRawPath, syncWorkspaceRawSnapshot } from "../lib/interf.js";
-import { buildTestSpecFromWorkspaceConfig } from "../lib/source-config.js";
-import { testRootForWorkspace, testRunsRootForWorkspace, } from "../lib/workspace-paths.js";
+import { existsSync, mkdirSync, writeFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+import { createRawTestTarget, createCompiledTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
+import { buildTestSpecFromSourceFolderConfig, buildTestSpecFromCompiledDatasetConfig, resolveSourceDatasetPath, } from "../lib/source-config.js";
+import { datasetArtifactRoot, datasetLatestTestStatePath, datasetLatestTestSummaryPath, datasetTestRunsRoot, } from "../lib/project-paths.js";
+import { testRootForCompiled } from "../lib/compiled-paths.js";
+import { readJsonFileWithSchema } from "../lib/parse.js";
+import { TestRunComparisonSchema } from "../lib/schema.js";
 import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
-import { ensureWorkspaceFromConfig, findBuiltWorkspacePath } from "./workspace-flow.js";
+import { findBuiltCompiledPath } from "./compiled-flow.js";
 export function questionPassRate(outcome) {
     return outcome.result.totalCases > 0
         ? Math.round((outcome.result.passedCases / outcome.result.totalCases) * 100)
         : 0;
 }
+function visibleRunPath(outcome) {
+    return outcome.displayRunPath ?? outcome.runPath;
+}
 function summarizeSavedTestOutcome(label, outcome) {
     return {
         label,
-        run_path: outcome.runPath,
+        run_path: visibleRunPath(outcome),
         ok: outcome.result.ok,
         passed_cases: outcome.result.passedCases,
         total_cases: outcome.result.totalCases,
@@ -32,6 +37,152 @@ function normalizeTestRunId(input) {
         .replace(/^-+|-+$/g, "")
         .slice(0, 80);
 }
+function datasetRunPathForTarget(projectPath, datasetName, target, generatedAt, runId, runSuffix) {
+    return join(datasetTestRunsRoot(projectPath, datasetName, target), `${generatedAt.replace(/[:.]/g, "-")}-${runId}${runSuffix ? `-${normalizeTestRunId(runSuffix)}` : ""}.json`);
+}
+function writeDatasetTargetRun(options) {
+    const dirPath = datasetTestRunsRoot(options.projectPath, options.datasetName, options.target);
+    mkdirSync(dirPath, { recursive: true });
+    const runPath = datasetRunPathForTarget(options.projectPath, options.datasetName, options.target, options.generatedAt, options.runId, options.runSuffix);
+    writeFileSync(runPath, `${JSON.stringify(options.payload, null, 2)}\n`);
+    return runPath;
+}
+function loadLatestComparison(projectPath, datasetName) {
+    const latestPath = datasetLatestTestStatePath(projectPath, datasetName);
+    if (!existsSync(latestPath))
+        return null;
+    return readJsonFileWithSchema(latestPath, "latest test comparison", TestRunComparisonSchema);
+}
+export function readSavedTestComparison(projectPath, datasetName) {
+    return loadLatestComparison(projectPath, datasetName);
+}
+function renderLatestSummaryMarkdown(payload) {
+    const lines = [
+        "# Latest Test Result",
+        "",
+        "| Target | Truth checks |",
+        "| --- | --- |",
+    ];
+    if (payload.raw) {
+        lines.push(`| Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
+    }
+    if (payload.compiled) {
+        lines.push(`| Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
+    }
+    lines.push("");
+    if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
+        const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
+        lines.push(`Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`, "");
+    }
+    if (payload.raw) {
+        lines.push(`- Latest files-as-is run: ${payload.raw.run_path}`);
+    }
+    if (payload.compiled) {
+        lines.push(`- Latest compiled run: ${payload.compiled.run_path}`);
+    }
+    return `${lines.join("\n")}\n`;
+}
+export function printSavedTestComparisonState(payload, comparisonRunPath) {
+    console.log();
+    console.log(chalk.bold("  Latest saved test"));
+    console.log();
+    console.log("  | Target | Truth checks |");
+    console.log("  | --- | --- |");
+    if (payload.raw) {
+        console.log(`  | Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
+    }
+    if (payload.compiled) {
+        console.log(`  | Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
+    }
+    if (!payload.raw || !payload.compiled) {
+        console.log();
+        if (!payload.raw) {
+            console.log(chalk.dim("  No saved files-as-is baseline yet."));
+        }
+        if (!payload.compiled) {
+            console.log(chalk.dim("  No saved compiled-dataset run yet."));
+        }
+    }
+    if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
+        const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
+        const color = (payload.summary.pass_rate_delta ?? 0) >= 0 ? chalk.green : chalk.red;
+        console.log();
+        console.log(color(`  Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`));
+    }
+    if (comparisonRunPath) {
+        console.log();
+        console.log(chalk.dim(`  Saved summary: ${comparisonRunPath}`));
+    }
+}
+function padCell(value, width) {
+    return value.padEnd(width, " ");
+}
+function scoreCell(outcome) {
+    if (!outcome)
+        return "—";
+    return `${outcome.result.passedCases}/${outcome.result.totalCases}`;
+}
+function deltaCell(row) {
+    if (!row.rawOutcome || !row.compiledOutcome)
+        return "—";
+    const delta = row.compiledOutcome.result.passedCases - row.rawOutcome.result.passedCases;
+    return delta > 0 ? `+${delta}` : `${delta}`;
+}
+export function printAgentTestMatrix(rows) {
+    if (rows.length === 0)
+        return;
+    const includeRaw = rows.some((row) => Boolean(row.rawOutcome));
+    const includeCompiled = rows.some((row) => Boolean(row.compiledOutcome));
+    const includeDelta = includeRaw && includeCompiled;
+    const headers = [
+        "Agent",
+        ...(includeRaw ? ["Files as-is"] : []),
+        ...(includeCompiled ? ["Compiled dataset"] : []),
+        ...(includeDelta ? ["Delta"] : []),
+    ];
+    const body = rows.map((row) => [
+        row.agentLabel,
+        ...(includeRaw ? [scoreCell(row.rawOutcome)] : []),
+        ...(includeCompiled ? [scoreCell(row.compiledOutcome)] : []),
+        ...(includeDelta ? [deltaCell(row)] : []),
+    ]);
+    const widths = headers.map((header, index) => Math.max(header.length, ...body.map((row) => (row[index] ?? "").length)));
+    const heading = includeDelta ? "  Comparison" : "  Results";
+    console.log();
+    console.log(chalk.bold(heading));
+    console.log();
+    console.log(`  | ${headers.map((header, index) => padCell(header, widths[index] ?? header.length)).join(" | ")} |`);
+    console.log(`  | ${widths.map((width) => "-".repeat(width)).join(" | ")} |`);
+    for (const row of body) {
+        console.log(`  | ${row.map((cell, index) => padCell(cell ?? "", widths[index] ?? cell.length)).join(" | ")} |`);
+    }
+}
+export function printAgentTestFailures(rows) {
+    for (const row of rows) {
+        const failures = [];
+        for (const [label, outcome] of [
+            ["Files as-is", row.rawOutcome ?? null],
+            ["Compiled dataset", row.compiledOutcome ?? null],
+        ]) {
+            if (!outcome || outcome.result.ok)
+                continue;
+            for (const [index, caseResult] of outcome.result.caseResults.entries()) {
+                if (caseResult.ok)
+                    continue;
+                const reason = caseResult.checks.find((entry) => !entry.ok)?.detail ?? "failed";
+                failures.push(`${label} · Truth Check ${index + 1}: ${reason}`);
+            }
+        }
+        if (failures.length === 0)
+            continue;
+        console.log();
+        console.log(chalk.bold(`  ${row.agentLabel} failures`));
+        console.log();
+        for (const failure of failures) {
+            console.log(`  - ${failure}`);
+        }
+    }
+}
 function specNeedsExecutor(spec) {
     return spec.cases.some((entry) => !entry.file || Boolean(entry.answer));
 }
@@ -69,69 +220,121 @@ export function printSavedTestOutcome(prefix, outcome) {
         console.log(chalk.dim(`  Preserved sandbox: ${outcome.result.sandbox_path}`));
         console.log();
     }
-    console.log(chalk.dim(`  Saved run: ${outcome.runPath}`));
+    console.log(chalk.dim(`  Saved run: ${visibleRunPath(outcome)}`));
 }
-export function printSavedTestComparison(rawOutcome, workspaceOutcome, comparisonRunPath) {
-    if (!rawOutcome && !workspaceOutcome)
+export function printSavedTestComparison(rawOutcome, compiledOutcome, comparisonRunPath) {
+    if (!rawOutcome && !compiledOutcome)
         return;
     console.log();
     if (rawOutcome) {
-        printSavedTestOutcome("Raw files", rawOutcome);
+        printSavedTestOutcome("Files as-is", rawOutcome);
     }
-    if (workspaceOutcome) {
+    if (compiledOutcome) {
         if (rawOutcome)
             console.log();
-        printSavedTestOutcome("Compiled workspace", workspaceOutcome);
+        printSavedTestOutcome("Compiled dataset", compiledOutcome);
     }
-    if (rawOutcome && workspaceOutcome) {
+    if (rawOutcome && compiledOutcome) {
         const rawQuestions = questionPassRate(rawOutcome);
-        const workspaceQuestions = questionPassRate(workspaceOutcome);
-        const delta = workspaceQuestions - rawQuestions;
+        const compiledQuestions = questionPassRate(compiledOutcome);
+        const delta = compiledQuestions - rawQuestions;
         const color = delta >= 0 ? chalk.green : chalk.red;
         const direction = delta >= 0 ? "improved" : "decreased";
         console.log();
-        console.log(color(`  Truth-check pass rate ${direction} from ${rawQuestions}% to ${workspaceQuestions}%.`));
+        console.log(color(`  Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
     }
     if (comparisonRunPath) {
         console.log();
-        console.log(chalk.dim(`  Saved comparison: ${comparisonRunPath}`));
+        console.log(chalk.dim(`  Saved summary: ${comparisonRunPath}`));
+    }
+}
+export function printSavedTestComparisonSummary(rawOutcome, compiledOutcome, comparisonRunPath) {
+    if (!rawOutcome && !compiledOutcome)
+        return;
+    console.log();
+    console.log(chalk.bold("  Comparison"));
+    console.log();
+    console.log("  | Target | Truth checks |");
+    console.log("  | --- | --- |");
+    if (rawOutcome) {
+        console.log(`  | Files as-is | \`${rawOutcome.result.passedCases}/${rawOutcome.result.totalCases}\` |`);
+    }
+    if (compiledOutcome) {
+        console.log(`  | Compiled dataset | \`${compiledOutcome.result.passedCases}/${compiledOutcome.result.totalCases}\` |`);
+    }
+    if (rawOutcome && compiledOutcome) {
+        const rawQuestions = questionPassRate(rawOutcome);
+        const compiledQuestions = questionPassRate(compiledOutcome);
+        const delta = compiledQuestions - rawQuestions;
+        const color = delta >= 0 ? chalk.green : chalk.red;
+        const direction = delta >= 0 ? "improved" : "decreased";
+        console.log();
+        console.log(color(`  Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
+    }
+    if (comparisonRunPath) {
+        console.log();
+        console.log(chalk.dim(`  Saved summary: ${comparisonRunPath}`));
     }
 }
 export function saveTestComparisonRun(options) {
     const generatedAt = new Date().toISOString();
-    const runRoot = join(testRunsRootForWorkspace(options.workspacePath), `${generatedAt.replace(/[:.]/g, "-")}-${normalizeTestRunId(options.workspaceName)}`);
-    mkdirSync(runRoot, { recursive: true });
-    const rawPassRate = options.rawOutcome ? questionPassRate(options.rawOutcome) : null;
-    const workspacePassRate = options.workspaceOutcome ? questionPassRate(options.workspaceOutcome) : null;
+    const existing = loadLatestComparison(options.sourcePath, options.compiledName);
+    const canReuseExisting = Boolean(existing?.checks_fingerprint) &&
+        existing?.checks_fingerprint === options.checksFingerprint;
+    const rawSummary = options.rawOutcome
+        ? summarizeSavedTestOutcome("Files as-is", options.rawOutcome)
+        : canReuseExisting
+            ? existing?.raw ?? null
+            : null;
+    const compiledSummary = options.compiledOutcome
+        ? summarizeSavedTestOutcome("Compiled dataset", options.compiledOutcome)
+        : canReuseExisting
+            ? existing?.compiled ?? null
+            : null;
+    const effectiveMode = rawSummary && compiledSummary
+        ? "both"
+        : rawSummary
+            ? "raw"
+            : "compiled";
+    const rawPassRate = rawSummary
+        ? Math.round((rawSummary.passed_cases / rawSummary.total_cases) * 100)
+        : null;
+    const compiledPassRate = compiledSummary
+        ? Math.round((compiledSummary.passed_cases / compiledSummary.total_cases) * 100)
+        : null;
     const payload = {
         kind: "interf-test-run",
         version: 1,
         generated_at: generatedAt,
-        mode: options.mode,
+        mode: effectiveMode,
         source_path: options.sourcePath,
-        workspace: {
-            name: options.workspaceName,
-            path: options.workspacePath,
+        checks_fingerprint: options.checksFingerprint,
+        dataset: {
+            name: options.compiledName,
+            compiled_path: options.compiledPath ?? (canReuseExisting ? existing?.dataset.compiled_path ?? null : null),
         },
-        raw: options.rawOutcome ? summarizeSavedTestOutcome("Raw files", options.rawOutcome) : null,
-        compiled_workspace: options.workspaceOutcome
-            ? summarizeSavedTestOutcome("Compiled workspace", options.workspaceOutcome)
-            : null,
+        raw: rawSummary,
+        compiled: compiledSummary,
         summary: {
             raw_pass_rate: rawPassRate,
-            compiled_pass_rate: workspacePassRate,
-            pass_rate_delta: rawPassRate !== null && workspacePassRate !== null ? workspacePassRate - rawPassRate : null,
+            compiled_pass_rate: compiledPassRate,
+            pass_rate_delta: rawPassRate !== null && compiledPassRate !== null ? compiledPassRate - rawPassRate : null,
         },
     };
-    const runPath = join(runRoot, "run.json");
-    writeFileSync(runPath, `${JSON.stringify(payload, null, 2)}\n`);
-    writeFileSync(join(testRootForWorkspace(options.workspacePath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
-    return runPath;
+    const latestStatePath = datasetLatestTestStatePath(options.sourcePath, options.compiledName);
+    mkdirSync(dirname(latestStatePath), { recursive: true });
+    writeFileSync(latestStatePath, `${JSON.stringify(payload, null, 2)}\n`);
+    writeFileSync(datasetLatestTestSummaryPath(options.sourcePath, options.compiledName), renderLatestSummaryMarkdown(payload));
+    if (options.compiledPath) {
+        mkdirSync(testRootForCompiled(options.compiledPath), { recursive: true });
+        writeFileSync(join(testRootForCompiled(options.compiledPath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
+    }
+    return latestStatePath;
 }
 export async function runSavedRawTest(options) {
-    const workspacePath = options.workspacePath ?? ensureWorkspaceFromConfig(options.sourcePath, options.workspaceConfig);
-    const spec = buildTestSpecFromWorkspaceConfig({
-        workspacePath,
+    const spec = buildTestSpecFromSourceFolderConfig({
+        sourcePath: options.sourcePath,
+        targetName: options.datasetConfig.name,
         targetType: "raw",
     });
     if (!spec) {
@@ -146,39 +349,45 @@ export async function runSavedRawTest(options) {
         console.log(chalk.red(error));
         return null;
     }
-    syncWorkspaceRawSnapshot(workspacePath, options.sourcePath);
-    const target = createRawTestTarget(resolveWorkspaceRawPath(workspacePath));
-    const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
+    const datasetSourcePath = resolveSourceDatasetPath(options.sourcePath, options.datasetConfig);
+    const target = createRawTestTarget(datasetSourcePath);
+    const run = await runTargetTestsAuto(datasetSourcePath, spec, [target], {
         executor,
         preserveSandboxes: options.preserveSandboxes ?? "on-failure",
-        artifactRootPath: workspacePath,
+        artifactRootPath: datasetArtifactRoot(options.sourcePath, options.datasetConfig.name),
     });
     const result = run.results[0];
     if (!result)
         return null;
+    const datasetRunPath = writeDatasetTargetRun({
+        projectPath: options.sourcePath,
+        datasetName: options.datasetConfig.name,
+        target: "file-as-is",
+        generatedAt: run.generated_at,
+        runId: normalizeTestRunId(spec.id),
+        runSuffix: options.runSuffix,
+        payload: run,
+    });
     return {
-        runPath: saveTargetTestRun(workspacePath, run),
+        runPath: datasetRunPath,
         target,
         result,
     };
 }
-export async function runSavedWorkspaceTest(options) {
-    const workspacePath = options.workspacePath ?? findBuiltWorkspacePath(options.sourcePath, options.workspaceConfig.name);
-    if (!workspacePath) {
+export async function runSavedCompiledTest(options) {
+    const compiledPath = options.compiledPath ?? findBuiltCompiledPath(options.sourcePath, options.datasetConfig.name);
+    if (!compiledPath) {
         return null;
     }
-    const spec = buildTestSpecFromWorkspaceConfig({
-        workspacePath,
-        targetType: "workspace",
+    const spec = buildTestSpecFromCompiledDatasetConfig({
+        compiledPath,
+        targetType: "compiled",
     });
     if (!spec) {
         return null;
     }
-    const target = createWorkspaceTestTarget(workspacePath, options.workspaceConfig.name, options.workspaceConfig.workflow ?? "interf");
-    if (!target) {
-        return null;
-    }
-    if (!target.eligible) {
+    const target = createCompiledTestTarget(compiledPath, options.datasetConfig.name, options.datasetConfig.workflow ?? "interf");
+    if (!target || !target.eligible) {
         return null;
     }
     const { executor, error } = await resolveExecutorForSpec(spec, options.executor, options.executionProfile);
@@ -193,13 +402,24 @@ export async function runSavedWorkspaceTest(options) {
     const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
         executor,
         preserveSandboxes: options.preserveSandboxes ?? "on-failure",
-        artifactRootPath: workspacePath,
+        artifactRootPath: compiledPath,
     });
     const result = run.results[0];
     if (!result)
         return null;
+    const internalRunPath = saveTargetTestRun(compiledPath, run);
+    const datasetRunPath = writeDatasetTargetRun({
+        projectPath: options.sourcePath,
+        datasetName: options.datasetConfig.name,
+        target: "compiled",
+        generatedAt: run.generated_at,
+        runId: normalizeTestRunId(spec.id),
+        runSuffix: options.runSuffix,
+        payload: run,
+    });
     return {
-        runPath: saveTargetTestRun(workspacePath, run),
+        runPath: internalRunPath,
+        displayRunPath: datasetRunPath,
         target,
         result,
     };

package/dist/commands/test.d.ts CHANGED Viewed

@@ -1,3 +1,9 @@
 import type { CommandModule } from "yargs";
+import type { SourceDatasetConfig } from "../lib/schema.js";
+export declare function resolveConfiguredDatasetSelection(options: {
+    sourcePath: string;
+    requestedDatasetName?: string | null;
+    hintedDatasetConfig?: SourceDatasetConfig | null;
+}): SourceDatasetConfig | null;
 export declare const testCommand: CommandModule;
-export declare function runTestCommand(argv?: Record<string, unknown>): Promise<void>;
+export declare function runTestCommand(argv?: Record<string, unknown>): Promise<boolean>;