@interf/compiler 0.4.0 → 0.5.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/README.md +71 -69
  2. package/builtin-workflows/interf/README.md +6 -6
  3. package/builtin-workflows/interf/compile/stages/shape/SKILL.md +7 -7
  4. package/builtin-workflows/interf/compile/stages/structure/SKILL.md +2 -2
  5. package/builtin-workflows/interf/compile/stages/summarize/SKILL.md +1 -1
  6. package/builtin-workflows/interf/{workspace.schema.json → compiled.schema.json} +5 -5
  7. package/builtin-workflows/interf/improve/SKILL.md +3 -3
  8. package/builtin-workflows/interf/use/query/SKILL.md +2 -2
  9. package/builtin-workflows/interf/workflow.json +42 -31
  10. package/dist/commands/check-draft.d.ts +19 -0
  11. package/dist/commands/check-draft.js +110 -0
  12. package/dist/commands/compile-controller.d.ts +4 -4
  13. package/dist/commands/compile-controller.js +117 -81
  14. package/dist/commands/compile.d.ts +5 -5
  15. package/dist/commands/compile.js +61 -62
  16. package/dist/commands/compiled-flow.d.ts +23 -0
  17. package/dist/commands/compiled-flow.js +112 -0
  18. package/dist/commands/create-workflow-wizard.d.ts +3 -3
  19. package/dist/commands/create-workflow-wizard.js +11 -11
  20. package/dist/commands/create.d.ts +2 -2
  21. package/dist/commands/create.js +50 -57
  22. package/dist/commands/default.js +2 -2
  23. package/dist/commands/executor-flow.d.ts +20 -1
  24. package/dist/commands/executor-flow.js +67 -7
  25. package/dist/commands/init.js +242 -289
  26. package/dist/commands/list.js +14 -10
  27. package/dist/commands/reset.js +6 -6
  28. package/dist/commands/source-config-wizard.d.ts +12 -8
  29. package/dist/commands/source-config-wizard.js +356 -119
  30. package/dist/commands/status.js +49 -26
  31. package/dist/commands/test-flow.d.ts +23 -10
  32. package/dist/commands/test-flow.js +278 -58
  33. package/dist/commands/test.d.ts +7 -1
  34. package/dist/commands/test.js +264 -65
  35. package/dist/commands/verify.js +23 -14
  36. package/dist/index.d.ts +7 -7
  37. package/dist/index.js +4 -4
  38. package/dist/lib/agent-args.js +2 -1
  39. package/dist/lib/agent-constants.js +1 -1
  40. package/dist/lib/agent-render.js +4 -4
  41. package/dist/lib/agent-shells.d.ts +8 -8
  42. package/dist/lib/agent-shells.js +231 -142
  43. package/dist/lib/compiled-compile.d.ts +52 -0
  44. package/dist/lib/compiled-compile.js +274 -0
  45. package/dist/lib/compiled-home.d.ts +5 -0
  46. package/dist/lib/compiled-home.js +32 -0
  47. package/dist/lib/compiled-layout.d.ts +2 -0
  48. package/dist/lib/compiled-layout.js +60 -0
  49. package/dist/lib/compiled-paths.d.ts +41 -0
  50. package/dist/lib/compiled-paths.js +111 -0
  51. package/dist/lib/{workspace-raw.d.ts → compiled-raw.d.ts} +8 -7
  52. package/dist/lib/{workspace-raw.js → compiled-raw.js} +16 -14
  53. package/dist/lib/compiled-reset.d.ts +1 -0
  54. package/dist/lib/compiled-reset.js +44 -0
  55. package/dist/lib/compiled-schema.d.ts +27 -0
  56. package/dist/lib/compiled-schema.js +110 -0
  57. package/dist/lib/config.d.ts +0 -1
  58. package/dist/lib/config.js +0 -1
  59. package/dist/lib/discovery.d.ts +1 -1
  60. package/dist/lib/discovery.js +3 -3
  61. package/dist/lib/interf-bootstrap.d.ts +1 -1
  62. package/dist/lib/interf-bootstrap.js +4 -4
  63. package/dist/lib/interf-detect.d.ts +10 -10
  64. package/dist/lib/interf-detect.js +78 -56
  65. package/dist/lib/interf-scaffold.d.ts +2 -2
  66. package/dist/lib/interf-scaffold.js +90 -57
  67. package/dist/lib/interf-workflow-package.d.ts +3 -3
  68. package/dist/lib/interf-workflow-package.js +30 -30
  69. package/dist/lib/interf.d.ts +5 -5
  70. package/dist/lib/interf.js +4 -4
  71. package/dist/lib/local-workflows.d.ts +4 -4
  72. package/dist/lib/local-workflows.js +35 -70
  73. package/dist/lib/obsidian.d.ts +1 -1
  74. package/dist/lib/parse.js +92 -1
  75. package/dist/lib/project-paths.d.ts +13 -0
  76. package/dist/lib/project-paths.js +29 -0
  77. package/dist/lib/runtime-acceptance.d.ts +7 -1
  78. package/dist/lib/runtime-acceptance.js +194 -59
  79. package/dist/lib/runtime-contracts.d.ts +2 -4
  80. package/dist/lib/runtime-contracts.js +17 -161
  81. package/dist/lib/runtime-inventory.d.ts +7 -0
  82. package/dist/lib/runtime-inventory.js +29 -0
  83. package/dist/lib/runtime-paths.js +5 -5
  84. package/dist/lib/runtime-prompt.js +9 -6
  85. package/dist/lib/runtime-reconcile.d.ts +2 -3
  86. package/dist/lib/runtime-reconcile.js +92 -171
  87. package/dist/lib/runtime-runs.js +30 -39
  88. package/dist/lib/runtime-types.d.ts +10 -19
  89. package/dist/lib/runtime.d.ts +2 -2
  90. package/dist/lib/runtime.js +1 -1
  91. package/dist/lib/schema.d.ts +163 -140
  92. package/dist/lib/schema.js +163 -124
  93. package/dist/lib/source-config.d.ts +24 -20
  94. package/dist/lib/source-config.js +154 -116
  95. package/dist/lib/state-artifacts.d.ts +5 -5
  96. package/dist/lib/state-artifacts.js +8 -8
  97. package/dist/lib/state-health.d.ts +4 -4
  98. package/dist/lib/state-health.js +108 -126
  99. package/dist/lib/state-io.d.ts +8 -8
  100. package/dist/lib/state-io.js +77 -50
  101. package/dist/lib/state-paths.js +5 -5
  102. package/dist/lib/state-view.d.ts +4 -4
  103. package/dist/lib/state-view.js +52 -55
  104. package/dist/lib/state.d.ts +5 -5
  105. package/dist/lib/state.js +4 -4
  106. package/dist/lib/summarize-plan.d.ts +3 -2
  107. package/dist/lib/summarize-plan.js +18 -16
  108. package/dist/lib/test-execution.js +9 -9
  109. package/dist/lib/test-matrices.d.ts +3 -3
  110. package/dist/lib/test-matrices.js +6 -6
  111. package/dist/lib/test-paths.d.ts +4 -4
  112. package/dist/lib/test-paths.js +16 -10
  113. package/dist/lib/test-sandbox.d.ts +1 -1
  114. package/dist/lib/test-sandbox.js +38 -31
  115. package/dist/lib/test-targets.d.ts +2 -2
  116. package/dist/lib/test-targets.js +11 -11
  117. package/dist/lib/test-types.d.ts +1 -1
  118. package/dist/lib/test.d.ts +1 -1
  119. package/dist/lib/test.js +1 -1
  120. package/dist/lib/util.d.ts +2 -0
  121. package/dist/lib/util.js +14 -1
  122. package/dist/lib/validate-compiled.d.ts +27 -0
  123. package/dist/lib/validate-compiled.js +236 -0
  124. package/dist/lib/validate-helpers.d.ts +0 -8
  125. package/dist/lib/validate-helpers.js +0 -30
  126. package/dist/lib/validate.d.ts +4 -4
  127. package/dist/lib/validate.js +49 -15
  128. package/dist/lib/workflow-abi.d.ts +37 -46
  129. package/dist/lib/workflow-abi.js +51 -76
  130. package/dist/lib/workflow-definitions.d.ts +11 -11
  131. package/dist/lib/workflow-definitions.js +36 -53
  132. package/dist/lib/workflow-helpers.d.ts +2 -3
  133. package/dist/lib/workflow-helpers.js +9 -13
  134. package/dist/lib/workflow-improvement.d.ts +3 -3
  135. package/dist/lib/workflow-improvement.js +48 -48
  136. package/dist/lib/workflow-review-paths.d.ts +3 -3
  137. package/dist/lib/workflow-review-paths.js +11 -11
  138. package/dist/lib/workflow-stage-runner.d.ts +1 -1
  139. package/dist/lib/workflow-stage-runner.js +8 -8
  140. package/dist/lib/workflows.d.ts +9 -9
  141. package/dist/lib/workflows.js +15 -17
  142. package/package.json +10 -9
  143. package/dist/commands/workspace-flow.d.ts +0 -23
  144. package/dist/commands/workspace-flow.js +0 -109
  145. package/dist/lib/registry.d.ts +0 -16
  146. package/dist/lib/registry.js +0 -65
  147. package/dist/lib/validate-workspace.d.ts +0 -121
  148. package/dist/lib/validate-workspace.js +0 -407
  149. package/dist/lib/workspace-compile.d.ts +0 -54
  150. package/dist/lib/workspace-compile.js +0 -476
  151. package/dist/lib/workspace-home.d.ts +0 -5
  152. package/dist/lib/workspace-home.js +0 -32
  153. package/dist/lib/workspace-layout.d.ts +0 -2
  154. package/dist/lib/workspace-layout.js +0 -60
  155. package/dist/lib/workspace-paths.d.ts +0 -41
  156. package/dist/lib/workspace-paths.js +0 -107
  157. package/dist/lib/workspace-reset.d.ts +0 -1
  158. package/dist/lib/workspace-reset.js +0 -43
  159. package/dist/lib/workspace-schema.d.ts +0 -17
  160. package/dist/lib/workspace-schema.js +0 -74
@@ -1,8 +1,8 @@
1
1
  import chalk from "chalk";
2
2
  import * as p from "@clack/prompts";
3
- import { detectInterf, listWorkspacesForSourceFolder, } from "../lib/interf.js";
4
- import { computeWorkspaceHealth } from "../lib/state.js";
5
- import { listWorkspaceEntries } from "../lib/registry.js";
3
+ import { detectInterf, listCompiledDatasetsForSourceFolder, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
4
+ import { computeCompiledHealth } from "../lib/state.js";
5
+ import { readSavedTestComparison, printSavedTestComparisonState } from "./test-flow.js";
6
6
  function statusColor(status) {
7
7
  switch (status) {
8
8
  case "compiled":
@@ -17,54 +17,77 @@ function statusColor(status) {
17
17
  }
18
18
  export const statusCommand = {
19
19
  command: "status",
20
- describe: "Show deterministic workspace health",
20
+ describe: "Show deterministic health for a compiled dataset",
21
21
  handler: async () => {
22
- let workspacePath = null;
22
+ let compiledPath = null;
23
23
  const detected = detectInterf(process.cwd());
24
24
  if (detected) {
25
- workspacePath = detected.path;
25
+ compiledPath = detected.path;
26
26
  }
27
27
  else {
28
- const local = listWorkspacesForSourceFolder(process.cwd()).map(({ path, config }) => ({
28
+ const sourcePath = process.cwd();
29
+ const local = listCompiledDatasetsForSourceFolder(sourcePath).map(({ path, config }) => ({
29
30
  path,
30
31
  name: config.name,
31
32
  }));
32
- const all = local.length > 0 ? local : listWorkspaceEntries().map((entry) => ({
33
- path: entry.path,
34
- name: entry.name,
35
- }));
36
- if (all.length === 0) {
33
+ if (local.length === 0) {
37
34
  process.exitCode = 1;
38
- console.log(chalk.red(" No workspaces found."));
35
+ console.log(chalk.red(" No compiled datasets found."));
36
+ console.log(chalk.dim(" Run `interf`, save truth checks, and compile a dataset first."));
39
37
  return;
40
38
  }
41
- if (all.length === 1) {
42
- workspacePath = all[0].path;
39
+ if (local.length === 1) {
40
+ compiledPath = local[0].path;
43
41
  }
44
42
  else {
45
43
  const selected = await p.select({
46
- message: "Which workspace?",
47
- options: all.map((entry) => ({ value: entry.path, label: entry.name })),
44
+ message: "Which dataset?",
45
+ options: local.map((entry) => ({ value: entry.path, label: entry.name })),
48
46
  });
49
47
  if (p.isCancel(selected))
50
48
  return;
51
- workspacePath = selected;
49
+ compiledPath = selected;
52
50
  }
53
51
  }
54
- const health = computeWorkspaceHealth(workspacePath);
52
+ const health = computeCompiledHealth(compiledPath);
55
53
  const color = statusColor(health.status);
56
54
  console.log();
57
55
  console.log(color(` ${health.target_name}`));
58
56
  console.log(chalk.dim(` status: ${health.status}`));
59
57
  console.log(chalk.dim(` stage: ${health.stage}`));
60
58
  console.log(chalk.dim(` ${health.summary}`));
59
+ const compiledConfig = readInterfConfig(compiledPath);
60
+ const sourcePath = resolveSourceControlPath(compiledPath);
61
+ const latestComparison = compiledConfig
62
+ ? readSavedTestComparison(sourcePath, compiledConfig.name)
63
+ : null;
64
+ if (latestComparison) {
65
+ printSavedTestComparisonState(latestComparison);
66
+ }
67
+ else {
68
+ console.log();
69
+ console.log(chalk.dim(" No saved test result yet. Run `interf test` to measure files as-is and the compiled dataset."));
70
+ }
61
71
  console.log();
62
- console.log(chalk.dim(` source coverage: ${health.metrics.source_covered}/${health.metrics.source_total}`));
63
- console.log(chalk.dim(` summarized: ${health.metrics.summarized}`));
64
- console.log(chalk.dim(` compiled: ${health.metrics.compiled}`));
65
- console.log(chalk.dim(` entities: ${health.metrics.entities}`));
66
- console.log(chalk.dim(` claims: ${health.metrics.claims}`));
67
- console.log(chalk.dim(` warnings: ${health.metrics.warnings}`));
68
- console.log(chalk.dim(` errors: ${health.metrics.errors}`));
72
+ const metricOrder = [
73
+ "source_total",
74
+ "stage_total",
75
+ "completed_stages",
76
+ "warnings",
77
+ "errors",
78
+ ];
79
+ const printed = new Set();
80
+ for (const key of metricOrder) {
81
+ const value = health.metrics[key];
82
+ if (typeof value !== "number")
83
+ continue;
84
+ printed.add(key);
85
+ console.log(chalk.dim(` ${key}: ${value}`));
86
+ }
87
+ for (const [key, value] of Object.entries(health.metrics)) {
88
+ if (printed.has(key))
89
+ continue;
90
+ console.log(chalk.dim(` ${key}: ${value}`));
91
+ }
69
92
  },
70
93
  };
@@ -1,36 +1,49 @@
1
1
  import { type TestTargetCandidate, type TestTargetResult } from "../lib/test.js";
2
2
  import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
3
3
  import type { TestSandboxRetentionMode } from "../lib/test-sandbox.js";
4
- import type { SourceWorkspaceConfig, TestRunMode } from "../lib/schema.js";
4
+ import type { SourceDatasetConfig, TestRunComparison, TestRunMode } from "../lib/schema.js";
5
5
  export interface SavedTestOutcome {
6
6
  runPath: string;
7
+ displayRunPath?: string;
7
8
  target: TestTargetCandidate;
8
9
  result: TestTargetResult;
9
10
  }
11
+ export interface AgentTestMatrixRow {
12
+ agentLabel: string;
13
+ rawOutcome?: SavedTestOutcome | null;
14
+ compiledOutcome?: SavedTestOutcome | null;
15
+ }
10
16
  export declare function questionPassRate(outcome: SavedTestOutcome): number;
17
+ export declare function readSavedTestComparison(projectPath: string, datasetName: string): TestRunComparison | null;
18
+ export declare function printSavedTestComparisonState(payload: TestRunComparison, comparisonRunPath?: string | null): void;
19
+ export declare function printAgentTestMatrix(rows: AgentTestMatrixRow[]): void;
20
+ export declare function printAgentTestFailures(rows: AgentTestMatrixRow[]): void;
11
21
  export declare function printSavedTestOutcome(prefix: string, outcome: SavedTestOutcome): void;
12
- export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, workspaceOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
22
+ export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
23
+ export declare function printSavedTestComparisonSummary(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
13
24
  export declare function saveTestComparisonRun(options: {
14
25
  sourcePath: string;
15
- workspacePath: string;
16
- workspaceName: string;
26
+ compiledPath: string | null;
27
+ compiledName: string;
28
+ checksFingerprint: string;
17
29
  mode: TestRunMode;
18
30
  rawOutcome: SavedTestOutcome | null;
19
- workspaceOutcome: SavedTestOutcome | null;
31
+ compiledOutcome: SavedTestOutcome | null;
20
32
  }): string;
21
33
  export declare function runSavedRawTest(options: {
22
34
  sourcePath: string;
23
- workspaceConfig: SourceWorkspaceConfig;
24
- workspacePath?: string | null;
35
+ datasetConfig: SourceDatasetConfig;
25
36
  executor?: WorkflowExecutor | null;
26
37
  executionProfile?: WorkflowExecutionProfile;
27
38
  preserveSandboxes?: TestSandboxRetentionMode;
39
+ runSuffix?: string | null;
28
40
  }): Promise<SavedTestOutcome | null>;
29
- export declare function runSavedWorkspaceTest(options: {
41
+ export declare function runSavedCompiledTest(options: {
30
42
  sourcePath: string;
31
- workspaceConfig: SourceWorkspaceConfig;
43
+ datasetConfig: SourceDatasetConfig;
32
44
  executor?: WorkflowExecutor | null;
33
45
  executionProfile?: WorkflowExecutionProfile;
34
- workspacePath?: string | null;
46
+ compiledPath?: string | null;
35
47
  preserveSandboxes?: TestSandboxRetentionMode;
48
+ runSuffix?: string | null;
36
49
  }): Promise<SavedTestOutcome | null>;
@@ -1,21 +1,26 @@
1
1
  import chalk from "chalk";
2
- import { mkdirSync, writeFileSync } from "node:fs";
3
- import { join } from "node:path";
4
- import { createRawTestTarget, createWorkspaceTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
5
- import { resolveWorkspaceRawPath, syncWorkspaceRawSnapshot } from "../lib/interf.js";
6
- import { buildTestSpecFromWorkspaceConfig } from "../lib/source-config.js";
7
- import { testRootForWorkspace, testRunsRootForWorkspace, } from "../lib/workspace-paths.js";
2
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { createRawTestTarget, createCompiledTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
5
+ import { buildTestSpecFromSourceFolderConfig, buildTestSpecFromCompiledDatasetConfig, resolveSourceDatasetPath, } from "../lib/source-config.js";
6
+ import { datasetArtifactRoot, datasetLatestTestStatePath, datasetLatestTestSummaryPath, datasetTestRunsRoot, } from "../lib/project-paths.js";
7
+ import { testRootForCompiled } from "../lib/compiled-paths.js";
8
+ import { readJsonFileWithSchema } from "../lib/parse.js";
9
+ import { TestRunComparisonSchema } from "../lib/schema.js";
8
10
  import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
9
- import { ensureWorkspaceFromConfig, findBuiltWorkspacePath } from "./workspace-flow.js";
11
+ import { findBuiltCompiledPath } from "./compiled-flow.js";
10
12
  export function questionPassRate(outcome) {
11
13
  return outcome.result.totalCases > 0
12
14
  ? Math.round((outcome.result.passedCases / outcome.result.totalCases) * 100)
13
15
  : 0;
14
16
  }
17
+ function visibleRunPath(outcome) {
18
+ return outcome.displayRunPath ?? outcome.runPath;
19
+ }
15
20
  function summarizeSavedTestOutcome(label, outcome) {
16
21
  return {
17
22
  label,
18
- run_path: outcome.runPath,
23
+ run_path: visibleRunPath(outcome),
19
24
  ok: outcome.result.ok,
20
25
  passed_cases: outcome.result.passedCases,
21
26
  total_cases: outcome.result.totalCases,
@@ -32,6 +37,152 @@ function normalizeTestRunId(input) {
32
37
  .replace(/^-+|-+$/g, "")
33
38
  .slice(0, 80);
34
39
  }
40
+ function datasetRunPathForTarget(projectPath, datasetName, target, generatedAt, runId, runSuffix) {
41
+ return join(datasetTestRunsRoot(projectPath, datasetName, target), `${generatedAt.replace(/[:.]/g, "-")}-${runId}${runSuffix ? `-${normalizeTestRunId(runSuffix)}` : ""}.json`);
42
+ }
43
+ function writeDatasetTargetRun(options) {
44
+ const dirPath = datasetTestRunsRoot(options.projectPath, options.datasetName, options.target);
45
+ mkdirSync(dirPath, { recursive: true });
46
+ const runPath = datasetRunPathForTarget(options.projectPath, options.datasetName, options.target, options.generatedAt, options.runId, options.runSuffix);
47
+ writeFileSync(runPath, `${JSON.stringify(options.payload, null, 2)}\n`);
48
+ return runPath;
49
+ }
50
+ function loadLatestComparison(projectPath, datasetName) {
51
+ const latestPath = datasetLatestTestStatePath(projectPath, datasetName);
52
+ if (!existsSync(latestPath))
53
+ return null;
54
+ return readJsonFileWithSchema(latestPath, "latest test comparison", TestRunComparisonSchema);
55
+ }
56
+ export function readSavedTestComparison(projectPath, datasetName) {
57
+ return loadLatestComparison(projectPath, datasetName);
58
+ }
59
+ function renderLatestSummaryMarkdown(payload) {
60
+ const lines = [
61
+ "# Latest Test Result",
62
+ "",
63
+ "| Target | Truth checks |",
64
+ "| --- | --- |",
65
+ ];
66
+ if (payload.raw) {
67
+ lines.push(`| Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
68
+ }
69
+ if (payload.compiled) {
70
+ lines.push(`| Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
71
+ }
72
+ lines.push("");
73
+ if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
74
+ const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
75
+ lines.push(`Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`, "");
76
+ }
77
+ if (payload.raw) {
78
+ lines.push(`- Latest files-as-is run: ${payload.raw.run_path}`);
79
+ }
80
+ if (payload.compiled) {
81
+ lines.push(`- Latest compiled run: ${payload.compiled.run_path}`);
82
+ }
83
+ return `${lines.join("\n")}\n`;
84
+ }
85
+ export function printSavedTestComparisonState(payload, comparisonRunPath) {
86
+ console.log();
87
+ console.log(chalk.bold(" Latest saved test"));
88
+ console.log();
89
+ console.log(" | Target | Truth checks |");
90
+ console.log(" | --- | --- |");
91
+ if (payload.raw) {
92
+ console.log(` | Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
93
+ }
94
+ if (payload.compiled) {
95
+ console.log(` | Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
96
+ }
97
+ if (!payload.raw || !payload.compiled) {
98
+ console.log();
99
+ if (!payload.raw) {
100
+ console.log(chalk.dim(" No saved files-as-is baseline yet."));
101
+ }
102
+ if (!payload.compiled) {
103
+ console.log(chalk.dim(" No saved compiled-dataset run yet."));
104
+ }
105
+ }
106
+ if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
107
+ const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
108
+ const color = (payload.summary.pass_rate_delta ?? 0) >= 0 ? chalk.green : chalk.red;
109
+ console.log();
110
+ console.log(color(` Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`));
111
+ }
112
+ if (comparisonRunPath) {
113
+ console.log();
114
+ console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
115
+ }
116
+ }
117
+ function padCell(value, width) {
118
+ return value.padEnd(width, " ");
119
+ }
120
+ function scoreCell(outcome) {
121
+ if (!outcome)
122
+ return "—";
123
+ return `${outcome.result.passedCases}/${outcome.result.totalCases}`;
124
+ }
125
+ function deltaCell(row) {
126
+ if (!row.rawOutcome || !row.compiledOutcome)
127
+ return "—";
128
+ const delta = row.compiledOutcome.result.passedCases - row.rawOutcome.result.passedCases;
129
+ return delta > 0 ? `+${delta}` : `${delta}`;
130
+ }
131
+ export function printAgentTestMatrix(rows) {
132
+ if (rows.length === 0)
133
+ return;
134
+ const includeRaw = rows.some((row) => Boolean(row.rawOutcome));
135
+ const includeCompiled = rows.some((row) => Boolean(row.compiledOutcome));
136
+ const includeDelta = includeRaw && includeCompiled;
137
+ const headers = [
138
+ "Agent",
139
+ ...(includeRaw ? ["Files as-is"] : []),
140
+ ...(includeCompiled ? ["Compiled dataset"] : []),
141
+ ...(includeDelta ? ["Delta"] : []),
142
+ ];
143
+ const body = rows.map((row) => [
144
+ row.agentLabel,
145
+ ...(includeRaw ? [scoreCell(row.rawOutcome)] : []),
146
+ ...(includeCompiled ? [scoreCell(row.compiledOutcome)] : []),
147
+ ...(includeDelta ? [deltaCell(row)] : []),
148
+ ]);
149
+ const widths = headers.map((header, index) => Math.max(header.length, ...body.map((row) => (row[index] ?? "").length)));
150
+ const heading = includeDelta ? " Comparison" : " Results";
151
+ console.log();
152
+ console.log(chalk.bold(heading));
153
+ console.log();
154
+ console.log(` | ${headers.map((header, index) => padCell(header, widths[index] ?? header.length)).join(" | ")} |`);
155
+ console.log(` | ${widths.map((width) => "-".repeat(width)).join(" | ")} |`);
156
+ for (const row of body) {
157
+ console.log(` | ${row.map((cell, index) => padCell(cell ?? "", widths[index] ?? cell.length)).join(" | ")} |`);
158
+ }
159
+ }
160
+ export function printAgentTestFailures(rows) {
161
+ for (const row of rows) {
162
+ const failures = [];
163
+ for (const [label, outcome] of [
164
+ ["Files as-is", row.rawOutcome ?? null],
165
+ ["Compiled dataset", row.compiledOutcome ?? null],
166
+ ]) {
167
+ if (!outcome || outcome.result.ok)
168
+ continue;
169
+ for (const [index, caseResult] of outcome.result.caseResults.entries()) {
170
+ if (caseResult.ok)
171
+ continue;
172
+ const reason = caseResult.checks.find((entry) => !entry.ok)?.detail ?? "failed";
173
+ failures.push(`${label} · Truth Check ${index + 1}: ${reason}`);
174
+ }
175
+ }
176
+ if (failures.length === 0)
177
+ continue;
178
+ console.log();
179
+ console.log(chalk.bold(` ${row.agentLabel} failures`));
180
+ console.log();
181
+ for (const failure of failures) {
182
+ console.log(` - ${failure}`);
183
+ }
184
+ }
185
+ }
35
186
  function specNeedsExecutor(spec) {
36
187
  return spec.cases.some((entry) => !entry.file || Boolean(entry.answer));
37
188
  }
@@ -69,69 +220,121 @@ export function printSavedTestOutcome(prefix, outcome) {
69
220
  console.log(chalk.dim(` Preserved sandbox: ${outcome.result.sandbox_path}`));
70
221
  console.log();
71
222
  }
72
- console.log(chalk.dim(` Saved run: ${outcome.runPath}`));
223
+ console.log(chalk.dim(` Saved run: ${visibleRunPath(outcome)}`));
73
224
  }
74
- export function printSavedTestComparison(rawOutcome, workspaceOutcome, comparisonRunPath) {
75
- if (!rawOutcome && !workspaceOutcome)
225
+ export function printSavedTestComparison(rawOutcome, compiledOutcome, comparisonRunPath) {
226
+ if (!rawOutcome && !compiledOutcome)
76
227
  return;
77
228
  console.log();
78
229
  if (rawOutcome) {
79
- printSavedTestOutcome("Raw files", rawOutcome);
230
+ printSavedTestOutcome("Files as-is", rawOutcome);
80
231
  }
81
- if (workspaceOutcome) {
232
+ if (compiledOutcome) {
82
233
  if (rawOutcome)
83
234
  console.log();
84
- printSavedTestOutcome("Compiled workspace", workspaceOutcome);
235
+ printSavedTestOutcome("Compiled dataset", compiledOutcome);
85
236
  }
86
- if (rawOutcome && workspaceOutcome) {
237
+ if (rawOutcome && compiledOutcome) {
87
238
  const rawQuestions = questionPassRate(rawOutcome);
88
- const workspaceQuestions = questionPassRate(workspaceOutcome);
89
- const delta = workspaceQuestions - rawQuestions;
239
+ const compiledQuestions = questionPassRate(compiledOutcome);
240
+ const delta = compiledQuestions - rawQuestions;
90
241
  const color = delta >= 0 ? chalk.green : chalk.red;
91
242
  const direction = delta >= 0 ? "improved" : "decreased";
92
243
  console.log();
93
- console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${workspaceQuestions}%.`));
244
+ console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
94
245
  }
95
246
  if (comparisonRunPath) {
96
247
  console.log();
97
- console.log(chalk.dim(` Saved comparison: ${comparisonRunPath}`));
248
+ console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
249
+ }
250
+ }
251
+ export function printSavedTestComparisonSummary(rawOutcome, compiledOutcome, comparisonRunPath) {
252
+ if (!rawOutcome && !compiledOutcome)
253
+ return;
254
+ console.log();
255
+ console.log(chalk.bold(" Comparison"));
256
+ console.log();
257
+ console.log(" | Target | Truth checks |");
258
+ console.log(" | --- | --- |");
259
+ if (rawOutcome) {
260
+ console.log(` | Files as-is | \`${rawOutcome.result.passedCases}/${rawOutcome.result.totalCases}\` |`);
261
+ }
262
+ if (compiledOutcome) {
263
+ console.log(` | Compiled dataset | \`${compiledOutcome.result.passedCases}/${compiledOutcome.result.totalCases}\` |`);
264
+ }
265
+ if (rawOutcome && compiledOutcome) {
266
+ const rawQuestions = questionPassRate(rawOutcome);
267
+ const compiledQuestions = questionPassRate(compiledOutcome);
268
+ const delta = compiledQuestions - rawQuestions;
269
+ const color = delta >= 0 ? chalk.green : chalk.red;
270
+ const direction = delta >= 0 ? "improved" : "decreased";
271
+ console.log();
272
+ console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
273
+ }
274
+ if (comparisonRunPath) {
275
+ console.log();
276
+ console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
98
277
  }
99
278
  }
100
279
  export function saveTestComparisonRun(options) {
101
280
  const generatedAt = new Date().toISOString();
102
- const runRoot = join(testRunsRootForWorkspace(options.workspacePath), `${generatedAt.replace(/[:.]/g, "-")}-${normalizeTestRunId(options.workspaceName)}`);
103
- mkdirSync(runRoot, { recursive: true });
104
- const rawPassRate = options.rawOutcome ? questionPassRate(options.rawOutcome) : null;
105
- const workspacePassRate = options.workspaceOutcome ? questionPassRate(options.workspaceOutcome) : null;
281
+ const existing = loadLatestComparison(options.sourcePath, options.compiledName);
282
+ const canReuseExisting = Boolean(existing?.checks_fingerprint) &&
283
+ existing?.checks_fingerprint === options.checksFingerprint;
284
+ const rawSummary = options.rawOutcome
285
+ ? summarizeSavedTestOutcome("Files as-is", options.rawOutcome)
286
+ : canReuseExisting
287
+ ? existing?.raw ?? null
288
+ : null;
289
+ const compiledSummary = options.compiledOutcome
290
+ ? summarizeSavedTestOutcome("Compiled dataset", options.compiledOutcome)
291
+ : canReuseExisting
292
+ ? existing?.compiled ?? null
293
+ : null;
294
+ const effectiveMode = rawSummary && compiledSummary
295
+ ? "both"
296
+ : rawSummary
297
+ ? "raw"
298
+ : "compiled";
299
+ const rawPassRate = rawSummary
300
+ ? Math.round((rawSummary.passed_cases / rawSummary.total_cases) * 100)
301
+ : null;
302
+ const compiledPassRate = compiledSummary
303
+ ? Math.round((compiledSummary.passed_cases / compiledSummary.total_cases) * 100)
304
+ : null;
106
305
  const payload = {
107
306
  kind: "interf-test-run",
108
307
  version: 1,
109
308
  generated_at: generatedAt,
110
- mode: options.mode,
309
+ mode: effectiveMode,
111
310
  source_path: options.sourcePath,
112
- workspace: {
113
- name: options.workspaceName,
114
- path: options.workspacePath,
311
+ checks_fingerprint: options.checksFingerprint,
312
+ dataset: {
313
+ name: options.compiledName,
314
+ compiled_path: options.compiledPath ?? (canReuseExisting ? existing?.dataset.compiled_path ?? null : null),
115
315
  },
116
- raw: options.rawOutcome ? summarizeSavedTestOutcome("Raw files", options.rawOutcome) : null,
117
- compiled_workspace: options.workspaceOutcome
118
- ? summarizeSavedTestOutcome("Compiled workspace", options.workspaceOutcome)
119
- : null,
316
+ raw: rawSummary,
317
+ compiled: compiledSummary,
120
318
  summary: {
121
319
  raw_pass_rate: rawPassRate,
122
- compiled_pass_rate: workspacePassRate,
123
- pass_rate_delta: rawPassRate !== null && workspacePassRate !== null ? workspacePassRate - rawPassRate : null,
320
+ compiled_pass_rate: compiledPassRate,
321
+ pass_rate_delta: rawPassRate !== null && compiledPassRate !== null ? compiledPassRate - rawPassRate : null,
124
322
  },
125
323
  };
126
- const runPath = join(runRoot, "run.json");
127
- writeFileSync(runPath, `${JSON.stringify(payload, null, 2)}\n`);
128
- writeFileSync(join(testRootForWorkspace(options.workspacePath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
129
- return runPath;
324
+ const latestStatePath = datasetLatestTestStatePath(options.sourcePath, options.compiledName);
325
+ mkdirSync(dirname(latestStatePath), { recursive: true });
326
+ writeFileSync(latestStatePath, `${JSON.stringify(payload, null, 2)}\n`);
327
+ writeFileSync(datasetLatestTestSummaryPath(options.sourcePath, options.compiledName), renderLatestSummaryMarkdown(payload));
328
+ if (options.compiledPath) {
329
+ mkdirSync(testRootForCompiled(options.compiledPath), { recursive: true });
330
+ writeFileSync(join(testRootForCompiled(options.compiledPath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
331
+ }
332
+ return latestStatePath;
130
333
  }
131
334
  export async function runSavedRawTest(options) {
132
- const workspacePath = options.workspacePath ?? ensureWorkspaceFromConfig(options.sourcePath, options.workspaceConfig);
133
- const spec = buildTestSpecFromWorkspaceConfig({
134
- workspacePath,
335
+ const spec = buildTestSpecFromSourceFolderConfig({
336
+ sourcePath: options.sourcePath,
337
+ targetName: options.datasetConfig.name,
135
338
  targetType: "raw",
136
339
  });
137
340
  if (!spec) {
@@ -146,39 +349,45 @@ export async function runSavedRawTest(options) {
146
349
  console.log(chalk.red(error));
147
350
  return null;
148
351
  }
149
- syncWorkspaceRawSnapshot(workspacePath, options.sourcePath);
150
- const target = createRawTestTarget(resolveWorkspaceRawPath(workspacePath));
151
- const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
352
+ const datasetSourcePath = resolveSourceDatasetPath(options.sourcePath, options.datasetConfig);
353
+ const target = createRawTestTarget(datasetSourcePath);
354
+ const run = await runTargetTestsAuto(datasetSourcePath, spec, [target], {
152
355
  executor,
153
356
  preserveSandboxes: options.preserveSandboxes ?? "on-failure",
154
- artifactRootPath: workspacePath,
357
+ artifactRootPath: datasetArtifactRoot(options.sourcePath, options.datasetConfig.name),
155
358
  });
156
359
  const result = run.results[0];
157
360
  if (!result)
158
361
  return null;
362
+ const datasetRunPath = writeDatasetTargetRun({
363
+ projectPath: options.sourcePath,
364
+ datasetName: options.datasetConfig.name,
365
+ target: "file-as-is",
366
+ generatedAt: run.generated_at,
367
+ runId: normalizeTestRunId(spec.id),
368
+ runSuffix: options.runSuffix,
369
+ payload: run,
370
+ });
159
371
  return {
160
- runPath: saveTargetTestRun(workspacePath, run),
372
+ runPath: datasetRunPath,
161
373
  target,
162
374
  result,
163
375
  };
164
376
  }
165
- export async function runSavedWorkspaceTest(options) {
166
- const workspacePath = options.workspacePath ?? findBuiltWorkspacePath(options.sourcePath, options.workspaceConfig.name);
167
- if (!workspacePath) {
377
+ export async function runSavedCompiledTest(options) {
378
+ const compiledPath = options.compiledPath ?? findBuiltCompiledPath(options.sourcePath, options.datasetConfig.name);
379
+ if (!compiledPath) {
168
380
  return null;
169
381
  }
170
- const spec = buildTestSpecFromWorkspaceConfig({
171
- workspacePath,
172
- targetType: "workspace",
382
+ const spec = buildTestSpecFromCompiledDatasetConfig({
383
+ compiledPath,
384
+ targetType: "compiled",
173
385
  });
174
386
  if (!spec) {
175
387
  return null;
176
388
  }
177
- const target = createWorkspaceTestTarget(workspacePath, options.workspaceConfig.name, options.workspaceConfig.workflow ?? "interf");
178
- if (!target) {
179
- return null;
180
- }
181
- if (!target.eligible) {
389
+ const target = createCompiledTestTarget(compiledPath, options.datasetConfig.name, options.datasetConfig.workflow ?? "interf");
390
+ if (!target || !target.eligible) {
182
391
  return null;
183
392
  }
184
393
  const { executor, error } = await resolveExecutorForSpec(spec, options.executor, options.executionProfile);
@@ -193,13 +402,24 @@ export async function runSavedWorkspaceTest(options) {
193
402
  const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
194
403
  executor,
195
404
  preserveSandboxes: options.preserveSandboxes ?? "on-failure",
196
- artifactRootPath: workspacePath,
405
+ artifactRootPath: compiledPath,
197
406
  });
198
407
  const result = run.results[0];
199
408
  if (!result)
200
409
  return null;
410
+ const internalRunPath = saveTargetTestRun(compiledPath, run);
411
+ const datasetRunPath = writeDatasetTargetRun({
412
+ projectPath: options.sourcePath,
413
+ datasetName: options.datasetConfig.name,
414
+ target: "compiled",
415
+ generatedAt: run.generated_at,
416
+ runId: normalizeTestRunId(spec.id),
417
+ runSuffix: options.runSuffix,
418
+ payload: run,
419
+ });
201
420
  return {
202
- runPath: saveTargetTestRun(workspacePath, run),
421
+ runPath: internalRunPath,
422
+ displayRunPath: datasetRunPath,
203
423
  target,
204
424
  result,
205
425
  };
@@ -1,3 +1,9 @@
1
1
  import type { CommandModule } from "yargs";
2
+ import type { SourceDatasetConfig } from "../lib/schema.js";
3
+ export declare function resolveConfiguredDatasetSelection(options: {
4
+ sourcePath: string;
5
+ requestedDatasetName?: string | null;
6
+ hintedDatasetConfig?: SourceDatasetConfig | null;
7
+ }): SourceDatasetConfig | null;
2
8
  export declare const testCommand: CommandModule;
3
- export declare function runTestCommand(argv?: Record<string, unknown>): Promise<void>;
9
+ export declare function runTestCommand(argv?: Record<string, unknown>): Promise<boolean>;