@interf/compiler 0.4.1 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/README.md +70 -66
  2. package/builtin-workflows/interf/README.md +6 -6
  3. package/builtin-workflows/interf/compile/stages/shape/SKILL.md +7 -7
  4. package/builtin-workflows/interf/compile/stages/structure/SKILL.md +2 -2
  5. package/builtin-workflows/interf/compile/stages/summarize/SKILL.md +1 -1
  6. package/builtin-workflows/interf/{workspace.schema.json → compiled.schema.json} +5 -5
  7. package/builtin-workflows/interf/improve/SKILL.md +3 -3
  8. package/builtin-workflows/interf/use/query/SKILL.md +2 -2
  9. package/builtin-workflows/interf/workflow.json +42 -31
  10. package/dist/commands/check-draft.d.ts +19 -0
  11. package/dist/commands/check-draft.js +110 -0
  12. package/dist/commands/compile-controller.d.ts +4 -4
  13. package/dist/commands/compile-controller.js +117 -81
  14. package/dist/commands/compile.d.ts +5 -5
  15. package/dist/commands/compile.js +61 -62
  16. package/dist/commands/compiled-flow.d.ts +23 -0
  17. package/dist/commands/compiled-flow.js +112 -0
  18. package/dist/commands/create-workflow-wizard.d.ts +3 -3
  19. package/dist/commands/create-workflow-wizard.js +11 -11
  20. package/dist/commands/create.d.ts +2 -2
  21. package/dist/commands/create.js +50 -57
  22. package/dist/commands/default.js +2 -2
  23. package/dist/commands/executor-flow.d.ts +20 -1
  24. package/dist/commands/executor-flow.js +67 -7
  25. package/dist/commands/init.js +242 -289
  26. package/dist/commands/list.js +14 -10
  27. package/dist/commands/reset.js +6 -6
  28. package/dist/commands/source-config-wizard.d.ts +12 -8
  29. package/dist/commands/source-config-wizard.js +356 -119
  30. package/dist/commands/status.js +49 -26
  31. package/dist/commands/test-flow.d.ts +23 -10
  32. package/dist/commands/test-flow.js +274 -65
  33. package/dist/commands/test.d.ts +7 -1
  34. package/dist/commands/test.js +264 -65
  35. package/dist/commands/verify.js +23 -14
  36. package/dist/index.d.ts +7 -7
  37. package/dist/index.js +4 -4
  38. package/dist/lib/agent-args.js +2 -1
  39. package/dist/lib/agent-constants.js +1 -1
  40. package/dist/lib/agent-render.js +4 -4
  41. package/dist/lib/agent-shells.d.ts +8 -8
  42. package/dist/lib/agent-shells.js +231 -142
  43. package/dist/lib/{workflow-abi.d.ts → builtin-compiled-workflow.d.ts} +37 -46
  44. package/dist/lib/builtin-compiled-workflow.js +153 -0
  45. package/dist/lib/compiled-compile.d.ts +52 -0
  46. package/dist/lib/compiled-compile.js +274 -0
  47. package/dist/lib/compiled-home.d.ts +5 -0
  48. package/dist/lib/compiled-home.js +32 -0
  49. package/dist/lib/compiled-paths.d.ts +39 -0
  50. package/dist/lib/compiled-paths.js +103 -0
  51. package/dist/lib/{workspace-raw.d.ts → compiled-raw.d.ts} +9 -8
  52. package/dist/lib/{workspace-raw.js → compiled-raw.js} +16 -14
  53. package/dist/lib/compiled-reset.d.ts +1 -0
  54. package/dist/lib/compiled-reset.js +44 -0
  55. package/dist/lib/compiled-schema.d.ts +27 -0
  56. package/dist/lib/compiled-schema.js +110 -0
  57. package/dist/lib/config.d.ts +0 -1
  58. package/dist/lib/config.js +0 -1
  59. package/dist/lib/discovery.d.ts +1 -1
  60. package/dist/lib/discovery.js +3 -3
  61. package/dist/lib/interf-bootstrap.d.ts +1 -1
  62. package/dist/lib/interf-bootstrap.js +4 -4
  63. package/dist/lib/interf-detect.d.ts +9 -10
  64. package/dist/lib/interf-detect.js +70 -59
  65. package/dist/lib/interf-scaffold.d.ts +2 -2
  66. package/dist/lib/interf-scaffold.js +90 -57
  67. package/dist/lib/interf-workflow-package.d.ts +3 -3
  68. package/dist/lib/interf-workflow-package.js +30 -30
  69. package/dist/lib/interf.d.ts +5 -5
  70. package/dist/lib/interf.js +4 -4
  71. package/dist/lib/local-workflows.d.ts +4 -4
  72. package/dist/lib/local-workflows.js +35 -70
  73. package/dist/lib/obsidian.d.ts +1 -1
  74. package/dist/lib/parse.js +92 -1
  75. package/dist/lib/project-paths.d.ts +11 -0
  76. package/dist/lib/project-paths.js +32 -0
  77. package/dist/lib/runtime-acceptance.d.ts +7 -1
  78. package/dist/lib/runtime-acceptance.js +194 -59
  79. package/dist/lib/runtime-contracts.d.ts +2 -4
  80. package/dist/lib/runtime-contracts.js +17 -161
  81. package/dist/lib/runtime-inventory.d.ts +7 -0
  82. package/dist/lib/runtime-inventory.js +29 -0
  83. package/dist/lib/runtime-paths.js +5 -5
  84. package/dist/lib/runtime-prompt.js +7 -6
  85. package/dist/lib/runtime-reconcile.d.ts +2 -3
  86. package/dist/lib/runtime-reconcile.js +94 -184
  87. package/dist/lib/runtime-runs.js +25 -119
  88. package/dist/lib/runtime-types.d.ts +10 -19
  89. package/dist/lib/runtime.d.ts +2 -2
  90. package/dist/lib/runtime.js +1 -1
  91. package/dist/lib/schema.d.ts +169 -153
  92. package/dist/lib/schema.js +116 -164
  93. package/dist/lib/source-config.d.ts +24 -20
  94. package/dist/lib/source-config.js +159 -122
  95. package/dist/lib/state-artifacts.d.ts +5 -5
  96. package/dist/lib/state-artifacts.js +8 -8
  97. package/dist/lib/state-health.d.ts +4 -4
  98. package/dist/lib/state-health.js +110 -126
  99. package/dist/lib/state-io.d.ts +8 -8
  100. package/dist/lib/state-io.js +21 -102
  101. package/dist/lib/state-paths.js +5 -5
  102. package/dist/lib/state-view.d.ts +4 -4
  103. package/dist/lib/state-view.js +52 -55
  104. package/dist/lib/state.d.ts +5 -5
  105. package/dist/lib/state.js +4 -4
  106. package/dist/lib/summarize-plan.d.ts +3 -2
  107. package/dist/lib/summarize-plan.js +19 -21
  108. package/dist/lib/test-execution.js +9 -9
  109. package/dist/lib/test-matrices.d.ts +3 -3
  110. package/dist/lib/test-matrices.js +6 -6
  111. package/dist/lib/test-paths.d.ts +4 -4
  112. package/dist/lib/test-paths.js +26 -11
  113. package/dist/lib/test-sandbox.d.ts +1 -1
  114. package/dist/lib/test-sandbox.js +32 -38
  115. package/dist/lib/test-specs.js +1 -1
  116. package/dist/lib/test-targets.d.ts +2 -2
  117. package/dist/lib/test-targets.js +11 -11
  118. package/dist/lib/test-types.d.ts +1 -1
  119. package/dist/lib/test.d.ts +1 -1
  120. package/dist/lib/test.js +1 -1
  121. package/dist/lib/util.d.ts +2 -0
  122. package/dist/lib/util.js +14 -1
  123. package/dist/lib/validate-compiled.d.ts +27 -0
  124. package/dist/lib/validate-compiled.js +238 -0
  125. package/dist/lib/validate-helpers.d.ts +0 -8
  126. package/dist/lib/validate-helpers.js +0 -30
  127. package/dist/lib/validate.d.ts +6 -4
  128. package/dist/lib/validate.js +76 -27
  129. package/dist/lib/workflow-definitions.d.ts +12 -11
  130. package/dist/lib/workflow-definitions.js +45 -55
  131. package/dist/lib/workflow-helpers.d.ts +2 -3
  132. package/dist/lib/workflow-helpers.js +9 -13
  133. package/dist/lib/workflow-improvement.d.ts +3 -3
  134. package/dist/lib/workflow-improvement.js +48 -48
  135. package/dist/lib/workflow-primitives.d.ts +2 -0
  136. package/dist/lib/workflow-primitives.js +5 -0
  137. package/dist/lib/workflow-review-paths.d.ts +3 -3
  138. package/dist/lib/workflow-review-paths.js +11 -11
  139. package/dist/lib/workflow-stage-runner.d.ts +1 -1
  140. package/dist/lib/workflow-stage-runner.js +8 -8
  141. package/dist/lib/workflows.d.ts +9 -9
  142. package/dist/lib/workflows.js +15 -17
  143. package/package.json +13 -12
  144. package/dist/commands/workspace-flow.d.ts +0 -23
  145. package/dist/commands/workspace-flow.js +0 -109
  146. package/dist/lib/registry.d.ts +0 -16
  147. package/dist/lib/registry.js +0 -65
  148. package/dist/lib/validate-workspace.d.ts +0 -121
  149. package/dist/lib/validate-workspace.js +0 -407
  150. package/dist/lib/workflow-abi.js +0 -181
  151. package/dist/lib/workspace-compile.d.ts +0 -54
  152. package/dist/lib/workspace-compile.js +0 -476
  153. package/dist/lib/workspace-home.d.ts +0 -5
  154. package/dist/lib/workspace-home.js +0 -32
  155. package/dist/lib/workspace-layout.d.ts +0 -2
  156. package/dist/lib/workspace-layout.js +0 -60
  157. package/dist/lib/workspace-paths.d.ts +0 -41
  158. package/dist/lib/workspace-paths.js +0 -107
  159. package/dist/lib/workspace-reset.d.ts +0 -1
  160. package/dist/lib/workspace-reset.js +0 -43
  161. package/dist/lib/workspace-schema.d.ts +0 -17
  162. package/dist/lib/workspace-schema.js +0 -74
@@ -1,8 +1,8 @@
1
1
  import chalk from "chalk";
2
2
  import * as p from "@clack/prompts";
3
- import { detectInterf, listWorkspacesForSourceFolder, } from "../lib/interf.js";
4
- import { computeWorkspaceHealth } from "../lib/state.js";
5
- import { listWorkspaceEntries } from "../lib/registry.js";
3
+ import { detectInterf, listCompiledDatasetsForSourceFolder, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
4
+ import { computeCompiledHealth } from "../lib/state.js";
5
+ import { readSavedTestComparison, printSavedTestComparisonState } from "./test-flow.js";
6
6
  function statusColor(status) {
7
7
  switch (status) {
8
8
  case "compiled":
@@ -17,54 +17,77 @@ function statusColor(status) {
17
17
  }
18
18
  export const statusCommand = {
19
19
  command: "status",
20
- describe: "Show deterministic workspace health",
20
+ describe: "Show deterministic health for a compiled dataset",
21
21
  handler: async () => {
22
- let workspacePath = null;
22
+ let compiledPath = null;
23
23
  const detected = detectInterf(process.cwd());
24
24
  if (detected) {
25
- workspacePath = detected.path;
25
+ compiledPath = detected.path;
26
26
  }
27
27
  else {
28
- const local = listWorkspacesForSourceFolder(process.cwd()).map(({ path, config }) => ({
28
+ const sourcePath = process.cwd();
29
+ const local = listCompiledDatasetsForSourceFolder(sourcePath).map(({ path, config }) => ({
29
30
  path,
30
31
  name: config.name,
31
32
  }));
32
- const all = local.length > 0 ? local : listWorkspaceEntries().map((entry) => ({
33
- path: entry.path,
34
- name: entry.name,
35
- }));
36
- if (all.length === 0) {
33
+ if (local.length === 0) {
37
34
  process.exitCode = 1;
38
- console.log(chalk.red(" No workspaces found."));
35
+ console.log(chalk.red(" No compiled datasets found."));
36
+ console.log(chalk.dim(" Run `interf`, save truth checks, and compile a dataset first."));
39
37
  return;
40
38
  }
41
- if (all.length === 1) {
42
- workspacePath = all[0].path;
39
+ if (local.length === 1) {
40
+ compiledPath = local[0].path;
43
41
  }
44
42
  else {
45
43
  const selected = await p.select({
46
- message: "Which workspace?",
47
- options: all.map((entry) => ({ value: entry.path, label: entry.name })),
44
+ message: "Which dataset?",
45
+ options: local.map((entry) => ({ value: entry.path, label: entry.name })),
48
46
  });
49
47
  if (p.isCancel(selected))
50
48
  return;
51
- workspacePath = selected;
49
+ compiledPath = selected;
52
50
  }
53
51
  }
54
- const health = computeWorkspaceHealth(workspacePath);
52
+ const health = computeCompiledHealth(compiledPath);
55
53
  const color = statusColor(health.status);
56
54
  console.log();
57
55
  console.log(color(` ${health.target_name}`));
58
56
  console.log(chalk.dim(` status: ${health.status}`));
59
57
  console.log(chalk.dim(` stage: ${health.stage}`));
60
58
  console.log(chalk.dim(` ${health.summary}`));
59
+ const compiledConfig = readInterfConfig(compiledPath);
60
+ const sourcePath = resolveSourceControlPath(compiledPath);
61
+ const latestComparison = compiledConfig
62
+ ? readSavedTestComparison(sourcePath, compiledConfig.name)
63
+ : null;
64
+ if (latestComparison) {
65
+ printSavedTestComparisonState(latestComparison);
66
+ }
67
+ else {
68
+ console.log();
69
+ console.log(chalk.dim(" No saved test result yet. Run `interf test` to measure files as-is and the compiled dataset."));
70
+ }
61
71
  console.log();
62
- console.log(chalk.dim(` source coverage: ${health.metrics.source_covered}/${health.metrics.source_total}`));
63
- console.log(chalk.dim(` summarized: ${health.metrics.summarized}`));
64
- console.log(chalk.dim(` compiled: ${health.metrics.compiled}`));
65
- console.log(chalk.dim(` entities: ${health.metrics.entities}`));
66
- console.log(chalk.dim(` claims: ${health.metrics.claims}`));
67
- console.log(chalk.dim(` warnings: ${health.metrics.warnings}`));
68
- console.log(chalk.dim(` errors: ${health.metrics.errors}`));
72
+ const metricOrder = [
73
+ "source_total",
74
+ "stage_total",
75
+ "completed_stages",
76
+ "warnings",
77
+ "errors",
78
+ ];
79
+ const printed = new Set();
80
+ for (const key of metricOrder) {
81
+ const value = health.metrics[key];
82
+ if (typeof value !== "number")
83
+ continue;
84
+ printed.add(key);
85
+ console.log(chalk.dim(` ${key}: ${value}`));
86
+ }
87
+ for (const [key, value] of Object.entries(health.metrics)) {
88
+ if (printed.has(key))
89
+ continue;
90
+ console.log(chalk.dim(` ${key}: ${value}`));
91
+ }
69
92
  },
70
93
  };
@@ -1,36 +1,49 @@
1
1
  import { type TestTargetCandidate, type TestTargetResult } from "../lib/test.js";
2
2
  import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
3
3
  import type { TestSandboxRetentionMode } from "../lib/test-sandbox.js";
4
- import type { SourceWorkspaceConfig, TestRunMode } from "../lib/schema.js";
4
+ import type { SourceDatasetConfig, TestRunComparison, TestRunMode } from "../lib/schema.js";
5
5
  export interface SavedTestOutcome {
6
6
  runPath: string;
7
+ displayRunPath?: string;
7
8
  target: TestTargetCandidate;
8
9
  result: TestTargetResult;
9
10
  }
11
+ export interface AgentTestMatrixRow {
12
+ agentLabel: string;
13
+ rawOutcome?: SavedTestOutcome | null;
14
+ compiledOutcome?: SavedTestOutcome | null;
15
+ }
10
16
  export declare function questionPassRate(outcome: SavedTestOutcome): number;
17
+ export declare function readSavedTestComparison(projectPath: string, datasetName: string): TestRunComparison | null;
18
+ export declare function printSavedTestComparisonState(payload: TestRunComparison, comparisonRunPath?: string | null): void;
19
+ export declare function printAgentTestMatrix(rows: AgentTestMatrixRow[]): void;
20
+ export declare function printAgentTestFailures(rows: AgentTestMatrixRow[]): void;
11
21
  export declare function printSavedTestOutcome(prefix: string, outcome: SavedTestOutcome): void;
12
- export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, workspaceOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
22
+ export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
23
+ export declare function printSavedTestComparisonSummary(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
13
24
  export declare function saveTestComparisonRun(options: {
14
25
  sourcePath: string;
15
- workspacePath: string;
16
- workspaceName: string;
26
+ compiledPath: string | null;
27
+ compiledName: string;
28
+ checksFingerprint: string;
17
29
  mode: TestRunMode;
18
30
  rawOutcome: SavedTestOutcome | null;
19
- workspaceOutcome: SavedTestOutcome | null;
31
+ compiledOutcome: SavedTestOutcome | null;
20
32
  }): string;
21
33
  export declare function runSavedRawTest(options: {
22
34
  sourcePath: string;
23
- workspaceConfig: SourceWorkspaceConfig;
24
- workspacePath?: string | null;
35
+ datasetConfig: SourceDatasetConfig;
25
36
  executor?: WorkflowExecutor | null;
26
37
  executionProfile?: WorkflowExecutionProfile;
27
38
  preserveSandboxes?: TestSandboxRetentionMode;
39
+ runSuffix?: string | null;
28
40
  }): Promise<SavedTestOutcome | null>;
29
- export declare function runSavedWorkspaceTest(options: {
41
+ export declare function runSavedCompiledTest(options: {
30
42
  sourcePath: string;
31
- workspaceConfig: SourceWorkspaceConfig;
43
+ datasetConfig: SourceDatasetConfig;
32
44
  executor?: WorkflowExecutor | null;
33
45
  executionProfile?: WorkflowExecutionProfile;
34
- workspacePath?: string | null;
46
+ compiledPath?: string | null;
35
47
  preserveSandboxes?: TestSandboxRetentionMode;
48
+ runSuffix?: string | null;
36
49
  }): Promise<SavedTestOutcome | null>;
@@ -1,21 +1,26 @@
1
1
  import chalk from "chalk";
2
- import { mkdirSync, writeFileSync } from "node:fs";
3
- import { join } from "node:path";
4
- import { createRawTestTarget, createWorkspaceTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
5
- import { resolveWorkspaceRawPath, syncWorkspaceRawSnapshot } from "../lib/interf.js";
6
- import { buildTestSpecFromWorkspaceConfig } from "../lib/source-config.js";
7
- import { testRootForWorkspace, testRunsRootForWorkspace, } from "../lib/workspace-paths.js";
2
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { createRawTestTarget, createCompiledTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
5
+ import { buildTestSpecFromSourceFolderConfig, buildTestSpecFromCompiledDatasetConfig, resolveSourceDatasetPath, } from "../lib/source-config.js";
6
+ import { datasetLatestTestStatePath, datasetLatestTestSummaryPath, normalizeDatasetTestRunId, datasetTestRunPath, datasetTestRunsRoot, datasetTestsRoot, } from "../lib/project-paths.js";
7
+ import { testRootForCompiled } from "../lib/compiled-paths.js";
8
+ import { readJsonFileWithSchema } from "../lib/parse.js";
9
+ import { TestRunComparisonSchema } from "../lib/schema.js";
8
10
  import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
9
- import { ensureWorkspaceFromConfig, findBuiltWorkspacePath } from "./workspace-flow.js";
11
+ import { findBuiltCompiledPath } from "./compiled-flow.js";
10
12
  export function questionPassRate(outcome) {
11
13
  return outcome.result.totalCases > 0
12
14
  ? Math.round((outcome.result.passedCases / outcome.result.totalCases) * 100)
13
15
  : 0;
14
16
  }
17
+ function visibleRunPath(outcome) {
18
+ return outcome.displayRunPath ?? outcome.runPath;
19
+ }
15
20
  function summarizeSavedTestOutcome(label, outcome) {
16
21
  return {
17
22
  label,
18
- run_path: outcome.runPath,
23
+ run_path: visibleRunPath(outcome),
19
24
  ok: outcome.result.ok,
20
25
  passed_cases: outcome.result.passedCases,
21
26
  total_cases: outcome.result.totalCases,
@@ -24,13 +29,148 @@ function summarizeSavedTestOutcome(label, outcome) {
24
29
  target: outcome.target,
25
30
  };
26
31
  }
27
- function normalizeTestRunId(input) {
28
- return input
29
- .toLowerCase()
30
- .trim()
31
- .replace(/[^a-z0-9]+/g, "-")
32
- .replace(/^-+|-+$/g, "")
33
- .slice(0, 80);
32
+ function writeDatasetTargetRun(options) {
33
+ const dirPath = datasetTestRunsRoot(options.projectPath, options.datasetName, options.target);
34
+ mkdirSync(dirPath, { recursive: true });
35
+ const runPath = datasetTestRunPath(options.projectPath, options.datasetName, options.target, options.generatedAt, options.runId, options.runSuffix);
36
+ writeFileSync(runPath, `${JSON.stringify(options.payload, null, 2)}\n`);
37
+ return runPath;
38
+ }
39
+ function loadLatestComparison(projectPath, datasetName) {
40
+ const latestPath = datasetLatestTestStatePath(projectPath, datasetName);
41
+ if (!existsSync(latestPath))
42
+ return null;
43
+ return readJsonFileWithSchema(latestPath, "latest test comparison", TestRunComparisonSchema);
44
+ }
45
+ export function readSavedTestComparison(projectPath, datasetName) {
46
+ return loadLatestComparison(projectPath, datasetName);
47
+ }
48
+ function renderLatestSummaryMarkdown(payload) {
49
+ const lines = [
50
+ "# Latest Test Result",
51
+ "",
52
+ "| Target | Truth checks |",
53
+ "| --- | --- |",
54
+ ];
55
+ if (payload.raw) {
56
+ lines.push(`| Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
57
+ }
58
+ if (payload.compiled) {
59
+ lines.push(`| Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
60
+ }
61
+ lines.push("");
62
+ if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
63
+ const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
64
+ lines.push(`Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`, "");
65
+ }
66
+ if (payload.raw) {
67
+ lines.push(`- Latest files-as-is run: ${payload.raw.run_path}`);
68
+ }
69
+ if (payload.compiled) {
70
+ lines.push(`- Latest compiled run: ${payload.compiled.run_path}`);
71
+ }
72
+ return `${lines.join("\n")}\n`;
73
+ }
74
+ export function printSavedTestComparisonState(payload, comparisonRunPath) {
75
+ console.log();
76
+ console.log(chalk.bold(" Latest saved test"));
77
+ console.log();
78
+ console.log(" | Target | Truth checks |");
79
+ console.log(" | --- | --- |");
80
+ if (payload.raw) {
81
+ console.log(` | Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
82
+ }
83
+ if (payload.compiled) {
84
+ console.log(` | Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
85
+ }
86
+ if (!payload.raw || !payload.compiled) {
87
+ console.log();
88
+ if (!payload.raw) {
89
+ console.log(chalk.dim(" No saved files-as-is baseline yet."));
90
+ }
91
+ if (!payload.compiled) {
92
+ console.log(chalk.dim(" No saved compiled-dataset run yet."));
93
+ }
94
+ }
95
+ if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
96
+ const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
97
+ const color = (payload.summary.pass_rate_delta ?? 0) >= 0 ? chalk.green : chalk.red;
98
+ console.log();
99
+ console.log(color(` Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`));
100
+ }
101
+ if (comparisonRunPath) {
102
+ console.log();
103
+ console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
104
+ }
105
+ }
106
+ function padCell(value, width) {
107
+ return value.padEnd(width, " ");
108
+ }
109
+ function scoreCell(outcome) {
110
+ if (!outcome)
111
+ return "—";
112
+ return `${outcome.result.passedCases}/${outcome.result.totalCases}`;
113
+ }
114
+ function deltaCell(row) {
115
+ if (!row.rawOutcome || !row.compiledOutcome)
116
+ return "—";
117
+ const delta = row.compiledOutcome.result.passedCases - row.rawOutcome.result.passedCases;
118
+ return delta > 0 ? `+${delta}` : `${delta}`;
119
+ }
120
+ export function printAgentTestMatrix(rows) {
121
+ if (rows.length === 0)
122
+ return;
123
+ const includeRaw = rows.some((row) => Boolean(row.rawOutcome));
124
+ const includeCompiled = rows.some((row) => Boolean(row.compiledOutcome));
125
+ const includeDelta = includeRaw && includeCompiled;
126
+ const headers = [
127
+ "Agent",
128
+ ...(includeRaw ? ["Files as-is"] : []),
129
+ ...(includeCompiled ? ["Compiled dataset"] : []),
130
+ ...(includeDelta ? ["Delta"] : []),
131
+ ];
132
+ const body = rows.map((row) => [
133
+ row.agentLabel,
134
+ ...(includeRaw ? [scoreCell(row.rawOutcome)] : []),
135
+ ...(includeCompiled ? [scoreCell(row.compiledOutcome)] : []),
136
+ ...(includeDelta ? [deltaCell(row)] : []),
137
+ ]);
138
+ const widths = headers.map((header, index) => Math.max(header.length, ...body.map((row) => (row[index] ?? "").length)));
139
+ const heading = includeDelta ? " Comparison" : " Results";
140
+ console.log();
141
+ console.log(chalk.bold(heading));
142
+ console.log();
143
+ console.log(` | ${headers.map((header, index) => padCell(header, widths[index] ?? header.length)).join(" | ")} |`);
144
+ console.log(` | ${widths.map((width) => "-".repeat(width)).join(" | ")} |`);
145
+ for (const row of body) {
146
+ console.log(` | ${row.map((cell, index) => padCell(cell ?? "", widths[index] ?? cell.length)).join(" | ")} |`);
147
+ }
148
+ }
149
+ export function printAgentTestFailures(rows) {
150
+ for (const row of rows) {
151
+ const failures = [];
152
+ for (const [label, outcome] of [
153
+ ["Files as-is", row.rawOutcome ?? null],
154
+ ["Compiled dataset", row.compiledOutcome ?? null],
155
+ ]) {
156
+ if (!outcome || outcome.result.ok)
157
+ continue;
158
+ for (const [index, caseResult] of outcome.result.caseResults.entries()) {
159
+ if (caseResult.ok)
160
+ continue;
161
+ const reason = caseResult.checks.find((entry) => !entry.ok)?.detail ?? "failed";
162
+ failures.push(`${label} · Truth Check ${index + 1}: ${reason}`);
163
+ }
164
+ }
165
+ if (failures.length === 0)
166
+ continue;
167
+ console.log();
168
+ console.log(chalk.bold(` ${row.agentLabel} failures`));
169
+ console.log();
170
+ for (const failure of failures) {
171
+ console.log(` - ${failure}`);
172
+ }
173
+ }
34
174
  }
35
175
  function specNeedsExecutor(spec) {
36
176
  return spec.cases.some((entry) => !entry.file || Boolean(entry.answer));
@@ -69,69 +209,121 @@ export function printSavedTestOutcome(prefix, outcome) {
69
209
  console.log(chalk.dim(` Preserved sandbox: ${outcome.result.sandbox_path}`));
70
210
  console.log();
71
211
  }
72
- console.log(chalk.dim(` Saved run: ${outcome.runPath}`));
212
+ console.log(chalk.dim(` Saved run: ${visibleRunPath(outcome)}`));
73
213
  }
74
- export function printSavedTestComparison(rawOutcome, workspaceOutcome, comparisonRunPath) {
75
- if (!rawOutcome && !workspaceOutcome)
214
+ export function printSavedTestComparison(rawOutcome, compiledOutcome, comparisonRunPath) {
215
+ if (!rawOutcome && !compiledOutcome)
76
216
  return;
77
217
  console.log();
78
218
  if (rawOutcome) {
79
- printSavedTestOutcome("Raw files", rawOutcome);
219
+ printSavedTestOutcome("Files as-is", rawOutcome);
80
220
  }
81
- if (workspaceOutcome) {
221
+ if (compiledOutcome) {
82
222
  if (rawOutcome)
83
223
  console.log();
84
- printSavedTestOutcome("Compiled workspace", workspaceOutcome);
224
+ printSavedTestOutcome("Compiled dataset", compiledOutcome);
225
+ }
226
+ if (rawOutcome && compiledOutcome) {
227
+ const rawQuestions = questionPassRate(rawOutcome);
228
+ const compiledQuestions = questionPassRate(compiledOutcome);
229
+ const delta = compiledQuestions - rawQuestions;
230
+ const color = delta >= 0 ? chalk.green : chalk.red;
231
+ const direction = delta >= 0 ? "improved" : "decreased";
232
+ console.log();
233
+ console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
234
+ }
235
+ if (comparisonRunPath) {
236
+ console.log();
237
+ console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
238
+ }
239
+ }
240
+ export function printSavedTestComparisonSummary(rawOutcome, compiledOutcome, comparisonRunPath) {
241
+ if (!rawOutcome && !compiledOutcome)
242
+ return;
243
+ console.log();
244
+ console.log(chalk.bold(" Comparison"));
245
+ console.log();
246
+ console.log(" | Target | Truth checks |");
247
+ console.log(" | --- | --- |");
248
+ if (rawOutcome) {
249
+ console.log(` | Files as-is | \`${rawOutcome.result.passedCases}/${rawOutcome.result.totalCases}\` |`);
250
+ }
251
+ if (compiledOutcome) {
252
+ console.log(` | Compiled dataset | \`${compiledOutcome.result.passedCases}/${compiledOutcome.result.totalCases}\` |`);
85
253
  }
86
- if (rawOutcome && workspaceOutcome) {
254
+ if (rawOutcome && compiledOutcome) {
87
255
  const rawQuestions = questionPassRate(rawOutcome);
88
- const workspaceQuestions = questionPassRate(workspaceOutcome);
89
- const delta = workspaceQuestions - rawQuestions;
256
+ const compiledQuestions = questionPassRate(compiledOutcome);
257
+ const delta = compiledQuestions - rawQuestions;
90
258
  const color = delta >= 0 ? chalk.green : chalk.red;
91
259
  const direction = delta >= 0 ? "improved" : "decreased";
92
260
  console.log();
93
- console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${workspaceQuestions}%.`));
261
+ console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
94
262
  }
95
263
  if (comparisonRunPath) {
96
264
  console.log();
97
- console.log(chalk.dim(` Saved comparison: ${comparisonRunPath}`));
265
+ console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
98
266
  }
99
267
  }
100
268
  export function saveTestComparisonRun(options) {
101
269
  const generatedAt = new Date().toISOString();
102
- const runRoot = join(testRunsRootForWorkspace(options.workspacePath), `${generatedAt.replace(/[:.]/g, "-")}-${normalizeTestRunId(options.workspaceName)}`);
103
- mkdirSync(runRoot, { recursive: true });
104
- const rawPassRate = options.rawOutcome ? questionPassRate(options.rawOutcome) : null;
105
- const workspacePassRate = options.workspaceOutcome ? questionPassRate(options.workspaceOutcome) : null;
270
+ const existing = loadLatestComparison(options.sourcePath, options.compiledName);
271
+ const canReuseExisting = Boolean(existing?.checks_fingerprint) &&
272
+ existing?.checks_fingerprint === options.checksFingerprint;
273
+ const rawSummary = options.rawOutcome
274
+ ? summarizeSavedTestOutcome("Files as-is", options.rawOutcome)
275
+ : canReuseExisting
276
+ ? existing?.raw ?? null
277
+ : null;
278
+ const compiledSummary = options.compiledOutcome
279
+ ? summarizeSavedTestOutcome("Compiled dataset", options.compiledOutcome)
280
+ : canReuseExisting
281
+ ? existing?.compiled ?? null
282
+ : null;
283
+ const effectiveMode = rawSummary && compiledSummary
284
+ ? "both"
285
+ : rawSummary
286
+ ? "raw"
287
+ : "compiled";
288
+ const rawPassRate = rawSummary
289
+ ? Math.round((rawSummary.passed_cases / rawSummary.total_cases) * 100)
290
+ : null;
291
+ const compiledPassRate = compiledSummary
292
+ ? Math.round((compiledSummary.passed_cases / compiledSummary.total_cases) * 100)
293
+ : null;
106
294
  const payload = {
107
295
  kind: "interf-test-run",
108
296
  version: 1,
109
297
  generated_at: generatedAt,
110
- mode: options.mode,
298
+ mode: effectiveMode,
111
299
  source_path: options.sourcePath,
112
- workspace: {
113
- name: options.workspaceName,
114
- path: options.workspacePath,
300
+ checks_fingerprint: options.checksFingerprint,
301
+ dataset: {
302
+ name: options.compiledName,
303
+ compiled_path: options.compiledPath ?? (canReuseExisting ? existing?.dataset.compiled_path ?? null : null),
115
304
  },
116
- raw: options.rawOutcome ? summarizeSavedTestOutcome("Raw files", options.rawOutcome) : null,
117
- compiled_workspace: options.workspaceOutcome
118
- ? summarizeSavedTestOutcome("Compiled workspace", options.workspaceOutcome)
119
- : null,
305
+ raw: rawSummary,
306
+ compiled: compiledSummary,
120
307
  summary: {
121
308
  raw_pass_rate: rawPassRate,
122
- compiled_pass_rate: workspacePassRate,
123
- pass_rate_delta: rawPassRate !== null && workspacePassRate !== null ? workspacePassRate - rawPassRate : null,
309
+ compiled_pass_rate: compiledPassRate,
310
+ pass_rate_delta: rawPassRate !== null && compiledPassRate !== null ? compiledPassRate - rawPassRate : null,
124
311
  },
125
312
  };
126
- const runPath = join(runRoot, "run.json");
127
- writeFileSync(runPath, `${JSON.stringify(payload, null, 2)}\n`);
128
- writeFileSync(join(testRootForWorkspace(options.workspacePath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
129
- return runPath;
313
+ const latestStatePath = datasetLatestTestStatePath(options.sourcePath, options.compiledName);
314
+ mkdirSync(dirname(latestStatePath), { recursive: true });
315
+ writeFileSync(latestStatePath, `${JSON.stringify(payload, null, 2)}\n`);
316
+ writeFileSync(datasetLatestTestSummaryPath(options.sourcePath, options.compiledName), renderLatestSummaryMarkdown(payload));
317
+ if (options.compiledPath) {
318
+ mkdirSync(testRootForCompiled(options.compiledPath), { recursive: true });
319
+ writeFileSync(join(testRootForCompiled(options.compiledPath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
320
+ }
321
+ return latestStatePath;
130
322
  }
131
323
  export async function runSavedRawTest(options) {
132
- const workspacePath = options.workspacePath ?? ensureWorkspaceFromConfig(options.sourcePath, options.workspaceConfig);
133
- const spec = buildTestSpecFromWorkspaceConfig({
134
- workspacePath,
324
+ const spec = buildTestSpecFromSourceFolderConfig({
325
+ sourcePath: options.sourcePath,
326
+ targetName: options.datasetConfig.name,
135
327
  targetType: "raw",
136
328
  });
137
329
  if (!spec) {
@@ -146,39 +338,45 @@ export async function runSavedRawTest(options) {
146
338
  console.log(chalk.red(error));
147
339
  return null;
148
340
  }
149
- syncWorkspaceRawSnapshot(workspacePath, options.sourcePath);
150
- const target = createRawTestTarget(resolveWorkspaceRawPath(workspacePath));
151
- const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
341
+ const datasetSourcePath = resolveSourceDatasetPath(options.sourcePath, options.datasetConfig);
342
+ const target = createRawTestTarget(datasetSourcePath);
343
+ const run = await runTargetTestsAuto(datasetSourcePath, spec, [target], {
152
344
  executor,
153
345
  preserveSandboxes: options.preserveSandboxes ?? "on-failure",
154
- artifactRootPath: workspacePath,
346
+ artifactRootPath: datasetTestsRoot(options.sourcePath, options.datasetConfig.name),
155
347
  });
156
348
  const result = run.results[0];
157
349
  if (!result)
158
350
  return null;
351
+ const datasetRunPath = writeDatasetTargetRun({
352
+ projectPath: options.sourcePath,
353
+ datasetName: options.datasetConfig.name,
354
+ target: "file-as-is",
355
+ generatedAt: run.generated_at,
356
+ runId: normalizeDatasetTestRunId(spec.id),
357
+ runSuffix: options.runSuffix,
358
+ payload: run,
359
+ });
159
360
  return {
160
- runPath: saveTargetTestRun(workspacePath, run),
361
+ runPath: datasetRunPath,
161
362
  target,
162
363
  result,
163
364
  };
164
365
  }
165
- export async function runSavedWorkspaceTest(options) {
166
- const workspacePath = options.workspacePath ?? findBuiltWorkspacePath(options.sourcePath, options.workspaceConfig.name);
167
- if (!workspacePath) {
366
+ export async function runSavedCompiledTest(options) {
367
+ const compiledPath = options.compiledPath ?? findBuiltCompiledPath(options.sourcePath, options.datasetConfig.name);
368
+ if (!compiledPath) {
168
369
  return null;
169
370
  }
170
- const spec = buildTestSpecFromWorkspaceConfig({
171
- workspacePath,
172
- targetType: "workspace",
371
+ const spec = buildTestSpecFromCompiledDatasetConfig({
372
+ compiledPath,
373
+ targetType: "compiled",
173
374
  });
174
375
  if (!spec) {
175
376
  return null;
176
377
  }
177
- const target = createWorkspaceTestTarget(workspacePath, options.workspaceConfig.name, options.workspaceConfig.workflow ?? "interf");
178
- if (!target) {
179
- return null;
180
- }
181
- if (!target.eligible) {
378
+ const target = createCompiledTestTarget(compiledPath, options.datasetConfig.name, options.datasetConfig.workflow ?? "interf");
379
+ if (!target || !target.eligible) {
182
380
  return null;
183
381
  }
184
382
  const { executor, error } = await resolveExecutorForSpec(spec, options.executor, options.executionProfile);
@@ -193,13 +391,24 @@ export async function runSavedWorkspaceTest(options) {
193
391
  const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
194
392
  executor,
195
393
  preserveSandboxes: options.preserveSandboxes ?? "on-failure",
196
- artifactRootPath: workspacePath,
394
+ artifactRootPath: compiledPath,
197
395
  });
198
396
  const result = run.results[0];
199
397
  if (!result)
200
398
  return null;
399
+ const internalRunPath = saveTargetTestRun(compiledPath, run);
400
+ const datasetRunPath = writeDatasetTargetRun({
401
+ projectPath: options.sourcePath,
402
+ datasetName: options.datasetConfig.name,
403
+ target: "compiled",
404
+ generatedAt: run.generated_at,
405
+ runId: normalizeDatasetTestRunId(spec.id),
406
+ runSuffix: options.runSuffix,
407
+ payload: run,
408
+ });
201
409
  return {
202
- runPath: saveTargetTestRun(workspacePath, run),
410
+ runPath: internalRunPath,
411
+ displayRunPath: datasetRunPath,
203
412
  target,
204
413
  result,
205
414
  };
@@ -1,3 +1,9 @@
1
1
  import type { CommandModule } from "yargs";
2
+ import type { SourceDatasetConfig } from "../lib/schema.js";
3
+ export declare function resolveConfiguredDatasetSelection(options: {
4
+ sourcePath: string;
5
+ requestedDatasetName?: string | null;
6
+ hintedDatasetConfig?: SourceDatasetConfig | null;
7
+ }): SourceDatasetConfig | null;
2
8
  export declare const testCommand: CommandModule;
3
- export declare function runTestCommand(argv?: Record<string, unknown>): Promise<void>;
9
+ export declare function runTestCommand(argv?: Record<string, unknown>): Promise<boolean>;