@interf/compiler 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -66
- package/builtin-workflows/interf/README.md +6 -6
- package/builtin-workflows/interf/compile/stages/shape/SKILL.md +7 -7
- package/builtin-workflows/interf/compile/stages/structure/SKILL.md +2 -2
- package/builtin-workflows/interf/compile/stages/summarize/SKILL.md +1 -1
- package/builtin-workflows/interf/{workspace.schema.json → compiled.schema.json} +5 -5
- package/builtin-workflows/interf/improve/SKILL.md +3 -3
- package/builtin-workflows/interf/use/query/SKILL.md +2 -2
- package/builtin-workflows/interf/workflow.json +42 -31
- package/dist/commands/check-draft.d.ts +19 -0
- package/dist/commands/check-draft.js +110 -0
- package/dist/commands/compile-controller.d.ts +4 -4
- package/dist/commands/compile-controller.js +117 -81
- package/dist/commands/compile.d.ts +5 -5
- package/dist/commands/compile.js +61 -62
- package/dist/commands/compiled-flow.d.ts +23 -0
- package/dist/commands/compiled-flow.js +112 -0
- package/dist/commands/create-workflow-wizard.d.ts +3 -3
- package/dist/commands/create-workflow-wizard.js +11 -11
- package/dist/commands/create.d.ts +2 -2
- package/dist/commands/create.js +50 -57
- package/dist/commands/default.js +2 -2
- package/dist/commands/executor-flow.d.ts +20 -1
- package/dist/commands/executor-flow.js +67 -7
- package/dist/commands/init.js +242 -289
- package/dist/commands/list.js +14 -10
- package/dist/commands/reset.js +6 -6
- package/dist/commands/source-config-wizard.d.ts +12 -8
- package/dist/commands/source-config-wizard.js +356 -119
- package/dist/commands/status.js +49 -26
- package/dist/commands/test-flow.d.ts +23 -10
- package/dist/commands/test-flow.js +278 -58
- package/dist/commands/test.d.ts +7 -1
- package/dist/commands/test.js +264 -65
- package/dist/commands/verify.js +23 -14
- package/dist/index.d.ts +7 -7
- package/dist/index.js +4 -4
- package/dist/lib/agent-args.js +2 -1
- package/dist/lib/agent-constants.js +1 -1
- package/dist/lib/agent-render.js +4 -4
- package/dist/lib/agent-shells.d.ts +8 -8
- package/dist/lib/agent-shells.js +231 -142
- package/dist/lib/compiled-compile.d.ts +52 -0
- package/dist/lib/compiled-compile.js +274 -0
- package/dist/lib/compiled-home.d.ts +5 -0
- package/dist/lib/compiled-home.js +32 -0
- package/dist/lib/compiled-layout.d.ts +2 -0
- package/dist/lib/compiled-layout.js +60 -0
- package/dist/lib/compiled-paths.d.ts +41 -0
- package/dist/lib/compiled-paths.js +111 -0
- package/dist/lib/{workspace-raw.d.ts → compiled-raw.d.ts} +8 -7
- package/dist/lib/{workspace-raw.js → compiled-raw.js} +16 -14
- package/dist/lib/compiled-reset.d.ts +1 -0
- package/dist/lib/compiled-reset.js +44 -0
- package/dist/lib/compiled-schema.d.ts +27 -0
- package/dist/lib/compiled-schema.js +110 -0
- package/dist/lib/config.d.ts +0 -1
- package/dist/lib/config.js +0 -1
- package/dist/lib/discovery.d.ts +1 -1
- package/dist/lib/discovery.js +3 -3
- package/dist/lib/interf-bootstrap.d.ts +1 -1
- package/dist/lib/interf-bootstrap.js +4 -4
- package/dist/lib/interf-detect.d.ts +10 -10
- package/dist/lib/interf-detect.js +78 -56
- package/dist/lib/interf-scaffold.d.ts +2 -2
- package/dist/lib/interf-scaffold.js +90 -57
- package/dist/lib/interf-workflow-package.d.ts +3 -3
- package/dist/lib/interf-workflow-package.js +30 -30
- package/dist/lib/interf.d.ts +5 -5
- package/dist/lib/interf.js +4 -4
- package/dist/lib/local-workflows.d.ts +4 -4
- package/dist/lib/local-workflows.js +35 -70
- package/dist/lib/obsidian.d.ts +1 -1
- package/dist/lib/parse.js +92 -1
- package/dist/lib/project-paths.d.ts +13 -0
- package/dist/lib/project-paths.js +29 -0
- package/dist/lib/runtime-acceptance.d.ts +7 -1
- package/dist/lib/runtime-acceptance.js +194 -59
- package/dist/lib/runtime-contracts.d.ts +2 -4
- package/dist/lib/runtime-contracts.js +17 -161
- package/dist/lib/runtime-inventory.d.ts +7 -0
- package/dist/lib/runtime-inventory.js +29 -0
- package/dist/lib/runtime-paths.js +5 -5
- package/dist/lib/runtime-prompt.js +9 -6
- package/dist/lib/runtime-reconcile.d.ts +2 -3
- package/dist/lib/runtime-reconcile.js +92 -171
- package/dist/lib/runtime-runs.js +30 -39
- package/dist/lib/runtime-types.d.ts +10 -19
- package/dist/lib/runtime.d.ts +2 -2
- package/dist/lib/runtime.js +1 -1
- package/dist/lib/schema.d.ts +163 -140
- package/dist/lib/schema.js +163 -124
- package/dist/lib/source-config.d.ts +24 -20
- package/dist/lib/source-config.js +154 -116
- package/dist/lib/state-artifacts.d.ts +5 -5
- package/dist/lib/state-artifacts.js +8 -8
- package/dist/lib/state-health.d.ts +4 -4
- package/dist/lib/state-health.js +108 -126
- package/dist/lib/state-io.d.ts +8 -8
- package/dist/lib/state-io.js +77 -50
- package/dist/lib/state-paths.js +5 -5
- package/dist/lib/state-view.d.ts +4 -4
- package/dist/lib/state-view.js +52 -55
- package/dist/lib/state.d.ts +5 -5
- package/dist/lib/state.js +4 -4
- package/dist/lib/summarize-plan.d.ts +3 -2
- package/dist/lib/summarize-plan.js +18 -16
- package/dist/lib/test-execution.js +9 -9
- package/dist/lib/test-matrices.d.ts +3 -3
- package/dist/lib/test-matrices.js +6 -6
- package/dist/lib/test-paths.d.ts +4 -4
- package/dist/lib/test-paths.js +16 -10
- package/dist/lib/test-sandbox.d.ts +1 -1
- package/dist/lib/test-sandbox.js +38 -31
- package/dist/lib/test-targets.d.ts +2 -2
- package/dist/lib/test-targets.js +11 -11
- package/dist/lib/test-types.d.ts +1 -1
- package/dist/lib/test.d.ts +1 -1
- package/dist/lib/test.js +1 -1
- package/dist/lib/util.d.ts +2 -0
- package/dist/lib/util.js +14 -1
- package/dist/lib/validate-compiled.d.ts +27 -0
- package/dist/lib/validate-compiled.js +236 -0
- package/dist/lib/validate-helpers.d.ts +0 -8
- package/dist/lib/validate-helpers.js +0 -30
- package/dist/lib/validate.d.ts +4 -4
- package/dist/lib/validate.js +49 -15
- package/dist/lib/workflow-abi.d.ts +37 -46
- package/dist/lib/workflow-abi.js +51 -76
- package/dist/lib/workflow-definitions.d.ts +11 -11
- package/dist/lib/workflow-definitions.js +36 -53
- package/dist/lib/workflow-helpers.d.ts +2 -3
- package/dist/lib/workflow-helpers.js +9 -13
- package/dist/lib/workflow-improvement.d.ts +3 -3
- package/dist/lib/workflow-improvement.js +48 -48
- package/dist/lib/workflow-review-paths.d.ts +3 -3
- package/dist/lib/workflow-review-paths.js +11 -11
- package/dist/lib/workflow-stage-runner.d.ts +1 -1
- package/dist/lib/workflow-stage-runner.js +8 -8
- package/dist/lib/workflows.d.ts +9 -9
- package/dist/lib/workflows.js +15 -17
- package/package.json +10 -9
- package/dist/commands/workspace-flow.d.ts +0 -23
- package/dist/commands/workspace-flow.js +0 -109
- package/dist/lib/registry.d.ts +0 -16
- package/dist/lib/registry.js +0 -65
- package/dist/lib/validate-workspace.d.ts +0 -121
- package/dist/lib/validate-workspace.js +0 -407
- package/dist/lib/workspace-compile.d.ts +0 -54
- package/dist/lib/workspace-compile.js +0 -476
- package/dist/lib/workspace-home.d.ts +0 -5
- package/dist/lib/workspace-home.js +0 -32
- package/dist/lib/workspace-layout.d.ts +0 -2
- package/dist/lib/workspace-layout.js +0 -60
- package/dist/lib/workspace-paths.d.ts +0 -41
- package/dist/lib/workspace-paths.js +0 -107
- package/dist/lib/workspace-reset.d.ts +0 -1
- package/dist/lib/workspace-reset.js +0 -43
- package/dist/lib/workspace-schema.d.ts +0 -17
- package/dist/lib/workspace-schema.js +0 -74
package/dist/commands/status.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
2
|
import * as p from "@clack/prompts";
|
|
3
|
-
import { detectInterf,
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
3
|
+
import { detectInterf, listCompiledDatasetsForSourceFolder, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
|
|
4
|
+
import { computeCompiledHealth } from "../lib/state.js";
|
|
5
|
+
import { readSavedTestComparison, printSavedTestComparisonState } from "./test-flow.js";
|
|
6
6
|
function statusColor(status) {
|
|
7
7
|
switch (status) {
|
|
8
8
|
case "compiled":
|
|
@@ -17,54 +17,77 @@ function statusColor(status) {
|
|
|
17
17
|
}
|
|
18
18
|
export const statusCommand = {
|
|
19
19
|
command: "status",
|
|
20
|
-
describe: "Show deterministic
|
|
20
|
+
describe: "Show deterministic health for a compiled dataset",
|
|
21
21
|
handler: async () => {
|
|
22
|
-
let
|
|
22
|
+
let compiledPath = null;
|
|
23
23
|
const detected = detectInterf(process.cwd());
|
|
24
24
|
if (detected) {
|
|
25
|
-
|
|
25
|
+
compiledPath = detected.path;
|
|
26
26
|
}
|
|
27
27
|
else {
|
|
28
|
-
const
|
|
28
|
+
const sourcePath = process.cwd();
|
|
29
|
+
const local = listCompiledDatasetsForSourceFolder(sourcePath).map(({ path, config }) => ({
|
|
29
30
|
path,
|
|
30
31
|
name: config.name,
|
|
31
32
|
}));
|
|
32
|
-
|
|
33
|
-
path: entry.path,
|
|
34
|
-
name: entry.name,
|
|
35
|
-
}));
|
|
36
|
-
if (all.length === 0) {
|
|
33
|
+
if (local.length === 0) {
|
|
37
34
|
process.exitCode = 1;
|
|
38
|
-
console.log(chalk.red(" No
|
|
35
|
+
console.log(chalk.red(" No compiled datasets found."));
|
|
36
|
+
console.log(chalk.dim(" Run `interf`, save truth checks, and compile a dataset first."));
|
|
39
37
|
return;
|
|
40
38
|
}
|
|
41
|
-
if (
|
|
42
|
-
|
|
39
|
+
if (local.length === 1) {
|
|
40
|
+
compiledPath = local[0].path;
|
|
43
41
|
}
|
|
44
42
|
else {
|
|
45
43
|
const selected = await p.select({
|
|
46
|
-
message: "Which
|
|
47
|
-
options:
|
|
44
|
+
message: "Which dataset?",
|
|
45
|
+
options: local.map((entry) => ({ value: entry.path, label: entry.name })),
|
|
48
46
|
});
|
|
49
47
|
if (p.isCancel(selected))
|
|
50
48
|
return;
|
|
51
|
-
|
|
49
|
+
compiledPath = selected;
|
|
52
50
|
}
|
|
53
51
|
}
|
|
54
|
-
const health =
|
|
52
|
+
const health = computeCompiledHealth(compiledPath);
|
|
55
53
|
const color = statusColor(health.status);
|
|
56
54
|
console.log();
|
|
57
55
|
console.log(color(` ${health.target_name}`));
|
|
58
56
|
console.log(chalk.dim(` status: ${health.status}`));
|
|
59
57
|
console.log(chalk.dim(` stage: ${health.stage}`));
|
|
60
58
|
console.log(chalk.dim(` ${health.summary}`));
|
|
59
|
+
const compiledConfig = readInterfConfig(compiledPath);
|
|
60
|
+
const sourcePath = resolveSourceControlPath(compiledPath);
|
|
61
|
+
const latestComparison = compiledConfig
|
|
62
|
+
? readSavedTestComparison(sourcePath, compiledConfig.name)
|
|
63
|
+
: null;
|
|
64
|
+
if (latestComparison) {
|
|
65
|
+
printSavedTestComparisonState(latestComparison);
|
|
66
|
+
}
|
|
67
|
+
else {
|
|
68
|
+
console.log();
|
|
69
|
+
console.log(chalk.dim(" No saved test result yet. Run `interf test` to measure files as-is and the compiled dataset."));
|
|
70
|
+
}
|
|
61
71
|
console.log();
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
72
|
+
const metricOrder = [
|
|
73
|
+
"source_total",
|
|
74
|
+
"stage_total",
|
|
75
|
+
"completed_stages",
|
|
76
|
+
"warnings",
|
|
77
|
+
"errors",
|
|
78
|
+
];
|
|
79
|
+
const printed = new Set();
|
|
80
|
+
for (const key of metricOrder) {
|
|
81
|
+
const value = health.metrics[key];
|
|
82
|
+
if (typeof value !== "number")
|
|
83
|
+
continue;
|
|
84
|
+
printed.add(key);
|
|
85
|
+
console.log(chalk.dim(` ${key}: ${value}`));
|
|
86
|
+
}
|
|
87
|
+
for (const [key, value] of Object.entries(health.metrics)) {
|
|
88
|
+
if (printed.has(key))
|
|
89
|
+
continue;
|
|
90
|
+
console.log(chalk.dim(` ${key}: ${value}`));
|
|
91
|
+
}
|
|
69
92
|
},
|
|
70
93
|
};
|
|
@@ -1,36 +1,49 @@
|
|
|
1
1
|
import { type TestTargetCandidate, type TestTargetResult } from "../lib/test.js";
|
|
2
2
|
import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
|
|
3
3
|
import type { TestSandboxRetentionMode } from "../lib/test-sandbox.js";
|
|
4
|
-
import type {
|
|
4
|
+
import type { SourceDatasetConfig, TestRunComparison, TestRunMode } from "../lib/schema.js";
|
|
5
5
|
export interface SavedTestOutcome {
|
|
6
6
|
runPath: string;
|
|
7
|
+
displayRunPath?: string;
|
|
7
8
|
target: TestTargetCandidate;
|
|
8
9
|
result: TestTargetResult;
|
|
9
10
|
}
|
|
11
|
+
export interface AgentTestMatrixRow {
|
|
12
|
+
agentLabel: string;
|
|
13
|
+
rawOutcome?: SavedTestOutcome | null;
|
|
14
|
+
compiledOutcome?: SavedTestOutcome | null;
|
|
15
|
+
}
|
|
10
16
|
export declare function questionPassRate(outcome: SavedTestOutcome): number;
|
|
17
|
+
export declare function readSavedTestComparison(projectPath: string, datasetName: string): TestRunComparison | null;
|
|
18
|
+
export declare function printSavedTestComparisonState(payload: TestRunComparison, comparisonRunPath?: string | null): void;
|
|
19
|
+
export declare function printAgentTestMatrix(rows: AgentTestMatrixRow[]): void;
|
|
20
|
+
export declare function printAgentTestFailures(rows: AgentTestMatrixRow[]): void;
|
|
11
21
|
export declare function printSavedTestOutcome(prefix: string, outcome: SavedTestOutcome): void;
|
|
12
|
-
export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null,
|
|
22
|
+
export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
|
|
23
|
+
export declare function printSavedTestComparisonSummary(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
|
|
13
24
|
export declare function saveTestComparisonRun(options: {
|
|
14
25
|
sourcePath: string;
|
|
15
|
-
|
|
16
|
-
|
|
26
|
+
compiledPath: string | null;
|
|
27
|
+
compiledName: string;
|
|
28
|
+
checksFingerprint: string;
|
|
17
29
|
mode: TestRunMode;
|
|
18
30
|
rawOutcome: SavedTestOutcome | null;
|
|
19
|
-
|
|
31
|
+
compiledOutcome: SavedTestOutcome | null;
|
|
20
32
|
}): string;
|
|
21
33
|
export declare function runSavedRawTest(options: {
|
|
22
34
|
sourcePath: string;
|
|
23
|
-
|
|
24
|
-
workspacePath?: string | null;
|
|
35
|
+
datasetConfig: SourceDatasetConfig;
|
|
25
36
|
executor?: WorkflowExecutor | null;
|
|
26
37
|
executionProfile?: WorkflowExecutionProfile;
|
|
27
38
|
preserveSandboxes?: TestSandboxRetentionMode;
|
|
39
|
+
runSuffix?: string | null;
|
|
28
40
|
}): Promise<SavedTestOutcome | null>;
|
|
29
|
-
export declare function
|
|
41
|
+
export declare function runSavedCompiledTest(options: {
|
|
30
42
|
sourcePath: string;
|
|
31
|
-
|
|
43
|
+
datasetConfig: SourceDatasetConfig;
|
|
32
44
|
executor?: WorkflowExecutor | null;
|
|
33
45
|
executionProfile?: WorkflowExecutionProfile;
|
|
34
|
-
|
|
46
|
+
compiledPath?: string | null;
|
|
35
47
|
preserveSandboxes?: TestSandboxRetentionMode;
|
|
48
|
+
runSuffix?: string | null;
|
|
36
49
|
}): Promise<SavedTestOutcome | null>;
|
|
@@ -1,21 +1,26 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
|
-
import { mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
-
import { join } from "node:path";
|
|
4
|
-
import { createRawTestTarget,
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
2
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { createRawTestTarget, createCompiledTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
|
|
5
|
+
import { buildTestSpecFromSourceFolderConfig, buildTestSpecFromCompiledDatasetConfig, resolveSourceDatasetPath, } from "../lib/source-config.js";
|
|
6
|
+
import { datasetArtifactRoot, datasetLatestTestStatePath, datasetLatestTestSummaryPath, datasetTestRunsRoot, } from "../lib/project-paths.js";
|
|
7
|
+
import { testRootForCompiled } from "../lib/compiled-paths.js";
|
|
8
|
+
import { readJsonFileWithSchema } from "../lib/parse.js";
|
|
9
|
+
import { TestRunComparisonSchema } from "../lib/schema.js";
|
|
8
10
|
import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
|
|
9
|
-
import {
|
|
11
|
+
import { findBuiltCompiledPath } from "./compiled-flow.js";
|
|
10
12
|
export function questionPassRate(outcome) {
|
|
11
13
|
return outcome.result.totalCases > 0
|
|
12
14
|
? Math.round((outcome.result.passedCases / outcome.result.totalCases) * 100)
|
|
13
15
|
: 0;
|
|
14
16
|
}
|
|
17
|
+
function visibleRunPath(outcome) {
|
|
18
|
+
return outcome.displayRunPath ?? outcome.runPath;
|
|
19
|
+
}
|
|
15
20
|
function summarizeSavedTestOutcome(label, outcome) {
|
|
16
21
|
return {
|
|
17
22
|
label,
|
|
18
|
-
run_path: outcome
|
|
23
|
+
run_path: visibleRunPath(outcome),
|
|
19
24
|
ok: outcome.result.ok,
|
|
20
25
|
passed_cases: outcome.result.passedCases,
|
|
21
26
|
total_cases: outcome.result.totalCases,
|
|
@@ -32,6 +37,152 @@ function normalizeTestRunId(input) {
|
|
|
32
37
|
.replace(/^-+|-+$/g, "")
|
|
33
38
|
.slice(0, 80);
|
|
34
39
|
}
|
|
40
|
+
function datasetRunPathForTarget(projectPath, datasetName, target, generatedAt, runId, runSuffix) {
|
|
41
|
+
return join(datasetTestRunsRoot(projectPath, datasetName, target), `${generatedAt.replace(/[:.]/g, "-")}-${runId}${runSuffix ? `-${normalizeTestRunId(runSuffix)}` : ""}.json`);
|
|
42
|
+
}
|
|
43
|
+
function writeDatasetTargetRun(options) {
|
|
44
|
+
const dirPath = datasetTestRunsRoot(options.projectPath, options.datasetName, options.target);
|
|
45
|
+
mkdirSync(dirPath, { recursive: true });
|
|
46
|
+
const runPath = datasetRunPathForTarget(options.projectPath, options.datasetName, options.target, options.generatedAt, options.runId, options.runSuffix);
|
|
47
|
+
writeFileSync(runPath, `${JSON.stringify(options.payload, null, 2)}\n`);
|
|
48
|
+
return runPath;
|
|
49
|
+
}
|
|
50
|
+
function loadLatestComparison(projectPath, datasetName) {
|
|
51
|
+
const latestPath = datasetLatestTestStatePath(projectPath, datasetName);
|
|
52
|
+
if (!existsSync(latestPath))
|
|
53
|
+
return null;
|
|
54
|
+
return readJsonFileWithSchema(latestPath, "latest test comparison", TestRunComparisonSchema);
|
|
55
|
+
}
|
|
56
|
+
export function readSavedTestComparison(projectPath, datasetName) {
|
|
57
|
+
return loadLatestComparison(projectPath, datasetName);
|
|
58
|
+
}
|
|
59
|
+
function renderLatestSummaryMarkdown(payload) {
|
|
60
|
+
const lines = [
|
|
61
|
+
"# Latest Test Result",
|
|
62
|
+
"",
|
|
63
|
+
"| Target | Truth checks |",
|
|
64
|
+
"| --- | --- |",
|
|
65
|
+
];
|
|
66
|
+
if (payload.raw) {
|
|
67
|
+
lines.push(`| Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
|
|
68
|
+
}
|
|
69
|
+
if (payload.compiled) {
|
|
70
|
+
lines.push(`| Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
|
|
71
|
+
}
|
|
72
|
+
lines.push("");
|
|
73
|
+
if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
|
|
74
|
+
const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
|
|
75
|
+
lines.push(`Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`, "");
|
|
76
|
+
}
|
|
77
|
+
if (payload.raw) {
|
|
78
|
+
lines.push(`- Latest files-as-is run: ${payload.raw.run_path}`);
|
|
79
|
+
}
|
|
80
|
+
if (payload.compiled) {
|
|
81
|
+
lines.push(`- Latest compiled run: ${payload.compiled.run_path}`);
|
|
82
|
+
}
|
|
83
|
+
return `${lines.join("\n")}\n`;
|
|
84
|
+
}
|
|
85
|
+
export function printSavedTestComparisonState(payload, comparisonRunPath) {
|
|
86
|
+
console.log();
|
|
87
|
+
console.log(chalk.bold(" Latest saved test"));
|
|
88
|
+
console.log();
|
|
89
|
+
console.log(" | Target | Truth checks |");
|
|
90
|
+
console.log(" | --- | --- |");
|
|
91
|
+
if (payload.raw) {
|
|
92
|
+
console.log(` | Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
|
|
93
|
+
}
|
|
94
|
+
if (payload.compiled) {
|
|
95
|
+
console.log(` | Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
|
|
96
|
+
}
|
|
97
|
+
if (!payload.raw || !payload.compiled) {
|
|
98
|
+
console.log();
|
|
99
|
+
if (!payload.raw) {
|
|
100
|
+
console.log(chalk.dim(" No saved files-as-is baseline yet."));
|
|
101
|
+
}
|
|
102
|
+
if (!payload.compiled) {
|
|
103
|
+
console.log(chalk.dim(" No saved compiled-dataset run yet."));
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
|
|
107
|
+
const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
|
|
108
|
+
const color = (payload.summary.pass_rate_delta ?? 0) >= 0 ? chalk.green : chalk.red;
|
|
109
|
+
console.log();
|
|
110
|
+
console.log(color(` Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`));
|
|
111
|
+
}
|
|
112
|
+
if (comparisonRunPath) {
|
|
113
|
+
console.log();
|
|
114
|
+
console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
function padCell(value, width) {
|
|
118
|
+
return value.padEnd(width, " ");
|
|
119
|
+
}
|
|
120
|
+
function scoreCell(outcome) {
|
|
121
|
+
if (!outcome)
|
|
122
|
+
return "—";
|
|
123
|
+
return `${outcome.result.passedCases}/${outcome.result.totalCases}`;
|
|
124
|
+
}
|
|
125
|
+
function deltaCell(row) {
|
|
126
|
+
if (!row.rawOutcome || !row.compiledOutcome)
|
|
127
|
+
return "—";
|
|
128
|
+
const delta = row.compiledOutcome.result.passedCases - row.rawOutcome.result.passedCases;
|
|
129
|
+
return delta > 0 ? `+${delta}` : `${delta}`;
|
|
130
|
+
}
|
|
131
|
+
export function printAgentTestMatrix(rows) {
|
|
132
|
+
if (rows.length === 0)
|
|
133
|
+
return;
|
|
134
|
+
const includeRaw = rows.some((row) => Boolean(row.rawOutcome));
|
|
135
|
+
const includeCompiled = rows.some((row) => Boolean(row.compiledOutcome));
|
|
136
|
+
const includeDelta = includeRaw && includeCompiled;
|
|
137
|
+
const headers = [
|
|
138
|
+
"Agent",
|
|
139
|
+
...(includeRaw ? ["Files as-is"] : []),
|
|
140
|
+
...(includeCompiled ? ["Compiled dataset"] : []),
|
|
141
|
+
...(includeDelta ? ["Delta"] : []),
|
|
142
|
+
];
|
|
143
|
+
const body = rows.map((row) => [
|
|
144
|
+
row.agentLabel,
|
|
145
|
+
...(includeRaw ? [scoreCell(row.rawOutcome)] : []),
|
|
146
|
+
...(includeCompiled ? [scoreCell(row.compiledOutcome)] : []),
|
|
147
|
+
...(includeDelta ? [deltaCell(row)] : []),
|
|
148
|
+
]);
|
|
149
|
+
const widths = headers.map((header, index) => Math.max(header.length, ...body.map((row) => (row[index] ?? "").length)));
|
|
150
|
+
const heading = includeDelta ? " Comparison" : " Results";
|
|
151
|
+
console.log();
|
|
152
|
+
console.log(chalk.bold(heading));
|
|
153
|
+
console.log();
|
|
154
|
+
console.log(` | ${headers.map((header, index) => padCell(header, widths[index] ?? header.length)).join(" | ")} |`);
|
|
155
|
+
console.log(` | ${widths.map((width) => "-".repeat(width)).join(" | ")} |`);
|
|
156
|
+
for (const row of body) {
|
|
157
|
+
console.log(` | ${row.map((cell, index) => padCell(cell ?? "", widths[index] ?? cell.length)).join(" | ")} |`);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
export function printAgentTestFailures(rows) {
|
|
161
|
+
for (const row of rows) {
|
|
162
|
+
const failures = [];
|
|
163
|
+
for (const [label, outcome] of [
|
|
164
|
+
["Files as-is", row.rawOutcome ?? null],
|
|
165
|
+
["Compiled dataset", row.compiledOutcome ?? null],
|
|
166
|
+
]) {
|
|
167
|
+
if (!outcome || outcome.result.ok)
|
|
168
|
+
continue;
|
|
169
|
+
for (const [index, caseResult] of outcome.result.caseResults.entries()) {
|
|
170
|
+
if (caseResult.ok)
|
|
171
|
+
continue;
|
|
172
|
+
const reason = caseResult.checks.find((entry) => !entry.ok)?.detail ?? "failed";
|
|
173
|
+
failures.push(`${label} · Truth Check ${index + 1}: ${reason}`);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
if (failures.length === 0)
|
|
177
|
+
continue;
|
|
178
|
+
console.log();
|
|
179
|
+
console.log(chalk.bold(` ${row.agentLabel} failures`));
|
|
180
|
+
console.log();
|
|
181
|
+
for (const failure of failures) {
|
|
182
|
+
console.log(` - ${failure}`);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
35
186
|
function specNeedsExecutor(spec) {
|
|
36
187
|
return spec.cases.some((entry) => !entry.file || Boolean(entry.answer));
|
|
37
188
|
}
|
|
@@ -69,69 +220,121 @@ export function printSavedTestOutcome(prefix, outcome) {
|
|
|
69
220
|
console.log(chalk.dim(` Preserved sandbox: ${outcome.result.sandbox_path}`));
|
|
70
221
|
console.log();
|
|
71
222
|
}
|
|
72
|
-
console.log(chalk.dim(` Saved run: ${outcome
|
|
223
|
+
console.log(chalk.dim(` Saved run: ${visibleRunPath(outcome)}`));
|
|
73
224
|
}
|
|
74
|
-
export function printSavedTestComparison(rawOutcome,
|
|
75
|
-
if (!rawOutcome && !
|
|
225
|
+
export function printSavedTestComparison(rawOutcome, compiledOutcome, comparisonRunPath) {
|
|
226
|
+
if (!rawOutcome && !compiledOutcome)
|
|
76
227
|
return;
|
|
77
228
|
console.log();
|
|
78
229
|
if (rawOutcome) {
|
|
79
|
-
printSavedTestOutcome("
|
|
230
|
+
printSavedTestOutcome("Files as-is", rawOutcome);
|
|
80
231
|
}
|
|
81
|
-
if (
|
|
232
|
+
if (compiledOutcome) {
|
|
82
233
|
if (rawOutcome)
|
|
83
234
|
console.log();
|
|
84
|
-
printSavedTestOutcome("Compiled
|
|
235
|
+
printSavedTestOutcome("Compiled dataset", compiledOutcome);
|
|
85
236
|
}
|
|
86
|
-
if (rawOutcome &&
|
|
237
|
+
if (rawOutcome && compiledOutcome) {
|
|
87
238
|
const rawQuestions = questionPassRate(rawOutcome);
|
|
88
|
-
const
|
|
89
|
-
const delta =
|
|
239
|
+
const compiledQuestions = questionPassRate(compiledOutcome);
|
|
240
|
+
const delta = compiledQuestions - rawQuestions;
|
|
90
241
|
const color = delta >= 0 ? chalk.green : chalk.red;
|
|
91
242
|
const direction = delta >= 0 ? "improved" : "decreased";
|
|
92
243
|
console.log();
|
|
93
|
-
console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${
|
|
244
|
+
console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
|
|
94
245
|
}
|
|
95
246
|
if (comparisonRunPath) {
|
|
96
247
|
console.log();
|
|
97
|
-
console.log(chalk.dim(` Saved
|
|
248
|
+
console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
export function printSavedTestComparisonSummary(rawOutcome, compiledOutcome, comparisonRunPath) {
|
|
252
|
+
if (!rawOutcome && !compiledOutcome)
|
|
253
|
+
return;
|
|
254
|
+
console.log();
|
|
255
|
+
console.log(chalk.bold(" Comparison"));
|
|
256
|
+
console.log();
|
|
257
|
+
console.log(" | Target | Truth checks |");
|
|
258
|
+
console.log(" | --- | --- |");
|
|
259
|
+
if (rawOutcome) {
|
|
260
|
+
console.log(` | Files as-is | \`${rawOutcome.result.passedCases}/${rawOutcome.result.totalCases}\` |`);
|
|
261
|
+
}
|
|
262
|
+
if (compiledOutcome) {
|
|
263
|
+
console.log(` | Compiled dataset | \`${compiledOutcome.result.passedCases}/${compiledOutcome.result.totalCases}\` |`);
|
|
264
|
+
}
|
|
265
|
+
if (rawOutcome && compiledOutcome) {
|
|
266
|
+
const rawQuestions = questionPassRate(rawOutcome);
|
|
267
|
+
const compiledQuestions = questionPassRate(compiledOutcome);
|
|
268
|
+
const delta = compiledQuestions - rawQuestions;
|
|
269
|
+
const color = delta >= 0 ? chalk.green : chalk.red;
|
|
270
|
+
const direction = delta >= 0 ? "improved" : "decreased";
|
|
271
|
+
console.log();
|
|
272
|
+
console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
|
|
273
|
+
}
|
|
274
|
+
if (comparisonRunPath) {
|
|
275
|
+
console.log();
|
|
276
|
+
console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
|
|
98
277
|
}
|
|
99
278
|
}
|
|
100
279
|
export function saveTestComparisonRun(options) {
|
|
101
280
|
const generatedAt = new Date().toISOString();
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
const
|
|
281
|
+
const existing = loadLatestComparison(options.sourcePath, options.compiledName);
|
|
282
|
+
const canReuseExisting = Boolean(existing?.checks_fingerprint) &&
|
|
283
|
+
existing?.checks_fingerprint === options.checksFingerprint;
|
|
284
|
+
const rawSummary = options.rawOutcome
|
|
285
|
+
? summarizeSavedTestOutcome("Files as-is", options.rawOutcome)
|
|
286
|
+
: canReuseExisting
|
|
287
|
+
? existing?.raw ?? null
|
|
288
|
+
: null;
|
|
289
|
+
const compiledSummary = options.compiledOutcome
|
|
290
|
+
? summarizeSavedTestOutcome("Compiled dataset", options.compiledOutcome)
|
|
291
|
+
: canReuseExisting
|
|
292
|
+
? existing?.compiled ?? null
|
|
293
|
+
: null;
|
|
294
|
+
const effectiveMode = rawSummary && compiledSummary
|
|
295
|
+
? "both"
|
|
296
|
+
: rawSummary
|
|
297
|
+
? "raw"
|
|
298
|
+
: "compiled";
|
|
299
|
+
const rawPassRate = rawSummary
|
|
300
|
+
? Math.round((rawSummary.passed_cases / rawSummary.total_cases) * 100)
|
|
301
|
+
: null;
|
|
302
|
+
const compiledPassRate = compiledSummary
|
|
303
|
+
? Math.round((compiledSummary.passed_cases / compiledSummary.total_cases) * 100)
|
|
304
|
+
: null;
|
|
106
305
|
const payload = {
|
|
107
306
|
kind: "interf-test-run",
|
|
108
307
|
version: 1,
|
|
109
308
|
generated_at: generatedAt,
|
|
110
|
-
mode:
|
|
309
|
+
mode: effectiveMode,
|
|
111
310
|
source_path: options.sourcePath,
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
311
|
+
checks_fingerprint: options.checksFingerprint,
|
|
312
|
+
dataset: {
|
|
313
|
+
name: options.compiledName,
|
|
314
|
+
compiled_path: options.compiledPath ?? (canReuseExisting ? existing?.dataset.compiled_path ?? null : null),
|
|
115
315
|
},
|
|
116
|
-
raw:
|
|
117
|
-
|
|
118
|
-
? summarizeSavedTestOutcome("Compiled workspace", options.workspaceOutcome)
|
|
119
|
-
: null,
|
|
316
|
+
raw: rawSummary,
|
|
317
|
+
compiled: compiledSummary,
|
|
120
318
|
summary: {
|
|
121
319
|
raw_pass_rate: rawPassRate,
|
|
122
|
-
compiled_pass_rate:
|
|
123
|
-
pass_rate_delta: rawPassRate !== null &&
|
|
320
|
+
compiled_pass_rate: compiledPassRate,
|
|
321
|
+
pass_rate_delta: rawPassRate !== null && compiledPassRate !== null ? compiledPassRate - rawPassRate : null,
|
|
124
322
|
},
|
|
125
323
|
};
|
|
126
|
-
const
|
|
127
|
-
|
|
128
|
-
writeFileSync(
|
|
129
|
-
|
|
324
|
+
const latestStatePath = datasetLatestTestStatePath(options.sourcePath, options.compiledName);
|
|
325
|
+
mkdirSync(dirname(latestStatePath), { recursive: true });
|
|
326
|
+
writeFileSync(latestStatePath, `${JSON.stringify(payload, null, 2)}\n`);
|
|
327
|
+
writeFileSync(datasetLatestTestSummaryPath(options.sourcePath, options.compiledName), renderLatestSummaryMarkdown(payload));
|
|
328
|
+
if (options.compiledPath) {
|
|
329
|
+
mkdirSync(testRootForCompiled(options.compiledPath), { recursive: true });
|
|
330
|
+
writeFileSync(join(testRootForCompiled(options.compiledPath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
|
|
331
|
+
}
|
|
332
|
+
return latestStatePath;
|
|
130
333
|
}
|
|
131
334
|
export async function runSavedRawTest(options) {
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
|
|
335
|
+
const spec = buildTestSpecFromSourceFolderConfig({
|
|
336
|
+
sourcePath: options.sourcePath,
|
|
337
|
+
targetName: options.datasetConfig.name,
|
|
135
338
|
targetType: "raw",
|
|
136
339
|
});
|
|
137
340
|
if (!spec) {
|
|
@@ -146,39 +349,45 @@ export async function runSavedRawTest(options) {
|
|
|
146
349
|
console.log(chalk.red(error));
|
|
147
350
|
return null;
|
|
148
351
|
}
|
|
149
|
-
|
|
150
|
-
const target = createRawTestTarget(
|
|
151
|
-
const run = await runTargetTestsAuto(
|
|
352
|
+
const datasetSourcePath = resolveSourceDatasetPath(options.sourcePath, options.datasetConfig);
|
|
353
|
+
const target = createRawTestTarget(datasetSourcePath);
|
|
354
|
+
const run = await runTargetTestsAuto(datasetSourcePath, spec, [target], {
|
|
152
355
|
executor,
|
|
153
356
|
preserveSandboxes: options.preserveSandboxes ?? "on-failure",
|
|
154
|
-
artifactRootPath:
|
|
357
|
+
artifactRootPath: datasetArtifactRoot(options.sourcePath, options.datasetConfig.name),
|
|
155
358
|
});
|
|
156
359
|
const result = run.results[0];
|
|
157
360
|
if (!result)
|
|
158
361
|
return null;
|
|
362
|
+
const datasetRunPath = writeDatasetTargetRun({
|
|
363
|
+
projectPath: options.sourcePath,
|
|
364
|
+
datasetName: options.datasetConfig.name,
|
|
365
|
+
target: "file-as-is",
|
|
366
|
+
generatedAt: run.generated_at,
|
|
367
|
+
runId: normalizeTestRunId(spec.id),
|
|
368
|
+
runSuffix: options.runSuffix,
|
|
369
|
+
payload: run,
|
|
370
|
+
});
|
|
159
371
|
return {
|
|
160
|
-
runPath:
|
|
372
|
+
runPath: datasetRunPath,
|
|
161
373
|
target,
|
|
162
374
|
result,
|
|
163
375
|
};
|
|
164
376
|
}
|
|
165
|
-
export async function
|
|
166
|
-
const
|
|
167
|
-
if (!
|
|
377
|
+
export async function runSavedCompiledTest(options) {
|
|
378
|
+
const compiledPath = options.compiledPath ?? findBuiltCompiledPath(options.sourcePath, options.datasetConfig.name);
|
|
379
|
+
if (!compiledPath) {
|
|
168
380
|
return null;
|
|
169
381
|
}
|
|
170
|
-
const spec =
|
|
171
|
-
|
|
172
|
-
targetType: "
|
|
382
|
+
const spec = buildTestSpecFromCompiledDatasetConfig({
|
|
383
|
+
compiledPath,
|
|
384
|
+
targetType: "compiled",
|
|
173
385
|
});
|
|
174
386
|
if (!spec) {
|
|
175
387
|
return null;
|
|
176
388
|
}
|
|
177
|
-
const target =
|
|
178
|
-
if (!target) {
|
|
179
|
-
return null;
|
|
180
|
-
}
|
|
181
|
-
if (!target.eligible) {
|
|
389
|
+
const target = createCompiledTestTarget(compiledPath, options.datasetConfig.name, options.datasetConfig.workflow ?? "interf");
|
|
390
|
+
if (!target || !target.eligible) {
|
|
182
391
|
return null;
|
|
183
392
|
}
|
|
184
393
|
const { executor, error } = await resolveExecutorForSpec(spec, options.executor, options.executionProfile);
|
|
@@ -193,13 +402,24 @@ export async function runSavedWorkspaceTest(options) {
|
|
|
193
402
|
const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
|
|
194
403
|
executor,
|
|
195
404
|
preserveSandboxes: options.preserveSandboxes ?? "on-failure",
|
|
196
|
-
artifactRootPath:
|
|
405
|
+
artifactRootPath: compiledPath,
|
|
197
406
|
});
|
|
198
407
|
const result = run.results[0];
|
|
199
408
|
if (!result)
|
|
200
409
|
return null;
|
|
410
|
+
const internalRunPath = saveTargetTestRun(compiledPath, run);
|
|
411
|
+
const datasetRunPath = writeDatasetTargetRun({
|
|
412
|
+
projectPath: options.sourcePath,
|
|
413
|
+
datasetName: options.datasetConfig.name,
|
|
414
|
+
target: "compiled",
|
|
415
|
+
generatedAt: run.generated_at,
|
|
416
|
+
runId: normalizeTestRunId(spec.id),
|
|
417
|
+
runSuffix: options.runSuffix,
|
|
418
|
+
payload: run,
|
|
419
|
+
});
|
|
201
420
|
return {
|
|
202
|
-
runPath:
|
|
421
|
+
runPath: internalRunPath,
|
|
422
|
+
displayRunPath: datasetRunPath,
|
|
203
423
|
target,
|
|
204
424
|
result,
|
|
205
425
|
};
|
package/dist/commands/test.d.ts
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
1
|
import type { CommandModule } from "yargs";
|
|
2
|
+
import type { SourceDatasetConfig } from "../lib/schema.js";
|
|
3
|
+
export declare function resolveConfiguredDatasetSelection(options: {
|
|
4
|
+
sourcePath: string;
|
|
5
|
+
requestedDatasetName?: string | null;
|
|
6
|
+
hintedDatasetConfig?: SourceDatasetConfig | null;
|
|
7
|
+
}): SourceDatasetConfig | null;
|
|
2
8
|
export declare const testCommand: CommandModule;
|
|
3
|
-
export declare function runTestCommand(argv?: Record<string, unknown>): Promise<
|
|
9
|
+
export declare function runTestCommand(argv?: Record<string, unknown>): Promise<boolean>;
|