@interf/compiler 0.4.1 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -66
- package/builtin-workflows/interf/README.md +6 -6
- package/builtin-workflows/interf/compile/stages/shape/SKILL.md +7 -7
- package/builtin-workflows/interf/compile/stages/structure/SKILL.md +2 -2
- package/builtin-workflows/interf/compile/stages/summarize/SKILL.md +1 -1
- package/builtin-workflows/interf/{workspace.schema.json → compiled.schema.json} +5 -5
- package/builtin-workflows/interf/improve/SKILL.md +3 -3
- package/builtin-workflows/interf/use/query/SKILL.md +2 -2
- package/builtin-workflows/interf/workflow.json +42 -31
- package/dist/commands/check-draft.d.ts +19 -0
- package/dist/commands/check-draft.js +110 -0
- package/dist/commands/compile-controller.d.ts +4 -4
- package/dist/commands/compile-controller.js +117 -81
- package/dist/commands/compile.d.ts +5 -5
- package/dist/commands/compile.js +61 -62
- package/dist/commands/compiled-flow.d.ts +23 -0
- package/dist/commands/compiled-flow.js +112 -0
- package/dist/commands/create-workflow-wizard.d.ts +3 -3
- package/dist/commands/create-workflow-wizard.js +11 -11
- package/dist/commands/create.d.ts +2 -2
- package/dist/commands/create.js +50 -57
- package/dist/commands/default.js +2 -2
- package/dist/commands/executor-flow.d.ts +20 -1
- package/dist/commands/executor-flow.js +67 -7
- package/dist/commands/init.js +242 -289
- package/dist/commands/list.js +14 -10
- package/dist/commands/reset.js +6 -6
- package/dist/commands/source-config-wizard.d.ts +12 -8
- package/dist/commands/source-config-wizard.js +356 -119
- package/dist/commands/status.js +49 -26
- package/dist/commands/test-flow.d.ts +23 -10
- package/dist/commands/test-flow.js +274 -65
- package/dist/commands/test.d.ts +7 -1
- package/dist/commands/test.js +264 -65
- package/dist/commands/verify.js +23 -14
- package/dist/index.d.ts +7 -7
- package/dist/index.js +4 -4
- package/dist/lib/agent-args.js +2 -1
- package/dist/lib/agent-constants.js +1 -1
- package/dist/lib/agent-render.js +4 -4
- package/dist/lib/agent-shells.d.ts +8 -8
- package/dist/lib/agent-shells.js +231 -142
- package/dist/lib/{workflow-abi.d.ts → builtin-compiled-workflow.d.ts} +37 -46
- package/dist/lib/builtin-compiled-workflow.js +153 -0
- package/dist/lib/compiled-compile.d.ts +52 -0
- package/dist/lib/compiled-compile.js +274 -0
- package/dist/lib/compiled-home.d.ts +5 -0
- package/dist/lib/compiled-home.js +32 -0
- package/dist/lib/compiled-paths.d.ts +39 -0
- package/dist/lib/compiled-paths.js +103 -0
- package/dist/lib/{workspace-raw.d.ts → compiled-raw.d.ts} +9 -8
- package/dist/lib/{workspace-raw.js → compiled-raw.js} +16 -14
- package/dist/lib/compiled-reset.d.ts +1 -0
- package/dist/lib/compiled-reset.js +44 -0
- package/dist/lib/compiled-schema.d.ts +27 -0
- package/dist/lib/compiled-schema.js +110 -0
- package/dist/lib/config.d.ts +0 -1
- package/dist/lib/config.js +0 -1
- package/dist/lib/discovery.d.ts +1 -1
- package/dist/lib/discovery.js +3 -3
- package/dist/lib/interf-bootstrap.d.ts +1 -1
- package/dist/lib/interf-bootstrap.js +4 -4
- package/dist/lib/interf-detect.d.ts +9 -10
- package/dist/lib/interf-detect.js +70 -59
- package/dist/lib/interf-scaffold.d.ts +2 -2
- package/dist/lib/interf-scaffold.js +90 -57
- package/dist/lib/interf-workflow-package.d.ts +3 -3
- package/dist/lib/interf-workflow-package.js +30 -30
- package/dist/lib/interf.d.ts +5 -5
- package/dist/lib/interf.js +4 -4
- package/dist/lib/local-workflows.d.ts +4 -4
- package/dist/lib/local-workflows.js +35 -70
- package/dist/lib/obsidian.d.ts +1 -1
- package/dist/lib/parse.js +92 -1
- package/dist/lib/project-paths.d.ts +11 -0
- package/dist/lib/project-paths.js +32 -0
- package/dist/lib/runtime-acceptance.d.ts +7 -1
- package/dist/lib/runtime-acceptance.js +194 -59
- package/dist/lib/runtime-contracts.d.ts +2 -4
- package/dist/lib/runtime-contracts.js +17 -161
- package/dist/lib/runtime-inventory.d.ts +7 -0
- package/dist/lib/runtime-inventory.js +29 -0
- package/dist/lib/runtime-paths.js +5 -5
- package/dist/lib/runtime-prompt.js +7 -6
- package/dist/lib/runtime-reconcile.d.ts +2 -3
- package/dist/lib/runtime-reconcile.js +94 -184
- package/dist/lib/runtime-runs.js +25 -119
- package/dist/lib/runtime-types.d.ts +10 -19
- package/dist/lib/runtime.d.ts +2 -2
- package/dist/lib/runtime.js +1 -1
- package/dist/lib/schema.d.ts +169 -153
- package/dist/lib/schema.js +116 -164
- package/dist/lib/source-config.d.ts +24 -20
- package/dist/lib/source-config.js +159 -122
- package/dist/lib/state-artifacts.d.ts +5 -5
- package/dist/lib/state-artifacts.js +8 -8
- package/dist/lib/state-health.d.ts +4 -4
- package/dist/lib/state-health.js +110 -126
- package/dist/lib/state-io.d.ts +8 -8
- package/dist/lib/state-io.js +21 -102
- package/dist/lib/state-paths.js +5 -5
- package/dist/lib/state-view.d.ts +4 -4
- package/dist/lib/state-view.js +52 -55
- package/dist/lib/state.d.ts +5 -5
- package/dist/lib/state.js +4 -4
- package/dist/lib/summarize-plan.d.ts +3 -2
- package/dist/lib/summarize-plan.js +19 -21
- package/dist/lib/test-execution.js +9 -9
- package/dist/lib/test-matrices.d.ts +3 -3
- package/dist/lib/test-matrices.js +6 -6
- package/dist/lib/test-paths.d.ts +4 -4
- package/dist/lib/test-paths.js +26 -11
- package/dist/lib/test-sandbox.d.ts +1 -1
- package/dist/lib/test-sandbox.js +32 -38
- package/dist/lib/test-specs.js +1 -1
- package/dist/lib/test-targets.d.ts +2 -2
- package/dist/lib/test-targets.js +11 -11
- package/dist/lib/test-types.d.ts +1 -1
- package/dist/lib/test.d.ts +1 -1
- package/dist/lib/test.js +1 -1
- package/dist/lib/util.d.ts +2 -0
- package/dist/lib/util.js +14 -1
- package/dist/lib/validate-compiled.d.ts +27 -0
- package/dist/lib/validate-compiled.js +238 -0
- package/dist/lib/validate-helpers.d.ts +0 -8
- package/dist/lib/validate-helpers.js +0 -30
- package/dist/lib/validate.d.ts +6 -4
- package/dist/lib/validate.js +76 -27
- package/dist/lib/workflow-definitions.d.ts +12 -11
- package/dist/lib/workflow-definitions.js +45 -55
- package/dist/lib/workflow-helpers.d.ts +2 -3
- package/dist/lib/workflow-helpers.js +9 -13
- package/dist/lib/workflow-improvement.d.ts +3 -3
- package/dist/lib/workflow-improvement.js +48 -48
- package/dist/lib/workflow-primitives.d.ts +2 -0
- package/dist/lib/workflow-primitives.js +5 -0
- package/dist/lib/workflow-review-paths.d.ts +3 -3
- package/dist/lib/workflow-review-paths.js +11 -11
- package/dist/lib/workflow-stage-runner.d.ts +1 -1
- package/dist/lib/workflow-stage-runner.js +8 -8
- package/dist/lib/workflows.d.ts +9 -9
- package/dist/lib/workflows.js +15 -17
- package/package.json +13 -12
- package/dist/commands/workspace-flow.d.ts +0 -23
- package/dist/commands/workspace-flow.js +0 -109
- package/dist/lib/registry.d.ts +0 -16
- package/dist/lib/registry.js +0 -65
- package/dist/lib/validate-workspace.d.ts +0 -121
- package/dist/lib/validate-workspace.js +0 -407
- package/dist/lib/workflow-abi.js +0 -181
- package/dist/lib/workspace-compile.d.ts +0 -54
- package/dist/lib/workspace-compile.js +0 -476
- package/dist/lib/workspace-home.d.ts +0 -5
- package/dist/lib/workspace-home.js +0 -32
- package/dist/lib/workspace-layout.d.ts +0 -2
- package/dist/lib/workspace-layout.js +0 -60
- package/dist/lib/workspace-paths.d.ts +0 -41
- package/dist/lib/workspace-paths.js +0 -107
- package/dist/lib/workspace-reset.d.ts +0 -1
- package/dist/lib/workspace-reset.js +0 -43
- package/dist/lib/workspace-schema.d.ts +0 -17
- package/dist/lib/workspace-schema.js +0 -74
package/dist/commands/status.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
2
|
import * as p from "@clack/prompts";
|
|
3
|
-
import { detectInterf,
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
3
|
+
import { detectInterf, listCompiledDatasetsForSourceFolder, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
|
|
4
|
+
import { computeCompiledHealth } from "../lib/state.js";
|
|
5
|
+
import { readSavedTestComparison, printSavedTestComparisonState } from "./test-flow.js";
|
|
6
6
|
function statusColor(status) {
|
|
7
7
|
switch (status) {
|
|
8
8
|
case "compiled":
|
|
@@ -17,54 +17,77 @@ function statusColor(status) {
|
|
|
17
17
|
}
|
|
18
18
|
export const statusCommand = {
|
|
19
19
|
command: "status",
|
|
20
|
-
describe: "Show deterministic
|
|
20
|
+
describe: "Show deterministic health for a compiled dataset",
|
|
21
21
|
handler: async () => {
|
|
22
|
-
let
|
|
22
|
+
let compiledPath = null;
|
|
23
23
|
const detected = detectInterf(process.cwd());
|
|
24
24
|
if (detected) {
|
|
25
|
-
|
|
25
|
+
compiledPath = detected.path;
|
|
26
26
|
}
|
|
27
27
|
else {
|
|
28
|
-
const
|
|
28
|
+
const sourcePath = process.cwd();
|
|
29
|
+
const local = listCompiledDatasetsForSourceFolder(sourcePath).map(({ path, config }) => ({
|
|
29
30
|
path,
|
|
30
31
|
name: config.name,
|
|
31
32
|
}));
|
|
32
|
-
|
|
33
|
-
path: entry.path,
|
|
34
|
-
name: entry.name,
|
|
35
|
-
}));
|
|
36
|
-
if (all.length === 0) {
|
|
33
|
+
if (local.length === 0) {
|
|
37
34
|
process.exitCode = 1;
|
|
38
|
-
console.log(chalk.red(" No
|
|
35
|
+
console.log(chalk.red(" No compiled datasets found."));
|
|
36
|
+
console.log(chalk.dim(" Run `interf`, save truth checks, and compile a dataset first."));
|
|
39
37
|
return;
|
|
40
38
|
}
|
|
41
|
-
if (
|
|
42
|
-
|
|
39
|
+
if (local.length === 1) {
|
|
40
|
+
compiledPath = local[0].path;
|
|
43
41
|
}
|
|
44
42
|
else {
|
|
45
43
|
const selected = await p.select({
|
|
46
|
-
message: "Which
|
|
47
|
-
options:
|
|
44
|
+
message: "Which dataset?",
|
|
45
|
+
options: local.map((entry) => ({ value: entry.path, label: entry.name })),
|
|
48
46
|
});
|
|
49
47
|
if (p.isCancel(selected))
|
|
50
48
|
return;
|
|
51
|
-
|
|
49
|
+
compiledPath = selected;
|
|
52
50
|
}
|
|
53
51
|
}
|
|
54
|
-
const health =
|
|
52
|
+
const health = computeCompiledHealth(compiledPath);
|
|
55
53
|
const color = statusColor(health.status);
|
|
56
54
|
console.log();
|
|
57
55
|
console.log(color(` ${health.target_name}`));
|
|
58
56
|
console.log(chalk.dim(` status: ${health.status}`));
|
|
59
57
|
console.log(chalk.dim(` stage: ${health.stage}`));
|
|
60
58
|
console.log(chalk.dim(` ${health.summary}`));
|
|
59
|
+
const compiledConfig = readInterfConfig(compiledPath);
|
|
60
|
+
const sourcePath = resolveSourceControlPath(compiledPath);
|
|
61
|
+
const latestComparison = compiledConfig
|
|
62
|
+
? readSavedTestComparison(sourcePath, compiledConfig.name)
|
|
63
|
+
: null;
|
|
64
|
+
if (latestComparison) {
|
|
65
|
+
printSavedTestComparisonState(latestComparison);
|
|
66
|
+
}
|
|
67
|
+
else {
|
|
68
|
+
console.log();
|
|
69
|
+
console.log(chalk.dim(" No saved test result yet. Run `interf test` to measure files as-is and the compiled dataset."));
|
|
70
|
+
}
|
|
61
71
|
console.log();
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
72
|
+
const metricOrder = [
|
|
73
|
+
"source_total",
|
|
74
|
+
"stage_total",
|
|
75
|
+
"completed_stages",
|
|
76
|
+
"warnings",
|
|
77
|
+
"errors",
|
|
78
|
+
];
|
|
79
|
+
const printed = new Set();
|
|
80
|
+
for (const key of metricOrder) {
|
|
81
|
+
const value = health.metrics[key];
|
|
82
|
+
if (typeof value !== "number")
|
|
83
|
+
continue;
|
|
84
|
+
printed.add(key);
|
|
85
|
+
console.log(chalk.dim(` ${key}: ${value}`));
|
|
86
|
+
}
|
|
87
|
+
for (const [key, value] of Object.entries(health.metrics)) {
|
|
88
|
+
if (printed.has(key))
|
|
89
|
+
continue;
|
|
90
|
+
console.log(chalk.dim(` ${key}: ${value}`));
|
|
91
|
+
}
|
|
69
92
|
},
|
|
70
93
|
};
|
|
@@ -1,36 +1,49 @@
|
|
|
1
1
|
import { type TestTargetCandidate, type TestTargetResult } from "../lib/test.js";
|
|
2
2
|
import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
|
|
3
3
|
import type { TestSandboxRetentionMode } from "../lib/test-sandbox.js";
|
|
4
|
-
import type {
|
|
4
|
+
import type { SourceDatasetConfig, TestRunComparison, TestRunMode } from "../lib/schema.js";
|
|
5
5
|
export interface SavedTestOutcome {
|
|
6
6
|
runPath: string;
|
|
7
|
+
displayRunPath?: string;
|
|
7
8
|
target: TestTargetCandidate;
|
|
8
9
|
result: TestTargetResult;
|
|
9
10
|
}
|
|
11
|
+
export interface AgentTestMatrixRow {
|
|
12
|
+
agentLabel: string;
|
|
13
|
+
rawOutcome?: SavedTestOutcome | null;
|
|
14
|
+
compiledOutcome?: SavedTestOutcome | null;
|
|
15
|
+
}
|
|
10
16
|
export declare function questionPassRate(outcome: SavedTestOutcome): number;
|
|
17
|
+
export declare function readSavedTestComparison(projectPath: string, datasetName: string): TestRunComparison | null;
|
|
18
|
+
export declare function printSavedTestComparisonState(payload: TestRunComparison, comparisonRunPath?: string | null): void;
|
|
19
|
+
export declare function printAgentTestMatrix(rows: AgentTestMatrixRow[]): void;
|
|
20
|
+
export declare function printAgentTestFailures(rows: AgentTestMatrixRow[]): void;
|
|
11
21
|
export declare function printSavedTestOutcome(prefix: string, outcome: SavedTestOutcome): void;
|
|
12
|
-
export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null,
|
|
22
|
+
export declare function printSavedTestComparison(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
|
|
23
|
+
export declare function printSavedTestComparisonSummary(rawOutcome: SavedTestOutcome | null, compiledOutcome: SavedTestOutcome | null, comparisonRunPath?: string | null): void;
|
|
13
24
|
export declare function saveTestComparisonRun(options: {
|
|
14
25
|
sourcePath: string;
|
|
15
|
-
|
|
16
|
-
|
|
26
|
+
compiledPath: string | null;
|
|
27
|
+
compiledName: string;
|
|
28
|
+
checksFingerprint: string;
|
|
17
29
|
mode: TestRunMode;
|
|
18
30
|
rawOutcome: SavedTestOutcome | null;
|
|
19
|
-
|
|
31
|
+
compiledOutcome: SavedTestOutcome | null;
|
|
20
32
|
}): string;
|
|
21
33
|
export declare function runSavedRawTest(options: {
|
|
22
34
|
sourcePath: string;
|
|
23
|
-
|
|
24
|
-
workspacePath?: string | null;
|
|
35
|
+
datasetConfig: SourceDatasetConfig;
|
|
25
36
|
executor?: WorkflowExecutor | null;
|
|
26
37
|
executionProfile?: WorkflowExecutionProfile;
|
|
27
38
|
preserveSandboxes?: TestSandboxRetentionMode;
|
|
39
|
+
runSuffix?: string | null;
|
|
28
40
|
}): Promise<SavedTestOutcome | null>;
|
|
29
|
-
export declare function
|
|
41
|
+
export declare function runSavedCompiledTest(options: {
|
|
30
42
|
sourcePath: string;
|
|
31
|
-
|
|
43
|
+
datasetConfig: SourceDatasetConfig;
|
|
32
44
|
executor?: WorkflowExecutor | null;
|
|
33
45
|
executionProfile?: WorkflowExecutionProfile;
|
|
34
|
-
|
|
46
|
+
compiledPath?: string | null;
|
|
35
47
|
preserveSandboxes?: TestSandboxRetentionMode;
|
|
48
|
+
runSuffix?: string | null;
|
|
36
49
|
}): Promise<SavedTestOutcome | null>;
|
|
@@ -1,21 +1,26 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
|
-
import { mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
-
import { join } from "node:path";
|
|
4
|
-
import { createRawTestTarget,
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
2
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { createRawTestTarget, createCompiledTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
|
|
5
|
+
import { buildTestSpecFromSourceFolderConfig, buildTestSpecFromCompiledDatasetConfig, resolveSourceDatasetPath, } from "../lib/source-config.js";
|
|
6
|
+
import { datasetLatestTestStatePath, datasetLatestTestSummaryPath, normalizeDatasetTestRunId, datasetTestRunPath, datasetTestRunsRoot, datasetTestsRoot, } from "../lib/project-paths.js";
|
|
7
|
+
import { testRootForCompiled } from "../lib/compiled-paths.js";
|
|
8
|
+
import { readJsonFileWithSchema } from "../lib/parse.js";
|
|
9
|
+
import { TestRunComparisonSchema } from "../lib/schema.js";
|
|
8
10
|
import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
|
|
9
|
-
import {
|
|
11
|
+
import { findBuiltCompiledPath } from "./compiled-flow.js";
|
|
10
12
|
export function questionPassRate(outcome) {
|
|
11
13
|
return outcome.result.totalCases > 0
|
|
12
14
|
? Math.round((outcome.result.passedCases / outcome.result.totalCases) * 100)
|
|
13
15
|
: 0;
|
|
14
16
|
}
|
|
17
|
+
function visibleRunPath(outcome) {
|
|
18
|
+
return outcome.displayRunPath ?? outcome.runPath;
|
|
19
|
+
}
|
|
15
20
|
function summarizeSavedTestOutcome(label, outcome) {
|
|
16
21
|
return {
|
|
17
22
|
label,
|
|
18
|
-
run_path: outcome
|
|
23
|
+
run_path: visibleRunPath(outcome),
|
|
19
24
|
ok: outcome.result.ok,
|
|
20
25
|
passed_cases: outcome.result.passedCases,
|
|
21
26
|
total_cases: outcome.result.totalCases,
|
|
@@ -24,13 +29,148 @@ function summarizeSavedTestOutcome(label, outcome) {
|
|
|
24
29
|
target: outcome.target,
|
|
25
30
|
};
|
|
26
31
|
}
|
|
27
|
-
function
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
function writeDatasetTargetRun(options) {
|
|
33
|
+
const dirPath = datasetTestRunsRoot(options.projectPath, options.datasetName, options.target);
|
|
34
|
+
mkdirSync(dirPath, { recursive: true });
|
|
35
|
+
const runPath = datasetTestRunPath(options.projectPath, options.datasetName, options.target, options.generatedAt, options.runId, options.runSuffix);
|
|
36
|
+
writeFileSync(runPath, `${JSON.stringify(options.payload, null, 2)}\n`);
|
|
37
|
+
return runPath;
|
|
38
|
+
}
|
|
39
|
+
function loadLatestComparison(projectPath, datasetName) {
|
|
40
|
+
const latestPath = datasetLatestTestStatePath(projectPath, datasetName);
|
|
41
|
+
if (!existsSync(latestPath))
|
|
42
|
+
return null;
|
|
43
|
+
return readJsonFileWithSchema(latestPath, "latest test comparison", TestRunComparisonSchema);
|
|
44
|
+
}
|
|
45
|
+
export function readSavedTestComparison(projectPath, datasetName) {
|
|
46
|
+
return loadLatestComparison(projectPath, datasetName);
|
|
47
|
+
}
|
|
48
|
+
function renderLatestSummaryMarkdown(payload) {
|
|
49
|
+
const lines = [
|
|
50
|
+
"# Latest Test Result",
|
|
51
|
+
"",
|
|
52
|
+
"| Target | Truth checks |",
|
|
53
|
+
"| --- | --- |",
|
|
54
|
+
];
|
|
55
|
+
if (payload.raw) {
|
|
56
|
+
lines.push(`| Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
|
|
57
|
+
}
|
|
58
|
+
if (payload.compiled) {
|
|
59
|
+
lines.push(`| Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
|
|
60
|
+
}
|
|
61
|
+
lines.push("");
|
|
62
|
+
if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
|
|
63
|
+
const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
|
|
64
|
+
lines.push(`Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`, "");
|
|
65
|
+
}
|
|
66
|
+
if (payload.raw) {
|
|
67
|
+
lines.push(`- Latest files-as-is run: ${payload.raw.run_path}`);
|
|
68
|
+
}
|
|
69
|
+
if (payload.compiled) {
|
|
70
|
+
lines.push(`- Latest compiled run: ${payload.compiled.run_path}`);
|
|
71
|
+
}
|
|
72
|
+
return `${lines.join("\n")}\n`;
|
|
73
|
+
}
|
|
74
|
+
export function printSavedTestComparisonState(payload, comparisonRunPath) {
|
|
75
|
+
console.log();
|
|
76
|
+
console.log(chalk.bold(" Latest saved test"));
|
|
77
|
+
console.log();
|
|
78
|
+
console.log(" | Target | Truth checks |");
|
|
79
|
+
console.log(" | --- | --- |");
|
|
80
|
+
if (payload.raw) {
|
|
81
|
+
console.log(` | Files as-is | \`${payload.raw.passed_cases}/${payload.raw.total_cases}\` |`);
|
|
82
|
+
}
|
|
83
|
+
if (payload.compiled) {
|
|
84
|
+
console.log(` | Compiled dataset | \`${payload.compiled.passed_cases}/${payload.compiled.total_cases}\` |`);
|
|
85
|
+
}
|
|
86
|
+
if (!payload.raw || !payload.compiled) {
|
|
87
|
+
console.log();
|
|
88
|
+
if (!payload.raw) {
|
|
89
|
+
console.log(chalk.dim(" No saved files-as-is baseline yet."));
|
|
90
|
+
}
|
|
91
|
+
if (!payload.compiled) {
|
|
92
|
+
console.log(chalk.dim(" No saved compiled-dataset run yet."));
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
if (payload.summary.raw_pass_rate != null && payload.summary.compiled_pass_rate != null) {
|
|
96
|
+
const direction = (payload.summary.pass_rate_delta ?? 0) >= 0 ? "improved" : "decreased";
|
|
97
|
+
const color = (payload.summary.pass_rate_delta ?? 0) >= 0 ? chalk.green : chalk.red;
|
|
98
|
+
console.log();
|
|
99
|
+
console.log(color(` Truth-check pass rate ${direction} from ${payload.summary.raw_pass_rate}% to ${payload.summary.compiled_pass_rate}%.`));
|
|
100
|
+
}
|
|
101
|
+
if (comparisonRunPath) {
|
|
102
|
+
console.log();
|
|
103
|
+
console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
function padCell(value, width) {
|
|
107
|
+
return value.padEnd(width, " ");
|
|
108
|
+
}
|
|
109
|
+
function scoreCell(outcome) {
|
|
110
|
+
if (!outcome)
|
|
111
|
+
return "—";
|
|
112
|
+
return `${outcome.result.passedCases}/${outcome.result.totalCases}`;
|
|
113
|
+
}
|
|
114
|
+
function deltaCell(row) {
|
|
115
|
+
if (!row.rawOutcome || !row.compiledOutcome)
|
|
116
|
+
return "—";
|
|
117
|
+
const delta = row.compiledOutcome.result.passedCases - row.rawOutcome.result.passedCases;
|
|
118
|
+
return delta > 0 ? `+${delta}` : `${delta}`;
|
|
119
|
+
}
|
|
120
|
+
export function printAgentTestMatrix(rows) {
|
|
121
|
+
if (rows.length === 0)
|
|
122
|
+
return;
|
|
123
|
+
const includeRaw = rows.some((row) => Boolean(row.rawOutcome));
|
|
124
|
+
const includeCompiled = rows.some((row) => Boolean(row.compiledOutcome));
|
|
125
|
+
const includeDelta = includeRaw && includeCompiled;
|
|
126
|
+
const headers = [
|
|
127
|
+
"Agent",
|
|
128
|
+
...(includeRaw ? ["Files as-is"] : []),
|
|
129
|
+
...(includeCompiled ? ["Compiled dataset"] : []),
|
|
130
|
+
...(includeDelta ? ["Delta"] : []),
|
|
131
|
+
];
|
|
132
|
+
const body = rows.map((row) => [
|
|
133
|
+
row.agentLabel,
|
|
134
|
+
...(includeRaw ? [scoreCell(row.rawOutcome)] : []),
|
|
135
|
+
...(includeCompiled ? [scoreCell(row.compiledOutcome)] : []),
|
|
136
|
+
...(includeDelta ? [deltaCell(row)] : []),
|
|
137
|
+
]);
|
|
138
|
+
const widths = headers.map((header, index) => Math.max(header.length, ...body.map((row) => (row[index] ?? "").length)));
|
|
139
|
+
const heading = includeDelta ? " Comparison" : " Results";
|
|
140
|
+
console.log();
|
|
141
|
+
console.log(chalk.bold(heading));
|
|
142
|
+
console.log();
|
|
143
|
+
console.log(` | ${headers.map((header, index) => padCell(header, widths[index] ?? header.length)).join(" | ")} |`);
|
|
144
|
+
console.log(` | ${widths.map((width) => "-".repeat(width)).join(" | ")} |`);
|
|
145
|
+
for (const row of body) {
|
|
146
|
+
console.log(` | ${row.map((cell, index) => padCell(cell ?? "", widths[index] ?? cell.length)).join(" | ")} |`);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
export function printAgentTestFailures(rows) {
|
|
150
|
+
for (const row of rows) {
|
|
151
|
+
const failures = [];
|
|
152
|
+
for (const [label, outcome] of [
|
|
153
|
+
["Files as-is", row.rawOutcome ?? null],
|
|
154
|
+
["Compiled dataset", row.compiledOutcome ?? null],
|
|
155
|
+
]) {
|
|
156
|
+
if (!outcome || outcome.result.ok)
|
|
157
|
+
continue;
|
|
158
|
+
for (const [index, caseResult] of outcome.result.caseResults.entries()) {
|
|
159
|
+
if (caseResult.ok)
|
|
160
|
+
continue;
|
|
161
|
+
const reason = caseResult.checks.find((entry) => !entry.ok)?.detail ?? "failed";
|
|
162
|
+
failures.push(`${label} · Truth Check ${index + 1}: ${reason}`);
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
if (failures.length === 0)
|
|
166
|
+
continue;
|
|
167
|
+
console.log();
|
|
168
|
+
console.log(chalk.bold(` ${row.agentLabel} failures`));
|
|
169
|
+
console.log();
|
|
170
|
+
for (const failure of failures) {
|
|
171
|
+
console.log(` - ${failure}`);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
34
174
|
}
|
|
35
175
|
function specNeedsExecutor(spec) {
|
|
36
176
|
return spec.cases.some((entry) => !entry.file || Boolean(entry.answer));
|
|
@@ -69,69 +209,121 @@ export function printSavedTestOutcome(prefix, outcome) {
|
|
|
69
209
|
console.log(chalk.dim(` Preserved sandbox: ${outcome.result.sandbox_path}`));
|
|
70
210
|
console.log();
|
|
71
211
|
}
|
|
72
|
-
console.log(chalk.dim(` Saved run: ${outcome
|
|
212
|
+
console.log(chalk.dim(` Saved run: ${visibleRunPath(outcome)}`));
|
|
73
213
|
}
|
|
74
|
-
export function printSavedTestComparison(rawOutcome,
|
|
75
|
-
if (!rawOutcome && !
|
|
214
|
+
export function printSavedTestComparison(rawOutcome, compiledOutcome, comparisonRunPath) {
|
|
215
|
+
if (!rawOutcome && !compiledOutcome)
|
|
76
216
|
return;
|
|
77
217
|
console.log();
|
|
78
218
|
if (rawOutcome) {
|
|
79
|
-
printSavedTestOutcome("
|
|
219
|
+
printSavedTestOutcome("Files as-is", rawOutcome);
|
|
80
220
|
}
|
|
81
|
-
if (
|
|
221
|
+
if (compiledOutcome) {
|
|
82
222
|
if (rawOutcome)
|
|
83
223
|
console.log();
|
|
84
|
-
printSavedTestOutcome("Compiled
|
|
224
|
+
printSavedTestOutcome("Compiled dataset", compiledOutcome);
|
|
225
|
+
}
|
|
226
|
+
if (rawOutcome && compiledOutcome) {
|
|
227
|
+
const rawQuestions = questionPassRate(rawOutcome);
|
|
228
|
+
const compiledQuestions = questionPassRate(compiledOutcome);
|
|
229
|
+
const delta = compiledQuestions - rawQuestions;
|
|
230
|
+
const color = delta >= 0 ? chalk.green : chalk.red;
|
|
231
|
+
const direction = delta >= 0 ? "improved" : "decreased";
|
|
232
|
+
console.log();
|
|
233
|
+
console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
|
|
234
|
+
}
|
|
235
|
+
if (comparisonRunPath) {
|
|
236
|
+
console.log();
|
|
237
|
+
console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
export function printSavedTestComparisonSummary(rawOutcome, compiledOutcome, comparisonRunPath) {
|
|
241
|
+
if (!rawOutcome && !compiledOutcome)
|
|
242
|
+
return;
|
|
243
|
+
console.log();
|
|
244
|
+
console.log(chalk.bold(" Comparison"));
|
|
245
|
+
console.log();
|
|
246
|
+
console.log(" | Target | Truth checks |");
|
|
247
|
+
console.log(" | --- | --- |");
|
|
248
|
+
if (rawOutcome) {
|
|
249
|
+
console.log(` | Files as-is | \`${rawOutcome.result.passedCases}/${rawOutcome.result.totalCases}\` |`);
|
|
250
|
+
}
|
|
251
|
+
if (compiledOutcome) {
|
|
252
|
+
console.log(` | Compiled dataset | \`${compiledOutcome.result.passedCases}/${compiledOutcome.result.totalCases}\` |`);
|
|
85
253
|
}
|
|
86
|
-
if (rawOutcome &&
|
|
254
|
+
if (rawOutcome && compiledOutcome) {
|
|
87
255
|
const rawQuestions = questionPassRate(rawOutcome);
|
|
88
|
-
const
|
|
89
|
-
const delta =
|
|
256
|
+
const compiledQuestions = questionPassRate(compiledOutcome);
|
|
257
|
+
const delta = compiledQuestions - rawQuestions;
|
|
90
258
|
const color = delta >= 0 ? chalk.green : chalk.red;
|
|
91
259
|
const direction = delta >= 0 ? "improved" : "decreased";
|
|
92
260
|
console.log();
|
|
93
|
-
console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${
|
|
261
|
+
console.log(color(` Truth-check pass rate ${direction} from ${rawQuestions}% to ${compiledQuestions}%.`));
|
|
94
262
|
}
|
|
95
263
|
if (comparisonRunPath) {
|
|
96
264
|
console.log();
|
|
97
|
-
console.log(chalk.dim(` Saved
|
|
265
|
+
console.log(chalk.dim(` Saved summary: ${comparisonRunPath}`));
|
|
98
266
|
}
|
|
99
267
|
}
|
|
100
268
|
export function saveTestComparisonRun(options) {
|
|
101
269
|
const generatedAt = new Date().toISOString();
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
const
|
|
270
|
+
const existing = loadLatestComparison(options.sourcePath, options.compiledName);
|
|
271
|
+
const canReuseExisting = Boolean(existing?.checks_fingerprint) &&
|
|
272
|
+
existing?.checks_fingerprint === options.checksFingerprint;
|
|
273
|
+
const rawSummary = options.rawOutcome
|
|
274
|
+
? summarizeSavedTestOutcome("Files as-is", options.rawOutcome)
|
|
275
|
+
: canReuseExisting
|
|
276
|
+
? existing?.raw ?? null
|
|
277
|
+
: null;
|
|
278
|
+
const compiledSummary = options.compiledOutcome
|
|
279
|
+
? summarizeSavedTestOutcome("Compiled dataset", options.compiledOutcome)
|
|
280
|
+
: canReuseExisting
|
|
281
|
+
? existing?.compiled ?? null
|
|
282
|
+
: null;
|
|
283
|
+
const effectiveMode = rawSummary && compiledSummary
|
|
284
|
+
? "both"
|
|
285
|
+
: rawSummary
|
|
286
|
+
? "raw"
|
|
287
|
+
: "compiled";
|
|
288
|
+
const rawPassRate = rawSummary
|
|
289
|
+
? Math.round((rawSummary.passed_cases / rawSummary.total_cases) * 100)
|
|
290
|
+
: null;
|
|
291
|
+
const compiledPassRate = compiledSummary
|
|
292
|
+
? Math.round((compiledSummary.passed_cases / compiledSummary.total_cases) * 100)
|
|
293
|
+
: null;
|
|
106
294
|
const payload = {
|
|
107
295
|
kind: "interf-test-run",
|
|
108
296
|
version: 1,
|
|
109
297
|
generated_at: generatedAt,
|
|
110
|
-
mode:
|
|
298
|
+
mode: effectiveMode,
|
|
111
299
|
source_path: options.sourcePath,
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
300
|
+
checks_fingerprint: options.checksFingerprint,
|
|
301
|
+
dataset: {
|
|
302
|
+
name: options.compiledName,
|
|
303
|
+
compiled_path: options.compiledPath ?? (canReuseExisting ? existing?.dataset.compiled_path ?? null : null),
|
|
115
304
|
},
|
|
116
|
-
raw:
|
|
117
|
-
|
|
118
|
-
? summarizeSavedTestOutcome("Compiled workspace", options.workspaceOutcome)
|
|
119
|
-
: null,
|
|
305
|
+
raw: rawSummary,
|
|
306
|
+
compiled: compiledSummary,
|
|
120
307
|
summary: {
|
|
121
308
|
raw_pass_rate: rawPassRate,
|
|
122
|
-
compiled_pass_rate:
|
|
123
|
-
pass_rate_delta: rawPassRate !== null &&
|
|
309
|
+
compiled_pass_rate: compiledPassRate,
|
|
310
|
+
pass_rate_delta: rawPassRate !== null && compiledPassRate !== null ? compiledPassRate - rawPassRate : null,
|
|
124
311
|
},
|
|
125
312
|
};
|
|
126
|
-
const
|
|
127
|
-
|
|
128
|
-
writeFileSync(
|
|
129
|
-
|
|
313
|
+
const latestStatePath = datasetLatestTestStatePath(options.sourcePath, options.compiledName);
|
|
314
|
+
mkdirSync(dirname(latestStatePath), { recursive: true });
|
|
315
|
+
writeFileSync(latestStatePath, `${JSON.stringify(payload, null, 2)}\n`);
|
|
316
|
+
writeFileSync(datasetLatestTestSummaryPath(options.sourcePath, options.compiledName), renderLatestSummaryMarkdown(payload));
|
|
317
|
+
if (options.compiledPath) {
|
|
318
|
+
mkdirSync(testRootForCompiled(options.compiledPath), { recursive: true });
|
|
319
|
+
writeFileSync(join(testRootForCompiled(options.compiledPath), "latest.json"), `${JSON.stringify(payload, null, 2)}\n`);
|
|
320
|
+
}
|
|
321
|
+
return latestStatePath;
|
|
130
322
|
}
|
|
131
323
|
export async function runSavedRawTest(options) {
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
|
|
324
|
+
const spec = buildTestSpecFromSourceFolderConfig({
|
|
325
|
+
sourcePath: options.sourcePath,
|
|
326
|
+
targetName: options.datasetConfig.name,
|
|
135
327
|
targetType: "raw",
|
|
136
328
|
});
|
|
137
329
|
if (!spec) {
|
|
@@ -146,39 +338,45 @@ export async function runSavedRawTest(options) {
|
|
|
146
338
|
console.log(chalk.red(error));
|
|
147
339
|
return null;
|
|
148
340
|
}
|
|
149
|
-
|
|
150
|
-
const target = createRawTestTarget(
|
|
151
|
-
const run = await runTargetTestsAuto(
|
|
341
|
+
const datasetSourcePath = resolveSourceDatasetPath(options.sourcePath, options.datasetConfig);
|
|
342
|
+
const target = createRawTestTarget(datasetSourcePath);
|
|
343
|
+
const run = await runTargetTestsAuto(datasetSourcePath, spec, [target], {
|
|
152
344
|
executor,
|
|
153
345
|
preserveSandboxes: options.preserveSandboxes ?? "on-failure",
|
|
154
|
-
artifactRootPath:
|
|
346
|
+
artifactRootPath: datasetTestsRoot(options.sourcePath, options.datasetConfig.name),
|
|
155
347
|
});
|
|
156
348
|
const result = run.results[0];
|
|
157
349
|
if (!result)
|
|
158
350
|
return null;
|
|
351
|
+
const datasetRunPath = writeDatasetTargetRun({
|
|
352
|
+
projectPath: options.sourcePath,
|
|
353
|
+
datasetName: options.datasetConfig.name,
|
|
354
|
+
target: "file-as-is",
|
|
355
|
+
generatedAt: run.generated_at,
|
|
356
|
+
runId: normalizeDatasetTestRunId(spec.id),
|
|
357
|
+
runSuffix: options.runSuffix,
|
|
358
|
+
payload: run,
|
|
359
|
+
});
|
|
159
360
|
return {
|
|
160
|
-
runPath:
|
|
361
|
+
runPath: datasetRunPath,
|
|
161
362
|
target,
|
|
162
363
|
result,
|
|
163
364
|
};
|
|
164
365
|
}
|
|
165
|
-
export async function
|
|
166
|
-
const
|
|
167
|
-
if (!
|
|
366
|
+
export async function runSavedCompiledTest(options) {
|
|
367
|
+
const compiledPath = options.compiledPath ?? findBuiltCompiledPath(options.sourcePath, options.datasetConfig.name);
|
|
368
|
+
if (!compiledPath) {
|
|
168
369
|
return null;
|
|
169
370
|
}
|
|
170
|
-
const spec =
|
|
171
|
-
|
|
172
|
-
targetType: "
|
|
371
|
+
const spec = buildTestSpecFromCompiledDatasetConfig({
|
|
372
|
+
compiledPath,
|
|
373
|
+
targetType: "compiled",
|
|
173
374
|
});
|
|
174
375
|
if (!spec) {
|
|
175
376
|
return null;
|
|
176
377
|
}
|
|
177
|
-
const target =
|
|
178
|
-
if (!target) {
|
|
179
|
-
return null;
|
|
180
|
-
}
|
|
181
|
-
if (!target.eligible) {
|
|
378
|
+
const target = createCompiledTestTarget(compiledPath, options.datasetConfig.name, options.datasetConfig.workflow ?? "interf");
|
|
379
|
+
if (!target || !target.eligible) {
|
|
182
380
|
return null;
|
|
183
381
|
}
|
|
184
382
|
const { executor, error } = await resolveExecutorForSpec(spec, options.executor, options.executionProfile);
|
|
@@ -193,13 +391,24 @@ export async function runSavedWorkspaceTest(options) {
|
|
|
193
391
|
const run = await runTargetTestsAuto(options.sourcePath, spec, [target], {
|
|
194
392
|
executor,
|
|
195
393
|
preserveSandboxes: options.preserveSandboxes ?? "on-failure",
|
|
196
|
-
artifactRootPath:
|
|
394
|
+
artifactRootPath: compiledPath,
|
|
197
395
|
});
|
|
198
396
|
const result = run.results[0];
|
|
199
397
|
if (!result)
|
|
200
398
|
return null;
|
|
399
|
+
const internalRunPath = saveTargetTestRun(compiledPath, run);
|
|
400
|
+
const datasetRunPath = writeDatasetTargetRun({
|
|
401
|
+
projectPath: options.sourcePath,
|
|
402
|
+
datasetName: options.datasetConfig.name,
|
|
403
|
+
target: "compiled",
|
|
404
|
+
generatedAt: run.generated_at,
|
|
405
|
+
runId: normalizeDatasetTestRunId(spec.id),
|
|
406
|
+
runSuffix: options.runSuffix,
|
|
407
|
+
payload: run,
|
|
408
|
+
});
|
|
201
409
|
return {
|
|
202
|
-
runPath:
|
|
410
|
+
runPath: internalRunPath,
|
|
411
|
+
displayRunPath: datasetRunPath,
|
|
203
412
|
target,
|
|
204
413
|
result,
|
|
205
414
|
};
|
package/dist/commands/test.d.ts
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
1
|
import type { CommandModule } from "yargs";
|
|
2
|
+
import type { SourceDatasetConfig } from "../lib/schema.js";
|
|
3
|
+
export declare function resolveConfiguredDatasetSelection(options: {
|
|
4
|
+
sourcePath: string;
|
|
5
|
+
requestedDatasetName?: string | null;
|
|
6
|
+
hintedDatasetConfig?: SourceDatasetConfig | null;
|
|
7
|
+
}): SourceDatasetConfig | null;
|
|
2
8
|
export declare const testCommand: CommandModule;
|
|
3
|
-
export declare function runTestCommand(argv?: Record<string, unknown>): Promise<
|
|
9
|
+
export declare function runTestCommand(argv?: Record<string, unknown>): Promise<boolean>;
|