@dailephd/my-dev-kit-lab 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +272 -0
- package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
- package/benchmarks/contracts/todo-behavior.md +70 -0
- package/benchmarks/contracts/todo-benchmark-case.json +227 -0
- package/benchmarks/projects/README.md +34 -0
- package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
- package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
- package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
- package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
- package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
- package/benchmarks/projects/todo-js/README.md +3 -0
- package/benchmarks/projects/todo-js/package.json +11 -0
- package/benchmarks/projects/todo-js/src/index.js +2 -0
- package/benchmarks/projects/todo-js/src/taskService.js +37 -0
- package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
- package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
- package/benchmarks/projects/todo-js/vitest.config.js +5 -0
- package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
- package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
- package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
- package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
- package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
- package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
- package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
- package/benchmarks/projects/todo-python/README.md +3 -0
- package/benchmarks/projects/todo-python/src/__init__.py +4 -0
- package/benchmarks/projects/todo-python/src/task_service.py +32 -0
- package/benchmarks/projects/todo-python/src/task_store.py +28 -0
- package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
- package/benchmarks/projects/todo-ts/README.md +3 -0
- package/benchmarks/projects/todo-ts/package.json +12 -0
- package/benchmarks/projects/todo-ts/src/index.ts +2 -0
- package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
- package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
- package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
- package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
- package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
- package/dist/scripts/build-gallery.js +3 -0
- package/dist/scripts/capture-demo-report.js +3 -0
- package/dist/scripts/evaluate-token-savings.js +2 -0
- package/dist/scripts/experiments/describeExperiment.js +143 -0
- package/dist/scripts/experiments/listExperiments.js +44 -0
- package/dist/scripts/experiments/runExperiment.js +199 -0
- package/dist/scripts/generate-experiment-plots.js +3 -0
- package/dist/scripts/generate-prompt-variants.js +2 -0
- package/dist/scripts/render-experiment-report.js +2 -0
- package/dist/scripts/run-agent-prompt.js +2 -0
- package/dist/scripts/run-controlled-experiment.js +2 -0
- package/dist/scripts/run-final-demo.js +3 -0
- package/dist/scripts/run-lab-demo.js +5 -0
- package/dist/scripts/run-visualization-demos.js +3 -0
- package/dist/scripts/security/runCodeql.js +57 -0
- package/dist/scripts/security/runDependencyChecks.js +57 -0
- package/dist/scripts/security/runFuzzSmoke.js +29 -0
- package/dist/scripts/security/runPackageChecks.js +56 -0
- package/dist/scripts/security/runSemgrep.js +63 -0
- package/dist/scripts/security/validate.js +117 -0
- package/dist/scripts/verify-benchmarks.js +202 -0
- package/dist/src/agents/adapters/claudeAdapter.js +37 -0
- package/dist/src/agents/adapters/codexAdapter.js +110 -0
- package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
- package/dist/src/agents/agentRegistry.js +21 -0
- package/dist/src/agents/index.js +7 -0
- package/dist/src/agents/parseAgentTokenUsage.js +137 -0
- package/dist/src/agents/runAgentPrompt.js +38 -0
- package/dist/src/agents/types.js +1 -0
- package/dist/src/commands/buildGalleryCommand.js +56 -0
- package/dist/src/commands/captureDemoReport.js +116 -0
- package/dist/src/commands/evaluateTokenSavings.js +175 -0
- package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
- package/dist/src/commands/generatePromptVariants.js +67 -0
- package/dist/src/commands/renderExperimentReportCommand.js +131 -0
- package/dist/src/commands/runAgentPromptCommand.js +132 -0
- package/dist/src/commands/runControlledExperimentCommand.js +174 -0
- package/dist/src/commands/runFinalDemoCommand.js +123 -0
- package/dist/src/commands/runLabDemo.js +62 -0
- package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
- package/dist/src/core/commandLine.js +59 -0
- package/dist/src/core/countTokens.js +8 -0
- package/dist/src/core/fileGlobs.js +100 -0
- package/dist/src/core/localProjectTarget.js +75 -0
- package/dist/src/core/pathSafety.js +19 -0
- package/dist/src/core/pythonCommand.js +30 -0
- package/dist/src/core/resolveCommand.js +110 -0
- package/dist/src/core/runMeasuredCommand.js +143 -0
- package/dist/src/evaluation/benchmarkMetadata.js +207 -0
- package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
- package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
- package/dist/src/evaluation/compareExperimentRuns.js +79 -0
- package/dist/src/evaluation/compareTokenSavings.js +47 -0
- package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
- package/dist/src/evaluation/index.js +18 -0
- package/dist/src/evaluation/parseAgentAnswer.js +230 -0
- package/dist/src/evaluation/projectComplexity.js +126 -0
- package/dist/src/evaluation/projectFileTree.js +83 -0
- package/dist/src/evaluation/readEvaluationCases.js +59 -0
- package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
- package/dist/src/evaluation/runControlledExperiment.js +158 -0
- package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
- package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
- package/dist/src/evaluation/scoreCorrectness.js +127 -0
- package/dist/src/evaluation/types.js +1 -0
- package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
- package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
- package/dist/src/experiments/config.js +24 -0
- package/dist/src/experiments/defaultRegistry.js +7 -0
- package/dist/src/experiments/errors.js +18 -0
- package/dist/src/experiments/index.js +9 -0
- package/dist/src/experiments/outputPaths.js +25 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
- package/dist/src/experiments/plugins/index.js +1 -0
- package/dist/src/experiments/registry.js +43 -0
- package/dist/src/experiments/results.js +48 -0
- package/dist/src/experiments/runner.js +181 -0
- package/dist/src/experiments/target.js +8 -0
- package/dist/src/experiments/types.js +1 -0
- package/dist/src/gallery/index.js +2 -0
- package/dist/src/gallery/types.js +1 -0
- package/dist/src/gallery/writeGalleryManifest.js +214 -0
- package/dist/src/index.js +12 -0
- package/dist/src/plots/buildExperimentPlotData.js +137 -0
- package/dist/src/plots/index.js +4 -0
- package/dist/src/plots/renderSvgChart.js +82 -0
- package/dist/src/plots/types.js +1 -0
- package/dist/src/plots/writePlotArtifacts.js +46 -0
- package/dist/src/prompts/buildPromptContext.js +68 -0
- package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
- package/dist/src/prompts/generatePromptVariants.js +36 -0
- package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
- package/dist/src/prompts/index.js +7 -0
- package/dist/src/prompts/measurePromptComplexity.js +41 -0
- package/dist/src/prompts/types.js +1 -0
- package/dist/src/prompts/writePromptArtifacts.js +43 -0
- package/dist/src/report/buildExperimentReportInput.js +339 -0
- package/dist/src/report/experimentReportTypes.js +1 -0
- package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
- package/dist/src/report/experiments/experimentReportModel.js +1 -0
- package/dist/src/report/experiments/index.js +4 -0
- package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
- package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
- package/dist/src/report/index.js +8 -0
- package/dist/src/report/renderExperimentHtmlReport.js +354 -0
- package/dist/src/report/renderHtmlReport.js +103 -0
- package/dist/src/report/types.js +10 -0
- package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
- package/dist/src/report/writeReportArtifacts.js +39 -0
- package/dist/src/screenshot/captureReportScreenshot.js +75 -0
- package/dist/src/screenshot/index.js +2 -0
- package/dist/src/screenshot/types.js +1 -0
- package/dist/src/securityValidation/artifacts.js +15 -0
- package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
- package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
- package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
- package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
- package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
- package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
- package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
- package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
- package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
- package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
- package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
- package/dist/src/securityValidation/commandRunner.js +136 -0
- package/dist/src/securityValidation/config.js +39 -0
- package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
- package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
- package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
- package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
- package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
- package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
- package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
- package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
- package/dist/src/securityValidation/index.js +34 -0
- package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
- package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
- package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
- package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
- package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
- package/dist/src/securityValidation/staticScans/codeql.js +66 -0
- package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
- package/dist/src/securityValidation/testMatrix.js +535 -0
- package/dist/src/securityValidation/types.js +34 -0
- package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
- package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
- package/dist/src/securityValidation/validate/verdict.js +73 -0
- package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
- package/dist/src/visualizationDemos/index.js +4 -0
- package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
- package/dist/src/visualizationDemos/types.js +1 -0
- package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
- package/docs/METRICS.md +286 -0
- package/examples/demo-report-input.json +78 -0
- package/examples/lab-demo-cases.json +35 -0
- package/examples/real-agent-campaign-cases.json +118 -0
- package/examples/token-savings-cases.json +122 -0
- package/package.json +91 -0
- package/tests/fixtures/fake-adversarial-cli.js +152 -0
- package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Builds the "valid" result object for an experiment config validation.
 *
 * @param {unknown} config - The config to report as valid; returned untouched.
 * @param {Array} [warnings=[]] - Warnings to attach to the result.
 * @returns {{valid: true, config: unknown, errors: Array, warnings: Array}}
 *   Result with `valid: true`, the given config, an empty `errors` list and
 *   the given warnings.
 */
export function validExperimentConfig(config, warnings = []) {
    const outcome = { valid: true, config, errors: [], warnings };
    return outcome;
}
|
|
9
|
+
/**
 * Builds the "invalid" result object for an experiment config validation.
 *
 * @param {Array} errors - Validation errors to report; returned untouched.
 * @param {Array} [warnings=[]] - Warnings to attach to the result.
 * @returns {{valid: false, errors: Array, warnings: Array}}
 *   Result with `valid: false`; it carries no `config` property.
 */
export function invalidExperimentConfig(errors, warnings = []) {
    const outcome = { valid: false, errors, warnings };
    return outcome;
}
|
|
16
|
+
/**
 * Shallow-merges a caller-supplied config over a set of defaults.
 *
 * @param {object} defaults - Baseline values; never mutated.
 * @param {unknown} config - Overrides. Anything `isPlainObject` rejects
 *   (null, arrays, primitives, undefined) is ignored.
 * @returns {object} A new object; keys present in `config` win over `defaults`.
 */
export function mergeConfig(defaults, config) {
    // Non-object input contributes nothing, leaving a plain copy of the defaults.
    const overrides = isPlainObject(config) ? config : {};
    return { ...defaults, ...overrides };
}
|
|
22
|
+
/**
 * Reports whether a value is a non-null, non-array object.
 *
 * Note: this is a `typeof` check only, so class instances such as `Date`
 * also count as "plain" here.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for any object that is neither `null` nor an array.
 */
export function isPlainObject(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ExperimentPluginRegistry } from "./registry.js";
|
|
2
|
+
import { contextStrategyComparisonPlugin } from "./plugins/contextStrategyComparison/index.js";
|
|
3
|
+
/**
 * Creates a fresh `ExperimentPluginRegistry` and registers the built-in
 * context strategy comparison plugin on it.
 *
 * @returns {ExperimentPluginRegistry} The newly populated registry.
 */
export function createDefaultExperimentPluginRegistry() {
    const defaultRegistry = new ExperimentPluginRegistry();
    const builtInPlugins = [contextStrategyComparisonPlugin];
    for (const plugin of builtInPlugins) {
        defaultRegistry.register(plugin);
    }
    return defaultRegistry;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Error subclass whose `name` is "ExperimentRegistryError".
 * Presumably raised for plugin-registry problems — throw sites are not
 * visible in this file.
 */
export class ExperimentRegistryError extends Error {
    /**
     * @param {string} message - Human-readable description of the failure.
     * @param {{cause?: unknown}} [options] - Standard `Error` options; lets
     *   callers chain the underlying failure via `cause`.
     */
    constructor(message, options) {
        super(message, options);
        this.name = "ExperimentRegistryError";
    }
}
|
|
7
|
+
/**
 * Error subclass whose `name` is "ExperimentTargetError".
 * Presumably raised for experiment-target problems — throw sites are not
 * visible in this file.
 */
export class ExperimentTargetError extends Error {
    /**
     * @param {string} message - Human-readable description of the failure.
     * @param {{cause?: unknown}} [options] - Standard `Error` options; lets
     *   callers chain the underlying failure via `cause`.
     */
    constructor(message, options) {
        super(message, options);
        this.name = "ExperimentTargetError";
    }
}
|
|
13
|
+
/**
 * Error subclass whose `name` is "ExperimentConfigValidationError".
 * Presumably raised when an experiment config fails validation — throw
 * sites are not visible in this file.
 */
export class ExperimentConfigValidationError extends Error {
    /**
     * @param {string} message - Human-readable description of the failure.
     * @param {{cause?: unknown}} [options] - Standard `Error` options; lets
     *   callers chain the underlying failure via `cause`.
     */
    constructor(message, options) {
        super(message, options);
        this.name = "ExperimentConfigValidationError";
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export * from "./config.js";
|
|
2
|
+
export * from "./defaultRegistry.js";
|
|
3
|
+
export * from "./errors.js";
|
|
4
|
+
export * from "./plugins/index.js";
|
|
5
|
+
export * from "./registry.js";
|
|
6
|
+
export * from "./results.js";
|
|
7
|
+
export * from "./runner.js";
|
|
8
|
+
export * from "./target.js";
|
|
9
|
+
export * from "./types.js";
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
/**
 * Computes the default output directory for an experiment run:
 * `<toolRoot>/lab-output/experiments/<pluginId>/<targetSlug>/<runId>`,
 * with the plugin id and run id passed through `sanitizePathSegment`.
 *
 * @param {{toolRoot: string, pluginId: string, target: object, runId: string}} args
 * @returns {string} The joined path.
 */
export function buildDefaultExperimentOutputRoot(args) {
    const segments = [
        "lab-output",
        "experiments",
        sanitizePathSegment(args.pluginId),
        targetSlug(args.target),
        sanitizePathSegment(args.runId),
    ];
    return path.join(args.toolRoot, ...segments);
}
|
|
5
|
+
/**
 * Derives a filesystem-safe slug describing an experiment target.
 *
 * The slug is `<name>[-v<version>]-<self|external>` passed through
 * `sanitizePathSegment`; the name is `packageName` when set, otherwise the
 * basename of `targetRoot`.
 *
 * @param {{packageName?: string, targetRoot: string, packageVersion?: string, isSelf?: boolean}} target
 * @returns {string} The sanitized slug.
 */
export function targetSlug(target) {
    const baseName = target.packageName ?? path.basename(target.targetRoot);
    let slug = `${baseName}`;
    if (target.packageVersion) {
        slug += `-v${target.packageVersion}`;
    }
    slug += target.isSelf ? "-self" : "-external";
    return sanitizePathSegment(slug);
}
|
|
11
|
+
/**
 * Reduces an arbitrary string to a single safe path segment.
 *
 * Path separators and colons become "-", "@" is dropped, any other run of
 * characters outside `[a-zA-Z0-9._-]` becomes "-", dash runs are collapsed,
 * and one leading/trailing dash is trimmed.
 *
 * @param {string} value - Raw text (package name, run id, ...).
 * @returns {string} The sanitized segment, or "target" when nothing usable
 *   remains (empty result, or a dots-only result such as "." / "..").
 */
export function sanitizePathSegment(value) {
    const sanitized = value
        .replaceAll("\\", "-")
        .replaceAll("/", "-")
        .replaceAll(":", "-")
        .replaceAll("@", "")
        .replace(/[^a-zA-Z0-9._-]+/g, "-")
        .replace(/-+/g, "-")
        .replace(/^-|-$/g, "");
    // "." and ".." survive the character filter but are directory references,
    // not names: joined into a path they would stay in or escape the parent.
    if (sanitized === "" || /^\.+$/.test(sanitized)) {
        return "target";
    }
    return sanitized;
}
|
|
22
|
+
/**
 * Reports whether `candidate` resolves to `parent` itself or to a location
 * underneath it. Both paths are resolved to absolute form first.
 *
 * @param {string} parent - Directory that should contain the candidate.
 * @param {string} candidate - Path to test.
 * @returns {boolean} `true` when the candidate is the parent or lies inside it.
 */
export function isPathInside(parent, candidate) {
    const relative = path.relative(path.resolve(parent), path.resolve(candidate));
    if (relative === "") {
        return true;
    }
    // An absolute "relative" path means no relative route exists (e.g. another drive).
    if (path.isAbsolute(relative)) {
        return false;
    }
    // Only a leading ".." *segment* escapes the parent; a child whose name merely
    // begins with two dots (e.g. "..hidden") is still inside.
    const escapesParent = relative === ".." || relative.startsWith(`..${path.sep}`);
    return !escapesParent;
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { invalidExperimentConfig, isPlainObject, mergeConfig, validExperimentConfig } from "../../config.js";
|
|
2
|
+
/**
 * Default settings for the context strategy comparison experiment.
 * `validateContextStrategyComparisonConfig` merges caller config over these.
 */
export const defaultContextStrategyComparisonConfig = {
    // Input files.
    casesPath: "examples/token-savings-cases.json",
    projectProfilesPath: "benchmarks/contracts/benchmark-project-profiles.json",

    // Experiment matrix.
    agents: ["fake-agent"],
    strategies: ["raw-full-file", "my-dev-kit-guided"],
    complexityLevels: ["short"],

    // Output location and run behaviour flags.
    outDir: "lab-output/context-strategy-comparison",
    continueOnFailure: true,
    includeRealAgents: false,
};
|
|
12
|
+
/**
 * Validates a context strategy comparison config and fills in defaults.
 *
 * @param {unknown} [config] - Caller-supplied overrides; `undefined` means
 *   "use the defaults". Any other non-object value is rejected outright.
 * @returns {object} The result of `validExperimentConfig(normalized)` when
 *   every check passes, otherwise `invalidExperimentConfig(errors)`.
 */
export function validateContextStrategyComparisonConfig(config) {
    if (config !== undefined && !isPlainObject(config)) {
        return invalidExperimentConfig(["context strategy comparison config must be an object."]);
    }
    const normalized = mergeConfig(defaultContextStrategyComparisonConfig, config);
    const errors = [];

    // Required settings: must be non-empty strings.
    for (const field of ["casesPath", "outDir"]) {
        const value = normalized[field];
        if (!value || typeof value !== "string") {
            errors.push(`${field} is required.`);
        }
    }

    // Optional list settings: only null/undefined count as "not provided".
    // A truthiness guard here would let "", 0 or false slip through unvalidated.
    for (const field of ["agents", "strategies", "complexityLevels"]) {
        const value = normalized[field];
        const provided = value !== undefined && value !== null;
        if (provided && !Array.isArray(value)) {
            errors.push(`${field} must be an array when provided.`);
        }
    }

    if (errors.length > 0) {
        return invalidExperimentConfig(errors);
    }
    return validExperimentConfig(normalized);
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { runControlledExperiment } from "../../../evaluation/runControlledExperiment.js";
|
|
4
|
+
import { defaultContextStrategyComparisonConfig, validateContextStrategyComparisonConfig, } from "./config.js";
|
|
5
|
+
import { mapLegacyArtifactsToExperimentRun, } from "./resultMapping.js";
|
|
6
|
+
/**
 * Static descriptor for the context strategy comparison plugin: identity,
 * schema version, maturity status, and the target/output kinds it declares.
 */
export const contextStrategyComparisonMetadata = {
    // Identity.
    id: "context-strategy-comparison",
    name: "Context Strategy Comparison",
    description: "Compare raw full-file context against my-dev-kit guided retrieval/context strategies.",

    // Versioning and maturity.
    schemaVersion: "1.0.0",
    status: "experimental",

    // Declared capabilities.
    supportedTargets: ["self", "external-local"],
    supportedOutputs: ["json", "html", "plot", "screenshot", "artifact"],
};
|
|
15
|
+
/**
 * Experiment plugin that wraps `runControlledExperiment` and maps its
 * artifacts into the plugin result shape via `mapLegacyArtifactsToExperimentRun`.
 */
export const contextStrategyComparisonPlugin = {
    metadata: contextStrategyComparisonMetadata,
    defaultConfig: defaultContextStrategyComparisonConfig,
    configDefinition: {
        fields: [
            { name: "casesPath", type: "string", required: true },
            { name: "projectProfilesPath", type: "string" },
            { name: "agents", type: "array" },
            { name: "strategies", type: "array" },
            { name: "complexityLevels", type: "array" },
            { name: "outDir", type: "string", required: true },
        ],
    },
    validateConfig: validateContextStrategyComparisonConfig,
    /**
     * Runs the controlled experiment and writes `experiment-plugin-result.json`.
     *
     * @param {object} context - Run context; reads `inputs`, `startedAt`,
     *   `config`, `target`, `runId`, `toolRoot` and optional `outputRoot`.
     * @returns {Promise<object>} The mapped run result, still carrying
     *   `legacyArtifacts` (only the file written to disk has them redacted).
     * @throws {Error} When `inputs.cases` or `inputs.projectProfiles` is not an array.
     */
    async run(context) {
        const { inputs, target, runId } = context;
        const cases = readInputArray(inputs, "cases");
        const projectProfiles = readInputArray(inputs, "projectProfiles");
        const env = readEnv(inputs);
        const startedAt = context.startedAt.toISOString();

        const legacyArtifacts = await runControlledExperiment({
            config: context.config,
            cases,
            projectProfiles,
            repoRoot: target.targetRoot,
            env,
        });
        const completedAt = new Date().toISOString();

        // An explicit output root wins; otherwise config.outDir resolves against the tool root.
        const outDir = context.outputRoot ?? path.resolve(context.toolRoot, context.config.outDir);
        await mkdir(outDir, { recursive: true });
        const pluginResultPath = path.join(outDir, "experiment-plugin-result.json");

        const result = mapLegacyArtifactsToExperimentRun({
            runId,
            startedAt,
            completedAt,
            target,
            legacyArtifacts,
            pluginResultPath,
        });

        // Persist a copy with the full legacy artifacts replaced by their paths.
        const serialized = JSON.stringify(redactLegacyArtifacts(result), null, 2);
        await writeFile(pluginResultPath, `${serialized}\n`, "utf8");
        return result;
    },
    /**
     * Returns the summary already attached to a run result.
     *
     * @param {object} result - A result produced by `run`.
     * @returns {object} `result.summary`.
     * @throws {Error} When the result has no summary.
     */
    summarize(result) {
        const { summary } = result;
        if (summary) {
            return summary;
        }
        throw new Error("Context strategy comparison result is missing a summary.");
    },
};
|
|
63
|
+
/**
 * Reads a required array-valued entry from the plugin inputs.
 *
 * @param {object|undefined|null} inputs - Inputs bag; may be missing.
 * @param {string} key - Name of the entry to read.
 * @returns {Array} The array stored under `key` (same reference).
 * @throws {Error} When the entry is absent or not an array.
 */
function readInputArray(inputs, key) {
    const candidate = inputs?.[key];
    if (Array.isArray(candidate)) {
        return candidate;
    }
    throw new Error(`Context strategy comparison requires ${key} input.`);
}
|
|
70
|
+
/**
 * Extracts the optional `env` object from the plugin inputs.
 *
 * @param {object|undefined|null} inputs - Inputs bag; may be missing.
 * @returns {object|undefined} `inputs.env` when it is a non-null, non-array
 *   object; otherwise `undefined`.
 */
function readEnv(inputs) {
    const candidate = inputs?.env;
    const usable = Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
    return usable ? candidate : undefined;
}
|
|
77
|
+
/**
 * Produces a copy of a run result in which the bulky `legacyArtifacts`
 * object is replaced by just its `artifactPaths`, stored under
 * `legacyArtifactPaths`.
 *
 * @param {object} result - Run result carrying `legacyArtifacts.artifactPaths`.
 * @returns {object} A new object; the input is not modified.
 */
function redactLegacyArtifacts(result) {
    const { legacyArtifacts: legacy, ...publicFields } = result;
    publicFields.legacyArtifactPaths = legacy.artifactPaths;
    return publicFields;
}
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import { summarizeExperimentRun } from "../../results.js";
|
|
2
|
+
/**
 * Converts the artifacts returned by the legacy controlled-experiment runner
 * into the plugin experiment-run shape and attaches a summary.
 *
 * @param {{runId: string, startedAt: string, completedAt: string, target: object,
 *   legacyArtifacts: object, pluginResultPath?: string}} args
 * @returns {object} The run record plus `summary` from `summarizeExperimentRun`.
 */
export function mapLegacyArtifactsToExperimentRun(args) {
    const { legacyArtifacts, pluginResultPath } = args;
    const legacyRuns = legacyArtifacts.runs;

    const variants = buildVariants(legacyRuns);
    const cases = buildCases(legacyRuns);
    const warnings = buildWarnings(legacyArtifacts);
    const failures = buildFailures(legacyRuns);
    const metrics = buildRunMetrics(legacyArtifacts);
    const artifacts = buildArtifacts(legacyArtifacts, pluginResultPath);
    const status = statusFromArtifacts(legacyArtifacts);

    const run = {
        runId: args.runId,
        pluginId: "context-strategy-comparison",
        startedAt: args.startedAt,
        completedAt: args.completedAt,
        status,
        target: args.target,
        variants,
        cases,
        metrics,
        artifacts,
        warnings,
        failures,
        legacyArtifacts,
        metadata: {
            pluginName: "Context Strategy Comparison",
            pluginSchemaVersion: "1.0.0",
            legacySummaryPath: legacyArtifacts.artifactPaths.summaryPath,
        },
    };
    // The summary is computed from the run before it is attached to the copy returned.
    const summary = summarizeExperimentRun(run);
    return { ...run, summary };
}
|
|
35
|
+
/**
 * Builds one variant descriptor per distinct `promptStrategy` found in the
 * runs, in first-seen order.
 *
 * Any strategy other than "raw-full-file" is labelled as the guided variant.
 *
 * @param {Array<{promptStrategy: string}>} runs - Legacy run records.
 * @returns {Array<{id: string, name: string, description: string, metadata: {strategy: string}}>}
 */
function buildVariants(runs) {
    const seen = new Set();
    const variants = [];
    for (const strategy of runs.map((run) => run.promptStrategy)) {
        if (seen.has(strategy)) {
            continue;
        }
        seen.add(strategy);
        const isRaw = strategy === "raw-full-file";
        variants.push({
            id: strategy,
            name: isRaw ? "Raw Full File" : "My Dev Kit Guided",
            description: isRaw
                ? "Full source files are embedded directly in the prompt."
                : "The prompt asks the agent to use my-dev-kit retrieval/context commands.",
            metadata: { strategy },
        });
    }
    return variants;
}
|
|
46
|
+
/**
 * Groups legacy runs by `caseId` and builds one case record per group, in
 * first-seen order.
 *
 * Runs are bucketed in a single pass with a Map instead of re-filtering the
 * whole run list once per case id.
 *
 * @param {Array<object>} runs - Legacy run records; each carries `caseId`.
 * @returns {Array<{id: string, name: string, outcomes: Array, metadata: {benchmarkProject: string}}>}
 */
function buildCases(runs) {
    const runsByCase = new Map();
    for (const run of runs) {
        const bucket = runsByCase.get(run.caseId);
        if (bucket) {
            bucket.push(run);
        }
        else {
            runsByCase.set(run.caseId, [run]);
        }
    }
    return Array.from(runsByCase, ([caseId, caseRuns]) => ({
        id: caseId,
        name: caseId,
        outcomes: caseRuns.map(mapOutcome),
        metadata: {
            // Taken from the first run of the case; "" when that run has none.
            benchmarkProject: caseRuns[0]?.benchmarkProject ?? "",
        },
    }));
}
|
|
60
|
+
/**
 * Maps one legacy run record to a case outcome.
 *
 * Every legacy warning becomes a "legacy-run-warning" entry; every legacy
 * error becomes a failure whose code is the run's legacy status and which is
 * marked recoverable.
 *
 * @param {object} run - Legacy run record.
 * @returns {object} Outcome with status, metrics, artifacts, warnings,
 *   failures, timestamps and descriptive metadata.
 */
function mapOutcome(run) {
    // Case/variant identity stamped on the outcome and on each warning/failure.
    const scope = { caseId: run.caseId, variantId: run.promptStrategy };
    const toWarning = (message) => ({ code: "legacy-run-warning", message, ...scope });
    const toFailure = (message) => ({ code: run.status, message, ...scope, recoverable: true });

    return {
        id: run.runId,
        ...scope,
        status: outcomeStatus(run),
        metrics: buildOutcomeMetrics(run),
        artifacts: buildOutcomeArtifacts(run),
        warnings: run.warnings.map(toWarning),
        failures: run.errors.map(toFailure),
        startedAt: run.startedAt,
        completedAt: run.endedAt,
        metadata: {
            agentId: run.agentId,
            benchmarkProject: run.benchmarkProject,
            promptComplexityLevel: run.promptComplexityLevel,
            promptVariantId: run.promptVariantId,
            legacyStatus: run.status,
            statusReason: run.statusReason,
        },
    };
}
|
|
93
|
+
/**
 * Builds the per-outcome metric list for a legacy run: correctness score,
 * duration, and total tokens when a numeric count is available.
 *
 * @param {object} run - Legacy run record; reads `correctness.correctnessScore`,
 *   `durationMs`, `tokenUsage.totalTokens`, `promptStrategy` and `caseId`.
 * @returns {Array<object>} Two or three metric entries.
 */
function buildOutcomeMetrics(run) {
    // Identity fields appended to every metric entry.
    const scope = { variantId: run.promptStrategy, caseId: run.caseId };
    const metrics = [
        { id: "correctness-score", name: "Correctness score", value: run.correctness.correctnessScore, ...scope },
        { id: "duration-ms", name: "Duration", value: run.durationMs, unit: "ms", ...scope },
    ];
    const { totalTokens } = run.tokenUsage;
    if (typeof totalTokens === "number") {
        metrics.push({ id: "total-tokens", name: "Total tokens", value: totalTokens, unit: "tokens", ...scope });
    }
    return metrics;
}
|
|
123
|
+
/**
 * Lists the artifact files recorded for one legacy run. An entry is emitted
 * only for artifact paths that are set (truthy), in a fixed order: prompt,
 * agent result, parsed answer, correctness score.
 *
 * @param {object} run - Legacy run record; reads `artifactPaths`, `runId`,
 *   `caseId` and `promptStrategy`.
 * @returns {Array<{id: string, label: string, path: string, kind: string, caseId: string, variantId: string}>}
 */
function buildOutcomeArtifacts(run) {
    // [artifactPaths key, id suffix, label, kind]
    const descriptors = [
        ["promptPath", "prompt", "Prompt", "text"],
        ["agentRunResultPath", "agent-result", "Agent run result", "json"],
        ["parsedAnswerPath", "parsed-answer", "Parsed answer", "json"],
        ["correctnessScorePath", "correctness", "Correctness score", "json"],
    ];
    return descriptors
        .filter(([pathKey]) => run.artifactPaths[pathKey])
        .map(([pathKey, suffix, label, kind]) => ({
            id: `${run.runId}-${suffix}`,
            label,
            path: run.artifactPaths[pathKey],
            kind,
            caseId: run.caseId,
            variantId: run.promptStrategy,
        }));
}
|
|
167
|
+
/**
 * Builds the experiment-level artifact list from the legacy artifact paths.
 *
 * The four legacy JSON files (summary, runs, comparisons, config) are always
 * listed; the plugin result is appended only when `pluginResultPath` is truthy.
 *
 * @param {object} legacyArtifacts - Reads `artifactPaths.summaryPath`, `runsPath`, `comparisonsPath` and `configPath`.
 * @param {string} [pluginResultPath] - Optional path of the plugin result file.
 * @returns {object[]} Descriptors with `id`, `label`, `path` and `kind`.
 */
function buildArtifacts(legacyArtifacts, pluginResultPath) {
    const paths = legacyArtifacts.artifactPaths;
    // Each row: [id, label, path].
    const legacyFiles = [
        ["legacy-summary", "Experiment summary", paths.summaryPath],
        ["legacy-runs", "Experiment runs", paths.runsPath],
        ["legacy-comparisons", "Experiment comparisons", paths.comparisonsPath],
        ["legacy-config", "Experiment config", paths.configPath],
    ];
    const listed = legacyFiles.map(([id, label, path]) => ({ id, label, path, kind: "json" }));
    if (!pluginResultPath) {
        return listed;
    }
    listed.push({
        id: "plugin-result",
        label: "Plugin result",
        path: pluginResultPath,
        kind: "json",
    });
    return listed;
}
|
|
184
|
+
/**
 * Converts the legacy experiment summary into a flat list of metrics.
 *
 * @param {object} legacyArtifacts - Reads the counters and averages on `legacyArtifacts.summary`.
 * @returns {object[]} Metrics with `id`, `name`, `value` and, for the two percentage metrics, `unit`.
 */
function buildRunMetrics(legacyArtifacts) {
    const { summary } = legacyArtifacts;
    // Each row: [id, name, summary field, optional unit].
    const layout = [
        ["total-runs", "Total runs", "totalRuns"],
        ["completed-runs", "Completed runs", "completedRuns"],
        ["failed-runs", "Failed runs", "failedRuns"],
        ["total-comparisons", "Total comparisons", "totalComparisons"],
        ["average-token-savings-percent", "Average token savings", "averageTokenSavingsPercent", "percent"],
        ["average-duration-reduction-percent", "Average duration reduction", "averageDurationReductionPercent", "percent"],
        ["average-correctness-delta", "Average correctness delta", "averageCorrectnessDelta"],
    ];
    return layout.map(([id, name, field, unit]) => {
        const metric = { id, name, value: summary[field] };
        // Only attach `unit` where one is defined so unit-less metrics keep no `unit` key at all.
        if (unit !== undefined) {
            metric.unit = unit;
        }
        return metric;
    });
}
|
|
210
|
+
/**
 * Wraps each legacy warning message in a warning record.
 *
 * @param {object} legacyArtifacts - Reads the `warnings` array.
 * @returns {object[]} One `{ code: "legacy-artifact-warning", message }` record per entry.
 */
function buildWarnings(legacyArtifacts) {
    const { warnings } = legacyArtifacts;
    const asWarning = (text) => {
        return { code: "legacy-artifact-warning", message: text };
    };
    return warnings.map((text) => asWarning(text));
}
|
|
216
|
+
/**
 * Flattens the error messages of every run into failure records.
 *
 * Each record uses the run's status as its `code` and is marked `recoverable: true`.
 *
 * @param {object[]} runs - Run records; reads `errors`, `status`, `promptStrategy` and `caseId`.
 * @returns {object[]} One record per error message, in run order then message order.
 */
function buildFailures(runs) {
    return runs.flatMap((run) => {
        const { status, promptStrategy, caseId } = run;
        return run.errors.map((text) => ({
            code: status,
            message: text,
            variantId: promptStrategy,
            caseId,
            recoverable: true,
        }));
    });
}
|
|
225
|
+
/**
 * Maps a run's status onto an outcome status.
 *
 * @param {object} run - Reads `run.status`.
 * @returns {string} `"completed"` or `"skipped"` when the run has exactly that status, otherwise `"failed"`.
 */
function outcomeStatus(run) {
    switch (run.status) {
        case "completed":
        case "skipped":
            return run.status;
        default:
            // Every other status is reported as a failure.
            return "failed";
    }
}
|
|
232
|
+
/**
 * Derives the overall experiment status from the legacy summary counters.
 *
 * Checks are applied in order: no runs -> "skipped"; all runs completed ->
 * "completed"; at least one completed -> "partial"; all runs skipped ->
 * "skipped"; anything else -> "failed".
 *
 * @param {object} legacyArtifacts - Reads `summary.totalRuns`, `summary.completedRuns` and `summary.skippedRuns`.
 * @returns {string} One of "completed", "partial", "failed" or "skipped".
 */
function statusFromArtifacts(legacyArtifacts) {
    const { totalRuns, completedRuns, skippedRuns } = legacyArtifacts.summary;
    if (totalRuns === 0) {
        return "skipped";
    }
    if (completedRuns === totalRuns) {
        return "completed";
    }
    if (completedRuns > 0) {
        return "partial";
    }
    return skippedRuns === totalRuns ? "skipped" : "failed";
}
|
|
244
|
+
/**
 * Builds the two metrics reported for a single comparison.
 *
 * @param {object} comparison - Reads `comparisonId`, `caseId`, `tokenSavingsPercent` and `correctnessDelta`.
 * @returns {object[]} The token-savings metric (unit "percent") followed by the
 *   correctness-delta metric; a null or undefined source value is reported as `null`.
 */
export function comparisonMetrics(comparison) {
    const { comparisonId, caseId } = comparison;
    const tokenSavings = {
        id: `${comparisonId}-token-savings-percent`,
        name: "Token savings",
        value: comparison.tokenSavingsPercent ?? null,
        unit: "percent",
        caseId,
    };
    const correctnessDelta = {
        id: `${comparisonId}-correctness-delta`,
        name: "Correctness delta",
        value: comparison.correctnessDelta ?? null,
        caseId,
    };
    return [tokenSavings, correctnessDelta];
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./contextStrategyComparison/index.js";
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { ExperimentRegistryError } from "./errors.js";
|
|
2
|
+
/**
 * In-memory registry of experiment plugins keyed by `metadata.id`.
 *
 * The registry keeps the plugin objects themselves plus a private copy of each
 * plugin's metadata, so metadata handed out by `list()` / `describe()` can be
 * mutated by callers without affecting what the registry stores.
 */
export class ExperimentPluginRegistry {
    /** Registered plugin objects by id. */
    plugins = new Map();
    /** Metadata copies (taken at registration time) by id. */
    metadataById = new Map();
    /**
     * Registers a plugin under its `metadata.id`.
     *
     * @param {object} plugin - Plugin exposing `metadata` with `id`, `supportedTargets` and `supportedOutputs`.
     * @throws {ExperimentRegistryError} When the plugin has no usable `metadata.id`
     *   (including a missing plugin or missing `metadata`) or the id is already registered.
     */
    register(plugin) {
        // Optional chaining so a plugin without `metadata` is rejected with the
        // registry's own error instead of an unrelated TypeError.
        const id = plugin?.metadata?.id;
        if (!id) {
            throw new ExperimentRegistryError("Experiment plugin metadata.id is required.");
        }
        if (this.plugins.has(id)) {
            throw new ExperimentRegistryError(`Experiment plugin is already registered: ${id}`);
        }
        this.plugins.set(id, plugin);
        this.metadataById.set(id, cloneMetadata(plugin.metadata));
    }
    /**
     * @returns {object[]} Fresh copies of the metadata of every registered plugin, in registration order.
     */
    list() {
        return [...this.metadataById.values()].map(cloneMetadata);
    }
    /**
     * @param {string} id - Plugin id.
     * @returns {object} A fresh copy of the stored metadata.
     * @throws {ExperimentRegistryError} When no plugin is registered under `id`.
     */
    describe(id) {
        const metadata = this.metadataById.get(id);
        if (!metadata) {
            throw new ExperimentRegistryError(`Experiment plugin not found: ${id}`);
        }
        return cloneMetadata(metadata);
    }
    /**
     * @param {string} id - Plugin id.
     * @returns {object|undefined} The registered plugin, or `undefined` when there is none.
     */
    find(id) {
        return this.plugins.get(id);
    }
    /**
     * @param {string} id - Plugin id.
     * @returns {object} The registered plugin.
     * @throws {ExperimentRegistryError} When no plugin is registered under `id`.
     */
    get(id) {
        const plugin = this.find(id);
        if (!plugin) {
            throw new ExperimentRegistryError(`Experiment plugin not found: ${id}`);
        }
        return plugin;
    }
}
|
|
37
|
+
/**
 * Copies a plugin metadata object.
 *
 * Top-level properties are copied shallowly; `supportedTargets` and
 * `supportedOutputs` are additionally copied into new arrays so the result
 * does not share those two arrays with the input.
 *
 * @param {object} metadata - Metadata with iterable `supportedTargets` and `supportedOutputs`.
 * @returns {object} The copy.
 */
function cloneMetadata(metadata) {
    const copy = { ...metadata };
    copy.supportedTargets = [...metadata.supportedTargets];
    copy.supportedOutputs = [...metadata.supportedOutputs];
    return copy;
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * The status values used for experiment runs, in the order
 * completed, partial, failed, skipped.
 */
export const experimentRunStatuses = ["completed", "partial", "failed", "skipped"];
|
|
7
|
+
/**
 * Produces a summary of an experiment run.
 *
 * Each case is classified with `statusForCase` and the cases are counted per
 * status; the run's own `status` is passed through unchanged, `metrics` is
 * copied into a new array, and warnings/failures are gathered with
 * `collectWarnings` / `collectFailures`.
 *
 * @param {object} run - Reads `status`, `cases`, `metrics`, and (via the helpers) warnings and failures.
 * @returns {object} `{ status, totalCases, completedCases, partialCases, failedCases, skippedCases, metrics, warnings, failures }`.
 */
export function summarizeExperimentRun(run) {
    const perCase = run.cases.map((experimentCase) => statusForCase(experimentCase));
    const countOf = (wanted) => perCase.reduce((count, status) => (status === wanted ? count + 1 : count), 0);
    const completedCases = countOf("completed");
    const partialCases = countOf("partial");
    const failedCases = countOf("failed");
    const skippedCases = countOf("skipped");
    const summary = {
        status: run.status,
        totalCases: run.cases.length,
        completedCases,
        partialCases,
        failedCases,
        skippedCases,
    };
    summary.metrics = [...run.metrics];
    summary.warnings = collectWarnings(run);
    summary.failures = collectFailures(run);
    return summary;
}
|
|
25
|
+
/**
 * Classifies one experiment case from the statuses of its outcomes.
 *
 * @param {object} experimentCase - Reads `outcomes` and each outcome's `status`.
 * @returns {string} "skipped" when there are no outcomes; "completed", "skipped"
 *   or "failed" when every outcome has exactly that status; otherwise "partial".
 */
function statusForCase(experimentCase) {
    const { outcomes } = experimentCase;
    if (outcomes.length === 0) {
        return "skipped";
    }
    const distinct = new Set(outcomes.map((outcome) => outcome.status));
    if (distinct.size === 1) {
        const [only] = distinct;
        // Only these three uniform statuses are reported as-is.
        if (only === "completed" || only === "skipped" || only === "failed") {
            return only;
        }
    }
    // Mixed statuses, or a uniform status outside the three above.
    return "partial";
}
|
|
37
|
+
/**
 * Gathers every warning attached to a run.
 *
 * @param {object} run - Reads `run.warnings` and the `warnings` of every outcome of every case.
 * @returns {Array} Run-level warnings first, followed by outcome warnings in case order then outcome order.
 */
function collectWarnings(run) {
    const runLevel = [...run.warnings];
    const warningsOfCase = (experimentCase) => experimentCase.outcomes.flatMap((outcome) => outcome.warnings);
    const outcomeLevel = run.cases.flatMap(warningsOfCase);
    return [...runLevel, ...outcomeLevel];
}
|
|
43
|
+
/**
 * Gathers every failure attached to a run.
 *
 * @param {object} run - Reads `run.failures` and the `failures` of every outcome of every case.
 * @returns {Array} Run-level failures first, followed by outcome failures in case order then outcome order.
 */
function collectFailures(run) {
    const runLevel = [...run.failures];
    const failuresOfCase = (experimentCase) => experimentCase.outcomes.flatMap((outcome) => outcome.failures);
    const outcomeLevel = run.cases.flatMap(failuresOfCase);
    return [...runLevel, ...outcomeLevel];
}
|