npm - @agentv/core - Versions diffs - 2.7.1-next.5 → 2.8.0-next.1 - Mend

@agentv/core 2.7.1-next.5 → 2.8.0-next.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/{chunk-6W5E3VR6.js → chunk-P2465XAH.js} +24 -49
package/dist/chunk-P2465XAH.js.map +1 -0
package/dist/evaluation/validation/index.cjs +28 -58
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +21 -44
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +295 -220
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +44 -42
package/dist/index.d.ts +44 -42
package/dist/index.js +273 -173
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-6W5E3VR6.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -187,8 +187,6 @@ interface TargetDefinition {
     readonly subagentRoot?: string | unknown | undefined;
     readonly workspace_template?: string | unknown | undefined;
     readonly workspaceTemplate?: string | unknown | undefined;
-    readonly command_template?: string | unknown | undefined;
-    readonly commandTemplate?: string | unknown | undefined;
     readonly files_format?: string | unknown | undefined;
     readonly filesFormat?: string | unknown | undefined;
     readonly attachments_format?: string | unknown | undefined;
@@ -466,16 +464,18 @@ type TargetAccessConfig = {
     readonly max_calls?: number;
 };
 /**
- * Configuration for workspace lifecycle scripts (before_all, after_all, before_each, after_each).
- * Scripts are executed with workspace context passed via stdin.
+ * Configuration for workspace lifecycle commands (before_all, after_all, before_each, after_each).
+ * Commands are executed with workspace context passed via stdin.
  */
 type WorkspaceScriptConfig = {
     /** Command array to execute (e.g., ["bun", "run", "setup.ts"]) */
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     /** Optional timeout in milliseconds (default: 60000 for setup, 30000 for teardown) */
     readonly timeout_ms?: number;
     readonly timeoutMs?: number;
-    /** Optional working directory for script execution */
+    /** Optional working directory for command execution */
     readonly cwd?: string;
 };
 /**
@@ -493,19 +493,21 @@ type WorkspaceConfig = {
     /** Template directory or .code-workspace file. Directories are copied to temp workspace.
      *  .code-workspace files are used by VS Code providers; CLI providers use the parent directory. */
     readonly template?: string;
-    /** Script to run once before first test (after workspace creation, before git baseline) */
+    /** Command to run once before first test (after workspace creation, before git baseline) */
     readonly before_all?: WorkspaceScriptConfig;
-    /** Script to run once after last test (before workspace cleanup) */
+    /** Command to run once after last test (before workspace cleanup) */
     readonly after_all?: WorkspaceScriptConfig;
-    /** Script to run before each test */
+    /** Command to run before each test */
     readonly before_each?: WorkspaceScriptConfig;
-    /** Script to run after each test (e.g., git reset for workspace reuse) */
+    /** Command to run after each test (e.g., git reset for workspace reuse) */
     readonly after_each?: WorkspaceScriptConfig;
 };
 type CodeEvaluatorConfig = {
     readonly name: string;
     readonly type: 'code';
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     readonly resolvedScriptPath?: string;
     readonly cwd?: string;
     readonly resolvedCwd?: string;
@@ -513,9 +515,9 @@ type CodeEvaluatorConfig = {
     readonly required?: boolean | number;
     /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
-    /** Pass-through configuration for the code_judge script (any unrecognized YAML properties) */
+    /** Pass-through configuration for the code_judge (any unrecognized YAML properties) */
     readonly config?: JsonObject;
-    /** When present, enables target access for the script via local proxy */
+    /** When present, enables target access via local proxy */
     readonly target?: TargetAccessConfig;
 };
 /**
@@ -524,7 +526,9 @@ type CodeEvaluatorConfig = {
  */
 type PromptScriptConfig = {
     /** Command array to execute (e.g., ["bun", "run", "template.ts"]) */
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     /** Pass-through configuration for the prompt template */
     readonly config?: Record<string, unknown>;
 };
@@ -949,6 +953,8 @@ interface EvaluationResult {
     readonly aggregation?: TrialAggregation;
     /** Whether the trial loop was terminated early due to cost limit */
     readonly costLimited?: boolean;
+    /** Whether the evaluation was skipped due to suite-level budget exhaustion */
+    readonly budgetExceeded?: boolean;
 }
 type EvaluationVerdict = 'pass' | 'fail' | 'borderline';
 interface EvaluatorResult {
@@ -1110,6 +1116,8 @@ type EvalSuiteResult = {
     readonly cacheConfig?: CacheConfig;
     /** Suite-level metadata (name, description, version, etc.) */
     readonly metadata?: EvalMetadata;
+    /** Suite-level total cost budget in USD */
+    readonly totalBudgetUsd?: number;
 };
 /**
  * Load tests and suite metadata from a single parse.
@@ -1178,7 +1186,7 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
  * @example
  * ```typescript
  * const config: CliNormalizedConfig = {
- *   commandTemplate: 'agent run {PROMPT}',
+ *   command: 'agent run {PROMPT}',
  *   timeoutMs: 120000,
  *   verbose: true,
  * };
@@ -1186,72 +1194,62 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
  * ```
  */
 declare const CliTargetConfigSchema: z.ZodObject<{
-    commandTemplate: z.ZodString;
+    command: z.ZodString;
     filesFormat: z.ZodOptional<z.ZodString>;
     cwd: z.ZodOptional<z.ZodString>;
     workspaceTemplate: z.ZodOptional<z.ZodString>;
     timeoutMs: z.ZodOptional<z.ZodNumber>;
-    healthcheck: z.ZodOptional<z.ZodDiscriminatedUnion<"type", [z.ZodObject<{
-        type: z.ZodLiteral<"http">;
+    healthcheck: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
         url: z.ZodString;
         timeoutMs: z.ZodOptional<z.ZodNumber>;
     }, "strict", z.ZodTypeAny, {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     }, {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     }>, z.ZodObject<{
-        type: z.ZodLiteral<"command">;
-        commandTemplate: z.ZodString;
+        command: z.ZodString;
         cwd: z.ZodOptional<z.ZodString>;
         timeoutMs: z.ZodOptional<z.ZodNumber>;
     }, "strict", z.ZodTypeAny, {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     }, {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     }>]>>;
     verbose: z.ZodOptional<z.ZodBoolean>;
     keepTempFiles: z.ZodOptional<z.ZodBoolean>;
 }, "strict", z.ZodTypeAny, {
-    commandTemplate: string;
+    command: string;
     cwd?: string | undefined;
     verbose?: boolean | undefined;
     filesFormat?: string | undefined;
     workspaceTemplate?: string | undefined;
     healthcheck?: {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     } | {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     } | undefined;
     keepTempFiles?: boolean | undefined;
     timeoutMs?: number | undefined;
 }, {
-    commandTemplate: string;
+    command: string;
     cwd?: string | undefined;
     verbose?: boolean | undefined;
     filesFormat?: string | undefined;
     workspaceTemplate?: string | undefined;
     healthcheck?: {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     } | {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     } | undefined;
@@ -1698,17 +1696,19 @@ declare function deepEqual(a: unknown, b: unknown): boolean;
 declare function negateScore(score: EvaluationScore): EvaluationScore;
 interface CodeEvaluatorOptions {
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     readonly cwd?: string;
     readonly agentTimeoutMs?: number;
     /** Pass-through configuration from YAML (any unrecognized properties) */
     readonly config?: Record<string, unknown>;
-    /** Target access config - when present, enables target invocation for the script */
+    /** Target access config - when present, enables target invocation */
     readonly target?: TargetAccessConfig;
 }
 declare class CodeEvaluator implements Evaluator {
     readonly kind = "code";
-    private readonly script;
+    private readonly command;
     private readonly cwd?;
     private readonly agentTimeoutMs?;
     private readonly config?;
@@ -2198,6 +2198,8 @@ interface RunEvaluationOptions {
     readonly trials?: TrialsConfig;
     /** Real-time observability callbacks passed to the provider */
     readonly streamCallbacks?: ProviderStreamCallbacks;
+    /** Suite-level total cost budget in USD (stops dispatching when exceeded) */
+    readonly totalBudgetUsd?: number;
 }
 declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
 declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
@@ -2634,13 +2636,13 @@ interface ScriptExecutionContext {
 }
 type ScriptFailureMode = 'fatal' | 'warn';
 /**
- * Executes a workspace lifecycle script (before_all, after_all, before_each, after_each).
+ * Executes a workspace lifecycle command (before_all, after_all, before_each, after_each).
  *
- * @param config - Workspace script configuration (script, timeout_ms, cwd)
- * @param context - Context passed to script via stdin (JSON)
+ * @param config - Workspace command configuration (command, timeout_ms, cwd)
+ * @param context - Context passed to command via stdin (JSON)
  * @param failureMode - 'fatal' throws on non-zero exit; 'warn' logs warning
- * @returns Captured stdout from the script
- * @throws Error if script exits with non-zero code (fatal mode) or times out
+ * @returns Captured stdout from the command
+ * @throws Error if command exits with non-zero code (fatal mode) or times out
  */
 declare function executeWorkspaceScript(config: WorkspaceScriptConfig, context: ScriptExecutionContext, failureMode?: ScriptFailureMode): Promise<string>;

package/dist/index.d.ts CHANGED Viewed

@@ -187,8 +187,6 @@ interface TargetDefinition {
     readonly subagentRoot?: string | unknown | undefined;
     readonly workspace_template?: string | unknown | undefined;
     readonly workspaceTemplate?: string | unknown | undefined;
-    readonly command_template?: string | unknown | undefined;
-    readonly commandTemplate?: string | unknown | undefined;
     readonly files_format?: string | unknown | undefined;
     readonly filesFormat?: string | unknown | undefined;
     readonly attachments_format?: string | unknown | undefined;
@@ -466,16 +464,18 @@ type TargetAccessConfig = {
     readonly max_calls?: number;
 };
 /**
- * Configuration for workspace lifecycle scripts (before_all, after_all, before_each, after_each).
- * Scripts are executed with workspace context passed via stdin.
+ * Configuration for workspace lifecycle commands (before_all, after_all, before_each, after_each).
+ * Commands are executed with workspace context passed via stdin.
  */
 type WorkspaceScriptConfig = {
     /** Command array to execute (e.g., ["bun", "run", "setup.ts"]) */
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     /** Optional timeout in milliseconds (default: 60000 for setup, 30000 for teardown) */
     readonly timeout_ms?: number;
     readonly timeoutMs?: number;
-    /** Optional working directory for script execution */
+    /** Optional working directory for command execution */
     readonly cwd?: string;
 };
 /**
@@ -493,19 +493,21 @@ type WorkspaceConfig = {
     /** Template directory or .code-workspace file. Directories are copied to temp workspace.
      *  .code-workspace files are used by VS Code providers; CLI providers use the parent directory. */
     readonly template?: string;
-    /** Script to run once before first test (after workspace creation, before git baseline) */
+    /** Command to run once before first test (after workspace creation, before git baseline) */
     readonly before_all?: WorkspaceScriptConfig;
-    /** Script to run once after last test (before workspace cleanup) */
+    /** Command to run once after last test (before workspace cleanup) */
     readonly after_all?: WorkspaceScriptConfig;
-    /** Script to run before each test */
+    /** Command to run before each test */
     readonly before_each?: WorkspaceScriptConfig;
-    /** Script to run after each test (e.g., git reset for workspace reuse) */
+    /** Command to run after each test (e.g., git reset for workspace reuse) */
     readonly after_each?: WorkspaceScriptConfig;
 };
 type CodeEvaluatorConfig = {
     readonly name: string;
     readonly type: 'code';
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     readonly resolvedScriptPath?: string;
     readonly cwd?: string;
     readonly resolvedCwd?: string;
@@ -513,9 +515,9 @@ type CodeEvaluatorConfig = {
     readonly required?: boolean | number;
     /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
-    /** Pass-through configuration for the code_judge script (any unrecognized YAML properties) */
+    /** Pass-through configuration for the code_judge (any unrecognized YAML properties) */
     readonly config?: JsonObject;
-    /** When present, enables target access for the script via local proxy */
+    /** When present, enables target access via local proxy */
     readonly target?: TargetAccessConfig;
 };
 /**
@@ -524,7 +526,9 @@ type CodeEvaluatorConfig = {
  */
 type PromptScriptConfig = {
     /** Command array to execute (e.g., ["bun", "run", "template.ts"]) */
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     /** Pass-through configuration for the prompt template */
     readonly config?: Record<string, unknown>;
 };
@@ -949,6 +953,8 @@ interface EvaluationResult {
     readonly aggregation?: TrialAggregation;
     /** Whether the trial loop was terminated early due to cost limit */
     readonly costLimited?: boolean;
+    /** Whether the evaluation was skipped due to suite-level budget exhaustion */
+    readonly budgetExceeded?: boolean;
 }
 type EvaluationVerdict = 'pass' | 'fail' | 'borderline';
 interface EvaluatorResult {
@@ -1110,6 +1116,8 @@ type EvalSuiteResult = {
     readonly cacheConfig?: CacheConfig;
     /** Suite-level metadata (name, description, version, etc.) */
     readonly metadata?: EvalMetadata;
+    /** Suite-level total cost budget in USD */
+    readonly totalBudgetUsd?: number;
 };
 /**
  * Load tests and suite metadata from a single parse.
@@ -1178,7 +1186,7 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
  * @example
  * ```typescript
  * const config: CliNormalizedConfig = {
- *   commandTemplate: 'agent run {PROMPT}',
+ *   command: 'agent run {PROMPT}',
  *   timeoutMs: 120000,
  *   verbose: true,
  * };
@@ -1186,72 +1194,62 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
  * ```
  */
 declare const CliTargetConfigSchema: z.ZodObject<{
-    commandTemplate: z.ZodString;
+    command: z.ZodString;
     filesFormat: z.ZodOptional<z.ZodString>;
     cwd: z.ZodOptional<z.ZodString>;
     workspaceTemplate: z.ZodOptional<z.ZodString>;
     timeoutMs: z.ZodOptional<z.ZodNumber>;
-    healthcheck: z.ZodOptional<z.ZodDiscriminatedUnion<"type", [z.ZodObject<{
-        type: z.ZodLiteral<"http">;
+    healthcheck: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
         url: z.ZodString;
         timeoutMs: z.ZodOptional<z.ZodNumber>;
     }, "strict", z.ZodTypeAny, {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     }, {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     }>, z.ZodObject<{
-        type: z.ZodLiteral<"command">;
-        commandTemplate: z.ZodString;
+        command: z.ZodString;
         cwd: z.ZodOptional<z.ZodString>;
         timeoutMs: z.ZodOptional<z.ZodNumber>;
     }, "strict", z.ZodTypeAny, {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     }, {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     }>]>>;
     verbose: z.ZodOptional<z.ZodBoolean>;
     keepTempFiles: z.ZodOptional<z.ZodBoolean>;
 }, "strict", z.ZodTypeAny, {
-    commandTemplate: string;
+    command: string;
     cwd?: string | undefined;
     verbose?: boolean | undefined;
     filesFormat?: string | undefined;
     workspaceTemplate?: string | undefined;
     healthcheck?: {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     } | {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     } | undefined;
     keepTempFiles?: boolean | undefined;
     timeoutMs?: number | undefined;
 }, {
-    commandTemplate: string;
+    command: string;
     cwd?: string | undefined;
     verbose?: boolean | undefined;
     filesFormat?: string | undefined;
     workspaceTemplate?: string | undefined;
     healthcheck?: {
-        type: "http";
         url: string;
         timeoutMs?: number | undefined;
     } | {
-        type: "command";
-        commandTemplate: string;
+        command: string;
         cwd?: string | undefined;
         timeoutMs?: number | undefined;
     } | undefined;
@@ -1698,17 +1696,19 @@ declare function deepEqual(a: unknown, b: unknown): boolean;
 declare function negateScore(score: EvaluationScore): EvaluationScore;
 interface CodeEvaluatorOptions {
-    readonly script: readonly string[];
+    readonly command: readonly string[];
+    /** @deprecated Use `command` instead */
+    readonly script?: readonly string[];
     readonly cwd?: string;
     readonly agentTimeoutMs?: number;
     /** Pass-through configuration from YAML (any unrecognized properties) */
     readonly config?: Record<string, unknown>;
-    /** Target access config - when present, enables target invocation for the script */
+    /** Target access config - when present, enables target invocation */
     readonly target?: TargetAccessConfig;
 }
 declare class CodeEvaluator implements Evaluator {
     readonly kind = "code";
-    private readonly script;
+    private readonly command;
     private readonly cwd?;
     private readonly agentTimeoutMs?;
     private readonly config?;
@@ -2198,6 +2198,8 @@ interface RunEvaluationOptions {
     readonly trials?: TrialsConfig;
     /** Real-time observability callbacks passed to the provider */
     readonly streamCallbacks?: ProviderStreamCallbacks;
+    /** Suite-level total cost budget in USD (stops dispatching when exceeded) */
+    readonly totalBudgetUsd?: number;
 }
 declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
 declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
@@ -2634,13 +2636,13 @@ interface ScriptExecutionContext {
 }
 type ScriptFailureMode = 'fatal' | 'warn';
 /**
- * Executes a workspace lifecycle script (before_all, after_all, before_each, after_each).
+ * Executes a workspace lifecycle command (before_all, after_all, before_each, after_each).
  *
- * @param config - Workspace script configuration (script, timeout_ms, cwd)
- * @param context - Context passed to script via stdin (JSON)
+ * @param config - Workspace command configuration (command, timeout_ms, cwd)
+ * @param context - Context passed to command via stdin (JSON)
  * @param failureMode - 'fatal' throws on non-zero exit; 'warn' logs warning
- * @returns Captured stdout from the script
- * @throws Error if script exits with non-zero code (fatal mode) or times out
+ * @returns Captured stdout from the command
+ * @throws Error if command exits with non-zero code (fatal mode) or times out
  */
 declare function executeWorkspaceScript(config: WorkspaceScriptConfig, context: ScriptExecutionContext, failureMode?: ScriptFailureMode): Promise<string>;