npm - @agentv/core - Versions diffs - 4.17.1 → 4.19.0-next.1 - Mend

@agentv/core 4.17.1 → 4.19.0-next.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (18)

package/dist/{chunk-6VZY3B6M.js → chunk-24ND5HZC.js} +102 -102
package/dist/chunk-24ND5HZC.js.map +1 -0
package/dist/chunk-QXX3IBYV.js +19740 -0
package/dist/chunk-QXX3IBYV.js.map +1 -0
package/dist/evaluation/validation/index.cjs +5 -5
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +3 -3
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +22852 -21848
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +1015 -974
package/dist/index.d.ts +1015 -974
package/dist/index.js +494 -19790
package/dist/index.js.map +1 -1
package/dist/ts-eval-loader-XFQ6S4DT.js +12 -0
package/dist/ts-eval-loader-XFQ6S4DT.js.map +1 -0
package/package.json +1 -1
package/dist/chunk-6VZY3B6M.js.map +0 -1

package/dist/index.d.ts CHANGED

@@ -341,7 +341,7 @@ type ArgsMatchMode = 'exact' | 'ignore' | 'subset' | 'superset';
 /**
  * Configuration for tool-trajectory evaluator.
  */
-interface ToolTrajectoryEvaluatorConfig {
+interface ToolTrajectoryGraderConfig {
     readonly name: string;
     readonly type: 'tool-trajectory';
     /** Matching mode */
@@ -355,7 +355,7 @@ interface ToolTrajectoryEvaluatorConfig {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
     /** Default argument matching mode for all expected items (defaults to 'exact') */
     readonly argsMatch?: ArgsMatchMode | readonly string[];
@@ -539,9 +539,9 @@ declare function isJsonValue(value: unknown): value is JsonValue;
  * - Either content (string or array of objects) OR tool_calls (for assistant messages)
  */
 declare function isTestMessage(value: unknown): value is TestMessage;
-declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
-type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
-declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
+declare const GRADER_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
+type GraderKind = (typeof GRADER_KIND_VALUES)[number];
+declare function isGraderKind(value: unknown): value is GraderKind;
 /**
  * Configuration for enabling target access in code-grader evaluators.
  * When present, the runtime will start a local proxy server that allows
@@ -697,7 +697,7 @@ type WorkspaceConfig = {
      *  relative paths from their own directory, not the eval file's directory. */
     readonly workspaceFileDir?: string;
 };
-type CodeEvaluatorConfig = {
+type CodeGraderConfig = {
     readonly name: string;
     readonly type: 'code-grader';
     readonly command: readonly string[];
@@ -710,7 +710,7 @@ type CodeEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
     /** Pass-through configuration for the code-grader (any unrecognized YAML properties) */
     readonly config?: JsonObject;
@@ -739,7 +739,7 @@ type ContentPreprocessorConfig = {
     /** Resolved absolute path for the command script (last argv element) */
     readonly resolvedCommand?: readonly string[];
 };
-type LlmGraderEvaluatorConfig = {
+type LlmGraderConfig = {
     readonly name: string;
     readonly type: 'llm-grader';
     /** Text prompt (inline or file path) or executable script config */
@@ -754,7 +754,7 @@ type LlmGraderEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
     /** Optional target override for this grader (uses a named LLM target from targets.yaml). */
     readonly target?: string;
@@ -767,8 +767,6 @@ type LlmGraderEvaluatorConfig = {
     /** Optional content preprocessors for ContentFile blocks in assistant output */
     readonly preprocessors?: readonly ContentPreprocessorConfig[];
 };
-/** @deprecated Use `LlmGraderEvaluatorConfig` instead */
-type LlmJudgeEvaluatorConfig = LlmGraderEvaluatorConfig;
 /**
  * Score range definition for analytic rubric scoring.
  * Each range maps an integer score band (0-10) to an outcome description.
@@ -830,16 +828,16 @@ type CompositeAggregatorConfig = {
     readonly type: 'threshold';
     readonly threshold: number;
 };
-type CompositeEvaluatorConfig = {
+type CompositeGraderConfig = {
     readonly name: string;
     readonly type: 'composite';
-    readonly assertions: readonly EvaluatorConfig[];
+    readonly assertions: readonly GraderConfig[];
     readonly aggregator: CompositeAggregatorConfig;
     readonly weight?: number;
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
@@ -874,7 +872,7 @@ type FieldConfig = {
 /**
  * Configuration for the field-accuracy evaluator.
  */
-type FieldAccuracyEvaluatorConfig = {
+type FieldAccuracyGraderConfig = {
     readonly name: string;
     readonly type: 'field-accuracy';
     /** Fields to compare between candidate and expected */
@@ -885,14 +883,14 @@ type FieldAccuracyEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the latency evaluator.
  * Checks execution duration against a threshold.
  */
-type LatencyEvaluatorConfig = {
+type LatencyGraderConfig = {
     readonly name: string;
     readonly type: 'latency';
     /** Maximum allowed duration in milliseconds */
@@ -901,14 +899,14 @@ type LatencyEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the cost evaluator.
  * Checks execution cost against a budget.
  */
-type CostEvaluatorConfig = {
+type CostGraderConfig = {
     readonly name: string;
     readonly type: 'cost';
     /** Maximum allowed cost in USD */
@@ -917,14 +915,14 @@ type CostEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the token-usage evaluator.
  * Checks provider-reported token usage against configured limits.
  */
-type TokenUsageEvaluatorConfig = {
+type TokenUsageGraderConfig = {
     readonly name: string;
     readonly type: 'token-usage';
     /** Maximum allowed total tokens (input + output + cached, when present) */
@@ -937,7 +935,7 @@ type TokenUsageEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
@@ -945,7 +943,7 @@ type TokenUsageEvaluatorConfig = {
  * Provides declarative threshold-based checks on execution metrics.
  * Only specified thresholds are checked; omitted ones are ignored.
  */
-type ExecutionMetricsEvaluatorConfig = {
+type ExecutionMetricsGraderConfig = {
     readonly name: string;
     readonly type: 'execution-metrics';
     /** Maximum allowed number of tool calls */
@@ -966,14 +964,14 @@ type ExecutionMetricsEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the contains assertion evaluator.
  * Checks whether the candidate output contains a specified substring.
  */
-type ContainsEvaluatorConfig = {
+type ContainsGraderConfig = {
     readonly name: string;
     readonly type: 'contains';
     readonly value: string;
@@ -981,14 +979,14 @@ type ContainsEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the contains_any assertion evaluator.
  * Checks whether the candidate output contains ANY of the specified substrings.
  */
-type ContainsAnyEvaluatorConfig = {
+type ContainsAnyGraderConfig = {
     readonly name: string;
     readonly type: 'contains-any';
     readonly value: readonly string[];
@@ -996,14 +994,14 @@ type ContainsAnyEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the contains_all assertion evaluator.
  * Checks whether the candidate output contains ALL of the specified substrings.
  */
-type ContainsAllEvaluatorConfig = {
+type ContainsAllGraderConfig = {
     readonly name: string;
     readonly type: 'contains-all';
     readonly value: readonly string[];
@@ -1011,14 +1009,14 @@ type ContainsAllEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the icontains assertion evaluator.
  * Case-insensitive check whether the candidate output contains a specified substring.
  */
-type IcontainsEvaluatorConfig = {
+type IcontainsGraderConfig = {
     readonly name: string;
     readonly type: 'icontains';
     readonly value: string;
@@ -1026,14 +1024,14 @@ type IcontainsEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the icontains_any assertion evaluator.
  * Case-insensitive check whether the candidate output contains ANY of the specified substrings.
  */
-type IcontainsAnyEvaluatorConfig = {
+type IcontainsAnyGraderConfig = {
     readonly name: string;
     readonly type: 'icontains-any';
     readonly value: readonly string[];
@@ -1041,14 +1039,14 @@ type IcontainsAnyEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the icontains_all assertion evaluator.
  * Case-insensitive check whether the candidate output contains ALL of the specified substrings.
  */
-type IcontainsAllEvaluatorConfig = {
+type IcontainsAllGraderConfig = {
     readonly name: string;
     readonly type: 'icontains-all';
     readonly value: readonly string[];
@@ -1056,14 +1054,14 @@ type IcontainsAllEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the starts_with assertion evaluator.
  * Checks whether the candidate output starts with a specified string (both trimmed).
  */
-type StartsWithEvaluatorConfig = {
+type StartsWithGraderConfig = {
     readonly name: string;
     readonly type: 'starts-with';
     readonly value: string;
@@ -1071,14 +1069,14 @@ type StartsWithEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the ends_with assertion evaluator.
  * Checks whether the candidate output ends with a specified string (both trimmed).
  */
-type EndsWithEvaluatorConfig = {
+type EndsWithGraderConfig = {
     readonly name: string;
     readonly type: 'ends-with';
     readonly value: string;
@@ -1086,14 +1084,14 @@ type EndsWithEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the regex assertion evaluator.
  * Checks whether the candidate output matches a regular expression pattern.
  */
-type RegexEvaluatorConfig = {
+type RegexGraderConfig = {
     readonly name: string;
     readonly type: 'regex';
     readonly value: string;
@@ -1103,28 +1101,28 @@ type RegexEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the is_json assertion evaluator.
  * Checks whether the candidate output is valid JSON.
  */
-type IsJsonEvaluatorConfig = {
+type IsJsonGraderConfig = {
     readonly name: string;
     readonly type: 'is-json';
     readonly weight?: number;
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
  * Configuration for the equals assertion evaluator.
  * Checks whether the candidate output exactly equals a specified string.
  */
-type EqualsEvaluatorConfig = {
+type EqualsGraderConfig = {
     readonly name: string;
     readonly type: 'equals';
     readonly value: string;
@@ -1132,7 +1130,7 @@ type EqualsEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
@@ -1147,7 +1145,7 @@ type RubricsEvaluatorConfig = {
     readonly required?: boolean | number;
     /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
     readonly min_score?: number;
-    /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
+    /** When true, inverts the grader score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
 };
 /**
@@ -1156,7 +1154,7 @@ type RubricsEvaluatorConfig = {
  * Tool-name resolution is automatic based on the provider kind.
  * For providers not covered by the built-in mapping, use a code-grader.
  */
-type SkillTriggerEvaluatorConfig = {
+type SkillTriggerGraderConfig = {
     readonly name: string;
     readonly type: 'skill-trigger';
     /** The skill name to check for (case-sensitive substring match) */
@@ -1182,7 +1180,7 @@ type InlineAssertEvaluatorConfig = {
     readonly min_score?: number;
     readonly negate?: boolean;
 };
-type EvaluatorConfig = CodeEvaluatorConfig | LlmGraderEvaluatorConfig | CompositeEvaluatorConfig | ToolTrajectoryEvaluatorConfig | FieldAccuracyEvaluatorConfig | LatencyEvaluatorConfig | CostEvaluatorConfig | TokenUsageEvaluatorConfig | ExecutionMetricsEvaluatorConfig | SkillTriggerEvaluatorConfig | ContainsEvaluatorConfig | ContainsAnyEvaluatorConfig | ContainsAllEvaluatorConfig | IcontainsEvaluatorConfig | IcontainsAnyEvaluatorConfig | IcontainsAllEvaluatorConfig | StartsWithEvaluatorConfig | EndsWithEvaluatorConfig | RegexEvaluatorConfig | IsJsonEvaluatorConfig | EqualsEvaluatorConfig | RubricsEvaluatorConfig | InlineAssertEvaluatorConfig;
+type GraderConfig = CodeGraderConfig | LlmGraderConfig | CompositeGraderConfig | ToolTrajectoryGraderConfig | FieldAccuracyGraderConfig | LatencyGraderConfig | CostGraderConfig | TokenUsageGraderConfig | ExecutionMetricsGraderConfig | SkillTriggerGraderConfig | ContainsGraderConfig | ContainsAnyGraderConfig | ContainsAllGraderConfig | IcontainsGraderConfig | IcontainsAnyGraderConfig | IcontainsAllGraderConfig | StartsWithGraderConfig | EndsWithGraderConfig | RegexGraderConfig | IsJsonGraderConfig | EqualsGraderConfig | RubricsEvaluatorConfig | InlineAssertEvaluatorConfig;
 /**
  * A single turn in a multi-turn conversation evaluation.
  * Each turn is a user message. The runner generates the assistant response.
@@ -1193,7 +1191,7 @@ interface ConversationTurn {
     /** Reference assistant response for grading (NOT carried forward — actual LLM response is used) */
     readonly expected_output?: TestMessageContent;
     /** Per-turn assertions. Strings become rubric criteria via shorthand. */
-    readonly assertions?: readonly (string | EvaluatorConfig)[];
+    readonly assertions?: readonly (string | GraderConfig)[];
 }
 /**
  * Conversation evaluation mode.
@@ -1228,8 +1226,8 @@ interface EvalTest {
     readonly reference_answer?: string;
     readonly file_paths: readonly string[];
     readonly criteria: string;
-    readonly evaluator?: EvaluatorKind;
-    readonly assertions?: readonly EvaluatorConfig[];
+    readonly evaluator?: GraderKind;
+    readonly assertions?: readonly GraderConfig[];
     /** Suite-level preprocessors used by the implicit default llm-grader. */
     readonly preprocessors?: readonly ContentPreprocessorConfig[];
     /** Workspace configuration (merged from suite-level and case-level) */
@@ -1293,7 +1291,7 @@ interface TrialResult {
     readonly attempt: number;
     readonly score: number;
     readonly verdict: EvaluationVerdict;
-    readonly scores?: readonly EvaluatorResult[];
+    readonly scores?: readonly GraderResult[];
     readonly error?: string;
     readonly costUsd?: number;
     /** Primary classification for this trial attempt */
@@ -1359,7 +1357,7 @@ interface ExecutionError {
  */
 type FailOnError = boolean;
 /**
- * Evaluator scorecard for a single eval case run.
+ * Grader scorecard for a single eval case run.
  */
 interface EvaluationResult {
     readonly timestamp: string;
@@ -1390,7 +1388,7 @@ interface EvaluationResult {
         readonly lm?: JsonObject;
         readonly evaluator?: JsonObject;
     };
-    readonly scores?: readonly EvaluatorResult[];
+    readonly scores?: readonly GraderResult[];
     readonly error?: string;
     /** Lightweight summary of the execution trace (always included when available) */
     readonly trace?: TraceSummary;
@@ -1433,9 +1431,9 @@ interface EvaluationResult {
     readonly executionError?: ExecutionError;
 }
 type EvaluationVerdict = 'pass' | 'fail' | 'skip';
-interface EvaluatorResult {
+interface GraderResult {
     readonly name: string;
-    readonly type: EvaluatorKind;
+    readonly type: GraderKind;
     readonly score: number;
     readonly weight?: number;
     readonly verdict?: EvaluationVerdict;
@@ -1444,7 +1442,7 @@ interface EvaluatorResult {
     readonly input?: JsonObject;
     /** Target name used for grading (e.g., the LLM provider name). */
     readonly target?: string;
-    readonly scores?: readonly EvaluatorResult[];
+    readonly scores?: readonly GraderResult[];
     /** Optional structured details from code graders (e.g., TP/TN/FP/FN counts). */
     readonly details?: JsonObject;
     /** Token usage from LLM calls made by this evaluator (optional). */
@@ -1457,156 +1455,558 @@ interface EvaluatorResult {
     readonly endedAt?: string;
 }
-declare const MetadataSchema: z.ZodObject<{
-    name: z.ZodString;
-    description: z.ZodOptional<z.ZodString>;
-    version: z.ZodOptional<z.ZodString>;
-    author: z.ZodOptional<z.ZodString>;
-    tags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
-    license: z.ZodOptional<z.ZodString>;
-    requires: z.ZodOptional<z.ZodObject<{
-        agentv: z.ZodOptional<z.ZodString>;
-    }, "strip", z.ZodTypeAny, {
-        agentv?: string | undefined;
+/**
+ * Strict normalized schema for CLI target configuration.
+ * This is the final validated shape after environment variable resolution
+ * and internal field normalization.
+ *
+ * Uses .strict() to reject unknown properties, ensuring configuration
+ * errors are caught early rather than silently ignored.
+ *
+ * @example
+ * ```typescript
+ * const config: CliNormalizedConfig = {
+ *   command: 'agent run {PROMPT}',
+ *   timeoutMs: 120000,
+ *   verbose: true,
+ * };
+ * CliTargetConfigSchema.parse(config); // Validates the normalized config
+ * ```
+ */
+declare const CliTargetConfigSchema: z.ZodObject<{
+    command: z.ZodString;
+    filesFormat: z.ZodOptional<z.ZodString>;
+    cwd: z.ZodOptional<z.ZodString>;
+    timeoutMs: z.ZodOptional<z.ZodNumber>;
+    healthcheck: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
+        url: z.ZodString;
+        timeoutMs: z.ZodOptional<z.ZodNumber>;
+    }, "strict", z.ZodTypeAny, {
+        url: string;
+        timeoutMs?: number | undefined;
     }, {
-        agentv?: string | undefined;
-    }>>;
-}, "strip", z.ZodTypeAny, {
-    name: string;
-    description?: string | undefined;
-    version?: string | undefined;
-    author?: string | undefined;
-    tags?: string[] | undefined;
-    license?: string | undefined;
-    requires?: {
-        agentv?: string | undefined;
+        url: string;
+        timeoutMs?: number | undefined;
+    }>, z.ZodObject<{
+        command: z.ZodString;
+        cwd: z.ZodOptional<z.ZodString>;
+        timeoutMs: z.ZodOptional<z.ZodNumber>;
+    }, "strict", z.ZodTypeAny, {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
+    }, {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
+    }>]>>;
+    verbose: z.ZodOptional<z.ZodBoolean>;
+    keepTempFiles: z.ZodOptional<z.ZodBoolean>;
+}, "strict", z.ZodTypeAny, {
+    command: string;
+    timeoutMs?: number | undefined;
+    cwd?: string | undefined;
+    verbose?: boolean | undefined;
+    healthcheck?: {
+        url: string;
+        timeoutMs?: number | undefined;
+    } | {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
     } | undefined;
+    filesFormat?: string | undefined;
+    keepTempFiles?: boolean | undefined;
 }, {
-    name: string;
-    description?: string | undefined;
-    version?: string | undefined;
-    author?: string | undefined;
-    tags?: string[] | undefined;
-    license?: string | undefined;
-    requires?: {
-        agentv?: string | undefined;
+    command: string;
+    timeoutMs?: number | undefined;
+    cwd?: string | undefined;
+    verbose?: boolean | undefined;
+    healthcheck?: {
+        url: string;
+        timeoutMs?: number | undefined;
+    } | {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
     } | undefined;
+    filesFormat?: string | undefined;
+    keepTempFiles?: boolean | undefined;
 }>;
-type EvalMetadata = z.infer<typeof MetadataSchema>;
-declare const DEFAULT_EVAL_PATTERNS: readonly string[];
-type ExecutionDefaults = {
-    readonly verbose?: boolean;
-    readonly keep_workspaces?: boolean;
-    readonly otel_file?: string;
-    readonly export_otel?: boolean;
-    readonly otel_backend?: string;
-    readonly otel_capture_content?: boolean;
-    readonly otel_group_turns?: boolean;
-    readonly pool_workspaces?: boolean;
-    readonly pool_slots?: number;
-};
-type ResultsExportConfig = {
-    readonly repo: string;
-    readonly path: string;
-    readonly auto_push?: boolean;
-    readonly branch_prefix?: string;
-};
-type AgentVConfig$1 = {
-    readonly required_version?: string;
-    readonly eval_patterns?: readonly string[];
-    readonly execution?: ExecutionDefaults;
-    readonly results?: {
-        readonly export?: ResultsExportConfig;
-    };
-};
-/**
- * Load optional .agentv/config.yaml configuration file.
- * Searches from eval file directory up to repo root.
- */
-declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
-/**
- * Extract target name from parsed eval suite (checks execution.target then falls back to root-level target).
- */
-declare function extractTargetFromSuite(suite: JsonObject): string | undefined;
-/**
- * Extract target refs from parsed eval suite.
- * Supports both string shorthand and object form with hooks.
- * Returns undefined when no targets array is specified.
- */
-declare function extractTargetRefsFromSuite(suite: JsonObject): readonly EvalTargetRef[] | undefined;
-/**
- * Extract target names from parsed eval suite (backward-compat wrapper).
- * Precedence: execution.targets (array) > execution.target (singular).
- * Returns undefined when no targets array is specified.
- */
-declare function extractTargetsFromSuite(suite: JsonObject): readonly string[] | undefined;
-/**
- * Extract workers count from suite-level execution block.
- */
-declare function extractWorkersFromSuite(suite: JsonObject): number | undefined;
-/**
- * Extract per-test targets array from a raw test case object.
- */
-declare function extractTargetsFromTestCase(testCase: JsonObject): readonly string[] | undefined;
-/**
- * Extract trials configuration from parsed eval suite's execution block.
- * Returns undefined when count is 1 or not specified (no-op).
- */
-declare function extractTrialsConfig(suite: JsonObject): TrialsConfig | undefined;
+type CliNormalizedConfig = z.infer<typeof CliTargetConfigSchema>;
 /**
- * Cache configuration parsed from execution block.
+ * Resolved CLI configuration type derived from CliTargetConfigSchema.
+ * This is the final validated shape used by the CLI provider at runtime.
+ * Using Readonly to ensure immutability for runtime safety.
  */
-interface CacheConfig {
-    readonly enabled: boolean;
-    readonly cachePath?: string;
+type CliResolvedConfig = Readonly<CliNormalizedConfig>;
+interface RetryConfig {
+    readonly maxRetries?: number;
+    readonly initialDelayMs?: number;
+    readonly maxDelayMs?: number;
+    readonly backoffFactor?: number;
+    readonly retryableStatusCodes?: readonly number[];
 }
 /**
- * Extract cache configuration from parsed eval suite's execution block.
- * Returns undefined when no cache config is specified.
- */
-declare function extractCacheConfig(suite: JsonObject): CacheConfig | undefined;
-/**
- * Extract `execution.fail_on_error` from parsed eval suite.
- * Accepts `true` or `false`.
- * Returns undefined when not specified.
+ * Selects which OpenAI-compatible API endpoint to use.
+ * - "chat" (default): POST /chat/completions — universally supported by all OpenAI-compatible providers.
+ * - "responses": POST /responses — only supported by api.openai.com.
+ *
+ * Maps to Vercel AI SDK methods: "chat" → provider.chat(model), "responses" → provider(model).
  */
-declare function extractFailOnError(suite: JsonObject): FailOnError | undefined;
+type ApiFormat = 'chat' | 'responses';
 /**
- * Extract `execution.threshold` from parsed eval suite.
- * Accepts a number in [0, 1] range.
- * Returns undefined when not specified.
+ * Azure OpenAI settings used by the Vercel AI SDK.
  */
-declare function extractThreshold(suite: JsonObject): number | undefined;
+interface AzureResolvedConfig {
+    readonly resourceName: string;
+    readonly deploymentName: string;
+    readonly apiKey: string;
+    /** NOTE(review): presumably the Azure OpenAI API version string — confirm. */
+    readonly version?: string;
+    /** Selects the OpenAI-compatible endpoint to call; see `ApiFormat`. */
+    readonly apiFormat?: ApiFormat;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    /** Optional retry settings; see `RetryConfig`. */
+    readonly retry?: RetryConfig;
+}
 /**
- * Formatting mode for segment content.
- * - 'agent': File references only (for providers with filesystem access)
- * - 'lm': Embedded file content with XML tags (for language model providers)
+ * OpenAI-compatible settings used by the Vercel AI SDK.
  */
-type FormattingMode = 'agent' | 'lm';
+interface OpenAIResolvedConfig {
+    readonly baseURL: string;
+    readonly apiKey: string;
+    readonly model: string;
+    /** Selects the OpenAI-compatible endpoint to call; see `ApiFormat`. */
+    readonly apiFormat?: ApiFormat;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    /** Optional retry settings; see `RetryConfig`. */
+    readonly retry?: RetryConfig;
+}
 /**
- * Build prompt inputs by consolidating user request context.
+ * OpenRouter settings used by the Vercel AI SDK provider.
  */
-interface PromptInputs {
-    readonly question: string;
-    readonly chatPrompt?: ChatPrompt;
-    readonly systemMessage?: string;
+interface OpenRouterResolvedConfig {
+    readonly apiKey: string;
+    readonly model: string;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    /** Optional retry settings; see `RetryConfig`. */
+    readonly retry?: RetryConfig;
 }
 /**
- * Build prompt inputs by consolidating user request context.
- *
- * @param testCase - The evaluation test case
- * @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
+ * Anthropic Claude settings used by the Vercel AI SDK.
  */
-declare function buildPromptInputs(testCase: EvalTest, mode?: FormattingMode): Promise<PromptInputs>;
+interface AnthropicResolvedConfig {
+    readonly apiKey: string;
+    readonly model: string;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    /** NOTE(review): presumably a token budget for the model's thinking phase — confirm units and default. */
+    readonly thinkingBudget?: number;
+    /** Optional retry settings; see `RetryConfig`. */
+    readonly retry?: RetryConfig;
+}
 /**
- * Detect file format by extension.
+ * Google Gemini settings used by the Vercel AI SDK.
  */
-declare function detectFormat(filePath: string): 'yaml' | 'jsonl' | 'agent-skills-json';
+interface GeminiResolvedConfig {
+    readonly apiKey: string;
+    readonly model: string;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    /** Optional retry settings; see `RetryConfig`. */
+    readonly retry?: RetryConfig;
+}
+/**
+ * Resolved settings carried by targets of kind `codex` (see `ResolvedTarget`).
+ */
+interface CodexResolvedConfig {
+    readonly model?: string;
+    /** NOTE(review): presumably the Codex CLI binary name or path — confirm. */
+    readonly executable: string;
+    readonly args?: readonly string[];
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+/**
+ * Resolved settings carried by targets of kind `copilot-cli` (see `ResolvedTarget`).
+ */
+interface CopilotCliResolvedConfig {
+    /** NOTE(review): presumably the Copilot CLI binary name or path — confirm. */
+    readonly executable: string;
+    readonly model?: string;
+    readonly args?: readonly string[];
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+/**
+ * Resolved settings carried by targets of kind `copilot-sdk` (see `ResolvedTarget`).
+ * The `byok*` fields describe a bring-your-own-key provider; all of them are optional.
+ */
+interface CopilotSdkResolvedConfig {
+    readonly cliUrl?: string;
+    readonly cliPath?: string;
+    readonly githubToken?: string;
+    readonly model?: string;
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+    /** BYOK provider type: "azure", "openai", or "anthropic". */
+    readonly byokType?: string;
+    /** BYOK base URL for the provider endpoint. */
+    readonly byokBaseUrl?: string;
+    /** BYOK API key for authenticating with the provider. */
+    readonly byokApiKey?: string;
+    /** BYOK bearer token (takes precedence over apiKey when set). */
+    readonly byokBearerToken?: string;
+    /** BYOK Azure API version (e.g. "2024-10-21"). Only used when byokType is "azure". */
+    readonly byokApiVersion?: string;
+    /** BYOK wire API format: "completions" or "responses". */
+    readonly byokWireApi?: string;
+}
+/**
+ * Resolved settings carried by targets of kind `copilot-log` (see `ResolvedTarget`).
+ * The fields describe how a recorded Copilot session is located: by explicit
+ * directory, by session id, or by discovery.
+ * NOTE(review): which option wins when several are set is not visible here — confirm.
+ */
+interface CopilotLogResolvedConfig {
+    /** Explicit path to a session directory containing events.jsonl. */
+    readonly sessionDir?: string;
+    /** Session UUID — combined with sessionStateDir to build the path. */
+    readonly sessionId?: string;
+    /** Auto-discovery mode. 'latest' picks the most recent session. */
+    readonly discover?: 'latest';
+    /** Override the default ~/.copilot/session-state directory. */
+    readonly sessionStateDir?: string;
+    /** Filter discovery by working directory. */
+    readonly cwd?: string;
+}
+/**
+ * Resolved settings carried by targets of kind `pi-coding-agent` (see `ResolvedTarget`).
+ * Has the same fields as `PiCliResolvedConfig` except `executable` and `args`.
+ */
+interface PiCodingAgentResolvedConfig {
+    readonly subprovider?: string;
+    readonly model?: string;
+    readonly apiKey?: string;
+    readonly baseUrl?: string;
+    readonly tools?: string;
+    readonly thinking?: string;
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+/**
+ * Resolved settings carried by targets of kind `pi-cli` (see `ResolvedTarget`).
+ */
+interface PiCliResolvedConfig {
+    /** NOTE(review): presumably the pi CLI binary name or path — confirm. */
+    readonly executable: string;
+    readonly subprovider?: string;
+    readonly model?: string;
+    readonly apiKey?: string;
+    readonly baseUrl?: string;
+    readonly tools?: string;
+    readonly thinking?: string;
+    readonly args?: readonly string[];
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+/**
+ * Resolved settings shared by the `claude`, `claude-cli` and `claude-sdk`
+ * target kinds (see `ResolvedTarget`).
+ */
+interface ClaudeResolvedConfig {
+    readonly executable: string;
+    readonly model?: string;
+    readonly systemPrompt?: string;
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    /** NOTE(review): presumably caps the number of agent turns per run — confirm. */
+    readonly maxTurns?: number;
+    /** NOTE(review): presumably a spend cap in US dollars per run — confirm. */
+    readonly maxBudgetUsd?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+}
+/**
+ * Resolved settings carried by targets of kind `mock` (see `ResolvedTarget`).
+ * NOTE(review): presumably `response` is the canned reply and the `delay*`
+ * fields simulate latency (fixed, or random between min and max) — confirm.
+ */
+interface MockResolvedConfig {
+    readonly response?: string;
+    readonly delayMs?: number;
+    readonly delayMinMs?: number;
+    readonly delayMaxMs?: number;
+}
+/**
+ * Resolved settings shared by the `vscode` and `vscode-insiders` target kinds
+ * (see `ResolvedTarget`). Unlike most sibling configs, `waitForResponse` and
+ * `dryRun` are required here.
+ */
+interface VSCodeResolvedConfig {
+    readonly executable: string;
+    readonly waitForResponse: boolean;
+    readonly dryRun: boolean;
+    readonly subagentRoot?: string;
+    readonly timeoutMs?: number;
+}
+/**
+ * Resolved settings carried by targets of kind `agentv` (see `ResolvedTarget`).
+ * Both fields are required.
+ */
+interface AgentVResolvedConfig {
+    readonly model: string;
+    readonly temperature: number;
+}
+/** Base fields shared by all resolved targets. */
+interface ResolvedTargetBase {
+    /** Name identifying this target. */
+    readonly name: string;
+    /** NOTE(review): presumably the name of another target used for grading — confirm. */
+    readonly graderTarget?: string;
+    readonly workers?: number;
+    readonly providerBatching?: boolean;
+    /**
+     * Whether this target can be executed via executor subagents in subagent mode.
+     * Defaults to `true` for all non-CLI providers. Set `false` in targets.yaml
+     * to force CLI invocation even in subagent mode.
+     */
+    readonly subagentModeAllowed?: boolean;
+    /**
+     * Ordered list of target names to try when the primary target fails after
+     * exhausting retries. Each fallback is attempted in order.
+     */
+    readonly fallbackTargets?: readonly string[];
+}
+/**
+ * Discriminated union of every resolved target, tagged by `kind`.
+ *
+ * Each member combines `ResolvedTargetBase` with a `config` whose type depends
+ * on `kind`, so narrowing on `kind` also narrows `config`. Notes:
+ * - `claude`, `claude-cli` and `claude-sdk` all carry `ClaudeResolvedConfig`.
+ * - `vscode` and `vscode-insiders` share a single member.
+ * - `transcript` carries an empty config (`Record<string, never>`).
+ */
+type ResolvedTarget = (ResolvedTargetBase & {
+    readonly kind: 'openai';
+    readonly config: OpenAIResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'openrouter';
+    readonly config: OpenRouterResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'azure';
+    readonly config: AzureResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'anthropic';
+    readonly config: AnthropicResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'gemini';
+    readonly config: GeminiResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'codex';
+    readonly config: CodexResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'copilot-sdk';
+    readonly config: CopilotSdkResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'copilot-cli';
+    readonly config: CopilotCliResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'copilot-log';
+    readonly config: CopilotLogResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'pi-coding-agent';
+    readonly config: PiCodingAgentResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'pi-cli';
+    readonly config: PiCliResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'claude';
+    readonly config: ClaudeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'claude-cli';
+    readonly config: ClaudeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'claude-sdk';
+    readonly config: ClaudeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'mock';
+    readonly config: MockResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'vscode' | 'vscode-insiders';
+    readonly config: VSCodeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'agentv';
+    readonly config: AgentVResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'cli';
+    readonly config: CliResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'transcript';
+    readonly config: Record<string, never>;
+});
+/**
+ * Optional settings accepted on ALL target definitions regardless of provider.
+ * Exported so the targets validator can reuse the same list — adding a field
+ * here automatically makes it valid in targets.yaml without a separate update.
+ */
+declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "subagent_mode_allowed", "fallback_targets"];
+/**
+ * Resolves the target called `name` against `definitions` and returns a
+ * `TargetDefinition`, or `undefined`.
+ * NOTE(review): presumably follows `use_target` delegation until a concrete
+ * definition is reached and yields `undefined` for an unknown name — the
+ * declaration alone does not show this; confirm against the implementation.
+ *
+ * @param name - Target name to resolve.
+ * @param definitions - Read-only map of target definitions (presumably keyed by target name).
+ * @param env - Optional environment lookup.
+ */
+declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
+/**
+ * Converts a `TargetDefinition` into a `ResolvedTarget`.
+ *
+ * @param definition - Target definition to resolve.
+ * @param env - Optional environment lookup. NOTE(review): presumably used to expand env-var references — confirm.
+ * @param evalFilePath - Optional eval file path. NOTE(review): presumably a base for relative paths — confirm.
+ * @param options - Optional flags; `emitDeprecationWarnings` presumably toggles deprecation warnings (default not visible here).
+ * @returns The resolved target.
+ */
+declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string, options?: {
+    readonly emitDeprecationWarnings?: boolean;
+}): ResolvedTarget;
+/**
+ * Extensible provider registry.
+ *
+ * Replaces the hardcoded switch/case dispatch in createProvider() with
+ * a registry of named factory functions. Built-in providers are registered
+ * at startup; users can add custom providers via the registry API or by
+ * dropping files in `.agentv/providers/`.
+ */
+/**
+ * Factory function that creates a Provider instance from a resolved target.
+ */
+type ProviderFactoryFn = (target: ResolvedTarget) => Provider;
+/**
+ * Registry of provider factory functions keyed by provider kind.
+ *
+ * Built-in providers are registered at startup. Custom providers can be
+ * registered via the `register()` method.
+ */
+declare class ProviderRegistry {
+    /** Internal storage for the registered factories (its type is elided in this declaration). */
+    private readonly factories;
+    /** Register a factory function for a provider kind. Returns `this`, so calls can be chained. */
+    register(kind: string, factory: ProviderFactoryFn): this;
+    /** Get the factory function for a provider kind. */
+    get(kind: string): ProviderFactoryFn | undefined;
+    /** Check if a factory is registered for the given kind. */
+    has(kind: string): boolean;
+    /** List all registered provider kind names. */
+    list(): string[];
+    /**
+     * Create a provider instance from a resolved target.
+     * Falls back to CLI provider for unknown kinds (custom provider escape hatch).
+     */
+    create(target: ResolvedTarget): Provider;
+}
+/**
+ * Zod object schema for suite metadata.
+ *
+ * `name` is the only required key. `description`, `version`, `author`,
+ * `tags` (string array), `license` and `requires.agentv` are optional.
+ * Both the outer object and `requires` use Zod's "strip" unknown-key policy.
+ */
+declare const MetadataSchema: z.ZodObject<{
+    name: z.ZodString;
+    description: z.ZodOptional<z.ZodString>;
+    version: z.ZodOptional<z.ZodString>;
+    author: z.ZodOptional<z.ZodString>;
+    tags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
+    license: z.ZodOptional<z.ZodString>;
+    requires: z.ZodOptional<z.ZodObject<{
+        agentv: z.ZodOptional<z.ZodString>;
+    }, "strip", z.ZodTypeAny, {
+        agentv?: string | undefined;
+    }, {
+        agentv?: string | undefined;
+    }>>;
+}, "strip", z.ZodTypeAny, {
+    name: string;
+    description?: string | undefined;
+    version?: string | undefined;
+    author?: string | undefined;
+    tags?: string[] | undefined;
+    license?: string | undefined;
+    requires?: {
+        agentv?: string | undefined;
+    } | undefined;
+}, {
+    name: string;
+    description?: string | undefined;
+    version?: string | undefined;
+    author?: string | undefined;
+    tags?: string[] | undefined;
+    license?: string | undefined;
+    requires?: {
+        agentv?: string | undefined;
+    } | undefined;
+}>;
+/** Suite metadata object type inferred (via `z.infer`) from `MetadataSchema`. */
+type EvalMetadata = z.infer<typeof MetadataSchema>;
+/**
+ * Read-only list of pattern strings.
+ * NOTE(review): presumably the default globs for discovering eval files, used
+ * when `eval_patterns` is absent from the config — confirm against the implementation.
+ */
+declare const DEFAULT_EVAL_PATTERNS: readonly string[];
+/**
+ * Shape of the optional `execution` block of `AgentVConfig$1`.
+ * Keys are snake_case and every key is optional.
+ * NOTE(review): how these defaults combine with CLI flags or per-suite
+ * settings is not visible in this declaration.
+ */
+type ExecutionDefaults = {
+    readonly verbose?: boolean;
+    readonly keep_workspaces?: boolean;
+    readonly otel_file?: string;
+    readonly export_otel?: boolean;
+    readonly otel_backend?: string;
+    readonly otel_capture_content?: boolean;
+    readonly otel_group_turns?: boolean;
+    readonly pool_workspaces?: boolean;
+    readonly pool_slots?: number;
+};
+/**
+ * Shape of `results.export` in `AgentVConfig$1`. `repo` and `path` are
+ * required; `auto_push` and `branch_prefix` are optional.
+ * NOTE(review): presumably describes a git repository that results are
+ * exported to — confirm against the implementation.
+ */
+type ResultsExportConfig = {
+    readonly repo: string;
+    readonly path: string;
+    readonly auto_push?: boolean;
+    readonly branch_prefix?: string;
+};
+/**
+ * Configuration object that `loadConfig` resolves to. Every key is optional.
+ */
+type AgentVConfig$1 = {
+    readonly required_version?: string;
+    readonly eval_patterns?: readonly string[];
+    /** Execution defaults; see `ExecutionDefaults`. */
+    readonly execution?: ExecutionDefaults;
+    readonly results?: {
+        /** Results export settings; see `ResultsExportConfig`. */
+        readonly export?: ResultsExportConfig;
+    };
+};
+/**
+ * Load optional .agentv/config.yaml configuration file.
+ * Searches from eval file directory up to repo root.
+ *
+ * @param evalFilePath - Path of the eval file whose directory starts the search.
+ * @param repoRoot - Repository root at which the upward search stops.
+ * @returns A promise of the loaded config, or `null` (presumably when no config file is found — confirm).
+ */
+declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
+/**
+ * Extract target name from parsed eval suite (checks execution.target then falls back to root-level target).
+ */
+declare function extractTargetFromSuite(suite: JsonObject): string | undefined;
+/**
+ * Extract target refs from parsed eval suite.
+ * Supports both string shorthand and object form with hooks.
+ * Returns undefined when no targets array is specified.
+ */
+declare function extractTargetRefsFromSuite(suite: JsonObject): readonly EvalTargetRef[] | undefined;
+/**
+ * Extract target names from parsed eval suite (backward-compat wrapper).
+ * Precedence: execution.targets (array) > execution.target (singular).
+ * Returns undefined when no targets array is specified.
+ *
+ * NOTE(review): the precedence line suggests a lone `execution.target` is
+ * honoured, while the last line says `undefined` is returned without a
+ * targets array — confirm which applies.
+ *
+ * @param suite - Parsed eval suite object.
+ */
+declare function extractTargetsFromSuite(suite: JsonObject): readonly string[] | undefined;
+/**
+ * Extract workers count from suite-level execution block.
+ */
+declare function extractWorkersFromSuite(suite: JsonObject): number | undefined;
+/**
+ * Extract per-test targets array from a raw test case object.
+ */
+declare function extractTargetsFromTestCase(testCase: JsonObject): readonly string[] | undefined;
+/**
+ * Extract trials configuration from parsed eval suite's execution block.
+ * Returns undefined when count is 1 or not specified (no-op).
+ */
+declare function extractTrialsConfig(suite: JsonObject): TrialsConfig | undefined;
+/**
+ * Cache configuration parsed from execution block.
+ */
+interface CacheConfig {
+    readonly enabled: boolean;
+    readonly cachePath?: string;
+}
+/**
+ * Extract cache configuration from parsed eval suite's execution block.
+ * Returns undefined when no cache config is specified.
+ */
+declare function extractCacheConfig(suite: JsonObject): CacheConfig | undefined;
+/**
+ * Extract `execution.fail_on_error` from parsed eval suite.
+ * Accepts `true` or `false`.
+ * Returns undefined when not specified.
+ */
+declare function extractFailOnError(suite: JsonObject): FailOnError | undefined;
+/**
+ * Extract `execution.threshold` from parsed eval suite.
+ * Accepts a number in [0, 1] range.
+ * Returns undefined when not specified.
+ */
+declare function extractThreshold(suite: JsonObject): number | undefined;
+/**
+ * Formatting mode for segment content.
+ * - 'agent': File references only (for providers with filesystem access)
+ * - 'lm': Embedded file content with XML tags (for language model providers)
+ */
+type FormattingMode = 'agent' | 'lm';
+/**
+ * Prompt inputs that `buildPromptInputs` resolves to: the required `question`
+ * text plus an optional chat-style prompt and an optional system message.
+ */
+interface PromptInputs {
+    readonly question: string;
+    readonly chatPrompt?: ChatPrompt;
+    readonly systemMessage?: string;
+}
+/**
+ * Build prompt inputs by consolidating user request context.
+ *
+ * @param testCase - The evaluation test case
+ * @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
+ */
+declare function buildPromptInputs(testCase: EvalTest, mode?: FormattingMode): Promise<PromptInputs>;
+/**
+ * Detect file format by extension.
+ *
+ * @param filePath - Path of the file to classify.
+ * @returns One of 'yaml', 'jsonl', 'agent-skills-json' or 'typescript'.
+ * NOTE(review): the extension-to-format mapping and the behaviour for
+ * unrecognised extensions are not visible in this declaration.
+ */
+declare function detectFormat(filePath: string): 'yaml' | 'jsonl' | 'agent-skills-json' | 'typescript';
 type LoadOptions = {
     readonly verbose?: boolean;
     /** Filter tests by ID pattern(s) (glob supported, e.g., "summary-*"). Arrays use OR logic. */
@@ -1642,13 +2042,17 @@ type EvalSuiteResult = {
     /** Suite-level metadata (name, description, version, etc.) */
     readonly metadata?: EvalMetadata;
     /** Suite-level total cost budget in USD */
-    readonly totalBudgetUsd?: number;
+    readonly budgetUsd?: number;
     /** Execution error tolerance: true or false */
     readonly failOnError?: FailOnError;
     /** Suite-level quality threshold (0-1) — suite fails if mean score is below */
     readonly threshold?: number;
     /** Resolved workspace.path from the eval YAML (after env-var expansion), if set */
     readonly workspacePath?: string;
+    /** Inline target definition from a TS eval config. */
+    readonly inlineTarget?: TargetDefinition;
+    /** Custom provider factory from a TS eval config task(). */
+    readonly providerFactory?: ProviderFactoryFn;
 };
 /**
  * Load tests and suite metadata from a single parse.
@@ -1695,495 +2099,370 @@ declare function isAgentSkillsFormat(parsed: unknown): parsed is AgentSkillsEval
 declare function parseAgentSkillsEvals(parsed: unknown, source?: string, baseDir?: string): readonly EvalTest[];
 /**
- * EVAL.yaml → evals.json transpiler.
+ * Types for inline assertion functions used in the evaluate() API.
  *
- * Converts an AgentV EVAL.yaml file into Agent Skills evals.json format
- * for consumption by the skill-creator pipeline.
+ * Inline functions are the escape hatch for custom evaluation logic
+ * that doesn't fit a built-in grader type. For built-in assertions
+ * (contains, regex, is-json, etc.), use config objects instead:
  *
- * Handles both `assertions:` (current) and `assert:` (deprecated alias).
- */
-interface EvalsJsonCase {
-    id: number;
-    prompt: string;
-    expected_output?: string;
-    files?: string[];
-    should_trigger?: boolean;
-    assertions: string[];
-}
-interface EvalsJsonFile {
-    skill_name: string;
-    evals: EvalsJsonCase[];
-}
-/**
- * Result of transpiling a single EVAL.yaml.
- * May produce multiple evals.json files (one per skill).
- */
-interface TranspileResult {
-    /** Map from skill_name → EvalsJsonFile */
-    files: Map<string, EvalsJsonFile>;
-    /** Warning messages accumulated during transpilation */
-    warnings: string[];
-}
-/**
- * Transpile a parsed EVAL.yaml object into one or more evals.json objects.
+ *   assert: [{ type: 'contains', value: 'hello' }]
  *
- * @param suite  Parsed YAML object (already loaded, no file I/O here)
- * @param source Source identifier for error messages (e.g. file path)
- */
-declare function transpileEvalYaml(suite: unknown, source?: string): TranspileResult;
-/**
- * Transpile an EVAL.yaml file into one or more evals.json objects.
- * Returns a map from output filename → JSON content.
+ * Inline functions are for custom logic:
  *
- * @param evalYamlPath  Absolute path to the EVAL.yaml file
- */
-declare function transpileEvalYamlFile(evalYamlPath: string): TranspileResult;
-/**
- * Determine the output filename(s) for a transpile result.
- * Single skill → "evals.json"
- * Multiple skills → "<skill>.evals.json"
- */
-declare function getOutputFilenames(result: TranspileResult): Map<string, string>;
-declare function fileExists(filePath: string): Promise<boolean>;
-/**
- * Normalize line endings to LF (\n).
- * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
- */
-declare function normalizeLineEndings(content: string): string;
-/**
- * Read a text file and normalize line endings to LF (\n).
- * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
- */
-declare function readTextFile(filePath: string): Promise<string>;
-/**
- * Read a JSON file and parse it.
- */
-declare function readJsonFile<T = unknown>(filePath: string): Promise<T>;
-/**
- * Find git repository root by walking up the directory tree.
- */
-declare function findGitRoot(startPath: string): Promise<string | null>;
-/**
- * Build a chain of directories walking from a file's location up to repo root.
- * Used for discovering configuration files like targets.yaml or config.yaml.
- */
-declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
-/**
- * Build search roots for file resolution, matching yaml-parser behavior.
- * Searches from eval file directory up to repo root.
- */
-declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
-/**
- * Resolve a file reference using search roots, matching yaml-parser behavior.
+ *   assert: [({ output }) => ({ name: 'len', score: output.length > 5 ? 1 : 0 })]
  */
-declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
-    readonly displayPath: string;
-    readonly resolvedPath?: string;
-    readonly attempted: readonly string[];
-}>;
+/**
+ * Context passed to inline assertion functions.
+ * NOTE(review): presumably `output` is the response under test and the other
+ * fields mirror the test definition — confirm against the implementation.
+ */
+interface AssertContext {
+    readonly input: string;
+    readonly output: string;
+    readonly expectedOutput?: string;
+    readonly criteria?: string;
+    readonly metadata?: Record<string, unknown>;
+}
+/** Result from an inline assertion function */
+interface AssertResult {
+    /** Name under which this assertion's result is reported. */
+    readonly name: string;
+    /** Numeric score. NOTE(review): presumably expected in the 0-1 range — confirm. */
+    readonly score: number;
+    readonly metadata?: Record<string, unknown>;
+}
+/** Inline assertion function signature */
+type AssertFn = (ctx: AssertContext) => AssertResult | Promise<AssertResult>;
 /**
- * Strict normalized schema for CLI target configuration.
- * This is the final validated shape after environment variable resolution
- * and internal field normalization.
+ * Programmatic API for running evaluations.
  *
- * Uses .strict() to reject unknown properties, ensuring configuration
- * errors are caught early rather than silently ignored.
+ * Provides `evaluate()` — a high-level function for using AgentV as a library
+ * instead of a CLI. The config shape mirrors the YAML structure for easy
+ * translation between file-based and programmatic usage.
  *
- * @example
+ * @example Inline tests with config objects
  * ```typescript
- * const config: CliNormalizedConfig = {
- *   command: 'agent run {PROMPT}',
- *   timeoutMs: 120000,
- *   verbose: true,
- * };
- * CliTargetConfigSchema.parse(config); // Validates the normalized config
+ * import { evaluate } from '@agentv/core';
+ *
+ * const results = await evaluate({
+ *   tests: [
+ *     {
+ *       id: 'capital',
+ *       input: 'What is the capital of France?',
+ *       expectedOutput: 'Paris',
+ *       assert: [{ type: 'contains', value: 'Paris' }],
+ *     },
+ *   ],
+ *   target: { provider: 'mock_agent' },
+ * });
+ *
+ * console.log(results.summary.passed, 'passed');
+ * ```
+ *
+ * @example Inline tests with task function and custom assertion
+ * ```typescript
+ * import { evaluate } from '@agentv/core';
+ *
+ * const { summary } = await evaluate({
+ *   tests: [
+ *     {
+ *       id: 'echo',
+ *       input: 'hello',
+ *       expectedOutput: 'Echo: hello',
+ *       assert: [
+ *         { type: 'contains', value: 'hello' },
+ *         { type: 'equals' },
+ *         ({ output }) => ({ name: 'custom', score: output.length > 0 ? 1 : 0 }),
+ *       ],
+ *     },
+ *   ],
+ *   task: async (input) => `Echo: ${input}`,
+ * });
  * ```
- */
-declare const CliTargetConfigSchema: z.ZodObject<{
-    command: z.ZodString;
-    filesFormat: z.ZodOptional<z.ZodString>;
-    cwd: z.ZodOptional<z.ZodString>;
-    timeoutMs: z.ZodOptional<z.ZodNumber>;
-    healthcheck: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
-        url: z.ZodString;
-        timeoutMs: z.ZodOptional<z.ZodNumber>;
-    }, "strict", z.ZodTypeAny, {
-        url: string;
-        timeoutMs?: number | undefined;
-    }, {
-        url: string;
-        timeoutMs?: number | undefined;
-    }>, z.ZodObject<{
-        command: z.ZodString;
-        cwd: z.ZodOptional<z.ZodString>;
-        timeoutMs: z.ZodOptional<z.ZodNumber>;
-    }, "strict", z.ZodTypeAny, {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    }, {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    }>]>>;
-    verbose: z.ZodOptional<z.ZodBoolean>;
-    keepTempFiles: z.ZodOptional<z.ZodBoolean>;
-}, "strict", z.ZodTypeAny, {
-    command: string;
-    timeoutMs?: number | undefined;
-    cwd?: string | undefined;
-    verbose?: boolean | undefined;
-    healthcheck?: {
-        url: string;
-        timeoutMs?: number | undefined;
-    } | {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    } | undefined;
-    filesFormat?: string | undefined;
-    keepTempFiles?: boolean | undefined;
-}, {
-    command: string;
-    timeoutMs?: number | undefined;
-    cwd?: string | undefined;
-    verbose?: boolean | undefined;
-    healthcheck?: {
-        url: string;
-        timeoutMs?: number | undefined;
-    } | {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    } | undefined;
-    filesFormat?: string | undefined;
-    keepTempFiles?: boolean | undefined;
-}>;
-type CliNormalizedConfig = z.infer<typeof CliTargetConfigSchema>;
-/**
- * Resolved CLI configuration type derived from CliTargetConfigSchema.
- * This is the final validated shape used by the CLI provider at runtime.
- * Using Readonly to ensure immutability for runtime safety.
- */
-type CliResolvedConfig = Readonly<CliNormalizedConfig>;
-interface RetryConfig {
-    readonly maxRetries?: number;
-    readonly initialDelayMs?: number;
-    readonly maxDelayMs?: number;
-    readonly backoffFactor?: number;
-    readonly retryableStatusCodes?: readonly number[];
-}
-/**
- * Selects which OpenAI-compatible API endpoint to use.
- * - "chat" (default): POST /chat/completions — universally supported by all OpenAI-compatible providers.
- * - "responses": POST /responses — only supported by api.openai.com.
  *
- * Maps to Vercel AI SDK methods: "chat" → provider.chat(model), "responses" → provider(model).
+ * @example File-based
+ * ```typescript
+ * const results = await evaluate({
+ *   specFile: './evals/EVAL.yaml',
+ *   target: { provider: 'claude_agent' },
+ * });
+ * ```
+ *
+ * @module
  */
-type ApiFormat = 'chat' | 'responses';
 /**
- * Azure OpenAI settings used by the Vercel AI SDK.
+ * Inline test definition for the programmatic API.
+ * Mirrors the YAML test structure.
  */
-interface AzureResolvedConfig {
-    readonly resourceName: string;
-    readonly deploymentName: string;
-    readonly apiKey: string;
-    readonly version?: string;
-    readonly apiFormat?: ApiFormat;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
+interface EvalTestInput {
+    /** Unique test identifier */
+    readonly id: string;
+    /** What the response should accomplish */
+    readonly criteria?: string;
+    /** Input to the agent (string or message array). Omit when using turns[]. */
+    readonly input?: string | readonly {
+        role: string;
+        content: string;
+    }[];
+    /** Expected reference output (camelCase preferred) */
+    readonly expectedOutput?: string;
+    /** @deprecated Use `expectedOutput` instead */
+    readonly expected_output?: string;
+    /** Assertion graders — accepts factory functions, config objects, or inline functions */
+    readonly assert?: readonly AssertEntry[];
+    /** Arbitrary metadata */
+    readonly metadata?: Record<string, unknown>;
+    /** Enable multi-turn conversation mode. Inferred automatically when turns[] is provided. */
+    readonly mode?: 'conversation';
+    /** Ordered turns for conversation evaluation. Each turn generates a fresh LLM call. */
+    readonly turns?: readonly ConversationTurnInput[];
+    /** Score aggregation across turns: 'mean' (default), 'min', or 'max'. */
+    readonly aggregation?: ConversationAggregation;
 }
 /**
- * OpenAI-compatible settings used by the Vercel AI SDK.
+ * A single turn in a multi-turn conversation evaluation (programmatic API).
+ * Mirrors the YAML `turns` structure with camelCase naming.
  */
-interface OpenAIResolvedConfig {
-    readonly baseURL: string;
-    readonly apiKey: string;
-    readonly model: string;
-    readonly apiFormat?: ApiFormat;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
+interface ConversationTurnInput {
+    /** Input for this turn (string or message array) */
+    readonly input: string | readonly {
+        role: string;
+        content: string;
+    }[];
+    /** Expected reference output for this turn */
+    readonly expectedOutput?: string;
+    /** @deprecated Use `expectedOutput` instead */
+    readonly expected_output?: string;
+    /** Per-turn assertions (inline functions or grader config objects; see `AssertEntry`) */
+    readonly assert?: readonly AssertEntry[];
 }
 /**
- * OpenRouter settings used by the Vercel AI SDK provider.
+ * Inline assertion definition for the programmatic API.
+ * Matches the YAML `assert` block structure.
  */
-interface OpenRouterResolvedConfig {
-    readonly apiKey: string;
-    readonly model: string;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
+interface EvalAssertionInput {
+    /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
+    readonly type: string;
+    /** Display name */
+    readonly name?: string;
+    /** Value for deterministic assertions (contains, equals, regex) */
+    readonly value?: string;
+    /** Weight for scoring */
+    readonly weight?: number;
+    /** Whether this assertion is required to pass */
+    readonly required?: boolean | number;
+    /** Minimum score (0-1) for this assertion to pass. Independent of `required` gate. */
+    readonly min_score?: number;
+    /** Prompt file for `llm-grader` assertions */
+    readonly prompt?: string;
+    /** Script for `code-grader` assertions */
+    readonly script?: string | readonly string[];
+    /** Additional config passed to the assertion */
+    readonly config?: Record<string, unknown>;
+    /** Nested assertions for composite type */
+    readonly assert?: readonly EvalAssertionInput[];
+    /** Rubric criteria for rubrics type */
+    readonly criteria?: readonly (string | {
+        id?: string;
+        outcome: string;
+        weight?: number;
+    })[];
+    /** Additional properties */
+    readonly [key: string]: unknown;
 }
+/** Assert entry: inline function or config object */
+type AssertEntry = AssertFn | EvalAssertionInput;
 /**
- * Anthropic Claude settings used by the Vercel AI SDK.
+ * Configuration for `evaluate()`.
+ * Accepts either inline tests or a spec file path.
  */
-interface AnthropicResolvedConfig {
-    readonly apiKey: string;
-    readonly model: string;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly thinkingBudget?: number;
-    readonly retry?: RetryConfig;
+interface EvalConfig {
+    /** Inline test definitions (mutually exclusive with specFile) */
+    readonly tests?: readonly EvalTestInput[];
+    /** Path to an EVAL.yaml spec file (mutually exclusive with tests) */
+    readonly specFile?: string;
+    /** Target provider configuration */
+    readonly target?: TargetDefinition;
+    /** Custom task function — mutually exclusive with target */
+    readonly task?: (input: string) => string | Promise<string>;
+    /** Suite-level assertions applied to all tests */
+    readonly assert?: readonly AssertEntry[];
+    /** Optional suite metadata used by CLI discovery, tagging, and reporting. */
+    readonly metadata?: EvalMetadata;
+    /** Filter tests by ID pattern(s) (glob supported). Arrays use OR logic. */
+    readonly filter?: string | readonly string[];
+    /** Maximum concurrent workers (default: 3) */
+    readonly workers?: number;
+    /** Maximum retries on failure (default: 2) */
+    readonly maxRetries?: number;
+    /** Agent timeout in milliseconds. No timeout if not set. */
+    readonly agentTimeoutMs?: number;
+    /** Enable response caching */
+    readonly cache?: boolean;
+    /** Verbose logging */
+    readonly verbose?: boolean;
+    /** Callback for each completed result */
+    readonly onResult?: (result: EvaluationResult) => void;
+    /** Score threshold for pass/fail (0-1). Default: 0.8 (DEFAULT_THRESHOLD). */
+    readonly threshold?: number;
+    /** Command(s) to run once before the suite starts. Same semantics as YAML before_all. */
+    readonly beforeAll?: string | readonly string[];
+    /** Suite-level cost cap in USD. Stops dispatching new tests when exceeded. */
+    readonly budgetUsd?: number;
 }
 /**
- * Google Gemini settings used by the Vercel AI SDK.
+ * Summary statistics for an evaluation run.
  */
-interface GeminiResolvedConfig {
-    readonly apiKey: string;
-    readonly model: string;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
-}
-interface CodexResolvedConfig {
-    readonly model?: string;
-    readonly executable: string;
-    readonly args?: readonly string[];
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-}
-interface CopilotCliResolvedConfig {
-    readonly executable: string;
-    readonly model?: string;
-    readonly args?: readonly string[];
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-}
-interface CopilotSdkResolvedConfig {
-    readonly cliUrl?: string;
-    readonly cliPath?: string;
-    readonly githubToken?: string;
-    readonly model?: string;
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-    /** BYOK provider type: "azure", "openai", or "anthropic". */
-    readonly byokType?: string;
-    /** BYOK base URL for the provider endpoint. */
-    readonly byokBaseUrl?: string;
-    /** BYOK API key for authenticating with the provider. */
-    readonly byokApiKey?: string;
-    /** BYOK bearer token (takes precedence over apiKey when set). */
-    readonly byokBearerToken?: string;
-    /** BYOK Azure API version (e.g. "2024-10-21"). Only used when byokType is "azure". */
-    readonly byokApiVersion?: string;
-    /** BYOK wire API format: "completions" or "responses". */
-    readonly byokWireApi?: string;
-}
-interface CopilotLogResolvedConfig {
-    /** Explicit path to a session directory containing events.jsonl. */
-    readonly sessionDir?: string;
-    /** Session UUID — combined with sessionStateDir to build the path. */
-    readonly sessionId?: string;
-    /** Auto-discovery mode. 'latest' picks the most recent session. */
-    readonly discover?: 'latest';
-    /** Override the default ~/.copilot/session-state directory. */
-    readonly sessionStateDir?: string;
-    /** Filter discovery by working directory. */
-    readonly cwd?: string;
-}
-interface PiCodingAgentResolvedConfig {
-    readonly subprovider?: string;
-    readonly model?: string;
-    readonly apiKey?: string;
-    readonly baseUrl?: string;
-    readonly tools?: string;
-    readonly thinking?: string;
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-}
-interface PiCliResolvedConfig {
-    readonly executable: string;
-    readonly subprovider?: string;
-    readonly model?: string;
-    readonly apiKey?: string;
-    readonly baseUrl?: string;
-    readonly tools?: string;
-    readonly thinking?: string;
-    readonly args?: readonly string[];
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
+interface EvalSummary {
+    /** Total number of test cases */
+    readonly total: number;
+    /** Number of passing test cases (score >= threshold) */
+    readonly passed: number;
+    /** Number of failing test cases (score < threshold) */
+    readonly failed: number;
+    /** Total duration in milliseconds */
+    readonly durationMs: number;
+    /** Mean score across all cases */
+    readonly meanScore: number;
 }
-interface ClaudeResolvedConfig {
-    readonly executable: string;
-    readonly model?: string;
-    readonly systemPrompt?: string;
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly maxTurns?: number;
-    readonly maxBudgetUsd?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
+/**
+ * Result of an `evaluate()` call.
+ */
+interface EvalRunResult {
+    /** Individual test case results */
+    readonly results: readonly EvaluationResult[];
+    /** Aggregate summary statistics */
+    readonly summary: EvalSummary;
 }
-interface MockResolvedConfig {
-    readonly response?: string;
-    readonly delayMs?: number;
-    readonly delayMinMs?: number;
-    readonly delayMaxMs?: number;
+/**
+ * Run an evaluation suite against a target provider.
+ *
+ * Accepts either inline test definitions or a path to an EVAL.yaml spec file.
+ * The config shape mirrors the YAML structure — users can translate between
+ * file-based and programmatic usage 1:1.
+ *
+ * @param config - Evaluation configuration
+ * @returns Typed evaluation results with summary statistics
+ *
+ * @example Inline tests with assertions
+ * ```typescript
+ * const { results, summary } = await evaluate({
+ *   tests: [
+ *     {
+ *       id: 'greeting',
+ *       input: 'Say hello',
+ *       assert: [{ type: 'contains', value: 'hello' }],
+ *     },
+ *   ],
+ *   target: { provider: 'mock_agent' },
+ * });
+ * console.log(`${summary.passed}/${summary.total} passed`);
+ * ```
+ *
+ * @example Load from YAML
+ * ```typescript
+ * const { summary } = await evaluate({
+ *   specFile: './evals/my-eval.yaml',
+ *   filter: 'greeting-*',
+ * });
+ * ```
+ */
+declare function evaluate(config: EvalConfig): Promise<EvalRunResult>;
+interface TsEvalResult {
+    readonly config: EvalConfig;
+    readonly filePath: string;
 }
-interface VSCodeResolvedConfig {
-    readonly executable: string;
-    readonly waitForResponse: boolean;
-    readonly dryRun: boolean;
-    readonly subagentRoot?: string;
-    readonly timeoutMs?: number;
+/**
+ * Import a *.eval.ts file and extract the EvalConfig export.
+ * Tries default, `config`, and `evalConfig` named exports in priority order.
+ */
+declare function loadTsEvalFile(filePath: string): Promise<TsEvalResult>;
+/**
+ * EVAL.yaml → evals.json transpiler.
+ *
+ * Converts an AgentV EVAL.yaml file into Agent Skills evals.json format
+ * for consumption by the skill-creator pipeline.
+ *
+ * Handles both `assertions:` (current) and `assert:` (deprecated alias).
+ */
+interface EvalsJsonCase {
+    id: number;
+    prompt: string;
+    expected_output?: string;
+    files?: string[];
+    should_trigger?: boolean;
+    assertions: string[];
 }
-interface AgentVResolvedConfig {
-    readonly model: string;
-    readonly temperature: number;
+interface EvalsJsonFile {
+    skill_name: string;
+    evals: EvalsJsonCase[];
 }
-/** Base fields shared by all resolved targets. */
-interface ResolvedTargetBase {
-    readonly name: string;
-    readonly graderTarget?: string;
-    readonly workers?: number;
-    readonly providerBatching?: boolean;
-    /**
-     * Whether this target can be executed via executor subagents in subagent mode.
-     * Defaults to `true` for all non-CLI providers. Set `false` in targets.yaml
-     * to force CLI invocation even in subagent mode.
-     */
-    readonly subagentModeAllowed?: boolean;
-    /**
-     * Ordered list of target names to try when the primary target fails after
-     * exhausting retries. Each fallback is attempted in order.
-     */
-    readonly fallbackTargets?: readonly string[];
+/**
+ * Result of transpiling a single EVAL.yaml.
+ * May produce multiple evals.json files (one per skill).
+ */
+interface TranspileResult {
+    /** Map from skill_name → EvalsJsonFile */
+    files: Map<string, EvalsJsonFile>;
+    /** Warning messages accumulated during transpilation */
+    warnings: string[];
 }
-type ResolvedTarget = (ResolvedTargetBase & {
-    readonly kind: 'openai';
-    readonly config: OpenAIResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'openrouter';
-    readonly config: OpenRouterResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'azure';
-    readonly config: AzureResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'anthropic';
-    readonly config: AnthropicResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'gemini';
-    readonly config: GeminiResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'codex';
-    readonly config: CodexResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'copilot-sdk';
-    readonly config: CopilotSdkResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'copilot-cli';
-    readonly config: CopilotCliResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'copilot-log';
-    readonly config: CopilotLogResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'pi-coding-agent';
-    readonly config: PiCodingAgentResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'pi-cli';
-    readonly config: PiCliResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'claude';
-    readonly config: ClaudeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'claude-cli';
-    readonly config: ClaudeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'claude-sdk';
-    readonly config: ClaudeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'mock';
-    readonly config: MockResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'vscode' | 'vscode-insiders';
-    readonly config: VSCodeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'agentv';
-    readonly config: AgentVResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'cli';
-    readonly config: CliResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'transcript';
-    readonly config: Record<string, never>;
-});
 /**
- * Optional settings accepted on ALL target definitions regardless of provider.
- * Exported so the targets validator can reuse the same list — adding a field
- * here automatically makes it valid in targets.yaml without a separate update.
+ * Transpile a parsed EVAL.yaml object into one or more evals.json objects.
+ *
+ * @param suite  Parsed YAML object (already loaded, no file I/O here)
+ * @param source Source identifier for error messages (e.g. file path)
  */
-declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "subagent_mode_allowed", "fallback_targets"];
-declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
-declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string, options?: {
-    readonly emitDeprecationWarnings?: boolean;
-}): ResolvedTarget;
+declare function transpileEvalYaml(suite: unknown, source?: string): TranspileResult;
 /**
- * Extensible provider registry.
+ * Transpile an EVAL.yaml file into one or more evals.json objects.
+ * Returns a TranspileResult: a map from skill name → evals.json content, plus any warnings.
  *
- * Replaces the hardcoded switch/case dispatch in createProvider() with
- * a registry of named factory functions. Built-in providers are registered
- * at startup; users can add custom providers via the registry API or by
- * dropping files in `.agentv/providers/`.
+ * @param evalYamlPath  Absolute path to the EVAL.yaml file
+ */
+declare function transpileEvalYamlFile(evalYamlPath: string): TranspileResult;
+/**
+ * Determine the output filename(s) for a transpile result.
+ * Single skill → "evals.json"
+ * Multiple skills → "<skill>.evals.json"
  */
+declare function getOutputFilenames(result: TranspileResult): Map<string, string>;
+declare function fileExists(filePath: string): Promise<boolean>;
 /**
- * Factory function that creates a Provider instance from a resolved target.
+ * Normalize line endings to LF (\n).
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
  */
-type ProviderFactoryFn = (target: ResolvedTarget) => Provider;
+declare function normalizeLineEndings(content: string): string;
 /**
- * Registry of provider factory functions keyed by provider kind.
- *
- * Built-in providers are registered at startup. Custom providers can be
- * registered via the `register()` method.
+ * Read a text file and normalize line endings to LF (\n).
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
  */
-declare class ProviderRegistry {
-    private readonly factories;
-    /** Register a factory function for a provider kind. */
-    register(kind: string, factory: ProviderFactoryFn): this;
-    /** Get the factory function for a provider kind. */
-    get(kind: string): ProviderFactoryFn | undefined;
-    /** Check if a factory is registered for the given kind. */
-    has(kind: string): boolean;
-    /** List all registered provider kind names. */
-    list(): string[];
-    /**
-     * Create a provider instance from a resolved target.
-     * Falls back to CLI provider for unknown kinds (custom provider escape hatch).
-     */
-    create(target: ResolvedTarget): Provider;
-}
+declare function readTextFile(filePath: string): Promise<string>;
+/**
+ * Read a JSON file and parse it.
+ */
+declare function readJsonFile<T = unknown>(filePath: string): Promise<T>;
+/**
+ * Find git repository root by walking up the directory tree.
+ */
+declare function findGitRoot(startPath: string): Promise<string | null>;
+/**
+ * Build a chain of directories walking from a file's location up to repo root.
+ * Used for discovering configuration files like targets.yaml or config.yaml.
+ */
+declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
+/**
+ * Build search roots for file resolution, matching yaml-parser behavior.
+ * Searches from eval file directory up to repo root.
+ */
+declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
+/**
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
+ */
+declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
+    readonly displayPath: string;
+    readonly resolvedPath?: string;
+    readonly attempted: readonly string[];
+}>;
 declare function readTargetDefinitions(filePath: string): Promise<readonly TargetDefinition[]>;
 declare function listTargetNames(definitions: readonly TargetDefinition[]): readonly string[];
@@ -2346,8 +2625,8 @@ interface EvaluationContext {
     readonly graderProvider?: Provider;
     /** @deprecated Use `graderProvider` instead */
     readonly judgeProvider?: Provider;
-    readonly evaluatorTemplateOverride?: string;
-    readonly evaluator?: EvaluatorConfig;
+    readonly graderTemplateOverride?: string;
+    readonly evaluator?: GraderConfig;
     /** Output messages from agent execution (primary source for tool trajectory) */
     readonly output?: readonly Message[];
     /** Lightweight summary of trace events (if available) */
@@ -2380,8 +2659,8 @@ interface EvaluationScore {
     readonly verdict: EvaluationVerdict;
     readonly assertions: readonly AssertionEntry[];
     readonly expectedAspectCount: number;
-    readonly evaluatorRawRequest?: JsonObject;
-    readonly scores?: readonly ChildEvaluatorResult[];
+    readonly graderRawRequest?: JsonObject;
+    readonly scores?: readonly ChildGraderResult[];
     /** Optional structured details from evaluators (e.g., TP/TN/FP/FN counts, alignments, per-turn scores). */
     readonly details?: JsonObject;
     /** Token usage from LLM calls made by this evaluator (optional). */
@@ -2389,26 +2668,26 @@ interface EvaluationScore {
     /** Target name used for grading (e.g., the LLM provider). */
     readonly graderTarget?: string;
 }
-interface ChildEvaluatorResult {
+interface ChildGraderResult {
     readonly name: string;
     readonly type: string;
     readonly score: number;
     readonly weight?: number;
     readonly verdict: EvaluationVerdict;
     readonly assertions: readonly AssertionEntry[];
-    readonly evaluatorRawRequest?: JsonObject;
-    readonly scores?: readonly ChildEvaluatorResult[];
+    readonly graderRawRequest?: JsonObject;
+    readonly scores?: readonly ChildGraderResult[];
     /** Optional structured details from evaluators (e.g., TP/TN/FP/FN counts, alignments, per-turn scores). */
     readonly details?: JsonObject;
     /** Token usage from LLM calls made by this evaluator (optional). */
     readonly tokenUsage?: TokenUsage;
 }
-interface Evaluator {
+interface Grader {
     readonly kind: string;
     evaluate(context: EvaluationContext): Promise<EvaluationScore> | EvaluationScore;
 }
-interface EvaluatorFactory {
-    create(config: EvaluatorConfig, context: EvaluationContext): Evaluator;
+interface GraderFactory {
+    create(config: GraderConfig, context: EvaluationContext): Grader;
 }
 /**
@@ -2447,7 +2726,7 @@ declare function deepEqual(a: unknown, b: unknown): boolean;
  */
 declare function negateScore(score: EvaluationScore): EvaluationScore;
-interface CodeEvaluatorOptions {
+interface CodeGraderOptions {
     readonly command: readonly string[];
     /** @deprecated Use `command` instead */
     readonly script?: readonly string[];
@@ -2458,29 +2737,29 @@ interface CodeEvaluatorOptions {
     /** Target access config - when present, enables target invocation */
     readonly target?: TargetAccessConfig;
 }
-declare class CodeEvaluator implements Evaluator {
+declare class CodeGrader implements Grader {
     readonly kind = "code-grader";
     private readonly command;
     private readonly cwd?;
     private readonly agentTimeoutMs?;
     private readonly config?;
     private readonly target?;
-    constructor(options: CodeEvaluatorOptions);
+    constructor(options: CodeGraderOptions);
     evaluate(context: EvaluationContext): Promise<EvaluationScore>;
 }
 declare function executeScript(scriptPath: readonly string[] | string, input: string, agentTimeoutMs?: number, cwd?: string, env?: Record<string, string>): Promise<string>;
-interface CompositeEvaluatorOptions {
-    readonly config: CompositeEvaluatorConfig;
-    readonly evaluatorFactory: EvaluatorFactory;
+interface CompositeGraderOptions {
+    readonly config: CompositeGraderConfig;
+    readonly evaluatorFactory: GraderFactory;
     readonly cwd?: string;
 }
-declare class CompositeEvaluator implements Evaluator {
+declare class CompositeGrader implements Grader {
     readonly kind = "composite";
     private readonly config;
     private readonly evaluatorFactory;
     private readonly cwd?;
-    constructor(options: CompositeEvaluatorOptions);
+    constructor(options: CompositeGraderOptions);
     evaluate(context: EvaluationContext): Promise<EvaluationScore>;
     private aggregate;
     private runWeightedAverage;
@@ -2489,50 +2768,50 @@ declare class CompositeEvaluator implements Evaluator {
     private runLlmAggregator;
 }
-interface CostEvaluatorOptions {
-    readonly config: CostEvaluatorConfig;
+interface CostGraderOptions {
+    readonly config: CostGraderConfig;
 }
 /**
- * Evaluator that checks execution cost against a budget.
+ * Grader that checks execution cost against a budget.
  * Uses costUsd from the evaluation context.
  */
-declare class CostEvaluator implements Evaluator {
+declare class CostGrader implements Grader {
     readonly kind = "cost";
     private readonly config;
-    constructor(options: CostEvaluatorOptions);
+    constructor(options: CostGraderOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
 }
-interface ExecutionMetricsEvaluatorOptions {
-    readonly config: ExecutionMetricsEvaluatorConfig;
+interface ExecutionMetricsGraderOptions {
+    readonly config: ExecutionMetricsGraderConfig;
 }
 /**
- * Evaluator that checks execution metrics against configured thresholds.
+ * Grader that checks execution metrics against configured thresholds.
  * Supports multiple threshold types: tool calls, LLM calls, tokens, cost, duration,
  * and exploration ratio. Only specified thresholds are checked.
  *
  * Score is proportional: passed / total assertions
  */
-declare class ExecutionMetricsEvaluator implements Evaluator {
+declare class ExecutionMetricsGrader implements Grader {
     readonly kind = "execution-metrics";
     private readonly config;
-    constructor(options: ExecutionMetricsEvaluatorOptions);
+    constructor(options: ExecutionMetricsGraderOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
     private extractConfiguredThresholds;
     private filterDefinedMetrics;
 }
-interface FieldAccuracyEvaluatorOptions {
-    readonly config: FieldAccuracyEvaluatorConfig;
+interface FieldAccuracyGraderOptions {
+    readonly config: FieldAccuracyGraderConfig;
 }
 /**
- * FieldAccuracyEvaluator compares extracted structured data against expected values
+ * FieldAccuracyGrader compares extracted structured data against expected values
  * with configurable matching strategies (exact, fuzzy, numeric_tolerance, date).
  */
-declare class FieldAccuracyEvaluator implements Evaluator {
+declare class FieldAccuracyGrader implements Grader {
     readonly kind = "field-accuracy";
     private readonly config;
-    constructor(options: FieldAccuracyEvaluatorOptions);
+    constructor(options: FieldAccuracyGraderOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
     /**
      * Extract expected data from expected_output array.
@@ -2561,33 +2840,33 @@ declare class FieldAccuracyEvaluator implements Evaluator {
     private aggregateResults;
 }
-interface LatencyEvaluatorOptions {
-    readonly config: LatencyEvaluatorConfig;
+interface LatencyGraderOptions {
+    readonly config: LatencyGraderConfig;
 }
 /**
- * Evaluator that checks execution duration against a threshold.
+ * Grader that checks execution duration against a threshold.
  * Uses durationMs from the evaluation context.
  */
-declare class LatencyEvaluator implements Evaluator {
+declare class LatencyGrader implements Grader {
     readonly kind = "latency";
     private readonly config;
-    constructor(options: LatencyEvaluatorOptions);
+    constructor(options: LatencyGraderOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
 }
 /**
- * Default evaluator template for the user prompt (variables will be substituted).
- * Custom evaluators can override this via evaluatorTemplate option.
+ * Default grader template for the user prompt (variables will be substituted).
+ * Custom graders can override this via graderTemplate option.
  */
-declare const DEFAULT_EVALUATOR_TEMPLATE: string;
+declare const DEFAULT_GRADER_TEMPLATE: string;
 type GraderProviderResolver = (context: EvaluationContext) => Promise<Provider | undefined>;
-interface LlmGraderEvaluatorOptions {
+interface LlmGraderOptions {
     readonly resolveGraderProvider: GraderProviderResolver;
     /** @deprecated Use `resolveGraderProvider` instead. */
     readonly resolveJudgeProvider?: GraderProviderResolver;
     readonly maxOutputTokens?: number;
     readonly temperature?: number;
-    readonly evaluatorTemplate?: string;
+    readonly graderTemplate?: string;
     readonly maxSteps?: number;
     readonly graderTargetProvider?: Provider;
     /** @deprecated Use `graderTargetProvider` instead. */
@@ -2633,39 +2912,39 @@ declare const rubricEvaluationSchema: z.ZodObject<{
         reasoning: z.ZodString;
     }, "strip", z.ZodTypeAny, {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }, {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }>, "many">;
     overall_reasoning: z.ZodString;
 }, "strip", z.ZodTypeAny, {
     checks: {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }[];
     overall_reasoning: string;
 }, {
     checks: {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }[];
     overall_reasoning: string;
 }>;
-declare class LlmGraderEvaluator implements Evaluator {
+declare class LlmGrader implements Grader {
     readonly kind = "llm-grader";
     private readonly resolveGraderProvider;
     private readonly maxOutputTokens?;
     private readonly temperature?;
-    private readonly evaluatorTemplate?;
+    private readonly graderTemplate?;
     private readonly maxSteps;
     private readonly graderTargetProvider?;
-    constructor(options: LlmGraderEvaluatorOptions);
+    constructor(options: LlmGraderOptions);
     evaluate(context: EvaluationContext): Promise<EvaluationScore>;
     private prepareContext;
     private evaluateFreeform;
@@ -2722,7 +3001,7 @@ declare class LlmGraderEvaluator implements Evaluator {
 }
 /**
  * Build the mandatory output schema that all evaluators must follow.
- * This schema is always appended to the evaluator template.
+ * This schema is always appended to the grader template.
  */
 declare function buildOutputSchema(): string;
 declare function buildRubricOutputSchema(): string;
@@ -2766,10 +3045,10 @@ declare function extractImageBlocks(messages: readonly Message[]): ContentImage[
  *   names (input.skill, input.file_path) regardless of provider.
  */
-declare class SkillTriggerEvaluator implements Evaluator {
+declare class SkillTriggerGrader implements Grader {
     readonly kind = "skill-trigger";
     private readonly config;
-    constructor(config: SkillTriggerEvaluatorConfig);
+    constructor(config: SkillTriggerGraderConfig);
     evaluate(context: EvaluationContext): EvaluationScore;
 }
@@ -2783,33 +3062,33 @@ declare function assembleLlmGraderPrompt(input: {
     evalCase: EvalTest;
     candidate: string;
     promptInputs: PromptInputs;
-    evaluatorConfig?: LlmGraderEvaluatorConfig;
+    evaluatorConfig?: LlmGraderConfig;
     output?: readonly Message[];
     fileChanges?: string;
-    evaluatorTemplateOverride?: string;
+    graderTemplateOverride?: string;
 }): LlmGraderPromptAssembly;
-interface TokenUsageEvaluatorOptions {
-    readonly config: TokenUsageEvaluatorConfig;
+interface TokenUsageGraderOptions {
+    readonly config: TokenUsageGraderConfig;
 }
 /**
- * Evaluator that checks provider-reported token usage against configured limits.
+ * Grader that checks provider-reported token usage against configured limits.
  * Uses tokenUsage from the evaluation context.
  */
-declare class TokenUsageEvaluator implements Evaluator {
+declare class TokenUsageGrader implements Grader {
     readonly kind = "token-usage";
     private readonly config;
-    constructor(options: TokenUsageEvaluatorOptions);
+    constructor(options: TokenUsageGraderOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
 }
-interface ToolTrajectoryEvaluatorOptions {
-    readonly config: ToolTrajectoryEvaluatorConfig;
+interface ToolTrajectoryGraderOptions {
+    readonly config: ToolTrajectoryGraderConfig;
 }
-declare class ToolTrajectoryEvaluator implements Evaluator {
+declare class ToolTrajectoryGrader implements Grader {
     readonly kind = "tool-trajectory";
     private readonly config;
-    constructor(options: ToolTrajectoryEvaluatorOptions);
+    constructor(options: ToolTrajectoryGraderOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
     /**
      * Extract tool calls from output messages.
@@ -2873,7 +3152,7 @@ declare function runIsJsonAssertion(output: string): AssertionResult;
 declare function runEqualsAssertion(output: string, value: string): AssertionResult;
 /**
- * Extensible evaluator registry.
+ * Extensible grader registry.
  *
  * Replaces the hardcoded switch/case dispatch in the orchestrator with
  * a registry of named factory functions. Built-in evaluators are registered
@@ -2882,10 +3161,10 @@ declare function runEqualsAssertion(output: string, value: string): AssertionRes
  */
 /**
- * Context passed to evaluator factory functions during creation.
+ * Context passed to grader factory functions during creation.
  * Contains shared resources needed by evaluator instances.
  */
-interface EvaluatorDispatchContext {
+interface GraderDispatchContext {
     /** Shared LLM grader provider (resolved at suite level) */
     readonly graderProvider?: Provider;
     /** @deprecated Use `graderProvider` instead */
@@ -2899,48 +3178,48 @@ interface EvaluatorDispatchContext {
     /** Directory containing the eval file (for composite member resolution) */
     readonly evalFileDir?: string;
     /** Shared LLM grader evaluator instance */
-    readonly llmGrader: Evaluator;
+    readonly llmGrader: Grader;
     /** @deprecated Use `llmGrader` instead */
-    readonly llmJudge?: Evaluator;
+    readonly llmJudge?: Grader;
     /** Reference to the registry itself (for composite evaluators that need to create children) */
-    readonly registry: EvaluatorRegistry;
+    readonly registry: GraderRegistry;
 }
 /**
- * Factory function that creates an Evaluator instance from a config.
+ * Factory function that creates a Grader instance from a config.
  *
  * Factory functions handle all type-specific initialization logic:
  * - Reading prompt files for LLM graders
  * - Resolving script paths for code graders
  * - Creating adapter evaluators for deterministic assertions
  */
-type EvaluatorFactoryFn = (config: EvaluatorConfig, context: EvaluatorDispatchContext) => Evaluator | Promise<Evaluator>;
+type GraderFactoryFn = (config: GraderConfig, context: GraderDispatchContext) => Grader | Promise<Grader>;
 /**
- * Registry of evaluator factory functions keyed by evaluator type name.
+ * Registry of grader factory functions keyed by grader type name.
  *
  * Built-in evaluators are registered at startup. Custom evaluators can be
  * registered via the `register()` method or discovered from `.agentv/assertions/`.
  */
-declare class EvaluatorRegistry {
+declare class GraderRegistry {
     private readonly factories;
-    /** Register a factory function for an evaluator type. */
-    register(type: string, factory: EvaluatorFactoryFn): this;
-    /** Get the factory function for an evaluator type. */
-    get(type: string): EvaluatorFactoryFn | undefined;
+    /** Register a factory function for a grader type. */
+    register(type: string, factory: GraderFactoryFn): this;
+    /** Get the factory function for a grader type. */
+    get(type: string): GraderFactoryFn | undefined;
     /** Check if a factory is registered for the given type. */
     has(type: string): boolean;
-    /** List all registered evaluator type names. */
+    /** List all registered grader type names. */
     list(): string[];
     /**
      * Create an evaluator instance from a config, using the registered factory.
-     * Throws if no factory is registered for the evaluator type.
+     * Throws if no factory is registered for the grader type.
      */
-    create(config: EvaluatorConfig, context: EvaluatorDispatchContext): Promise<Evaluator>;
+    create(config: GraderConfig, context: GraderDispatchContext): Promise<Grader>;
 }
 /**
- * Adapter that wraps a synchronous assertion function as an Evaluator.
+ * Adapter that wraps a synchronous assertion function as a Grader.
  * Used for deterministic assertions (contains, regex, is-json, equals).
  */
-declare class DeterministicAssertionEvaluator implements Evaluator {
+declare class DeterministicAssertionGrader implements Grader {
     private readonly assertFn;
     readonly kind: string;
     constructor(kind: string, assertFn: (context: EvaluationContext) => EvaluationScore);
@@ -2988,8 +3267,8 @@ interface RunEvalCaseOptions {
     readonly evalCase: EvalTest;
     readonly provider: Provider;
     readonly target: ResolvedTarget;
-    readonly evaluators: Partial<Record<string, Evaluator>> & {
-        readonly 'llm-grader': Evaluator;
+    readonly evaluators: Partial<Record<string, Grader>> & {
+        readonly 'llm-grader': Grader;
     };
     readonly now?: () => Date;
     readonly maxRetries?: number;
@@ -3020,8 +3299,8 @@ interface RunEvalCaseOptions {
     readonly suiteWorkspaceFile?: string;
     /** Real-time observability callbacks passed to the provider */
     readonly streamCallbacks?: ProviderStreamCallbacks;
-    /** Evaluator type registry (with custom assertions discovered) */
-    readonly typeRegistry?: EvaluatorRegistry;
+    /** Grader type registry (with custom assertions discovered) */
+    readonly typeRegistry?: GraderRegistry;
     /** RepoManager instance for repo lifecycle (shared workspace mode) */
     readonly repoManager?: RepoManager;
     /** Directory containing the eval YAML file. Used as default cwd for workspace scripts. */
@@ -3054,7 +3333,7 @@ interface RunEvaluationOptions {
     readonly targets?: readonly TargetDefinition[];
     readonly env?: EnvLookup;
     readonly providerFactory?: (target: ResolvedTarget) => Provider;
-    readonly evaluators?: Partial<Record<string, Evaluator>>;
+    readonly evaluators?: Partial<Record<string, Grader>>;
     readonly maxRetries?: number;
     readonly agentTimeoutMs?: number;
     readonly cache?: EvaluationCache;
@@ -3076,7 +3355,7 @@ interface RunEvaluationOptions {
     /** Real-time observability callbacks passed to the provider */
     readonly streamCallbacks?: ProviderStreamCallbacks;
     /** Suite-level total cost budget in USD (stops dispatching when exceeded) */
-    readonly totalBudgetUsd?: number;
+    readonly budgetUsd?: number;
     /** Execution error tolerance: true halts on first error */
     readonly failOnError?: FailOnError;
     /** Workspace pooling: true (default) enables pool, false disables, undefined defaults to true */
@@ -3107,244 +3386,6 @@ interface RunEvaluationOptions {
 declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
 declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
-/**
- * Types for inline assertion functions used in the evaluate() API.
- *
- * Inline functions are the escape hatch for custom evaluation logic
- * that doesn't fit a built-in evaluator type. For built-in assertions
- * (contains, regex, is-json, etc.), use config objects instead:
- *
- *   assert: [{ type: 'contains', value: 'hello' }]
- *
- * Inline functions are for custom logic:
- *
- *   assert: [({ output }) => ({ name: 'len', score: output.length > 5 ? 1 : 0 })]
- */
-/** Context passed to inline assertion functions */
-interface AssertContext {
-    readonly input: string;
-    readonly output: string;
-    readonly expectedOutput?: string;
-    readonly criteria?: string;
-    readonly metadata?: Record<string, unknown>;
-}
-/** Result from an inline assertion function */
-interface AssertResult {
-    readonly name: string;
-    readonly score: number;
-    readonly metadata?: Record<string, unknown>;
-}
-/** Inline assertion function signature */
-type AssertFn = (ctx: AssertContext) => AssertResult | Promise<AssertResult>;
-/**
- * Programmatic API for running evaluations.
- *
- * Provides `evaluate()` — a high-level function for using AgentV as a library
- * instead of a CLI. The config shape mirrors the YAML structure for easy
- * translation between file-based and programmatic usage.
- *
- * @example Inline tests with config objects
- * ```typescript
- * import { evaluate } from '@agentv/core';
- *
- * const results = await evaluate({
- *   tests: [
- *     {
- *       id: 'capital',
- *       input: 'What is the capital of France?',
- *       expectedOutput: 'Paris',
- *       assert: [{ type: 'contains', value: 'Paris' }],
- *     },
- *   ],
- *   target: { provider: 'mock_agent' },
- * });
- *
- * console.log(results.summary.passed, 'passed');
- * ```
- *
- * @example Inline tests with task function and custom assertion
- * ```typescript
- * import { evaluate } from '@agentv/core';
- *
- * const { summary } = await evaluate({
- *   tests: [
- *     {
- *       id: 'echo',
- *       input: 'hello',
- *       expectedOutput: 'Echo: hello',
- *       assert: [
- *         { type: 'contains', value: 'hello' },
- *         { type: 'equals' },
- *         ({ output }) => ({ name: 'custom', score: output.length > 0 ? 1 : 0 }),
- *       ],
- *     },
- *   ],
- *   task: async (input) => `Echo: ${input}`,
- * });
- * ```
- *
- * @example File-based
- * ```typescript
- * const results = await evaluate({
- *   specFile: './evals/EVAL.yaml',
- *   target: { provider: 'claude_agent' },
- * });
- * ```
- *
- * @module
- */
-/**
- * Inline test definition for the programmatic API.
- * Mirrors the YAML test structure.
- */
-interface EvalTestInput {
-    /** Unique test identifier */
-    readonly id: string;
-    /** What the response should accomplish */
-    readonly criteria?: string;
-    /** Input to the agent (string or message array) */
-    readonly input: string | readonly {
-        role: string;
-        content: string;
-    }[];
-    /** Expected reference output (camelCase preferred) */
-    readonly expectedOutput?: string;
-    /** @deprecated Use `expectedOutput` instead */
-    readonly expected_output?: string;
-    /** Assertion graders — accepts factory functions, config objects, or inline functions */
-    readonly assert?: readonly AssertEntry[];
-    /** Arbitrary metadata */
-    readonly metadata?: Record<string, unknown>;
-}
-/**
- * Inline assertion definition for the programmatic API.
- * Matches the YAML `assert` block structure.
- */
-interface EvalAssertionInput {
-    /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
-    readonly type: string;
-    /** Display name */
-    readonly name?: string;
-    /** Value for deterministic assertions (contains, equals, regex) */
-    readonly value?: string;
-    /** Weight for scoring */
-    readonly weight?: number;
-    /** Whether this assertion is required to pass */
-    readonly required?: boolean | number;
-    /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
-    readonly min_score?: number;
-    /** Prompt file for llm_grader */
-    readonly prompt?: string;
-    /** Script for code_grader */
-    readonly script?: string | readonly string[];
-    /** Additional config passed to the assertion */
-    readonly config?: Record<string, unknown>;
-    /** Nested assertions for composite type */
-    readonly assert?: readonly EvalAssertionInput[];
-    /** Rubric criteria for rubrics type */
-    readonly criteria?: readonly (string | {
-        id?: string;
-        outcome: string;
-        weight?: number;
-    })[];
-    /** Additional properties */
-    readonly [key: string]: unknown;
-}
-/** Assert entry: inline function or config object */
-type AssertEntry = AssertFn | EvalAssertionInput;
-/**
- * Configuration for `evaluate()`.
- * Accepts either inline tests or a spec file path.
- */
-interface EvalConfig {
-    /** Inline test definitions (mutually exclusive with specFile) */
-    readonly tests?: readonly EvalTestInput[];
-    /** Path to an EVAL.yaml spec file (mutually exclusive with tests) */
-    readonly specFile?: string;
-    /** Target provider configuration */
-    readonly target?: TargetDefinition;
-    /** Custom task function — mutually exclusive with target */
-    readonly task?: (input: string) => string | Promise<string>;
-    /** Suite-level assertions applied to all tests */
-    readonly assert?: readonly AssertEntry[];
-    /** Filter tests by ID pattern(s) (glob supported). Arrays use OR logic. */
-    readonly filter?: string | readonly string[];
-    /** Maximum concurrent workers (default: 3) */
-    readonly workers?: number;
-    /** Maximum retries on failure (default: 2) */
-    readonly maxRetries?: number;
-    /** Agent timeout in milliseconds. No timeout if not set. */
-    readonly agentTimeoutMs?: number;
-    /** Enable response caching */
-    readonly cache?: boolean;
-    /** Verbose logging */
-    readonly verbose?: boolean;
-    /** Callback for each completed result */
-    readonly onResult?: (result: EvaluationResult) => void;
-    /** Score threshold for pass/fail (0-1). Default: 0.8 (DEFAULT_THRESHOLD). */
-    readonly threshold?: number;
-}
-/**
- * Summary statistics for an evaluation run.
- */
-interface EvalSummary {
-    /** Total number of test cases */
-    readonly total: number;
-    /** Number of passing test cases (score >= threshold) */
-    readonly passed: number;
-    /** Number of failing test cases (score < threshold) */
-    readonly failed: number;
-    /** Total duration in milliseconds */
-    readonly durationMs: number;
-    /** Mean score across all cases */
-    readonly meanScore: number;
-}
-/**
- * Result of an `evaluate()` call.
- */
-interface EvalRunResult {
-    /** Individual test case results */
-    readonly results: readonly EvaluationResult[];
-    /** Aggregate summary statistics */
-    readonly summary: EvalSummary;
-}
-/**
- * Run an evaluation suite against a target provider.
- *
- * Accepts either inline test definitions or a path to an EVAL.yaml spec file.
- * The config shape mirrors the YAML structure — users can translate between
- * file-based and programmatic usage 1:1.
- *
- * @param config - Evaluation configuration
- * @returns Typed evaluation results with summary statistics
- *
- * @example Inline tests with assertions
- * ```typescript
- * const { results, summary } = await evaluate({
- *   tests: [
- *     {
- *       id: 'greeting',
- *       input: 'Say hello',
- *       assert: [{ type: 'contains', value: 'hello' }],
- *     },
- *   ],
- *   target: { provider: 'mock_agent' },
- * });
- * console.log(`${summary.passed}/${summary.total} passed`);
- * ```
- *
- * @example Load from YAML
- * ```typescript
- * const { summary } = await evaluate({
- *   specFile: './evals/my-eval.yaml',
- *   filter: 'greeting-*',
- * });
- * ```
- */
-declare function evaluate(config: EvalConfig): Promise<EvalRunResult>;
 /**
  * Typed configuration file support for AgentV.
  *
@@ -4186,17 +4227,17 @@ declare class OtlpJsonFileExporter {
 }
 /**
- * Factory functions for all built-in evaluator types.
+ * Factory functions for all built-in grader types.
  *
- * Each factory creates an Evaluator instance from an EvaluatorConfig,
+ * Each factory creates a Grader instance from a GraderConfig,
  * handling type-specific initialization logic. These are registered into
- * the EvaluatorRegistry at startup.
+ * the GraderRegistry at startup.
  */
 /**
- * Create a new EvaluatorRegistry with all built-in evaluator types registered.
+ * Create a new GraderRegistry with all built-in grader types registered.
  */
-declare function createBuiltinRegistry(): EvaluatorRegistry;
+declare function createBuiltinRegistry(): GraderRegistry;
 /**
  * Convention-based discovery of custom assertion scripts.
@@ -4216,27 +4257,27 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
  * @param baseDir - The base directory to search from (typically project root or eval file dir)
  * @returns Names of discovered assertion types
  */
-declare function discoverAssertions(registry: EvaluatorRegistry, baseDir: string): Promise<string[]>;
+declare function discoverAssertions(registry: GraderRegistry, baseDir: string): Promise<string[]>;
 /**
  * Convention-based discovery of custom grader scripts.
  *
  * Scans `.agentv/graders/` (and legacy `.agentv/judges/`) for TypeScript/JavaScript
- * files and registers them as code-grader evaluators in the registry. The file name
- * (without extension) becomes the evaluator type name.
+ * files and registers them as code graders in the registry. The file name
+ * (without extension) becomes the grader type name.
  *
  * Example: `.agentv/graders/custom-grader.ts` → type "custom-grader" in EVAL.yaml
  */
 /**
  * Discover custom grader scripts from `.agentv/graders/` (and legacy `.agentv/judges/`)
- * and register them as evaluator types in the registry.
+ * and register them as grader types in the registry.
  *
- * @param registry - The evaluator registry to register discovered graders into
+ * @param registry - The grader registry to register discovered graders into
  * @param baseDir - The base directory to search from (typically project root or eval file dir)
  * @returns Names of discovered grader types
  */
-declare function discoverGraders(registry: EvaluatorRegistry, baseDir: string): Promise<string[]>;
+declare function discoverGraders(registry: GraderRegistry, baseDir: string): Promise<string[]>;
 /**
  * Core types for the transcript import pipeline.
@@ -4489,7 +4530,7 @@ declare function discoverClaudeSessions(opts?: ClaudeDiscoverOptions): Promise<C
  *   1. Reads a transcript JSONL file (produced by `agentv import`)
  *   2. Each invocation pops the next line from the transcript
  *   3. Returns a ProviderResponse with pre-populated output, token usage, etc.
- *   4. Evaluators run identically to live eval — they see the same ProviderResponse
+ *   4. Graders run identically to live eval — they see the same ProviderResponse
  *
  * The provider name in results is set to the source provider from the transcript
  * (e.g., "claude", "codex", "copilot").
@@ -4555,4 +4596,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type BenchmarkEntry, type BenchmarkRegistry, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildEvaluatorResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionEvaluator, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type 
EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type 
PreparedResultsRepoBranch, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsExportConfig, type ResultsRepoCachePaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addBenchmark, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, 
calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveBenchmarkId, deriveCategory, detectFormat, directorySizeBytes, discoverAssertions, discoverBenchmarks, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getBenchmark, getBenchmarksRegistryPath, getOutputFilenames, getResultsRepoCachePaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadBenchmarkRegistry, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsExportConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseJsonFromText, parseJsonSafe, 
prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeBenchmark, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveBenchmarkRegistry, scanRepoDeps, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchBenchmark, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type BenchmarkEntry, type BenchmarkRegistry, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type 
EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexGraderConfig, type 
RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsExportConfig, type ResultsRepoCachePaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addBenchmark, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, 
createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveBenchmarkId, deriveCategory, detectFormat, directorySizeBytes, discoverAssertions, discoverBenchmarks, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getBenchmark, getBenchmarksRegistryPath, getOutputFilenames, getResultsRepoCachePaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadBenchmarkRegistry, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsExportConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseJsonFromText, parseJsonSafe, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeBenchmark, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, 
rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveBenchmarkRegistry, scanRepoDeps, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchBenchmark, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };