npm - @agentv/core - Versions diffs - 2.12.0 → 2.14.0-next.1 - Mend

@agentv/core 2.12.0 → 2.14.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/{chunk-7HPKTRFZ.js → chunk-N55K52OO.js} +15 -15
package/dist/chunk-N55K52OO.js.map +1 -0
package/dist/evaluation/validation/index.cjs +25 -24
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +12 -11
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +248 -160
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +58 -41
package/dist/index.d.ts +58 -41
package/dist/index.js +235 -148
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-7HPKTRFZ.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -257,7 +257,7 @@ interface TraceComputeResult {
     readonly endTime?: string;
 }
 /**
- * Argument matching mode for tool_trajectory expected items.
+ * Argument matching mode for tool-trajectory expected items.
  * - 'exact': bidirectional deep equality, no extra keys allowed (default)
  * - 'superset': actual args must contain all expected keys (extras OK)
  * - 'subset': actual args must be a subset of expected keys (no unexpected keys)
@@ -265,11 +265,11 @@ interface TraceComputeResult {
  */
 type ArgsMatchMode = 'exact' | 'ignore' | 'subset' | 'superset';
 /**
- * Configuration for tool_trajectory evaluator.
+ * Configuration for tool-trajectory evaluator.
  */
 interface ToolTrajectoryEvaluatorConfig {
     readonly name: string;
-    readonly type: 'tool_trajectory';
+    readonly type: 'tool-trajectory';
     /** Matching mode */
     readonly mode: 'any_order' | 'in_order' | 'exact' | 'subset' | 'superset';
     /** Minimum call counts per tool (for any_order mode) */
@@ -453,11 +453,11 @@ declare function isJsonValue(value: unknown): value is JsonValue;
  * - Either content (string or array of objects) OR tool_calls (for assistant messages)
  */
 declare function isTestMessage(value: unknown): value is TestMessage;
-declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite", "tool_trajectory", "field_accuracy", "latency", "cost", "token_usage", "execution_metrics", "agent_judge", "contains", "contains_any", "contains_all", "icontains", "icontains_any", "icontains_all", "starts_with", "ends_with", "regex", "is_json", "equals", "rubrics"];
+declare const EVALUATOR_KIND_VALUES: readonly ["code-judge", "llm-judge", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "agent-judge", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics"];
 type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
 declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
 /**
- * Configuration for enabling target access in code_judge evaluators.
+ * Configuration for enabling target access in code-judge evaluators.
  * When present, the runtime will start a local proxy server that allows
  * the script to invoke configured targets without direct credential access.
  */
@@ -539,7 +539,7 @@ type WorkspaceConfig = {
 };
 type CodeEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'code';
+    readonly type: 'code-judge';
     readonly command: readonly string[];
     /** @deprecated Use `command` instead */
     readonly script?: readonly string[];
@@ -550,14 +550,14 @@ type CodeEvaluatorConfig = {
     readonly required?: boolean | number;
     /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
     readonly negate?: boolean;
-    /** Pass-through configuration for the code_judge (any unrecognized YAML properties) */
+    /** Pass-through configuration for the code-judge (any unrecognized YAML properties) */
     readonly config?: JsonObject;
     /** When present, enables target access via local proxy */
     readonly target?: TargetAccessConfig;
 };
 /**
  * Executable prompt template configuration.
- * Matches code_judge pattern for consistency.
+ * Matches code-judge pattern for consistency.
  */
 type PromptScriptConfig = {
     /** Command array to execute (e.g., ["bun", "run", "template.ts"]) */
@@ -569,13 +569,13 @@ type PromptScriptConfig = {
 };
 type LlmJudgeEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'llm_judge';
+    readonly type: 'llm-judge';
     /** Text prompt (inline or file path) or executable script config */
     readonly prompt?: string | PromptScriptConfig;
     readonly promptPath?: string;
     /** Resolved absolute path for prompt file (used for text template prompts) */
     readonly resolvedPromptPath?: string;
-    /** Resolved script array for executable prompts (matches code_judge pattern) */
+    /** Resolved script array for executable prompts (matches code-judge pattern) */
     readonly resolvedPromptScript?: readonly string[];
     readonly rubrics?: readonly RubricItem[];
     readonly weight?: number;
@@ -630,11 +630,11 @@ type CompositeAggregatorConfig = {
     readonly type: 'weighted_average';
     readonly weights?: Record<string, number>;
 } | {
-    readonly type: 'code_judge';
+    readonly type: 'code-judge';
     readonly path: string;
     readonly cwd?: string;
 } | {
-    readonly type: 'llm_judge';
+    readonly type: 'llm-judge';
     readonly prompt?: string;
     readonly promptPath?: string;
     readonly model?: string;
@@ -654,7 +654,7 @@ type CompositeEvaluatorConfig = {
 };
 /**
  * Match type for field accuracy evaluation.
- * Note: For fuzzy string matching (Levenshtein, Jaro-Winkler, etc.), use a code_judge evaluator.
+ * Note: For fuzzy string matching (Levenshtein, Jaro-Winkler, etc.), use a code-judge evaluator.
  * See examples/features/document-extraction/fuzzy_match.ts for an example.
  */
 type FieldMatchType = 'exact' | 'numeric_tolerance' | 'date';
@@ -682,11 +682,11 @@ type FieldConfig = {
     readonly formats?: readonly string[];
 };
 /**
- * Configuration for the field_accuracy evaluator.
+ * Configuration for the field-accuracy evaluator.
  */
 type FieldAccuracyEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'field_accuracy';
+    readonly type: 'field-accuracy';
     /** Fields to compare between candidate and expected */
     readonly fields: readonly FieldConfig[];
     /** Strategy for combining field scores (default: weighted_average) */
@@ -725,12 +725,12 @@ type CostEvaluatorConfig = {
     readonly negate?: boolean;
 };
 /**
- * Configuration for the token_usage evaluator.
+ * Configuration for the token-usage evaluator.
  * Checks provider-reported token usage against configured limits.
  */
 type TokenUsageEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'token_usage';
+    readonly type: 'token-usage';
     /** Maximum allowed total tokens (input + output + cached, when present) */
     readonly max_total?: number;
     /** Maximum allowed input tokens (prompt) */
@@ -743,13 +743,13 @@ type TokenUsageEvaluatorConfig = {
     readonly negate?: boolean;
 };
 /**
- * Configuration for the execution_metrics evaluator.
+ * Configuration for the execution-metrics evaluator.
  * Provides declarative threshold-based checks on execution metrics.
  * Only specified thresholds are checked; omitted ones are ignored.
  */
 type ExecutionMetricsEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'execution_metrics';
+    readonly type: 'execution-metrics';
     /** Maximum allowed number of tool calls */
     readonly max_tool_calls?: number;
     /** Maximum allowed number of LLM calls (assistant messages) */
@@ -770,7 +770,7 @@ type ExecutionMetricsEvaluatorConfig = {
     readonly negate?: boolean;
 };
 /**
- * Configuration for the agent_judge evaluator.
+ * Configuration for the agent-judge evaluator.
  * Runs an agentic investigation loop to audit workspaces and verify criteria.
  * Two modes:
  * - Built-in: Uses AI SDK generateText() with sandboxed filesystem tools
@@ -778,13 +778,13 @@ type ExecutionMetricsEvaluatorConfig = {
  */
 type AgentJudgeEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'agent_judge';
+    readonly type: 'agent-judge';
     /** Custom evaluation prompt (inline text or file path) */
     readonly prompt?: string;
     readonly promptPath?: string;
     /** Resolved absolute path for prompt file */
     readonly resolvedPromptPath?: string;
-    /** Rubric items for structured evaluation (reuses llm_judge rubric infra) */
+    /** Rubric items for structured evaluation (reuses llm-judge rubric infra) */
     readonly rubrics?: readonly RubricItem[];
     /** Maximum agent steps for built-in mode (default 10, max 50) */
     readonly max_steps?: number;
@@ -816,7 +816,7 @@ type ContainsEvaluatorConfig = {
  */
 type ContainsAnyEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'contains_any';
+    readonly type: 'contains-any';
     readonly value: readonly string[];
     readonly weight?: number;
     readonly required?: boolean | number;
@@ -829,7 +829,7 @@ type ContainsAnyEvaluatorConfig = {
  */
 type ContainsAllEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'contains_all';
+    readonly type: 'contains-all';
     readonly value: readonly string[];
     readonly weight?: number;
     readonly required?: boolean | number;
@@ -855,7 +855,7 @@ type IcontainsEvaluatorConfig = {
  */
 type IcontainsAnyEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'icontains_any';
+    readonly type: 'icontains-any';
     readonly value: readonly string[];
     readonly weight?: number;
     readonly required?: boolean | number;
@@ -868,7 +868,7 @@ type IcontainsAnyEvaluatorConfig = {
  */
 type IcontainsAllEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'icontains_all';
+    readonly type: 'icontains-all';
     readonly value: readonly string[];
     readonly weight?: number;
     readonly required?: boolean | number;
@@ -881,7 +881,7 @@ type IcontainsAllEvaluatorConfig = {
  */
 type StartsWithEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'starts_with';
+    readonly type: 'starts-with';
     readonly value: string;
     readonly weight?: number;
     readonly required?: boolean | number;
@@ -894,7 +894,7 @@ type StartsWithEvaluatorConfig = {
  */
 type EndsWithEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'ends_with';
+    readonly type: 'ends-with';
     readonly value: string;
     readonly weight?: number;
     readonly required?: boolean | number;
@@ -922,7 +922,7 @@ type RegexEvaluatorConfig = {
  */
 type IsJsonEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'is_json';
+    readonly type: 'is-json';
     readonly weight?: number;
     readonly required?: boolean | number;
     /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
@@ -1060,6 +1060,12 @@ interface ExecutionError {
     readonly message: string;
     readonly stage: FailureStage;
 }
+/**
+ * Tolerance for execution errors in an eval run.
+ * - `true`: halt on first execution error
+ * - `false`: never halt on errors (default)
+ */
+type FailOnError = boolean;
 /**
  * Evaluator scorecard for a single eval case run.
  */
@@ -1194,6 +1200,7 @@ type ExecutionDefaults = {
     readonly otel_file?: string;
 };
 type AgentVConfig$1 = {
+    readonly required_version?: string;
     readonly guideline_patterns?: readonly string[];
     readonly eval_patterns?: readonly string[];
     readonly execution?: ExecutionDefaults;
@@ -1238,6 +1245,12 @@ interface CacheConfig {
  * Returns undefined when no cache config is specified.
  */
 declare function extractCacheConfig(suite: JsonObject): CacheConfig | undefined;
+/**
+ * Extract `execution.fail_on_error` from parsed eval suite.
+ * Accepts `true` or `false`.
+ * Returns undefined when not specified.
+ */
+declare function extractFailOnError(suite: JsonObject): FailOnError | undefined;
 /**
  * Formatting mode for segment content.
@@ -1297,6 +1310,8 @@ type EvalSuiteResult = {
     readonly metadata?: EvalMetadata;
     /** Suite-level total cost budget in USD */
     readonly totalBudgetUsd?: number;
+    /** Execution error tolerance: true or false */
+    readonly failOnError?: FailOnError;
 };
 /**
  * Load tests and suite metadata from a single parse.
@@ -1900,7 +1915,7 @@ interface CodeEvaluatorOptions {
     readonly target?: TargetAccessConfig;
 }
 declare class CodeEvaluator implements Evaluator {
-    readonly kind = "code";
+    readonly kind = "code-judge";
     private readonly command;
     private readonly cwd?;
     private readonly agentTimeoutMs?;
@@ -1955,7 +1970,7 @@ interface ExecutionMetricsEvaluatorOptions {
  * Score is proportional: hits.length / (hits.length + misses.length)
  */
 declare class ExecutionMetricsEvaluator implements Evaluator {
-    readonly kind = "execution_metrics";
+    readonly kind = "execution-metrics";
     private readonly config;
     constructor(options: ExecutionMetricsEvaluatorOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
@@ -1971,7 +1986,7 @@ interface FieldAccuracyEvaluatorOptions {
  * with configurable matching strategies (exact, fuzzy, numeric_tolerance, date).
  */
 declare class FieldAccuracyEvaluator implements Evaluator {
-    readonly kind = "field_accuracy";
+    readonly kind = "field-accuracy";
     private readonly config;
     constructor(options: FieldAccuracyEvaluatorOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
@@ -2076,7 +2091,7 @@ declare const rubricEvaluationSchema: z.ZodObject<{
 }>;
 declare class LlmJudgeEvaluator implements Evaluator {
-    readonly kind = "llm_judge";
+    readonly kind = "llm-judge";
     private readonly resolveJudgeProvider;
     private readonly maxOutputTokens?;
     private readonly temperature?;
@@ -2123,7 +2138,7 @@ interface AgentJudgeEvaluatorOptions {
     readonly judgeTargetProvider?: Provider;
 }
 declare class AgentJudgeEvaluator implements Evaluator {
-    readonly kind = "agent_judge";
+    readonly kind = "agent-judge";
     private readonly resolveJudgeProvider;
     private readonly maxSteps;
     private readonly temperature;
@@ -2185,7 +2200,7 @@ interface TokenUsageEvaluatorOptions {
  * Uses tokenUsage from the evaluation context.
  */
 declare class TokenUsageEvaluator implements Evaluator {
-    readonly kind = "token_usage";
+    readonly kind = "token-usage";
     private readonly config;
     constructor(options: TokenUsageEvaluatorOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
@@ -2195,7 +2210,7 @@ interface ToolTrajectoryEvaluatorOptions {
     readonly config: ToolTrajectoryEvaluatorConfig;
 }
 declare class ToolTrajectoryEvaluator implements Evaluator {
-    readonly kind = "tool_trajectory";
+    readonly kind = "tool-trajectory";
     private readonly config;
     constructor(options: ToolTrajectoryEvaluatorOptions);
     evaluate(context: EvaluationContext): EvaluationScore;
@@ -2320,7 +2335,7 @@ declare class EvaluatorRegistry {
 }
 /**
  * Adapter that wraps a synchronous assertion function as an Evaluator.
- * Used for deterministic assertions (contains, regex, is_json, equals).
+ * Used for deterministic assertions (contains, regex, is-json, equals).
  */
 declare class DeterministicAssertionEvaluator implements Evaluator {
     private readonly assertFn;
@@ -2368,7 +2383,7 @@ interface RunEvalCaseOptions {
     readonly provider: Provider;
     readonly target: ResolvedTarget;
     readonly evaluators: Partial<Record<string, Evaluator>> & {
-        readonly llm_judge: Evaluator;
+        readonly 'llm-judge': Evaluator;
     };
     readonly now?: () => Date;
     readonly maxRetries?: number;
@@ -2440,6 +2455,8 @@ interface RunEvaluationOptions {
     readonly streamCallbacks?: ProviderStreamCallbacks;
     /** Suite-level total cost budget in USD (stops dispatching when exceeded) */
     readonly totalBudgetUsd?: number;
+    /** Execution error tolerance: true halts on first error */
+    readonly failOnError?: FailOnError;
 }
 declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
 declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
@@ -2507,7 +2524,7 @@ interface EvalTestInput {
  * Matches the YAML `assert` block structure.
  */
 interface EvalAssertionInput {
-    /** Assertion type (e.g., 'contains', 'llm_judge', 'code_judge') */
+    /** Assertion type (e.g., 'contains', 'llm-judge', 'code-judge') */
     readonly type: string;
     /** Display name */
     readonly name?: string;
@@ -3135,7 +3152,7 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
  * Convention-based discovery of custom assertion scripts.
  *
  * Scans `.agentv/assertions/` for TypeScript/JavaScript files and registers
- * them as code_judge evaluators in the registry. The file name (without
+ * them as code-judge evaluators in the registry. The file name (without
  * extension) becomes the evaluator type name.
  *
  * Example: `.agentv/assertions/sentiment.ts` → type "sentiment" in EVAL.yaml
@@ -3156,4 +3173,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type 
IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, 
type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, 
subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };
+export { AgentJudgeEvaluator, type AgentJudgeEvaluatorConfig, type AgentJudgeEvaluatorOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type 
GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type LlmJudgePromptAssembly, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResetConfig, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type 
WorkspaceConfig, WorkspaceCreationError, type WorkspaceScriptConfig, assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, getWorkspacePath, initializeBaseline, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, 
subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, trimBaselineResult };