@agentv/core 3.12.0 → 3.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -473,7 +473,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
473
473
  * - Either content (string or array of objects) OR tool_calls (for assistant messages)
474
474
  */
475
475
  declare function isTestMessage(value: unknown): value is TestMessage;
476
- declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "code-judge", "llm-judge", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
476
+ declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
477
477
  type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
478
478
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
479
479
  /**
@@ -576,7 +576,7 @@ type WorkspaceConfig = {
576
576
  };
577
577
  type CodeEvaluatorConfig = {
578
578
  readonly name: string;
579
- readonly type: 'code-judge' | 'code-grader';
579
+ readonly type: 'code-grader';
580
580
  readonly command: readonly string[];
581
581
  /** @deprecated Use `command` instead */
582
582
  readonly script?: readonly string[];
@@ -606,7 +606,7 @@ type PromptScriptConfig = {
606
606
  };
607
607
  type LlmGraderEvaluatorConfig = {
608
608
  readonly name: string;
609
- readonly type: 'llm-grader' | 'llm-judge';
609
+ readonly type: 'llm-grader';
610
610
  /** Text prompt (inline or file path) or executable script config */
611
611
  readonly prompt?: string | PromptScriptConfig;
612
612
  readonly promptPath?: string;
@@ -678,20 +678,11 @@ type CompositeAggregatorConfig = {
678
678
  readonly type: 'code-grader';
679
679
  readonly path: string;
680
680
  readonly cwd?: string;
681
- } | {
682
- readonly type: 'code-judge';
683
- readonly path: string;
684
- readonly cwd?: string;
685
681
  } | {
686
682
  readonly type: 'llm-grader';
687
683
  readonly prompt?: string;
688
684
  readonly promptPath?: string;
689
685
  readonly model?: string;
690
- } | {
691
- readonly type: 'llm-judge';
692
- readonly prompt?: string;
693
- readonly promptPath?: string;
694
- readonly model?: string;
695
686
  } | {
696
687
  readonly type: 'threshold';
697
688
  readonly threshold: number;
@@ -1250,7 +1241,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
1250
1241
  declare const DEFAULT_EVAL_PATTERNS: readonly string[];
1251
1242
  type ExecutionDefaults = {
1252
1243
  readonly verbose?: boolean;
1253
- readonly trace_file?: string;
1254
1244
  readonly keep_workspaces?: boolean;
1255
1245
  readonly otel_file?: string;
1256
1246
  readonly export_otel?: boolean;
@@ -2118,7 +2108,7 @@ interface CodeEvaluatorOptions {
2118
2108
  readonly target?: TargetAccessConfig;
2119
2109
  }
2120
2110
  declare class CodeEvaluator implements Evaluator {
2121
- readonly kind = "code-judge";
2111
+ readonly kind = "code-grader";
2122
2112
  private readonly command;
2123
2113
  private readonly cwd?;
2124
2114
  private readonly agentTimeoutMs?;
@@ -2853,7 +2843,7 @@ interface EvalTestInput {
2853
2843
  readonly expectedOutput?: string;
2854
2844
  /** @deprecated Use `expectedOutput` instead */
2855
2845
  readonly expected_output?: string;
2856
- /** Assertion evaluators — accepts factory functions, config objects, or inline functions */
2846
+ /** Assertion graders — accepts factory functions, config objects, or inline functions */
2857
2847
  readonly assert?: readonly AssertEntry[];
2858
2848
  /** Arbitrary metadata */
2859
2849
  readonly metadata?: Record<string, unknown>;
@@ -2863,7 +2853,7 @@ interface EvalTestInput {
2863
2853
  * Matches the YAML `assert` block structure.
2864
2854
  */
2865
2855
  interface EvalAssertionInput {
2866
- /** Assertion type (e.g., 'contains', 'llm-judge', 'code-judge') */
2856
+ /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
2867
2857
  readonly type: string;
2868
2858
  /** Display name */
2869
2859
  readonly name?: string;
@@ -2873,9 +2863,9 @@ interface EvalAssertionInput {
2873
2863
  readonly weight?: number;
2874
2864
  /** Whether this assertion is required to pass */
2875
2865
  readonly required?: boolean | number;
2876
- /** Prompt file for llm_judge */
2866
+ /** Prompt file for llm_grader */
2877
2867
  readonly prompt?: string;
2878
- /** Script for code_judge */
2868
+ /** Script for code_grader */
2879
2869
  readonly script?: string | readonly string[];
2880
2870
  /** Additional config passed to the assertion */
2881
2871
  readonly config?: Record<string, unknown>;
@@ -3024,8 +3014,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3024
3014
  agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
3025
3015
  /** Enable verbose logging */
3026
3016
  verbose: z.ZodOptional<z.ZodBoolean>;
3027
- /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
3028
- traceFile: z.ZodOptional<z.ZodString>;
3029
3017
  /** Always keep temp workspaces after eval */
3030
3018
  keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
3031
3019
  /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -3036,7 +3024,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3036
3024
  maxRetries?: number | undefined;
3037
3025
  agentTimeoutMs?: number | undefined;
3038
3026
  keepWorkspaces?: boolean | undefined;
3039
- traceFile?: string | undefined;
3040
3027
  otelFile?: string | undefined;
3041
3028
  }, {
3042
3029
  workers?: number | undefined;
@@ -3044,7 +3031,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3044
3031
  maxRetries?: number | undefined;
3045
3032
  agentTimeoutMs?: number | undefined;
3046
3033
  keepWorkspaces?: boolean | undefined;
3047
- traceFile?: string | undefined;
3048
3034
  otelFile?: string | undefined;
3049
3035
  }>>;
3050
3036
  /** Output settings */
@@ -3093,7 +3079,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3093
3079
  maxRetries?: number | undefined;
3094
3080
  agentTimeoutMs?: number | undefined;
3095
3081
  keepWorkspaces?: boolean | undefined;
3096
- traceFile?: string | undefined;
3097
3082
  otelFile?: string | undefined;
3098
3083
  } | undefined;
3099
3084
  cache?: {
@@ -3115,7 +3100,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3115
3100
  maxRetries?: number | undefined;
3116
3101
  agentTimeoutMs?: number | undefined;
3117
3102
  keepWorkspaces?: boolean | undefined;
3118
- traceFile?: string | undefined;
3119
3103
  otelFile?: string | undefined;
3120
3104
  } | undefined;
3121
3105
  cache?: {
@@ -3469,8 +3453,6 @@ interface OtelExportOptions {
3469
3453
  readonly groupTurns?: boolean;
3470
3454
  /** Path to write OTLP JSON file (importable by OTel backends) */
3471
3455
  readonly otlpFilePath?: string;
3472
- /** Path to write human-readable simple JSONL trace file */
3473
- readonly traceFilePath?: string;
3474
3456
  }
3475
3457
  /** Preset configuration for a known observability backend. */
3476
3458
  interface OtelBackendPreset {
@@ -3543,7 +3525,7 @@ declare class OtelStreamingObserver {
3543
3525
  getStreamCallbacks(): ProviderStreamCallbacks;
3544
3526
  }
3545
3527
 
3546
- type ReadableSpan$1 = any;
3528
+ type ReadableSpan = any;
3547
3529
  /**
3548
3530
  * SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
3549
3531
  * The file can be imported by any OTel-compatible backend.
@@ -3552,34 +3534,12 @@ declare class OtlpJsonFileExporter {
3552
3534
  private spans;
3553
3535
  private filePath;
3554
3536
  constructor(filePath: string);
3555
- export(spans: ReadableSpan$1[], resultCallback: (result: {
3556
- code: number;
3557
- }) => void): void;
3558
- shutdown(): Promise<void>;
3559
- forceFlush(): Promise<void>;
3560
- private flush;
3561
- }
3562
-
3563
- type ReadableSpan = any;
3564
- /**
3565
- * SpanExporter that writes human-readable JSONL (one line per root span).
3566
- * Designed for quick debugging and analysis without OTel tooling.
3567
- */
3568
- declare class SimpleTraceFileExporter {
3569
- private stream;
3570
- private filePath;
3571
- private streamReady;
3572
- private pendingWrites;
3573
- private _shuttingDown;
3574
- private spansByTraceId;
3575
- constructor(filePath: string);
3576
- private ensureStream;
3577
3537
  export(spans: ReadableSpan[], resultCallback: (result: {
3578
3538
  code: number;
3579
3539
  }) => void): void;
3580
3540
  shutdown(): Promise<void>;
3581
3541
  forceFlush(): Promise<void>;
3582
- private buildSimpleRecord;
3542
+ private flush;
3583
3543
  }
3584
3544
 
3585
3545
  /**
@@ -3599,17 +3559,17 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
3599
3559
  * Convention-based discovery of custom assertion scripts.
3600
3560
  *
3601
3561
  * Scans `.agentv/assertions/` for TypeScript/JavaScript files and registers
3602
- * them as code-judge evaluators in the registry. The file name (without
3603
- * extension) becomes the evaluator type name.
3562
+ * them as code graders in the registry. The file name (without
3563
+ * extension) becomes the grader type name.
3604
3564
  *
3605
3565
  * Example: `.agentv/assertions/sentiment.ts` → type "sentiment" in EVAL.yaml
3606
3566
  */
3607
3567
 
3608
3568
  /**
3609
3569
  * Discover custom assertion scripts from `.agentv/assertions/` and register
3610
- * them as evaluator types in the registry.
3570
+ * them as grader types in the registry.
3611
3571
  *
3612
- * @param registry - The evaluator registry to register discovered assertions into
3572
+ * @param registry - The grader registry to register discovered assertions into
3613
3573
  * @param baseDir - The base directory to search from (typically project root or eval file dir)
3614
3574
  * @returns Names of discovered assertion types
3615
3575
  */
@@ -3640,4 +3600,4 @@ type AgentKernel = {
3640
3600
  };
3641
3601
  declare function createAgentKernel(): AgentKernel;
3642
3602
 
3643
- export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
3603
+ export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
package/dist/index.d.ts CHANGED
@@ -473,7 +473,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
473
473
  * - Either content (string or array of objects) OR tool_calls (for assistant messages)
474
474
  */
475
475
  declare function isTestMessage(value: unknown): value is TestMessage;
476
- declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "code-judge", "llm-judge", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
476
+ declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
477
477
  type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
478
478
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
479
479
  /**
@@ -576,7 +576,7 @@ type WorkspaceConfig = {
576
576
  };
577
577
  type CodeEvaluatorConfig = {
578
578
  readonly name: string;
579
- readonly type: 'code-judge' | 'code-grader';
579
+ readonly type: 'code-grader';
580
580
  readonly command: readonly string[];
581
581
  /** @deprecated Use `command` instead */
582
582
  readonly script?: readonly string[];
@@ -606,7 +606,7 @@ type PromptScriptConfig = {
606
606
  };
607
607
  type LlmGraderEvaluatorConfig = {
608
608
  readonly name: string;
609
- readonly type: 'llm-grader' | 'llm-judge';
609
+ readonly type: 'llm-grader';
610
610
  /** Text prompt (inline or file path) or executable script config */
611
611
  readonly prompt?: string | PromptScriptConfig;
612
612
  readonly promptPath?: string;
@@ -678,20 +678,11 @@ type CompositeAggregatorConfig = {
678
678
  readonly type: 'code-grader';
679
679
  readonly path: string;
680
680
  readonly cwd?: string;
681
- } | {
682
- readonly type: 'code-judge';
683
- readonly path: string;
684
- readonly cwd?: string;
685
681
  } | {
686
682
  readonly type: 'llm-grader';
687
683
  readonly prompt?: string;
688
684
  readonly promptPath?: string;
689
685
  readonly model?: string;
690
- } | {
691
- readonly type: 'llm-judge';
692
- readonly prompt?: string;
693
- readonly promptPath?: string;
694
- readonly model?: string;
695
686
  } | {
696
687
  readonly type: 'threshold';
697
688
  readonly threshold: number;
@@ -1250,7 +1241,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
1250
1241
  declare const DEFAULT_EVAL_PATTERNS: readonly string[];
1251
1242
  type ExecutionDefaults = {
1252
1243
  readonly verbose?: boolean;
1253
- readonly trace_file?: string;
1254
1244
  readonly keep_workspaces?: boolean;
1255
1245
  readonly otel_file?: string;
1256
1246
  readonly export_otel?: boolean;
@@ -2118,7 +2108,7 @@ interface CodeEvaluatorOptions {
2118
2108
  readonly target?: TargetAccessConfig;
2119
2109
  }
2120
2110
  declare class CodeEvaluator implements Evaluator {
2121
- readonly kind = "code-judge";
2111
+ readonly kind = "code-grader";
2122
2112
  private readonly command;
2123
2113
  private readonly cwd?;
2124
2114
  private readonly agentTimeoutMs?;
@@ -2853,7 +2843,7 @@ interface EvalTestInput {
2853
2843
  readonly expectedOutput?: string;
2854
2844
  /** @deprecated Use `expectedOutput` instead */
2855
2845
  readonly expected_output?: string;
2856
- /** Assertion evaluators — accepts factory functions, config objects, or inline functions */
2846
+ /** Assertion graders — accepts factory functions, config objects, or inline functions */
2857
2847
  readonly assert?: readonly AssertEntry[];
2858
2848
  /** Arbitrary metadata */
2859
2849
  readonly metadata?: Record<string, unknown>;
@@ -2863,7 +2853,7 @@ interface EvalTestInput {
2863
2853
  * Matches the YAML `assert` block structure.
2864
2854
  */
2865
2855
  interface EvalAssertionInput {
2866
- /** Assertion type (e.g., 'contains', 'llm-judge', 'code-judge') */
2856
+ /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
2867
2857
  readonly type: string;
2868
2858
  /** Display name */
2869
2859
  readonly name?: string;
@@ -2873,9 +2863,9 @@ interface EvalAssertionInput {
2873
2863
  readonly weight?: number;
2874
2864
  /** Whether this assertion is required to pass */
2875
2865
  readonly required?: boolean | number;
2876
- /** Prompt file for llm_judge */
2866
+ /** Prompt file for llm_grader */
2877
2867
  readonly prompt?: string;
2878
- /** Script for code_judge */
2868
+ /** Script for code_grader */
2879
2869
  readonly script?: string | readonly string[];
2880
2870
  /** Additional config passed to the assertion */
2881
2871
  readonly config?: Record<string, unknown>;
@@ -3024,8 +3014,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3024
3014
  agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
3025
3015
  /** Enable verbose logging */
3026
3016
  verbose: z.ZodOptional<z.ZodBoolean>;
3027
- /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
3028
- traceFile: z.ZodOptional<z.ZodString>;
3029
3017
  /** Always keep temp workspaces after eval */
3030
3018
  keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
3031
3019
  /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -3036,7 +3024,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3036
3024
  maxRetries?: number | undefined;
3037
3025
  agentTimeoutMs?: number | undefined;
3038
3026
  keepWorkspaces?: boolean | undefined;
3039
- traceFile?: string | undefined;
3040
3027
  otelFile?: string | undefined;
3041
3028
  }, {
3042
3029
  workers?: number | undefined;
@@ -3044,7 +3031,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3044
3031
  maxRetries?: number | undefined;
3045
3032
  agentTimeoutMs?: number | undefined;
3046
3033
  keepWorkspaces?: boolean | undefined;
3047
- traceFile?: string | undefined;
3048
3034
  otelFile?: string | undefined;
3049
3035
  }>>;
3050
3036
  /** Output settings */
@@ -3093,7 +3079,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3093
3079
  maxRetries?: number | undefined;
3094
3080
  agentTimeoutMs?: number | undefined;
3095
3081
  keepWorkspaces?: boolean | undefined;
3096
- traceFile?: string | undefined;
3097
3082
  otelFile?: string | undefined;
3098
3083
  } | undefined;
3099
3084
  cache?: {
@@ -3115,7 +3100,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
3115
3100
  maxRetries?: number | undefined;
3116
3101
  agentTimeoutMs?: number | undefined;
3117
3102
  keepWorkspaces?: boolean | undefined;
3118
- traceFile?: string | undefined;
3119
3103
  otelFile?: string | undefined;
3120
3104
  } | undefined;
3121
3105
  cache?: {
@@ -3469,8 +3453,6 @@ interface OtelExportOptions {
3469
3453
  readonly groupTurns?: boolean;
3470
3454
  /** Path to write OTLP JSON file (importable by OTel backends) */
3471
3455
  readonly otlpFilePath?: string;
3472
- /** Path to write human-readable simple JSONL trace file */
3473
- readonly traceFilePath?: string;
3474
3456
  }
3475
3457
  /** Preset configuration for a known observability backend. */
3476
3458
  interface OtelBackendPreset {
@@ -3543,7 +3525,7 @@ declare class OtelStreamingObserver {
3543
3525
  getStreamCallbacks(): ProviderStreamCallbacks;
3544
3526
  }
3545
3527
 
3546
- type ReadableSpan$1 = any;
3528
+ type ReadableSpan = any;
3547
3529
  /**
3548
3530
  * SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
3549
3531
  * The file can be imported by any OTel-compatible backend.
@@ -3552,34 +3534,12 @@ declare class OtlpJsonFileExporter {
3552
3534
  private spans;
3553
3535
  private filePath;
3554
3536
  constructor(filePath: string);
3555
- export(spans: ReadableSpan$1[], resultCallback: (result: {
3556
- code: number;
3557
- }) => void): void;
3558
- shutdown(): Promise<void>;
3559
- forceFlush(): Promise<void>;
3560
- private flush;
3561
- }
3562
-
3563
- type ReadableSpan = any;
3564
- /**
3565
- * SpanExporter that writes human-readable JSONL (one line per root span).
3566
- * Designed for quick debugging and analysis without OTel tooling.
3567
- */
3568
- declare class SimpleTraceFileExporter {
3569
- private stream;
3570
- private filePath;
3571
- private streamReady;
3572
- private pendingWrites;
3573
- private _shuttingDown;
3574
- private spansByTraceId;
3575
- constructor(filePath: string);
3576
- private ensureStream;
3577
3537
  export(spans: ReadableSpan[], resultCallback: (result: {
3578
3538
  code: number;
3579
3539
  }) => void): void;
3580
3540
  shutdown(): Promise<void>;
3581
3541
  forceFlush(): Promise<void>;
3582
- private buildSimpleRecord;
3542
+ private flush;
3583
3543
  }
3584
3544
 
3585
3545
  /**
@@ -3599,17 +3559,17 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
3599
3559
  * Convention-based discovery of custom assertion scripts.
3600
3560
  *
3601
3561
  * Scans `.agentv/assertions/` for TypeScript/JavaScript files and registers
3602
- * them as code-judge evaluators in the registry. The file name (without
3603
- * extension) becomes the evaluator type name.
3562
+ * them as code graders in the registry. The file name (without
3563
+ * extension) becomes the grader type name.
3604
3564
  *
3605
3565
  * Example: `.agentv/assertions/sentiment.ts` → type "sentiment" in EVAL.yaml
3606
3566
  */
3607
3567
 
3608
3568
  /**
3609
3569
  * Discover custom assertion scripts from `.agentv/assertions/` and register
3610
- * them as evaluator types in the registry.
3570
+ * them as grader types in the registry.
3611
3571
  *
3612
- * @param registry - The evaluator registry to register discovered assertions into
3572
+ * @param registry - The grader registry to register discovered assertions into
3613
3573
  * @param baseDir - The base directory to search from (typically project root or eval file dir)
3614
3574
  * @returns Names of discovered assertion types
3615
3575
  */
@@ -3640,4 +3600,4 @@ type AgentKernel = {
3640
3600
  };
3641
3601
  declare function createAgentKernel(): AgentKernel;
3642
3602
 
3643
- export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
3603
+ export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };