npm - @agentv/core - Versions diffs - 3.12.0 → 3.13.1 - Mend

@agentv/core 3.12.0 → 3.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/{chunk-4XWPXNQM.js → chunk-ZB3AUPES.js} +1 -3
package/dist/chunk-ZB3AUPES.js.map +1 -0
package/dist/evaluation/validation/index.cjs +0 -2
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +63 -177
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +15 -55
package/dist/index.d.ts +15 -55
package/dist/index.js +62 -49
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-3G2KXH7N.js +0 -120
package/dist/chunk-3G2KXH7N.js.map +0 -1
package/dist/chunk-4XWPXNQM.js.map +0 -1
package/dist/simple-trace-file-exporter-CRIO5HDZ.js +0 -7
package/dist/simple-trace-file-exporter-CRIO5HDZ.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -473,7 +473,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
  * - Either content (string or array of objects) OR tool_calls (for assistant messages)
  */
 declare function isTestMessage(value: unknown): value is TestMessage;
-declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "code-judge", "llm-judge", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
+declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
 type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
 declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
 /**
@@ -576,7 +576,7 @@ type WorkspaceConfig = {
 };
 type CodeEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'code-judge' | 'code-grader';
+    readonly type: 'code-grader';
     readonly command: readonly string[];
     /** @deprecated Use `command` instead */
     readonly script?: readonly string[];
@@ -606,7 +606,7 @@ type PromptScriptConfig = {
 };
 type LlmGraderEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'llm-grader' | 'llm-judge';
+    readonly type: 'llm-grader';
     /** Text prompt (inline or file path) or executable script config */
     readonly prompt?: string | PromptScriptConfig;
     readonly promptPath?: string;
@@ -678,20 +678,11 @@ type CompositeAggregatorConfig = {
     readonly type: 'code-grader';
     readonly path: string;
     readonly cwd?: string;
-} | {
-    readonly type: 'code-judge';
-    readonly path: string;
-    readonly cwd?: string;
 } | {
     readonly type: 'llm-grader';
     readonly prompt?: string;
     readonly promptPath?: string;
     readonly model?: string;
-} | {
-    readonly type: 'llm-judge';
-    readonly prompt?: string;
-    readonly promptPath?: string;
-    readonly model?: string;
 } | {
     readonly type: 'threshold';
     readonly threshold: number;
@@ -1250,7 +1241,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
 declare const DEFAULT_EVAL_PATTERNS: readonly string[];
 type ExecutionDefaults = {
     readonly verbose?: boolean;
-    readonly trace_file?: string;
     readonly keep_workspaces?: boolean;
     readonly otel_file?: string;
     readonly export_otel?: boolean;
@@ -2118,7 +2108,7 @@ interface CodeEvaluatorOptions {
     readonly target?: TargetAccessConfig;
 }
 declare class CodeEvaluator implements Evaluator {
-    readonly kind = "code-judge";
+    readonly kind = "code-grader";
     private readonly command;
     private readonly cwd?;
     private readonly agentTimeoutMs?;
@@ -2853,7 +2843,7 @@ interface EvalTestInput {
     readonly expectedOutput?: string;
     /** @deprecated Use `expectedOutput` instead */
     readonly expected_output?: string;
-    /** Assertion evaluators — accepts factory functions, config objects, or inline functions */
+    /** Assertion graders — accepts factory functions, config objects, or inline functions */
     readonly assert?: readonly AssertEntry[];
     /** Arbitrary metadata */
     readonly metadata?: Record<string, unknown>;
@@ -2863,7 +2853,7 @@ interface EvalTestInput {
  * Matches the YAML `assert` block structure.
  */
 interface EvalAssertionInput {
-    /** Assertion type (e.g., 'contains', 'llm-judge', 'code-judge') */
+    /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
     readonly type: string;
     /** Display name */
     readonly name?: string;
@@ -2873,9 +2863,9 @@ interface EvalAssertionInput {
     readonly weight?: number;
     /** Whether this assertion is required to pass */
     readonly required?: boolean | number;
-    /** Prompt file for llm_judge */
+    /** Prompt file for llm_grader */
     readonly prompt?: string;
-    /** Script for code_judge */
+    /** Script for code_grader */
     readonly script?: string | readonly string[];
     /** Additional config passed to the assertion */
     readonly config?: Record<string, unknown>;
@@ -3024,8 +3014,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
         /** Enable verbose logging */
         verbose: z.ZodOptional<z.ZodBoolean>;
-        /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
-        traceFile: z.ZodOptional<z.ZodString>;
         /** Always keep temp workspaces after eval */
         keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
         /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -3036,7 +3024,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     }, {
         workers?: number | undefined;
@@ -3044,7 +3031,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     }>>;
     /** Output settings */
@@ -3093,7 +3079,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     } | undefined;
     cache?: {
@@ -3115,7 +3100,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     } | undefined;
     cache?: {
@@ -3469,8 +3453,6 @@ interface OtelExportOptions {
     readonly groupTurns?: boolean;
     /** Path to write OTLP JSON file (importable by OTel backends) */
     readonly otlpFilePath?: string;
-    /** Path to write human-readable simple JSONL trace file */
-    readonly traceFilePath?: string;
 }
 /** Preset configuration for a known observability backend. */
 interface OtelBackendPreset {
@@ -3543,7 +3525,7 @@ declare class OtelStreamingObserver {
     getStreamCallbacks(): ProviderStreamCallbacks;
 }
-type ReadableSpan$1 = any;
+type ReadableSpan = any;
 /**
  * SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
  * The file can be imported by any OTel-compatible backend.
@@ -3552,34 +3534,12 @@ declare class OtlpJsonFileExporter {
     private spans;
     private filePath;
     constructor(filePath: string);
-    export(spans: ReadableSpan$1[], resultCallback: (result: {
-        code: number;
-    }) => void): void;
-    shutdown(): Promise<void>;
-    forceFlush(): Promise<void>;
-    private flush;
-}
-type ReadableSpan = any;
-/**
- * SpanExporter that writes human-readable JSONL (one line per root span).
- * Designed for quick debugging and analysis without OTel tooling.
- */
-declare class SimpleTraceFileExporter {
-    private stream;
-    private filePath;
-    private streamReady;
-    private pendingWrites;
-    private _shuttingDown;
-    private spansByTraceId;
-    constructor(filePath: string);
-    private ensureStream;
     export(spans: ReadableSpan[], resultCallback: (result: {
         code: number;
     }) => void): void;
     shutdown(): Promise<void>;
     forceFlush(): Promise<void>;
-    private buildSimpleRecord;
+    private flush;
 }
 /**
@@ -3599,17 +3559,17 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
  * Convention-based discovery of custom assertion scripts.
  *
  * Scans `.agentv/assertions/` for TypeScript/JavaScript files and registers
- * them as code-judge evaluators in the registry. The file name (without
- * extension) becomes the evaluator type name.
+ * them as code graders in the registry. The file name (without
+ * extension) becomes the grader type name.
  *
  * Example: `.agentv/assertions/sentiment.ts` → type "sentiment" in EVAL.yaml
  */
 /**
  * Discover custom assertion scripts from `.agentv/assertions/` and register
- * them as evaluator types in the registry.
+ * them as grader types in the registry.
  *
- * @param registry - The evaluator registry to register discovered assertions into
+ * @param registry - The grader registry to register discovered assertions into
  * @param baseDir - The base directory to search from (typically project root or eval file dir)
  * @returns Names of discovered assertion types
  */
@@ -3640,4 +3600,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };

package/dist/index.d.ts CHANGED Viewed

@@ -473,7 +473,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
  * - Either content (string or array of objects) OR tool_calls (for assistant messages)
  */
 declare function isTestMessage(value: unknown): value is TestMessage;
-declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "code-judge", "llm-judge", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
+declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
 type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
 declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
 /**
@@ -576,7 +576,7 @@ type WorkspaceConfig = {
 };
 type CodeEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'code-judge' | 'code-grader';
+    readonly type: 'code-grader';
     readonly command: readonly string[];
     /** @deprecated Use `command` instead */
     readonly script?: readonly string[];
@@ -606,7 +606,7 @@ type PromptScriptConfig = {
 };
 type LlmGraderEvaluatorConfig = {
     readonly name: string;
-    readonly type: 'llm-grader' | 'llm-judge';
+    readonly type: 'llm-grader';
     /** Text prompt (inline or file path) or executable script config */
     readonly prompt?: string | PromptScriptConfig;
     readonly promptPath?: string;
@@ -678,20 +678,11 @@ type CompositeAggregatorConfig = {
     readonly type: 'code-grader';
     readonly path: string;
     readonly cwd?: string;
-} | {
-    readonly type: 'code-judge';
-    readonly path: string;
-    readonly cwd?: string;
 } | {
     readonly type: 'llm-grader';
     readonly prompt?: string;
     readonly promptPath?: string;
     readonly model?: string;
-} | {
-    readonly type: 'llm-judge';
-    readonly prompt?: string;
-    readonly promptPath?: string;
-    readonly model?: string;
 } | {
     readonly type: 'threshold';
     readonly threshold: number;
@@ -1250,7 +1241,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
 declare const DEFAULT_EVAL_PATTERNS: readonly string[];
 type ExecutionDefaults = {
     readonly verbose?: boolean;
-    readonly trace_file?: string;
     readonly keep_workspaces?: boolean;
     readonly otel_file?: string;
     readonly export_otel?: boolean;
@@ -2118,7 +2108,7 @@ interface CodeEvaluatorOptions {
     readonly target?: TargetAccessConfig;
 }
 declare class CodeEvaluator implements Evaluator {
-    readonly kind = "code-judge";
+    readonly kind = "code-grader";
     private readonly command;
     private readonly cwd?;
     private readonly agentTimeoutMs?;
@@ -2853,7 +2843,7 @@ interface EvalTestInput {
     readonly expectedOutput?: string;
     /** @deprecated Use `expectedOutput` instead */
     readonly expected_output?: string;
-    /** Assertion evaluators — accepts factory functions, config objects, or inline functions */
+    /** Assertion graders — accepts factory functions, config objects, or inline functions */
     readonly assert?: readonly AssertEntry[];
     /** Arbitrary metadata */
     readonly metadata?: Record<string, unknown>;
@@ -2863,7 +2853,7 @@ interface EvalTestInput {
  * Matches the YAML `assert` block structure.
  */
 interface EvalAssertionInput {
-    /** Assertion type (e.g., 'contains', 'llm-judge', 'code-judge') */
+    /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
     readonly type: string;
     /** Display name */
     readonly name?: string;
@@ -2873,9 +2863,9 @@ interface EvalAssertionInput {
     readonly weight?: number;
     /** Whether this assertion is required to pass */
     readonly required?: boolean | number;
-    /** Prompt file for llm_judge */
+    /** Prompt file for llm_grader */
     readonly prompt?: string;
-    /** Script for code_judge */
+    /** Script for code_grader */
     readonly script?: string | readonly string[];
     /** Additional config passed to the assertion */
     readonly config?: Record<string, unknown>;
@@ -3024,8 +3014,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
         /** Enable verbose logging */
         verbose: z.ZodOptional<z.ZodBoolean>;
-        /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
-        traceFile: z.ZodOptional<z.ZodString>;
         /** Always keep temp workspaces after eval */
         keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
         /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -3036,7 +3024,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     }, {
         workers?: number | undefined;
@@ -3044,7 +3031,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     }>>;
     /** Output settings */
@@ -3093,7 +3079,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     } | undefined;
     cache?: {
@@ -3115,7 +3100,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
         maxRetries?: number | undefined;
         agentTimeoutMs?: number | undefined;
         keepWorkspaces?: boolean | undefined;
-        traceFile?: string | undefined;
         otelFile?: string | undefined;
     } | undefined;
     cache?: {
@@ -3469,8 +3453,6 @@ interface OtelExportOptions {
     readonly groupTurns?: boolean;
     /** Path to write OTLP JSON file (importable by OTel backends) */
     readonly otlpFilePath?: string;
-    /** Path to write human-readable simple JSONL trace file */
-    readonly traceFilePath?: string;
 }
 /** Preset configuration for a known observability backend. */
 interface OtelBackendPreset {
@@ -3543,7 +3525,7 @@ declare class OtelStreamingObserver {
     getStreamCallbacks(): ProviderStreamCallbacks;
 }
-type ReadableSpan$1 = any;
+type ReadableSpan = any;
 /**
  * SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
  * The file can be imported by any OTel-compatible backend.
@@ -3552,34 +3534,12 @@ declare class OtlpJsonFileExporter {
     private spans;
     private filePath;
     constructor(filePath: string);
-    export(spans: ReadableSpan$1[], resultCallback: (result: {
-        code: number;
-    }) => void): void;
-    shutdown(): Promise<void>;
-    forceFlush(): Promise<void>;
-    private flush;
-}
-type ReadableSpan = any;
-/**
- * SpanExporter that writes human-readable JSONL (one line per root span).
- * Designed for quick debugging and analysis without OTel tooling.
- */
-declare class SimpleTraceFileExporter {
-    private stream;
-    private filePath;
-    private streamReady;
-    private pendingWrites;
-    private _shuttingDown;
-    private spansByTraceId;
-    constructor(filePath: string);
-    private ensureStream;
     export(spans: ReadableSpan[], resultCallback: (result: {
         code: number;
     }) => void): void;
     shutdown(): Promise<void>;
     forceFlush(): Promise<void>;
-    private buildSimpleRecord;
+    private flush;
 }
 /**
@@ -3599,17 +3559,17 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
  * Convention-based discovery of custom assertion scripts.
  *
  * Scans `.agentv/assertions/` for TypeScript/JavaScript files and registers
- * them as code-judge evaluators in the registry. The file name (without
- * extension) becomes the evaluator type name.
+ * them as code graders in the registry. The file name (without
+ * extension) becomes the grader type name.
  *
  * Example: `.agentv/assertions/sentiment.ts` → type "sentiment" in EVAL.yaml
  */
 /**
  * Discover custom assertion scripts from `.agentv/assertions/` and register
- * them as evaluator types in the registry.
+ * them as grader types in the registry.
  *
- * @param registry - The evaluator registry to register discovered assertions into
+ * @param registry - The grader registry to register discovered assertions into
  * @param baseDir - The base directory to search from (typically project root or eval file dir)
  * @returns Names of discovered assertion types
  */
@@ -3640,4 +3600,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };