@agentv/core 3.8.0 → 3.9.1
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-3ZS3GCMI.js → chunk-PC5TLJF6.js} +1 -2
- package/dist/chunk-PC5TLJF6.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -33
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -33
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +533 -772
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +16 -21
- package/dist/index.d.ts +16 -21
- package/dist/index.js +381 -619
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-3ZS3GCMI.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -23,8 +23,6 @@ interface ProviderStreamCallbacks {
|
|
|
23
23
|
interface ProviderRequest {
|
|
24
24
|
readonly question: string;
|
|
25
25
|
readonly systemPrompt?: string;
|
|
26
|
-
readonly guidelines?: string;
|
|
27
|
-
readonly guideline_patterns?: readonly string[];
|
|
28
26
|
readonly chatPrompt?: ChatPrompt;
|
|
29
27
|
readonly inputFiles?: readonly string[];
|
|
30
28
|
readonly evalCaseId?: string;
|
|
@@ -1016,15 +1014,13 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmGraderEvaluatorConfig | Composit
|
|
|
1016
1014
|
*/
|
|
1017
1015
|
interface EvalTest {
|
|
1018
1016
|
readonly id: string;
|
|
1019
|
-
readonly
|
|
1017
|
+
readonly eval_set?: string;
|
|
1020
1018
|
readonly conversation_id?: string;
|
|
1021
1019
|
readonly question: string;
|
|
1022
1020
|
readonly input: readonly TestMessage[];
|
|
1023
1021
|
readonly input_segments: readonly JsonObject[];
|
|
1024
1022
|
readonly expected_output: readonly JsonObject[];
|
|
1025
1023
|
readonly reference_answer?: string;
|
|
1026
|
-
readonly guideline_paths: readonly string[];
|
|
1027
|
-
readonly guideline_patterns?: readonly string[];
|
|
1028
1024
|
readonly file_paths: readonly string[];
|
|
1029
1025
|
readonly criteria: string;
|
|
1030
1026
|
readonly evaluator?: EvaluatorKind;
|
|
@@ -1128,7 +1124,7 @@ type FailOnError = boolean;
|
|
|
1128
1124
|
interface EvaluationResult {
|
|
1129
1125
|
readonly timestamp: string;
|
|
1130
1126
|
readonly testId: string;
|
|
1131
|
-
readonly
|
|
1127
|
+
readonly eval_set?: string;
|
|
1132
1128
|
readonly conversationId?: string;
|
|
1133
1129
|
readonly score: number;
|
|
1134
1130
|
readonly assertions: readonly AssertionEntry[];
|
|
@@ -1261,7 +1257,6 @@ type ExecutionDefaults = {
|
|
|
1261
1257
|
};
|
|
1262
1258
|
type AgentVConfig$1 = {
|
|
1263
1259
|
readonly required_version?: string;
|
|
1264
|
-
readonly guideline_patterns?: readonly string[];
|
|
1265
1260
|
readonly eval_patterns?: readonly string[];
|
|
1266
1261
|
readonly execution?: ExecutionDefaults;
|
|
1267
1262
|
};
|
|
@@ -1270,10 +1265,6 @@ type AgentVConfig$1 = {
|
|
|
1270
1265
|
* Searches from eval file directory up to repo root.
|
|
1271
1266
|
*/
|
|
1272
1267
|
declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
|
|
1273
|
-
/**
|
|
1274
|
-
* Determine whether a path references guideline content (instructions or prompts).
|
|
1275
|
-
*/
|
|
1276
|
-
declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
|
|
1277
1268
|
/**
|
|
1278
1269
|
* Extract target name from parsed eval suite (checks execution.target then falls back to root-level target).
|
|
1279
1270
|
*/
|
|
@@ -1284,6 +1275,10 @@ declare function extractTargetFromSuite(suite: JsonObject): string | undefined;
|
|
|
1284
1275
|
* Returns undefined when no targets array is specified.
|
|
1285
1276
|
*/
|
|
1286
1277
|
declare function extractTargetsFromSuite(suite: JsonObject): readonly string[] | undefined;
|
|
1278
|
+
/**
|
|
1279
|
+
* Extract workers count from suite-level execution block.
|
|
1280
|
+
*/
|
|
1281
|
+
declare function extractWorkersFromSuite(suite: JsonObject): number | undefined;
|
|
1287
1282
|
/**
|
|
1288
1283
|
* Extract per-test targets array from a raw test case object.
|
|
1289
1284
|
*/
|
|
@@ -1320,16 +1315,15 @@ declare function extractFailOnError(suite: JsonObject): FailOnError | undefined;
|
|
|
1320
1315
|
type FormattingMode = 'agent' | 'lm';
|
|
1321
1316
|
|
|
1322
1317
|
/**
|
|
1323
|
-
* Build prompt inputs by consolidating user request context
|
|
1318
|
+
* Build prompt inputs by consolidating user request context.
|
|
1324
1319
|
*/
|
|
1325
1320
|
interface PromptInputs {
|
|
1326
1321
|
readonly question: string;
|
|
1327
|
-
readonly guidelines: string;
|
|
1328
1322
|
readonly chatPrompt?: ChatPrompt;
|
|
1329
1323
|
readonly systemMessage?: string;
|
|
1330
1324
|
}
|
|
1331
1325
|
/**
|
|
1332
|
-
* Build prompt inputs by consolidating user request context
|
|
1326
|
+
* Build prompt inputs by consolidating user request context.
|
|
1333
1327
|
*
|
|
1334
1328
|
* @param testCase - The evaluation test case
|
|
1335
1329
|
* @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
|
|
@@ -1364,6 +1358,8 @@ type EvalSuiteResult = {
|
|
|
1364
1358
|
readonly trials?: TrialsConfig;
|
|
1365
1359
|
/** Suite-level targets from execution.targets (matrix evaluation) */
|
|
1366
1360
|
readonly targets?: readonly string[];
|
|
1361
|
+
/** Suite-level workers from execution.workers */
|
|
1362
|
+
readonly workers?: number;
|
|
1367
1363
|
/** Suite-level cache config from execution.cache */
|
|
1368
1364
|
readonly cacheConfig?: CacheConfig;
|
|
1369
1365
|
/** Suite-level metadata (name, description, version, etc.) */
|
|
@@ -2018,7 +2014,6 @@ interface EvaluationContext {
|
|
|
2018
2014
|
readonly attempt: number;
|
|
2019
2015
|
readonly promptInputs: {
|
|
2020
2016
|
readonly question: string;
|
|
2021
|
-
readonly guidelines: string;
|
|
2022
2017
|
readonly systemMessage?: string;
|
|
2023
2018
|
readonly chatPrompt?: ChatPrompt;
|
|
2024
2019
|
};
|
|
@@ -3018,16 +3013,16 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3018
3013
|
/** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
|
|
3019
3014
|
otelFile: z.ZodOptional<z.ZodString>;
|
|
3020
3015
|
}, "strip", z.ZodTypeAny, {
|
|
3021
|
-
verbose?: boolean | undefined;
|
|
3022
3016
|
workers?: number | undefined;
|
|
3017
|
+
verbose?: boolean | undefined;
|
|
3023
3018
|
maxRetries?: number | undefined;
|
|
3024
3019
|
agentTimeoutMs?: number | undefined;
|
|
3025
3020
|
keepWorkspaces?: boolean | undefined;
|
|
3026
3021
|
traceFile?: string | undefined;
|
|
3027
3022
|
otelFile?: string | undefined;
|
|
3028
3023
|
}, {
|
|
3029
|
-
verbose?: boolean | undefined;
|
|
3030
3024
|
workers?: number | undefined;
|
|
3025
|
+
verbose?: boolean | undefined;
|
|
3031
3026
|
maxRetries?: number | undefined;
|
|
3032
3027
|
agentTimeoutMs?: number | undefined;
|
|
3033
3028
|
keepWorkspaces?: boolean | undefined;
|
|
@@ -3075,8 +3070,8 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3075
3070
|
}>>;
|
|
3076
3071
|
}, "strip", z.ZodTypeAny, {
|
|
3077
3072
|
execution?: {
|
|
3078
|
-
verbose?: boolean | undefined;
|
|
3079
3073
|
workers?: number | undefined;
|
|
3074
|
+
verbose?: boolean | undefined;
|
|
3080
3075
|
maxRetries?: number | undefined;
|
|
3081
3076
|
agentTimeoutMs?: number | undefined;
|
|
3082
3077
|
keepWorkspaces?: boolean | undefined;
|
|
@@ -3097,8 +3092,8 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3097
3092
|
} | undefined;
|
|
3098
3093
|
}, {
|
|
3099
3094
|
execution?: {
|
|
3100
|
-
verbose?: boolean | undefined;
|
|
3101
3095
|
workers?: number | undefined;
|
|
3096
|
+
verbose?: boolean | undefined;
|
|
3102
3097
|
maxRetries?: number | undefined;
|
|
3103
3098
|
agentTimeoutMs?: number | undefined;
|
|
3104
3099
|
keepWorkspaces?: boolean | undefined;
|
|
@@ -3499,7 +3494,7 @@ declare class OtelStreamingObserver {
|
|
|
3499
3494
|
private rootCtx;
|
|
3500
3495
|
constructor(tracer: Tracer, api: OtelApi, captureContent: boolean, parentCtx?: any | undefined);
|
|
3501
3496
|
/** Create root eval span immediately (visible in backend right away) */
|
|
3502
|
-
startEvalCase(testId: string, target: string,
|
|
3497
|
+
startEvalCase(testId: string, target: string, evalSet?: string): void;
|
|
3503
3498
|
/** Create and immediately export a tool span */
|
|
3504
3499
|
onToolCall(name: string, input: unknown, output: unknown, _durationMs: number, toolCallId?: string): void;
|
|
3505
3500
|
/** Create and immediately export an LLM span */
|
|
@@ -3612,4 +3607,4 @@ type AgentKernel = {
|
|
|
3612
3607
|
};
|
|
3613
3608
|
declare function createAgentKernel(): AgentKernel;
|
|
3614
3609
|
|
|
3615
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, 
type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind,
|
|
3610
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, 
type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, 
loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.d.ts
CHANGED
|
@@ -23,8 +23,6 @@ interface ProviderStreamCallbacks {
|
|
|
23
23
|
interface ProviderRequest {
|
|
24
24
|
readonly question: string;
|
|
25
25
|
readonly systemPrompt?: string;
|
|
26
|
-
readonly guidelines?: string;
|
|
27
|
-
readonly guideline_patterns?: readonly string[];
|
|
28
26
|
readonly chatPrompt?: ChatPrompt;
|
|
29
27
|
readonly inputFiles?: readonly string[];
|
|
30
28
|
readonly evalCaseId?: string;
|
|
@@ -1016,15 +1014,13 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmGraderEvaluatorConfig | Composit
|
|
|
1016
1014
|
*/
|
|
1017
1015
|
interface EvalTest {
|
|
1018
1016
|
readonly id: string;
|
|
1019
|
-
readonly
|
|
1017
|
+
readonly eval_set?: string;
|
|
1020
1018
|
readonly conversation_id?: string;
|
|
1021
1019
|
readonly question: string;
|
|
1022
1020
|
readonly input: readonly TestMessage[];
|
|
1023
1021
|
readonly input_segments: readonly JsonObject[];
|
|
1024
1022
|
readonly expected_output: readonly JsonObject[];
|
|
1025
1023
|
readonly reference_answer?: string;
|
|
1026
|
-
readonly guideline_paths: readonly string[];
|
|
1027
|
-
readonly guideline_patterns?: readonly string[];
|
|
1028
1024
|
readonly file_paths: readonly string[];
|
|
1029
1025
|
readonly criteria: string;
|
|
1030
1026
|
readonly evaluator?: EvaluatorKind;
|
|
@@ -1128,7 +1124,7 @@ type FailOnError = boolean;
|
|
|
1128
1124
|
interface EvaluationResult {
|
|
1129
1125
|
readonly timestamp: string;
|
|
1130
1126
|
readonly testId: string;
|
|
1131
|
-
readonly
|
|
1127
|
+
readonly eval_set?: string;
|
|
1132
1128
|
readonly conversationId?: string;
|
|
1133
1129
|
readonly score: number;
|
|
1134
1130
|
readonly assertions: readonly AssertionEntry[];
|
|
@@ -1261,7 +1257,6 @@ type ExecutionDefaults = {
|
|
|
1261
1257
|
};
|
|
1262
1258
|
type AgentVConfig$1 = {
|
|
1263
1259
|
readonly required_version?: string;
|
|
1264
|
-
readonly guideline_patterns?: readonly string[];
|
|
1265
1260
|
readonly eval_patterns?: readonly string[];
|
|
1266
1261
|
readonly execution?: ExecutionDefaults;
|
|
1267
1262
|
};
|
|
@@ -1270,10 +1265,6 @@ type AgentVConfig$1 = {
|
|
|
1270
1265
|
* Searches from eval file directory up to repo root.
|
|
1271
1266
|
*/
|
|
1272
1267
|
declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
|
|
1273
|
-
/**
|
|
1274
|
-
* Determine whether a path references guideline content (instructions or prompts).
|
|
1275
|
-
*/
|
|
1276
|
-
declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
|
|
1277
1268
|
/**
|
|
1278
1269
|
* Extract target name from parsed eval suite (checks execution.target then falls back to root-level target).
|
|
1279
1270
|
*/
|
|
@@ -1284,6 +1275,10 @@ declare function extractTargetFromSuite(suite: JsonObject): string | undefined;
|
|
|
1284
1275
|
* Returns undefined when no targets array is specified.
|
|
1285
1276
|
*/
|
|
1286
1277
|
declare function extractTargetsFromSuite(suite: JsonObject): readonly string[] | undefined;
|
|
1278
|
+
/**
|
|
1279
|
+
* Extract workers count from suite-level execution block.
|
|
1280
|
+
*/
|
|
1281
|
+
declare function extractWorkersFromSuite(suite: JsonObject): number | undefined;
|
|
1287
1282
|
/**
|
|
1288
1283
|
* Extract per-test targets array from a raw test case object.
|
|
1289
1284
|
*/
|
|
@@ -1320,16 +1315,15 @@ declare function extractFailOnError(suite: JsonObject): FailOnError | undefined;
|
|
|
1320
1315
|
type FormattingMode = 'agent' | 'lm';
|
|
1321
1316
|
|
|
1322
1317
|
/**
|
|
1323
|
-
* Build prompt inputs by consolidating user request context
|
|
1318
|
+
* Build prompt inputs by consolidating user request context.
|
|
1324
1319
|
*/
|
|
1325
1320
|
interface PromptInputs {
|
|
1326
1321
|
readonly question: string;
|
|
1327
|
-
readonly guidelines: string;
|
|
1328
1322
|
readonly chatPrompt?: ChatPrompt;
|
|
1329
1323
|
readonly systemMessage?: string;
|
|
1330
1324
|
}
|
|
1331
1325
|
/**
|
|
1332
|
-
* Build prompt inputs by consolidating user request context
|
|
1326
|
+
* Build prompt inputs by consolidating user request context.
|
|
1333
1327
|
*
|
|
1334
1328
|
* @param testCase - The evaluation test case
|
|
1335
1329
|
* @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
|
|
@@ -1364,6 +1358,8 @@ type EvalSuiteResult = {
|
|
|
1364
1358
|
readonly trials?: TrialsConfig;
|
|
1365
1359
|
/** Suite-level targets from execution.targets (matrix evaluation) */
|
|
1366
1360
|
readonly targets?: readonly string[];
|
|
1361
|
+
/** Suite-level workers from execution.workers */
|
|
1362
|
+
readonly workers?: number;
|
|
1367
1363
|
/** Suite-level cache config from execution.cache */
|
|
1368
1364
|
readonly cacheConfig?: CacheConfig;
|
|
1369
1365
|
/** Suite-level metadata (name, description, version, etc.) */
|
|
@@ -2018,7 +2014,6 @@ interface EvaluationContext {
|
|
|
2018
2014
|
readonly attempt: number;
|
|
2019
2015
|
readonly promptInputs: {
|
|
2020
2016
|
readonly question: string;
|
|
2021
|
-
readonly guidelines: string;
|
|
2022
2017
|
readonly systemMessage?: string;
|
|
2023
2018
|
readonly chatPrompt?: ChatPrompt;
|
|
2024
2019
|
};
|
|
@@ -3018,16 +3013,16 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3018
3013
|
/** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
|
|
3019
3014
|
otelFile: z.ZodOptional<z.ZodString>;
|
|
3020
3015
|
}, "strip", z.ZodTypeAny, {
|
|
3021
|
-
verbose?: boolean | undefined;
|
|
3022
3016
|
workers?: number | undefined;
|
|
3017
|
+
verbose?: boolean | undefined;
|
|
3023
3018
|
maxRetries?: number | undefined;
|
|
3024
3019
|
agentTimeoutMs?: number | undefined;
|
|
3025
3020
|
keepWorkspaces?: boolean | undefined;
|
|
3026
3021
|
traceFile?: string | undefined;
|
|
3027
3022
|
otelFile?: string | undefined;
|
|
3028
3023
|
}, {
|
|
3029
|
-
verbose?: boolean | undefined;
|
|
3030
3024
|
workers?: number | undefined;
|
|
3025
|
+
verbose?: boolean | undefined;
|
|
3031
3026
|
maxRetries?: number | undefined;
|
|
3032
3027
|
agentTimeoutMs?: number | undefined;
|
|
3033
3028
|
keepWorkspaces?: boolean | undefined;
|
|
@@ -3075,8 +3070,8 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3075
3070
|
}>>;
|
|
3076
3071
|
}, "strip", z.ZodTypeAny, {
|
|
3077
3072
|
execution?: {
|
|
3078
|
-
verbose?: boolean | undefined;
|
|
3079
3073
|
workers?: number | undefined;
|
|
3074
|
+
verbose?: boolean | undefined;
|
|
3080
3075
|
maxRetries?: number | undefined;
|
|
3081
3076
|
agentTimeoutMs?: number | undefined;
|
|
3082
3077
|
keepWorkspaces?: boolean | undefined;
|
|
@@ -3097,8 +3092,8 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3097
3092
|
} | undefined;
|
|
3098
3093
|
}, {
|
|
3099
3094
|
execution?: {
|
|
3100
|
-
verbose?: boolean | undefined;
|
|
3101
3095
|
workers?: number | undefined;
|
|
3096
|
+
verbose?: boolean | undefined;
|
|
3102
3097
|
maxRetries?: number | undefined;
|
|
3103
3098
|
agentTimeoutMs?: number | undefined;
|
|
3104
3099
|
keepWorkspaces?: boolean | undefined;
|
|
@@ -3499,7 +3494,7 @@ declare class OtelStreamingObserver {
|
|
|
3499
3494
|
private rootCtx;
|
|
3500
3495
|
constructor(tracer: Tracer, api: OtelApi, captureContent: boolean, parentCtx?: any | undefined);
|
|
3501
3496
|
/** Create root eval span immediately (visible in backend right away) */
|
|
3502
|
-
startEvalCase(testId: string, target: string,
|
|
3497
|
+
startEvalCase(testId: string, target: string, evalSet?: string): void;
|
|
3503
3498
|
/** Create and immediately export a tool span */
|
|
3504
3499
|
onToolCall(name: string, input: unknown, output: unknown, _durationMs: number, toolCallId?: string): void;
|
|
3505
3500
|
/** Create and immediately export an LLM span */
|
|
@@ -3612,4 +3607,4 @@ type AgentKernel = {
|
|
|
3612
3607
|
};
|
|
3613
3608
|
declare function createAgentKernel(): AgentKernel;
|
|
3614
3609
|
|
|
3615
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, 
type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind,
|
|
3610
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type 
FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SimpleTraceFileExporter, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, 
type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, 
loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|