@agentv/core 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -204,6 +204,15 @@ declare function isTestMessage(value: unknown): value is TestMessage;
204
204
  declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite", "tool_trajectory", "field_accuracy", "latency", "cost", "token_usage"];
205
205
  type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
206
206
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
207
+ /**
208
+ * Configuration for enabling target access in code_judge evaluators.
209
+ * When present, the runtime will start a local proxy server that allows
210
+ * the script to invoke configured targets without direct credential access.
211
+ */
212
+ type TargetAccessConfig = {
213
+ /** Maximum number of target invocations allowed per execution (default: 50) */
214
+ readonly max_calls?: number;
215
+ };
207
216
  type CodeEvaluatorConfig = {
208
217
  readonly name: string;
209
218
  readonly type: 'code';
@@ -214,6 +223,8 @@ type CodeEvaluatorConfig = {
214
223
  readonly weight?: number;
215
224
  /** Pass-through configuration for the code_judge script (any unrecognized YAML properties) */
216
225
  readonly config?: JsonObject;
226
+ /** When present, enables target access for the script via local proxy */
227
+ readonly target?: TargetAccessConfig;
217
228
  };
218
229
  type LlmJudgeEvaluatorConfig = {
219
230
  readonly name: string;
@@ -343,7 +354,6 @@ interface EvalCase {
343
354
  readonly guideline_paths: readonly string[];
344
355
  readonly guideline_patterns?: readonly string[];
345
356
  readonly file_paths: readonly string[];
346
- readonly code_snippets: readonly string[];
347
357
  readonly expected_outcome: string;
348
358
  readonly evaluator?: EvaluatorKind;
349
359
  readonly evaluators?: readonly EvaluatorConfig[];
@@ -383,6 +393,8 @@ interface EvaluatorResult {
383
393
  readonly rawRequest?: JsonObject;
384
394
  readonly evaluatorProviderRequest?: JsonObject;
385
395
  readonly evaluatorResults?: readonly EvaluatorResult[];
396
+ /** Optional structured details from code judges (e.g., TP/TN/FP/FN counts). */
397
+ readonly details?: JsonObject;
386
398
  }
387
399
  /**
388
400
  * Convenience accessor matching the Python hit_count property.
@@ -396,7 +408,7 @@ interface ChatMessage {
396
408
  readonly name?: string;
397
409
  }
398
410
  type ChatPrompt = readonly ChatMessage[];
399
- type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'pi-coding-agent' | 'claude-code' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
411
+ type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'pi-coding-agent' | 'pi-agent-sdk' | 'claude-code' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
400
412
  interface ProviderRequest {
401
413
  readonly question: string;
402
414
  readonly systemPrompt?: string;
@@ -566,10 +578,6 @@ interface TargetDefinition {
566
578
  * - 'lm': Embedded file content with XML tags (for language model providers)
567
579
  */
568
580
  type FormattingMode = 'agent' | 'lm';
569
- /**
570
- * Extract fenced code blocks from AgentV user segments.
571
- */
572
- declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
573
581
 
574
582
  /**
575
583
  * Build prompt inputs by consolidating user request context and guideline content.
@@ -805,6 +813,13 @@ interface PiCodingAgentResolvedConfig {
805
813
  readonly logFormat?: 'summary' | 'json';
806
814
  readonly systemPrompt?: string;
807
815
  }
816
+ interface PiAgentSdkResolvedConfig {
817
+ readonly provider?: string;
818
+ readonly model?: string;
819
+ readonly apiKey?: string;
820
+ readonly timeoutMs?: number;
821
+ readonly systemPrompt?: string;
822
+ }
808
823
  interface ClaudeCodeResolvedConfig {
809
824
  readonly executable: string;
810
825
  readonly model?: string;
@@ -863,6 +878,13 @@ type ResolvedTarget = {
863
878
  readonly workers?: number;
864
879
  readonly providerBatching?: boolean;
865
880
  readonly config: PiCodingAgentResolvedConfig;
881
+ } | {
882
+ readonly kind: 'pi-agent-sdk';
883
+ readonly name: string;
884
+ readonly judgeTarget?: string;
885
+ readonly workers?: number;
886
+ readonly providerBatching?: boolean;
887
+ readonly config: PiAgentSdkResolvedConfig;
866
888
  } | {
867
889
  readonly kind: 'claude-code';
868
890
  readonly name: string;
@@ -948,6 +970,11 @@ declare function subscribeToClaudeCodeLogEntries(listener: ClaudeCodeLogListener
948
970
  declare function createProvider(target: ResolvedTarget): Provider;
949
971
  declare function resolveAndCreateProvider(definition: TargetDefinition, env?: EnvLookup): Provider;
950
972
 
973
+ /**
974
+ * Function to resolve a target name to a provider.
975
+ * Used by code judges to support target override.
976
+ */
977
+ type TargetResolver = (targetName: string) => Provider | undefined;
951
978
  interface EvaluationContext {
952
979
  readonly evalCase: EvalCase;
953
980
  readonly candidate: string;
@@ -968,6 +995,10 @@ interface EvaluationContext {
968
995
  readonly outputMessages?: readonly OutputMessage[];
969
996
  /** Lightweight summary of trace events (if available) */
970
997
  readonly traceSummary?: TraceSummary;
998
+ /** Resolver for target override in code judges */
999
+ readonly targetResolver?: TargetResolver;
1000
+ /** List of available target names for code judges */
1001
+ readonly availableTargets?: readonly string[];
971
1002
  }
972
1003
  interface EvaluationScore {
973
1004
  readonly score: number;
@@ -978,6 +1009,8 @@ interface EvaluationScore {
978
1009
  readonly reasoning?: string;
979
1010
  readonly evaluatorRawRequest?: JsonObject;
980
1011
  readonly evaluatorResults?: readonly ChildEvaluatorResult[];
1012
+ /** Optional structured details from code judges (e.g., TP/TN/FP/FN counts, alignments). */
1013
+ readonly details?: JsonObject;
981
1014
  }
982
1015
  interface ChildEvaluatorResult {
983
1016
  readonly name: string;
@@ -990,37 +1023,37 @@ interface ChildEvaluatorResult {
990
1023
  readonly reasoning?: string;
991
1024
  readonly evaluatorRawRequest?: JsonObject;
992
1025
  readonly evaluatorResults?: readonly ChildEvaluatorResult[];
1026
+ /** Optional structured details from code judges (e.g., TP/TN/FP/FN counts, alignments). */
1027
+ readonly details?: JsonObject;
993
1028
  }
994
1029
  interface Evaluator {
995
1030
  readonly kind: string;
996
1031
  evaluate(context: EvaluationContext): Promise<EvaluationScore> | EvaluationScore;
997
1032
  }
998
- type JudgeProviderResolver = (context: EvaluationContext) => Promise<Provider | undefined>;
999
- interface LlmJudgeEvaluatorOptions {
1000
- readonly resolveJudgeProvider: JudgeProviderResolver;
1001
- readonly maxOutputTokens?: number;
1002
- readonly temperature?: number;
1003
- readonly evaluatorTemplate?: string;
1004
- }
1005
- declare class LlmJudgeEvaluator implements Evaluator {
1006
- readonly kind = "llm_judge";
1007
- private readonly resolveJudgeProvider;
1008
- private readonly maxOutputTokens?;
1009
- private readonly temperature?;
1010
- private readonly evaluatorTemplate?;
1011
- constructor(options: LlmJudgeEvaluatorOptions);
1012
- evaluate(context: EvaluationContext): Promise<EvaluationScore>;
1013
- private evaluateFreeform;
1014
- private evaluateWithRubrics;
1015
- private buildRubricPrompt;
1016
- private runWithRetry;
1033
+ interface EvaluatorFactory {
1034
+ create(config: EvaluatorConfig, context: EvaluationContext): Evaluator;
1017
1035
  }
1036
+
1037
+ declare function scoreToVerdict(score: number): EvaluationVerdict;
1038
+ declare function clampScore(value: number): number;
1039
+ declare function extractJsonBlob(text: string): string | undefined;
1040
+ declare function parseJsonFromText(text: string): unknown;
1041
+ declare function isNonEmptyString(value: unknown): value is string;
1042
+ declare function parseJsonSafe(payload: string): Record<string, unknown> | undefined;
1043
+ /**
1044
+ * Deep equality check for two values.
1045
+ * Handles primitives, arrays, and plain objects.
1046
+ */
1047
+ declare function deepEqual(a: unknown, b: unknown): boolean;
1048
+
1018
1049
  interface CodeEvaluatorOptions {
1019
1050
  readonly script: readonly string[];
1020
1051
  readonly cwd?: string;
1021
1052
  readonly agentTimeoutMs?: number;
1022
1053
  /** Pass-through configuration from YAML (any unrecognized properties) */
1023
1054
  readonly config?: Record<string, unknown>;
1055
+ /** Target access config - when present, enables target invocation for the script */
1056
+ readonly target?: TargetAccessConfig;
1024
1057
  }
1025
1058
  declare class CodeEvaluator implements Evaluator {
1026
1059
  readonly kind = "code";
@@ -1028,29 +1061,44 @@ declare class CodeEvaluator implements Evaluator {
1028
1061
  private readonly cwd?;
1029
1062
  private readonly agentTimeoutMs?;
1030
1063
  private readonly config?;
1064
+ private readonly target?;
1031
1065
  constructor(options: CodeEvaluatorOptions);
1032
1066
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
1033
1067
  }
1034
- interface ToolTrajectoryEvaluatorOptions {
1035
- readonly config: ToolTrajectoryEvaluatorConfig;
1068
+ declare function executeScript(scriptPath: readonly string[] | string, input: string, agentTimeoutMs?: number, cwd?: string, env?: Record<string, string>): Promise<string>;
1069
+
1070
+ interface CompositeEvaluatorOptions {
1071
+ readonly config: CompositeEvaluatorConfig;
1072
+ readonly evaluatorFactory: EvaluatorFactory;
1073
+ readonly cwd?: string;
1036
1074
  }
1037
- declare class ToolTrajectoryEvaluator implements Evaluator {
1038
- readonly kind = "tool_trajectory";
1075
+ declare class CompositeEvaluator implements Evaluator {
1076
+ readonly kind = "composite";
1039
1077
  private readonly config;
1040
- constructor(options: ToolTrajectoryEvaluatorOptions);
1078
+ private readonly evaluatorFactory;
1079
+ private readonly cwd?;
1080
+ constructor(options: CompositeEvaluatorOptions);
1081
+ evaluate(context: EvaluationContext): Promise<EvaluationScore>;
1082
+ private aggregate;
1083
+ private runWeightedAverage;
1084
+ private runCodeAggregator;
1085
+ private runLlmAggregator;
1086
+ }
1087
+
1088
+ interface CostEvaluatorOptions {
1089
+ readonly config: CostEvaluatorConfig;
1090
+ }
1091
+ /**
1092
+ * Evaluator that checks execution cost against a budget.
1093
+ * Uses traceSummary.costUsd from the evaluation context.
1094
+ */
1095
+ declare class CostEvaluator implements Evaluator {
1096
+ readonly kind = "cost";
1097
+ private readonly config;
1098
+ constructor(options: CostEvaluatorOptions);
1041
1099
  evaluate(context: EvaluationContext): EvaluationScore;
1042
- /**
1043
- * Extract tool calls from output messages.
1044
- */
1045
- private extractToolCallsFromMessages;
1046
- /**
1047
- * Build a summary from extracted tool calls.
1048
- */
1049
- private buildSummary;
1050
- private evaluateAnyOrder;
1051
- private evaluateInOrder;
1052
- private evaluateExact;
1053
1100
  }
1101
+
1054
1102
  interface FieldAccuracyEvaluatorOptions {
1055
1103
  readonly config: FieldAccuracyEvaluatorConfig;
1056
1104
  }
@@ -1089,26 +1137,7 @@ declare class FieldAccuracyEvaluator implements Evaluator {
1089
1137
  */
1090
1138
  private aggregateResults;
1091
1139
  }
1092
- interface EvaluatorFactory {
1093
- create(config: EvaluatorConfig, context: EvaluationContext): Evaluator;
1094
- }
1095
- interface CompositeEvaluatorOptions {
1096
- readonly config: CompositeEvaluatorConfig;
1097
- readonly evaluatorFactory: EvaluatorFactory;
1098
- readonly cwd?: string;
1099
- }
1100
- declare class CompositeEvaluator implements Evaluator {
1101
- readonly kind = "composite";
1102
- private readonly config;
1103
- private readonly evaluatorFactory;
1104
- private readonly cwd?;
1105
- constructor(options: CompositeEvaluatorOptions);
1106
- evaluate(context: EvaluationContext): Promise<EvaluationScore>;
1107
- private aggregate;
1108
- private runWeightedAverage;
1109
- private runCodeAggregator;
1110
- private runLlmAggregator;
1111
- }
1140
+
1112
1141
  interface LatencyEvaluatorOptions {
1113
1142
  readonly config: LatencyEvaluatorConfig;
1114
1143
  }
@@ -1122,19 +1151,50 @@ declare class LatencyEvaluator implements Evaluator {
1122
1151
  constructor(options: LatencyEvaluatorOptions);
1123
1152
  evaluate(context: EvaluationContext): EvaluationScore;
1124
1153
  }
1125
- interface CostEvaluatorOptions {
1126
- readonly config: CostEvaluatorConfig;
1154
+
1155
+ type JudgeProviderResolver = (context: EvaluationContext) => Promise<Provider | undefined>;
1156
+ interface LlmJudgeEvaluatorOptions {
1157
+ readonly resolveJudgeProvider: JudgeProviderResolver;
1158
+ readonly maxOutputTokens?: number;
1159
+ readonly temperature?: number;
1160
+ readonly evaluatorTemplate?: string;
1161
+ }
1162
+ declare const freeformEvaluationSchema: z.ZodObject<{
1163
+ score: z.ZodNumber;
1164
+ hits: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1165
+ misses: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1166
+ reasoning: z.ZodOptional<z.ZodString>;
1167
+ }, "strip", z.ZodTypeAny, {
1168
+ score: number;
1169
+ hits?: string[] | undefined;
1170
+ misses?: string[] | undefined;
1171
+ reasoning?: string | undefined;
1172
+ }, {
1173
+ score: number;
1174
+ hits?: string[] | undefined;
1175
+ misses?: string[] | undefined;
1176
+ reasoning?: string | undefined;
1177
+ }>;
1178
+
1179
+ declare class LlmJudgeEvaluator implements Evaluator {
1180
+ readonly kind = "llm_judge";
1181
+ private readonly resolveJudgeProvider;
1182
+ private readonly maxOutputTokens?;
1183
+ private readonly temperature?;
1184
+ private readonly evaluatorTemplate?;
1185
+ constructor(options: LlmJudgeEvaluatorOptions);
1186
+ evaluate(context: EvaluationContext): Promise<EvaluationScore>;
1187
+ private evaluateFreeform;
1188
+ private evaluateWithRubrics;
1189
+ private buildRubricPrompt;
1190
+ private runWithRetry;
1127
1191
  }
1128
1192
  /**
1129
- * Evaluator that checks execution cost against a budget.
1130
- * Uses traceSummary.costUsd from the evaluation context.
1193
+ * Build the mandatory output schema that all evaluators must follow.
1194
+ * This schema is always appended to the evaluator template.
1131
1195
  */
1132
- declare class CostEvaluator implements Evaluator {
1133
- readonly kind = "cost";
1134
- private readonly config;
1135
- constructor(options: CostEvaluatorOptions);
1136
- evaluate(context: EvaluationContext): EvaluationScore;
1137
- }
1196
+ declare function buildOutputSchema(): string;
1197
+
1138
1198
  interface TokenUsageEvaluatorOptions {
1139
1199
  readonly config: TokenUsageEvaluatorConfig;
1140
1200
  }
@@ -1149,6 +1209,27 @@ declare class TokenUsageEvaluator implements Evaluator {
1149
1209
  evaluate(context: EvaluationContext): EvaluationScore;
1150
1210
  }
1151
1211
 
1212
+ interface ToolTrajectoryEvaluatorOptions {
1213
+ readonly config: ToolTrajectoryEvaluatorConfig;
1214
+ }
1215
+ declare class ToolTrajectoryEvaluator implements Evaluator {
1216
+ readonly kind = "tool_trajectory";
1217
+ private readonly config;
1218
+ constructor(options: ToolTrajectoryEvaluatorOptions);
1219
+ evaluate(context: EvaluationContext): EvaluationScore;
1220
+ /**
1221
+ * Extract tool calls from output messages.
1222
+ */
1223
+ private extractToolCallsFromMessages;
1224
+ /**
1225
+ * Build a summary from extracted tool calls.
1226
+ */
1227
+ private buildSummary;
1228
+ private evaluateAnyOrder;
1229
+ private evaluateInOrder;
1230
+ private evaluateExact;
1231
+ }
1232
+
1152
1233
  type MaybePromise<T> = T | Promise<T>;
1153
1234
  interface EvaluationCache {
1154
1235
  get(key: string): MaybePromise<ProviderResponse | undefined>;
@@ -1168,6 +1249,10 @@ interface RunEvalCaseOptions {
1168
1249
  readonly useCache?: boolean;
1169
1250
  readonly signal?: AbortSignal;
1170
1251
  readonly judgeProvider?: Provider;
1252
+ /** Resolver for target override in code judges */
1253
+ readonly targetResolver?: (name: string) => Provider | undefined;
1254
+ /** List of available target names for code judges */
1255
+ readonly availableTargets?: readonly string[];
1171
1256
  }
1172
1257
  interface ProgressEvent {
1173
1258
  readonly workerId: number;
@@ -1211,37 +1296,9 @@ interface GenerateRubricsOptions {
1211
1296
  */
1212
1297
  declare function generateRubrics(options: GenerateRubricsOptions): Promise<readonly RubricItem[]>;
1213
1298
 
1214
- /**
1215
- * Payload received by code judges via stdin.
1216
- * All properties use camelCase for TypeScript ergonomics.
1217
- */
1218
- interface CodeJudgePayload {
1219
- readonly question: string;
1220
- readonly expectedOutcome: string;
1221
- readonly expectedMessages: readonly JsonObject[];
1222
- readonly referenceAnswer?: string;
1223
- readonly candidateAnswer: string;
1224
- readonly outputMessages?: readonly OutputMessage[] | null;
1225
- readonly guidelineFiles: readonly string[];
1226
- readonly inputFiles: readonly string[];
1227
- readonly inputMessages: readonly TestMessage[];
1228
- readonly traceSummary?: TraceSummary | null;
1229
- readonly config?: JsonObject | null;
1230
- }
1231
- /**
1232
- * Parse stdin JSON (snake_case) into typed camelCase object.
1233
- * Use this in TypeScript code judges to get type-safe, idiomatic input.
1234
- */
1235
- declare function parseCodeJudgePayload(payload: string): CodeJudgePayload;
1236
- /**
1237
- * Convenience helper that reads stdin and parses it.
1238
- * Equivalent to: parseCodeJudgePayload(readFileSync(0, 'utf8'))
1239
- */
1240
- declare function readCodeJudgePayload(): CodeJudgePayload;
1241
-
1242
1299
  type AgentKernel = {
1243
1300
  status: string;
1244
1301
  };
1245
1302
  declare function createAgentKernel(): AgentKernel;
1246
1303
 
1247
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CodeJudgePayload, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, explorationRatio, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseCodeJudgePayload, readCodeJudgePayload, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
1304
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };