@agentv/core 2.1.1 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -60,6 +60,8 @@ interface ToolTrajectoryExpectedItem {
60
60
  readonly tool: string;
61
61
  /** Optional argument matching: 'any' skips validation, object performs partial deep equality */
62
62
  readonly args?: 'any' | Record<string, unknown>;
63
+ /** Optional maximum duration in milliseconds for latency assertions */
64
+ readonly maxDurationMs?: number;
63
65
  }
64
66
  /**
65
67
  * Simplified input type for computeTraceSummary.
@@ -226,19 +228,71 @@ type CodeEvaluatorConfig = {
226
228
  /** When present, enables target access for the script via local proxy */
227
229
  readonly target?: TargetAccessConfig;
228
230
  };
231
+ /**
232
+ * Executable prompt template configuration.
233
+ * Matches code_judge pattern for consistency.
234
+ */
235
+ type PromptScriptConfig = {
236
+ /** Command array to execute (e.g., ["bun", "run", "template.ts"]) */
237
+ readonly script: readonly string[];
238
+ /** Pass-through configuration for the prompt template */
239
+ readonly config?: Record<string, unknown>;
240
+ };
229
241
  type LlmJudgeEvaluatorConfig = {
230
242
  readonly name: string;
231
243
  readonly type: 'llm_judge';
232
- readonly prompt?: string;
244
+ /** Text prompt (inline or file path) or executable script config */
245
+ readonly prompt?: string | PromptScriptConfig;
233
246
  readonly promptPath?: string;
247
+ /** Resolved absolute path for prompt file (used for text template prompts) */
248
+ readonly resolvedPromptPath?: string;
249
+ /** Resolved script array for executable prompts (matches code_judge pattern) */
250
+ readonly resolvedPromptScript?: readonly string[];
234
251
  readonly rubrics?: readonly RubricItem[];
235
252
  readonly weight?: number;
253
+ /** Pass-through configuration for custom evaluator prompts (legacy, prefer prompt.config) */
254
+ readonly config?: Record<string, unknown>;
255
+ };
256
+ /**
257
+ * Score range definition for analytic rubric scoring.
258
+ * Each range maps an integer score band (0-10) to an expected outcome description.
259
+ */
260
+ type ScoreRange = {
261
+ /** Inclusive integer range [min, max] within 0-10 */
262
+ readonly score_range: readonly [number, number];
263
+ /** Description of what this score range represents */
264
+ readonly expected_outcome: string;
236
265
  };
266
+ /**
267
+ * Rubric item for LLM judge evaluation.
268
+ * Supports two modes:
269
+ * - Checklist mode: boolean satisfied/not-satisfied with `expected_outcome`
270
+ * - Score-range mode: 0-10 integer scoring with `score_ranges`
271
+ */
237
272
  type RubricItem = {
238
273
  readonly id: string;
239
- readonly description: string;
274
+ /**
275
+ * For checklist rubrics: the expected outcome text (required).
276
+ * For score-range rubrics: optional overall criterion description.
277
+ */
278
+ readonly expected_outcome?: string;
240
279
  readonly weight: number;
241
- readonly required: boolean;
280
+ /**
281
+ * Legacy boolean gating (deprecated, treated as required_min_score: 10).
282
+ * Use required_min_score instead for finer control.
283
+ */
284
+ readonly required?: boolean;
285
+ /**
286
+ * Minimum score (0-10) required to pass this criterion.
287
+ * If the criterion score is below this threshold, the overall verdict is 'fail'.
288
+ */
289
+ readonly required_min_score?: number;
290
+ /**
291
+ * Score range definitions for analytic rubric scoring.
292
+ * When present, the judge outputs an integer 0-10 score per criterion.
293
+ * Ranges must be non-overlapping and cover 0-10 inclusive.
294
+ */
295
+ readonly score_ranges?: readonly ScoreRange[];
242
296
  };
243
297
  type CompositeAggregatorConfig = {
244
298
  readonly type: 'weighted_average';
@@ -438,6 +492,8 @@ interface ToolCall {
438
492
  readonly id?: string;
439
493
  /** ISO 8601 timestamp */
440
494
  readonly timestamp?: string;
495
+ /** Duration of the tool call in milliseconds */
496
+ readonly durationMs?: number;
441
497
  }
442
498
  /**
443
499
  * An output message from agent execution.
@@ -454,6 +510,8 @@ interface OutputMessage {
454
510
  readonly toolCalls?: readonly ToolCall[];
455
511
  /** ISO 8601 timestamp */
456
512
  readonly timestamp?: string;
513
+ /** Duration of the message in milliseconds */
514
+ readonly durationMs?: number;
457
515
  /** Provider-specific metadata */
458
516
  readonly metadata?: Record<string, unknown>;
459
517
  }
@@ -601,9 +659,15 @@ declare function buildPromptInputs(testCase: EvalCase, mode?: FormattingMode): P
601
659
  */
602
660
  declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
603
661
 
662
+ /**
663
+ * Detect file format by extension.
664
+ */
665
+ declare function detectFormat(filePath: string): 'yaml' | 'jsonl';
666
+
604
667
  type LoadOptions = {
605
668
  readonly verbose?: boolean;
606
- readonly evalId?: string;
669
+ /** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
670
+ readonly filter?: string;
607
671
  };
608
672
  /**
609
673
  * Read metadata from a test suite file (like target name).
@@ -613,7 +677,8 @@ declare function readTestSuiteMetadata(testFilePath: string): Promise<{
613
677
  target?: string;
614
678
  }>;
615
679
  /**
616
- * Load eval cases from a AgentV YAML specification file.
680
+ * Load eval cases from an AgentV specification file (YAML or JSONL).
681
+ * Format is detected by file extension: .yaml/.yml for YAML, .jsonl for JSONL.
617
682
  */
618
683
  declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
619
684
 
@@ -1186,6 +1251,15 @@ declare class LlmJudgeEvaluator implements Evaluator {
1186
1251
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
1187
1252
  private evaluateFreeform;
1188
1253
  private evaluateWithRubrics;
1254
+ /**
1255
+ * Evaluate using score-range rubrics (analytic rubric scoring).
1256
+ * Each criterion is scored 0-10 and normalized to 0-1.
1257
+ */
1258
+ private evaluateWithScoreRanges;
1259
+ /**
1260
+ * Build prompt for score-range rubric evaluation.
1261
+ */
1262
+ private buildScoreRangePrompt;
1189
1263
  private buildRubricPrompt;
1190
1264
  private runWithRetry;
1191
1265
  }
@@ -1275,7 +1349,8 @@ interface RunEvaluationOptions {
1275
1349
  readonly cache?: EvaluationCache;
1276
1350
  readonly useCache?: boolean;
1277
1351
  readonly now?: () => Date;
1278
- readonly evalId?: string;
1352
+ /** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
1353
+ readonly filter?: string;
1279
1354
  readonly verbose?: boolean;
1280
1355
  readonly maxConcurrency?: number;
1281
1356
  readonly evalCases?: readonly EvalCase[];
@@ -1301,4 +1376,4 @@ type AgentKernel = {
1301
1376
  };
1302
1377
  declare function createAgentKernel(): AgentKernel;
1303
1378
 
1304
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type 
UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
1379
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type 
ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, detectFormat, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
package/dist/index.d.ts CHANGED
@@ -60,6 +60,8 @@ interface ToolTrajectoryExpectedItem {
60
60
  readonly tool: string;
61
61
  /** Optional argument matching: 'any' skips validation, object performs partial deep equality */
62
62
  readonly args?: 'any' | Record<string, unknown>;
63
+ /** Optional maximum duration in milliseconds for latency assertions */
64
+ readonly maxDurationMs?: number;
63
65
  }
64
66
  /**
65
67
  * Simplified input type for computeTraceSummary.
@@ -226,19 +228,71 @@ type CodeEvaluatorConfig = {
226
228
  /** When present, enables target access for the script via local proxy */
227
229
  readonly target?: TargetAccessConfig;
228
230
  };
231
+ /**
232
+ * Executable prompt template configuration.
233
+ * Matches code_judge pattern for consistency.
234
+ */
235
+ type PromptScriptConfig = {
236
+ /** Command array to execute (e.g., ["bun", "run", "template.ts"]) */
237
+ readonly script: readonly string[];
238
+ /** Pass-through configuration for the prompt template */
239
+ readonly config?: Record<string, unknown>;
240
+ };
229
241
  type LlmJudgeEvaluatorConfig = {
230
242
  readonly name: string;
231
243
  readonly type: 'llm_judge';
232
- readonly prompt?: string;
244
+ /** Text prompt (inline or file path) or executable script config */
245
+ readonly prompt?: string | PromptScriptConfig;
233
246
  readonly promptPath?: string;
247
+ /** Resolved absolute path for prompt file (used for text template prompts) */
248
+ readonly resolvedPromptPath?: string;
249
+ /** Resolved script array for executable prompts (matches code_judge pattern) */
250
+ readonly resolvedPromptScript?: readonly string[];
234
251
  readonly rubrics?: readonly RubricItem[];
235
252
  readonly weight?: number;
253
+ /** Pass-through configuration for custom evaluator prompts (legacy, prefer prompt.config) */
254
+ readonly config?: Record<string, unknown>;
255
+ };
256
+ /**
257
+ * Score range definition for analytic rubric scoring.
258
+ * Each range maps an integer score band (0-10) to an expected outcome description.
259
+ */
260
+ type ScoreRange = {
261
+ /** Inclusive integer range [min, max] within 0-10 */
262
+ readonly score_range: readonly [number, number];
263
+ /** Description of what this score range represents */
264
+ readonly expected_outcome: string;
236
265
  };
266
+ /**
267
+ * Rubric item for LLM judge evaluation.
268
+ * Supports two modes:
269
+ * - Checklist mode: boolean satisfied/not-satisfied with `expected_outcome`
270
+ * - Score-range mode: 0-10 integer scoring with `score_ranges`
271
+ */
237
272
  type RubricItem = {
238
273
  readonly id: string;
239
- readonly description: string;
274
+ /**
275
+ * For checklist rubrics: the expected outcome text (required).
276
+ * For score-range rubrics: optional overall criterion description.
277
+ */
278
+ readonly expected_outcome?: string;
240
279
  readonly weight: number;
241
- readonly required: boolean;
280
+ /**
281
+ * Legacy boolean gating (deprecated, treated as required_min_score: 10).
282
+ * Use required_min_score instead for finer control.
283
+ */
284
+ readonly required?: boolean;
285
+ /**
286
+ * Minimum score (0-10) required to pass this criterion.
287
+ * If the criterion score is below this threshold, the overall verdict is 'fail'.
288
+ */
289
+ readonly required_min_score?: number;
290
+ /**
291
+ * Score range definitions for analytic rubric scoring.
292
+ * When present, the judge outputs an integer 0-10 score per criterion.
293
+ * Ranges must be non-overlapping and cover 0-10 inclusive.
294
+ */
295
+ readonly score_ranges?: readonly ScoreRange[];
242
296
  };
243
297
  type CompositeAggregatorConfig = {
244
298
  readonly type: 'weighted_average';
@@ -438,6 +492,8 @@ interface ToolCall {
438
492
  readonly id?: string;
439
493
  /** ISO 8601 timestamp */
440
494
  readonly timestamp?: string;
495
+ /** Duration of the tool call in milliseconds */
496
+ readonly durationMs?: number;
441
497
  }
442
498
  /**
443
499
  * An output message from agent execution.
@@ -454,6 +510,8 @@ interface OutputMessage {
454
510
  readonly toolCalls?: readonly ToolCall[];
455
511
  /** ISO 8601 timestamp */
456
512
  readonly timestamp?: string;
513
+ /** Duration of the message in milliseconds */
514
+ readonly durationMs?: number;
457
515
  /** Provider-specific metadata */
458
516
  readonly metadata?: Record<string, unknown>;
459
517
  }
@@ -601,9 +659,15 @@ declare function buildPromptInputs(testCase: EvalCase, mode?: FormattingMode): P
601
659
  */
602
660
  declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
603
661
 
662
+ /**
663
+ * Detect file format by extension.
664
+ */
665
+ declare function detectFormat(filePath: string): 'yaml' | 'jsonl';
666
+
604
667
  type LoadOptions = {
605
668
  readonly verbose?: boolean;
606
- readonly evalId?: string;
669
+ /** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
670
+ readonly filter?: string;
607
671
  };
608
672
  /**
609
673
  * Read metadata from a test suite file (like target name).
@@ -613,7 +677,8 @@ declare function readTestSuiteMetadata(testFilePath: string): Promise<{
613
677
  target?: string;
614
678
  }>;
615
679
  /**
616
- * Load eval cases from a AgentV YAML specification file.
680
+ * Load eval cases from an AgentV specification file (YAML or JSONL).
681
+ * Format is detected by file extension: .yaml/.yml for YAML, .jsonl for JSONL.
617
682
  */
618
683
  declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
619
684
 
@@ -1186,6 +1251,15 @@ declare class LlmJudgeEvaluator implements Evaluator {
1186
1251
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
1187
1252
  private evaluateFreeform;
1188
1253
  private evaluateWithRubrics;
1254
+ /**
1255
+ * Evaluate using score-range rubrics (analytic rubric scoring).
1256
+ * Each criterion is scored 0-10 and normalized to 0-1.
1257
+ */
1258
+ private evaluateWithScoreRanges;
1259
+ /**
1260
+ * Build prompt for score-range rubric evaluation.
1261
+ */
1262
+ private buildScoreRangePrompt;
1189
1263
  private buildRubricPrompt;
1190
1264
  private runWithRetry;
1191
1265
  }
@@ -1275,7 +1349,8 @@ interface RunEvaluationOptions {
1275
1349
  readonly cache?: EvaluationCache;
1276
1350
  readonly useCache?: boolean;
1277
1351
  readonly now?: () => Date;
1278
- readonly evalId?: string;
1352
+ /** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
1353
+ readonly filter?: string;
1279
1354
  readonly verbose?: boolean;
1280
1355
  readonly maxConcurrency?: number;
1281
1356
  readonly evalCases?: readonly EvalCase[];
@@ -1301,4 +1376,4 @@ type AgentKernel = {
1301
1376
  };
1302
1377
  declare function createAgentKernel(): AgentKernel;
1303
1378
 
1304
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type 
UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
1379
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type 
ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, detectFormat, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };