@agentv/core 2.2.0 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -77
- package/dist/{chunk-KDEP4I7G.js → chunk-RP3M7COZ.js} +1 -1
- package/dist/{chunk-KDEP4I7G.js.map → chunk-RP3M7COZ.js.map} +1 -1
- package/dist/evaluation/validation/index.cjs +38 -4
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +39 -5
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +654 -119
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +75 -6
- package/dist/index.d.ts +75 -6
- package/dist/index.js +655 -120
- package/dist/index.js.map +1 -1
- package/package.json +3 -6
package/dist/index.d.cts
CHANGED
|
@@ -60,6 +60,8 @@ interface ToolTrajectoryExpectedItem {
|
|
|
60
60
|
readonly tool: string;
|
|
61
61
|
/** Optional argument matching: 'any' skips validation, object performs partial deep equality */
|
|
62
62
|
readonly args?: 'any' | Record<string, unknown>;
|
|
63
|
+
/** Optional maximum duration in milliseconds for latency assertions */
|
|
64
|
+
readonly maxDurationMs?: number;
|
|
63
65
|
}
|
|
64
66
|
/**
|
|
65
67
|
* Simplified input type for computeTraceSummary.
|
|
@@ -226,19 +228,71 @@ type CodeEvaluatorConfig = {
|
|
|
226
228
|
/** When present, enables target access for the script via local proxy */
|
|
227
229
|
readonly target?: TargetAccessConfig;
|
|
228
230
|
};
|
|
231
|
+
/**
|
|
232
|
+
* Executable prompt template configuration.
|
|
233
|
+
* Matches code_judge pattern for consistency.
|
|
234
|
+
*/
|
|
235
|
+
type PromptScriptConfig = {
|
|
236
|
+
/** Command array to execute (e.g., ["bun", "run", "template.ts"]) */
|
|
237
|
+
readonly script: readonly string[];
|
|
238
|
+
/** Pass-through configuration for the prompt template */
|
|
239
|
+
readonly config?: Record<string, unknown>;
|
|
240
|
+
};
|
|
229
241
|
type LlmJudgeEvaluatorConfig = {
|
|
230
242
|
readonly name: string;
|
|
231
243
|
readonly type: 'llm_judge';
|
|
232
|
-
|
|
244
|
+
/** Text prompt (inline or file path) or executable script config */
|
|
245
|
+
readonly prompt?: string | PromptScriptConfig;
|
|
233
246
|
readonly promptPath?: string;
|
|
247
|
+
/** Resolved absolute path for prompt file (used for text template prompts) */
|
|
248
|
+
readonly resolvedPromptPath?: string;
|
|
249
|
+
/** Resolved script array for executable prompts (matches code_judge pattern) */
|
|
250
|
+
readonly resolvedPromptScript?: readonly string[];
|
|
234
251
|
readonly rubrics?: readonly RubricItem[];
|
|
235
252
|
readonly weight?: number;
|
|
253
|
+
/** Pass-through configuration for custom evaluator prompts (legacy, prefer prompt.config) */
|
|
254
|
+
readonly config?: Record<string, unknown>;
|
|
236
255
|
};
|
|
256
|
+
/**
|
|
257
|
+
* Score range definition for analytic rubric scoring.
|
|
258
|
+
* Each range maps an integer score band (0-10) to an expected outcome description.
|
|
259
|
+
*/
|
|
260
|
+
type ScoreRange = {
|
|
261
|
+
/** Inclusive integer range [min, max] within 0-10 */
|
|
262
|
+
readonly score_range: readonly [number, number];
|
|
263
|
+
/** Description of what this score range represents */
|
|
264
|
+
readonly expected_outcome: string;
|
|
265
|
+
};
|
|
266
|
+
/**
|
|
267
|
+
* Rubric item for LLM judge evaluation.
|
|
268
|
+
* Supports two modes:
|
|
269
|
+
* - Checklist mode: boolean satisfied/not-satisfied with `expected_outcome`
|
|
270
|
+
* - Score-range mode: 0-10 integer scoring with `score_ranges`
|
|
271
|
+
*/
|
|
237
272
|
type RubricItem = {
|
|
238
273
|
readonly id: string;
|
|
239
|
-
|
|
274
|
+
/**
|
|
275
|
+
* For checklist rubrics: the expected outcome text (required).
|
|
276
|
+
* For score-range rubrics: optional overall criterion description.
|
|
277
|
+
*/
|
|
278
|
+
readonly expected_outcome?: string;
|
|
240
279
|
readonly weight: number;
|
|
241
|
-
|
|
280
|
+
/**
|
|
281
|
+
* Legacy boolean gating (deprecated, treated as required_min_score: 10).
|
|
282
|
+
* Use required_min_score instead for finer control.
|
|
283
|
+
*/
|
|
284
|
+
readonly required?: boolean;
|
|
285
|
+
/**
|
|
286
|
+
* Minimum score (0-10) required to pass this criterion.
|
|
287
|
+
* If the criterion score is below this threshold, the overall verdict is 'fail'.
|
|
288
|
+
*/
|
|
289
|
+
readonly required_min_score?: number;
|
|
290
|
+
/**
|
|
291
|
+
* Score range definitions for analytic rubric scoring.
|
|
292
|
+
* When present, the judge outputs an integer 0-10 score per criterion.
|
|
293
|
+
* Ranges must be non-overlapping and cover 0-10 inclusive.
|
|
294
|
+
*/
|
|
295
|
+
readonly score_ranges?: readonly ScoreRange[];
|
|
242
296
|
};
|
|
243
297
|
type CompositeAggregatorConfig = {
|
|
244
298
|
readonly type: 'weighted_average';
|
|
@@ -438,6 +492,8 @@ interface ToolCall {
|
|
|
438
492
|
readonly id?: string;
|
|
439
493
|
/** ISO 8601 timestamp */
|
|
440
494
|
readonly timestamp?: string;
|
|
495
|
+
/** Duration of the tool call in milliseconds */
|
|
496
|
+
readonly durationMs?: number;
|
|
441
497
|
}
|
|
442
498
|
/**
|
|
443
499
|
* An output message from agent execution.
|
|
@@ -454,6 +510,8 @@ interface OutputMessage {
|
|
|
454
510
|
readonly toolCalls?: readonly ToolCall[];
|
|
455
511
|
/** ISO 8601 timestamp */
|
|
456
512
|
readonly timestamp?: string;
|
|
513
|
+
/** Duration of the message in milliseconds */
|
|
514
|
+
readonly durationMs?: number;
|
|
457
515
|
/** Provider-specific metadata */
|
|
458
516
|
readonly metadata?: Record<string, unknown>;
|
|
459
517
|
}
|
|
@@ -608,7 +666,8 @@ declare function detectFormat(filePath: string): 'yaml' | 'jsonl';
|
|
|
608
666
|
|
|
609
667
|
type LoadOptions = {
|
|
610
668
|
readonly verbose?: boolean;
|
|
611
|
-
|
|
669
|
+
/** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
|
|
670
|
+
readonly filter?: string;
|
|
612
671
|
};
|
|
613
672
|
/**
|
|
614
673
|
* Read metadata from a test suite file (like target name).
|
|
@@ -1192,6 +1251,15 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
1192
1251
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
1193
1252
|
private evaluateFreeform;
|
|
1194
1253
|
private evaluateWithRubrics;
|
|
1254
|
+
/**
|
|
1255
|
+
* Evaluate using score-range rubrics (analytic rubric scoring).
|
|
1256
|
+
* Each criterion is scored 0-10 and normalized to 0-1.
|
|
1257
|
+
*/
|
|
1258
|
+
private evaluateWithScoreRanges;
|
|
1259
|
+
/**
|
|
1260
|
+
* Build prompt for score-range rubric evaluation.
|
|
1261
|
+
*/
|
|
1262
|
+
private buildScoreRangePrompt;
|
|
1195
1263
|
private buildRubricPrompt;
|
|
1196
1264
|
private runWithRetry;
|
|
1197
1265
|
}
|
|
@@ -1281,7 +1349,8 @@ interface RunEvaluationOptions {
|
|
|
1281
1349
|
readonly cache?: EvaluationCache;
|
|
1282
1350
|
readonly useCache?: boolean;
|
|
1283
1351
|
readonly now?: () => Date;
|
|
1284
|
-
|
|
1352
|
+
/** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
|
|
1353
|
+
readonly filter?: string;
|
|
1285
1354
|
readonly verbose?: boolean;
|
|
1286
1355
|
readonly maxConcurrency?: number;
|
|
1287
1356
|
readonly evalCases?: readonly EvalCase[];
|
|
@@ -1307,4 +1376,4 @@ type AgentKernel = {
|
|
|
1307
1376
|
};
|
|
1308
1377
|
declare function createAgentKernel(): AgentKernel;
|
|
1309
1378
|
|
|
1310
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, detectFormat, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
|
|
1379
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, detectFormat, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
|
package/dist/index.d.ts
CHANGED
|
@@ -60,6 +60,8 @@ interface ToolTrajectoryExpectedItem {
|
|
|
60
60
|
readonly tool: string;
|
|
61
61
|
/** Optional argument matching: 'any' skips validation, object performs partial deep equality */
|
|
62
62
|
readonly args?: 'any' | Record<string, unknown>;
|
|
63
|
+
/** Optional maximum duration in milliseconds for latency assertions */
|
|
64
|
+
readonly maxDurationMs?: number;
|
|
63
65
|
}
|
|
64
66
|
/**
|
|
65
67
|
* Simplified input type for computeTraceSummary.
|
|
@@ -226,19 +228,71 @@ type CodeEvaluatorConfig = {
|
|
|
226
228
|
/** When present, enables target access for the script via local proxy */
|
|
227
229
|
readonly target?: TargetAccessConfig;
|
|
228
230
|
};
|
|
231
|
+
/**
|
|
232
|
+
* Executable prompt template configuration.
|
|
233
|
+
* Matches code_judge pattern for consistency.
|
|
234
|
+
*/
|
|
235
|
+
type PromptScriptConfig = {
|
|
236
|
+
/** Command array to execute (e.g., ["bun", "run", "template.ts"]) */
|
|
237
|
+
readonly script: readonly string[];
|
|
238
|
+
/** Pass-through configuration for the prompt template */
|
|
239
|
+
readonly config?: Record<string, unknown>;
|
|
240
|
+
};
|
|
229
241
|
type LlmJudgeEvaluatorConfig = {
|
|
230
242
|
readonly name: string;
|
|
231
243
|
readonly type: 'llm_judge';
|
|
232
|
-
|
|
244
|
+
/** Text prompt (inline or file path) or executable script config */
|
|
245
|
+
readonly prompt?: string | PromptScriptConfig;
|
|
233
246
|
readonly promptPath?: string;
|
|
247
|
+
/** Resolved absolute path for prompt file (used for text template prompts) */
|
|
248
|
+
readonly resolvedPromptPath?: string;
|
|
249
|
+
/** Resolved script array for executable prompts (matches code_judge pattern) */
|
|
250
|
+
readonly resolvedPromptScript?: readonly string[];
|
|
234
251
|
readonly rubrics?: readonly RubricItem[];
|
|
235
252
|
readonly weight?: number;
|
|
253
|
+
/** Pass-through configuration for custom evaluator prompts (legacy, prefer prompt.config) */
|
|
254
|
+
readonly config?: Record<string, unknown>;
|
|
236
255
|
};
|
|
256
|
+
/**
|
|
257
|
+
* Score range definition for analytic rubric scoring.
|
|
258
|
+
* Each range maps an integer score band (0-10) to an expected outcome description.
|
|
259
|
+
*/
|
|
260
|
+
type ScoreRange = {
|
|
261
|
+
/** Inclusive integer range [min, max] within 0-10 */
|
|
262
|
+
readonly score_range: readonly [number, number];
|
|
263
|
+
/** Description of what this score range represents */
|
|
264
|
+
readonly expected_outcome: string;
|
|
265
|
+
};
|
|
266
|
+
/**
|
|
267
|
+
* Rubric item for LLM judge evaluation.
|
|
268
|
+
* Supports two modes:
|
|
269
|
+
* - Checklist mode: boolean satisfied/not-satisfied with `expected_outcome`
|
|
270
|
+
* - Score-range mode: 0-10 integer scoring with `score_ranges`
|
|
271
|
+
*/
|
|
237
272
|
type RubricItem = {
|
|
238
273
|
readonly id: string;
|
|
239
|
-
|
|
274
|
+
/**
|
|
275
|
+
* For checklist rubrics: the expected outcome text (required).
|
|
276
|
+
* For score-range rubrics: optional overall criterion description.
|
|
277
|
+
*/
|
|
278
|
+
readonly expected_outcome?: string;
|
|
240
279
|
readonly weight: number;
|
|
241
|
-
|
|
280
|
+
/**
|
|
281
|
+
* Legacy boolean gating (deprecated, treated as required_min_score: 10).
|
|
282
|
+
* Use required_min_score instead for finer control.
|
|
283
|
+
*/
|
|
284
|
+
readonly required?: boolean;
|
|
285
|
+
/**
|
|
286
|
+
* Minimum score (0-10) required to pass this criterion.
|
|
287
|
+
* If the criterion score is below this threshold, the overall verdict is 'fail'.
|
|
288
|
+
*/
|
|
289
|
+
readonly required_min_score?: number;
|
|
290
|
+
/**
|
|
291
|
+
* Score range definitions for analytic rubric scoring.
|
|
292
|
+
* When present, the judge outputs an integer 0-10 score per criterion.
|
|
293
|
+
* Ranges must be non-overlapping and cover 0-10 inclusive.
|
|
294
|
+
*/
|
|
295
|
+
readonly score_ranges?: readonly ScoreRange[];
|
|
242
296
|
};
|
|
243
297
|
type CompositeAggregatorConfig = {
|
|
244
298
|
readonly type: 'weighted_average';
|
|
@@ -438,6 +492,8 @@ interface ToolCall {
|
|
|
438
492
|
readonly id?: string;
|
|
439
493
|
/** ISO 8601 timestamp */
|
|
440
494
|
readonly timestamp?: string;
|
|
495
|
+
/** Duration of the tool call in milliseconds */
|
|
496
|
+
readonly durationMs?: number;
|
|
441
497
|
}
|
|
442
498
|
/**
|
|
443
499
|
* An output message from agent execution.
|
|
@@ -454,6 +510,8 @@ interface OutputMessage {
|
|
|
454
510
|
readonly toolCalls?: readonly ToolCall[];
|
|
455
511
|
/** ISO 8601 timestamp */
|
|
456
512
|
readonly timestamp?: string;
|
|
513
|
+
/** Duration of the message in milliseconds */
|
|
514
|
+
readonly durationMs?: number;
|
|
457
515
|
/** Provider-specific metadata */
|
|
458
516
|
readonly metadata?: Record<string, unknown>;
|
|
459
517
|
}
|
|
@@ -608,7 +666,8 @@ declare function detectFormat(filePath: string): 'yaml' | 'jsonl';
|
|
|
608
666
|
|
|
609
667
|
type LoadOptions = {
|
|
610
668
|
readonly verbose?: boolean;
|
|
611
|
-
|
|
669
|
+
/** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
|
|
670
|
+
readonly filter?: string;
|
|
612
671
|
};
|
|
613
672
|
/**
|
|
614
673
|
* Read metadata from a test suite file (like target name).
|
|
@@ -1192,6 +1251,15 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
1192
1251
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
1193
1252
|
private evaluateFreeform;
|
|
1194
1253
|
private evaluateWithRubrics;
|
|
1254
|
+
/**
|
|
1255
|
+
* Evaluate using score-range rubrics (analytic rubric scoring).
|
|
1256
|
+
* Each criterion is scored 0-10 and normalized to 0-1.
|
|
1257
|
+
*/
|
|
1258
|
+
private evaluateWithScoreRanges;
|
|
1259
|
+
/**
|
|
1260
|
+
* Build prompt for score-range rubric evaluation.
|
|
1261
|
+
*/
|
|
1262
|
+
private buildScoreRangePrompt;
|
|
1195
1263
|
private buildRubricPrompt;
|
|
1196
1264
|
private runWithRetry;
|
|
1197
1265
|
}
|
|
@@ -1281,7 +1349,8 @@ interface RunEvaluationOptions {
|
|
|
1281
1349
|
readonly cache?: EvaluationCache;
|
|
1282
1350
|
readonly useCache?: boolean;
|
|
1283
1351
|
readonly now?: () => Date;
|
|
1284
|
-
|
|
1352
|
+
/** Filter eval cases by ID pattern (glob supported, e.g., "summary-*") */
|
|
1353
|
+
readonly filter?: string;
|
|
1285
1354
|
readonly verbose?: boolean;
|
|
1286
1355
|
readonly maxConcurrency?: number;
|
|
1287
1356
|
readonly evalCases?: readonly EvalCase[];
|
|
@@ -1307,4 +1376,4 @@ type AgentKernel = {
|
|
|
1307
1376
|
};
|
|
1308
1377
|
declare function createAgentKernel(): AgentKernel;
|
|
1309
1378
|
|
|
1310
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, detectFormat, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
|
|
1379
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildSearchRoots, clampScore, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, deepEqual, detectFormat, ensureVSCodeSubagents, executeScript, explorationRatio, extractJsonBlob, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, scoreToVerdict, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
|