@agentv/core 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -77
- package/dist/{chunk-V3JCB3HI.js → chunk-KPHTMTZ3.js} +32 -7
- package/dist/chunk-KPHTMTZ3.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +17 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +18 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +411 -146
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +59 -51
- package/dist/index.d.ts +59 -51
- package/dist/index.js +371 -129
- package/dist/index.js.map +1 -1
- package/package.json +2 -5
- package/dist/chunk-V3JCB3HI.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -4,32 +4,6 @@ import * as ai from 'ai';
|
|
|
4
4
|
* Trace event types for capturing agent execution traces.
|
|
5
5
|
* Provides a normalized, provider-agnostic model for tool-call trajectories.
|
|
6
6
|
*/
|
|
7
|
-
/**
|
|
8
|
-
* Supported trace event types.
|
|
9
|
-
*/
|
|
10
|
-
type TraceEventType = 'model_step' | 'tool_call' | 'tool_result' | 'message' | 'error';
|
|
11
|
-
/**
|
|
12
|
-
* Normalized trace event representing a single step in agent execution.
|
|
13
|
-
* Provider-agnostic format for tool-call trajectory evaluation.
|
|
14
|
-
*/
|
|
15
|
-
interface TraceEvent {
|
|
16
|
-
/** Event type */
|
|
17
|
-
readonly type: TraceEventType;
|
|
18
|
-
/** ISO 8601 timestamp */
|
|
19
|
-
readonly timestamp: string;
|
|
20
|
-
/** Stable identifier for pairing tool_call/tool_result */
|
|
21
|
-
readonly id?: string;
|
|
22
|
-
/** Tool name (for tool_call/tool_result) */
|
|
23
|
-
readonly name?: string;
|
|
24
|
-
/** Tool input - any JSON value */
|
|
25
|
-
readonly input?: unknown;
|
|
26
|
-
/** Tool output - any JSON value */
|
|
27
|
-
readonly output?: unknown;
|
|
28
|
-
/** Message content (for message/model_step) */
|
|
29
|
-
readonly text?: string;
|
|
30
|
-
/** Provider-specific metadata */
|
|
31
|
-
readonly metadata?: Record<string, unknown>;
|
|
32
|
-
}
|
|
33
7
|
/**
|
|
34
8
|
* Compact summary of a trace for lightweight persistence.
|
|
35
9
|
* Included in results by default to avoid payload bloat.
|
|
@@ -66,18 +40,19 @@ interface ToolTrajectoryExpectedItem {
|
|
|
66
40
|
readonly tool: string;
|
|
67
41
|
}
|
|
68
42
|
/**
|
|
69
|
-
*
|
|
43
|
+
* Simplified input type for computeTraceSummary.
|
|
44
|
+
* Matches OutputMessage structure without requiring full provider/types import.
|
|
70
45
|
*/
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
46
|
+
interface OutputMessageLike {
|
|
47
|
+
readonly toolCalls?: readonly {
|
|
48
|
+
readonly tool: string;
|
|
49
|
+
}[];
|
|
50
|
+
}
|
|
76
51
|
/**
|
|
77
|
-
* Compute a lightweight summary from
|
|
52
|
+
* Compute a lightweight summary from output messages.
|
|
78
53
|
* Used for default result persistence without payload bloat.
|
|
79
54
|
*/
|
|
80
|
-
declare function computeTraceSummary(
|
|
55
|
+
declare function computeTraceSummary(messages: readonly OutputMessageLike[]): TraceSummary;
|
|
81
56
|
|
|
82
57
|
/**
|
|
83
58
|
* JSON primitive values appearing in AgentV payloads.
|
|
@@ -105,7 +80,7 @@ type TestMessageRole = (typeof TEST_MESSAGE_ROLE_VALUES)[number];
|
|
|
105
80
|
/**
|
|
106
81
|
* Text or structured payload attached to a message.
|
|
107
82
|
*/
|
|
108
|
-
type TestMessageContent = string | readonly JsonObject[];
|
|
83
|
+
type TestMessageContent = string | JsonObject | readonly JsonObject[];
|
|
109
84
|
/**
|
|
110
85
|
* System-authored instruction message.
|
|
111
86
|
*/
|
|
@@ -246,8 +221,6 @@ interface EvaluationResult {
|
|
|
246
221
|
readonly error?: string;
|
|
247
222
|
/** Lightweight summary of the execution trace (always included when available) */
|
|
248
223
|
readonly trace_summary?: TraceSummary;
|
|
249
|
-
/** Full trace events (only included when --include-trace flag is set) */
|
|
250
|
-
readonly trace?: readonly TraceEvent[];
|
|
251
224
|
}
|
|
252
225
|
type EvaluationVerdict = 'pass' | 'fail' | 'borderline';
|
|
253
226
|
interface EvaluatorResult {
|
|
@@ -290,15 +263,45 @@ interface ProviderRequest {
|
|
|
290
263
|
readonly metadata?: JsonObject;
|
|
291
264
|
readonly signal?: AbortSignal;
|
|
292
265
|
}
|
|
266
|
+
/**
|
|
267
|
+
* A tool call within an output message.
|
|
268
|
+
* Represents a single tool invocation with its input and optional output.
|
|
269
|
+
*/
|
|
270
|
+
interface ToolCall {
|
|
271
|
+
/** Tool name */
|
|
272
|
+
readonly tool: string;
|
|
273
|
+
/** Tool input arguments */
|
|
274
|
+
readonly input?: unknown;
|
|
275
|
+
/** Tool output result */
|
|
276
|
+
readonly output?: unknown;
|
|
277
|
+
/** Stable identifier for pairing tool calls */
|
|
278
|
+
readonly id?: string;
|
|
279
|
+
/** ISO 8601 timestamp */
|
|
280
|
+
readonly timestamp?: string;
|
|
281
|
+
}
|
|
282
|
+
/**
|
|
283
|
+
* An output message from agent execution.
|
|
284
|
+
* Represents a single message in the conversation with optional tool calls.
|
|
285
|
+
*/
|
|
286
|
+
interface OutputMessage {
|
|
287
|
+
/** Message role (e.g., 'assistant', 'user', 'tool') */
|
|
288
|
+
readonly role: string;
|
|
289
|
+
/** Optional name for the message sender */
|
|
290
|
+
readonly name?: string;
|
|
291
|
+
/** Message content */
|
|
292
|
+
readonly content?: unknown;
|
|
293
|
+
/** Tool calls made in this message */
|
|
294
|
+
readonly toolCalls?: readonly ToolCall[];
|
|
295
|
+
/** ISO 8601 timestamp */
|
|
296
|
+
readonly timestamp?: string;
|
|
297
|
+
/** Provider-specific metadata */
|
|
298
|
+
readonly metadata?: Record<string, unknown>;
|
|
299
|
+
}
|
|
293
300
|
interface ProviderResponse {
|
|
294
|
-
readonly text: string;
|
|
295
|
-
readonly reasoning?: string;
|
|
296
301
|
readonly raw?: unknown;
|
|
297
302
|
readonly usage?: JsonObject;
|
|
298
|
-
/**
|
|
299
|
-
readonly trace?: readonly TraceEvent[];
|
|
300
|
-
/** Reference to external trace file (alternative to inline trace) */
|
|
301
|
-
readonly traceRef?: string;
|
|
303
|
+
/** Output messages from agent execution (primary source for tool trajectory) */
|
|
304
|
+
readonly outputMessages?: readonly OutputMessage[];
|
|
302
305
|
}
|
|
303
306
|
interface Provider {
|
|
304
307
|
readonly id: string;
|
|
@@ -532,8 +535,6 @@ interface MockResolvedConfig {
|
|
|
532
535
|
readonly delayMs?: number;
|
|
533
536
|
readonly delayMinMs?: number;
|
|
534
537
|
readonly delayMaxMs?: number;
|
|
535
|
-
/** Mock trace events for testing tool_trajectory evaluator */
|
|
536
|
-
readonly trace?: readonly TraceEvent[];
|
|
537
538
|
}
|
|
538
539
|
interface VSCodeResolvedConfig {
|
|
539
540
|
readonly command: string;
|
|
@@ -559,6 +560,7 @@ interface CliResolvedConfig {
|
|
|
559
560
|
readonly timeoutMs?: number;
|
|
560
561
|
readonly healthcheck?: CliHealthcheck;
|
|
561
562
|
readonly verbose?: boolean;
|
|
563
|
+
readonly keepTempFiles?: boolean;
|
|
562
564
|
}
|
|
563
565
|
type ResolvedTarget = {
|
|
564
566
|
readonly kind: 'azure';
|
|
@@ -662,12 +664,10 @@ interface EvaluationContext {
|
|
|
662
664
|
readonly judgeProvider?: Provider;
|
|
663
665
|
readonly evaluatorTemplateOverride?: string;
|
|
664
666
|
readonly evaluator?: EvaluatorConfig;
|
|
665
|
-
/**
|
|
666
|
-
readonly candidateTrace?: readonly TraceEvent[];
|
|
667
|
-
/** File path to trace data (alternative to inline candidateTrace) */
|
|
668
|
-
readonly candidateTraceRef?: string;
|
|
667
|
+
/** Output messages from agent execution (primary source for tool trajectory) */
|
|
668
|
+
readonly outputMessages?: readonly OutputMessage[];
|
|
669
669
|
/** Lightweight summary of trace events (if available) */
|
|
670
|
-
readonly
|
|
670
|
+
readonly traceSummary?: TraceSummary;
|
|
671
671
|
}
|
|
672
672
|
interface EvaluationScore {
|
|
673
673
|
readonly score: number;
|
|
@@ -737,6 +737,14 @@ declare class ToolTrajectoryEvaluator implements Evaluator {
|
|
|
737
737
|
private readonly config;
|
|
738
738
|
constructor(options: ToolTrajectoryEvaluatorOptions);
|
|
739
739
|
evaluate(context: EvaluationContext): EvaluationScore;
|
|
740
|
+
/**
|
|
741
|
+
* Extract tool calls from output messages.
|
|
742
|
+
*/
|
|
743
|
+
private extractToolCallsFromMessages;
|
|
744
|
+
/**
|
|
745
|
+
* Build a summary from extracted tool calls.
|
|
746
|
+
*/
|
|
747
|
+
private buildSummary;
|
|
740
748
|
private evaluateAnyOrder;
|
|
741
749
|
private evaluateInOrder;
|
|
742
750
|
private evaluateExact;
|
|
@@ -831,4 +839,4 @@ type AgentKernel = {
|
|
|
831
839
|
};
|
|
832
840
|
declare function createAgentKernel(): AgentKernel;
|
|
833
841
|
|
|
834
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type
|
|
842
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, 
subscribeToCodexLogEntries };
|
package/dist/index.d.ts
CHANGED
|
@@ -4,32 +4,6 @@ import * as ai from 'ai';
|
|
|
4
4
|
* Trace event types for capturing agent execution traces.
|
|
5
5
|
* Provides a normalized, provider-agnostic model for tool-call trajectories.
|
|
6
6
|
*/
|
|
7
|
-
/**
|
|
8
|
-
* Supported trace event types.
|
|
9
|
-
*/
|
|
10
|
-
type TraceEventType = 'model_step' | 'tool_call' | 'tool_result' | 'message' | 'error';
|
|
11
|
-
/**
|
|
12
|
-
* Normalized trace event representing a single step in agent execution.
|
|
13
|
-
* Provider-agnostic format for tool-call trajectory evaluation.
|
|
14
|
-
*/
|
|
15
|
-
interface TraceEvent {
|
|
16
|
-
/** Event type */
|
|
17
|
-
readonly type: TraceEventType;
|
|
18
|
-
/** ISO 8601 timestamp */
|
|
19
|
-
readonly timestamp: string;
|
|
20
|
-
/** Stable identifier for pairing tool_call/tool_result */
|
|
21
|
-
readonly id?: string;
|
|
22
|
-
/** Tool name (for tool_call/tool_result) */
|
|
23
|
-
readonly name?: string;
|
|
24
|
-
/** Tool input - any JSON value */
|
|
25
|
-
readonly input?: unknown;
|
|
26
|
-
/** Tool output - any JSON value */
|
|
27
|
-
readonly output?: unknown;
|
|
28
|
-
/** Message content (for message/model_step) */
|
|
29
|
-
readonly text?: string;
|
|
30
|
-
/** Provider-specific metadata */
|
|
31
|
-
readonly metadata?: Record<string, unknown>;
|
|
32
|
-
}
|
|
33
7
|
/**
|
|
34
8
|
* Compact summary of a trace for lightweight persistence.
|
|
35
9
|
* Included in results by default to avoid payload bloat.
|
|
@@ -66,18 +40,19 @@ interface ToolTrajectoryExpectedItem {
|
|
|
66
40
|
readonly tool: string;
|
|
67
41
|
}
|
|
68
42
|
/**
|
|
69
|
-
*
|
|
43
|
+
* Simplified input type for computeTraceSummary.
|
|
44
|
+
* Matches OutputMessage structure without requiring full provider/types import.
|
|
70
45
|
*/
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
46
|
+
interface OutputMessageLike {
|
|
47
|
+
readonly toolCalls?: readonly {
|
|
48
|
+
readonly tool: string;
|
|
49
|
+
}[];
|
|
50
|
+
}
|
|
76
51
|
/**
|
|
77
|
-
* Compute a lightweight summary from
|
|
52
|
+
* Compute a lightweight summary from output messages.
|
|
78
53
|
* Used for default result persistence without payload bloat.
|
|
79
54
|
*/
|
|
80
|
-
declare function computeTraceSummary(
|
|
55
|
+
declare function computeTraceSummary(messages: readonly OutputMessageLike[]): TraceSummary;
|
|
81
56
|
|
|
82
57
|
/**
|
|
83
58
|
* JSON primitive values appearing in AgentV payloads.
|
|
@@ -105,7 +80,7 @@ type TestMessageRole = (typeof TEST_MESSAGE_ROLE_VALUES)[number];
|
|
|
105
80
|
/**
|
|
106
81
|
* Text or structured payload attached to a message.
|
|
107
82
|
*/
|
|
108
|
-
type TestMessageContent = string | readonly JsonObject[];
|
|
83
|
+
type TestMessageContent = string | JsonObject | readonly JsonObject[];
|
|
109
84
|
/**
|
|
110
85
|
* System-authored instruction message.
|
|
111
86
|
*/
|
|
@@ -246,8 +221,6 @@ interface EvaluationResult {
|
|
|
246
221
|
readonly error?: string;
|
|
247
222
|
/** Lightweight summary of the execution trace (always included when available) */
|
|
248
223
|
readonly trace_summary?: TraceSummary;
|
|
249
|
-
/** Full trace events (only included when --include-trace flag is set) */
|
|
250
|
-
readonly trace?: readonly TraceEvent[];
|
|
251
224
|
}
|
|
252
225
|
type EvaluationVerdict = 'pass' | 'fail' | 'borderline';
|
|
253
226
|
interface EvaluatorResult {
|
|
@@ -290,15 +263,45 @@ interface ProviderRequest {
|
|
|
290
263
|
readonly metadata?: JsonObject;
|
|
291
264
|
readonly signal?: AbortSignal;
|
|
292
265
|
}
|
|
266
|
+
/**
|
|
267
|
+
* A tool call within an output message.
|
|
268
|
+
* Represents a single tool invocation with its input and optional output.
|
|
269
|
+
*/
|
|
270
|
+
interface ToolCall {
|
|
271
|
+
/** Tool name */
|
|
272
|
+
readonly tool: string;
|
|
273
|
+
/** Tool input arguments */
|
|
274
|
+
readonly input?: unknown;
|
|
275
|
+
/** Tool output result */
|
|
276
|
+
readonly output?: unknown;
|
|
277
|
+
/** Stable identifier for pairing tool calls */
|
|
278
|
+
readonly id?: string;
|
|
279
|
+
/** ISO 8601 timestamp */
|
|
280
|
+
readonly timestamp?: string;
|
|
281
|
+
}
|
|
282
|
+
/**
|
|
283
|
+
* An output message from agent execution.
|
|
284
|
+
* Represents a single message in the conversation with optional tool calls.
|
|
285
|
+
*/
|
|
286
|
+
interface OutputMessage {
|
|
287
|
+
/** Message role (e.g., 'assistant', 'user', 'tool') */
|
|
288
|
+
readonly role: string;
|
|
289
|
+
/** Optional name for the message sender */
|
|
290
|
+
readonly name?: string;
|
|
291
|
+
/** Message content */
|
|
292
|
+
readonly content?: unknown;
|
|
293
|
+
/** Tool calls made in this message */
|
|
294
|
+
readonly toolCalls?: readonly ToolCall[];
|
|
295
|
+
/** ISO 8601 timestamp */
|
|
296
|
+
readonly timestamp?: string;
|
|
297
|
+
/** Provider-specific metadata */
|
|
298
|
+
readonly metadata?: Record<string, unknown>;
|
|
299
|
+
}
|
|
293
300
|
interface ProviderResponse {
|
|
294
|
-
readonly text: string;
|
|
295
|
-
readonly reasoning?: string;
|
|
296
301
|
readonly raw?: unknown;
|
|
297
302
|
readonly usage?: JsonObject;
|
|
298
|
-
/**
|
|
299
|
-
readonly trace?: readonly TraceEvent[];
|
|
300
|
-
/** Reference to external trace file (alternative to inline trace) */
|
|
301
|
-
readonly traceRef?: string;
|
|
303
|
+
/** Output messages from agent execution (primary source for tool trajectory) */
|
|
304
|
+
readonly outputMessages?: readonly OutputMessage[];
|
|
302
305
|
}
|
|
303
306
|
interface Provider {
|
|
304
307
|
readonly id: string;
|
|
@@ -532,8 +535,6 @@ interface MockResolvedConfig {
|
|
|
532
535
|
readonly delayMs?: number;
|
|
533
536
|
readonly delayMinMs?: number;
|
|
534
537
|
readonly delayMaxMs?: number;
|
|
535
|
-
/** Mock trace events for testing tool_trajectory evaluator */
|
|
536
|
-
readonly trace?: readonly TraceEvent[];
|
|
537
538
|
}
|
|
538
539
|
interface VSCodeResolvedConfig {
|
|
539
540
|
readonly command: string;
|
|
@@ -559,6 +560,7 @@ interface CliResolvedConfig {
|
|
|
559
560
|
readonly timeoutMs?: number;
|
|
560
561
|
readonly healthcheck?: CliHealthcheck;
|
|
561
562
|
readonly verbose?: boolean;
|
|
563
|
+
readonly keepTempFiles?: boolean;
|
|
562
564
|
}
|
|
563
565
|
type ResolvedTarget = {
|
|
564
566
|
readonly kind: 'azure';
|
|
@@ -662,12 +664,10 @@ interface EvaluationContext {
|
|
|
662
664
|
readonly judgeProvider?: Provider;
|
|
663
665
|
readonly evaluatorTemplateOverride?: string;
|
|
664
666
|
readonly evaluator?: EvaluatorConfig;
|
|
665
|
-
/**
|
|
666
|
-
readonly candidateTrace?: readonly TraceEvent[];
|
|
667
|
-
/** File path to trace data (alternative to inline candidateTrace) */
|
|
668
|
-
readonly candidateTraceRef?: string;
|
|
667
|
+
/** Output messages from agent execution (primary source for tool trajectory) */
|
|
668
|
+
readonly outputMessages?: readonly OutputMessage[];
|
|
669
669
|
/** Lightweight summary of trace events (if available) */
|
|
670
|
-
readonly
|
|
670
|
+
readonly traceSummary?: TraceSummary;
|
|
671
671
|
}
|
|
672
672
|
interface EvaluationScore {
|
|
673
673
|
readonly score: number;
|
|
@@ -737,6 +737,14 @@ declare class ToolTrajectoryEvaluator implements Evaluator {
|
|
|
737
737
|
private readonly config;
|
|
738
738
|
constructor(options: ToolTrajectoryEvaluatorOptions);
|
|
739
739
|
evaluate(context: EvaluationContext): EvaluationScore;
|
|
740
|
+
/**
|
|
741
|
+
* Extract tool calls from output messages.
|
|
742
|
+
*/
|
|
743
|
+
private extractToolCallsFromMessages;
|
|
744
|
+
/**
|
|
745
|
+
* Build a summary from extracted tool calls.
|
|
746
|
+
*/
|
|
747
|
+
private buildSummary;
|
|
740
748
|
private evaluateAnyOrder;
|
|
741
749
|
private evaluateInOrder;
|
|
742
750
|
private evaluateExact;
|
|
@@ -831,4 +839,4 @@ type AgentKernel = {
|
|
|
831
839
|
};
|
|
832
840
|
declare function createAgentKernel(): AgentKernel;
|
|
833
841
|
|
|
834
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type
|
|
842
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, 
subscribeToCodexLogEntries };
|