@agentv/core 3.12.0 → 3.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-4XWPXNQM.js → chunk-ZB3AUPES.js} +1 -3
- package/dist/chunk-ZB3AUPES.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +0 -2
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +63 -177
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -55
- package/dist/index.d.ts +15 -55
- package/dist/index.js +62 -49
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-3G2KXH7N.js +0 -120
- package/dist/chunk-3G2KXH7N.js.map +0 -1
- package/dist/chunk-4XWPXNQM.js.map +0 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ.js +0 -7
- package/dist/simple-trace-file-exporter-CRIO5HDZ.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -473,7 +473,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
|
|
|
473
473
|
* - Either content (string or array of objects) OR tool_calls (for assistant messages)
|
|
474
474
|
*/
|
|
475
475
|
declare function isTestMessage(value: unknown): value is TestMessage;
|
|
476
|
-
declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "
|
|
476
|
+
declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
|
|
477
477
|
type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
|
|
478
478
|
declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
|
|
479
479
|
/**
|
|
@@ -576,7 +576,7 @@ type WorkspaceConfig = {
|
|
|
576
576
|
};
|
|
577
577
|
type CodeEvaluatorConfig = {
|
|
578
578
|
readonly name: string;
|
|
579
|
-
readonly type: 'code-
|
|
579
|
+
readonly type: 'code-grader';
|
|
580
580
|
readonly command: readonly string[];
|
|
581
581
|
/** @deprecated Use `command` instead */
|
|
582
582
|
readonly script?: readonly string[];
|
|
@@ -606,7 +606,7 @@ type PromptScriptConfig = {
|
|
|
606
606
|
};
|
|
607
607
|
type LlmGraderEvaluatorConfig = {
|
|
608
608
|
readonly name: string;
|
|
609
|
-
readonly type: 'llm-grader'
|
|
609
|
+
readonly type: 'llm-grader';
|
|
610
610
|
/** Text prompt (inline or file path) or executable script config */
|
|
611
611
|
readonly prompt?: string | PromptScriptConfig;
|
|
612
612
|
readonly promptPath?: string;
|
|
@@ -678,20 +678,11 @@ type CompositeAggregatorConfig = {
|
|
|
678
678
|
readonly type: 'code-grader';
|
|
679
679
|
readonly path: string;
|
|
680
680
|
readonly cwd?: string;
|
|
681
|
-
} | {
|
|
682
|
-
readonly type: 'code-judge';
|
|
683
|
-
readonly path: string;
|
|
684
|
-
readonly cwd?: string;
|
|
685
681
|
} | {
|
|
686
682
|
readonly type: 'llm-grader';
|
|
687
683
|
readonly prompt?: string;
|
|
688
684
|
readonly promptPath?: string;
|
|
689
685
|
readonly model?: string;
|
|
690
|
-
} | {
|
|
691
|
-
readonly type: 'llm-judge';
|
|
692
|
-
readonly prompt?: string;
|
|
693
|
-
readonly promptPath?: string;
|
|
694
|
-
readonly model?: string;
|
|
695
686
|
} | {
|
|
696
687
|
readonly type: 'threshold';
|
|
697
688
|
readonly threshold: number;
|
|
@@ -1250,7 +1241,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
|
|
|
1250
1241
|
declare const DEFAULT_EVAL_PATTERNS: readonly string[];
|
|
1251
1242
|
type ExecutionDefaults = {
|
|
1252
1243
|
readonly verbose?: boolean;
|
|
1253
|
-
readonly trace_file?: string;
|
|
1254
1244
|
readonly keep_workspaces?: boolean;
|
|
1255
1245
|
readonly otel_file?: string;
|
|
1256
1246
|
readonly export_otel?: boolean;
|
|
@@ -2118,7 +2108,7 @@ interface CodeEvaluatorOptions {
|
|
|
2118
2108
|
readonly target?: TargetAccessConfig;
|
|
2119
2109
|
}
|
|
2120
2110
|
declare class CodeEvaluator implements Evaluator {
|
|
2121
|
-
readonly kind = "code-
|
|
2111
|
+
readonly kind = "code-grader";
|
|
2122
2112
|
private readonly command;
|
|
2123
2113
|
private readonly cwd?;
|
|
2124
2114
|
private readonly agentTimeoutMs?;
|
|
@@ -2853,7 +2843,7 @@ interface EvalTestInput {
|
|
|
2853
2843
|
readonly expectedOutput?: string;
|
|
2854
2844
|
/** @deprecated Use `expectedOutput` instead */
|
|
2855
2845
|
readonly expected_output?: string;
|
|
2856
|
-
/** Assertion
|
|
2846
|
+
/** Assertion graders — accepts factory functions, config objects, or inline functions */
|
|
2857
2847
|
readonly assert?: readonly AssertEntry[];
|
|
2858
2848
|
/** Arbitrary metadata */
|
|
2859
2849
|
readonly metadata?: Record<string, unknown>;
|
|
@@ -2863,7 +2853,7 @@ interface EvalTestInput {
|
|
|
2863
2853
|
* Matches the YAML `assert` block structure.
|
|
2864
2854
|
*/
|
|
2865
2855
|
interface EvalAssertionInput {
|
|
2866
|
-
/** Assertion type (e.g., 'contains', 'llm-
|
|
2856
|
+
/** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
|
|
2867
2857
|
readonly type: string;
|
|
2868
2858
|
/** Display name */
|
|
2869
2859
|
readonly name?: string;
|
|
@@ -2873,9 +2863,9 @@ interface EvalAssertionInput {
|
|
|
2873
2863
|
readonly weight?: number;
|
|
2874
2864
|
/** Whether this assertion is required to pass */
|
|
2875
2865
|
readonly required?: boolean | number;
|
|
2876
|
-
/** Prompt file for
|
|
2866
|
+
/** Prompt file for llm_grader */
|
|
2877
2867
|
readonly prompt?: string;
|
|
2878
|
-
/** Script for
|
|
2868
|
+
/** Script for code_grader */
|
|
2879
2869
|
readonly script?: string | readonly string[];
|
|
2880
2870
|
/** Additional config passed to the assertion */
|
|
2881
2871
|
readonly config?: Record<string, unknown>;
|
|
@@ -3024,8 +3014,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3024
3014
|
agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
3025
3015
|
/** Enable verbose logging */
|
|
3026
3016
|
verbose: z.ZodOptional<z.ZodBoolean>;
|
|
3027
|
-
/** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
|
|
3028
|
-
traceFile: z.ZodOptional<z.ZodString>;
|
|
3029
3017
|
/** Always keep temp workspaces after eval */
|
|
3030
3018
|
keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
|
|
3031
3019
|
/** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
|
|
@@ -3036,7 +3024,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3036
3024
|
maxRetries?: number | undefined;
|
|
3037
3025
|
agentTimeoutMs?: number | undefined;
|
|
3038
3026
|
keepWorkspaces?: boolean | undefined;
|
|
3039
|
-
traceFile?: string | undefined;
|
|
3040
3027
|
otelFile?: string | undefined;
|
|
3041
3028
|
}, {
|
|
3042
3029
|
workers?: number | undefined;
|
|
@@ -3044,7 +3031,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3044
3031
|
maxRetries?: number | undefined;
|
|
3045
3032
|
agentTimeoutMs?: number | undefined;
|
|
3046
3033
|
keepWorkspaces?: boolean | undefined;
|
|
3047
|
-
traceFile?: string | undefined;
|
|
3048
3034
|
otelFile?: string | undefined;
|
|
3049
3035
|
}>>;
|
|
3050
3036
|
/** Output settings */
|
|
@@ -3093,7 +3079,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3093
3079
|
maxRetries?: number | undefined;
|
|
3094
3080
|
agentTimeoutMs?: number | undefined;
|
|
3095
3081
|
keepWorkspaces?: boolean | undefined;
|
|
3096
|
-
traceFile?: string | undefined;
|
|
3097
3082
|
otelFile?: string | undefined;
|
|
3098
3083
|
} | undefined;
|
|
3099
3084
|
cache?: {
|
|
@@ -3115,7 +3100,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3115
3100
|
maxRetries?: number | undefined;
|
|
3116
3101
|
agentTimeoutMs?: number | undefined;
|
|
3117
3102
|
keepWorkspaces?: boolean | undefined;
|
|
3118
|
-
traceFile?: string | undefined;
|
|
3119
3103
|
otelFile?: string | undefined;
|
|
3120
3104
|
} | undefined;
|
|
3121
3105
|
cache?: {
|
|
@@ -3469,8 +3453,6 @@ interface OtelExportOptions {
|
|
|
3469
3453
|
readonly groupTurns?: boolean;
|
|
3470
3454
|
/** Path to write OTLP JSON file (importable by OTel backends) */
|
|
3471
3455
|
readonly otlpFilePath?: string;
|
|
3472
|
-
/** Path to write human-readable simple JSONL trace file */
|
|
3473
|
-
readonly traceFilePath?: string;
|
|
3474
3456
|
}
|
|
3475
3457
|
/** Preset configuration for a known observability backend. */
|
|
3476
3458
|
interface OtelBackendPreset {
|
|
@@ -3543,7 +3525,7 @@ declare class OtelStreamingObserver {
|
|
|
3543
3525
|
getStreamCallbacks(): ProviderStreamCallbacks;
|
|
3544
3526
|
}
|
|
3545
3527
|
|
|
3546
|
-
type ReadableSpan
|
|
3528
|
+
type ReadableSpan = any;
|
|
3547
3529
|
/**
|
|
3548
3530
|
* SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
|
|
3549
3531
|
* The file can be imported by any OTel-compatible backend.
|
|
@@ -3552,34 +3534,12 @@ declare class OtlpJsonFileExporter {
|
|
|
3552
3534
|
private spans;
|
|
3553
3535
|
private filePath;
|
|
3554
3536
|
constructor(filePath: string);
|
|
3555
|
-
export(spans: ReadableSpan$1[], resultCallback: (result: {
|
|
3556
|
-
code: number;
|
|
3557
|
-
}) => void): void;
|
|
3558
|
-
shutdown(): Promise<void>;
|
|
3559
|
-
forceFlush(): Promise<void>;
|
|
3560
|
-
private flush;
|
|
3561
|
-
}
|
|
3562
|
-
|
|
3563
|
-
type ReadableSpan = any;
|
|
3564
|
-
/**
|
|
3565
|
-
* SpanExporter that writes human-readable JSONL (one line per root span).
|
|
3566
|
-
* Designed for quick debugging and analysis without OTel tooling.
|
|
3567
|
-
*/
|
|
3568
|
-
declare class SimpleTraceFileExporter {
|
|
3569
|
-
private stream;
|
|
3570
|
-
private filePath;
|
|
3571
|
-
private streamReady;
|
|
3572
|
-
private pendingWrites;
|
|
3573
|
-
private _shuttingDown;
|
|
3574
|
-
private spansByTraceId;
|
|
3575
|
-
constructor(filePath: string);
|
|
3576
|
-
private ensureStream;
|
|
3577
3537
|
export(spans: ReadableSpan[], resultCallback: (result: {
|
|
3578
3538
|
code: number;
|
|
3579
3539
|
}) => void): void;
|
|
3580
3540
|
shutdown(): Promise<void>;
|
|
3581
3541
|
forceFlush(): Promise<void>;
|
|
3582
|
-
private
|
|
3542
|
+
private flush;
|
|
3583
3543
|
}
|
|
3584
3544
|
|
|
3585
3545
|
/**
|
|
@@ -3599,17 +3559,17 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
|
|
|
3599
3559
|
* Convention-based discovery of custom assertion scripts.
|
|
3600
3560
|
*
|
|
3601
3561
|
* Scans `.agentv/assertions/` for TypeScript/JavaScript files and registers
|
|
3602
|
-
* them as code
|
|
3603
|
-
* extension) becomes the
|
|
3562
|
+
* them as code graders in the registry. The file name (without
|
|
3563
|
+
* extension) becomes the grader type name.
|
|
3604
3564
|
*
|
|
3605
3565
|
* Example: `.agentv/assertions/sentiment.ts` → type "sentiment" in EVAL.yaml
|
|
3606
3566
|
*/
|
|
3607
3567
|
|
|
3608
3568
|
/**
|
|
3609
3569
|
* Discover custom assertion scripts from `.agentv/assertions/` and register
|
|
3610
|
-
* them as
|
|
3570
|
+
* them as grader types in the registry.
|
|
3611
3571
|
*
|
|
3612
|
-
* @param registry - The
|
|
3572
|
+
* @param registry - The grader registry to register discovered assertions into
|
|
3613
3573
|
* @param baseDir - The base directory to search from (typically project root or eval file dir)
|
|
3614
3574
|
* @returns Names of discovered assertion types
|
|
3615
3575
|
*/
|
|
@@ -3640,4 +3600,4 @@ type AgentKernel = {
|
|
|
3640
3600
|
};
|
|
3641
3601
|
declare function createAgentKernel(): AgentKernel;
|
|
3642
3602
|
|
|
3643
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext,
|
|
3603
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|
package/dist/index.d.ts
CHANGED
|
@@ -473,7 +473,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
|
|
|
473
473
|
* - Either content (string or array of objects) OR tool_calls (for assistant messages)
|
|
474
474
|
*/
|
|
475
475
|
declare function isTestMessage(value: unknown): value is TestMessage;
|
|
476
|
-
declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "
|
|
476
|
+
declare const EVALUATOR_KIND_VALUES: readonly ["code-grader", "llm-grader", "rubric", "composite", "tool-trajectory", "field-accuracy", "latency", "cost", "token-usage", "execution-metrics", "skill-trigger", "contains", "contains-any", "contains-all", "icontains", "icontains-any", "icontains-all", "starts-with", "ends-with", "regex", "is-json", "equals", "rubrics", "inline-assert"];
|
|
477
477
|
type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
|
|
478
478
|
declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
|
|
479
479
|
/**
|
|
@@ -576,7 +576,7 @@ type WorkspaceConfig = {
|
|
|
576
576
|
};
|
|
577
577
|
type CodeEvaluatorConfig = {
|
|
578
578
|
readonly name: string;
|
|
579
|
-
readonly type: 'code-
|
|
579
|
+
readonly type: 'code-grader';
|
|
580
580
|
readonly command: readonly string[];
|
|
581
581
|
/** @deprecated Use `command` instead */
|
|
582
582
|
readonly script?: readonly string[];
|
|
@@ -606,7 +606,7 @@ type PromptScriptConfig = {
|
|
|
606
606
|
};
|
|
607
607
|
type LlmGraderEvaluatorConfig = {
|
|
608
608
|
readonly name: string;
|
|
609
|
-
readonly type: 'llm-grader'
|
|
609
|
+
readonly type: 'llm-grader';
|
|
610
610
|
/** Text prompt (inline or file path) or executable script config */
|
|
611
611
|
readonly prompt?: string | PromptScriptConfig;
|
|
612
612
|
readonly promptPath?: string;
|
|
@@ -678,20 +678,11 @@ type CompositeAggregatorConfig = {
|
|
|
678
678
|
readonly type: 'code-grader';
|
|
679
679
|
readonly path: string;
|
|
680
680
|
readonly cwd?: string;
|
|
681
|
-
} | {
|
|
682
|
-
readonly type: 'code-judge';
|
|
683
|
-
readonly path: string;
|
|
684
|
-
readonly cwd?: string;
|
|
685
681
|
} | {
|
|
686
682
|
readonly type: 'llm-grader';
|
|
687
683
|
readonly prompt?: string;
|
|
688
684
|
readonly promptPath?: string;
|
|
689
685
|
readonly model?: string;
|
|
690
|
-
} | {
|
|
691
|
-
readonly type: 'llm-judge';
|
|
692
|
-
readonly prompt?: string;
|
|
693
|
-
readonly promptPath?: string;
|
|
694
|
-
readonly model?: string;
|
|
695
686
|
} | {
|
|
696
687
|
readonly type: 'threshold';
|
|
697
688
|
readonly threshold: number;
|
|
@@ -1250,7 +1241,6 @@ type EvalMetadata = z.infer<typeof MetadataSchema>;
|
|
|
1250
1241
|
declare const DEFAULT_EVAL_PATTERNS: readonly string[];
|
|
1251
1242
|
type ExecutionDefaults = {
|
|
1252
1243
|
readonly verbose?: boolean;
|
|
1253
|
-
readonly trace_file?: string;
|
|
1254
1244
|
readonly keep_workspaces?: boolean;
|
|
1255
1245
|
readonly otel_file?: string;
|
|
1256
1246
|
readonly export_otel?: boolean;
|
|
@@ -2118,7 +2108,7 @@ interface CodeEvaluatorOptions {
|
|
|
2118
2108
|
readonly target?: TargetAccessConfig;
|
|
2119
2109
|
}
|
|
2120
2110
|
declare class CodeEvaluator implements Evaluator {
|
|
2121
|
-
readonly kind = "code-
|
|
2111
|
+
readonly kind = "code-grader";
|
|
2122
2112
|
private readonly command;
|
|
2123
2113
|
private readonly cwd?;
|
|
2124
2114
|
private readonly agentTimeoutMs?;
|
|
@@ -2853,7 +2843,7 @@ interface EvalTestInput {
|
|
|
2853
2843
|
readonly expectedOutput?: string;
|
|
2854
2844
|
/** @deprecated Use `expectedOutput` instead */
|
|
2855
2845
|
readonly expected_output?: string;
|
|
2856
|
-
/** Assertion
|
|
2846
|
+
/** Assertion graders — accepts factory functions, config objects, or inline functions */
|
|
2857
2847
|
readonly assert?: readonly AssertEntry[];
|
|
2858
2848
|
/** Arbitrary metadata */
|
|
2859
2849
|
readonly metadata?: Record<string, unknown>;
|
|
@@ -2863,7 +2853,7 @@ interface EvalTestInput {
|
|
|
2863
2853
|
* Matches the YAML `assert` block structure.
|
|
2864
2854
|
*/
|
|
2865
2855
|
interface EvalAssertionInput {
|
|
2866
|
-
/** Assertion type (e.g., 'contains', 'llm-
|
|
2856
|
+
/** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
|
|
2867
2857
|
readonly type: string;
|
|
2868
2858
|
/** Display name */
|
|
2869
2859
|
readonly name?: string;
|
|
@@ -2873,9 +2863,9 @@ interface EvalAssertionInput {
|
|
|
2873
2863
|
readonly weight?: number;
|
|
2874
2864
|
/** Whether this assertion is required to pass */
|
|
2875
2865
|
readonly required?: boolean | number;
|
|
2876
|
-
/** Prompt file for
|
|
2866
|
+
/** Prompt file for llm_grader */
|
|
2877
2867
|
readonly prompt?: string;
|
|
2878
|
-
/** Script for
|
|
2868
|
+
/** Script for code_grader */
|
|
2879
2869
|
readonly script?: string | readonly string[];
|
|
2880
2870
|
/** Additional config passed to the assertion */
|
|
2881
2871
|
readonly config?: Record<string, unknown>;
|
|
@@ -3024,8 +3014,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3024
3014
|
agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
3025
3015
|
/** Enable verbose logging */
|
|
3026
3016
|
verbose: z.ZodOptional<z.ZodBoolean>;
|
|
3027
|
-
/** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
|
|
3028
|
-
traceFile: z.ZodOptional<z.ZodString>;
|
|
3029
3017
|
/** Always keep temp workspaces after eval */
|
|
3030
3018
|
keepWorkspaces: z.ZodOptional<z.ZodBoolean>;
|
|
3031
3019
|
/** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
|
|
@@ -3036,7 +3024,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3036
3024
|
maxRetries?: number | undefined;
|
|
3037
3025
|
agentTimeoutMs?: number | undefined;
|
|
3038
3026
|
keepWorkspaces?: boolean | undefined;
|
|
3039
|
-
traceFile?: string | undefined;
|
|
3040
3027
|
otelFile?: string | undefined;
|
|
3041
3028
|
}, {
|
|
3042
3029
|
workers?: number | undefined;
|
|
@@ -3044,7 +3031,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3044
3031
|
maxRetries?: number | undefined;
|
|
3045
3032
|
agentTimeoutMs?: number | undefined;
|
|
3046
3033
|
keepWorkspaces?: boolean | undefined;
|
|
3047
|
-
traceFile?: string | undefined;
|
|
3048
3034
|
otelFile?: string | undefined;
|
|
3049
3035
|
}>>;
|
|
3050
3036
|
/** Output settings */
|
|
@@ -3093,7 +3079,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3093
3079
|
maxRetries?: number | undefined;
|
|
3094
3080
|
agentTimeoutMs?: number | undefined;
|
|
3095
3081
|
keepWorkspaces?: boolean | undefined;
|
|
3096
|
-
traceFile?: string | undefined;
|
|
3097
3082
|
otelFile?: string | undefined;
|
|
3098
3083
|
} | undefined;
|
|
3099
3084
|
cache?: {
|
|
@@ -3115,7 +3100,6 @@ declare const AgentVConfigSchema: z.ZodObject<{
|
|
|
3115
3100
|
maxRetries?: number | undefined;
|
|
3116
3101
|
agentTimeoutMs?: number | undefined;
|
|
3117
3102
|
keepWorkspaces?: boolean | undefined;
|
|
3118
|
-
traceFile?: string | undefined;
|
|
3119
3103
|
otelFile?: string | undefined;
|
|
3120
3104
|
} | undefined;
|
|
3121
3105
|
cache?: {
|
|
@@ -3469,8 +3453,6 @@ interface OtelExportOptions {
|
|
|
3469
3453
|
readonly groupTurns?: boolean;
|
|
3470
3454
|
/** Path to write OTLP JSON file (importable by OTel backends) */
|
|
3471
3455
|
readonly otlpFilePath?: string;
|
|
3472
|
-
/** Path to write human-readable simple JSONL trace file */
|
|
3473
|
-
readonly traceFilePath?: string;
|
|
3474
3456
|
}
|
|
3475
3457
|
/** Preset configuration for a known observability backend. */
|
|
3476
3458
|
interface OtelBackendPreset {
|
|
@@ -3543,7 +3525,7 @@ declare class OtelStreamingObserver {
|
|
|
3543
3525
|
getStreamCallbacks(): ProviderStreamCallbacks;
|
|
3544
3526
|
}
|
|
3545
3527
|
|
|
3546
|
-
type ReadableSpan
|
|
3528
|
+
type ReadableSpan = any;
|
|
3547
3529
|
/**
|
|
3548
3530
|
* SpanExporter that writes OTLP JSON (the standard OTel wire format) to a file.
|
|
3549
3531
|
* The file can be imported by any OTel-compatible backend.
|
|
@@ -3552,34 +3534,12 @@ declare class OtlpJsonFileExporter {
|
|
|
3552
3534
|
private spans;
|
|
3553
3535
|
private filePath;
|
|
3554
3536
|
constructor(filePath: string);
|
|
3555
|
-
export(spans: ReadableSpan$1[], resultCallback: (result: {
|
|
3556
|
-
code: number;
|
|
3557
|
-
}) => void): void;
|
|
3558
|
-
shutdown(): Promise<void>;
|
|
3559
|
-
forceFlush(): Promise<void>;
|
|
3560
|
-
private flush;
|
|
3561
|
-
}
|
|
3562
|
-
|
|
3563
|
-
type ReadableSpan = any;
|
|
3564
|
-
/**
|
|
3565
|
-
* SpanExporter that writes human-readable JSONL (one line per root span).
|
|
3566
|
-
* Designed for quick debugging and analysis without OTel tooling.
|
|
3567
|
-
*/
|
|
3568
|
-
declare class SimpleTraceFileExporter {
|
|
3569
|
-
private stream;
|
|
3570
|
-
private filePath;
|
|
3571
|
-
private streamReady;
|
|
3572
|
-
private pendingWrites;
|
|
3573
|
-
private _shuttingDown;
|
|
3574
|
-
private spansByTraceId;
|
|
3575
|
-
constructor(filePath: string);
|
|
3576
|
-
private ensureStream;
|
|
3577
3537
|
export(spans: ReadableSpan[], resultCallback: (result: {
|
|
3578
3538
|
code: number;
|
|
3579
3539
|
}) => void): void;
|
|
3580
3540
|
shutdown(): Promise<void>;
|
|
3581
3541
|
forceFlush(): Promise<void>;
|
|
3582
|
-
private
|
|
3542
|
+
private flush;
|
|
3583
3543
|
}
|
|
3584
3544
|
|
|
3585
3545
|
/**
|
|
@@ -3599,17 +3559,17 @@ declare function createBuiltinRegistry(): EvaluatorRegistry;
|
|
|
3599
3559
|
* Convention-based discovery of custom assertion scripts.
|
|
3600
3560
|
*
|
|
3601
3561
|
* Scans `.agentv/assertions/` for TypeScript/JavaScript files and registers
|
|
3602
|
-
* them as code
|
|
3603
|
-
* extension) becomes the
|
|
3562
|
+
* them as code graders in the registry. The file name (without
|
|
3563
|
+
* extension) becomes the grader type name.
|
|
3604
3564
|
*
|
|
3605
3565
|
* Example: `.agentv/assertions/sentiment.ts` → type "sentiment" in EVAL.yaml
|
|
3606
3566
|
*/
|
|
3607
3567
|
|
|
3608
3568
|
/**
|
|
3609
3569
|
* Discover custom assertion scripts from `.agentv/assertions/` and register
|
|
3610
|
-
* them as
|
|
3570
|
+
* them as grader types in the registry.
|
|
3611
3571
|
*
|
|
3612
|
-
* @param registry - The
|
|
3572
|
+
* @param registry - The grader registry to register discovered assertions into
|
|
3613
3573
|
* @param baseDir - The base directory to search from (typically project root or eval file dir)
|
|
3614
3574
|
* @returns Names of discovered assertion types
|
|
3615
3575
|
*/
|
|
@@ -3640,4 +3600,4 @@ type AgentKernel = {
|
|
|
3640
3600
|
};
|
|
3641
3601
|
declare function createAgentKernel(): AgentKernel;
|
|
3642
3602
|
|
|
3643
|
-
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext,
|
|
3603
|
+
export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AnthropicResolvedConfig, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type CacheConfig, type ChildEvaluatorResult, type ClaudeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type ConfidenceIntervalAggregation, type ContainsAllEvaluatorConfig, type ContainsAnyEvaluatorConfig, type ContainsEvaluatorConfig, type CopilotCliResolvedConfig, type CopilotSdkResolvedConfig, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EVALUATOR_TEMPLATE, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DeterministicAssertionEvaluator, type EndsWithEvaluatorConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsEvaluatorConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorDispatchContext, type EvaluatorFactory, type EvaluatorFactoryFn, type EvaluatorKind, EvaluatorRegistry, type EvaluatorResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsEvaluator, type ExecutionMetricsEvaluatorConfig, type ExecutionMetricsEvaluatorOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type IcontainsAllEvaluatorConfig, type IcontainsAnyEvaluatorConfig, type IcontainsEvaluatorConfig, type InlineAssertEvaluatorConfig, type IsJsonEvaluatorConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmGraderEvaluator, type LlmGraderEvaluatorConfig, type LlmGraderEvaluatorOptions, type LlmGraderPromptAssembly, LlmGraderEvaluator as LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmGraderEvaluatorOptions as LlmJudgeEvaluatorOptions, type LlmGraderPromptAssembly as LlmJudgePromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexEvaluatorConfig, type RepoCheckout, type RepoClone, type RepoConfig, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerEvaluator, type SkillTriggerEvaluatorConfig, type StartsWithEvaluatorConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolCall, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceComputeResult, type TraceSummary, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, assembleLlmGraderPrompt, assembleLlmGraderPrompt as assembleLlmJudgePrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, createProvider, createTempWorkspace, deepEqual, defineConfig, detectFormat, discoverAssertions, discoverGraders, discoverGraders as discoverJudges, discoverProviders, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractJsonBlob, extractTargetFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvHome, getOutputFilenames, getSubagentsRoot, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, initializeBaseline, isAgentSkillsFormat, isEvaluatorKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, parseAgentSkillsEvals, parseJsonFromText, parseJsonSafe, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, toCamelCaseDeep, toSnakeCaseDeep, tokensPerTool, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
|