@agentv/core 0.22.2 → 0.23.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -75,7 +75,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
75
75
  * Guard validating raw test messages.
76
76
  */
77
77
  declare function isTestMessage(value: unknown): value is TestMessage;
78
- declare const EVALUATOR_KIND_VALUES: readonly ["code", "llm_judge", "rubric"];
78
+ declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite"];
79
79
  type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
80
80
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
81
81
  type CodeEvaluatorConfig = {
@@ -99,7 +99,26 @@ type RubricItem = {
99
99
  readonly weight: number;
100
100
  readonly required: boolean;
101
101
  };
102
- type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
102
+ type CompositeAggregatorConfig = {
103
+ readonly type: 'weighted_average';
104
+ readonly weights?: Record<string, number>;
105
+ } | {
106
+ readonly type: 'code_judge';
107
+ readonly path: string;
108
+ readonly cwd?: string;
109
+ } | {
110
+ readonly type: 'llm_judge';
111
+ readonly prompt?: string;
112
+ readonly promptPath?: string;
113
+ readonly model?: string;
114
+ };
115
+ type CompositeEvaluatorConfig = {
116
+ readonly name: string;
117
+ readonly type: 'composite';
118
+ readonly evaluators: readonly EvaluatorConfig[];
119
+ readonly aggregator: CompositeAggregatorConfig;
120
+ };
121
+ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig;
103
122
  /**
104
123
  * Eval case definition sourced from AgentV specs.
105
124
  */
@@ -146,12 +165,14 @@ interface EvaluatorResult {
146
165
  readonly name: string;
147
166
  readonly type: EvaluatorKind;
148
167
  readonly score: number;
168
+ readonly weight?: number;
149
169
  readonly verdict?: EvaluationVerdict;
150
170
  readonly hits: readonly string[];
151
171
  readonly misses: readonly string[];
152
172
  readonly reasoning?: string;
153
173
  readonly raw_request?: JsonObject;
154
174
  readonly evaluator_provider_request?: JsonObject;
175
+ readonly evaluator_results?: readonly EvaluatorResult[];
155
176
  }
156
177
  /**
157
178
  * Convenience accessor matching the Python hit_count property.
@@ -552,6 +573,19 @@ interface EvaluationScore {
552
573
  readonly reasoning?: string;
553
574
  readonly rawAspects?: readonly string[];
554
575
  readonly evaluatorRawRequest?: JsonObject;
576
+ readonly evaluatorResults?: readonly ChildEvaluatorResult[];
577
+ }
578
+ interface ChildEvaluatorResult {
579
+ readonly name: string;
580
+ readonly type: string;
581
+ readonly score: number;
582
+ readonly weight?: number;
583
+ readonly verdict: EvaluationVerdict;
584
+ readonly hits: readonly string[];
585
+ readonly misses: readonly string[];
586
+ readonly reasoning?: string;
587
+ readonly evaluatorRawRequest?: JsonObject;
588
+ readonly evaluatorResults?: readonly ChildEvaluatorResult[];
555
589
  }
556
590
  interface Evaluator {
557
591
  readonly kind: string;
@@ -590,6 +624,26 @@ declare class CodeEvaluator implements Evaluator {
590
624
  constructor(options: CodeEvaluatorOptions);
591
625
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
592
626
  }
627
+ interface EvaluatorFactory {
628
+ create(config: EvaluatorConfig, context: EvaluationContext): Evaluator;
629
+ }
630
+ interface CompositeEvaluatorOptions {
631
+ readonly config: CompositeEvaluatorConfig;
632
+ readonly evaluatorFactory: EvaluatorFactory;
633
+ readonly cwd?: string;
634
+ }
635
+ declare class CompositeEvaluator implements Evaluator {
636
+ readonly kind = "composite";
637
+ private readonly config;
638
+ private readonly evaluatorFactory;
639
+ private readonly cwd?;
640
+ constructor(options: CompositeEvaluatorOptions);
641
+ evaluate(context: EvaluationContext): Promise<EvaluationScore>;
642
+ private aggregate;
643
+ private runWeightedAverage;
644
+ private runCodeAggregator;
645
+ private runLlmAggregator;
646
+ }
593
647
 
594
648
  type MaybePromise<T> = T | Promise<T>;
595
649
  interface EvaluationCache {
@@ -660,4 +714,4 @@ type AgentKernel = {
660
714
  };
661
715
  declare function createAgentKernel(): AgentKernel;
662
716
 
663
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
717
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.d.ts CHANGED
@@ -75,7 +75,7 @@ declare function isJsonValue(value: unknown): value is JsonValue;
75
75
  * Guard validating raw test messages.
76
76
  */
77
77
  declare function isTestMessage(value: unknown): value is TestMessage;
78
- declare const EVALUATOR_KIND_VALUES: readonly ["code", "llm_judge", "rubric"];
78
+ declare const EVALUATOR_KIND_VALUES: readonly ["code_judge", "llm_judge", "rubric", "composite"];
79
79
  type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
80
80
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
81
81
  type CodeEvaluatorConfig = {
@@ -99,7 +99,26 @@ type RubricItem = {
99
99
  readonly weight: number;
100
100
  readonly required: boolean;
101
101
  };
102
- type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
102
+ type CompositeAggregatorConfig = {
103
+ readonly type: 'weighted_average';
104
+ readonly weights?: Record<string, number>;
105
+ } | {
106
+ readonly type: 'code_judge';
107
+ readonly path: string;
108
+ readonly cwd?: string;
109
+ } | {
110
+ readonly type: 'llm_judge';
111
+ readonly prompt?: string;
112
+ readonly promptPath?: string;
113
+ readonly model?: string;
114
+ };
115
+ type CompositeEvaluatorConfig = {
116
+ readonly name: string;
117
+ readonly type: 'composite';
118
+ readonly evaluators: readonly EvaluatorConfig[];
119
+ readonly aggregator: CompositeAggregatorConfig;
120
+ };
121
+ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig;
103
122
  /**
104
123
  * Eval case definition sourced from AgentV specs.
105
124
  */
@@ -146,12 +165,14 @@ interface EvaluatorResult {
146
165
  readonly name: string;
147
166
  readonly type: EvaluatorKind;
148
167
  readonly score: number;
168
+ readonly weight?: number;
149
169
  readonly verdict?: EvaluationVerdict;
150
170
  readonly hits: readonly string[];
151
171
  readonly misses: readonly string[];
152
172
  readonly reasoning?: string;
153
173
  readonly raw_request?: JsonObject;
154
174
  readonly evaluator_provider_request?: JsonObject;
175
+ readonly evaluator_results?: readonly EvaluatorResult[];
155
176
  }
156
177
  /**
157
178
  * Convenience accessor matching the Python hit_count property.
@@ -552,6 +573,19 @@ interface EvaluationScore {
552
573
  readonly reasoning?: string;
553
574
  readonly rawAspects?: readonly string[];
554
575
  readonly evaluatorRawRequest?: JsonObject;
576
+ readonly evaluatorResults?: readonly ChildEvaluatorResult[];
577
+ }
578
+ interface ChildEvaluatorResult {
579
+ readonly name: string;
580
+ readonly type: string;
581
+ readonly score: number;
582
+ readonly weight?: number;
583
+ readonly verdict: EvaluationVerdict;
584
+ readonly hits: readonly string[];
585
+ readonly misses: readonly string[];
586
+ readonly reasoning?: string;
587
+ readonly evaluatorRawRequest?: JsonObject;
588
+ readonly evaluatorResults?: readonly ChildEvaluatorResult[];
555
589
  }
556
590
  interface Evaluator {
557
591
  readonly kind: string;
@@ -590,6 +624,26 @@ declare class CodeEvaluator implements Evaluator {
590
624
  constructor(options: CodeEvaluatorOptions);
591
625
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
592
626
  }
627
+ interface EvaluatorFactory {
628
+ create(config: EvaluatorConfig, context: EvaluationContext): Evaluator;
629
+ }
630
+ interface CompositeEvaluatorOptions {
631
+ readonly config: CompositeEvaluatorConfig;
632
+ readonly evaluatorFactory: EvaluatorFactory;
633
+ readonly cwd?: string;
634
+ }
635
+ declare class CompositeEvaluator implements Evaluator {
636
+ readonly kind = "composite";
637
+ private readonly config;
638
+ private readonly evaluatorFactory;
639
+ private readonly cwd?;
640
+ constructor(options: CompositeEvaluatorOptions);
641
+ evaluate(context: EvaluationContext): Promise<EvaluationScore>;
642
+ private aggregate;
643
+ private runWeightedAverage;
644
+ private runCodeAggregator;
645
+ private runLlmAggregator;
646
+ }
593
647
 
594
648
  type MaybePromise<T> = T | Promise<T>;
595
649
  interface EvaluationCache {
@@ -660,4 +714,4 @@ type AgentKernel = {
660
714
  };
661
715
  declare function createAgentKernel(): AgentKernel;
662
716
 
663
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
717
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };