@agentv/core 0.22.1 → 0.22.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +217 -267
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +8 -23
- package/dist/index.d.ts +8 -23
- package/dist/index.js +217 -266
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -91,6 +91,7 @@ type LlmJudgeEvaluatorConfig = {
|
|
|
91
91
|
readonly type: 'llm_judge';
|
|
92
92
|
readonly prompt?: string;
|
|
93
93
|
readonly promptPath?: string;
|
|
94
|
+
readonly rubrics?: readonly RubricItem[];
|
|
94
95
|
};
|
|
95
96
|
type RubricItem = {
|
|
96
97
|
readonly id: string;
|
|
@@ -98,12 +99,7 @@ type RubricItem = {
|
|
|
98
99
|
readonly weight: number;
|
|
99
100
|
readonly required: boolean;
|
|
100
101
|
};
|
|
101
|
-
type RubricEvaluatorConfig = {
|
|
102
|
-
readonly name: string;
|
|
103
|
-
readonly type: 'rubric';
|
|
104
|
-
readonly rubrics: readonly RubricItem[];
|
|
105
|
-
};
|
|
106
|
-
type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | RubricEvaluatorConfig;
|
|
102
|
+
type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
|
|
107
103
|
/**
|
|
108
104
|
* Eval case definition sourced from AgentV specs.
|
|
109
105
|
*/
|
|
@@ -530,20 +526,6 @@ declare function subscribeToCodexLogEntries(listener: CodexLogListener): () => v
|
|
|
530
526
|
declare function createProvider(target: ResolvedTarget): Provider;
|
|
531
527
|
declare function resolveAndCreateProvider(definition: TargetDefinition, env?: EnvLookup): Provider;
|
|
532
528
|
|
|
533
|
-
interface RubricEvaluatorOptions {
|
|
534
|
-
readonly config: RubricEvaluatorConfig;
|
|
535
|
-
readonly resolveJudgeProvider: (context: EvaluationContext) => Promise<Provider | undefined>;
|
|
536
|
-
}
|
|
537
|
-
declare class RubricEvaluator implements Evaluator {
|
|
538
|
-
readonly kind = "rubric";
|
|
539
|
-
private readonly config;
|
|
540
|
-
private readonly resolveJudgeProvider;
|
|
541
|
-
constructor(options: RubricEvaluatorOptions);
|
|
542
|
-
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
543
|
-
private buildPrompt;
|
|
544
|
-
private calculateScore;
|
|
545
|
-
}
|
|
546
|
-
|
|
547
529
|
interface EvaluationContext {
|
|
548
530
|
readonly evalCase: EvalCase;
|
|
549
531
|
readonly candidate: string;
|
|
@@ -563,7 +545,7 @@ interface EvaluationContext {
|
|
|
563
545
|
}
|
|
564
546
|
interface EvaluationScore {
|
|
565
547
|
readonly score: number;
|
|
566
|
-
readonly verdict
|
|
548
|
+
readonly verdict: EvaluationVerdict;
|
|
567
549
|
readonly hits: readonly string[];
|
|
568
550
|
readonly misses: readonly string[];
|
|
569
551
|
readonly expectedAspectCount: number;
|
|
@@ -590,7 +572,10 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
590
572
|
private readonly evaluatorTemplate?;
|
|
591
573
|
constructor(options: LlmJudgeEvaluatorOptions);
|
|
592
574
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
593
|
-
private
|
|
575
|
+
private evaluateFreeform;
|
|
576
|
+
private evaluateWithRubrics;
|
|
577
|
+
private buildRubricPrompt;
|
|
578
|
+
private runWithRetry;
|
|
594
579
|
}
|
|
595
580
|
interface CodeEvaluatorOptions {
|
|
596
581
|
readonly script: string;
|
|
@@ -675,4 +660,4 @@ type AgentKernel = {
|
|
|
675
660
|
};
|
|
676
661
|
declare function createAgentKernel(): AgentKernel;
|
|
677
662
|
|
|
678
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget,
|
|
663
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|
package/dist/index.d.ts
CHANGED
|
@@ -91,6 +91,7 @@ type LlmJudgeEvaluatorConfig = {
|
|
|
91
91
|
readonly type: 'llm_judge';
|
|
92
92
|
readonly prompt?: string;
|
|
93
93
|
readonly promptPath?: string;
|
|
94
|
+
readonly rubrics?: readonly RubricItem[];
|
|
94
95
|
};
|
|
95
96
|
type RubricItem = {
|
|
96
97
|
readonly id: string;
|
|
@@ -98,12 +99,7 @@ type RubricItem = {
|
|
|
98
99
|
readonly weight: number;
|
|
99
100
|
readonly required: boolean;
|
|
100
101
|
};
|
|
101
|
-
type RubricEvaluatorConfig = {
|
|
102
|
-
readonly name: string;
|
|
103
|
-
readonly type: 'rubric';
|
|
104
|
-
readonly rubrics: readonly RubricItem[];
|
|
105
|
-
};
|
|
106
|
-
type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | RubricEvaluatorConfig;
|
|
102
|
+
type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
|
|
107
103
|
/**
|
|
108
104
|
* Eval case definition sourced from AgentV specs.
|
|
109
105
|
*/
|
|
@@ -530,20 +526,6 @@ declare function subscribeToCodexLogEntries(listener: CodexLogListener): () => v
|
|
|
530
526
|
declare function createProvider(target: ResolvedTarget): Provider;
|
|
531
527
|
declare function resolveAndCreateProvider(definition: TargetDefinition, env?: EnvLookup): Provider;
|
|
532
528
|
|
|
533
|
-
interface RubricEvaluatorOptions {
|
|
534
|
-
readonly config: RubricEvaluatorConfig;
|
|
535
|
-
readonly resolveJudgeProvider: (context: EvaluationContext) => Promise<Provider | undefined>;
|
|
536
|
-
}
|
|
537
|
-
declare class RubricEvaluator implements Evaluator {
|
|
538
|
-
readonly kind = "rubric";
|
|
539
|
-
private readonly config;
|
|
540
|
-
private readonly resolveJudgeProvider;
|
|
541
|
-
constructor(options: RubricEvaluatorOptions);
|
|
542
|
-
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
543
|
-
private buildPrompt;
|
|
544
|
-
private calculateScore;
|
|
545
|
-
}
|
|
546
|
-
|
|
547
529
|
interface EvaluationContext {
|
|
548
530
|
readonly evalCase: EvalCase;
|
|
549
531
|
readonly candidate: string;
|
|
@@ -563,7 +545,7 @@ interface EvaluationContext {
|
|
|
563
545
|
}
|
|
564
546
|
interface EvaluationScore {
|
|
565
547
|
readonly score: number;
|
|
566
|
-
readonly verdict
|
|
548
|
+
readonly verdict: EvaluationVerdict;
|
|
567
549
|
readonly hits: readonly string[];
|
|
568
550
|
readonly misses: readonly string[];
|
|
569
551
|
readonly expectedAspectCount: number;
|
|
@@ -590,7 +572,10 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
590
572
|
private readonly evaluatorTemplate?;
|
|
591
573
|
constructor(options: LlmJudgeEvaluatorOptions);
|
|
592
574
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
593
|
-
private
|
|
575
|
+
private evaluateFreeform;
|
|
576
|
+
private evaluateWithRubrics;
|
|
577
|
+
private buildRubricPrompt;
|
|
578
|
+
private runWithRetry;
|
|
594
579
|
}
|
|
595
580
|
interface CodeEvaluatorOptions {
|
|
596
581
|
readonly script: string;
|
|
@@ -675,4 +660,4 @@ type AgentKernel = {
|
|
|
675
660
|
};
|
|
676
661
|
declare function createAgentKernel(): AgentKernel;
|
|
677
662
|
|
|
678
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget,
|
|
663
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
|