@agentv/core 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-OW3SHBIJ.js → chunk-L7I5UTJU.js} +1 -1
- package/dist/{chunk-OW3SHBIJ.js.map → chunk-L7I5UTJU.js.map} +1 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +221 -242
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +11 -13
- package/dist/index.d.ts +11 -13
- package/dist/index.js +222 -243
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -99,17 +99,18 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
|
|
|
99
99
|
*/
|
|
100
100
|
interface EvalCase {
|
|
101
101
|
readonly id: string;
|
|
102
|
-
readonly dataset
|
|
102
|
+
readonly dataset?: string;
|
|
103
103
|
readonly conversation_id?: string;
|
|
104
|
-
readonly
|
|
105
|
-
readonly
|
|
104
|
+
readonly question: string;
|
|
105
|
+
readonly input_segments: readonly JsonObject[];
|
|
106
|
+
readonly output_segments: readonly JsonObject[];
|
|
106
107
|
readonly system_message?: string;
|
|
107
|
-
readonly
|
|
108
|
+
readonly reference_answer: string;
|
|
108
109
|
readonly guideline_paths: readonly string[];
|
|
109
110
|
readonly guideline_patterns?: readonly string[];
|
|
110
111
|
readonly file_paths: readonly string[];
|
|
111
112
|
readonly code_snippets: readonly string[];
|
|
112
|
-
readonly
|
|
113
|
+
readonly expected_outcome: string;
|
|
113
114
|
readonly evaluator?: EvaluatorKind;
|
|
114
115
|
readonly evaluators?: readonly EvaluatorConfig[];
|
|
115
116
|
}
|
|
@@ -118,12 +119,12 @@ interface EvalCase {
|
|
|
118
119
|
*/
|
|
119
120
|
interface EvaluationResult {
|
|
120
121
|
readonly eval_id: string;
|
|
121
|
-
readonly dataset
|
|
122
|
+
readonly dataset?: string;
|
|
122
123
|
readonly conversation_id?: string;
|
|
123
124
|
readonly score: number;
|
|
124
125
|
readonly hits: readonly string[];
|
|
125
126
|
readonly misses: readonly string[];
|
|
126
|
-
readonly
|
|
127
|
+
readonly candidate_answer: string;
|
|
127
128
|
readonly expected_aspect_count: number;
|
|
128
129
|
readonly target: string;
|
|
129
130
|
readonly timestamp: string;
|
|
@@ -167,7 +168,7 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
|
|
|
167
168
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
168
169
|
*/
|
|
169
170
|
declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
170
|
-
|
|
171
|
+
question: string;
|
|
171
172
|
guidelines: string;
|
|
172
173
|
systemMessage?: string;
|
|
173
174
|
}>;
|
|
@@ -204,7 +205,7 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
|
|
|
204
205
|
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
205
206
|
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
206
207
|
interface ProviderRequest {
|
|
207
|
-
readonly
|
|
208
|
+
readonly question: string;
|
|
208
209
|
readonly guidelines?: string;
|
|
209
210
|
readonly guideline_patterns?: readonly string[];
|
|
210
211
|
readonly chatPrompt?: ChatPrompt;
|
|
@@ -404,7 +405,7 @@ interface EvaluationContext {
|
|
|
404
405
|
readonly provider: Provider;
|
|
405
406
|
readonly attempt: number;
|
|
406
407
|
readonly promptInputs: {
|
|
407
|
-
readonly
|
|
408
|
+
readonly question: string;
|
|
408
409
|
readonly guidelines: string;
|
|
409
410
|
readonly systemMessage?: string;
|
|
410
411
|
};
|
|
@@ -442,10 +443,7 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
442
443
|
private readonly customPrompt?;
|
|
443
444
|
constructor(options: LlmJudgeEvaluatorOptions);
|
|
444
445
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
445
|
-
private evaluateWithAx;
|
|
446
446
|
private evaluateWithPrompt;
|
|
447
|
-
private buildJudgeForwardOptions;
|
|
448
|
-
private buildJudgeModelConfig;
|
|
449
447
|
}
|
|
450
448
|
interface CodeEvaluatorOptions {
|
|
451
449
|
readonly script: string;
|
package/dist/index.d.ts
CHANGED
|
@@ -99,17 +99,18 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
|
|
|
99
99
|
*/
|
|
100
100
|
interface EvalCase {
|
|
101
101
|
readonly id: string;
|
|
102
|
-
readonly dataset
|
|
102
|
+
readonly dataset?: string;
|
|
103
103
|
readonly conversation_id?: string;
|
|
104
|
-
readonly
|
|
105
|
-
readonly
|
|
104
|
+
readonly question: string;
|
|
105
|
+
readonly input_segments: readonly JsonObject[];
|
|
106
|
+
readonly output_segments: readonly JsonObject[];
|
|
106
107
|
readonly system_message?: string;
|
|
107
|
-
readonly
|
|
108
|
+
readonly reference_answer: string;
|
|
108
109
|
readonly guideline_paths: readonly string[];
|
|
109
110
|
readonly guideline_patterns?: readonly string[];
|
|
110
111
|
readonly file_paths: readonly string[];
|
|
111
112
|
readonly code_snippets: readonly string[];
|
|
112
|
-
readonly
|
|
113
|
+
readonly expected_outcome: string;
|
|
113
114
|
readonly evaluator?: EvaluatorKind;
|
|
114
115
|
readonly evaluators?: readonly EvaluatorConfig[];
|
|
115
116
|
}
|
|
@@ -118,12 +119,12 @@ interface EvalCase {
|
|
|
118
119
|
*/
|
|
119
120
|
interface EvaluationResult {
|
|
120
121
|
readonly eval_id: string;
|
|
121
|
-
readonly dataset
|
|
122
|
+
readonly dataset?: string;
|
|
122
123
|
readonly conversation_id?: string;
|
|
123
124
|
readonly score: number;
|
|
124
125
|
readonly hits: readonly string[];
|
|
125
126
|
readonly misses: readonly string[];
|
|
126
|
-
readonly
|
|
127
|
+
readonly candidate_answer: string;
|
|
127
128
|
readonly expected_aspect_count: number;
|
|
128
129
|
readonly target: string;
|
|
129
130
|
readonly timestamp: string;
|
|
@@ -167,7 +168,7 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
|
|
|
167
168
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
168
169
|
*/
|
|
169
170
|
declare function buildPromptInputs(testCase: EvalCase): Promise<{
|
|
170
|
-
|
|
171
|
+
question: string;
|
|
171
172
|
guidelines: string;
|
|
172
173
|
systemMessage?: string;
|
|
173
174
|
}>;
|
|
@@ -204,7 +205,7 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
|
|
|
204
205
|
type ChatPrompt = AxChatRequest["chatPrompt"];
|
|
205
206
|
type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
|
|
206
207
|
interface ProviderRequest {
|
|
207
|
-
readonly
|
|
208
|
+
readonly question: string;
|
|
208
209
|
readonly guidelines?: string;
|
|
209
210
|
readonly guideline_patterns?: readonly string[];
|
|
210
211
|
readonly chatPrompt?: ChatPrompt;
|
|
@@ -404,7 +405,7 @@ interface EvaluationContext {
|
|
|
404
405
|
readonly provider: Provider;
|
|
405
406
|
readonly attempt: number;
|
|
406
407
|
readonly promptInputs: {
|
|
407
|
-
readonly
|
|
408
|
+
readonly question: string;
|
|
408
409
|
readonly guidelines: string;
|
|
409
410
|
readonly systemMessage?: string;
|
|
410
411
|
};
|
|
@@ -442,10 +443,7 @@ declare class LlmJudgeEvaluator implements Evaluator {
|
|
|
442
443
|
private readonly customPrompt?;
|
|
443
444
|
constructor(options: LlmJudgeEvaluatorOptions);
|
|
444
445
|
evaluate(context: EvaluationContext): Promise<EvaluationScore>;
|
|
445
|
-
private evaluateWithAx;
|
|
446
446
|
private evaluateWithPrompt;
|
|
447
|
-
private buildJudgeForwardOptions;
|
|
448
|
-
private buildJudgeModelConfig;
|
|
449
447
|
}
|
|
450
448
|
interface CodeEvaluatorOptions {
|
|
451
449
|
readonly script: string;
|