@agentv/core 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-IOCVST3R.js → chunk-YCIZ33BO.js} +28 -11
- package/dist/chunk-YCIZ33BO.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +68 -64
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +64 -67
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +297 -149
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -5
- package/dist/index.d.ts +18 -5
- package/dist/index.js +251 -115
- package/dist/index.js.map +1 -1
- package/package.json +15 -16
- package/LICENSE +0 -21
- package/dist/chunk-IOCVST3R.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -101,7 +101,7 @@ interface EvalCase {
|
|
|
101
101
|
readonly question: string;
|
|
102
102
|
readonly input_messages: readonly TestMessage[];
|
|
103
103
|
readonly input_segments: readonly JsonObject[];
|
|
104
|
-
readonly
|
|
104
|
+
readonly expected_segments: readonly JsonObject[];
|
|
105
105
|
readonly reference_answer?: string;
|
|
106
106
|
readonly guideline_paths: readonly string[];
|
|
107
107
|
readonly guideline_patterns?: readonly string[];
|
|
@@ -262,6 +262,17 @@ interface TargetDefinition {
|
|
|
262
262
|
readonly retryStatusCodes?: unknown | undefined;
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
+
/**
|
|
266
|
+
* Formatting mode for segment content.
|
|
267
|
+
* - 'agent': File references only (for providers with filesystem access)
|
|
268
|
+
* - 'lm': Embedded file content with XML tags (for language model providers)
|
|
269
|
+
*/
|
|
270
|
+
type FormattingMode = "agent" | "lm";
|
|
271
|
+
/**
|
|
272
|
+
* Extract fenced code blocks from AgentV user segments.
|
|
273
|
+
*/
|
|
274
|
+
declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
|
|
275
|
+
|
|
265
276
|
/**
|
|
266
277
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
267
278
|
*/
|
|
@@ -271,12 +282,13 @@ interface PromptInputs {
|
|
|
271
282
|
readonly chatPrompt?: ChatPrompt;
|
|
272
283
|
readonly systemMessage?: string;
|
|
273
284
|
}
|
|
274
|
-
declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
|
|
275
|
-
|
|
276
285
|
/**
|
|
277
|
-
*
|
|
286
|
+
* Build prompt inputs by consolidating user request context and guideline content.
|
|
287
|
+
*
|
|
288
|
+
* @param testCase - The evaluation test case
|
|
289
|
+
* @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
|
|
278
290
|
*/
|
|
279
|
-
declare function
|
|
291
|
+
declare function buildPromptInputs(testCase: EvalCase, mode?: FormattingMode): Promise<PromptInputs>;
|
|
280
292
|
|
|
281
293
|
/**
|
|
282
294
|
* Determine whether a path references guideline content (instructions or prompts).
|
|
@@ -605,6 +617,7 @@ interface RunEvaluationOptions {
|
|
|
605
617
|
readonly evalId?: string;
|
|
606
618
|
readonly verbose?: boolean;
|
|
607
619
|
readonly maxConcurrency?: number;
|
|
620
|
+
readonly evalCases?: readonly EvalCase[];
|
|
608
621
|
readonly onResult?: (result: EvaluationResult) => MaybePromise<void>;
|
|
609
622
|
readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
|
|
610
623
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -101,7 +101,7 @@ interface EvalCase {
|
|
|
101
101
|
readonly question: string;
|
|
102
102
|
readonly input_messages: readonly TestMessage[];
|
|
103
103
|
readonly input_segments: readonly JsonObject[];
|
|
104
|
-
readonly
|
|
104
|
+
readonly expected_segments: readonly JsonObject[];
|
|
105
105
|
readonly reference_answer?: string;
|
|
106
106
|
readonly guideline_paths: readonly string[];
|
|
107
107
|
readonly guideline_patterns?: readonly string[];
|
|
@@ -262,6 +262,17 @@ interface TargetDefinition {
|
|
|
262
262
|
readonly retryStatusCodes?: unknown | undefined;
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
+
/**
|
|
266
|
+
* Formatting mode for segment content.
|
|
267
|
+
* - 'agent': File references only (for providers with filesystem access)
|
|
268
|
+
* - 'lm': Embedded file content with XML tags (for language model providers)
|
|
269
|
+
*/
|
|
270
|
+
type FormattingMode = "agent" | "lm";
|
|
271
|
+
/**
|
|
272
|
+
* Extract fenced code blocks from AgentV user segments.
|
|
273
|
+
*/
|
|
274
|
+
declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
|
|
275
|
+
|
|
265
276
|
/**
|
|
266
277
|
* Build prompt inputs by consolidating user request context and guideline content.
|
|
267
278
|
*/
|
|
@@ -271,12 +282,13 @@ interface PromptInputs {
|
|
|
271
282
|
readonly chatPrompt?: ChatPrompt;
|
|
272
283
|
readonly systemMessage?: string;
|
|
273
284
|
}
|
|
274
|
-
declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
|
|
275
|
-
|
|
276
285
|
/**
|
|
277
|
-
*
|
|
286
|
+
* Build prompt inputs by consolidating user request context and guideline content.
|
|
287
|
+
*
|
|
288
|
+
* @param testCase - The evaluation test case
|
|
289
|
+
* @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
|
|
278
290
|
*/
|
|
279
|
-
declare function
|
|
291
|
+
declare function buildPromptInputs(testCase: EvalCase, mode?: FormattingMode): Promise<PromptInputs>;
|
|
280
292
|
|
|
281
293
|
/**
|
|
282
294
|
* Determine whether a path references guideline content (instructions or prompts).
|
|
@@ -605,6 +617,7 @@ interface RunEvaluationOptions {
|
|
|
605
617
|
readonly evalId?: string;
|
|
606
618
|
readonly verbose?: boolean;
|
|
607
619
|
readonly maxConcurrency?: number;
|
|
620
|
+
readonly evalCases?: readonly EvalCase[];
|
|
608
621
|
readonly onResult?: (result: EvaluationResult) => MaybePromise<void>;
|
|
609
622
|
readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
|
|
610
623
|
}
|