@agentv/core 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -117,6 +117,7 @@ interface EvalCase {
117
117
  * Evaluator scorecard for a single eval case run.
118
118
  */
119
119
  interface EvaluationResult {
120
+ readonly timestamp: string;
120
121
  readonly eval_id: string;
121
122
  readonly dataset?: string;
122
123
  readonly conversation_id?: string;
@@ -124,14 +125,12 @@ interface EvaluationResult {
124
125
  readonly hits: readonly string[];
125
126
  readonly misses: readonly string[];
126
127
  readonly candidate_answer: string;
127
- readonly expected_aspect_count: number;
128
128
  readonly target: string;
129
- readonly timestamp: string;
130
129
  readonly reasoning?: string;
131
130
  readonly raw_aspects?: readonly string[];
132
131
  readonly agent_provider_request?: JsonObject;
133
132
  readonly lm_provider_request?: JsonObject;
134
- readonly evaluator_raw_request?: JsonObject;
133
+ readonly evaluator_provider_request?: JsonObject;
135
134
  readonly evaluator_results?: readonly EvaluatorResult[];
136
135
  readonly error?: string;
137
136
  }
@@ -143,7 +142,7 @@ interface EvaluatorResult {
143
142
  readonly misses: readonly string[];
144
143
  readonly reasoning?: string;
145
144
  readonly raw_request?: JsonObject;
146
- readonly evaluator_raw_request?: JsonObject;
145
+ readonly evaluator_provider_request?: JsonObject;
147
146
  }
148
147
  /**
149
148
  * Convenience accessor matching the Python hit_count property.
@@ -154,6 +153,7 @@ type ChatPrompt = AxChatRequest["chatPrompt"];
154
153
  type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
155
154
  interface ProviderRequest {
156
155
  readonly question: string;
156
+ readonly systemPrompt?: string;
157
157
  readonly guidelines?: string;
158
158
  readonly guideline_patterns?: readonly string[];
159
159
  readonly chatPrompt?: ChatPrompt;
@@ -264,38 +264,41 @@ interface TargetDefinition {
264
264
  }
265
265
 
266
266
  /**
267
- * Read metadata from a test suite file (like target name).
268
- * This is a convenience function for CLI tools that need metadata without loading all eval cases.
269
- */
270
- declare function readTestSuiteMetadata(testFilePath: string): Promise<{
271
- target?: string;
272
- }>;
273
- /**
274
- * Determine whether a path references guideline content (instructions or prompts).
267
+ * Build prompt inputs by consolidating user request context and guideline content.
275
268
  */
276
- declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
269
+ interface PromptInputs {
270
+ readonly question: string;
271
+ readonly guidelines: string;
272
+ readonly chatPrompt?: ChatPrompt;
273
+ readonly systemMessage?: string;
274
+ }
275
+ declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
276
+
277
277
  /**
278
278
  * Extract fenced code blocks from AgentV user segments.
279
279
  */
280
280
  declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
281
+
282
+ /**
283
+ * Determine whether a path references guideline content (instructions or prompts).
284
+ */
285
+ declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
286
+
281
287
  type LoadOptions = {
282
288
  readonly verbose?: boolean;
283
289
  readonly evalId?: string;
284
290
  };
285
291
  /**
286
- * Load eval cases from a AgentV YAML specification file.
292
+ * Read metadata from a test suite file (like target name).
293
+ * This is a convenience function for CLI tools that need metadata without loading all eval cases.
287
294
  */
288
- declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
295
+ declare function readTestSuiteMetadata(testFilePath: string): Promise<{
296
+ target?: string;
297
+ }>;
289
298
  /**
290
- * Build prompt inputs by consolidating user request context and guideline content.
299
+ * Load eval cases from a AgentV YAML specification file.
291
300
  */
292
- interface PromptInputs {
293
- readonly question: string;
294
- readonly guidelines: string;
295
- readonly chatPrompt?: ChatPrompt;
296
- readonly systemMessage?: string;
297
- }
298
- declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
301
+ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
299
302
 
300
303
  declare function fileExists(filePath: string): Promise<boolean>;
301
304
  /**
@@ -500,7 +503,7 @@ interface EvaluationContext {
500
503
  };
501
504
  readonly now: Date;
502
505
  readonly judgeProvider?: Provider;
503
- readonly systemPrompt?: string;
506
+ readonly evaluatorTemplateOverride?: string;
504
507
  readonly evaluator?: EvaluatorConfig;
505
508
  }
506
509
  interface EvaluationScore {
@@ -521,14 +524,14 @@ interface LlmJudgeEvaluatorOptions {
521
524
  readonly resolveJudgeProvider: JudgeProviderResolver;
522
525
  readonly maxOutputTokens?: number;
523
526
  readonly temperature?: number;
524
- readonly customPrompt?: string;
527
+ readonly evaluatorTemplate?: string;
525
528
  }
526
529
  declare class LlmJudgeEvaluator implements Evaluator {
527
530
  readonly kind = "llm_judge";
528
531
  private readonly resolveJudgeProvider;
529
532
  private readonly maxOutputTokens?;
530
533
  private readonly temperature?;
531
- private readonly customPrompt?;
534
+ private readonly evaluatorTemplate?;
532
535
  constructor(options: LlmJudgeEvaluatorOptions);
533
536
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
534
537
  private evaluateWithPrompt;
package/dist/index.d.ts CHANGED
@@ -117,6 +117,7 @@ interface EvalCase {
117
117
  * Evaluator scorecard for a single eval case run.
118
118
  */
119
119
  interface EvaluationResult {
120
+ readonly timestamp: string;
120
121
  readonly eval_id: string;
121
122
  readonly dataset?: string;
122
123
  readonly conversation_id?: string;
@@ -124,14 +125,12 @@ interface EvaluationResult {
124
125
  readonly hits: readonly string[];
125
126
  readonly misses: readonly string[];
126
127
  readonly candidate_answer: string;
127
- readonly expected_aspect_count: number;
128
128
  readonly target: string;
129
- readonly timestamp: string;
130
129
  readonly reasoning?: string;
131
130
  readonly raw_aspects?: readonly string[];
132
131
  readonly agent_provider_request?: JsonObject;
133
132
  readonly lm_provider_request?: JsonObject;
134
- readonly evaluator_raw_request?: JsonObject;
133
+ readonly evaluator_provider_request?: JsonObject;
135
134
  readonly evaluator_results?: readonly EvaluatorResult[];
136
135
  readonly error?: string;
137
136
  }
@@ -143,7 +142,7 @@ interface EvaluatorResult {
143
142
  readonly misses: readonly string[];
144
143
  readonly reasoning?: string;
145
144
  readonly raw_request?: JsonObject;
146
- readonly evaluator_raw_request?: JsonObject;
145
+ readonly evaluator_provider_request?: JsonObject;
147
146
  }
148
147
  /**
149
148
  * Convenience accessor matching the Python hit_count property.
@@ -154,6 +153,7 @@ type ChatPrompt = AxChatRequest["chatPrompt"];
154
153
  type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
155
154
  interface ProviderRequest {
156
155
  readonly question: string;
156
+ readonly systemPrompt?: string;
157
157
  readonly guidelines?: string;
158
158
  readonly guideline_patterns?: readonly string[];
159
159
  readonly chatPrompt?: ChatPrompt;
@@ -264,38 +264,41 @@ interface TargetDefinition {
264
264
  }
265
265
 
266
266
  /**
267
- * Read metadata from a test suite file (like target name).
268
- * This is a convenience function for CLI tools that need metadata without loading all eval cases.
269
- */
270
- declare function readTestSuiteMetadata(testFilePath: string): Promise<{
271
- target?: string;
272
- }>;
273
- /**
274
- * Determine whether a path references guideline content (instructions or prompts).
267
+ * Build prompt inputs by consolidating user request context and guideline content.
275
268
  */
276
- declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
269
+ interface PromptInputs {
270
+ readonly question: string;
271
+ readonly guidelines: string;
272
+ readonly chatPrompt?: ChatPrompt;
273
+ readonly systemMessage?: string;
274
+ }
275
+ declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
276
+
277
277
  /**
278
278
  * Extract fenced code blocks from AgentV user segments.
279
279
  */
280
280
  declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
281
+
282
+ /**
283
+ * Determine whether a path references guideline content (instructions or prompts).
284
+ */
285
+ declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
286
+
281
287
  type LoadOptions = {
282
288
  readonly verbose?: boolean;
283
289
  readonly evalId?: string;
284
290
  };
285
291
  /**
286
- * Load eval cases from a AgentV YAML specification file.
292
+ * Read metadata from a test suite file (like target name).
293
+ * This is a convenience function for CLI tools that need metadata without loading all eval cases.
287
294
  */
288
- declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
295
+ declare function readTestSuiteMetadata(testFilePath: string): Promise<{
296
+ target?: string;
297
+ }>;
289
298
  /**
290
- * Build prompt inputs by consolidating user request context and guideline content.
299
+ * Load eval cases from a AgentV YAML specification file.
291
300
  */
292
- interface PromptInputs {
293
- readonly question: string;
294
- readonly guidelines: string;
295
- readonly chatPrompt?: ChatPrompt;
296
- readonly systemMessage?: string;
297
- }
298
- declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
301
+ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
299
302
 
300
303
  declare function fileExists(filePath: string): Promise<boolean>;
301
304
  /**
@@ -500,7 +503,7 @@ interface EvaluationContext {
500
503
  };
501
504
  readonly now: Date;
502
505
  readonly judgeProvider?: Provider;
503
- readonly systemPrompt?: string;
506
+ readonly evaluatorTemplateOverride?: string;
504
507
  readonly evaluator?: EvaluatorConfig;
505
508
  }
506
509
  interface EvaluationScore {
@@ -521,14 +524,14 @@ interface LlmJudgeEvaluatorOptions {
521
524
  readonly resolveJudgeProvider: JudgeProviderResolver;
522
525
  readonly maxOutputTokens?: number;
523
526
  readonly temperature?: number;
524
- readonly customPrompt?: string;
527
+ readonly evaluatorTemplate?: string;
525
528
  }
526
529
  declare class LlmJudgeEvaluator implements Evaluator {
527
530
  readonly kind = "llm_judge";
528
531
  private readonly resolveJudgeProvider;
529
532
  private readonly maxOutputTokens?;
530
533
  private readonly temperature?;
531
- private readonly customPrompt?;
534
+ private readonly evaluatorTemplate?;
532
535
  constructor(options: LlmJudgeEvaluatorOptions);
533
536
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
534
537
  private evaluateWithPrompt;