@agentv/core 0.11.0 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1,5 +1,3 @@
-import { AxChatRequest, AxAI } from '@ax-llm/ax';
-
 /**
  * JSON primitive values appearing in AgentV payloads.
  */
@@ -117,6 +115,7 @@ interface EvalCase {
  * Evaluator scorecard for a single eval case run.
  */
 interface EvaluationResult {
+    readonly timestamp: string;
     readonly eval_id: string;
     readonly dataset?: string;
     readonly conversation_id?: string;
@@ -124,14 +123,12 @@ interface EvaluationResult {
     readonly hits: readonly string[];
     readonly misses: readonly string[];
     readonly candidate_answer: string;
-    readonly expected_aspect_count: number;
     readonly target: string;
-    readonly timestamp: string;
     readonly reasoning?: string;
     readonly raw_aspects?: readonly string[];
     readonly agent_provider_request?: JsonObject;
     readonly lm_provider_request?: JsonObject;
-    readonly evaluator_raw_request?: JsonObject;
+    readonly evaluator_provider_request?: JsonObject;
     readonly evaluator_results?: readonly EvaluatorResult[];
     readonly error?: string;
 }
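For result consumers, the breaking pieces of this hunk are the removal of expected_aspect_count and the rename of evaluator_raw_request to evaluator_provider_request. A minimal migration sketch, assuming hits and misses together still cover every expected aspect (the summarize helper is hypothetical):

import type { EvaluationResult } from '@agentv/core';

// Hypothetical consumer helper: derive the aspect total from hits + misses
// now that expected_aspect_count is gone, and read the renamed request field.
function summarize(result: EvaluationResult): string {
  const aspectTotal = result.hits.length + result.misses.length;
  const captured = result.evaluator_provider_request !== undefined; // was evaluator_raw_request
  return `${result.eval_id} @ ${result.timestamp}: ` +
    `${result.hits.length}/${aspectTotal} aspects` +
    (captured ? ' (evaluator request recorded)' : '');
}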
@@ -143,17 +140,24 @@ interface EvaluatorResult {
     readonly misses: readonly string[];
     readonly reasoning?: string;
     readonly raw_request?: JsonObject;
-    readonly evaluator_raw_request?: JsonObject;
+    readonly evaluator_provider_request?: JsonObject;
 }
 /**
  * Convenience accessor matching the Python hit_count property.
  */
 declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
 
-type ChatPrompt = AxChatRequest["chatPrompt"];
+type ChatMessageRole = "system" | "user" | "assistant" | "tool" | "function";
+interface ChatMessage {
+    readonly role: ChatMessageRole;
+    readonly content: string;
+    readonly name?: string;
+}
+type ChatPrompt = readonly ChatMessage[];
 type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
 interface ProviderRequest {
     readonly question: string;
+    readonly systemPrompt?: string;
     readonly guidelines?: string;
     readonly guideline_patterns?: readonly string[];
     readonly chatPrompt?: ChatPrompt;
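ChatPrompt is now a self-contained readonly ChatMessage[] instead of a type borrowed from @ax-llm/ax, so prompts can be built without that dependency. A sketch of the new shape, assuming ChatMessage and ProviderRequest are exported from the package root as the declaration file suggests (only fields visible in this hunk are set):

import type { ChatMessage, ProviderRequest } from '@agentv/core';

const chatPrompt: readonly ChatMessage[] = [
  { role: 'system', content: 'You are a concise reviewer.' },
  { role: 'user', content: 'Summarize the following diff.' },
];

// systemPrompt is the optional field added to ProviderRequest in this hunk.
const request: ProviderRequest = {
  question: 'Summarize the following diff.',
  systemPrompt: 'You are a concise reviewer.',
  chatPrompt,
};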
@@ -185,11 +189,6 @@ interface Provider {
      * the orchestrator may send multiple requests in a single provider session.
      */
     invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
-    /**
-     * Optional access to the underlying AxAI instance.
-     * This enables using advanced Ax features like structured output signatures.
-     */
-    getAxAI?(): AxAI;
 }
 type EnvLookup = Readonly<Record<string, string | undefined>>;
 interface TargetDefinition {
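With getAxAI gone, a Provider is consumed purely through its request methods. A fallback pattern for batch-capable providers; note that only invokeBatch is visible in this hunk, so the single-request invoke method below is assumed from surrounding context:

import type { Provider, ProviderRequest, ProviderResponse } from '@agentv/core';

// Prefer the provider's batch entry point when present; otherwise fan out
// one request at a time. `invoke` is assumed, not shown in this diff.
async function runAll(
  provider: Provider,
  requests: readonly ProviderRequest[],
): Promise<readonly ProviderResponse[]> {
  if (provider.invokeBatch) {
    return provider.invokeBatch(requests);
  }
  return Promise.all(requests.map((request) => provider.invoke(request)));
}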
@@ -264,38 +263,41 @@ interface TargetDefinition {
 }
 
 /**
- * Read metadata from a test suite file (like target name).
- * This is a convenience function for CLI tools that need metadata without loading all eval cases.
- */
-declare function readTestSuiteMetadata(testFilePath: string): Promise<{
-    target?: string;
-}>;
-/**
- * Determine whether a path references guideline content (instructions or prompts).
+ * Build prompt inputs by consolidating user request context and guideline content.
  */
-declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
+interface PromptInputs {
+    readonly question: string;
+    readonly guidelines: string;
+    readonly chatPrompt?: ChatPrompt;
+    readonly systemMessage?: string;
+}
+declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
+
 /**
  * Extract fenced code blocks from AgentV user segments.
  */
 declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
+
+/**
+ * Determine whether a path references guideline content (instructions or prompts).
+ */
+declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
+
 type LoadOptions = {
     readonly verbose?: boolean;
     readonly evalId?: string;
 };
 /**
- * Load eval cases from a AgentV YAML specification file.
+ * Read metadata from a test suite file (like target name).
+ * This is a convenience function for CLI tools that need metadata without loading all eval cases.
  */
-declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
+declare function readTestSuiteMetadata(testFilePath: string): Promise<{
+    target?: string;
+}>;
 /**
- * Build prompt inputs by consolidating user request context and guideline content.
+ * Load eval cases from a AgentV YAML specification file.
  */
-interface PromptInputs {
-    readonly question: string;
-    readonly guidelines: string;
-    readonly chatPrompt?: ChatPrompt;
-    readonly systemMessage?: string;
-}
-declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
+declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
 
 declare function fileExists(filePath: string): Promise<boolean>;
 /**
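None of the loader signatures change in the hunk above; the declarations are only reordered, with PromptInputs and buildPromptInputs now placed ahead of the loaders. A typical flow over these helpers, with illustrative paths:

import { buildPromptInputs, loadEvalCases, readTestSuiteMetadata } from '@agentv/core';

async function main(): Promise<void> {
  const suitePath = 'evals/suite.yaml'; // illustrative path
  const { target } = await readTestSuiteMetadata(suitePath); // metadata only, no case loading
  const cases = await loadEvalCases(suitePath, process.cwd(), { verbose: true });
  const inputs = await buildPromptInputs(cases[0]); // question + guidelines (+ optional chatPrompt/systemMessage)
  console.log(target, inputs.question);
}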
@@ -338,6 +340,9 @@ interface RetryConfig {
     readonly backoffFactor?: number;
     readonly retryableStatusCodes?: readonly number[];
 }
+/**
+ * Azure OpenAI settings used by the Vercel AI SDK.
+ */
 interface AzureResolvedConfig {
     readonly resourceName: string;
     readonly deploymentName: string;
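This hunk and the two that follow only add doc comments tying each resolved config to the Vercel AI SDK; no fields change. For orientation, an AzureResolvedConfig literal with placeholder values, typed Partial<> because the full interface isn't visible in this diff:

import type { AzureResolvedConfig } from '@agentv/core';

// Placeholder values; only fields visible in this diff are spelled out, and
// the retry literal assumes RetryConfig's unshown fields are also optional.
const azure: Partial<AzureResolvedConfig> = {
  resourceName: 'my-openai-resource',
  deploymentName: 'gpt-4o',
  maxOutputTokens: 4096,
  retry: { backoffFactor: 2, retryableStatusCodes: [429, 503] },
};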
@@ -347,6 +352,9 @@ interface AzureResolvedConfig {
     readonly maxOutputTokens?: number;
     readonly retry?: RetryConfig;
 }
+/**
+ * Anthropic Claude settings used by the Vercel AI SDK.
+ */
 interface AnthropicResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
@@ -355,6 +363,9 @@ interface AnthropicResolvedConfig {
     readonly thinkingBudget?: number;
     readonly retry?: RetryConfig;
 }
+/**
+ * Google Gemini settings used by the Vercel AI SDK.
+ */
 interface GeminiResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
@@ -399,6 +410,7 @@ interface CliResolvedConfig {
     readonly cwd?: string;
     readonly timeoutMs?: number;
     readonly healthcheck?: CliHealthcheck;
+    readonly verbose?: boolean;
 }
 type ResolvedTarget = {
     readonly kind: "azure";
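The CLI provider's resolved config gains an optional verbose flag in this range, presumably to surface the CLI target's own logging. A placeholder literal, again Partial<> since required fields (such as the command itself) sit outside this hunk:

import type { CliResolvedConfig } from '@agentv/core';

const cli: Partial<CliResolvedConfig> = {
  cwd: '/tmp/agent-workdir', // illustrative
  timeoutMs: 120_000,
  verbose: true, // new between 0.11.0 and 0.14.2
};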
@@ -500,7 +512,7 @@ interface EvaluationContext {
     };
     readonly now: Date;
     readonly judgeProvider?: Provider;
-    readonly systemPrompt?: string;
+    readonly evaluatorTemplateOverride?: string;
     readonly evaluator?: EvaluatorConfig;
 }
 interface EvaluationScore {
@@ -521,14 +533,14 @@ interface LlmJudgeEvaluatorOptions {
     readonly resolveJudgeProvider: JudgeProviderResolver;
     readonly maxOutputTokens?: number;
     readonly temperature?: number;
-    readonly customPrompt?: string;
+    readonly evaluatorTemplate?: string;
 }
 declare class LlmJudgeEvaluator implements Evaluator {
     readonly kind = "llm_judge";
     private readonly resolveJudgeProvider;
     private readonly maxOutputTokens?;
     private readonly temperature?;
-    private readonly customPrompt?;
+    private readonly evaluatorTemplate?;
     constructor(options: LlmJudgeEvaluatorOptions);
     evaluate(context: EvaluationContext): Promise<EvaluationScore>;
     private evaluateWithPrompt;
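The last two hunks are a coordinated rename: EvaluationContext.systemPrompt becomes evaluatorTemplateOverride, and LlmJudgeEvaluatorOptions.customPrompt becomes evaluatorTemplate. A construction sketch; JudgeProviderResolver's exact definition isn't in this diff, so the resolver below assumes it is an async function yielding a Provider:

import { LlmJudgeEvaluator, type Provider } from '@agentv/core';

declare function resolveJudge(): Promise<Provider>; // stub; shape assumed

const judge = new LlmJudgeEvaluator({
  resolveJudgeProvider: resolveJudge,
  temperature: 0,
  maxOutputTokens: 1024,
  evaluatorTemplate: 'Score the candidate answer against each expected aspect.', // was customPrompt
});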
package/dist/index.d.ts CHANGED
@@ -1,5 +1,3 @@
-import { AxChatRequest, AxAI } from '@ax-llm/ax';
-
 /**
  * JSON primitive values appearing in AgentV payloads.
  */
@@ -117,6 +115,7 @@ interface EvalCase {
  * Evaluator scorecard for a single eval case run.
  */
 interface EvaluationResult {
+    readonly timestamp: string;
     readonly eval_id: string;
     readonly dataset?: string;
     readonly conversation_id?: string;
@@ -124,14 +123,12 @@ interface EvaluationResult {
     readonly hits: readonly string[];
     readonly misses: readonly string[];
     readonly candidate_answer: string;
-    readonly expected_aspect_count: number;
     readonly target: string;
-    readonly timestamp: string;
     readonly reasoning?: string;
     readonly raw_aspects?: readonly string[];
     readonly agent_provider_request?: JsonObject;
     readonly lm_provider_request?: JsonObject;
-    readonly evaluator_raw_request?: JsonObject;
+    readonly evaluator_provider_request?: JsonObject;
     readonly evaluator_results?: readonly EvaluatorResult[];
     readonly error?: string;
 }
@@ -143,17 +140,24 @@ interface EvaluatorResult {
     readonly misses: readonly string[];
     readonly reasoning?: string;
     readonly raw_request?: JsonObject;
-    readonly evaluator_raw_request?: JsonObject;
+    readonly evaluator_provider_request?: JsonObject;
 }
 /**
  * Convenience accessor matching the Python hit_count property.
  */
 declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
 
-type ChatPrompt = AxChatRequest["chatPrompt"];
+type ChatMessageRole = "system" | "user" | "assistant" | "tool" | "function";
+interface ChatMessage {
+    readonly role: ChatMessageRole;
+    readonly content: string;
+    readonly name?: string;
+}
+type ChatPrompt = readonly ChatMessage[];
 type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
 interface ProviderRequest {
     readonly question: string;
+    readonly systemPrompt?: string;
     readonly guidelines?: string;
     readonly guideline_patterns?: readonly string[];
     readonly chatPrompt?: ChatPrompt;
@@ -185,11 +189,6 @@ interface Provider {
      * the orchestrator may send multiple requests in a single provider session.
      */
     invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
-    /**
-     * Optional access to the underlying AxAI instance.
-     * This enables using advanced Ax features like structured output signatures.
-     */
-    getAxAI?(): AxAI;
 }
 type EnvLookup = Readonly<Record<string, string | undefined>>;
 interface TargetDefinition {
@@ -264,38 +263,41 @@ interface TargetDefinition {
 }
 
 /**
- * Read metadata from a test suite file (like target name).
- * This is a convenience function for CLI tools that need metadata without loading all eval cases.
- */
-declare function readTestSuiteMetadata(testFilePath: string): Promise<{
-    target?: string;
-}>;
-/**
- * Determine whether a path references guideline content (instructions or prompts).
+ * Build prompt inputs by consolidating user request context and guideline content.
  */
-declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
+interface PromptInputs {
+    readonly question: string;
+    readonly guidelines: string;
+    readonly chatPrompt?: ChatPrompt;
+    readonly systemMessage?: string;
+}
+declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
+
 /**
  * Extract fenced code blocks from AgentV user segments.
  */
 declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
+
+/**
+ * Determine whether a path references guideline content (instructions or prompts).
+ */
+declare function isGuidelineFile(filePath: string, patterns?: readonly string[]): boolean;
+
 type LoadOptions = {
     readonly verbose?: boolean;
     readonly evalId?: string;
 };
 /**
- * Load eval cases from a AgentV YAML specification file.
+ * Read metadata from a test suite file (like target name).
+ * This is a convenience function for CLI tools that need metadata without loading all eval cases.
  */
-declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
+declare function readTestSuiteMetadata(testFilePath: string): Promise<{
+    target?: string;
+}>;
 /**
- * Build prompt inputs by consolidating user request context and guideline content.
+ * Load eval cases from a AgentV YAML specification file.
  */
-interface PromptInputs {
-    readonly question: string;
-    readonly guidelines: string;
-    readonly chatPrompt?: ChatPrompt;
-    readonly systemMessage?: string;
-}
-declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
+declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, options?: LoadOptions): Promise<readonly EvalCase[]>;
 
 declare function fileExists(filePath: string): Promise<boolean>;
 /**
@@ -338,6 +340,9 @@ interface RetryConfig {
     readonly backoffFactor?: number;
     readonly retryableStatusCodes?: readonly number[];
 }
+/**
+ * Azure OpenAI settings used by the Vercel AI SDK.
+ */
 interface AzureResolvedConfig {
     readonly resourceName: string;
     readonly deploymentName: string;
@@ -347,6 +352,9 @@ interface AzureResolvedConfig {
     readonly maxOutputTokens?: number;
     readonly retry?: RetryConfig;
 }
+/**
+ * Anthropic Claude settings used by the Vercel AI SDK.
+ */
 interface AnthropicResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
@@ -355,6 +363,9 @@ interface AnthropicResolvedConfig {
     readonly thinkingBudget?: number;
     readonly retry?: RetryConfig;
 }
+/**
+ * Google Gemini settings used by the Vercel AI SDK.
+ */
 interface GeminiResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
@@ -399,6 +410,7 @@ interface CliResolvedConfig {
     readonly cwd?: string;
     readonly timeoutMs?: number;
     readonly healthcheck?: CliHealthcheck;
+    readonly verbose?: boolean;
 }
 type ResolvedTarget = {
     readonly kind: "azure";
@@ -500,7 +512,7 @@ interface EvaluationContext {
     };
     readonly now: Date;
     readonly judgeProvider?: Provider;
-    readonly systemPrompt?: string;
+    readonly evaluatorTemplateOverride?: string;
     readonly evaluator?: EvaluatorConfig;
 }
 interface EvaluationScore {
@@ -521,14 +533,14 @@ interface LlmJudgeEvaluatorOptions {
     readonly resolveJudgeProvider: JudgeProviderResolver;
     readonly maxOutputTokens?: number;
     readonly temperature?: number;
-    readonly customPrompt?: string;
+    readonly evaluatorTemplate?: string;
 }
 declare class LlmJudgeEvaluator implements Evaluator {
     readonly kind = "llm_judge";
     private readonly resolveJudgeProvider;
     private readonly maxOutputTokens?;
     private readonly temperature?;
-    private readonly customPrompt?;
+    private readonly evaluatorTemplate?;
     constructor(options: LlmJudgeEvaluatorOptions);
     evaluate(context: EvaluationContext): Promise<EvaluationScore>;
     private evaluateWithPrompt;