@agentv/core 0.15.0 → 0.17.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -101,7 +101,7 @@ interface EvalCase {
101
101
  readonly question: string;
102
102
  readonly input_messages: readonly TestMessage[];
103
103
  readonly input_segments: readonly JsonObject[];
104
- readonly output_segments: readonly JsonObject[];
104
+ readonly expected_segments: readonly JsonObject[];
105
105
  readonly reference_answer?: string;
106
106
  readonly guideline_paths: readonly string[];
107
107
  readonly guideline_patterns?: readonly string[];
@@ -262,6 +262,17 @@ interface TargetDefinition {
262
262
  readonly retryStatusCodes?: unknown | undefined;
263
263
  }
264
264
 
265
+ /**
266
+ * Formatting mode for segment content.
267
+ * - 'agent': File references only (for providers with filesystem access)
268
+ * - 'lm': Embedded file content with XML tags (for language model providers)
269
+ */
270
+ type FormattingMode = "agent" | "lm";
271
+ /**
272
+ * Extract fenced code blocks from AgentV user segments.
273
+ */
274
+ declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
275
+
265
276
  /**
266
277
  * Build prompt inputs by consolidating user request context and guideline content.
267
278
  */
@@ -271,12 +282,13 @@ interface PromptInputs {
271
282
  readonly chatPrompt?: ChatPrompt;
272
283
  readonly systemMessage?: string;
273
284
  }
274
- declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
275
-
276
285
  /**
277
- * Extract fenced code blocks from AgentV user segments.
286
+ * Build prompt inputs by consolidating user request context and guideline content.
287
+ *
288
+ * @param testCase - The evaluation test case
289
+ * @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
278
290
  */
279
- declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
291
+ declare function buildPromptInputs(testCase: EvalCase, mode?: FormattingMode): Promise<PromptInputs>;
280
292
 
281
293
  /**
282
294
  * Determine whether a path references guideline content (instructions or prompts).
@@ -605,6 +617,7 @@ interface RunEvaluationOptions {
605
617
  readonly evalId?: string;
606
618
  readonly verbose?: boolean;
607
619
  readonly maxConcurrency?: number;
620
+ readonly evalCases?: readonly EvalCase[];
608
621
  readonly onResult?: (result: EvaluationResult) => MaybePromise<void>;
609
622
  readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
610
623
  }
package/dist/index.d.ts CHANGED
@@ -101,7 +101,7 @@ interface EvalCase {
101
101
  readonly question: string;
102
102
  readonly input_messages: readonly TestMessage[];
103
103
  readonly input_segments: readonly JsonObject[];
104
- readonly output_segments: readonly JsonObject[];
104
+ readonly expected_segments: readonly JsonObject[];
105
105
  readonly reference_answer?: string;
106
106
  readonly guideline_paths: readonly string[];
107
107
  readonly guideline_patterns?: readonly string[];
@@ -262,6 +262,17 @@ interface TargetDefinition {
262
262
  readonly retryStatusCodes?: unknown | undefined;
263
263
  }
264
264
 
265
+ /**
266
+ * Formatting mode for segment content.
267
+ * - 'agent': File references only (for providers with filesystem access)
268
+ * - 'lm': Embedded file content with XML tags (for language model providers)
269
+ */
270
+ type FormattingMode = "agent" | "lm";
271
+ /**
272
+ * Extract fenced code blocks from AgentV user segments.
273
+ */
274
+ declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
275
+
265
276
  /**
266
277
  * Build prompt inputs by consolidating user request context and guideline content.
267
278
  */
@@ -271,12 +282,13 @@ interface PromptInputs {
271
282
  readonly chatPrompt?: ChatPrompt;
272
283
  readonly systemMessage?: string;
273
284
  }
274
- declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
275
-
276
285
  /**
277
- * Extract fenced code blocks from AgentV user segments.
286
+ * Build prompt inputs by consolidating user request context and guideline content.
287
+ *
288
+ * @param testCase - The evaluation test case
289
+ * @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
278
290
  */
279
- declare function extractCodeBlocks(segments: readonly JsonObject[]): readonly string[];
291
+ declare function buildPromptInputs(testCase: EvalCase, mode?: FormattingMode): Promise<PromptInputs>;
280
292
 
281
293
  /**
282
294
  * Determine whether a path references guideline content (instructions or prompts).
@@ -605,6 +617,7 @@ interface RunEvaluationOptions {
605
617
  readonly evalId?: string;
606
618
  readonly verbose?: boolean;
607
619
  readonly maxConcurrency?: number;
620
+ readonly evalCases?: readonly EvalCase[];
608
621
  readonly onResult?: (result: EvaluationResult) => MaybePromise<void>;
609
622
  readonly onProgress?: (event: ProgressEvent) => MaybePromise<void>;
610
623
  }