npm - @agentv/eval - Versions diffs - 2.18.4 → 3.0.0-next.1 - Mend

@agentv/eval 2.18.4 → 3.0.0-next.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.d.cts CHANGED Viewed

@@ -2,7 +2,7 @@ import { z } from 'zod';
 export { z } from 'zod';
 /**
- * Zod schemas for code judge input/output validation.
+ * Zod schemas for code grader input/output validation.
  * Provides both compile-time types and runtime validation.
  */
@@ -147,9 +147,9 @@ declare const MessageSchema: z.ZodObject<{
     metadata?: Record<string, unknown> | undefined;
 }>;
 /**
- * Code judge input schema (camelCase, converted from snake_case wire format).
+ * Code grader input schema (camelCase, converted from snake_case wire format).
  */
-declare const CodeJudgeInputSchema: z.ZodObject<{
+declare const CodeGraderInputSchema: z.ZodObject<{
     question: z.ZodString;
     criteria: z.ZodString;
     expectedOutput: z.ZodArray<z.ZodObject<{
@@ -570,9 +570,9 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
     config?: Record<string, unknown> | null | undefined;
 }>;
 /**
- * Code judge result schema (validated before output).
+ * Code grader result schema (validated before output).
  */
-declare const CodeJudgeResultSchema: z.ZodObject<{
+declare const CodeGraderResultSchema: z.ZodObject<{
     score: z.ZodNumber;
     hits: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString, "many">>>;
     misses: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString, "many">>>;
@@ -595,15 +595,15 @@ declare const CodeJudgeResultSchema: z.ZodObject<{
 /**
  * Inferred types from schemas.
  */
-type CodeJudgeInput = z.infer<typeof CodeJudgeInputSchema>;
-type CodeJudgeResult = z.infer<typeof CodeJudgeResultSchema>;
+type CodeGraderInput = z.infer<typeof CodeGraderInputSchema>;
+type CodeGraderResult = z.infer<typeof CodeGraderResultSchema>;
 type TraceSummary = z.infer<typeof TraceSummarySchema>;
 type Message = z.infer<typeof MessageSchema>;
 type ToolCall = z.infer<typeof ToolCallSchema>;
 type TokenUsage = z.infer<typeof TokenUsageSchema>;
 /**
  * Prompt template input schema (camelCase, converted from snake_case wire format).
- * Uses the same schema as CodeJudgeInput since the orchestrator sends identical payloads.
+ * Uses the same schema as CodeGraderInput since the orchestrator sends identical payloads.
  */
 declare const PromptTemplateInputSchema: z.ZodObject<{
     question: z.ZodString;
@@ -1025,10 +1025,456 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
     workspacePath?: string | null | undefined;
     config?: Record<string, unknown> | null | undefined;
 }>;
-type PromptTemplateInput = CodeJudgeInput;
+type PromptTemplateInput = CodeGraderInput;
+/** @deprecated Use CodeGraderInputSchema */
+declare const CodeJudgeInputSchema: z.ZodObject<{
+    question: z.ZodString;
+    criteria: z.ZodString;
+    expectedOutput: z.ZodArray<z.ZodObject<{
+        role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
+        content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
+        toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
+            tool: z.ZodString;
+            input: z.ZodOptional<z.ZodUnknown>;
+            output: z.ZodOptional<z.ZodUnknown>;
+            id: z.ZodOptional<z.ZodString>;
+            startTime: z.ZodOptional<z.ZodString>;
+            endTime: z.ZodOptional<z.ZodString>;
+            durationMs: z.ZodOptional<z.ZodNumber>;
+        }, "strip", z.ZodTypeAny, {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }, {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }>, "many">>;
+        name: z.ZodOptional<z.ZodString>;
+        startTime: z.ZodOptional<z.ZodString>;
+        endTime: z.ZodOptional<z.ZodString>;
+        durationMs: z.ZodOptional<z.ZodNumber>;
+        metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+    }, "strip", z.ZodTypeAny, {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }, {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }>, "many">;
+    referenceAnswer: z.ZodOptional<z.ZodString>;
+    answer: z.ZodString;
+    output: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
+        role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
+        content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
+        toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
+            tool: z.ZodString;
+            input: z.ZodOptional<z.ZodUnknown>;
+            output: z.ZodOptional<z.ZodUnknown>;
+            id: z.ZodOptional<z.ZodString>;
+            startTime: z.ZodOptional<z.ZodString>;
+            endTime: z.ZodOptional<z.ZodString>;
+            durationMs: z.ZodOptional<z.ZodNumber>;
+        }, "strip", z.ZodTypeAny, {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }, {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }>, "many">>;
+        name: z.ZodOptional<z.ZodString>;
+        startTime: z.ZodOptional<z.ZodString>;
+        endTime: z.ZodOptional<z.ZodString>;
+        durationMs: z.ZodOptional<z.ZodNumber>;
+        metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+    }, "strip", z.ZodTypeAny, {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }, {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }>, "many">>>;
+    /** Path to a temp file containing the output JSON (used for large payloads). */
+    outputPath: z.ZodOptional<z.ZodString>;
+    guidelineFiles: z.ZodArray<z.ZodString, "many">;
+    inputFiles: z.ZodArray<z.ZodString, "many">;
+    input: z.ZodArray<z.ZodObject<{
+        role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
+        content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
+        toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
+            tool: z.ZodString;
+            input: z.ZodOptional<z.ZodUnknown>;
+            output: z.ZodOptional<z.ZodUnknown>;
+            id: z.ZodOptional<z.ZodString>;
+            startTime: z.ZodOptional<z.ZodString>;
+            endTime: z.ZodOptional<z.ZodString>;
+            durationMs: z.ZodOptional<z.ZodNumber>;
+        }, "strip", z.ZodTypeAny, {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }, {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }>, "many">>;
+        name: z.ZodOptional<z.ZodString>;
+        startTime: z.ZodOptional<z.ZodString>;
+        endTime: z.ZodOptional<z.ZodString>;
+        durationMs: z.ZodOptional<z.ZodNumber>;
+        metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+    }, "strip", z.ZodTypeAny, {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }, {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }>, "many">;
+    trace: z.ZodOptional<z.ZodNullable<z.ZodObject<{
+        eventCount: z.ZodNumber;
+        toolNames: z.ZodArray<z.ZodString, "many">;
+        toolCallsByName: z.ZodRecord<z.ZodString, z.ZodNumber>;
+        errorCount: z.ZodNumber;
+        toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
+        llmCallCount: z.ZodOptional<z.ZodNumber>;
+    }, "strip", z.ZodTypeAny, {
+        eventCount: number;
+        toolNames: string[];
+        toolCallsByName: Record<string, number>;
+        errorCount: number;
+        toolDurations?: Record<string, number[]> | undefined;
+        llmCallCount?: number | undefined;
+    }, {
+        eventCount: number;
+        toolNames: string[];
+        toolCallsByName: Record<string, number>;
+        errorCount: number;
+        toolDurations?: Record<string, number[]> | undefined;
+        llmCallCount?: number | undefined;
+    }>>>;
+    tokenUsage: z.ZodOptional<z.ZodNullable<z.ZodObject<{
+        input: z.ZodNumber;
+        output: z.ZodNumber;
+        cached: z.ZodOptional<z.ZodNumber>;
+    }, "strip", z.ZodTypeAny, {
+        input: number;
+        output: number;
+        cached?: number | undefined;
+    }, {
+        input: number;
+        output: number;
+        cached?: number | undefined;
+    }>>>;
+    costUsd: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
+    durationMs: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
+    startTime: z.ZodOptional<z.ZodNullable<z.ZodString>>;
+    endTime: z.ZodOptional<z.ZodNullable<z.ZodString>>;
+    fileChanges: z.ZodOptional<z.ZodNullable<z.ZodString>>;
+    workspacePath: z.ZodOptional<z.ZodNullable<z.ZodString>>;
+    config: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
+}, "strip", z.ZodTypeAny, {
+    input: {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }[];
+    question: string;
+    criteria: string;
+    expectedOutput: {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }[];
+    answer: string;
+    guidelineFiles: string[];
+    inputFiles: string[];
+    output?: {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }[] | null | undefined;
+    startTime?: string | null | undefined;
+    endTime?: string | null | undefined;
+    durationMs?: number | null | undefined;
+    referenceAnswer?: string | undefined;
+    outputPath?: string | undefined;
+    trace?: {
+        eventCount: number;
+        toolNames: string[];
+        toolCallsByName: Record<string, number>;
+        errorCount: number;
+        toolDurations?: Record<string, number[]> | undefined;
+        llmCallCount?: number | undefined;
+    } | null | undefined;
+    tokenUsage?: {
+        input: number;
+        output: number;
+        cached?: number | undefined;
+    } | null | undefined;
+    costUsd?: number | null | undefined;
+    fileChanges?: string | null | undefined;
+    workspacePath?: string | null | undefined;
+    config?: Record<string, unknown> | null | undefined;
+}, {
+    input: {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }[];
+    question: string;
+    criteria: string;
+    expectedOutput: {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }[];
+    answer: string;
+    guidelineFiles: string[];
+    inputFiles: string[];
+    output?: {
+        role: "tool" | "assistant" | "user" | "system";
+        startTime?: string | undefined;
+        endTime?: string | undefined;
+        durationMs?: number | undefined;
+        content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
+        toolCalls?: {
+            tool: string;
+            input?: unknown;
+            output?: unknown;
+            id?: string | undefined;
+            startTime?: string | undefined;
+            endTime?: string | undefined;
+            durationMs?: number | undefined;
+        }[] | undefined;
+        name?: string | undefined;
+        metadata?: Record<string, unknown> | undefined;
+    }[] | null | undefined;
+    startTime?: string | null | undefined;
+    endTime?: string | null | undefined;
+    durationMs?: number | null | undefined;
+    referenceAnswer?: string | undefined;
+    outputPath?: string | undefined;
+    trace?: {
+        eventCount: number;
+        toolNames: string[];
+        toolCallsByName: Record<string, number>;
+        errorCount: number;
+        toolDurations?: Record<string, number[]> | undefined;
+        llmCallCount?: number | undefined;
+    } | null | undefined;
+    tokenUsage?: {
+        input: number;
+        output: number;
+        cached?: number | undefined;
+    } | null | undefined;
+    costUsd?: number | null | undefined;
+    fileChanges?: string | null | undefined;
+    workspacePath?: string | null | undefined;
+    config?: Record<string, unknown> | null | undefined;
+}>;
+/** @deprecated Use CodeGraderResultSchema */
+declare const CodeJudgeResultSchema: z.ZodObject<{
+    score: z.ZodNumber;
+    hits: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString, "many">>>;
+    misses: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString, "many">>>;
+    reasoning: z.ZodOptional<z.ZodString>;
+    /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
+    details: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+}, "strip", z.ZodTypeAny, {
+    score: number;
+    hits: string[];
+    misses: string[];
+    reasoning?: string | undefined;
+    details?: Record<string, unknown> | undefined;
+}, {
+    score: number;
+    hits?: string[] | undefined;
+    misses?: string[] | undefined;
+    reasoning?: string | undefined;
+    details?: Record<string, unknown> | undefined;
+}>;
+/** @deprecated Use CodeGraderInput */
+type CodeJudgeInput = CodeGraderInput;
+/** @deprecated Use CodeGraderResult */
+type CodeJudgeResult = CodeGraderResult;
 /**
- * Client for invoking configured targets from code-judge scripts.
+ * Client for invoking configured targets from code-grader scripts.
  *
  * Environment variables (set automatically by AgentV when `target` config is present):
  * - AGENTV_TARGET_PROXY_URL: The URL of the local proxy server
@@ -1108,16 +1554,16 @@ declare class TargetInvocationError extends Error {
  *
  * This function reads the proxy URL and token from environment variables
  * that are automatically set by AgentV when a `target` config block is present
- * on a `code_judge` evaluator.
+ * on a `code_grader` (or `code_judge`) evaluator.
  *
  * @returns A target client if environment variables are set, otherwise undefined
  * @throws TargetNotAvailableError if token is missing when URL is present
  *
  * @example
  * ```typescript
- * import { createTargetClient, defineCodeJudge } from '@agentv/eval';
+ * import { createTargetClient, defineCodeGrader } from '@agentv/eval';
  *
- * export default defineCodeJudge(async ({ question, criteria }) => {
+ * export default defineCodeGrader(async ({ question, criteria }) => {
  *   const target = createTargetClient();
  *
  *   if (!target) {
@@ -1139,15 +1585,15 @@ declare function createTargetClient(): TargetClient | undefined;
 /**
  * Context provided to assertion handlers.
- * Same shape as CodeJudgeInput — assertions receive full evaluation context.
+ * Same shape as CodeGraderInput — assertions receive full evaluation context.
  */
-type AssertionContext = CodeJudgeInput;
+type AssertionContext = CodeGraderInput;
 /**
  * Known built-in assertion types. Custom types are extensible via string.
  *
- * Use in EVAL.yaml `assert` blocks:
+ * Use in EVAL.yaml `assertions` blocks:
  * ```yaml
- * assert:
+ * assertions:
  *   - type: contains
  *     value: "Paris"
  * ```
@@ -1156,7 +1602,7 @@ type AssertionContext = CodeJudgeInput;
  * are also valid — the `string & {}` escape hatch provides autocomplete
  * for known types while accepting any string.
  */
-type AssertionType = 'llm-judge' | 'code-judge' | 'rubrics' | 'composite' | 'tool-trajectory' | 'field-accuracy' | 'latency' | 'cost' | 'token-usage' | 'execution-metrics' | 'agent-judge' | 'contains' | 'contains-any' | 'contains-all' | 'icontains' | 'icontains-any' | 'icontains-all' | 'starts-with' | 'ends-with' | 'equals' | 'regex' | 'is-json' | 'llm_judge' | 'code_judge' | 'tool_trajectory' | 'field_accuracy' | 'token_usage' | 'execution_metrics' | 'agent_judge' | 'contains_any' | 'contains_all' | 'icontains_any' | 'icontains_all' | 'starts_with' | 'ends_with' | 'is_json' | (string & {});
+type AssertionType = 'llm-grader' | 'code-grader' | 'rubrics' | 'composite' | 'tool-trajectory' | 'field-accuracy' | 'latency' | 'cost' | 'token-usage' | 'execution-metrics' | 'skill-trigger' | 'contains' | 'contains-any' | 'contains-all' | 'icontains' | 'icontains-any' | 'icontains-all' | 'starts-with' | 'ends-with' | 'equals' | 'regex' | 'is-json' | 'llm-judge' | 'code-judge' | 'llm_judge' | 'code_judge' | 'llm_grader' | 'code_grader' | 'tool_trajectory' | 'field_accuracy' | 'token_usage' | 'execution_metrics' | 'contains_any' | 'contains_all' | 'icontains_any' | 'icontains_all' | 'starts_with' | 'ends_with' | 'is_json' | (string & {});
 /**
  * Result returned from an assertion handler.
  *
@@ -1201,9 +1647,11 @@ type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<Asse
 type PromptTemplateHandler = (input: PromptTemplateInput) => string | Promise<string>;
 /**
- * Handler function type for code judges.
+ * Handler function type for code graders.
  */
-type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<CodeJudgeResult>;
+type CodeGraderHandler = (input: CodeGraderInput) => CodeGraderResult | Promise<CodeGraderResult>;
+/** @deprecated Use CodeGraderHandler */
+type CodeJudgeHandler = CodeGraderHandler;
 /**
  * AgentV Evaluation SDK
@@ -1221,24 +1669,24 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
  * }));
  * ```
  *
- * @example Code judge (full control)
+ * @example Code grader (full control)
  * ```typescript
  * #!/usr/bin/env bun
- * import { defineCodeJudge } from '@agentv/eval';
+ * import { defineCodeGrader } from '@agentv/eval';
  *
- * export default defineCodeJudge(({ trace, answer }) => ({
+ * export default defineCodeGrader(({ trace, answer }) => ({
  *   score: trace?.eventCount <= 5 ? 1.0 : 0.5,
  *   hits: ['Efficient tool usage'],
  *   misses: [],
  * }));
  * ```
  *
- * @example Code judge with target access (requires `target` config in YAML)
+ * @example Code grader with target access (requires `target` config in YAML)
  * ```typescript
  * #!/usr/bin/env bun
- * import { defineCodeJudge, createTargetClient } from '@agentv/eval';
+ * import { defineCodeGrader, createTargetClient } from '@agentv/eval';
  *
- * export default defineCodeJudge(async ({ question }) => {
+ * export default defineCodeGrader(async ({ question }) => {
  *   const target = createTargetClient();
  *   if (!target) {
  *     return { score: 0, misses: ['Target not available'] };
@@ -1258,7 +1706,7 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
  */
 /**
- * Define a code judge evaluator with automatic stdin/stdout handling.
+ * Define a code grader evaluator with automatic stdin/stdout handling.
  *
  * This function:
  * 1. Reads JSON from stdin (snake_case format)
@@ -1271,9 +1719,9 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
  *
  * @example
  * ```typescript
- * import { defineCodeJudge } from '@agentv/eval';
+ * import { defineCodeGrader } from '@agentv/eval';
  *
- * export default defineCodeJudge(({ trace }) => {
+ * export default defineCodeGrader(({ trace }) => {
  *   if (!trace) {
  *     return { score: 0.5, reasoning: 'No trace available' };
  *   }
@@ -1289,19 +1737,21 @@ type CodeJudgeHandler = (input: CodeJudgeInput) => CodeJudgeResult | Promise<Cod
  *
  * @example With typed config
  * ```typescript
- * import { defineCodeJudge, z } from '@agentv/eval';
+ * import { defineCodeGrader, z } from '@agentv/eval';
  *
  * const ConfigSchema = z.object({
  *   maxToolCalls: z.number().default(10),
  * });
  *
- * export default defineCodeJudge(({ trace, config }) => {
+ * export default defineCodeGrader(({ trace, config }) => {
  *   const { maxToolCalls } = ConfigSchema.parse(config ?? {});
  *   // Use maxToolCalls...
  * });
  * ```
  */
-declare function defineCodeJudge(handler: CodeJudgeHandler): void;
+declare function defineCodeGrader(handler: CodeGraderHandler): void;
+/** @deprecated Use defineCodeGrader */
+declare const defineCodeJudge: typeof defineCodeGrader;
 /**
  * Define a prompt template with automatic stdin/stdout handling.
  *
@@ -1387,4 +1837,4 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
  */
 declare function defineAssertion(handler: AssertionHandler): void;
-export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeJudgeHandler, type CodeJudgeInput, CodeJudgeInputSchema, type CodeJudgeResult, CodeJudgeResultSchema, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineAssertion, defineCodeJudge, definePromptTemplate };
+export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderResult, CodeGraderResultSchema, type CodeJudgeHandler, type CodeJudgeInput, CodeJudgeInputSchema, type CodeJudgeResult, CodeJudgeResultSchema, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineAssertion, defineCodeGrader, defineCodeJudge, definePromptTemplate };