@agentv/eval 3.3.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs +10 -12
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -41
- package/dist/index.d.ts +71 -41
- package/dist/index.js +10 -12
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -32,7 +32,7 @@ import { defineCodeGrader } from '@agentv/eval';
|
|
|
32
32
|
|
|
33
33
|
export default defineCodeGrader(({ answer, trace }) => ({
|
|
34
34
|
score: answer.length > 0 ? 1.0 : 0.0,
|
|
35
|
-
|
|
35
|
+
assertions: [{ text: 'Output received', passed: answer.length > 0 }],
|
|
36
36
|
}));
|
|
37
37
|
```
|
|
38
38
|
|
package/dist/index.cjs
CHANGED
|
@@ -107,9 +107,13 @@ var CodeGraderInputSchema = import_zod.z.object({
|
|
|
107
107
|
});
|
|
108
108
|
var CodeGraderResultSchema = import_zod.z.object({
|
|
109
109
|
score: import_zod.z.number().min(0).max(1),
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
110
|
+
assertions: import_zod.z.array(
|
|
111
|
+
import_zod.z.object({
|
|
112
|
+
text: import_zod.z.string(),
|
|
113
|
+
passed: import_zod.z.boolean(),
|
|
114
|
+
evidence: import_zod.z.string().optional()
|
|
115
|
+
})
|
|
116
|
+
).optional().default([]),
|
|
113
117
|
/** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
|
|
114
118
|
details: import_zod.z.record(import_zod.z.unknown()).optional()
|
|
115
119
|
});
|
|
@@ -346,9 +350,7 @@ function normalizeScore(result) {
|
|
|
346
350
|
}
|
|
347
351
|
return {
|
|
348
352
|
score,
|
|
349
|
-
|
|
350
|
-
misses: result.misses ? [...result.misses] : [],
|
|
351
|
-
reasoning: result.reasoning,
|
|
353
|
+
assertions: result.assertions ? [...result.assertions] : [],
|
|
352
354
|
details: result.details
|
|
353
355
|
};
|
|
354
356
|
}
|
|
@@ -381,9 +383,7 @@ async function runAssertion(handler) {
|
|
|
381
383
|
const errorMessage = formatError(error);
|
|
382
384
|
const errorResult = {
|
|
383
385
|
score: 0,
|
|
384
|
-
|
|
385
|
-
misses: [errorMessage],
|
|
386
|
-
reasoning: `Assertion failed: ${errorMessage}`
|
|
386
|
+
assertions: [{ text: `Assertion failed: ${errorMessage}`, passed: false }]
|
|
387
387
|
};
|
|
388
388
|
console.log(JSON.stringify(errorResult, null, 2));
|
|
389
389
|
process.exit(1);
|
|
@@ -458,9 +458,7 @@ async function runCodeGrader(handler) {
|
|
|
458
458
|
const errorMessage = formatError2(error);
|
|
459
459
|
const errorResult = {
|
|
460
460
|
score: 0,
|
|
461
|
-
|
|
462
|
-
misses: [errorMessage],
|
|
463
|
-
reasoning: `Evaluation failed: ${errorMessage}`
|
|
461
|
+
assertions: [{ text: `Evaluation failed: ${errorMessage}`, passed: false }]
|
|
464
462
|
};
|
|
465
463
|
console.log(JSON.stringify(errorResult, null, 2));
|
|
466
464
|
process.exit(1);
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/schemas.ts","../src/target-client.ts","../src/assertion.ts","../src/case-conversion.ts","../src/deprecation.ts","../src/prompt-template.ts","../src/runtime.ts"],"sourcesContent":["/**\n * AgentV Evaluation SDK\n *\n * Build custom evaluators for AI agent outputs.\n *\n * @example Custom assertion (simplest way to add evaluation logic)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.includes('hello'),\n * reasoning: 'Checks greeting',\n * }));\n * ```\n *\n * @example Code grader (full control)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace, outputText }) => ({\n * score: trace?.eventCount <= 5 ? 1.0 : 0.5,\n * hits: ['Efficient tool usage'],\n * misses: [],\n * }));\n * ```\n *\n * @example Code grader with target access (requires `target` config in YAML)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader, createTargetClient } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ inputText }) => {\n * const target = createTargetClient();\n * if (!target) {\n * return { score: 0, misses: ['Target not available'] };\n * }\n *\n * const response = await target.invoke({\n * question: `Evaluate: ${inputText}`,\n * systemPrompt: 'Respond with JSON: { \"score\": 0-1 }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.score ?? 
0 };\n * });\n * ```\n *\n * @packageDocumentation\n */\n\n// Re-export schemas and types\nexport {\n CodeGraderInputSchema,\n CodeGraderResultSchema,\n TraceSummarySchema,\n MessageSchema,\n ToolCallSchema,\n TokenUsageSchema,\n PromptTemplateInputSchema,\n type CodeGraderInput,\n type CodeGraderResult,\n type EnrichedCodeGraderInput,\n type TraceSummary,\n type Message,\n type ToolCall,\n type TokenUsage,\n type PromptTemplateInput,\n // Backward-compat aliases (deprecated)\n CodeJudgeInputSchema,\n CodeJudgeResultSchema,\n type CodeJudgeInput,\n type CodeJudgeResult,\n} from './schemas.js';\n\n// Re-export target client\nexport {\n createTargetClient,\n TargetNotAvailableError,\n TargetInvocationError,\n type TargetClient,\n type TargetInfo,\n type TargetInvokeRequest,\n type TargetInvokeResponse,\n} from './target-client.js';\n\n// Re-export Zod for typed config support\nexport { z } from 'zod';\n\n// Re-export assertion types\nexport type {\n AssertionContext,\n AssertionHandler,\n AssertionScore,\n AssertionType,\n} from './assertion.js';\n\nimport { type AssertionHandler, runAssertion } from './assertion.js';\nimport { type PromptTemplateHandler, runPromptTemplate } from './prompt-template.js';\nimport { type CodeGraderHandler, type CodeJudgeHandler, runCodeGrader } from './runtime.js';\n\nexport type { CodeGraderHandler };\n/** @deprecated Use CodeGraderHandler */\nexport type { CodeJudgeHandler };\nexport type { PromptTemplateHandler };\n\n/**\n * Define a code grader evaluator with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Validates the result and outputs JSON to stdout\n * 5. 
Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the input and returns a result\n *\n * @example\n * ```typescript\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace }) => {\n * if (!trace) {\n * return { score: 0.5, reasoning: 'No trace available' };\n * }\n *\n * const efficient = trace.eventCount <= 10;\n * return {\n * score: efficient ? 1.0 : 0.5,\n * hits: efficient ? ['Efficient execution'] : [],\n * misses: efficient ? [] : ['Too many tool calls'],\n * };\n * });\n * ```\n *\n * @example With typed config\n * ```typescript\n * import { defineCodeGrader, z } from '@agentv/eval';\n *\n * const ConfigSchema = z.object({\n * maxToolCalls: z.number().default(10),\n * });\n *\n * export default defineCodeGrader(({ trace, config }) => {\n * const { maxToolCalls } = ConfigSchema.parse(config ?? {});\n * // Use maxToolCalls...\n * });\n * ```\n */\nexport function defineCodeGrader(handler: CodeGraderHandler): void {\n // Run immediately when module is loaded\n runCodeGrader(handler);\n}\n\n/** @deprecated Use defineCodeGrader */\nexport const defineCodeJudge = defineCodeGrader;\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? 
`Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n\n/**\n * Define a custom assertion evaluator with automatic stdin/stdout handling.\n *\n * Assertions are the simplest way to add custom evaluation logic. They receive\n * the full evaluation context and return a pass/fail result with optional\n * granular scoring.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed context\n * 4. Normalizes the result (pass→score, clamp, etc.)\n * 5. Outputs JSON to stdout\n * 6. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the context and returns a result\n *\n * @example Simple pass/fail\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.toLowerCase().includes('hello'),\n * reasoning: 'Checks for greeting',\n * }));\n * ```\n *\n * @example Granular scoring\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText, trace }) => {\n * const hasContent = outputText.length > 0 ? 0.5 : 0;\n * const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;\n * return {\n * score: hasContent + isEfficient,\n * hits: [\n * ...(hasContent ? ['Has content'] : []),\n * ...(isEfficient ? 
['Efficient'] : []),\n * ],\n * };\n * });\n * ```\n */\nexport function defineAssertion(handler: AssertionHandler): void {\n runAssertion(handler);\n}\n","/**\n * Zod schemas for code grader input/output validation.\n * Provides both compile-time types and runtime validation.\n */\nimport { z } from 'zod';\n\n/**\n * Token usage metrics schema.\n */\nexport const TokenUsageSchema = z.object({\n input: z.number(),\n output: z.number(),\n cached: z.number().optional(),\n});\n\n/**\n * Trace summary schema (camelCase for TypeScript ergonomics).\n */\nexport const TraceSummarySchema = z.object({\n eventCount: z.number(),\n toolNames: z.array(z.string()),\n toolCallsByName: z.record(z.string(), z.number()),\n errorCount: z.number(),\n toolDurations: z.record(z.string(), z.array(z.number())).optional(),\n llmCallCount: z.number().optional(),\n});\n\n/**\n * Tool call schema.\n */\nexport const ToolCallSchema = z.object({\n tool: z.string(),\n input: z.unknown().optional(),\n output: z.unknown().optional(),\n id: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n});\n\n/**\n * Unified message schema for input, expected, and output messages.\n */\nexport const MessageSchema = z.object({\n role: z.enum(['assistant', 'user', 'system', 'tool']),\n content: z.union([z.string(), z.record(z.unknown()), z.array(z.record(z.unknown()))]).optional(),\n toolCalls: z.array(ToolCallSchema).optional(),\n name: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n metadata: z.record(z.unknown()).optional(),\n});\n\n/**\n * Code grader input schema (camelCase, converted from snake_case wire format).\n *\n * Text convenience accessors (`inputText`, `outputText`, `expectedOutputText`) are always\n * strings. 
Structured fields (`input`, `output`, `expectedOutput`) are always `Message[]`.\n */\nexport const CodeGraderInputSchema = z.object({\n /** @deprecated Use `inputText` instead. First user message content as string. */\n question: z.string(),\n criteria: z.string(),\n expectedOutput: z.array(MessageSchema),\n /** @deprecated Use `expectedOutputText` instead. Expected output content as string. */\n referenceAnswer: z.string().optional(),\n /** @deprecated Use `outputText` instead. Last assistant message content as string. */\n answer: z.string(),\n output: z.array(MessageSchema).nullable().optional(),\n /** Path to a temp file containing the output JSON (used for large payloads). */\n outputPath: z.string().optional(),\n guidelineFiles: z.array(z.string()),\n inputFiles: z.array(z.string()),\n input: z.array(MessageSchema),\n trace: TraceSummarySchema.nullable().optional(),\n tokenUsage: TokenUsageSchema.nullable().optional(),\n costUsd: z.number().nullable().optional(),\n durationMs: z.number().nullable().optional(),\n startTime: z.string().nullable().optional(),\n endTime: z.string().nullable().optional(),\n fileChanges: z.string().nullable().optional(),\n workspacePath: z.string().nullable().optional(),\n config: z.record(z.unknown()).nullable().optional(),\n /** First user message content as string. Replaces `question`. */\n inputText: z.string().optional(),\n /** Last assistant message content as string. Replaces `answer`. */\n outputText: z.string().optional(),\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n expectedOutputText: z.string().optional(),\n});\n\n/**\n * Code grader result schema (validated before output).\n */\nexport const CodeGraderResultSchema = z.object({\n score: z.number().min(0).max(1),\n hits: z.array(z.string()).optional().default([]),\n misses: z.array(z.string()).optional().default([]),\n reasoning: z.string().optional(),\n /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */\n details: z.record(z.unknown()).optional(),\n});\n\n/**\n * Inferred types from schemas.\n */\nexport type CodeGraderInput = z.infer<typeof CodeGraderInputSchema>;\nexport type CodeGraderResult = z.infer<typeof CodeGraderResultSchema>;\n\n/**\n * CodeGraderInput after `enrichInput()` has run.\n *\n * The text convenience accessors (`inputText`, `outputText`, `expectedOutputText`)\n * are always populated by the runtime before the handler is called, so they are\n * guaranteed to be `string` (never `undefined`).\n *\n * Handler function signatures (`CodeGraderHandler`, `AssertionHandler`) use this\n * type so that user code can destructure `{ outputText }` without null-checks.\n */\nexport type EnrichedCodeGraderInput = Omit<\n CodeGraderInput,\n 'inputText' | 'outputText' | 'expectedOutputText'\n> & {\n /** First user message content as string. Replaces `question`. */\n readonly inputText: string;\n /** Last assistant message content as string. Replaces `answer`. */\n readonly outputText: string;\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n readonly expectedOutputText: string;\n};\nexport type TraceSummary = z.infer<typeof TraceSummarySchema>;\nexport type Message = z.infer<typeof MessageSchema>;\nexport type ToolCall = z.infer<typeof ToolCallSchema>;\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * Prompt template input schema (camelCase, converted from snake_case wire format).\n * Uses the same schema as CodeGraderInput since the orchestrator sends identical payloads.\n */\nexport const PromptTemplateInputSchema = CodeGraderInputSchema;\n\nexport type PromptTemplateInput = CodeGraderInput;\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderInputSchema */\nexport const CodeJudgeInputSchema = CodeGraderInputSchema;\n/** @deprecated Use CodeGraderResultSchema */\nexport const CodeJudgeResultSchema = CodeGraderResultSchema;\n/** @deprecated Use CodeGraderInput */\nexport type CodeJudgeInput = CodeGraderInput;\n/** @deprecated Use CodeGraderResult */\nexport type CodeJudgeResult = CodeGraderResult;\n","/**\n * Client for invoking configured targets from code-grader scripts.\n *\n * Environment variables (set automatically by AgentV when `target` config is present):\n * - AGENTV_TARGET_PROXY_URL: The URL of the local proxy server\n * - AGENTV_TARGET_PROXY_TOKEN: Bearer token for authentication\n */\n\nimport type { TokenUsage } from './schemas.js';\n\n/**\n * Request to invoke the target\n */\nexport interface TargetInvokeRequest {\n readonly question: string;\n readonly systemPrompt?: string;\n readonly evalCaseId?: string;\n readonly attempt?: number;\n /** Optional target override - use a different target for this invocation */\n readonly target?: string;\n}\n\n/**\n * Response from a target invocation\n */\nexport interface TargetInvokeResponse {\n readonly output: readonly unknown[];\n readonly rawText?: string;\n readonly tokenUsage?: TokenUsage;\n}\n\n/**\n * Information about the target proxy 
configuration\n */\nexport interface TargetInfo {\n /** Name of the default target being used */\n readonly targetName: string;\n /** Maximum number of calls allowed */\n readonly maxCalls: number;\n /** Current number of calls made */\n readonly callCount: number;\n /** List of all available target names */\n readonly availableTargets: readonly string[];\n}\n\n/**\n * Target client for making target invocations\n */\nexport interface TargetClient {\n /**\n * Invoke the configured target with a prompt.\n * @param request - The question and optional system prompt\n * @returns The target's response with output messages and optional raw text\n */\n invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse>;\n\n /**\n * Invoke the target with multiple requests in sequence.\n * Each request counts toward the max_calls limit.\n * @param requests - Array of target requests\n * @returns Array of target responses\n */\n invokeBatch(requests: readonly TargetInvokeRequest[]): Promise<readonly TargetInvokeResponse[]>;\n\n /**\n * Get information about the target proxy configuration.\n * Returns the default target name, max calls, current call count, and available targets.\n */\n getInfo(): Promise<TargetInfo>;\n}\n\n/**\n * Error thrown when target proxy is not available\n */\nexport class TargetNotAvailableError extends Error {\n constructor(message: string) {\n super(message);\n this.name = 'TargetNotAvailableError';\n }\n}\n\n/**\n * Error thrown when target invocation fails\n */\nexport class TargetInvocationError extends Error {\n readonly statusCode?: number;\n\n constructor(message: string, statusCode?: number) {\n super(message);\n this.name = 'TargetInvocationError';\n this.statusCode = statusCode;\n }\n}\n\n/**\n * Create a target client from environment variables.\n *\n * This function reads the proxy URL and token from environment variables\n * that are automatically set by AgentV when a `target` config block is present\n * on a `code_grader` (or 
`code_judge`) evaluator.\n *\n * @returns A target client if environment variables are set, otherwise undefined\n * @throws TargetNotAvailableError if token is missing when URL is present\n *\n * @example\n * ```typescript\n * import { createTargetClient, defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ question, criteria }) => {\n * const target = createTargetClient();\n *\n * if (!target) {\n * // Target not available - no target config on this evaluator\n * return { score: 0.5, reasoning: 'Target not available' };\n * }\n *\n * const response = await target.invoke({\n * question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,\n * systemPrompt: 'You are an expert evaluator. Respond with JSON: { \"correct\": true/false }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.correct ? 1.0 : 0.0 };\n * });\n * ```\n */\nexport function createTargetClient(): TargetClient | undefined {\n const proxyUrl = process.env.AGENTV_TARGET_PROXY_URL;\n const proxyToken = process.env.AGENTV_TARGET_PROXY_TOKEN;\n\n if (!proxyUrl) {\n return undefined;\n }\n\n if (!proxyToken) {\n throw new TargetNotAvailableError(\n 'AGENTV_TARGET_PROXY_URL is set but AGENTV_TARGET_PROXY_TOKEN is missing',\n );\n }\n\n return createTargetClientInternal(proxyUrl, proxyToken);\n}\n\n/**\n * Internal: Create a target client with explicit URL and token.\n * Exported for testing only - use createTargetClient() in production.\n */\nexport function createTargetClientInternal(url: string, token: string): TargetClient {\n const headers = {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${token}`,\n };\n\n return {\n async invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse> {\n const response = await fetch(`${url}/invoke`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n question: request.question,\n systemPrompt: request.systemPrompt,\n evalCaseId: 
request.evalCaseId,\n attempt: request.attempt,\n target: request.target,\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInvokeResponse;\n },\n\n async invokeBatch(\n requests: readonly TargetInvokeRequest[],\n ): Promise<readonly TargetInvokeResponse[]> {\n const response = await fetch(`${url}/invokeBatch`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n requests: requests.map((r) => ({\n question: r.question,\n systemPrompt: r.systemPrompt,\n evalCaseId: r.evalCaseId,\n attempt: r.attempt,\n target: r.target,\n })),\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n const result = (await response.json()) as { responses: TargetInvokeResponse[] };\n return result.responses;\n },\n\n async getInfo(): Promise<TargetInfo> {\n const response = await fetch(`${url}/info`, {\n method: 'GET',\n headers,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? 
`HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInfo;\n },\n };\n}\n","/**\n * Runtime for custom assertion evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n *\n * Assertions receive the same input as code graders but use a simplified result\n * contract focused on pass/fail with optional score granularity.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Context provided to assertion handlers.\n *\n * Same shape as CodeGraderInput but with `inputText`, `outputText`, and\n * `expectedOutputText` guaranteed to be strings (populated by the runtime\n * before the handler is called).\n */\nexport type AssertionContext = EnrichedCodeGraderInput;\n\n/**\n * Known built-in assertion types. 
Custom types are extensible via string.\n *\n * Use in EVAL.yaml `assertions` blocks:\n * ```yaml\n * assertions:\n * - type: contains\n * value: \"Paris\"\n * ```\n *\n * Custom types registered via `.agentv/assertions/` or `defineAssertion()`\n * are also valid — the `string & {}` escape hatch provides autocomplete\n * for known types while accepting any string.\n */\nexport type AssertionType =\n // kebab-case (canonical internal form)\n | 'llm-grader'\n | 'code-grader'\n | 'rubrics'\n | 'composite'\n | 'tool-trajectory'\n | 'field-accuracy'\n | 'latency'\n | 'cost'\n | 'token-usage'\n | 'execution-metrics'\n | 'skill-trigger'\n | 'contains'\n | 'contains-any'\n | 'contains-all'\n | 'icontains'\n | 'icontains-any'\n | 'icontains-all'\n | 'starts-with'\n | 'ends-with'\n | 'equals'\n | 'regex'\n | 'is-json'\n // legacy aliases (still accepted)\n | 'llm-judge'\n | 'code-judge'\n | 'llm_judge'\n | 'code_judge'\n | 'llm_grader'\n | 'code_grader'\n | 'tool_trajectory'\n | 'field_accuracy'\n | 'token_usage'\n | 'execution_metrics'\n | 'contains_any'\n | 'contains_all'\n | 'icontains_any'\n | 'icontains_all'\n | 'starts_with'\n | 'ends_with'\n | 'is_json'\n | (string & {});\n\n/**\n * Result returned from an assertion handler.\n *\n * @example Pass with reasoning\n * ```ts\n * { pass: true, reasoning: 'Output contains expected keywords' }\n * ```\n *\n * @example Fail with misses\n * ```ts\n * { pass: false, misses: ['Missing required header'], score: 0.3 }\n * ```\n *\n * @example Granular score (0-1)\n * ```ts\n * { score: 0.75, hits: ['Format correct', 'Content relevant'], misses: ['Missing citation'] }\n * ```\n */\nexport interface AssertionScore {\n /** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */\n readonly pass?: boolean;\n /** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */\n readonly score?: number;\n /** Aspects that passed. */\n readonly hits?: readonly string[];\n /** Aspects that failed. 
*/\n readonly misses?: readonly string[];\n /** Human-readable explanation. */\n readonly reasoning?: string;\n /** Optional structured details for domain-specific metrics. */\n readonly details?: Record<string, unknown>;\n}\n\n/**\n * Handler function type for assertions.\n */\nexport type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;\n\n/**\n * Read stdin synchronously.\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Normalize an AssertionScore to a CodeGraderResult for wire compatibility.\n */\nfunction normalizeScore(result: AssertionScore): CodeGraderResult {\n let score: number;\n if (result.score !== undefined) {\n score = clampScore(result.score);\n } else if (result.pass !== undefined) {\n score = result.pass ? 1 : 0;\n } else {\n score = 0;\n }\n\n return {\n score,\n hits: result.hits ? [...result.hits] : [],\n misses: result.misses ? 
[...result.misses] : [],\n reasoning: result.reasoning,\n details: result.details,\n };\n}\n\n/**\n * Run an assertion handler with full stdin/stdout handling.\n * This is the internal implementation called by defineAssertion.\n */\nexport async function runAssertion(handler: AssertionHandler): Promise<void> {\n try {\n const stdin = readStdin();\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n const camelInput = toCamelCaseDeep(rawInput);\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // Lazy file-backed output loading\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // After enrichment, text accessors are guaranteed to be strings\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n const normalized = normalizeScore(rawResult);\n const result = CodeGraderResultSchema.parse(normalized);\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Assertion failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n","/**\n * Case conversion utilities for JSON payloads.\n * Converts between snake_case (wire format) and camelCase (TypeScript).\n */\n\nfunction toCamelCase(str: string): string {\n // Don't convert keys that start with uppercase (proper nouns/tool names)\n if (/^[A-Z]/.test(str)) {\n return str;\n }\n return str.replace(/_([a-z0-9])/g, (_, letter) => 
letter.toUpperCase());\n}\n\n/**\n * Recursively converts all keys in an object from snake_case to camelCase.\n * Used to map wire payloads into TypeScript-friendly shapes.\n *\n * @param obj - The object to convert (can be any JSON-serializable value)\n * @returns A new object with all keys converted to camelCase\n */\nexport function toCamelCaseDeep(obj: unknown): unknown {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => toCamelCaseDeep(item));\n }\n\n if (typeof obj === 'object') {\n const result: Record<string, unknown> = {};\n for (const [key, value] of Object.entries(obj)) {\n const camelKey = toCamelCase(key);\n result[camelKey] = toCamelCaseDeep(value);\n }\n return result;\n }\n\n return obj;\n}\n","/**\n * Deprecation warning utilities for code grader and assertion runtimes.\n * Provides text convenience accessors and deprecation warnings on legacy field names.\n */\nimport type { CodeGraderInput } from './schemas.js';\n\nconst ANSI_YELLOW = '\\u001b[33m';\nconst ANSI_RESET = '\\u001b[0m';\n\n/**\n * Emit a deprecation warning to stderr (once per field name per process).\n */\nconst deprecationWarned = new Set<string>();\nfunction warnDeprecation(oldName: string, newName: string): void {\n if (deprecationWarned.has(oldName)) return;\n deprecationWarned.add(oldName);\n console.warn(\n `${ANSI_YELLOW}Warning: '${oldName}' is deprecated in code graders. Use '${newName}' instead.${ANSI_RESET}`,\n );\n}\n\n/**\n * Reset deprecation warning state. Used only in tests.\n */\nexport function resetDeprecationWarnings(): void {\n deprecationWarned.clear();\n}\n\n/**\n * Populate `inputText`, `outputText`, and `expectedOutputText` convenience accessors\n * on the validated input object, and install deprecation warnings on legacy fields.\n *\n * Text accessors are always strings. 
Structured fields (`input`, `output`, `expectedOutput`)\n * remain `Message[]` always.\n */\nexport function enrichInput(input: CodeGraderInput): CodeGraderInput {\n // Populate text convenience accessors (always strings)\n // inputText = question (first user message content as string)\n const inputText = input.question;\n // outputText = answer (last assistant message content as string)\n const outputText = input.answer;\n // expectedOutputText = referenceAnswer (expected output content as string)\n const expectedOutputText = input.referenceAnswer ?? '';\n\n // Store the original values before redefining properties\n const originalQuestion = input.question;\n const originalAnswer = input.answer;\n const originalReferenceAnswer = input.referenceAnswer;\n\n // Set new text accessor values\n Object.defineProperty(input, 'inputText', {\n value: inputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'outputText', {\n value: outputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'expectedOutputText', {\n value: expectedOutputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n\n // Install deprecation warnings on legacy fields via property accessors\n Object.defineProperty(input, 'question', {\n get() {\n warnDeprecation('question', 'inputText');\n return originalQuestion;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'answer', {\n get() {\n warnDeprecation('answer', 'outputText');\n return originalAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'referenceAnswer', {\n get() {\n warnDeprecation('referenceAnswer', 'expectedOutputText');\n return originalReferenceAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n return input;\n}\n","/**\n * Runtime for prompt template evaluators.\n * Handles stdin parsing, validation, error handling, and string 
output.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport { type EnrichedCodeGraderInput, PromptTemplateInputSchema } from './schemas.js';\n\n/**\n * Handler function type for prompt templates.\n * Returns the prompt string to use for evaluation.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * `expectedOutputText` are always populated before the handler is called.\n */\nexport type PromptTemplateHandler = (input: EnrichedCodeGraderInput) => string | Promise<string>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Run a prompt template handler with full stdin/stdout handling.\n * This is the internal implementation called by definePromptTemplate.\n */\nexport async function runPromptTemplate(handler: PromptTemplateHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = PromptTemplateInputSchema.parse(camelInput);\n\n // 5. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 6. Run handler (input is now enriched with guaranteed text accessors)\n const prompt = await handler(input as EnrichedCodeGraderInput);\n\n // 6. Output raw string (not JSON) - the prompt itself\n console.log(prompt);\n } catch (error) {\n // Output error to stderr and exit with non-zero code\n console.error(error instanceof Error ? error.message : String(error));\n process.exit(1);\n }\n}\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. 
Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? `Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n *\n * @example Async handler\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate(async (ctx) => {\n * // Async operations are supported\n * return `Question: ${ctx.inputText}\\nAnswer: ${ctx.outputText}`;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n","/**\n * Runtime for code grader evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Handler function type for code graders.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * 
`expectedOutputText` are always populated before the handler is called.\n */\nexport type CodeGraderHandler = (\n input: EnrichedCodeGraderInput,\n) => CodeGraderResult | Promise<CodeGraderResult>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\n/**\n * Format an error for output.\n */\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Run a code grader handler with full stdin/stdout handling.\n * This is the internal implementation called by defineCodeGrader.\n */\nexport async function runCodeGrader(handler: CodeGraderHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // 5. Set up lazy file-backed output loading if applicable\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // 6. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 7. 
Run handler (input is now enriched with guaranteed text accessors)\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n\n // 8. Validate and normalize output\n const result = CodeGraderResultSchema.parse({\n ...rawResult,\n score: clampScore(rawResult.score),\n });\n\n // 9. Output JSON\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n // Output failure result\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Evaluation failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderHandler */\nexport type CodeJudgeHandler = CodeGraderHandler;\n/** @deprecated Use runCodeGrader */\nexport const runCodeJudge = runCodeGrader;\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,iBAAkB;AAKX,IAAM,mBAAmB,aAAE,OAAO;AAAA,EACvC,OAAO,aAAE,OAAO;AAAA,EAChB,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,OAAO,EAAE,SAAS;AAC9B,CAAC;AAKM,IAAM,qBAAqB,aAAE,OAAO;AAAA,EACzC,YAAY,aAAE,OAAO;AAAA,EACrB,WAAW,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC7B,iBAAiB,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,OAAO,CAAC;AAAA,EAChD,YAAY,aAAE,OAAO;AAAA,EACrB,eAAe,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,MAAM,aAAE,OAAO,CAAC,CAAC,EAAE,SAAS;AAAA,EAClE,cAAc,aAAE,OAAO,EAAE,SAAS;AACpC,CAAC;AAKM,IAAM,iBAAiB,aAAE,OAAO;AAAA,EACrC,MAAM,aAAE,OAAO;AAAA,EACf,OAAO,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC5B,QAAQ,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC7B,IAAI,aAAE,OAAO,EAAE,SAAS;AAAA,EACxB,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAKM,IAAM,gBAAgB,aAAE,OAAO;AAAA,EACpC,MAAM,aAAE,KAAK,CAAC,aAAa,QAAQ,UAAU,MAAM,CAAC;AAAA,EACpD,SAAS,aAAE,MAAM,CAAC,aAAE,OAAO,GAAG,aAAE,OAAO,aAAE,QAAQ,CAAC,GAAG,aAAE,MAAM,aAAE,OAAO,aAAE,QAAQ,CAAC,CAAC,CA
AC,CAAC,EAAE,SAAS;AAAA,EAC/F,WAAW,aAAE,MAAM,cAAc,EAAE,SAAS;AAAA,EAC5C,MAAM,aAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,UAAU,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC3C,CAAC;AAQM,IAAM,wBAAwB,aAAE,OAAO;AAAA;AAAA,EAE5C,UAAU,aAAE,OAAO;AAAA,EACnB,UAAU,aAAE,OAAO;AAAA,EACnB,gBAAgB,aAAE,MAAM,aAAa;AAAA;AAAA,EAErC,iBAAiB,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAErC,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,MAAM,aAAa,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAEnD,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,gBAAgB,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAClC,YAAY,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC9B,OAAO,aAAE,MAAM,aAAa;AAAA,EAC5B,OAAO,mBAAmB,SAAS,EAAE,SAAS;AAAA,EAC9C,YAAY,iBAAiB,SAAS,EAAE,SAAS;AAAA,EACjD,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,YAAY,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3C,WAAW,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC1C,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,aAAa,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC5C,eAAe,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,QAAQ,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAElD,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAEhC,oBAAoB,aAAE,OAAO,EAAE,SAAS;AAC1C,CAAC;AAKM,IAAM,yBAAyB,aAAE,OAAO;AAAA,EAC7C,OAAO,aAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAC9B,MAAM,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC/C,QAAQ,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EACjD,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,SAAS,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC1C,CAAC;AAsCM,IAAM,4BAA4B;AAMlC,IAAM,uBAAuB;AAE7B,IAAM,wBAAwB;;;AC1E9B,IAAM,0BAAN,cAAsC,MAAM;AAAA,EACjD,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAKO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC;AAAA,EAET,YAAY,SAAiB,YAAqB;AAChD,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,aAAa;AAAA,EACpB;AACF;AAkCO,SAAS,qBAA+C;AAC7D,QAAM,WAAW,QAAQ,IAAI;AAC7B,QAAM,aAAa,QAAQ,IAAI;AAE/B,MAAI,CAAC,UAAU;AACb,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,YAAY;AACf,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA
,SAAO,2BAA2B,UAAU,UAAU;AACxD;AAMO,SAAS,2BAA2B,KAAa,OAA6B;AACnF,QAAM,UAAU;AAAA,IACd,gBAAgB;AAAA,IAChB,eAAe,UAAU,KAAK;AAAA,EAChC;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,SAA6D;AACxE,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,WAAW;AAAA,QAC5C,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,QAAQ;AAAA,UAClB,cAAc,QAAQ;AAAA,UACtB,YAAY,QAAQ;AAAA,UACpB,SAAS,QAAQ;AAAA,UACjB,QAAQ,QAAQ;AAAA,QAClB,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,IAEA,MAAM,YACJ,UAC0C;AAC1C,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,gBAAgB;AAAA,QACjD,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,SAAS,IAAI,CAAC,OAAO;AAAA,YAC7B,UAAU,EAAE;AAAA,YACZ,cAAc,EAAE;AAAA,YAChB,YAAY,EAAE;AAAA,YACd,SAAS,EAAE;AAAA,YACX,QAAQ,EAAE;AAAA,UACZ,EAAE;AAAA,QACJ,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,YAAM,SAAU,MAAM,SAAS,KAAK;AACpC,aAAO,OAAO;AAAA,IAChB;AAAA,IAEA,MAAM,UAA+B;AACnC,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,SAAS;AAAA,QAC1C,QAAQ;AAAA,QACR;AAAA,MACF,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,EACF;AACF;;;AFpJA,IAAAA,cAAkB;;;AGjFlB,qBAA6B;;;ACF7B,SAAS,YAAY,KAAqB;AAExC,MAAI,SAAS,KAAK,GAAG,GAAG;AACtB,WAAO;AAAA,EACT;AACA,SAAO,IAAI,QAAQ,gBAAgB,CAAC,GAAG,WAAW,OAAO,YAAY,CAAC;AACxE;AASO,SAAS,gBAAgB,KAAuB;AACrD,MAAI,QAAQ,QAAQ,QAAQ,QAAW;AACrC,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,IAAI,CAAC,
SAAS,gBAAgB,IAAI,CAAC;AAAA,EAChD;AAEA,MAAI,OAAO,QAAQ,UAAU;AAC3B,UAAM,SAAkC,CAAC;AACzC,eAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC9C,YAAM,WAAW,YAAY,GAAG;AAChC,aAAO,QAAQ,IAAI,gBAAgB,KAAK;AAAA,IAC1C;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ACjCA,IAAM,cAAc;AACpB,IAAM,aAAa;AAKnB,IAAM,oBAAoB,oBAAI,IAAY;AAC1C,SAAS,gBAAgB,SAAiB,SAAuB;AAC/D,MAAI,kBAAkB,IAAI,OAAO,EAAG;AACpC,oBAAkB,IAAI,OAAO;AAC7B,UAAQ;AAAA,IACN,GAAG,WAAW,aAAa,OAAO,yCAAyC,OAAO,aAAa,UAAU;AAAA,EAC3G;AACF;AAgBO,SAAS,YAAY,OAAyC;AAGnE,QAAM,YAAY,MAAM;AAExB,QAAM,aAAa,MAAM;AAEzB,QAAM,qBAAqB,MAAM,mBAAmB;AAGpD,QAAM,mBAAmB,MAAM;AAC/B,QAAM,iBAAiB,MAAM;AAC7B,QAAM,0BAA0B,MAAM;AAGtC,SAAO,eAAe,OAAO,aAAa;AAAA,IACxC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,cAAc;AAAA,IACzC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,sBAAsB;AAAA,IACjD,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAGD,SAAO,eAAe,OAAO,YAAY;AAAA,IACvC,MAAM;AACJ,sBAAgB,YAAY,WAAW;AACvC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,UAAU;AAAA,IACrC,MAAM;AACJ,sBAAgB,UAAU,YAAY;AACtC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,mBAAmB;AAAA,IAC9C,MAAM;AACJ,sBAAgB,mBAAmB,oBAAoB;AACvD,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO;AACT;;;AF6BA,SAAS,YAAoB;AAC3B,aAAO,6BAAa,GAAG,MAAM;AAC/B;AAKA,SAAS,WAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAKA,SAAS,eAAe,QAA0C;AAChE,MAAI;AACJ,MAAI,OAAO,UAAU,QAAW;AAC9B,YAAQ,WAAW,OAAO,KAAK;AAAA,EACjC,WAAW,OAAO,SAAS,QAAW;AACpC,YAAQ,OAAO,OAAO,IAAI;AAAA,EAC5B,OAAO;AACL,YAAQ;AAAA,EACV;AAEA,SAAO;AAAA,IACL;AAAA,IACA,MAAM,OAAO,OAAO,CAAC,GAAG,OAAO,IAAI,IAAI,CAAC;AAAA,IACxC,QAAQ,OAAO,SAAS,CAAC,GAAG,OAAO,MAAM,IAAI,CAAC;AAAA,IAC9C,WAAW,OAAO;AAAA,IAClB,SAAS,OAAO;AAAA,EAC
lB;AACF;AAMA,eAAsB,aAAa,SAA0C;AAC3E,MAAI;AACF,UAAM,QAAQ,UAAU;AACxB,UAAM,WAAW,KAAK,MAAM,KAAK;AACjC,UAAM,aAAa,gBAAgB,QAAQ;AAC3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,6BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAAM,YAAY,MAAM,QAAQ,KAAgC;AAChE,UAAM,aAAa,eAAe,SAAS;AAC3C,UAAM,SAAS,uBAAuB,MAAM,UAAU;AACtD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AACd,UAAM,eAAe,YAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,qBAAqB,YAAY;AAAA,IAC9C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AGpNA,IAAAC,kBAA6B;AAkB7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAMA,eAAsB,kBAAkB,SAA+C;AACrF,MAAI;AAEF,UAAM,QAAQA,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,0BAA0B,MAAM,UAAU;AAGxD,gBAAY,KAAK;AAGjB,UAAM,SAAS,MAAM,QAAQ,KAAgC;AAG7D,YAAQ,IAAI,MAAM;AAAA,EACpB,SAAS,OAAO;AAEd,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;ACrDA,IAAAC,kBAA6B;AAyB7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAKA,SAASC,YAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAKA,SAASC,aAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAMA,eAAsB,cAAc,SAA2C;AAC7E,MAAI;AAEF,UAAM,QAAQF,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,8BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAA
M,YAAY,MAAM,QAAQ,KAAgC;AAGhE,UAAM,SAAS,uBAAuB,MAAM;AAAA,MAC1C,GAAG;AAAA,MACH,OAAOC,YAAW,UAAU,KAAK;AAAA,IACnC,CAAC;AAGD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AAEd,UAAM,eAAeC,aAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,sBAAsB,YAAY;AAAA,IAC/C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;APsCO,SAAS,iBAAiB,SAAkC;AAEjE,gBAAc,OAAO;AACvB;AAGO,IAAM,kBAAkB;AAwCxB,SAAS,qBAAqB,SAAsC;AAEzE,oBAAkB,OAAO;AAC3B;AA8CO,SAAS,gBAAgB,SAAiC;AAC/D,eAAa,OAAO;AACtB;","names":["import_zod","import_node_fs","readStdin","import_node_fs","readStdin","clampScore","formatError"]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/schemas.ts","../src/target-client.ts","../src/assertion.ts","../src/case-conversion.ts","../src/deprecation.ts","../src/prompt-template.ts","../src/runtime.ts"],"sourcesContent":["/**\n * AgentV Evaluation SDK\n *\n * Build custom evaluators for AI agent outputs.\n *\n * @example Custom assertion (simplest way to add evaluation logic)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.includes('hello'),\n * assertions: [{ text: 'Checks greeting', passed: outputText.includes('hello') }],\n * }));\n * ```\n *\n * @example Code grader (full control)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace, outputText }) => ({\n * score: trace?.eventCount <= 5 ? 1.0 : 0.5,\n * assertions: [{ text: 'Efficient tool usage', passed: trace?.eventCount <= 5 }],\n * }));\n * ```\n *\n * @example Code grader with target access (requires `target` config in YAML)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader, createTargetClient } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ inputText }) => {\n * const target = createTargetClient();\n * if (!target) {\n * return { score: 0, assertions: [{ text: 'Target not available', passed: false }] };\n * }\n *\n * const response = await target.invoke({\n * question: `Evaluate: ${inputText}`,\n * systemPrompt: 'Respond with JSON: { \"score\": 0-1 }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.score ?? 
0 };\n * });\n * ```\n *\n * @packageDocumentation\n */\n\n// Re-export schemas and types\nexport {\n CodeGraderInputSchema,\n CodeGraderResultSchema,\n TraceSummarySchema,\n MessageSchema,\n ToolCallSchema,\n TokenUsageSchema,\n PromptTemplateInputSchema,\n type CodeGraderInput,\n type CodeGraderResult,\n type EnrichedCodeGraderInput,\n type TraceSummary,\n type Message,\n type ToolCall,\n type TokenUsage,\n type PromptTemplateInput,\n // Backward-compat aliases (deprecated)\n CodeJudgeInputSchema,\n CodeJudgeResultSchema,\n type CodeJudgeInput,\n type CodeJudgeResult,\n} from './schemas.js';\n\n// Re-export target client\nexport {\n createTargetClient,\n TargetNotAvailableError,\n TargetInvocationError,\n type TargetClient,\n type TargetInfo,\n type TargetInvokeRequest,\n type TargetInvokeResponse,\n} from './target-client.js';\n\n// Re-export Zod for typed config support\nexport { z } from 'zod';\n\n// Re-export assertion types\nexport type {\n AssertionContext,\n AssertionHandler,\n AssertionScore,\n AssertionType,\n} from './assertion.js';\n\nimport { type AssertionHandler, runAssertion } from './assertion.js';\nimport { type PromptTemplateHandler, runPromptTemplate } from './prompt-template.js';\nimport { type CodeGraderHandler, type CodeJudgeHandler, runCodeGrader } from './runtime.js';\n\nexport type { CodeGraderHandler };\n/** @deprecated Use CodeGraderHandler */\nexport type { CodeJudgeHandler };\nexport type { PromptTemplateHandler };\n\n/**\n * Define a code grader evaluator with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Validates the result and outputs JSON to stdout\n * 5. 
Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the input and returns a result\n *\n * @example\n * ```typescript\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace }) => {\n * if (!trace) {\n * return { score: 0.5, assertions: [{ text: 'No trace available', passed: false }] };\n * }\n *\n * const efficient = trace.eventCount <= 10;\n * return {\n * score: efficient ? 1.0 : 0.5,\n * assertions: [{ text: efficient ? 'Efficient execution' : 'Too many tool calls', passed: efficient }],\n * };\n * });\n * ```\n *\n * @example With typed config\n * ```typescript\n * import { defineCodeGrader, z } from '@agentv/eval';\n *\n * const ConfigSchema = z.object({\n * maxToolCalls: z.number().default(10),\n * });\n *\n * export default defineCodeGrader(({ trace, config }) => {\n * const { maxToolCalls } = ConfigSchema.parse(config ?? {});\n * // Use maxToolCalls...\n * });\n * ```\n */\nexport function defineCodeGrader(handler: CodeGraderHandler): void {\n // Run immediately when module is loaded\n runCodeGrader(handler);\n}\n\n/** @deprecated Use defineCodeGrader */\nexport const defineCodeJudge = defineCodeGrader;\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? 
`Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n\n/**\n * Define a custom assertion evaluator with automatic stdin/stdout handling.\n *\n * Assertions are the simplest way to add custom evaluation logic. They receive\n * the full evaluation context and return a pass/fail result with optional\n * granular scoring.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed context\n * 4. Normalizes the result (pass→score, clamp, etc.)\n * 5. Outputs JSON to stdout\n * 6. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the context and returns a result\n *\n * @example Simple pass/fail\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.toLowerCase().includes('hello'),\n * assertions: [{ text: 'Checks for greeting', passed: outputText.toLowerCase().includes('hello') }],\n * }));\n * ```\n *\n * @example Granular scoring\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText, trace }) => {\n * const hasContent = outputText.length > 0 ? 0.5 : 0;\n * const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 
0.5 : 0;\n * return {\n * score: hasContent + isEfficient,\n * assertions: [\n * { text: 'Has content', passed: !!hasContent },\n * { text: 'Efficient', passed: !!isEfficient },\n * ],\n * };\n * });\n * ```\n */\nexport function defineAssertion(handler: AssertionHandler): void {\n runAssertion(handler);\n}\n","/**\n * Zod schemas for code grader input/output validation.\n * Provides both compile-time types and runtime validation.\n */\nimport { z } from 'zod';\n\n/**\n * Token usage metrics schema.\n */\nexport const TokenUsageSchema = z.object({\n input: z.number(),\n output: z.number(),\n cached: z.number().optional(),\n});\n\n/**\n * Trace summary schema (camelCase for TypeScript ergonomics).\n */\nexport const TraceSummarySchema = z.object({\n eventCount: z.number(),\n toolNames: z.array(z.string()),\n toolCallsByName: z.record(z.string(), z.number()),\n errorCount: z.number(),\n toolDurations: z.record(z.string(), z.array(z.number())).optional(),\n llmCallCount: z.number().optional(),\n});\n\n/**\n * Tool call schema.\n */\nexport const ToolCallSchema = z.object({\n tool: z.string(),\n input: z.unknown().optional(),\n output: z.unknown().optional(),\n id: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n});\n\n/**\n * Unified message schema for input, expected, and output messages.\n */\nexport const MessageSchema = z.object({\n role: z.enum(['assistant', 'user', 'system', 'tool']),\n content: z.union([z.string(), z.record(z.unknown()), z.array(z.record(z.unknown()))]).optional(),\n toolCalls: z.array(ToolCallSchema).optional(),\n name: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n metadata: z.record(z.unknown()).optional(),\n});\n\n/**\n * Code grader input schema (camelCase, converted from snake_case wire format).\n *\n * Text convenience accessors (`inputText`, `outputText`, 
`expectedOutputText`) are always\n * strings. Structured fields (`input`, `output`, `expectedOutput`) are always `Message[]`.\n */\nexport const CodeGraderInputSchema = z.object({\n /** @deprecated Use `inputText` instead. First user message content as string. */\n question: z.string(),\n criteria: z.string(),\n expectedOutput: z.array(MessageSchema),\n /** @deprecated Use `expectedOutputText` instead. Expected output content as string. */\n referenceAnswer: z.string().optional(),\n /** @deprecated Use `outputText` instead. Last assistant message content as string. */\n answer: z.string(),\n output: z.array(MessageSchema).nullable().optional(),\n /** Path to a temp file containing the output JSON (used for large payloads). */\n outputPath: z.string().optional(),\n guidelineFiles: z.array(z.string()),\n inputFiles: z.array(z.string()),\n input: z.array(MessageSchema),\n trace: TraceSummarySchema.nullable().optional(),\n tokenUsage: TokenUsageSchema.nullable().optional(),\n costUsd: z.number().nullable().optional(),\n durationMs: z.number().nullable().optional(),\n startTime: z.string().nullable().optional(),\n endTime: z.string().nullable().optional(),\n fileChanges: z.string().nullable().optional(),\n workspacePath: z.string().nullable().optional(),\n config: z.record(z.unknown()).nullable().optional(),\n /** First user message content as string. Replaces `question`. */\n inputText: z.string().optional(),\n /** Last assistant message content as string. Replaces `answer`. */\n outputText: z.string().optional(),\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n expectedOutputText: z.string().optional(),\n});\n\n/**\n * Code grader result schema (validated before output).\n */\nexport const CodeGraderResultSchema = z.object({\n score: z.number().min(0).max(1),\n assertions: z\n .array(\n z.object({\n text: z.string(),\n passed: z.boolean(),\n evidence: z.string().optional(),\n }),\n )\n .optional()\n .default([]),\n /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */\n details: z.record(z.unknown()).optional(),\n});\n\n/**\n * Inferred types from schemas.\n */\nexport type CodeGraderInput = z.infer<typeof CodeGraderInputSchema>;\nexport type CodeGraderResult = z.infer<typeof CodeGraderResultSchema>;\n\n/**\n * CodeGraderInput after `enrichInput()` has run.\n *\n * The text convenience accessors (`inputText`, `outputText`, `expectedOutputText`)\n * are always populated by the runtime before the handler is called, so they are\n * guaranteed to be `string` (never `undefined`).\n *\n * Handler function signatures (`CodeGraderHandler`, `AssertionHandler`) use this\n * type so that user code can destructure `{ outputText }` without null-checks.\n */\nexport type EnrichedCodeGraderInput = Omit<\n CodeGraderInput,\n 'inputText' | 'outputText' | 'expectedOutputText'\n> & {\n /** First user message content as string. Replaces `question`. */\n readonly inputText: string;\n /** Last assistant message content as string. Replaces `answer`. */\n readonly outputText: string;\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n readonly expectedOutputText: string;\n};\nexport type TraceSummary = z.infer<typeof TraceSummarySchema>;\nexport type Message = z.infer<typeof MessageSchema>;\nexport type ToolCall = z.infer<typeof ToolCallSchema>;\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * Prompt template input schema (camelCase, converted from snake_case wire format).\n * Uses the same schema as CodeGraderInput since the orchestrator sends identical payloads.\n */\nexport const PromptTemplateInputSchema = CodeGraderInputSchema;\n\nexport type PromptTemplateInput = CodeGraderInput;\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderInputSchema */\nexport const CodeJudgeInputSchema = CodeGraderInputSchema;\n/** @deprecated Use CodeGraderResultSchema */\nexport const CodeJudgeResultSchema = CodeGraderResultSchema;\n/** @deprecated Use CodeGraderInput */\nexport type CodeJudgeInput = CodeGraderInput;\n/** @deprecated Use CodeGraderResult */\nexport type CodeJudgeResult = CodeGraderResult;\n","/**\n * Client for invoking configured targets from code-grader scripts.\n *\n * Environment variables (set automatically by AgentV when `target` config is present):\n * - AGENTV_TARGET_PROXY_URL: The URL of the local proxy server\n * - AGENTV_TARGET_PROXY_TOKEN: Bearer token for authentication\n */\n\nimport type { TokenUsage } from './schemas.js';\n\n/**\n * Request to invoke the target\n */\nexport interface TargetInvokeRequest {\n readonly question: string;\n readonly systemPrompt?: string;\n readonly evalCaseId?: string;\n readonly attempt?: number;\n /** Optional target override - use a different target for this invocation */\n readonly target?: string;\n}\n\n/**\n * Response from a target invocation\n */\nexport interface TargetInvokeResponse {\n readonly output: readonly unknown[];\n readonly rawText?: string;\n readonly tokenUsage?: TokenUsage;\n}\n\n/**\n * Information about the target proxy 
configuration\n */\nexport interface TargetInfo {\n /** Name of the default target being used */\n readonly targetName: string;\n /** Maximum number of calls allowed */\n readonly maxCalls: number;\n /** Current number of calls made */\n readonly callCount: number;\n /** List of all available target names */\n readonly availableTargets: readonly string[];\n}\n\n/**\n * Target client for making target invocations\n */\nexport interface TargetClient {\n /**\n * Invoke the configured target with a prompt.\n * @param request - The question and optional system prompt\n * @returns The target's response with output messages and optional raw text\n */\n invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse>;\n\n /**\n * Invoke the target with multiple requests in sequence.\n * Each request counts toward the max_calls limit.\n * @param requests - Array of target requests\n * @returns Array of target responses\n */\n invokeBatch(requests: readonly TargetInvokeRequest[]): Promise<readonly TargetInvokeResponse[]>;\n\n /**\n * Get information about the target proxy configuration.\n * Returns the default target name, max calls, current call count, and available targets.\n */\n getInfo(): Promise<TargetInfo>;\n}\n\n/**\n * Error thrown when target proxy is not available\n */\nexport class TargetNotAvailableError extends Error {\n constructor(message: string) {\n super(message);\n this.name = 'TargetNotAvailableError';\n }\n}\n\n/**\n * Error thrown when target invocation fails\n */\nexport class TargetInvocationError extends Error {\n readonly statusCode?: number;\n\n constructor(message: string, statusCode?: number) {\n super(message);\n this.name = 'TargetInvocationError';\n this.statusCode = statusCode;\n }\n}\n\n/**\n * Create a target client from environment variables.\n *\n * This function reads the proxy URL and token from environment variables\n * that are automatically set by AgentV when a `target` config block is present\n * on a `code_grader` (or 
`code_judge`) evaluator.\n *\n * @returns A target client if environment variables are set, otherwise undefined\n * @throws TargetNotAvailableError if token is missing when URL is present\n *\n * @example\n * ```typescript\n * import { createTargetClient, defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ question, criteria }) => {\n * const target = createTargetClient();\n *\n * if (!target) {\n * // Target not available - no target config on this evaluator\n * return { score: 0.5, assertions: [{ text: 'Target not available', passed: false }] };\n * }\n *\n * const response = await target.invoke({\n * question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,\n * systemPrompt: 'You are an expert evaluator. Respond with JSON: { \"correct\": true/false }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.correct ? 1.0 : 0.0 };\n * });\n * ```\n */\nexport function createTargetClient(): TargetClient | undefined {\n const proxyUrl = process.env.AGENTV_TARGET_PROXY_URL;\n const proxyToken = process.env.AGENTV_TARGET_PROXY_TOKEN;\n\n if (!proxyUrl) {\n return undefined;\n }\n\n if (!proxyToken) {\n throw new TargetNotAvailableError(\n 'AGENTV_TARGET_PROXY_URL is set but AGENTV_TARGET_PROXY_TOKEN is missing',\n );\n }\n\n return createTargetClientInternal(proxyUrl, proxyToken);\n}\n\n/**\n * Internal: Create a target client with explicit URL and token.\n * Exported for testing only - use createTargetClient() in production.\n */\nexport function createTargetClientInternal(url: string, token: string): TargetClient {\n const headers = {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${token}`,\n };\n\n return {\n async invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse> {\n const response = await fetch(`${url}/invoke`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n question: request.question,\n systemPrompt: 
request.systemPrompt,\n evalCaseId: request.evalCaseId,\n attempt: request.attempt,\n target: request.target,\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInvokeResponse;\n },\n\n async invokeBatch(\n requests: readonly TargetInvokeRequest[],\n ): Promise<readonly TargetInvokeResponse[]> {\n const response = await fetch(`${url}/invokeBatch`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n requests: requests.map((r) => ({\n question: r.question,\n systemPrompt: r.systemPrompt,\n evalCaseId: r.evalCaseId,\n attempt: r.attempt,\n target: r.target,\n })),\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n const result = (await response.json()) as { responses: TargetInvokeResponse[] };\n return result.responses;\n },\n\n async getInfo(): Promise<TargetInfo> {\n const response = await fetch(`${url}/info`, {\n method: 'GET',\n headers,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? 
`HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInfo;\n },\n };\n}\n","/**\n * Runtime for custom assertion evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n *\n * Assertions receive the same input as code graders but use a simplified result\n * contract focused on pass/fail with optional score granularity.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Context provided to assertion handlers.\n *\n * Same shape as CodeGraderInput but with `inputText`, `outputText`, and\n * `expectedOutputText` guaranteed to be strings (populated by the runtime\n * before the handler is called).\n */\nexport type AssertionContext = EnrichedCodeGraderInput;\n\n/**\n * Known built-in assertion types. 
Custom types are extensible via string.\n *\n * Use in EVAL.yaml `assertions` blocks:\n * ```yaml\n * assertions:\n * - type: contains\n * value: \"Paris\"\n * ```\n *\n * Custom types registered via `.agentv/assertions/` or `defineAssertion()`\n * are also valid — the `string & {}` escape hatch provides autocomplete\n * for known types while accepting any string.\n */\nexport type AssertionType =\n // kebab-case (canonical internal form)\n | 'llm-grader'\n | 'code-grader'\n | 'rubrics'\n | 'composite'\n | 'tool-trajectory'\n | 'field-accuracy'\n | 'latency'\n | 'cost'\n | 'token-usage'\n | 'execution-metrics'\n | 'skill-trigger'\n | 'contains'\n | 'contains-any'\n | 'contains-all'\n | 'icontains'\n | 'icontains-any'\n | 'icontains-all'\n | 'starts-with'\n | 'ends-with'\n | 'equals'\n | 'regex'\n | 'is-json'\n // legacy aliases (still accepted)\n | 'llm-judge'\n | 'code-judge'\n | 'llm_judge'\n | 'code_judge'\n | 'llm_grader'\n | 'code_grader'\n | 'tool_trajectory'\n | 'field_accuracy'\n | 'token_usage'\n | 'execution_metrics'\n | 'contains_any'\n | 'contains_all'\n | 'icontains_any'\n | 'icontains_all'\n | 'starts_with'\n | 'ends_with'\n | 'is_json'\n | (string & {});\n\n/**\n * Result returned from an assertion handler.\n *\n * @example Pass with score\n * ```ts\n * { pass: true, assertions: [{ text: 'Output contains expected keywords', passed: true }] }\n * ```\n *\n * @example Fail with evidence\n * ```ts\n * { pass: false, score: 0.3, assertions: [{ text: 'Missing required header', passed: false }] }\n * ```\n *\n * @example Granular score (0-1)\n * ```ts\n * { score: 0.75, assertions: [\n * { text: 'Format correct', passed: true },\n * { text: 'Content relevant', passed: true },\n * { text: 'Missing citation', passed: false },\n * ] }\n * ```\n */\nexport interface AssertionScore {\n /** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */\n readonly pass?: boolean;\n /** Numeric score between 0 and 1. 
Defaults to 1 if pass=true, 0 if pass=false. */\n readonly score?: number;\n /** Per-assertion verdicts with optional evidence. */\n readonly assertions?: readonly {\n readonly text: string;\n readonly passed: boolean;\n readonly evidence?: string;\n }[];\n /** Optional structured details for domain-specific metrics. */\n readonly details?: Record<string, unknown>;\n}\n\n/**\n * Handler function type for assertions.\n */\nexport type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;\n\n/**\n * Read stdin synchronously.\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Normalize an AssertionScore to a CodeGraderResult for wire compatibility.\n */\nfunction normalizeScore(result: AssertionScore): CodeGraderResult {\n let score: number;\n if (result.score !== undefined) {\n score = clampScore(result.score);\n } else if (result.pass !== undefined) {\n score = result.pass ? 1 : 0;\n } else {\n score = 0;\n }\n\n return {\n score,\n assertions: result.assertions ? 
[...result.assertions] : [],\n details: result.details,\n };\n}\n\n/**\n * Run an assertion handler with full stdin/stdout handling.\n * This is the internal implementation called by defineAssertion.\n */\nexport async function runAssertion(handler: AssertionHandler): Promise<void> {\n try {\n const stdin = readStdin();\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n const camelInput = toCamelCaseDeep(rawInput);\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // Lazy file-backed output loading\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // After enrichment, text accessors are guaranteed to be strings\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n const normalized = normalizeScore(rawResult);\n const result = CodeGraderResultSchema.parse(normalized);\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n assertions: [{ text: `Assertion failed: ${errorMessage}`, passed: false }],\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n","/**\n * Case conversion utilities for JSON payloads.\n * Converts between snake_case (wire format) and camelCase (TypeScript).\n */\n\nfunction toCamelCase(str: string): string {\n // Don't convert keys that start with uppercase (proper nouns/tool names)\n if (/^[A-Z]/.test(str)) {\n return str;\n }\n return str.replace(/_([a-z0-9])/g, (_, letter) => letter.toUpperCase());\n}\n\n/**\n * 
Recursively converts all keys in an object from snake_case to camelCase.\n * Used to map wire payloads into TypeScript-friendly shapes.\n *\n * @param obj - The object to convert (can be any JSON-serializable value)\n * @returns A new object with all keys converted to camelCase\n */\nexport function toCamelCaseDeep(obj: unknown): unknown {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => toCamelCaseDeep(item));\n }\n\n if (typeof obj === 'object') {\n const result: Record<string, unknown> = {};\n for (const [key, value] of Object.entries(obj)) {\n const camelKey = toCamelCase(key);\n result[camelKey] = toCamelCaseDeep(value);\n }\n return result;\n }\n\n return obj;\n}\n","/**\n * Deprecation warning utilities for code grader and assertion runtimes.\n * Provides text convenience accessors and deprecation warnings on legacy field names.\n */\nimport type { CodeGraderInput } from './schemas.js';\n\nconst ANSI_YELLOW = '\\u001b[33m';\nconst ANSI_RESET = '\\u001b[0m';\n\n/**\n * Emit a deprecation warning to stderr (once per field name per process).\n */\nconst deprecationWarned = new Set<string>();\nfunction warnDeprecation(oldName: string, newName: string): void {\n if (deprecationWarned.has(oldName)) return;\n deprecationWarned.add(oldName);\n console.warn(\n `${ANSI_YELLOW}Warning: '${oldName}' is deprecated in code graders. Use '${newName}' instead.${ANSI_RESET}`,\n );\n}\n\n/**\n * Reset deprecation warning state. Used only in tests.\n */\nexport function resetDeprecationWarnings(): void {\n deprecationWarned.clear();\n}\n\n/**\n * Populate `inputText`, `outputText`, and `expectedOutputText` convenience accessors\n * on the validated input object, and install deprecation warnings on legacy fields.\n *\n * Text accessors are always strings. 
Structured fields (`input`, `output`, `expectedOutput`)\n * remain `Message[]` always.\n */\nexport function enrichInput(input: CodeGraderInput): CodeGraderInput {\n // Populate text convenience accessors (always strings)\n // inputText = question (first user message content as string)\n const inputText = input.question;\n // outputText = answer (last assistant message content as string)\n const outputText = input.answer;\n // expectedOutputText = referenceAnswer (expected output content as string)\n const expectedOutputText = input.referenceAnswer ?? '';\n\n // Store the original values before redefining properties\n const originalQuestion = input.question;\n const originalAnswer = input.answer;\n const originalReferenceAnswer = input.referenceAnswer;\n\n // Set new text accessor values\n Object.defineProperty(input, 'inputText', {\n value: inputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'outputText', {\n value: outputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'expectedOutputText', {\n value: expectedOutputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n\n // Install deprecation warnings on legacy fields via property accessors\n Object.defineProperty(input, 'question', {\n get() {\n warnDeprecation('question', 'inputText');\n return originalQuestion;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'answer', {\n get() {\n warnDeprecation('answer', 'outputText');\n return originalAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'referenceAnswer', {\n get() {\n warnDeprecation('referenceAnswer', 'expectedOutputText');\n return originalReferenceAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n return input;\n}\n","/**\n * Runtime for prompt template evaluators.\n * Handles stdin parsing, validation, error handling, and string 
output.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport { type EnrichedCodeGraderInput, PromptTemplateInputSchema } from './schemas.js';\n\n/**\n * Handler function type for prompt templates.\n * Returns the prompt string to use for evaluation.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * `expectedOutputText` are always populated before the handler is called.\n */\nexport type PromptTemplateHandler = (input: EnrichedCodeGraderInput) => string | Promise<string>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Run a prompt template handler with full stdin/stdout handling.\n * This is the internal implementation called by definePromptTemplate.\n */\nexport async function runPromptTemplate(handler: PromptTemplateHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = PromptTemplateInputSchema.parse(camelInput);\n\n // 5. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 6. Run handler (input is now enriched with guaranteed text accessors)\n const prompt = await handler(input as EnrichedCodeGraderInput);\n\n // 6. Output raw string (not JSON) - the prompt itself\n console.log(prompt);\n } catch (error) {\n // Output error to stderr and exit with non-zero code\n console.error(error instanceof Error ? error.message : String(error));\n process.exit(1);\n }\n}\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. 
Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? `Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n *\n * @example Async handler\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate(async (ctx) => {\n * // Async operations are supported\n * return `Question: ${ctx.inputText}\\nAnswer: ${ctx.outputText}`;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n","/**\n * Runtime for code grader evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Handler function type for code graders.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * 
`expectedOutputText` are always populated before the handler is called.\n */\nexport type CodeGraderHandler = (\n input: EnrichedCodeGraderInput,\n) => CodeGraderResult | Promise<CodeGraderResult>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\n/**\n * Format an error for output.\n */\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Run a code grader handler with full stdin/stdout handling.\n * This is the internal implementation called by defineCodeGrader.\n */\nexport async function runCodeGrader(handler: CodeGraderHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // 5. Set up lazy file-backed output loading if applicable\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // 6. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 7. 
Run handler (input is now enriched with guaranteed text accessors)\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n\n // 8. Validate and normalize output\n const result = CodeGraderResultSchema.parse({\n ...rawResult,\n score: clampScore(rawResult.score),\n });\n\n // 9. Output JSON\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n // Output failure result\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n assertions: [{ text: `Evaluation failed: ${errorMessage}`, passed: false }],\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderHandler */\nexport type CodeJudgeHandler = CodeGraderHandler;\n/** @deprecated Use runCodeGrader */\nexport const runCodeJudge = runCodeGrader;\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,iBAAkB;AAKX,IAAM,mBAAmB,aAAE,OAAO;AAAA,EACvC,OAAO,aAAE,OAAO;AAAA,EAChB,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,OAAO,EAAE,SAAS;AAC9B,CAAC;AAKM,IAAM,qBAAqB,aAAE,OAAO;AAAA,EACzC,YAAY,aAAE,OAAO;AAAA,EACrB,WAAW,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC7B,iBAAiB,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,OAAO,CAAC;AAAA,EAChD,YAAY,aAAE,OAAO;AAAA,EACrB,eAAe,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,MAAM,aAAE,OAAO,CAAC,CAAC,EAAE,SAAS;AAAA,EAClE,cAAc,aAAE,OAAO,EAAE,SAAS;AACpC,CAAC;AAKM,IAAM,iBAAiB,aAAE,OAAO;AAAA,EACrC,MAAM,aAAE,OAAO;AAAA,EACf,OAAO,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC5B,QAAQ,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC7B,IAAI,aAAE,OAAO,EAAE,SAAS;AAAA,EACxB,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAKM,IAAM,gBAAgB,aAAE,OAAO;AAAA,EACpC,MAAM,aAAE,KAAK,CAAC,aAAa,QAAQ,UAAU,MAAM,CAAC;AAAA,EACpD,SAAS,aAAE,MAAM,CAAC,aAAE,OAAO,GAAG,aAAE,OAAO,aAAE,QAAQ,CAAC,GAAG,aAAE,MAAM,aAAE,OAAO,aAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,EA
AE,SAAS;AAAA,EAC/F,WAAW,aAAE,MAAM,cAAc,EAAE,SAAS;AAAA,EAC5C,MAAM,aAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,UAAU,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC3C,CAAC;AAQM,IAAM,wBAAwB,aAAE,OAAO;AAAA;AAAA,EAE5C,UAAU,aAAE,OAAO;AAAA,EACnB,UAAU,aAAE,OAAO;AAAA,EACnB,gBAAgB,aAAE,MAAM,aAAa;AAAA;AAAA,EAErC,iBAAiB,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAErC,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,MAAM,aAAa,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAEnD,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,gBAAgB,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAClC,YAAY,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC9B,OAAO,aAAE,MAAM,aAAa;AAAA,EAC5B,OAAO,mBAAmB,SAAS,EAAE,SAAS;AAAA,EAC9C,YAAY,iBAAiB,SAAS,EAAE,SAAS;AAAA,EACjD,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,YAAY,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3C,WAAW,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC1C,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,aAAa,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC5C,eAAe,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,QAAQ,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAElD,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAEhC,oBAAoB,aAAE,OAAO,EAAE,SAAS;AAC1C,CAAC;AAKM,IAAM,yBAAyB,aAAE,OAAO;AAAA,EAC7C,OAAO,aAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAC9B,YAAY,aACT;AAAA,IACC,aAAE,OAAO;AAAA,MACP,MAAM,aAAE,OAAO;AAAA,MACf,QAAQ,aAAE,QAAQ;AAAA,MAClB,UAAU,aAAE,OAAO,EAAE,SAAS;AAAA,IAChC,CAAC;AAAA,EACH,EACC,SAAS,EACT,QAAQ,CAAC,CAAC;AAAA;AAAA,EAEb,SAAS,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC1C,CAAC;AAsCM,IAAM,4BAA4B;AAMlC,IAAM,uBAAuB;AAE7B,IAAM,wBAAwB;;;ACjF9B,IAAM,0BAAN,cAAsC,MAAM;AAAA,EACjD,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAKO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC;AAAA,EAET,YAAY,SAAiB,YAAqB;AAChD,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,aAAa;AAAA,EACpB;AACF;AAkCO,SAAS,qBAA+C;AAC7D,QAAM,WAAW,QAAQ,IAAI;AAC7B,QAAM,aAAa,QAAQ,IAAI;AAE/B,MAAI,CAAC,UAAU;AACb,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,YAAY;AACf,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,SAAO,
2BAA2B,UAAU,UAAU;AACxD;AAMO,SAAS,2BAA2B,KAAa,OAA6B;AACnF,QAAM,UAAU;AAAA,IACd,gBAAgB;AAAA,IAChB,eAAe,UAAU,KAAK;AAAA,EAChC;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,SAA6D;AACxE,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,WAAW;AAAA,QAC5C,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,QAAQ;AAAA,UAClB,cAAc,QAAQ;AAAA,UACtB,YAAY,QAAQ;AAAA,UACpB,SAAS,QAAQ;AAAA,UACjB,QAAQ,QAAQ;AAAA,QAClB,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,IAEA,MAAM,YACJ,UAC0C;AAC1C,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,gBAAgB;AAAA,QACjD,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,SAAS,IAAI,CAAC,OAAO;AAAA,YAC7B,UAAU,EAAE;AAAA,YACZ,cAAc,EAAE;AAAA,YAChB,YAAY,EAAE;AAAA,YACd,SAAS,EAAE;AAAA,YACX,QAAQ,EAAE;AAAA,UACZ,EAAE;AAAA,QACJ,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,YAAM,SAAU,MAAM,SAAS,KAAK;AACpC,aAAO,OAAO;AAAA,IAChB;AAAA,IAEA,MAAM,UAA+B;AACnC,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,SAAS;AAAA,QAC1C,QAAQ;AAAA,QACR;AAAA,MACF,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,EACF;AACF;;;AFrJA,IAAAA,cAAkB;;;AGhFlB,qBAA6B;;;ACF7B,SAAS,YAAY,KAAqB;AAExC,MAAI,SAAS,KAAK,GAAG,GAAG;AACtB,WAAO;AAAA,EACT;AACA,SAAO,IAAI,QAAQ,gBAAgB,CAAC,GAAG,WAAW,OAAO,YAAY,CAAC;AACxE;AASO,SAAS,gBAAgB,KAAuB;AACrD,MAAI,QAAQ,QAAQ,QAAQ,QAAW;AACrC,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,IAAI,CAAC,SAAS,g
BAAgB,IAAI,CAAC;AAAA,EAChD;AAEA,MAAI,OAAO,QAAQ,UAAU;AAC3B,UAAM,SAAkC,CAAC;AACzC,eAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC9C,YAAM,WAAW,YAAY,GAAG;AAChC,aAAO,QAAQ,IAAI,gBAAgB,KAAK;AAAA,IAC1C;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ACjCA,IAAM,cAAc;AACpB,IAAM,aAAa;AAKnB,IAAM,oBAAoB,oBAAI,IAAY;AAC1C,SAAS,gBAAgB,SAAiB,SAAuB;AAC/D,MAAI,kBAAkB,IAAI,OAAO,EAAG;AACpC,oBAAkB,IAAI,OAAO;AAC7B,UAAQ;AAAA,IACN,GAAG,WAAW,aAAa,OAAO,yCAAyC,OAAO,aAAa,UAAU;AAAA,EAC3G;AACF;AAgBO,SAAS,YAAY,OAAyC;AAGnE,QAAM,YAAY,MAAM;AAExB,QAAM,aAAa,MAAM;AAEzB,QAAM,qBAAqB,MAAM,mBAAmB;AAGpD,QAAM,mBAAmB,MAAM;AAC/B,QAAM,iBAAiB,MAAM;AAC7B,QAAM,0BAA0B,MAAM;AAGtC,SAAO,eAAe,OAAO,aAAa;AAAA,IACxC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,cAAc;AAAA,IACzC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,sBAAsB;AAAA,IACjD,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAGD,SAAO,eAAe,OAAO,YAAY;AAAA,IACvC,MAAM;AACJ,sBAAgB,YAAY,WAAW;AACvC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,UAAU;AAAA,IACrC,MAAM;AACJ,sBAAgB,UAAU,YAAY;AACtC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,mBAAmB;AAAA,IAC9C,MAAM;AACJ,sBAAgB,mBAAmB,oBAAoB;AACvD,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO;AACT;;;AFiCA,SAAS,YAAoB;AAC3B,aAAO,6BAAa,GAAG,MAAM;AAC/B;AAKA,SAAS,WAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAKA,SAAS,eAAe,QAA0C;AAChE,MAAI;AACJ,MAAI,OAAO,UAAU,QAAW;AAC9B,YAAQ,WAAW,OAAO,KAAK;AAAA,EACjC,WAAW,OAAO,SAAS,QAAW;AACpC,YAAQ,OAAO,OAAO,IAAI;AAAA,EAC5B,OAAO;AACL,YAAQ;AAAA,EACV;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,OAAO,aAAa,CAAC,GAAG,OAAO,UAAU,IAAI,CAAC;AAAA,IAC1D,SAAS,OAAO;AAAA,EAClB;AACF;AAMA,eAAsB,aAAa,SAA0C;AAC3E,MAAI;AACF,UAAM,QAAQ,UAAU;AACxB,UAAM,WAAW,KAAK,M
AAM,KAAK;AACjC,UAAM,aAAa,gBAAgB,QAAQ;AAC3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,6BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAAM,YAAY,MAAM,QAAQ,KAAgC;AAChE,UAAM,aAAa,eAAe,SAAS;AAC3C,UAAM,SAAS,uBAAuB,MAAM,UAAU;AACtD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AACd,UAAM,eAAe,YAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,YAAY,CAAC,EAAE,MAAM,qBAAqB,YAAY,IAAI,QAAQ,MAAM,CAAC;AAAA,IAC3E;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AGpNA,IAAAC,kBAA6B;AAkB7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAMA,eAAsB,kBAAkB,SAA+C;AACrF,MAAI;AAEF,UAAM,QAAQA,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,0BAA0B,MAAM,UAAU;AAGxD,gBAAY,KAAK;AAGjB,UAAM,SAAS,MAAM,QAAQ,KAAgC;AAG7D,YAAQ,IAAI,MAAM;AAAA,EACpB,SAAS,OAAO;AAEd,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;ACrDA,IAAAC,kBAA6B;AAyB7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAKA,SAASC,YAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAKA,SAASC,aAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAMA,eAAsB,cAAc,SAA2C;AAC7E,MAAI;AAEF,UAAM,QAAQF,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,8BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAAM,YAAY,MAAM,QAAQ,KAAgC;AAGhE,UAAM,SAAS,uBAAuB,MAAM;AAAA,MAC1C,GAAG;AAAA,MACH,OAAOC,YAAW,UAAU,K
AAK;AAAA,IACnC,CAAC;AAGD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AAEd,UAAM,eAAeC,aAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,YAAY,CAAC,EAAE,MAAM,sBAAsB,YAAY,IAAI,QAAQ,MAAM,CAAC;AAAA,IAC5E;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;APsCO,SAAS,iBAAiB,SAAkC;AAEjE,gBAAc,OAAO;AACvB;AAGO,IAAM,kBAAkB;AAwCxB,SAAS,qBAAqB,SAAsC;AAEzE,oBAAkB,OAAO;AAC3B;AA8CO,SAAS,gBAAgB,SAAiC;AAC/D,eAAa,OAAO;AACtB;","names":["import_zod","import_node_fs","readStdin","import_node_fs","readStdin","clampScore","formatError"]}
|
package/dist/index.d.cts
CHANGED
|
@@ -592,22 +592,36 @@ declare const CodeGraderInputSchema: z.ZodObject<{
|
|
|
592
592
|
*/
|
|
593
593
|
declare const CodeGraderResultSchema: z.ZodObject<{
|
|
594
594
|
score: z.ZodNumber;
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
595
|
+
assertions: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
596
|
+
text: z.ZodString;
|
|
597
|
+
passed: z.ZodBoolean;
|
|
598
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
599
|
+
}, "strip", z.ZodTypeAny, {
|
|
600
|
+
text: string;
|
|
601
|
+
passed: boolean;
|
|
602
|
+
evidence?: string | undefined;
|
|
603
|
+
}, {
|
|
604
|
+
text: string;
|
|
605
|
+
passed: boolean;
|
|
606
|
+
evidence?: string | undefined;
|
|
607
|
+
}>, "many">>>;
|
|
598
608
|
/** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
|
|
599
609
|
details: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
600
610
|
}, "strip", z.ZodTypeAny, {
|
|
601
611
|
score: number;
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
612
|
+
assertions: {
|
|
613
|
+
text: string;
|
|
614
|
+
passed: boolean;
|
|
615
|
+
evidence?: string | undefined;
|
|
616
|
+
}[];
|
|
605
617
|
details?: Record<string, unknown> | undefined;
|
|
606
618
|
}, {
|
|
607
619
|
score: number;
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
620
|
+
assertions?: {
|
|
621
|
+
text: string;
|
|
622
|
+
passed: boolean;
|
|
623
|
+
evidence?: string | undefined;
|
|
624
|
+
}[] | undefined;
|
|
611
625
|
details?: Record<string, unknown> | undefined;
|
|
612
626
|
}>;
|
|
613
627
|
/**
|
|
@@ -1516,22 +1530,36 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
1516
1530
|
/** @deprecated Use CodeGraderResultSchema */
|
|
1517
1531
|
declare const CodeJudgeResultSchema: z.ZodObject<{
|
|
1518
1532
|
score: z.ZodNumber;
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1533
|
+
assertions: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1534
|
+
text: z.ZodString;
|
|
1535
|
+
passed: z.ZodBoolean;
|
|
1536
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
1537
|
+
}, "strip", z.ZodTypeAny, {
|
|
1538
|
+
text: string;
|
|
1539
|
+
passed: boolean;
|
|
1540
|
+
evidence?: string | undefined;
|
|
1541
|
+
}, {
|
|
1542
|
+
text: string;
|
|
1543
|
+
passed: boolean;
|
|
1544
|
+
evidence?: string | undefined;
|
|
1545
|
+
}>, "many">>>;
|
|
1522
1546
|
/** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
|
|
1523
1547
|
details: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
1524
1548
|
}, "strip", z.ZodTypeAny, {
|
|
1525
1549
|
score: number;
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1550
|
+
assertions: {
|
|
1551
|
+
text: string;
|
|
1552
|
+
passed: boolean;
|
|
1553
|
+
evidence?: string | undefined;
|
|
1554
|
+
}[];
|
|
1529
1555
|
details?: Record<string, unknown> | undefined;
|
|
1530
1556
|
}, {
|
|
1531
1557
|
score: number;
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1558
|
+
assertions?: {
|
|
1559
|
+
text: string;
|
|
1560
|
+
passed: boolean;
|
|
1561
|
+
evidence?: string | undefined;
|
|
1562
|
+
}[] | undefined;
|
|
1535
1563
|
details?: Record<string, unknown> | undefined;
|
|
1536
1564
|
}>;
|
|
1537
1565
|
/** @deprecated Use CodeGraderInput */
|
|
@@ -1634,7 +1662,7 @@ declare class TargetInvocationError extends Error {
|
|
|
1634
1662
|
*
|
|
1635
1663
|
* if (!target) {
|
|
1636
1664
|
* // Target not available - no target config on this evaluator
|
|
1637
|
-
* return { score: 0.5,
|
|
1665
|
+
* return { score: 0.5, assertions: [{ text: 'Target not available', passed: false }] };
|
|
1638
1666
|
* }
|
|
1639
1667
|
*
|
|
1640
1668
|
* const response = await target.invoke({
|
|
@@ -1675,19 +1703,23 @@ type AssertionType = 'llm-grader' | 'code-grader' | 'rubrics' | 'composite' | 't
|
|
|
1675
1703
|
/**
|
|
1676
1704
|
* Result returned from an assertion handler.
|
|
1677
1705
|
*
|
|
1678
|
-
* @example Pass with
|
|
1706
|
+
 * @example Pass with assertions
|
|
1679
1707
|
* ```ts
|
|
1680
|
-
* { pass: true,
|
|
1708
|
+
* { pass: true, assertions: [{ text: 'Output contains expected keywords', passed: true }] }
|
|
1681
1709
|
* ```
|
|
1682
1710
|
*
|
|
1683
|
-
* @example Fail with
|
|
1711
|
+
 * @example Fail with explicit score
|
|
1684
1712
|
* ```ts
|
|
1685
|
-
* { pass: false,
|
|
1713
|
+
* { pass: false, score: 0.3, assertions: [{ text: 'Missing required header', passed: false }] }
|
|
1686
1714
|
* ```
|
|
1687
1715
|
*
|
|
1688
1716
|
* @example Granular score (0-1)
|
|
1689
1717
|
* ```ts
|
|
1690
|
-
* { score: 0.75,
|
|
1718
|
+
* { score: 0.75, assertions: [
|
|
1719
|
+
* { text: 'Format correct', passed: true },
|
|
1720
|
+
* { text: 'Content relevant', passed: true },
|
|
1721
|
+
* { text: 'Missing citation', passed: false },
|
|
1722
|
+
* ] }
|
|
1691
1723
|
* ```
|
|
1692
1724
|
*/
|
|
1693
1725
|
interface AssertionScore {
|
|
@@ -1695,12 +1727,12 @@ interface AssertionScore {
|
|
|
1695
1727
|
readonly pass?: boolean;
|
|
1696
1728
|
/** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */
|
|
1697
1729
|
readonly score?: number;
|
|
1698
|
-
/**
|
|
1699
|
-
readonly
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1730
|
+
/** Per-assertion verdicts with optional evidence. */
|
|
1731
|
+
readonly assertions?: readonly {
|
|
1732
|
+
readonly text: string;
|
|
1733
|
+
readonly passed: boolean;
|
|
1734
|
+
readonly evidence?: string;
|
|
1735
|
+
}[];
|
|
1704
1736
|
/** Optional structured details for domain-specific metrics. */
|
|
1705
1737
|
readonly details?: Record<string, unknown>;
|
|
1706
1738
|
}
|
|
@@ -1740,7 +1772,7 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1740
1772
|
*
|
|
1741
1773
|
* export default defineAssertion(({ outputText }) => ({
|
|
1742
1774
|
* pass: outputText.includes('hello'),
|
|
1743
|
-
*
|
|
1775
|
+
* assertions: [{ text: 'Checks greeting', passed: outputText.includes('hello') }],
|
|
1744
1776
|
* }));
|
|
1745
1777
|
* ```
|
|
1746
1778
|
*
|
|
@@ -1751,8 +1783,7 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1751
1783
|
*
|
|
1752
1784
|
* export default defineCodeGrader(({ trace, outputText }) => ({
|
|
1753
1785
|
* score: trace?.eventCount <= 5 ? 1.0 : 0.5,
|
|
1754
|
-
*
|
|
1755
|
-
* misses: [],
|
|
1786
|
+
* assertions: [{ text: 'Efficient tool usage', passed: trace?.eventCount <= 5 }],
|
|
1756
1787
|
* }));
|
|
1757
1788
|
* ```
|
|
1758
1789
|
*
|
|
@@ -1764,7 +1795,7 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1764
1795
|
* export default defineCodeGrader(async ({ inputText }) => {
|
|
1765
1796
|
* const target = createTargetClient();
|
|
1766
1797
|
* if (!target) {
|
|
1767
|
-
* return { score: 0,
|
|
1798
|
+
* return { score: 0, assertions: [{ text: 'Target not available', passed: false }] };
|
|
1768
1799
|
* }
|
|
1769
1800
|
*
|
|
1770
1801
|
* const response = await target.invoke({
|
|
@@ -1798,14 +1829,13 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1798
1829
|
*
|
|
1799
1830
|
* export default defineCodeGrader(({ trace }) => {
|
|
1800
1831
|
* if (!trace) {
|
|
1801
|
-
* return { score: 0.5,
|
|
1832
|
+
* return { score: 0.5, assertions: [{ text: 'No trace available', passed: false }] };
|
|
1802
1833
|
* }
|
|
1803
1834
|
*
|
|
1804
1835
|
* const efficient = trace.eventCount <= 10;
|
|
1805
1836
|
* return {
|
|
1806
1837
|
* score: efficient ? 1.0 : 0.5,
|
|
1807
|
-
*
|
|
1808
|
-
* misses: efficient ? [] : ['Too many tool calls'],
|
|
1838
|
+
* assertions: [{ text: efficient ? 'Efficient execution' : 'Too many tool calls', passed: efficient }],
|
|
1809
1839
|
* };
|
|
1810
1840
|
* });
|
|
1811
1841
|
* ```
|
|
@@ -1889,7 +1919,7 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
|
|
|
1889
1919
|
*
|
|
1890
1920
|
* export default defineAssertion(({ outputText }) => ({
|
|
1891
1921
|
* pass: outputText.toLowerCase().includes('hello'),
|
|
1892
|
-
*
|
|
1922
|
+
* assertions: [{ text: 'Checks for greeting', passed: outputText.toLowerCase().includes('hello') }],
|
|
1893
1923
|
* }));
|
|
1894
1924
|
* ```
|
|
1895
1925
|
*
|
|
@@ -1902,9 +1932,9 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
|
|
|
1902
1932
|
* const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;
|
|
1903
1933
|
* return {
|
|
1904
1934
|
* score: hasContent + isEfficient,
|
|
1905
|
-
*
|
|
1906
|
-
*
|
|
1907
|
-
*
|
|
1935
|
+
* assertions: [
|
|
1936
|
+
* { text: 'Has content', passed: !!hasContent },
|
|
1937
|
+
* { text: 'Efficient', passed: !!isEfficient },
|
|
1908
1938
|
* ],
|
|
1909
1939
|
* };
|
|
1910
1940
|
* });
|
package/dist/index.d.ts
CHANGED
|
@@ -592,22 +592,36 @@ declare const CodeGraderInputSchema: z.ZodObject<{
|
|
|
592
592
|
*/
|
|
593
593
|
declare const CodeGraderResultSchema: z.ZodObject<{
|
|
594
594
|
score: z.ZodNumber;
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
595
|
+
assertions: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
596
|
+
text: z.ZodString;
|
|
597
|
+
passed: z.ZodBoolean;
|
|
598
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
599
|
+
}, "strip", z.ZodTypeAny, {
|
|
600
|
+
text: string;
|
|
601
|
+
passed: boolean;
|
|
602
|
+
evidence?: string | undefined;
|
|
603
|
+
}, {
|
|
604
|
+
text: string;
|
|
605
|
+
passed: boolean;
|
|
606
|
+
evidence?: string | undefined;
|
|
607
|
+
}>, "many">>>;
|
|
598
608
|
/** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
|
|
599
609
|
details: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
600
610
|
}, "strip", z.ZodTypeAny, {
|
|
601
611
|
score: number;
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
612
|
+
assertions: {
|
|
613
|
+
text: string;
|
|
614
|
+
passed: boolean;
|
|
615
|
+
evidence?: string | undefined;
|
|
616
|
+
}[];
|
|
605
617
|
details?: Record<string, unknown> | undefined;
|
|
606
618
|
}, {
|
|
607
619
|
score: number;
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
620
|
+
assertions?: {
|
|
621
|
+
text: string;
|
|
622
|
+
passed: boolean;
|
|
623
|
+
evidence?: string | undefined;
|
|
624
|
+
}[] | undefined;
|
|
611
625
|
details?: Record<string, unknown> | undefined;
|
|
612
626
|
}>;
|
|
613
627
|
/**
|
|
@@ -1516,22 +1530,36 @@ declare const CodeJudgeInputSchema: z.ZodObject<{
|
|
|
1516
1530
|
/** @deprecated Use CodeGraderResultSchema */
|
|
1517
1531
|
declare const CodeJudgeResultSchema: z.ZodObject<{
|
|
1518
1532
|
score: z.ZodNumber;
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1533
|
+
assertions: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1534
|
+
text: z.ZodString;
|
|
1535
|
+
passed: z.ZodBoolean;
|
|
1536
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
1537
|
+
}, "strip", z.ZodTypeAny, {
|
|
1538
|
+
text: string;
|
|
1539
|
+
passed: boolean;
|
|
1540
|
+
evidence?: string | undefined;
|
|
1541
|
+
}, {
|
|
1542
|
+
text: string;
|
|
1543
|
+
passed: boolean;
|
|
1544
|
+
evidence?: string | undefined;
|
|
1545
|
+
}>, "many">>>;
|
|
1522
1546
|
/** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
|
|
1523
1547
|
details: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
1524
1548
|
}, "strip", z.ZodTypeAny, {
|
|
1525
1549
|
score: number;
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1550
|
+
assertions: {
|
|
1551
|
+
text: string;
|
|
1552
|
+
passed: boolean;
|
|
1553
|
+
evidence?: string | undefined;
|
|
1554
|
+
}[];
|
|
1529
1555
|
details?: Record<string, unknown> | undefined;
|
|
1530
1556
|
}, {
|
|
1531
1557
|
score: number;
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1558
|
+
assertions?: {
|
|
1559
|
+
text: string;
|
|
1560
|
+
passed: boolean;
|
|
1561
|
+
evidence?: string | undefined;
|
|
1562
|
+
}[] | undefined;
|
|
1535
1563
|
details?: Record<string, unknown> | undefined;
|
|
1536
1564
|
}>;
|
|
1537
1565
|
/** @deprecated Use CodeGraderInput */
|
|
@@ -1634,7 +1662,7 @@ declare class TargetInvocationError extends Error {
|
|
|
1634
1662
|
*
|
|
1635
1663
|
* if (!target) {
|
|
1636
1664
|
* // Target not available - no target config on this evaluator
|
|
1637
|
-
* return { score: 0.5,
|
|
1665
|
+
* return { score: 0.5, assertions: [{ text: 'Target not available', passed: false }] };
|
|
1638
1666
|
* }
|
|
1639
1667
|
*
|
|
1640
1668
|
* const response = await target.invoke({
|
|
@@ -1675,19 +1703,23 @@ type AssertionType = 'llm-grader' | 'code-grader' | 'rubrics' | 'composite' | 't
|
|
|
1675
1703
|
/**
|
|
1676
1704
|
* Result returned from an assertion handler.
|
|
1677
1705
|
*
|
|
1678
|
-
* @example Pass with
|
|
1706
|
+
 * @example Pass with assertions
|
|
1679
1707
|
* ```ts
|
|
1680
|
-
* { pass: true,
|
|
1708
|
+
* { pass: true, assertions: [{ text: 'Output contains expected keywords', passed: true }] }
|
|
1681
1709
|
* ```
|
|
1682
1710
|
*
|
|
1683
|
-
* @example Fail with
|
|
1711
|
+
 * @example Fail with explicit score
|
|
1684
1712
|
* ```ts
|
|
1685
|
-
* { pass: false,
|
|
1713
|
+
* { pass: false, score: 0.3, assertions: [{ text: 'Missing required header', passed: false }] }
|
|
1686
1714
|
* ```
|
|
1687
1715
|
*
|
|
1688
1716
|
* @example Granular score (0-1)
|
|
1689
1717
|
* ```ts
|
|
1690
|
-
* { score: 0.75,
|
|
1718
|
+
* { score: 0.75, assertions: [
|
|
1719
|
+
* { text: 'Format correct', passed: true },
|
|
1720
|
+
* { text: 'Content relevant', passed: true },
|
|
1721
|
+
* { text: 'Missing citation', passed: false },
|
|
1722
|
+
* ] }
|
|
1691
1723
|
* ```
|
|
1692
1724
|
*/
|
|
1693
1725
|
interface AssertionScore {
|
|
@@ -1695,12 +1727,12 @@ interface AssertionScore {
|
|
|
1695
1727
|
readonly pass?: boolean;
|
|
1696
1728
|
/** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */
|
|
1697
1729
|
readonly score?: number;
|
|
1698
|
-
/**
|
|
1699
|
-
readonly
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1730
|
+
/** Per-assertion verdicts with optional evidence. */
|
|
1731
|
+
readonly assertions?: readonly {
|
|
1732
|
+
readonly text: string;
|
|
1733
|
+
readonly passed: boolean;
|
|
1734
|
+
readonly evidence?: string;
|
|
1735
|
+
}[];
|
|
1704
1736
|
/** Optional structured details for domain-specific metrics. */
|
|
1705
1737
|
readonly details?: Record<string, unknown>;
|
|
1706
1738
|
}
|
|
@@ -1740,7 +1772,7 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1740
1772
|
*
|
|
1741
1773
|
* export default defineAssertion(({ outputText }) => ({
|
|
1742
1774
|
* pass: outputText.includes('hello'),
|
|
1743
|
-
*
|
|
1775
|
+
* assertions: [{ text: 'Checks greeting', passed: outputText.includes('hello') }],
|
|
1744
1776
|
* }));
|
|
1745
1777
|
* ```
|
|
1746
1778
|
*
|
|
@@ -1751,8 +1783,7 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1751
1783
|
*
|
|
1752
1784
|
* export default defineCodeGrader(({ trace, outputText }) => ({
|
|
1753
1785
|
* score: trace?.eventCount <= 5 ? 1.0 : 0.5,
|
|
1754
|
-
*
|
|
1755
|
-
* misses: [],
|
|
1786
|
+
* assertions: [{ text: 'Efficient tool usage', passed: trace?.eventCount <= 5 }],
|
|
1756
1787
|
* }));
|
|
1757
1788
|
* ```
|
|
1758
1789
|
*
|
|
@@ -1764,7 +1795,7 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1764
1795
|
* export default defineCodeGrader(async ({ inputText }) => {
|
|
1765
1796
|
* const target = createTargetClient();
|
|
1766
1797
|
* if (!target) {
|
|
1767
|
-
* return { score: 0,
|
|
1798
|
+
* return { score: 0, assertions: [{ text: 'Target not available', passed: false }] };
|
|
1768
1799
|
* }
|
|
1769
1800
|
*
|
|
1770
1801
|
* const response = await target.invoke({
|
|
@@ -1798,14 +1829,13 @@ type CodeJudgeHandler = CodeGraderHandler;
|
|
|
1798
1829
|
*
|
|
1799
1830
|
* export default defineCodeGrader(({ trace }) => {
|
|
1800
1831
|
* if (!trace) {
|
|
1801
|
-
* return { score: 0.5,
|
|
1832
|
+
* return { score: 0.5, assertions: [{ text: 'No trace available', passed: false }] };
|
|
1802
1833
|
* }
|
|
1803
1834
|
*
|
|
1804
1835
|
* const efficient = trace.eventCount <= 10;
|
|
1805
1836
|
* return {
|
|
1806
1837
|
* score: efficient ? 1.0 : 0.5,
|
|
1807
|
-
*
|
|
1808
|
-
* misses: efficient ? [] : ['Too many tool calls'],
|
|
1838
|
+
* assertions: [{ text: efficient ? 'Efficient execution' : 'Too many tool calls', passed: efficient }],
|
|
1809
1839
|
* };
|
|
1810
1840
|
* });
|
|
1811
1841
|
* ```
|
|
@@ -1889,7 +1919,7 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
|
|
|
1889
1919
|
*
|
|
1890
1920
|
* export default defineAssertion(({ outputText }) => ({
|
|
1891
1921
|
* pass: outputText.toLowerCase().includes('hello'),
|
|
1892
|
-
*
|
|
1922
|
+
* assertions: [{ text: 'Checks for greeting', passed: outputText.toLowerCase().includes('hello') }],
|
|
1893
1923
|
* }));
|
|
1894
1924
|
* ```
|
|
1895
1925
|
*
|
|
@@ -1902,9 +1932,9 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
|
|
|
1902
1932
|
* const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;
|
|
1903
1933
|
* return {
|
|
1904
1934
|
* score: hasContent + isEfficient,
|
|
1905
|
-
*
|
|
1906
|
-
*
|
|
1907
|
-
*
|
|
1935
|
+
* assertions: [
|
|
1936
|
+
* { text: 'Has content', passed: !!hasContent },
|
|
1937
|
+
* { text: 'Efficient', passed: !!isEfficient },
|
|
1908
1938
|
* ],
|
|
1909
1939
|
* };
|
|
1910
1940
|
* });
|
package/dist/index.js
CHANGED
|
@@ -65,9 +65,13 @@ var CodeGraderInputSchema = z.object({
|
|
|
65
65
|
});
|
|
66
66
|
var CodeGraderResultSchema = z.object({
|
|
67
67
|
score: z.number().min(0).max(1),
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
68
|
+
assertions: z.array(
|
|
69
|
+
z.object({
|
|
70
|
+
text: z.string(),
|
|
71
|
+
passed: z.boolean(),
|
|
72
|
+
evidence: z.string().optional()
|
|
73
|
+
})
|
|
74
|
+
).optional().default([]),
|
|
71
75
|
/** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
|
|
72
76
|
details: z.record(z.unknown()).optional()
|
|
73
77
|
});
|
|
@@ -304,9 +308,7 @@ function normalizeScore(result) {
|
|
|
304
308
|
}
|
|
305
309
|
return {
|
|
306
310
|
score,
|
|
307
|
-
|
|
308
|
-
misses: result.misses ? [...result.misses] : [],
|
|
309
|
-
reasoning: result.reasoning,
|
|
311
|
+
assertions: result.assertions ? [...result.assertions] : [],
|
|
310
312
|
details: result.details
|
|
311
313
|
};
|
|
312
314
|
}
|
|
@@ -339,9 +341,7 @@ async function runAssertion(handler) {
|
|
|
339
341
|
const errorMessage = formatError(error);
|
|
340
342
|
const errorResult = {
|
|
341
343
|
score: 0,
|
|
342
|
-
|
|
343
|
-
misses: [errorMessage],
|
|
344
|
-
reasoning: `Assertion failed: ${errorMessage}`
|
|
344
|
+
assertions: [{ text: `Assertion failed: ${errorMessage}`, passed: false }]
|
|
345
345
|
};
|
|
346
346
|
console.log(JSON.stringify(errorResult, null, 2));
|
|
347
347
|
process.exit(1);
|
|
@@ -416,9 +416,7 @@ async function runCodeGrader(handler) {
|
|
|
416
416
|
const errorMessage = formatError2(error);
|
|
417
417
|
const errorResult = {
|
|
418
418
|
score: 0,
|
|
419
|
-
|
|
420
|
-
misses: [errorMessage],
|
|
421
|
-
reasoning: `Evaluation failed: ${errorMessage}`
|
|
419
|
+
assertions: [{ text: `Evaluation failed: ${errorMessage}`, passed: false }]
|
|
422
420
|
};
|
|
423
421
|
console.log(JSON.stringify(errorResult, null, 2));
|
|
424
422
|
process.exit(1);
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/schemas.ts","../src/target-client.ts","../src/index.ts","../src/assertion.ts","../src/case-conversion.ts","../src/deprecation.ts","../src/prompt-template.ts","../src/runtime.ts"],"sourcesContent":["/**\n * Zod schemas for code grader input/output validation.\n * Provides both compile-time types and runtime validation.\n */\nimport { z } from 'zod';\n\n/**\n * Token usage metrics schema.\n */\nexport const TokenUsageSchema = z.object({\n input: z.number(),\n output: z.number(),\n cached: z.number().optional(),\n});\n\n/**\n * Trace summary schema (camelCase for TypeScript ergonomics).\n */\nexport const TraceSummarySchema = z.object({\n eventCount: z.number(),\n toolNames: z.array(z.string()),\n toolCallsByName: z.record(z.string(), z.number()),\n errorCount: z.number(),\n toolDurations: z.record(z.string(), z.array(z.number())).optional(),\n llmCallCount: z.number().optional(),\n});\n\n/**\n * Tool call schema.\n */\nexport const ToolCallSchema = z.object({\n tool: z.string(),\n input: z.unknown().optional(),\n output: z.unknown().optional(),\n id: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n});\n\n/**\n * Unified message schema for input, expected, and output messages.\n */\nexport const MessageSchema = z.object({\n role: z.enum(['assistant', 'user', 'system', 'tool']),\n content: z.union([z.string(), z.record(z.unknown()), z.array(z.record(z.unknown()))]).optional(),\n toolCalls: z.array(ToolCallSchema).optional(),\n name: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n metadata: z.record(z.unknown()).optional(),\n});\n\n/**\n * Code grader input schema (camelCase, converted from snake_case wire format).\n *\n * Text convenience accessors (`inputText`, `outputText`, `expectedOutputText`) are always\n * strings. 
Structured fields (`input`, `output`, `expectedOutput`) are always `Message[]`.\n */\nexport const CodeGraderInputSchema = z.object({\n /** @deprecated Use `inputText` instead. First user message content as string. */\n question: z.string(),\n criteria: z.string(),\n expectedOutput: z.array(MessageSchema),\n /** @deprecated Use `expectedOutputText` instead. Expected output content as string. */\n referenceAnswer: z.string().optional(),\n /** @deprecated Use `outputText` instead. Last assistant message content as string. */\n answer: z.string(),\n output: z.array(MessageSchema).nullable().optional(),\n /** Path to a temp file containing the output JSON (used for large payloads). */\n outputPath: z.string().optional(),\n guidelineFiles: z.array(z.string()),\n inputFiles: z.array(z.string()),\n input: z.array(MessageSchema),\n trace: TraceSummarySchema.nullable().optional(),\n tokenUsage: TokenUsageSchema.nullable().optional(),\n costUsd: z.number().nullable().optional(),\n durationMs: z.number().nullable().optional(),\n startTime: z.string().nullable().optional(),\n endTime: z.string().nullable().optional(),\n fileChanges: z.string().nullable().optional(),\n workspacePath: z.string().nullable().optional(),\n config: z.record(z.unknown()).nullable().optional(),\n /** First user message content as string. Replaces `question`. */\n inputText: z.string().optional(),\n /** Last assistant message content as string. Replaces `answer`. */\n outputText: z.string().optional(),\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n expectedOutputText: z.string().optional(),\n});\n\n/**\n * Code grader result schema (validated before output).\n */\nexport const CodeGraderResultSchema = z.object({\n score: z.number().min(0).max(1),\n hits: z.array(z.string()).optional().default([]),\n misses: z.array(z.string()).optional().default([]),\n reasoning: z.string().optional(),\n /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */\n details: z.record(z.unknown()).optional(),\n});\n\n/**\n * Inferred types from schemas.\n */\nexport type CodeGraderInput = z.infer<typeof CodeGraderInputSchema>;\nexport type CodeGraderResult = z.infer<typeof CodeGraderResultSchema>;\n\n/**\n * CodeGraderInput after `enrichInput()` has run.\n *\n * The text convenience accessors (`inputText`, `outputText`, `expectedOutputText`)\n * are always populated by the runtime before the handler is called, so they are\n * guaranteed to be `string` (never `undefined`).\n *\n * Handler function signatures (`CodeGraderHandler`, `AssertionHandler`) use this\n * type so that user code can destructure `{ outputText }` without null-checks.\n */\nexport type EnrichedCodeGraderInput = Omit<\n CodeGraderInput,\n 'inputText' | 'outputText' | 'expectedOutputText'\n> & {\n /** First user message content as string. Replaces `question`. */\n readonly inputText: string;\n /** Last assistant message content as string. Replaces `answer`. */\n readonly outputText: string;\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n readonly expectedOutputText: string;\n};\nexport type TraceSummary = z.infer<typeof TraceSummarySchema>;\nexport type Message = z.infer<typeof MessageSchema>;\nexport type ToolCall = z.infer<typeof ToolCallSchema>;\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * Prompt template input schema (camelCase, converted from snake_case wire format).\n * Uses the same schema as CodeGraderInput since the orchestrator sends identical payloads.\n */\nexport const PromptTemplateInputSchema = CodeGraderInputSchema;\n\nexport type PromptTemplateInput = CodeGraderInput;\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderInputSchema */\nexport const CodeJudgeInputSchema = CodeGraderInputSchema;\n/** @deprecated Use CodeGraderResultSchema */\nexport const CodeJudgeResultSchema = CodeGraderResultSchema;\n/** @deprecated Use CodeGraderInput */\nexport type CodeJudgeInput = CodeGraderInput;\n/** @deprecated Use CodeGraderResult */\nexport type CodeJudgeResult = CodeGraderResult;\n","/**\n * Client for invoking configured targets from code-grader scripts.\n *\n * Environment variables (set automatically by AgentV when `target` config is present):\n * - AGENTV_TARGET_PROXY_URL: The URL of the local proxy server\n * - AGENTV_TARGET_PROXY_TOKEN: Bearer token for authentication\n */\n\nimport type { TokenUsage } from './schemas.js';\n\n/**\n * Request to invoke the target\n */\nexport interface TargetInvokeRequest {\n readonly question: string;\n readonly systemPrompt?: string;\n readonly evalCaseId?: string;\n readonly attempt?: number;\n /** Optional target override - use a different target for this invocation */\n readonly target?: string;\n}\n\n/**\n * Response from a target invocation\n */\nexport interface TargetInvokeResponse {\n readonly output: readonly unknown[];\n readonly rawText?: string;\n readonly tokenUsage?: TokenUsage;\n}\n\n/**\n * Information about the target proxy 
configuration\n */\nexport interface TargetInfo {\n /** Name of the default target being used */\n readonly targetName: string;\n /** Maximum number of calls allowed */\n readonly maxCalls: number;\n /** Current number of calls made */\n readonly callCount: number;\n /** List of all available target names */\n readonly availableTargets: readonly string[];\n}\n\n/**\n * Target client for making target invocations\n */\nexport interface TargetClient {\n /**\n * Invoke the configured target with a prompt.\n * @param request - The question and optional system prompt\n * @returns The target's response with output messages and optional raw text\n */\n invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse>;\n\n /**\n * Invoke the target with multiple requests in sequence.\n * Each request counts toward the max_calls limit.\n * @param requests - Array of target requests\n * @returns Array of target responses\n */\n invokeBatch(requests: readonly TargetInvokeRequest[]): Promise<readonly TargetInvokeResponse[]>;\n\n /**\n * Get information about the target proxy configuration.\n * Returns the default target name, max calls, current call count, and available targets.\n */\n getInfo(): Promise<TargetInfo>;\n}\n\n/**\n * Error thrown when target proxy is not available\n */\nexport class TargetNotAvailableError extends Error {\n constructor(message: string) {\n super(message);\n this.name = 'TargetNotAvailableError';\n }\n}\n\n/**\n * Error thrown when target invocation fails\n */\nexport class TargetInvocationError extends Error {\n readonly statusCode?: number;\n\n constructor(message: string, statusCode?: number) {\n super(message);\n this.name = 'TargetInvocationError';\n this.statusCode = statusCode;\n }\n}\n\n/**\n * Create a target client from environment variables.\n *\n * This function reads the proxy URL and token from environment variables\n * that are automatically set by AgentV when a `target` config block is present\n * on a `code_grader` (or 
`code_judge`) evaluator.\n *\n * @returns A target client if environment variables are set, otherwise undefined\n * @throws TargetNotAvailableError if token is missing when URL is present\n *\n * @example\n * ```typescript\n * import { createTargetClient, defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ question, criteria }) => {\n * const target = createTargetClient();\n *\n * if (!target) {\n * // Target not available - no target config on this evaluator\n * return { score: 0.5, reasoning: 'Target not available' };\n * }\n *\n * const response = await target.invoke({\n * question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,\n * systemPrompt: 'You are an expert evaluator. Respond with JSON: { \"correct\": true/false }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.correct ? 1.0 : 0.0 };\n * });\n * ```\n */\nexport function createTargetClient(): TargetClient | undefined {\n const proxyUrl = process.env.AGENTV_TARGET_PROXY_URL;\n const proxyToken = process.env.AGENTV_TARGET_PROXY_TOKEN;\n\n if (!proxyUrl) {\n return undefined;\n }\n\n if (!proxyToken) {\n throw new TargetNotAvailableError(\n 'AGENTV_TARGET_PROXY_URL is set but AGENTV_TARGET_PROXY_TOKEN is missing',\n );\n }\n\n return createTargetClientInternal(proxyUrl, proxyToken);\n}\n\n/**\n * Internal: Create a target client with explicit URL and token.\n * Exported for testing only - use createTargetClient() in production.\n */\nexport function createTargetClientInternal(url: string, token: string): TargetClient {\n const headers = {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${token}`,\n };\n\n return {\n async invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse> {\n const response = await fetch(`${url}/invoke`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n question: request.question,\n systemPrompt: request.systemPrompt,\n evalCaseId: 
request.evalCaseId,\n attempt: request.attempt,\n target: request.target,\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInvokeResponse;\n },\n\n async invokeBatch(\n requests: readonly TargetInvokeRequest[],\n ): Promise<readonly TargetInvokeResponse[]> {\n const response = await fetch(`${url}/invokeBatch`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n requests: requests.map((r) => ({\n question: r.question,\n systemPrompt: r.systemPrompt,\n evalCaseId: r.evalCaseId,\n attempt: r.attempt,\n target: r.target,\n })),\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n const result = (await response.json()) as { responses: TargetInvokeResponse[] };\n return result.responses;\n },\n\n async getInfo(): Promise<TargetInfo> {\n const response = await fetch(`${url}/info`, {\n method: 'GET',\n headers,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? 
`HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInfo;\n },\n };\n}\n","/**\n * AgentV Evaluation SDK\n *\n * Build custom evaluators for AI agent outputs.\n *\n * @example Custom assertion (simplest way to add evaluation logic)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.includes('hello'),\n * reasoning: 'Checks greeting',\n * }));\n * ```\n *\n * @example Code grader (full control)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace, outputText }) => ({\n * score: trace?.eventCount <= 5 ? 1.0 : 0.5,\n * hits: ['Efficient tool usage'],\n * misses: [],\n * }));\n * ```\n *\n * @example Code grader with target access (requires `target` config in YAML)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader, createTargetClient } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ inputText }) => {\n * const target = createTargetClient();\n * if (!target) {\n * return { score: 0, misses: ['Target not available'] };\n * }\n *\n * const response = await target.invoke({\n * question: `Evaluate: ${inputText}`,\n * systemPrompt: 'Respond with JSON: { \"score\": 0-1 }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.score ?? 
0 };\n * });\n * ```\n *\n * @packageDocumentation\n */\n\n// Re-export schemas and types\nexport {\n CodeGraderInputSchema,\n CodeGraderResultSchema,\n TraceSummarySchema,\n MessageSchema,\n ToolCallSchema,\n TokenUsageSchema,\n PromptTemplateInputSchema,\n type CodeGraderInput,\n type CodeGraderResult,\n type EnrichedCodeGraderInput,\n type TraceSummary,\n type Message,\n type ToolCall,\n type TokenUsage,\n type PromptTemplateInput,\n // Backward-compat aliases (deprecated)\n CodeJudgeInputSchema,\n CodeJudgeResultSchema,\n type CodeJudgeInput,\n type CodeJudgeResult,\n} from './schemas.js';\n\n// Re-export target client\nexport {\n createTargetClient,\n TargetNotAvailableError,\n TargetInvocationError,\n type TargetClient,\n type TargetInfo,\n type TargetInvokeRequest,\n type TargetInvokeResponse,\n} from './target-client.js';\n\n// Re-export Zod for typed config support\nexport { z } from 'zod';\n\n// Re-export assertion types\nexport type {\n AssertionContext,\n AssertionHandler,\n AssertionScore,\n AssertionType,\n} from './assertion.js';\n\nimport { type AssertionHandler, runAssertion } from './assertion.js';\nimport { type PromptTemplateHandler, runPromptTemplate } from './prompt-template.js';\nimport { type CodeGraderHandler, type CodeJudgeHandler, runCodeGrader } from './runtime.js';\n\nexport type { CodeGraderHandler };\n/** @deprecated Use CodeGraderHandler */\nexport type { CodeJudgeHandler };\nexport type { PromptTemplateHandler };\n\n/**\n * Define a code grader evaluator with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Validates the result and outputs JSON to stdout\n * 5. 
Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the input and returns a result\n *\n * @example\n * ```typescript\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace }) => {\n * if (!trace) {\n * return { score: 0.5, reasoning: 'No trace available' };\n * }\n *\n * const efficient = trace.eventCount <= 10;\n * return {\n * score: efficient ? 1.0 : 0.5,\n * hits: efficient ? ['Efficient execution'] : [],\n * misses: efficient ? [] : ['Too many tool calls'],\n * };\n * });\n * ```\n *\n * @example With typed config\n * ```typescript\n * import { defineCodeGrader, z } from '@agentv/eval';\n *\n * const ConfigSchema = z.object({\n * maxToolCalls: z.number().default(10),\n * });\n *\n * export default defineCodeGrader(({ trace, config }) => {\n * const { maxToolCalls } = ConfigSchema.parse(config ?? {});\n * // Use maxToolCalls...\n * });\n * ```\n */\nexport function defineCodeGrader(handler: CodeGraderHandler): void {\n // Run immediately when module is loaded\n runCodeGrader(handler);\n}\n\n/** @deprecated Use defineCodeGrader */\nexport const defineCodeJudge = defineCodeGrader;\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? 
`Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n\n/**\n * Define a custom assertion evaluator with automatic stdin/stdout handling.\n *\n * Assertions are the simplest way to add custom evaluation logic. They receive\n * the full evaluation context and return a pass/fail result with optional\n * granular scoring.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed context\n * 4. Normalizes the result (pass→score, clamp, etc.)\n * 5. Outputs JSON to stdout\n * 6. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the context and returns a result\n *\n * @example Simple pass/fail\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.toLowerCase().includes('hello'),\n * reasoning: 'Checks for greeting',\n * }));\n * ```\n *\n * @example Granular scoring\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText, trace }) => {\n * const hasContent = outputText.length > 0 ? 0.5 : 0;\n * const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;\n * return {\n * score: hasContent + isEfficient,\n * hits: [\n * ...(hasContent ? ['Has content'] : []),\n * ...(isEfficient ? 
['Efficient'] : []),\n * ],\n * };\n * });\n * ```\n */\nexport function defineAssertion(handler: AssertionHandler): void {\n runAssertion(handler);\n}\n","/**\n * Runtime for custom assertion evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n *\n * Assertions receive the same input as code graders but use a simplified result\n * contract focused on pass/fail with optional score granularity.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Context provided to assertion handlers.\n *\n * Same shape as CodeGraderInput but with `inputText`, `outputText`, and\n * `expectedOutputText` guaranteed to be strings (populated by the runtime\n * before the handler is called).\n */\nexport type AssertionContext = EnrichedCodeGraderInput;\n\n/**\n * Known built-in assertion types. 
Custom types are extensible via string.\n *\n * Use in EVAL.yaml `assertions` blocks:\n * ```yaml\n * assertions:\n * - type: contains\n * value: \"Paris\"\n * ```\n *\n * Custom types registered via `.agentv/assertions/` or `defineAssertion()`\n * are also valid — the `string & {}` escape hatch provides autocomplete\n * for known types while accepting any string.\n */\nexport type AssertionType =\n // kebab-case (canonical internal form)\n | 'llm-grader'\n | 'code-grader'\n | 'rubrics'\n | 'composite'\n | 'tool-trajectory'\n | 'field-accuracy'\n | 'latency'\n | 'cost'\n | 'token-usage'\n | 'execution-metrics'\n | 'skill-trigger'\n | 'contains'\n | 'contains-any'\n | 'contains-all'\n | 'icontains'\n | 'icontains-any'\n | 'icontains-all'\n | 'starts-with'\n | 'ends-with'\n | 'equals'\n | 'regex'\n | 'is-json'\n // legacy aliases (still accepted)\n | 'llm-judge'\n | 'code-judge'\n | 'llm_judge'\n | 'code_judge'\n | 'llm_grader'\n | 'code_grader'\n | 'tool_trajectory'\n | 'field_accuracy'\n | 'token_usage'\n | 'execution_metrics'\n | 'contains_any'\n | 'contains_all'\n | 'icontains_any'\n | 'icontains_all'\n | 'starts_with'\n | 'ends_with'\n | 'is_json'\n | (string & {});\n\n/**\n * Result returned from an assertion handler.\n *\n * @example Pass with reasoning\n * ```ts\n * { pass: true, reasoning: 'Output contains expected keywords' }\n * ```\n *\n * @example Fail with misses\n * ```ts\n * { pass: false, misses: ['Missing required header'], score: 0.3 }\n * ```\n *\n * @example Granular score (0-1)\n * ```ts\n * { score: 0.75, hits: ['Format correct', 'Content relevant'], misses: ['Missing citation'] }\n * ```\n */\nexport interface AssertionScore {\n /** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */\n readonly pass?: boolean;\n /** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */\n readonly score?: number;\n /** Aspects that passed. */\n readonly hits?: readonly string[];\n /** Aspects that failed. 
*/\n readonly misses?: readonly string[];\n /** Human-readable explanation. */\n readonly reasoning?: string;\n /** Optional structured details for domain-specific metrics. */\n readonly details?: Record<string, unknown>;\n}\n\n/**\n * Handler function type for assertions.\n */\nexport type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;\n\n/**\n * Read stdin synchronously.\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Normalize an AssertionScore to a CodeGraderResult for wire compatibility.\n */\nfunction normalizeScore(result: AssertionScore): CodeGraderResult {\n let score: number;\n if (result.score !== undefined) {\n score = clampScore(result.score);\n } else if (result.pass !== undefined) {\n score = result.pass ? 1 : 0;\n } else {\n score = 0;\n }\n\n return {\n score,\n hits: result.hits ? [...result.hits] : [],\n misses: result.misses ? 
[...result.misses] : [],\n reasoning: result.reasoning,\n details: result.details,\n };\n}\n\n/**\n * Run an assertion handler with full stdin/stdout handling.\n * This is the internal implementation called by defineAssertion.\n */\nexport async function runAssertion(handler: AssertionHandler): Promise<void> {\n try {\n const stdin = readStdin();\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n const camelInput = toCamelCaseDeep(rawInput);\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // Lazy file-backed output loading\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // After enrichment, text accessors are guaranteed to be strings\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n const normalized = normalizeScore(rawResult);\n const result = CodeGraderResultSchema.parse(normalized);\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Assertion failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n","/**\n * Case conversion utilities for JSON payloads.\n * Converts between snake_case (wire format) and camelCase (TypeScript).\n */\n\nfunction toCamelCase(str: string): string {\n // Don't convert keys that start with uppercase (proper nouns/tool names)\n if (/^[A-Z]/.test(str)) {\n return str;\n }\n return str.replace(/_([a-z0-9])/g, (_, letter) => 
letter.toUpperCase());\n}\n\n/**\n * Recursively converts all keys in an object from snake_case to camelCase.\n * Used to map wire payloads into TypeScript-friendly shapes.\n *\n * @param obj - The object to convert (can be any JSON-serializable value)\n * @returns A new object with all keys converted to camelCase\n */\nexport function toCamelCaseDeep(obj: unknown): unknown {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => toCamelCaseDeep(item));\n }\n\n if (typeof obj === 'object') {\n const result: Record<string, unknown> = {};\n for (const [key, value] of Object.entries(obj)) {\n const camelKey = toCamelCase(key);\n result[camelKey] = toCamelCaseDeep(value);\n }\n return result;\n }\n\n return obj;\n}\n","/**\n * Deprecation warning utilities for code grader and assertion runtimes.\n * Provides text convenience accessors and deprecation warnings on legacy field names.\n */\nimport type { CodeGraderInput } from './schemas.js';\n\nconst ANSI_YELLOW = '\\u001b[33m';\nconst ANSI_RESET = '\\u001b[0m';\n\n/**\n * Emit a deprecation warning to stderr (once per field name per process).\n */\nconst deprecationWarned = new Set<string>();\nfunction warnDeprecation(oldName: string, newName: string): void {\n if (deprecationWarned.has(oldName)) return;\n deprecationWarned.add(oldName);\n console.warn(\n `${ANSI_YELLOW}Warning: '${oldName}' is deprecated in code graders. Use '${newName}' instead.${ANSI_RESET}`,\n );\n}\n\n/**\n * Reset deprecation warning state. Used only in tests.\n */\nexport function resetDeprecationWarnings(): void {\n deprecationWarned.clear();\n}\n\n/**\n * Populate `inputText`, `outputText`, and `expectedOutputText` convenience accessors\n * on the validated input object, and install deprecation warnings on legacy fields.\n *\n * Text accessors are always strings. 
Structured fields (`input`, `output`, `expectedOutput`)\n * remain `Message[]` always.\n */\nexport function enrichInput(input: CodeGraderInput): CodeGraderInput {\n // Populate text convenience accessors (always strings)\n // inputText = question (first user message content as string)\n const inputText = input.question;\n // outputText = answer (last assistant message content as string)\n const outputText = input.answer;\n // expectedOutputText = referenceAnswer (expected output content as string)\n const expectedOutputText = input.referenceAnswer ?? '';\n\n // Store the original values before redefining properties\n const originalQuestion = input.question;\n const originalAnswer = input.answer;\n const originalReferenceAnswer = input.referenceAnswer;\n\n // Set new text accessor values\n Object.defineProperty(input, 'inputText', {\n value: inputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'outputText', {\n value: outputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'expectedOutputText', {\n value: expectedOutputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n\n // Install deprecation warnings on legacy fields via property accessors\n Object.defineProperty(input, 'question', {\n get() {\n warnDeprecation('question', 'inputText');\n return originalQuestion;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'answer', {\n get() {\n warnDeprecation('answer', 'outputText');\n return originalAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'referenceAnswer', {\n get() {\n warnDeprecation('referenceAnswer', 'expectedOutputText');\n return originalReferenceAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n return input;\n}\n","/**\n * Runtime for prompt template evaluators.\n * Handles stdin parsing, validation, error handling, and string 
output.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport { type EnrichedCodeGraderInput, PromptTemplateInputSchema } from './schemas.js';\n\n/**\n * Handler function type for prompt templates.\n * Returns the prompt string to use for evaluation.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * `expectedOutputText` are always populated before the handler is called.\n */\nexport type PromptTemplateHandler = (input: EnrichedCodeGraderInput) => string | Promise<string>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Run a prompt template handler with full stdin/stdout handling.\n * This is the internal implementation called by definePromptTemplate.\n */\nexport async function runPromptTemplate(handler: PromptTemplateHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = PromptTemplateInputSchema.parse(camelInput);\n\n // 5. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 6. Run handler (input is now enriched with guaranteed text accessors)\n const prompt = await handler(input as EnrichedCodeGraderInput);\n\n // 6. Output raw string (not JSON) - the prompt itself\n console.log(prompt);\n } catch (error) {\n // Output error to stderr and exit with non-zero code\n console.error(error instanceof Error ? error.message : String(error));\n process.exit(1);\n }\n}\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. 
Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? `Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n *\n * @example Async handler\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate(async (ctx) => {\n * // Async operations are supported\n * return `Question: ${ctx.inputText}\\nAnswer: ${ctx.outputText}`;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n","/**\n * Runtime for code grader evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Handler function type for code graders.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * 
`expectedOutputText` are always populated before the handler is called.\n */\nexport type CodeGraderHandler = (\n input: EnrichedCodeGraderInput,\n) => CodeGraderResult | Promise<CodeGraderResult>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\n/**\n * Format an error for output.\n */\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Run a code grader handler with full stdin/stdout handling.\n * This is the internal implementation called by defineCodeGrader.\n */\nexport async function runCodeGrader(handler: CodeGraderHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // 5. Set up lazy file-backed output loading if applicable\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // 6. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 7. 
Run handler (input is now enriched with guaranteed text accessors)\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n\n // 8. Validate and normalize output\n const result = CodeGraderResultSchema.parse({\n ...rawResult,\n score: clampScore(rawResult.score),\n });\n\n // 9. Output JSON\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n // Output failure result\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Evaluation failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderHandler */\nexport type CodeJudgeHandler = CodeGraderHandler;\n/** @deprecated Use runCodeGrader */\nexport const runCodeJudge = runCodeGrader;\n"],"mappings":";AAIA,SAAS,SAAS;AAKX,IAAM,mBAAmB,EAAE,OAAO;AAAA,EACvC,OAAO,EAAE,OAAO;AAAA,EAChB,QAAQ,EAAE,OAAO;AAAA,EACjB,QAAQ,EAAE,OAAO,EAAE,SAAS;AAC9B,CAAC;AAKM,IAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,YAAY,EAAE,OAAO;AAAA,EACrB,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EAC7B,iBAAiB,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC;AAAA,EAChD,YAAY,EAAE,OAAO;AAAA,EACrB,eAAe,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,EAAE,SAAS;AAAA,EAClE,cAAc,EAAE,OAAO,EAAE,SAAS;AACpC,CAAC;AAKM,IAAM,iBAAiB,EAAE,OAAO;AAAA,EACrC,MAAM,EAAE,OAAO;AAAA,EACf,OAAO,EAAE,QAAQ,EAAE,SAAS;AAAA,EAC5B,QAAQ,EAAE,QAAQ,EAAE,SAAS;AAAA,EAC7B,IAAI,EAAE,OAAO,EAAE,SAAS;AAAA,EACxB,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAKM,IAAM,gBAAgB,EAAE,OAAO;AAAA,EACpC,MAAM,EAAE,KAAK,CAAC,aAAa,QAAQ,UAAU,MAAM,CAAC;AAAA,EACpD,SAAS,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,OAAO,EAAE,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS;AAAA,EAC/F,WAAW,EAAE,MAAM,cAAc,EAAE,SAAS;AAAA,EAC5C,MAAM,EAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,WAAW,EAAE,OAAO,EA
AE,SAAS;AAAA,EAC/B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS;AAC3C,CAAC;AAQM,IAAM,wBAAwB,EAAE,OAAO;AAAA;AAAA,EAE5C,UAAU,EAAE,OAAO;AAAA,EACnB,UAAU,EAAE,OAAO;AAAA,EACnB,gBAAgB,EAAE,MAAM,aAAa;AAAA;AAAA,EAErC,iBAAiB,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAErC,QAAQ,EAAE,OAAO;AAAA,EACjB,QAAQ,EAAE,MAAM,aAAa,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAEnD,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,gBAAgB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EAClC,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EAC9B,OAAO,EAAE,MAAM,aAAa;AAAA,EAC5B,OAAO,mBAAmB,SAAS,EAAE,SAAS;AAAA,EAC9C,YAAY,iBAAiB,SAAS,EAAE,SAAS;AAAA,EACjD,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3C,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC1C,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,aAAa,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC5C,eAAe,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAElD,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAEhC,oBAAoB,EAAE,OAAO,EAAE,SAAS;AAC1C,CAAC;AAKM,IAAM,yBAAyB,EAAE,OAAO;AAAA,EAC7C,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAC9B,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC/C,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EACjD,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,SAAS,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS;AAC1C,CAAC;AAsCM,IAAM,4BAA4B;AAMlC,IAAM,uBAAuB;AAE7B,IAAM,wBAAwB;;;AC1E9B,IAAM,0BAAN,cAAsC,MAAM;AAAA,EACjD,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAKO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC;AAAA,EAET,YAAY,SAAiB,YAAqB;AAChD,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,aAAa;AAAA,EACpB;AACF;AAkCO,SAAS,qBAA+C;AAC7D,QAAM,WAAW,QAAQ,IAAI;AAC7B,QAAM,aAAa,QAAQ,IAAI;AAE/B,MAAI,CAAC,UAAU;AACb,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,YAAY;AACf,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,SAAO,2BAA2B,UAAU,UAAU;AACxD;AAMO,SAAS,2BAA2B,KAAa,OAA6B;AACnF,QAAM,UAAU;AAAA,IACd,gBAAgB;AAAA,IAChB,eAAe,UAAU,KAAK;AAAA,EA
ChC;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,SAA6D;AACxE,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,WAAW;AAAA,QAC5C,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,QAAQ;AAAA,UAClB,cAAc,QAAQ;AAAA,UACtB,YAAY,QAAQ;AAAA,UACpB,SAAS,QAAQ;AAAA,UACjB,QAAQ,QAAQ;AAAA,QAClB,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,IAEA,MAAM,YACJ,UAC0C;AAC1C,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,gBAAgB;AAAA,QACjD,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,SAAS,IAAI,CAAC,OAAO;AAAA,YAC7B,UAAU,EAAE;AAAA,YACZ,cAAc,EAAE;AAAA,YAChB,YAAY,EAAE;AAAA,YACd,SAAS,EAAE;AAAA,YACX,QAAQ,EAAE;AAAA,UACZ,EAAE;AAAA,QACJ,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,YAAM,SAAU,MAAM,SAAS,KAAK;AACpC,aAAO,OAAO;AAAA,IAChB;AAAA,IAEA,MAAM,UAA+B;AACnC,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,SAAS;AAAA,QAC1C,QAAQ;AAAA,QACR;AAAA,MACF,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,EACF;AACF;;;ACpJA,SAAS,KAAAA,UAAS;;;ACjFlB,SAAS,oBAAoB;;;ACF7B,SAAS,YAAY,KAAqB;AAExC,MAAI,SAAS,KAAK,GAAG,GAAG;AACtB,WAAO;AAAA,EACT;AACA,SAAO,IAAI,QAAQ,gBAAgB,CAAC,GAAG,WAAW,OAAO,YAAY,CAAC;AACxE;AASO,SAAS,gBAAgB,KAAuB;AACrD,MAAI,QAAQ,QAAQ,QAAQ,QAAW;AACrC,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC;AAAA,EAChD;AAEA,MAAI,OAAO,QAAQ,UAAU;AAC3B,UAAM,SAAkC,CAAC;AACzC,eAAW,CAAC,KAAK,KAAK,KAAK,OAA
O,QAAQ,GAAG,GAAG;AAC9C,YAAM,WAAW,YAAY,GAAG;AAChC,aAAO,QAAQ,IAAI,gBAAgB,KAAK;AAAA,IAC1C;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ACjCA,IAAM,cAAc;AACpB,IAAM,aAAa;AAKnB,IAAM,oBAAoB,oBAAI,IAAY;AAC1C,SAAS,gBAAgB,SAAiB,SAAuB;AAC/D,MAAI,kBAAkB,IAAI,OAAO,EAAG;AACpC,oBAAkB,IAAI,OAAO;AAC7B,UAAQ;AAAA,IACN,GAAG,WAAW,aAAa,OAAO,yCAAyC,OAAO,aAAa,UAAU;AAAA,EAC3G;AACF;AAgBO,SAAS,YAAY,OAAyC;AAGnE,QAAM,YAAY,MAAM;AAExB,QAAM,aAAa,MAAM;AAEzB,QAAM,qBAAqB,MAAM,mBAAmB;AAGpD,QAAM,mBAAmB,MAAM;AAC/B,QAAM,iBAAiB,MAAM;AAC7B,QAAM,0BAA0B,MAAM;AAGtC,SAAO,eAAe,OAAO,aAAa;AAAA,IACxC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,cAAc;AAAA,IACzC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,sBAAsB;AAAA,IACjD,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAGD,SAAO,eAAe,OAAO,YAAY;AAAA,IACvC,MAAM;AACJ,sBAAgB,YAAY,WAAW;AACvC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,UAAU;AAAA,IACrC,MAAM;AACJ,sBAAgB,UAAU,YAAY;AACtC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,mBAAmB;AAAA,IAC9C,MAAM;AACJ,sBAAgB,mBAAmB,oBAAoB;AACvD,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO;AACT;;;AF6BA,SAAS,YAAoB;AAC3B,SAAO,aAAa,GAAG,MAAM;AAC/B;AAKA,SAAS,WAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAKA,SAAS,eAAe,QAA0C;AAChE,MAAI;AACJ,MAAI,OAAO,UAAU,QAAW;AAC9B,YAAQ,WAAW,OAAO,KAAK;AAAA,EACjC,WAAW,OAAO,SAAS,QAAW;AACpC,YAAQ,OAAO,OAAO,IAAI;AAAA,EAC5B,OAAO;AACL,YAAQ;AAAA,EACV;AAEA,SAAO;AAAA,IACL;AAAA,IACA,MAAM,OAAO,OAAO,CAAC,GAAG,OAAO,IAAI,IAAI,CAAC;AAAA,IACxC,QAAQ,OAAO,SAAS,CAAC,GAAG,OAAO,MAAM,IAAI,CAAC;AAAA,IAC9C,WAAW,OAAO;AAAA,IAClB,SAAS,OAAO;AAAA,EAClB;AACF;AAMA,eAAsB,aAAa,SAA0C;AAC3E,MAAI;AACF,UAAM,QAAQ,UAAU;AACxB,UAAM,WAAW,KAAK,MAAM,KAAK;AACjC,UAAM,aAAa,gBAAgB,
QAAQ;AAC3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,MAAM,aAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAAM,YAAY,MAAM,QAAQ,KAAgC;AAChE,UAAM,aAAa,eAAe,SAAS;AAC3C,UAAM,SAAS,uBAAuB,MAAM,UAAU;AACtD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AACd,UAAM,eAAe,YAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,qBAAqB,YAAY;AAAA,IAC9C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AGpNA,SAAS,gBAAAC,qBAAoB;AAkB7B,SAASC,aAAoB;AAC3B,SAAOC,cAAa,GAAG,MAAM;AAC/B;AAMA,eAAsB,kBAAkB,SAA+C;AACrF,MAAI;AAEF,UAAM,QAAQD,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,0BAA0B,MAAM,UAAU;AAGxD,gBAAY,KAAK;AAGjB,UAAM,SAAS,MAAM,QAAQ,KAAgC;AAG7D,YAAQ,IAAI,MAAM;AAAA,EACpB,SAAS,OAAO;AAEd,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;ACrDA,SAAS,gBAAAE,qBAAoB;AAyB7B,SAASC,aAAoB;AAC3B,SAAOC,cAAa,GAAG,MAAM;AAC/B;AAKA,SAASC,YAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAKA,SAASC,aAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAMA,eAAsB,cAAc,SAA2C;AAC7E,MAAI;AAEF,UAAM,QAAQH,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,MAAMC,cAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAAM,YAAY,MAAM,QAAQ,KAAgC;AAGhE,UAAM,SAAS,uBAAuB,MAAM;AAAA,MAC1C,GAAG;AAAA,MACH,OAAOC,YAAW,UAAU,KAAK;AAAA,I
ACnC,CAAC;AAGD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AAEd,UAAM,eAAeC,aAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,sBAAsB,YAAY;AAAA,IAC/C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;ALsCO,SAAS,iBAAiB,SAAkC;AAEjE,gBAAc,OAAO;AACvB;AAGO,IAAM,kBAAkB;AAwCxB,SAAS,qBAAqB,SAAsC;AAEzE,oBAAkB,OAAO;AAC3B;AA8CO,SAAS,gBAAgB,SAAiC;AAC/D,eAAa,OAAO;AACtB;","names":["z","readFileSync","readStdin","readFileSync","readFileSync","readStdin","readFileSync","clampScore","formatError"]}
|
|
1
|
+
{"version":3,"sources":["../src/schemas.ts","../src/target-client.ts","../src/index.ts","../src/assertion.ts","../src/case-conversion.ts","../src/deprecation.ts","../src/prompt-template.ts","../src/runtime.ts"],"sourcesContent":["/**\n * Zod schemas for code grader input/output validation.\n * Provides both compile-time types and runtime validation.\n */\nimport { z } from 'zod';\n\n/**\n * Token usage metrics schema.\n */\nexport const TokenUsageSchema = z.object({\n input: z.number(),\n output: z.number(),\n cached: z.number().optional(),\n});\n\n/**\n * Trace summary schema (camelCase for TypeScript ergonomics).\n */\nexport const TraceSummarySchema = z.object({\n eventCount: z.number(),\n toolNames: z.array(z.string()),\n toolCallsByName: z.record(z.string(), z.number()),\n errorCount: z.number(),\n toolDurations: z.record(z.string(), z.array(z.number())).optional(),\n llmCallCount: z.number().optional(),\n});\n\n/**\n * Tool call schema.\n */\nexport const ToolCallSchema = z.object({\n tool: z.string(),\n input: z.unknown().optional(),\n output: z.unknown().optional(),\n id: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n});\n\n/**\n * Unified message schema for input, expected, and output messages.\n */\nexport const MessageSchema = z.object({\n role: z.enum(['assistant', 'user', 'system', 'tool']),\n content: z.union([z.string(), z.record(z.unknown()), z.array(z.record(z.unknown()))]).optional(),\n toolCalls: z.array(ToolCallSchema).optional(),\n name: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n metadata: z.record(z.unknown()).optional(),\n});\n\n/**\n * Code grader input schema (camelCase, converted from snake_case wire format).\n *\n * Text convenience accessors (`inputText`, `outputText`, `expectedOutputText`) are always\n * strings. 
Structured fields (`input`, `output`, `expectedOutput`) are always `Message[]`.\n */\nexport const CodeGraderInputSchema = z.object({\n /** @deprecated Use `inputText` instead. First user message content as string. */\n question: z.string(),\n criteria: z.string(),\n expectedOutput: z.array(MessageSchema),\n /** @deprecated Use `expectedOutputText` instead. Expected output content as string. */\n referenceAnswer: z.string().optional(),\n /** @deprecated Use `outputText` instead. Last assistant message content as string. */\n answer: z.string(),\n output: z.array(MessageSchema).nullable().optional(),\n /** Path to a temp file containing the output JSON (used for large payloads). */\n outputPath: z.string().optional(),\n guidelineFiles: z.array(z.string()),\n inputFiles: z.array(z.string()),\n input: z.array(MessageSchema),\n trace: TraceSummarySchema.nullable().optional(),\n tokenUsage: TokenUsageSchema.nullable().optional(),\n costUsd: z.number().nullable().optional(),\n durationMs: z.number().nullable().optional(),\n startTime: z.string().nullable().optional(),\n endTime: z.string().nullable().optional(),\n fileChanges: z.string().nullable().optional(),\n workspacePath: z.string().nullable().optional(),\n config: z.record(z.unknown()).nullable().optional(),\n /** First user message content as string. Replaces `question`. */\n inputText: z.string().optional(),\n /** Last assistant message content as string. Replaces `answer`. */\n outputText: z.string().optional(),\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n expectedOutputText: z.string().optional(),\n});\n\n/**\n * Code grader result schema (validated before output).\n */\nexport const CodeGraderResultSchema = z.object({\n score: z.number().min(0).max(1),\n assertions: z\n .array(\n z.object({\n text: z.string(),\n passed: z.boolean(),\n evidence: z.string().optional(),\n }),\n )\n .optional()\n .default([]),\n /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */\n details: z.record(z.unknown()).optional(),\n});\n\n/**\n * Inferred types from schemas.\n */\nexport type CodeGraderInput = z.infer<typeof CodeGraderInputSchema>;\nexport type CodeGraderResult = z.infer<typeof CodeGraderResultSchema>;\n\n/**\n * CodeGraderInput after `enrichInput()` has run.\n *\n * The text convenience accessors (`inputText`, `outputText`, `expectedOutputText`)\n * are always populated by the runtime before the handler is called, so they are\n * guaranteed to be `string` (never `undefined`).\n *\n * Handler function signatures (`CodeGraderHandler`, `AssertionHandler`) use this\n * type so that user code can destructure `{ outputText }` without null-checks.\n */\nexport type EnrichedCodeGraderInput = Omit<\n CodeGraderInput,\n 'inputText' | 'outputText' | 'expectedOutputText'\n> & {\n /** First user message content as string. Replaces `question`. */\n readonly inputText: string;\n /** Last assistant message content as string. Replaces `answer`. */\n readonly outputText: string;\n /** Expected output content as string. Replaces `referenceAnswer`. 
*/\n readonly expectedOutputText: string;\n};\nexport type TraceSummary = z.infer<typeof TraceSummarySchema>;\nexport type Message = z.infer<typeof MessageSchema>;\nexport type ToolCall = z.infer<typeof ToolCallSchema>;\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * Prompt template input schema (camelCase, converted from snake_case wire format).\n * Uses the same schema as CodeGraderInput since the orchestrator sends identical payloads.\n */\nexport const PromptTemplateInputSchema = CodeGraderInputSchema;\n\nexport type PromptTemplateInput = CodeGraderInput;\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderInputSchema */\nexport const CodeJudgeInputSchema = CodeGraderInputSchema;\n/** @deprecated Use CodeGraderResultSchema */\nexport const CodeJudgeResultSchema = CodeGraderResultSchema;\n/** @deprecated Use CodeGraderInput */\nexport type CodeJudgeInput = CodeGraderInput;\n/** @deprecated Use CodeGraderResult */\nexport type CodeJudgeResult = CodeGraderResult;\n","/**\n * Client for invoking configured targets from code-grader scripts.\n *\n * Environment variables (set automatically by AgentV when `target` config is present):\n * - AGENTV_TARGET_PROXY_URL: The URL of the local proxy server\n * - AGENTV_TARGET_PROXY_TOKEN: Bearer token for authentication\n */\n\nimport type { TokenUsage } from './schemas.js';\n\n/**\n * Request to invoke the target\n */\nexport interface TargetInvokeRequest {\n readonly question: string;\n readonly systemPrompt?: string;\n readonly evalCaseId?: string;\n readonly attempt?: number;\n /** Optional target override - use a different target for this invocation */\n readonly target?: string;\n}\n\n/**\n * Response from a target invocation\n */\nexport interface TargetInvokeResponse {\n readonly output: readonly unknown[];\n readonly rawText?: string;\n readonly tokenUsage?: TokenUsage;\n}\n\n/**\n * Information about the target proxy 
configuration\n */\nexport interface TargetInfo {\n /** Name of the default target being used */\n readonly targetName: string;\n /** Maximum number of calls allowed */\n readonly maxCalls: number;\n /** Current number of calls made */\n readonly callCount: number;\n /** List of all available target names */\n readonly availableTargets: readonly string[];\n}\n\n/**\n * Target client for making target invocations\n */\nexport interface TargetClient {\n /**\n * Invoke the configured target with a prompt.\n * @param request - The question and optional system prompt\n * @returns The target's response with output messages and optional raw text\n */\n invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse>;\n\n /**\n * Invoke the target with multiple requests in sequence.\n * Each request counts toward the max_calls limit.\n * @param requests - Array of target requests\n * @returns Array of target responses\n */\n invokeBatch(requests: readonly TargetInvokeRequest[]): Promise<readonly TargetInvokeResponse[]>;\n\n /**\n * Get information about the target proxy configuration.\n * Returns the default target name, max calls, current call count, and available targets.\n */\n getInfo(): Promise<TargetInfo>;\n}\n\n/**\n * Error thrown when target proxy is not available\n */\nexport class TargetNotAvailableError extends Error {\n constructor(message: string) {\n super(message);\n this.name = 'TargetNotAvailableError';\n }\n}\n\n/**\n * Error thrown when target invocation fails\n */\nexport class TargetInvocationError extends Error {\n readonly statusCode?: number;\n\n constructor(message: string, statusCode?: number) {\n super(message);\n this.name = 'TargetInvocationError';\n this.statusCode = statusCode;\n }\n}\n\n/**\n * Create a target client from environment variables.\n *\n * This function reads the proxy URL and token from environment variables\n * that are automatically set by AgentV when a `target` config block is present\n * on a `code_grader` (or 
`code_judge`) evaluator.\n *\n * @returns A target client if environment variables are set, otherwise undefined\n * @throws TargetNotAvailableError if token is missing when URL is present\n *\n * @example\n * ```typescript\n * import { createTargetClient, defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ question, criteria }) => {\n * const target = createTargetClient();\n *\n * if (!target) {\n * // Target not available - no target config on this evaluator\n * return { score: 0.5, assertions: [{ text: 'Target not available', passed: false }] };\n * }\n *\n * const response = await target.invoke({\n * question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,\n * systemPrompt: 'You are an expert evaluator. Respond with JSON: { \"correct\": true/false }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.correct ? 1.0 : 0.0 };\n * });\n * ```\n */\nexport function createTargetClient(): TargetClient | undefined {\n const proxyUrl = process.env.AGENTV_TARGET_PROXY_URL;\n const proxyToken = process.env.AGENTV_TARGET_PROXY_TOKEN;\n\n if (!proxyUrl) {\n return undefined;\n }\n\n if (!proxyToken) {\n throw new TargetNotAvailableError(\n 'AGENTV_TARGET_PROXY_URL is set but AGENTV_TARGET_PROXY_TOKEN is missing',\n );\n }\n\n return createTargetClientInternal(proxyUrl, proxyToken);\n}\n\n/**\n * Internal: Create a target client with explicit URL and token.\n * Exported for testing only - use createTargetClient() in production.\n */\nexport function createTargetClientInternal(url: string, token: string): TargetClient {\n const headers = {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${token}`,\n };\n\n return {\n async invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse> {\n const response = await fetch(`${url}/invoke`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n question: request.question,\n systemPrompt: 
request.systemPrompt,\n evalCaseId: request.evalCaseId,\n attempt: request.attempt,\n target: request.target,\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInvokeResponse;\n },\n\n async invokeBatch(\n requests: readonly TargetInvokeRequest[],\n ): Promise<readonly TargetInvokeResponse[]> {\n const response = await fetch(`${url}/invokeBatch`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n requests: requests.map((r) => ({\n question: r.question,\n systemPrompt: r.systemPrompt,\n evalCaseId: r.evalCaseId,\n attempt: r.attempt,\n target: r.target,\n })),\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n const result = (await response.json()) as { responses: TargetInvokeResponse[] };\n return result.responses;\n },\n\n async getInfo(): Promise<TargetInfo> {\n const response = await fetch(`${url}/info`, {\n method: 'GET',\n headers,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? 
`HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInfo;\n },\n };\n}\n","/**\n * AgentV Evaluation SDK\n *\n * Build custom evaluators for AI agent outputs.\n *\n * @example Custom assertion (simplest way to add evaluation logic)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.includes('hello'),\n * assertions: [{ text: 'Checks greeting', passed: outputText.includes('hello') }],\n * }));\n * ```\n *\n * @example Code grader (full control)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace, outputText }) => ({\n * score: trace?.eventCount <= 5 ? 1.0 : 0.5,\n * assertions: [{ text: 'Efficient tool usage', passed: trace?.eventCount <= 5 }],\n * }));\n * ```\n *\n * @example Code grader with target access (requires `target` config in YAML)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader, createTargetClient } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ inputText }) => {\n * const target = createTargetClient();\n * if (!target) {\n * return { score: 0, assertions: [{ text: 'Target not available', passed: false }] };\n * }\n *\n * const response = await target.invoke({\n * question: `Evaluate: ${inputText}`,\n * systemPrompt: 'Respond with JSON: { \"score\": 0-1 }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.score ?? 
0 };\n * });\n * ```\n *\n * @packageDocumentation\n */\n\n// Re-export schemas and types\nexport {\n CodeGraderInputSchema,\n CodeGraderResultSchema,\n TraceSummarySchema,\n MessageSchema,\n ToolCallSchema,\n TokenUsageSchema,\n PromptTemplateInputSchema,\n type CodeGraderInput,\n type CodeGraderResult,\n type EnrichedCodeGraderInput,\n type TraceSummary,\n type Message,\n type ToolCall,\n type TokenUsage,\n type PromptTemplateInput,\n // Backward-compat aliases (deprecated)\n CodeJudgeInputSchema,\n CodeJudgeResultSchema,\n type CodeJudgeInput,\n type CodeJudgeResult,\n} from './schemas.js';\n\n// Re-export target client\nexport {\n createTargetClient,\n TargetNotAvailableError,\n TargetInvocationError,\n type TargetClient,\n type TargetInfo,\n type TargetInvokeRequest,\n type TargetInvokeResponse,\n} from './target-client.js';\n\n// Re-export Zod for typed config support\nexport { z } from 'zod';\n\n// Re-export assertion types\nexport type {\n AssertionContext,\n AssertionHandler,\n AssertionScore,\n AssertionType,\n} from './assertion.js';\n\nimport { type AssertionHandler, runAssertion } from './assertion.js';\nimport { type PromptTemplateHandler, runPromptTemplate } from './prompt-template.js';\nimport { type CodeGraderHandler, type CodeJudgeHandler, runCodeGrader } from './runtime.js';\n\nexport type { CodeGraderHandler };\n/** @deprecated Use CodeGraderHandler */\nexport type { CodeJudgeHandler };\nexport type { PromptTemplateHandler };\n\n/**\n * Define a code grader evaluator with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Validates the result and outputs JSON to stdout\n * 5. 
Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the input and returns a result\n *\n * @example\n * ```typescript\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace }) => {\n * if (!trace) {\n * return { score: 0.5, assertions: [{ text: 'No trace available', passed: false }] };\n * }\n *\n * const efficient = trace.eventCount <= 10;\n * return {\n * score: efficient ? 1.0 : 0.5,\n * assertions: [{ text: efficient ? 'Efficient execution' : 'Too many tool calls', passed: efficient }],\n * };\n * });\n * ```\n *\n * @example With typed config\n * ```typescript\n * import { defineCodeGrader, z } from '@agentv/eval';\n *\n * const ConfigSchema = z.object({\n * maxToolCalls: z.number().default(10),\n * });\n *\n * export default defineCodeGrader(({ trace, config }) => {\n * const { maxToolCalls } = ConfigSchema.parse(config ?? {});\n * // Use maxToolCalls...\n * });\n * ```\n */\nexport function defineCodeGrader(handler: CodeGraderHandler): void {\n // Run immediately when module is loaded\n runCodeGrader(handler);\n}\n\n/** @deprecated Use defineCodeGrader */\nexport const defineCodeJudge = defineCodeGrader;\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? 
`Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n\n/**\n * Define a custom assertion evaluator with automatic stdin/stdout handling.\n *\n * Assertions are the simplest way to add custom evaluation logic. They receive\n * the full evaluation context and return a pass/fail result with optional\n * granular scoring.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed context\n * 4. Normalizes the result (pass→score, clamp, etc.)\n * 5. Outputs JSON to stdout\n * 6. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the context and returns a result\n *\n * @example Simple pass/fail\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText }) => ({\n * pass: outputText.toLowerCase().includes('hello'),\n * assertions: [{ text: 'Checks for greeting', passed: outputText.toLowerCase().includes('hello') }],\n * }));\n * ```\n *\n * @example Granular scoring\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ outputText, trace }) => {\n * const hasContent = outputText.length > 0 ? 0.5 : 0;\n * const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 
0.5 : 0;\n * return {\n * score: hasContent + isEfficient,\n * assertions: [\n * { text: 'Has content', passed: !!hasContent },\n * { text: 'Efficient', passed: !!isEfficient },\n * ],\n * };\n * });\n * ```\n */\nexport function defineAssertion(handler: AssertionHandler): void {\n runAssertion(handler);\n}\n","/**\n * Runtime for custom assertion evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n *\n * Assertions receive the same input as code graders but use a simplified result\n * contract focused on pass/fail with optional score granularity.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Context provided to assertion handlers.\n *\n * Same shape as CodeGraderInput but with `inputText`, `outputText`, and\n * `expectedOutputText` guaranteed to be strings (populated by the runtime\n * before the handler is called).\n */\nexport type AssertionContext = EnrichedCodeGraderInput;\n\n/**\n * Known built-in assertion types. 
Custom types are extensible via string.\n *\n * Use in EVAL.yaml `assertions` blocks:\n * ```yaml\n * assertions:\n * - type: contains\n * value: \"Paris\"\n * ```\n *\n * Custom types registered via `.agentv/assertions/` or `defineAssertion()`\n * are also valid — the `string & {}` escape hatch provides autocomplete\n * for known types while accepting any string.\n */\nexport type AssertionType =\n // kebab-case (canonical internal form)\n | 'llm-grader'\n | 'code-grader'\n | 'rubrics'\n | 'composite'\n | 'tool-trajectory'\n | 'field-accuracy'\n | 'latency'\n | 'cost'\n | 'token-usage'\n | 'execution-metrics'\n | 'skill-trigger'\n | 'contains'\n | 'contains-any'\n | 'contains-all'\n | 'icontains'\n | 'icontains-any'\n | 'icontains-all'\n | 'starts-with'\n | 'ends-with'\n | 'equals'\n | 'regex'\n | 'is-json'\n // legacy aliases (still accepted)\n | 'llm-judge'\n | 'code-judge'\n | 'llm_judge'\n | 'code_judge'\n | 'llm_grader'\n | 'code_grader'\n | 'tool_trajectory'\n | 'field_accuracy'\n | 'token_usage'\n | 'execution_metrics'\n | 'contains_any'\n | 'contains_all'\n | 'icontains_any'\n | 'icontains_all'\n | 'starts_with'\n | 'ends_with'\n | 'is_json'\n | (string & {});\n\n/**\n * Result returned from an assertion handler.\n *\n * @example Pass with score\n * ```ts\n * { pass: true, assertions: [{ text: 'Output contains expected keywords', passed: true }] }\n * ```\n *\n * @example Fail with evidence\n * ```ts\n * { pass: false, score: 0.3, assertions: [{ text: 'Missing required header', passed: false }] }\n * ```\n *\n * @example Granular score (0-1)\n * ```ts\n * { score: 0.75, assertions: [\n * { text: 'Format correct', passed: true },\n * { text: 'Content relevant', passed: true },\n * { text: 'Missing citation', passed: false },\n * ] }\n * ```\n */\nexport interface AssertionScore {\n /** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */\n readonly pass?: boolean;\n /** Numeric score between 0 and 1. 
Defaults to 1 if pass=true, 0 if pass=false. */\n readonly score?: number;\n /** Per-assertion verdicts with optional evidence. */\n readonly assertions?: readonly {\n readonly text: string;\n readonly passed: boolean;\n readonly evidence?: string;\n }[];\n /** Optional structured details for domain-specific metrics. */\n readonly details?: Record<string, unknown>;\n}\n\n/**\n * Handler function type for assertions.\n */\nexport type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;\n\n/**\n * Read stdin synchronously.\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Normalize an AssertionScore to a CodeGraderResult for wire compatibility.\n */\nfunction normalizeScore(result: AssertionScore): CodeGraderResult {\n let score: number;\n if (result.score !== undefined) {\n score = clampScore(result.score);\n } else if (result.pass !== undefined) {\n score = result.pass ? 1 : 0;\n } else {\n score = 0;\n }\n\n return {\n score,\n assertions: result.assertions ? 
[...result.assertions] : [],\n details: result.details,\n };\n}\n\n/**\n * Run an assertion handler with full stdin/stdout handling.\n * This is the internal implementation called by defineAssertion.\n */\nexport async function runAssertion(handler: AssertionHandler): Promise<void> {\n try {\n const stdin = readStdin();\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n const camelInput = toCamelCaseDeep(rawInput);\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // Lazy file-backed output loading\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // After enrichment, text accessors are guaranteed to be strings\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n const normalized = normalizeScore(rawResult);\n const result = CodeGraderResultSchema.parse(normalized);\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n assertions: [{ text: `Assertion failed: ${errorMessage}`, passed: false }],\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n","/**\n * Case conversion utilities for JSON payloads.\n * Converts between snake_case (wire format) and camelCase (TypeScript).\n */\n\nfunction toCamelCase(str: string): string {\n // Don't convert keys that start with uppercase (proper nouns/tool names)\n if (/^[A-Z]/.test(str)) {\n return str;\n }\n return str.replace(/_([a-z0-9])/g, (_, letter) => letter.toUpperCase());\n}\n\n/**\n * 
Recursively converts all keys in an object from snake_case to camelCase.\n * Used to map wire payloads into TypeScript-friendly shapes.\n *\n * @param obj - The object to convert (can be any JSON-serializable value)\n * @returns A new object with all keys converted to camelCase\n */\nexport function toCamelCaseDeep(obj: unknown): unknown {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => toCamelCaseDeep(item));\n }\n\n if (typeof obj === 'object') {\n const result: Record<string, unknown> = {};\n for (const [key, value] of Object.entries(obj)) {\n const camelKey = toCamelCase(key);\n result[camelKey] = toCamelCaseDeep(value);\n }\n return result;\n }\n\n return obj;\n}\n","/**\n * Deprecation warning utilities for code grader and assertion runtimes.\n * Provides text convenience accessors and deprecation warnings on legacy field names.\n */\nimport type { CodeGraderInput } from './schemas.js';\n\nconst ANSI_YELLOW = '\\u001b[33m';\nconst ANSI_RESET = '\\u001b[0m';\n\n/**\n * Emit a deprecation warning to stderr (once per field name per process).\n */\nconst deprecationWarned = new Set<string>();\nfunction warnDeprecation(oldName: string, newName: string): void {\n if (deprecationWarned.has(oldName)) return;\n deprecationWarned.add(oldName);\n console.warn(\n `${ANSI_YELLOW}Warning: '${oldName}' is deprecated in code graders. Use '${newName}' instead.${ANSI_RESET}`,\n );\n}\n\n/**\n * Reset deprecation warning state. Used only in tests.\n */\nexport function resetDeprecationWarnings(): void {\n deprecationWarned.clear();\n}\n\n/**\n * Populate `inputText`, `outputText`, and `expectedOutputText` convenience accessors\n * on the validated input object, and install deprecation warnings on legacy fields.\n *\n * Text accessors are always strings. 
Structured fields (`input`, `output`, `expectedOutput`)\n * remain `Message[]` always.\n */\nexport function enrichInput(input: CodeGraderInput): CodeGraderInput {\n // Populate text convenience accessors (always strings)\n // inputText = question (first user message content as string)\n const inputText = input.question;\n // outputText = answer (last assistant message content as string)\n const outputText = input.answer;\n // expectedOutputText = referenceAnswer (expected output content as string)\n const expectedOutputText = input.referenceAnswer ?? '';\n\n // Store the original values before redefining properties\n const originalQuestion = input.question;\n const originalAnswer = input.answer;\n const originalReferenceAnswer = input.referenceAnswer;\n\n // Set new text accessor values\n Object.defineProperty(input, 'inputText', {\n value: inputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'outputText', {\n value: outputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n Object.defineProperty(input, 'expectedOutputText', {\n value: expectedOutputText,\n writable: false,\n configurable: true,\n enumerable: true,\n });\n\n // Install deprecation warnings on legacy fields via property accessors\n Object.defineProperty(input, 'question', {\n get() {\n warnDeprecation('question', 'inputText');\n return originalQuestion;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'answer', {\n get() {\n warnDeprecation('answer', 'outputText');\n return originalAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n Object.defineProperty(input, 'referenceAnswer', {\n get() {\n warnDeprecation('referenceAnswer', 'expectedOutputText');\n return originalReferenceAnswer;\n },\n configurable: true,\n enumerable: true,\n });\n\n return input;\n}\n","/**\n * Runtime for prompt template evaluators.\n * Handles stdin parsing, validation, error handling, and string 
output.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport { type EnrichedCodeGraderInput, PromptTemplateInputSchema } from './schemas.js';\n\n/**\n * Handler function type for prompt templates.\n * Returns the prompt string to use for evaluation.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * `expectedOutputText` are always populated before the handler is called.\n */\nexport type PromptTemplateHandler = (input: EnrichedCodeGraderInput) => string | Promise<string>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Run a prompt template handler with full stdin/stdout handling.\n * This is the internal implementation called by definePromptTemplate.\n */\nexport async function runPromptTemplate(handler: PromptTemplateHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = PromptTemplateInputSchema.parse(camelInput);\n\n // 5. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 6. Run handler (input is now enriched with guaranteed text accessors)\n const prompt = await handler(input as EnrichedCodeGraderInput);\n\n // 6. Output raw string (not JSON) - the prompt itself\n console.log(prompt);\n } catch (error) {\n // Output error to stderr and exit with non-zero code\n console.error(error instanceof Error ? error.message : String(error));\n process.exit(1);\n }\n}\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. 
Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.inputText}\n * Answer: ${ctx.outputText}\n *\n * ${ctx.expectedOutputText ? `Reference: ${ctx.expectedOutputText}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.inputText}\n * Candidate Answer: ${ctx.outputText}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n *\n * @example Async handler\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate(async (ctx) => {\n * // Async operations are supported\n * return `Question: ${ctx.inputText}\\nAnswer: ${ctx.outputText}`;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n","/**\n * Runtime for code grader evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { enrichInput } from './deprecation.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n type EnrichedCodeGraderInput,\n} from './schemas.js';\n\n/**\n * Handler function type for code graders.\n *\n * The input is enriched at runtime: `inputText`, `outputText`, and\n * 
`expectedOutputText` are always populated before the handler is called.\n */\nexport type CodeGraderHandler = (\n input: EnrichedCodeGraderInput,\n) => CodeGraderResult | Promise<CodeGraderResult>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\n/**\n * Format an error for output.\n */\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Run a code grader handler with full stdin/stdout handling.\n * This is the internal implementation called by defineCodeGrader.\n */\nexport async function runCodeGrader(handler: CodeGraderHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // 5. Set up lazy file-backed output loading if applicable\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // 6. Enrich input with text accessors and deprecation warnings\n enrichInput(input);\n\n // 7. 
Run handler (input is now enriched with guaranteed text accessors)\n const rawResult = await handler(input as EnrichedCodeGraderInput);\n\n // 8. Validate and normalize output\n const result = CodeGraderResultSchema.parse({\n ...rawResult,\n score: clampScore(rawResult.score),\n });\n\n // 9. Output JSON\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n // Output failure result\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n assertions: [{ text: `Evaluation failed: ${errorMessage}`, passed: false }],\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderHandler */\nexport type CodeJudgeHandler = CodeGraderHandler;\n/** @deprecated Use runCodeGrader */\nexport const runCodeJudge = runCodeGrader;\n"],"mappings":";AAIA,SAAS,SAAS;AAKX,IAAM,mBAAmB,EAAE,OAAO;AAAA,EACvC,OAAO,EAAE,OAAO;AAAA,EAChB,QAAQ,EAAE,OAAO;AAAA,EACjB,QAAQ,EAAE,OAAO,EAAE,SAAS;AAC9B,CAAC;AAKM,IAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,YAAY,EAAE,OAAO;AAAA,EACrB,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EAC7B,iBAAiB,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC;AAAA,EAChD,YAAY,EAAE,OAAO;AAAA,EACrB,eAAe,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,EAAE,SAAS;AAAA,EAClE,cAAc,EAAE,OAAO,EAAE,SAAS;AACpC,CAAC;AAKM,IAAM,iBAAiB,EAAE,OAAO;AAAA,EACrC,MAAM,EAAE,OAAO;AAAA,EACf,OAAO,EAAE,QAAQ,EAAE,SAAS;AAAA,EAC5B,QAAQ,EAAE,QAAQ,EAAE,SAAS;AAAA,EAC7B,IAAI,EAAE,OAAO,EAAE,SAAS;AAAA,EACxB,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAKM,IAAM,gBAAgB,EAAE,OAAO;AAAA,EACpC,MAAM,EAAE,KAAK,CAAC,aAAa,QAAQ,UAAU,MAAM,CAAC;AAAA,EACpD,SAAS,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,OAAO,EAAE,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS;AAAA,EAC/F,WAAW,EAAE,MAAM,cAAc,EAAE,SAAS;AAAA,EAC5C,MAAM,EAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,WAAW,EAAE,OAAO,EAAE,SAAS;AA
AA,EAC/B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS;AAC3C,CAAC;AAQM,IAAM,wBAAwB,EAAE,OAAO;AAAA;AAAA,EAE5C,UAAU,EAAE,OAAO;AAAA,EACnB,UAAU,EAAE,OAAO;AAAA,EACnB,gBAAgB,EAAE,MAAM,aAAa;AAAA;AAAA,EAErC,iBAAiB,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAErC,QAAQ,EAAE,OAAO;AAAA,EACjB,QAAQ,EAAE,MAAM,aAAa,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAEnD,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,gBAAgB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EAClC,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EAC9B,OAAO,EAAE,MAAM,aAAa;AAAA,EAC5B,OAAO,mBAAmB,SAAS,EAAE,SAAS;AAAA,EAC9C,YAAY,iBAAiB,SAAS,EAAE,SAAS;AAAA,EACjD,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3C,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC1C,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,aAAa,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC5C,eAAe,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAElD,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAEhC,oBAAoB,EAAE,OAAO,EAAE,SAAS;AAC1C,CAAC;AAKM,IAAM,yBAAyB,EAAE,OAAO;AAAA,EAC7C,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAC9B,YAAY,EACT;AAAA,IACC,EAAE,OAAO;AAAA,MACP,MAAM,EAAE,OAAO;AAAA,MACf,QAAQ,EAAE,QAAQ;AAAA,MAClB,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,IAChC,CAAC;AAAA,EACH,EACC,SAAS,EACT,QAAQ,CAAC,CAAC;AAAA;AAAA,EAEb,SAAS,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,SAAS;AAC1C,CAAC;AAsCM,IAAM,4BAA4B;AAMlC,IAAM,uBAAuB;AAE7B,IAAM,wBAAwB;;;ACjF9B,IAAM,0BAAN,cAAsC,MAAM;AAAA,EACjD,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAKO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC;AAAA,EAET,YAAY,SAAiB,YAAqB;AAChD,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,aAAa;AAAA,EACpB;AACF;AAkCO,SAAS,qBAA+C;AAC7D,QAAM,WAAW,QAAQ,IAAI;AAC7B,QAAM,aAAa,QAAQ,IAAI;AAE/B,MAAI,CAAC,UAAU;AACb,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,YAAY;AACf,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,SAAO,2BAA2B,UAAU,UAAU;AACxD;AAMO,SAAS,2BAA2B,KAAa,OAA6B;AACnF,QAAM,UAAU;AAAA,IACd,gBAAgB;AAAA,IAChB,eAAe,UAAU,KAAK;AAAA,EAChC;AA
EA,SAAO;AAAA,IACL,MAAM,OAAO,SAA6D;AACxE,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,WAAW;AAAA,QAC5C,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,QAAQ;AAAA,UAClB,cAAc,QAAQ;AAAA,UACtB,YAAY,QAAQ;AAAA,UACpB,SAAS,QAAQ;AAAA,UACjB,QAAQ,QAAQ;AAAA,QAClB,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,IAEA,MAAM,YACJ,UAC0C;AAC1C,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,gBAAgB;AAAA,QACjD,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,SAAS,IAAI,CAAC,OAAO;AAAA,YAC7B,UAAU,EAAE;AAAA,YACZ,cAAc,EAAE;AAAA,YAChB,YAAY,EAAE;AAAA,YACd,SAAS,EAAE;AAAA,YACX,QAAQ,EAAE;AAAA,UACZ,EAAE;AAAA,QACJ,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,YAAM,SAAU,MAAM,SAAS,KAAK;AACpC,aAAO,OAAO;AAAA,IAChB;AAAA,IAEA,MAAM,UAA+B;AACnC,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,SAAS;AAAA,QAC1C,QAAQ;AAAA,QACR;AAAA,MACF,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,EACF;AACF;;;ACrJA,SAAS,KAAAA,UAAS;;;AChFlB,SAAS,oBAAoB;;;ACF7B,SAAS,YAAY,KAAqB;AAExC,MAAI,SAAS,KAAK,GAAG,GAAG;AACtB,WAAO;AAAA,EACT;AACA,SAAO,IAAI,QAAQ,gBAAgB,CAAC,GAAG,WAAW,OAAO,YAAY,CAAC;AACxE;AASO,SAAS,gBAAgB,KAAuB;AACrD,MAAI,QAAQ,QAAQ,QAAQ,QAAW;AACrC,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC;AAAA,EAChD;AAEA,MAAI,OAAO,QAAQ,UAAU;AAC3B,UAAM,SAAkC,CAAC;AACzC,eAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ
,GAAG,GAAG;AAC9C,YAAM,WAAW,YAAY,GAAG;AAChC,aAAO,QAAQ,IAAI,gBAAgB,KAAK;AAAA,IAC1C;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ACjCA,IAAM,cAAc;AACpB,IAAM,aAAa;AAKnB,IAAM,oBAAoB,oBAAI,IAAY;AAC1C,SAAS,gBAAgB,SAAiB,SAAuB;AAC/D,MAAI,kBAAkB,IAAI,OAAO,EAAG;AACpC,oBAAkB,IAAI,OAAO;AAC7B,UAAQ;AAAA,IACN,GAAG,WAAW,aAAa,OAAO,yCAAyC,OAAO,aAAa,UAAU;AAAA,EAC3G;AACF;AAgBO,SAAS,YAAY,OAAyC;AAGnE,QAAM,YAAY,MAAM;AAExB,QAAM,aAAa,MAAM;AAEzB,QAAM,qBAAqB,MAAM,mBAAmB;AAGpD,QAAM,mBAAmB,MAAM;AAC/B,QAAM,iBAAiB,MAAM;AAC7B,QAAM,0BAA0B,MAAM;AAGtC,SAAO,eAAe,OAAO,aAAa;AAAA,IACxC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,cAAc;AAAA,IACzC,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AACD,SAAO,eAAe,OAAO,sBAAsB;AAAA,IACjD,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAGD,SAAO,eAAe,OAAO,YAAY;AAAA,IACvC,MAAM;AACJ,sBAAgB,YAAY,WAAW;AACvC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,UAAU;AAAA,IACrC,MAAM;AACJ,sBAAgB,UAAU,YAAY;AACtC,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO,eAAe,OAAO,mBAAmB;AAAA,IAC9C,MAAM;AACJ,sBAAgB,mBAAmB,oBAAoB;AACvD,aAAO;AAAA,IACT;AAAA,IACA,cAAc;AAAA,IACd,YAAY;AAAA,EACd,CAAC;AAED,SAAO;AACT;;;AFiCA,SAAS,YAAoB;AAC3B,SAAO,aAAa,GAAG,MAAM;AAC/B;AAKA,SAAS,WAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAKA,SAAS,eAAe,QAA0C;AAChE,MAAI;AACJ,MAAI,OAAO,UAAU,QAAW;AAC9B,YAAQ,WAAW,OAAO,KAAK;AAAA,EACjC,WAAW,OAAO,SAAS,QAAW;AACpC,YAAQ,OAAO,OAAO,IAAI;AAAA,EAC5B,OAAO;AACL,YAAQ;AAAA,EACV;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,OAAO,aAAa,CAAC,GAAG,OAAO,UAAU,IAAI,CAAC;AAAA,IAC1D,SAAS,OAAO;AAAA,EAClB;AACF;AAMA,eAAsB,aAAa,SAA0C;AAC3E,MAAI;AACF,UAAM,QAAQ,UAAU;AACxB,UAAM,WAAW,KAAK,MAAM,KAAK;AACjC,UAAM,aAAa,gBAAgB,QAAQ;AAC3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW
,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,MAAM,aAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAAM,YAAY,MAAM,QAAQ,KAAgC;AAChE,UAAM,aAAa,eAAe,SAAS;AAC3C,UAAM,SAAS,uBAAuB,MAAM,UAAU;AACtD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AACd,UAAM,eAAe,YAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,YAAY,CAAC,EAAE,MAAM,qBAAqB,YAAY,IAAI,QAAQ,MAAM,CAAC;AAAA,IAC3E;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AGpNA,SAAS,gBAAAC,qBAAoB;AAkB7B,SAASC,aAAoB;AAC3B,SAAOC,cAAa,GAAG,MAAM;AAC/B;AAMA,eAAsB,kBAAkB,SAA+C;AACrF,MAAI;AAEF,UAAM,QAAQD,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,0BAA0B,MAAM,UAAU;AAGxD,gBAAY,KAAK;AAGjB,UAAM,SAAS,MAAM,QAAQ,KAAgC;AAG7D,YAAQ,IAAI,MAAM;AAAA,EACpB,SAAS,OAAO;AAEd,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;ACrDA,SAAS,gBAAAE,qBAAoB;AAyB7B,SAASC,aAAoB;AAC3B,SAAOC,cAAa,GAAG,MAAM;AAC/B;AAKA,SAASC,YAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAKA,SAASC,aAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAMA,eAAsB,cAAc,SAA2C;AAC7E,MAAI;AAEF,UAAM,QAAQH,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,MAAMC,cAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,gBAAY,KAAK;AAGjB,UAAM,YAAY,MAAM,QAAQ,KAAgC;AAGhE,UAAM,SAAS,uBAAuB,MAAM;AAAA,MAC1C,GAAG;AAAA,MACH,OAAOC,YAAW,UAAU,KAAK;AAAA,IACnC,CAAC;AAGD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AAEd,UAAM,eAAeC,aA
AY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,YAAY,CAAC,EAAE,MAAM,sBAAsB,YAAY,IAAI,QAAQ,MAAM,CAAC;AAAA,IAC5E;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;ALsCO,SAAS,iBAAiB,SAAkC;AAEjE,gBAAc,OAAO;AACvB;AAGO,IAAM,kBAAkB;AAwCxB,SAAS,qBAAqB,SAAsC;AAEzE,oBAAkB,OAAO;AAC3B;AA8CO,SAAS,gBAAgB,SAAiC;AAC/D,eAAa,OAAO;AACtB;","names":["z","readFileSync","readStdin","readFileSync","readFileSync","readStdin","readFileSync","clampScore","formatError"]}
|