@agentv/eval 2.19.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,13 +24,13 @@ export default defineAssertion(({ answer }) => ({
24
24
 
25
25
  Assertions support `pass: boolean` for simple checks and `score: number` (0-1) for granular scoring.
26
26
 
27
- ### defineCodeJudge (full control)
27
+ ### defineCodeGrader (full control)
28
28
 
29
29
  ```typescript
30
30
  #!/usr/bin/env bun
31
- import { defineCodeJudge } from '@agentv/eval';
31
+ import { defineCodeGrader } from '@agentv/eval';
32
32
 
33
- export default defineCodeJudge(({ answer, trace }) => ({
33
+ export default defineCodeGrader(({ answer, trace }) => ({
34
34
  score: answer.length > 0 ? 1.0 : 0.0,
35
35
  hits: ['Output received'],
36
36
  }));
@@ -41,10 +41,10 @@ Both functions handle stdin/stdout parsing, snake_case conversion, Zod validatio
41
41
  ## Exports
42
42
 
43
43
  - `defineAssertion(handler)` - Define a custom assertion (pass/fail + optional score)
44
- - `defineCodeJudge(handler)` - Define a code judge evaluator (full score control)
44
+ - `defineCodeGrader(handler)` - Define a code grader evaluator (full score control)
45
45
  - `definePromptTemplate(handler)` - Define a dynamic prompt template
46
46
  - `AssertionContext`, `AssertionScore` - Assertion types
47
- - `CodeJudgeInput`, `CodeJudgeResult` - Code judge types
47
+ - `CodeGraderInput`, `CodeGraderResult` - Code grader types
48
48
  - `TraceSummary`, `Message`, `ToolCall` - Trace data types
49
49
  - `createTargetClient()` - LLM target proxy for evaluators
50
50
  - `z` - Re-exported Zod for custom config schemas
package/dist/index.cjs CHANGED
@@ -20,6 +20,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ CodeGraderInputSchema: () => CodeGraderInputSchema,
24
+ CodeGraderResultSchema: () => CodeGraderResultSchema,
23
25
  CodeJudgeInputSchema: () => CodeJudgeInputSchema,
24
26
  CodeJudgeResultSchema: () => CodeJudgeResultSchema,
25
27
  MessageSchema: () => MessageSchema,
@@ -31,6 +33,7 @@ __export(index_exports, {
31
33
  TraceSummarySchema: () => TraceSummarySchema,
32
34
  createTargetClient: () => createTargetClient,
33
35
  defineAssertion: () => defineAssertion,
36
+ defineCodeGrader: () => defineCodeGrader,
34
37
  defineCodeJudge: () => defineCodeJudge,
35
38
  definePromptTemplate: () => definePromptTemplate,
36
39
  z: () => import_zod2.z
@@ -71,7 +74,7 @@ var MessageSchema = import_zod.z.object({
71
74
  durationMs: import_zod.z.number().optional(),
72
75
  metadata: import_zod.z.record(import_zod.z.unknown()).optional()
73
76
  });
74
- var CodeJudgeInputSchema = import_zod.z.object({
77
+ var CodeGraderInputSchema = import_zod.z.object({
75
78
  question: import_zod.z.string(),
76
79
  criteria: import_zod.z.string(),
77
80
  expectedOutput: import_zod.z.array(MessageSchema),
@@ -93,7 +96,7 @@ var CodeJudgeInputSchema = import_zod.z.object({
93
96
  workspacePath: import_zod.z.string().nullable().optional(),
94
97
  config: import_zod.z.record(import_zod.z.unknown()).nullable().optional()
95
98
  });
96
- var CodeJudgeResultSchema = import_zod.z.object({
99
+ var CodeGraderResultSchema = import_zod.z.object({
97
100
  score: import_zod.z.number().min(0).max(1),
98
101
  hits: import_zod.z.array(import_zod.z.string()).optional().default([]),
99
102
  misses: import_zod.z.array(import_zod.z.string()).optional().default([]),
@@ -101,7 +104,9 @@ var CodeJudgeResultSchema = import_zod.z.object({
101
104
  /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
102
105
  details: import_zod.z.record(import_zod.z.unknown()).optional()
103
106
  });
104
- var PromptTemplateInputSchema = CodeJudgeInputSchema;
107
+ var PromptTemplateInputSchema = CodeGraderInputSchema;
108
+ var CodeJudgeInputSchema = CodeGraderInputSchema;
109
+ var CodeJudgeResultSchema = CodeGraderResultSchema;
105
110
 
106
111
  // src/target-client.ts
107
112
  var TargetNotAvailableError = class extends Error {
@@ -280,7 +285,7 @@ async function runAssertion(handler) {
280
285
  const stdin = readStdin();
281
286
  const rawInput = JSON.parse(stdin);
282
287
  const camelInput = toCamelCaseDeep(rawInput);
283
- const input = CodeJudgeInputSchema.parse(camelInput);
288
+ const input = CodeGraderInputSchema.parse(camelInput);
284
289
  if (input.outputPath && (input.output === null || input.output === void 0)) {
285
290
  let cachedOutput;
286
291
  const filePath = input.outputPath;
@@ -297,7 +302,7 @@ async function runAssertion(handler) {
297
302
  }
298
303
  const rawResult = await handler(input);
299
304
  const normalized = normalizeScore(rawResult);
300
- const result = CodeJudgeResultSchema.parse(normalized);
305
+ const result = CodeGraderResultSchema.parse(normalized);
301
306
  console.log(JSON.stringify(result, null, 2));
302
307
  } catch (error) {
303
308
  const errorMessage = formatError(error);
@@ -348,12 +353,12 @@ function formatError2(error) {
348
353
  }
349
354
  return String(error);
350
355
  }
351
- async function runCodeJudge(handler) {
356
+ async function runCodeGrader(handler) {
352
357
  try {
353
358
  const stdin = readStdin3();
354
359
  const rawInput = JSON.parse(stdin);
355
360
  const camelInput = toCamelCaseDeep(rawInput);
356
- const input = CodeJudgeInputSchema.parse(camelInput);
361
+ const input = CodeGraderInputSchema.parse(camelInput);
357
362
  if (input.outputPath && (input.output === null || input.output === void 0)) {
358
363
  let cachedOutput;
359
364
  const filePath = input.outputPath;
@@ -369,7 +374,7 @@ async function runCodeJudge(handler) {
369
374
  });
370
375
  }
371
376
  const rawResult = await handler(input);
372
- const result = CodeJudgeResultSchema.parse({
377
+ const result = CodeGraderResultSchema.parse({
373
378
  ...rawResult,
374
379
  score: clampScore2(rawResult.score)
375
380
  });
@@ -388,9 +393,10 @@ async function runCodeJudge(handler) {
388
393
  }
389
394
 
390
395
  // src/index.ts
391
- function defineCodeJudge(handler) {
392
- runCodeJudge(handler);
396
+ function defineCodeGrader(handler) {
397
+ runCodeGrader(handler);
393
398
  }
399
+ var defineCodeJudge = defineCodeGrader;
394
400
  function definePromptTemplate(handler) {
395
401
  runPromptTemplate(handler);
396
402
  }
@@ -399,6 +405,8 @@ function defineAssertion(handler) {
399
405
  }
400
406
  // Annotate the CommonJS export names for ESM import in node:
401
407
  0 && (module.exports = {
408
+ CodeGraderInputSchema,
409
+ CodeGraderResultSchema,
402
410
  CodeJudgeInputSchema,
403
411
  CodeJudgeResultSchema,
404
412
  MessageSchema,
@@ -410,6 +418,7 @@ function defineAssertion(handler) {
410
418
  TraceSummarySchema,
411
419
  createTargetClient,
412
420
  defineAssertion,
421
+ defineCodeGrader,
413
422
  defineCodeJudge,
414
423
  definePromptTemplate,
415
424
  z
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/schemas.ts","../src/target-client.ts","../src/assertion.ts","../src/case-conversion.ts","../src/prompt-template.ts","../src/runtime.ts"],"sourcesContent":["/**\n * AgentV Evaluation SDK\n *\n * Build custom evaluators for AI agent outputs.\n *\n * @example Custom assertion (simplest way to add evaluation logic)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ answer }) => ({\n * pass: answer.includes('hello'),\n * reasoning: 'Checks greeting',\n * }));\n * ```\n *\n * @example Code judge (full control)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeJudge } from '@agentv/eval';\n *\n * export default defineCodeJudge(({ trace, answer }) => ({\n * score: trace?.eventCount <= 5 ? 1.0 : 0.5,\n * hits: ['Efficient tool usage'],\n * misses: [],\n * }));\n * ```\n *\n * @example Code judge with target access (requires `target` config in YAML)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeJudge, createTargetClient } from '@agentv/eval';\n *\n * export default defineCodeJudge(async ({ question }) => {\n * const target = createTargetClient();\n * if (!target) {\n * return { score: 0, misses: ['Target not available'] };\n * }\n *\n * const response = await target.invoke({\n * question: `Evaluate: ${question}`,\n * systemPrompt: 'Respond with JSON: { \"score\": 0-1 }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.score ?? 0 };\n * });\n * ```\n *\n * @packageDocumentation\n */\n\n// Re-export schemas and types\nexport {\n CodeJudgeInputSchema,\n CodeJudgeResultSchema,\n TraceSummarySchema,\n MessageSchema,\n ToolCallSchema,\n TokenUsageSchema,\n PromptTemplateInputSchema,\n type CodeJudgeInput,\n type CodeJudgeResult,\n type TraceSummary,\n type Message,\n type ToolCall,\n type TokenUsage,\n type PromptTemplateInput,\n} from './schemas.js';\n\n// Re-export target client\nexport {\n createTargetClient,\n TargetNotAvailableError,\n TargetInvocationError,\n type TargetClient,\n type TargetInfo,\n type TargetInvokeRequest,\n type TargetInvokeResponse,\n} from './target-client.js';\n\n// Re-export Zod for typed config support\nexport { z } from 'zod';\n\n// Re-export assertion types\nexport type {\n AssertionContext,\n AssertionHandler,\n AssertionScore,\n AssertionType,\n} from './assertion.js';\n\nimport { type AssertionHandler, runAssertion } from './assertion.js';\nimport { type PromptTemplateHandler, runPromptTemplate } from './prompt-template.js';\nimport { type CodeJudgeHandler, runCodeJudge } from './runtime.js';\n\nexport type { CodeJudgeHandler };\nexport type { PromptTemplateHandler };\n\n/**\n * Define a code judge evaluator with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Validates the result and outputs JSON to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the input and returns a result\n *\n * @example\n * ```typescript\n * import { defineCodeJudge } from '@agentv/eval';\n *\n * export default defineCodeJudge(({ trace }) => {\n * if (!trace) {\n * return { score: 0.5, reasoning: 'No trace available' };\n * }\n *\n * const efficient = trace.eventCount <= 10;\n * return {\n * score: efficient ? 1.0 : 0.5,\n * hits: efficient ? ['Efficient execution'] : [],\n * misses: efficient ? [] : ['Too many tool calls'],\n * };\n * });\n * ```\n *\n * @example With typed config\n * ```typescript\n * import { defineCodeJudge, z } from '@agentv/eval';\n *\n * const ConfigSchema = z.object({\n * maxToolCalls: z.number().default(10),\n * });\n *\n * export default defineCodeJudge(({ trace, config }) => {\n * const { maxToolCalls } = ConfigSchema.parse(config ?? {});\n * // Use maxToolCalls...\n * });\n * ```\n */\nexport function defineCodeJudge(handler: CodeJudgeHandler): void {\n // Run immediately when module is loaded\n runCodeJudge(handler);\n}\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.question}\n * Answer: ${ctx.answer}\n *\n * ${ctx.referenceAnswer ? `Reference: ${ctx.referenceAnswer}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.question}\n * Candidate Answer: ${ctx.answer}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n\n/**\n * Define a custom assertion evaluator with automatic stdin/stdout handling.\n *\n * Assertions are the simplest way to add custom evaluation logic. They receive\n * the full evaluation context and return a pass/fail result with optional\n * granular scoring.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed context\n * 4. Normalizes the result (pass→score, clamp, etc.)\n * 5. Outputs JSON to stdout\n * 6. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the context and returns a result\n *\n * @example Simple pass/fail\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ answer }) => ({\n * pass: answer.toLowerCase().includes('hello'),\n * reasoning: 'Checks for greeting',\n * }));\n * ```\n *\n * @example Granular scoring\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ answer, trace }) => {\n * const hasContent = answer.length > 0 ? 0.5 : 0;\n * const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;\n * return {\n * score: hasContent + isEfficient,\n * hits: [\n * ...(hasContent ? ['Has content'] : []),\n * ...(isEfficient ? ['Efficient'] : []),\n * ],\n * };\n * });\n * ```\n */\nexport function defineAssertion(handler: AssertionHandler): void {\n runAssertion(handler);\n}\n","/**\n * Zod schemas for code judge input/output validation.\n * Provides both compile-time types and runtime validation.\n */\nimport { z } from 'zod';\n\n/**\n * Token usage metrics schema.\n */\nexport const TokenUsageSchema = z.object({\n input: z.number(),\n output: z.number(),\n cached: z.number().optional(),\n});\n\n/**\n * Trace summary schema (camelCase for TypeScript ergonomics).\n */\nexport const TraceSummarySchema = z.object({\n eventCount: z.number(),\n toolNames: z.array(z.string()),\n toolCallsByName: z.record(z.string(), z.number()),\n errorCount: z.number(),\n toolDurations: z.record(z.string(), z.array(z.number())).optional(),\n llmCallCount: z.number().optional(),\n});\n\n/**\n * Tool call schema.\n */\nexport const ToolCallSchema = z.object({\n tool: z.string(),\n input: z.unknown().optional(),\n output: z.unknown().optional(),\n id: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n});\n\n/**\n * Unified message schema for input, expected, and output messages.\n */\nexport const MessageSchema = z.object({\n role: z.enum(['assistant', 'user', 'system', 'tool']),\n content: z.union([z.string(), z.record(z.unknown()), z.array(z.record(z.unknown()))]).optional(),\n toolCalls: z.array(ToolCallSchema).optional(),\n name: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n metadata: z.record(z.unknown()).optional(),\n});\n\n/**\n * Code judge input schema (camelCase, converted from snake_case wire format).\n */\nexport const CodeJudgeInputSchema = z.object({\n question: z.string(),\n criteria: z.string(),\n expectedOutput: z.array(MessageSchema),\n referenceAnswer: z.string().optional(),\n answer: z.string(),\n output: z.array(MessageSchema).nullable().optional(),\n /** Path to a temp file containing the output JSON (used for large payloads). */\n outputPath: z.string().optional(),\n guidelineFiles: z.array(z.string()),\n inputFiles: z.array(z.string()),\n input: z.array(MessageSchema),\n trace: TraceSummarySchema.nullable().optional(),\n tokenUsage: TokenUsageSchema.nullable().optional(),\n costUsd: z.number().nullable().optional(),\n durationMs: z.number().nullable().optional(),\n startTime: z.string().nullable().optional(),\n endTime: z.string().nullable().optional(),\n fileChanges: z.string().nullable().optional(),\n workspacePath: z.string().nullable().optional(),\n config: z.record(z.unknown()).nullable().optional(),\n});\n\n/**\n * Code judge result schema (validated before output).\n */\nexport const CodeJudgeResultSchema = z.object({\n score: z.number().min(0).max(1),\n hits: z.array(z.string()).optional().default([]),\n misses: z.array(z.string()).optional().default([]),\n reasoning: z.string().optional(),\n /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */\n details: z.record(z.unknown()).optional(),\n});\n\n/**\n * Inferred types from schemas.\n */\nexport type CodeJudgeInput = z.infer<typeof CodeJudgeInputSchema>;\nexport type CodeJudgeResult = z.infer<typeof CodeJudgeResultSchema>;\nexport type TraceSummary = z.infer<typeof TraceSummarySchema>;\nexport type Message = z.infer<typeof MessageSchema>;\nexport type ToolCall = z.infer<typeof ToolCallSchema>;\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * Prompt template input schema (camelCase, converted from snake_case wire format).\n * Uses the same schema as CodeJudgeInput since the orchestrator sends identical payloads.\n */\nexport const PromptTemplateInputSchema = CodeJudgeInputSchema;\n\nexport type PromptTemplateInput = CodeJudgeInput;\n","/**\n * Client for invoking configured targets from code-judge scripts.\n *\n * Environment variables (set automatically by AgentV when `target` config is present):\n * - AGENTV_TARGET_PROXY_URL: The URL of the local proxy server\n * - AGENTV_TARGET_PROXY_TOKEN: Bearer token for authentication\n */\n\nimport type { TokenUsage } from './schemas.js';\n\n/**\n * Request to invoke the target\n */\nexport interface TargetInvokeRequest {\n readonly question: string;\n readonly systemPrompt?: string;\n readonly evalCaseId?: string;\n readonly attempt?: number;\n /** Optional target override - use a different target for this invocation */\n readonly target?: string;\n}\n\n/**\n * Response from a target invocation\n */\nexport interface TargetInvokeResponse {\n readonly output: readonly unknown[];\n readonly rawText?: string;\n readonly tokenUsage?: TokenUsage;\n}\n\n/**\n * Information about the target proxy configuration\n */\nexport interface TargetInfo {\n /** Name of the default target being used */\n readonly targetName: string;\n /** Maximum number of calls allowed */\n readonly maxCalls: number;\n /** Current number of calls made */\n readonly callCount: number;\n /** List of all available target names */\n readonly availableTargets: readonly string[];\n}\n\n/**\n * Target client for making target invocations\n */\nexport interface TargetClient {\n /**\n * Invoke the configured target with a prompt.\n * @param request - The question and optional system prompt\n * @returns The target's response with output messages and optional raw text\n */\n invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse>;\n\n /**\n * Invoke the target with multiple requests in sequence.\n * Each request counts toward the max_calls limit.\n * @param requests - Array of target requests\n * @returns Array of target responses\n */\n invokeBatch(requests: readonly TargetInvokeRequest[]): Promise<readonly TargetInvokeResponse[]>;\n\n /**\n * Get information about the target proxy configuration.\n * Returns the default target name, max calls, current call count, and available targets.\n */\n getInfo(): Promise<TargetInfo>;\n}\n\n/**\n * Error thrown when target proxy is not available\n */\nexport class TargetNotAvailableError extends Error {\n constructor(message: string) {\n super(message);\n this.name = 'TargetNotAvailableError';\n }\n}\n\n/**\n * Error thrown when target invocation fails\n */\nexport class TargetInvocationError extends Error {\n readonly statusCode?: number;\n\n constructor(message: string, statusCode?: number) {\n super(message);\n this.name = 'TargetInvocationError';\n this.statusCode = statusCode;\n }\n}\n\n/**\n * Create a target client from environment variables.\n *\n * This function reads the proxy URL and token from environment variables\n * that are automatically set by AgentV when a `target` config block is present\n * on a `code_judge` evaluator.\n *\n * @returns A target client if environment variables are set, otherwise undefined\n * @throws TargetNotAvailableError if token is missing when URL is present\n *\n * @example\n * ```typescript\n * import { createTargetClient, defineCodeJudge } from '@agentv/eval';\n *\n * export default defineCodeJudge(async ({ question, criteria }) => {\n * const target = createTargetClient();\n *\n * if (!target) {\n * // Target not available - no target config on this evaluator\n * return { score: 0.5, reasoning: 'Target not available' };\n * }\n *\n * const response = await target.invoke({\n * question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,\n * systemPrompt: 'You are an expert evaluator. Respond with JSON: { \"correct\": true/false }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.correct ? 1.0 : 0.0 };\n * });\n * ```\n */\nexport function createTargetClient(): TargetClient | undefined {\n const proxyUrl = process.env.AGENTV_TARGET_PROXY_URL;\n const proxyToken = process.env.AGENTV_TARGET_PROXY_TOKEN;\n\n if (!proxyUrl) {\n return undefined;\n }\n\n if (!proxyToken) {\n throw new TargetNotAvailableError(\n 'AGENTV_TARGET_PROXY_URL is set but AGENTV_TARGET_PROXY_TOKEN is missing',\n );\n }\n\n return createTargetClientInternal(proxyUrl, proxyToken);\n}\n\n/**\n * Internal: Create a target client with explicit URL and token.\n * Exported for testing only - use createTargetClient() in production.\n */\nexport function createTargetClientInternal(url: string, token: string): TargetClient {\n const headers = {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${token}`,\n };\n\n return {\n async invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse> {\n const response = await fetch(`${url}/invoke`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n question: request.question,\n systemPrompt: request.systemPrompt,\n evalCaseId: request.evalCaseId,\n attempt: request.attempt,\n target: request.target,\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInvokeResponse;\n },\n\n async invokeBatch(\n requests: readonly TargetInvokeRequest[],\n ): Promise<readonly TargetInvokeResponse[]> {\n const response = await fetch(`${url}/invokeBatch`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n requests: requests.map((r) => ({\n question: r.question,\n systemPrompt: r.systemPrompt,\n evalCaseId: r.evalCaseId,\n attempt: r.attempt,\n target: r.target,\n })),\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n const result = (await response.json()) as { responses: TargetInvokeResponse[] };\n return result.responses;\n },\n\n async getInfo(): Promise<TargetInfo> {\n const response = await fetch(`${url}/info`, {\n method: 'GET',\n headers,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInfo;\n },\n };\n}\n","/**\n * Runtime for custom assertion evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n *\n * Assertions receive the same input as code judges but use a simplified result\n * contract focused on pass/fail with optional score granularity.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport {\n type CodeJudgeInput,\n CodeJudgeInputSchema,\n type CodeJudgeResult,\n CodeJudgeResultSchema,\n} from './schemas.js';\n\n/**\n * Context provided to assertion handlers.\n * Same shape as CodeJudgeInput — assertions receive full evaluation context.\n */\nexport type AssertionContext = CodeJudgeInput;\n\n/**\n * Known built-in assertion types. Custom types are extensible via string.\n *\n * Use in EVAL.yaml `assert` blocks:\n * ```yaml\n * assert:\n * - type: contains\n * value: \"Paris\"\n * ```\n *\n * Custom types registered via `.agentv/assertions/` or `defineAssertion()`\n * are also valid — the `string & {}` escape hatch provides autocomplete\n * for known types while accepting any string.\n */\nexport type AssertionType =\n // kebab-case (canonical internal form)\n | 'llm-judge'\n | 'code-judge'\n | 'rubrics'\n | 'composite'\n | 'tool-trajectory'\n | 'field-accuracy'\n | 'latency'\n | 'cost'\n | 'token-usage'\n | 'execution-metrics'\n | 'agent-judge'\n | 'contains'\n | 'contains-any'\n | 'contains-all'\n | 'icontains'\n | 'icontains-any'\n | 'icontains-all'\n | 'starts-with'\n | 'ends-with'\n | 'equals'\n | 'regex'\n | 'is-json'\n // snake_case (legacy aliases, still accepted)\n | 'llm_judge'\n | 'code_judge'\n | 'tool_trajectory'\n | 'field_accuracy'\n | 'token_usage'\n | 'execution_metrics'\n | 'agent_judge'\n | 'contains_any'\n | 'contains_all'\n | 'icontains_any'\n | 'icontains_all'\n | 'starts_with'\n | 'ends_with'\n | 'is_json'\n | (string & {});\n\n/**\n * Result returned from an assertion handler.\n *\n * @example Pass with reasoning\n * ```ts\n * { pass: true, reasoning: 'Output contains expected keywords' }\n * ```\n *\n * @example Fail with misses\n * ```ts\n * { pass: false, misses: ['Missing required header'], score: 0.3 }\n * ```\n *\n * @example Granular score (0-1)\n * ```ts\n * { score: 0.75, hits: ['Format correct', 'Content relevant'], misses: ['Missing citation'] }\n * ```\n */\nexport interface AssertionScore {\n /** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */\n readonly pass?: boolean;\n /** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */\n readonly score?: number;\n /** Aspects that passed. */\n readonly hits?: readonly string[];\n /** Aspects that failed. */\n readonly misses?: readonly string[];\n /** Human-readable explanation. */\n readonly reasoning?: string;\n /** Optional structured details for domain-specific metrics. */\n readonly details?: Record<string, unknown>;\n}\n\n/**\n * Handler function type for assertions.\n */\nexport type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;\n\n/**\n * Read stdin synchronously.\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Normalize an AssertionScore to a CodeJudgeResult for wire compatibility.\n */\nfunction normalizeScore(result: AssertionScore): CodeJudgeResult {\n let score: number;\n if (result.score !== undefined) {\n score = clampScore(result.score);\n } else if (result.pass !== undefined) {\n score = result.pass ? 1 : 0;\n } else {\n score = 0;\n }\n\n return {\n score,\n hits: result.hits ? [...result.hits] : [],\n misses: result.misses ? [...result.misses] : [],\n reasoning: result.reasoning,\n details: result.details,\n };\n}\n\n/**\n * Run an assertion handler with full stdin/stdout handling.\n * This is the internal implementation called by defineAssertion.\n */\nexport async function runAssertion(handler: AssertionHandler): Promise<void> {\n try {\n const stdin = readStdin();\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n const camelInput = toCamelCaseDeep(rawInput);\n const input = CodeJudgeInputSchema.parse(camelInput);\n\n // Lazy file-backed output loading\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeJudgeInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n const rawResult = await handler(input);\n const normalized = normalizeScore(rawResult);\n const result = CodeJudgeResultSchema.parse(normalized);\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n const errorMessage = formatError(error);\n const errorResult: CodeJudgeResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Assertion failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n","/**\n * Case conversion utilities for JSON payloads.\n * Converts between snake_case (wire format) and camelCase (TypeScript).\n */\n\nfunction toCamelCase(str: string): string {\n // Don't convert keys that start with uppercase (proper nouns/tool names)\n if (/^[A-Z]/.test(str)) {\n return str;\n }\n return str.replace(/_([a-z0-9])/g, (_, letter) => letter.toUpperCase());\n}\n\n/**\n * Recursively converts all keys in an object from snake_case to camelCase.\n * Used to map wire payloads into TypeScript-friendly shapes.\n *\n * @param obj - The object to convert (can be any JSON-serializable value)\n * @returns A new object with all keys converted to camelCase\n */\nexport function toCamelCaseDeep(obj: unknown): unknown {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => toCamelCaseDeep(item));\n }\n\n if (typeof obj === 'object') {\n const result: Record<string, unknown> = {};\n for (const [key, value] of Object.entries(obj)) {\n const camelKey = toCamelCase(key);\n result[camelKey] = toCamelCaseDeep(value);\n }\n return result;\n }\n\n return obj;\n}\n","/**\n * Runtime for prompt template evaluators.\n * Handles stdin parsing, validation, error handling, and string output.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { type PromptTemplateInput, PromptTemplateInputSchema } from './schemas.js';\n\n/**\n * Handler function type for prompt templates.\n * Returns the prompt string to use for evaluation.\n */\nexport type PromptTemplateHandler = (input: PromptTemplateInput) => string | Promise<string>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Run a prompt template handler with full stdin/stdout handling.\n * This is the internal implementation called by definePromptTemplate.\n */\nexport async function runPromptTemplate(handler: PromptTemplateHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = PromptTemplateInputSchema.parse(camelInput);\n\n // 5. Run handler\n const prompt = await handler(input);\n\n // 6. Output raw string (not JSON) - the prompt itself\n console.log(prompt);\n } catch (error) {\n // Output error to stderr and exit with non-zero code\n console.error(error instanceof Error ? error.message : String(error));\n process.exit(1);\n }\n}\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.question}\n * Answer: ${ctx.answer}\n *\n * ${ctx.referenceAnswer ? `Reference: ${ctx.referenceAnswer}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.question}\n * Candidate Answer: ${ctx.answer}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n *\n * @example Async handler\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate(async (ctx) => {\n * // Async operations are supported\n * return `Question: ${ctx.question}\\nAnswer: ${ctx.answer}`;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n","/**\n * Runtime for code judge evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport {\n type CodeJudgeInput,\n CodeJudgeInputSchema,\n type CodeJudgeResult,\n CodeJudgeResultSchema,\n} from './schemas.js';\n\n/**\n * Handler function type for code judges.\n */\nexport type CodeJudgeHandler = (\n input: CodeJudgeInput,\n) => CodeJudgeResult | Promise<CodeJudgeResult>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\n/**\n * Format an error for output.\n */\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Run a code judge handler with full stdin/stdout handling.\n * This is the internal implementation called by defineCodeJudge.\n */\nexport async function runCodeJudge(handler: CodeJudgeHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = CodeJudgeInputSchema.parse(camelInput);\n\n // 5. Set up lazy file-backed output loading if applicable\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeJudgeInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // 6. Run handler\n const rawResult = await handler(input);\n\n // 7. Validate and normalize output\n const result = CodeJudgeResultSchema.parse({\n ...rawResult,\n score: clampScore(rawResult.score),\n });\n\n // 8. Output JSON\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n // Output failure result\n const errorMessage = formatError(error);\n const errorResult: CodeJudgeResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Evaluation failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,iBAAkB;AAKX,IAAM,mBAAmB,aAAE,OAAO;AAAA,EACvC,OAAO,aAAE,OAAO;AAAA,EAChB,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,OAAO,EAAE,SAAS;AAC9B,CAAC;AAKM,IAAM,qBAAqB,aAAE,OAAO;AAAA,EACzC,YAAY,aAAE,OAAO;AAAA,EACrB,WAAW,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC7B,iBAAiB,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,OAAO,CAAC;AAAA,EAChD,YAAY,aAAE,OAAO;AAAA,EACrB,eAAe,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,MAAM,aAAE,OAAO,CAAC,CAAC,EAAE,SAAS;AAAA,EAClE,cAAc,aAAE,OAAO,EAAE,SAAS;AACpC,CAAC;AAKM,IAAM,iBAAiB,aAAE,OAAO;AAAA,EACrC,MAAM,aAAE,OAAO;AAAA,EACf,OAAO,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC5B,QAAQ,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC7B,IAAI,aAAE,OAAO,EAAE,SAAS;AAAA,EACxB,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAKM,IAAM,gBAAgB,aAAE,OAAO;AAAA,EACpC,MAAM,aAAE,KAAK,CAAC,aAAa,QAAQ,UAAU,MAAM,CAAC;AAAA,EACpD,SAAS,aAAE,MAAM,CAAC,aAAE,OAAO,GAAG,aAAE,OAAO,aAAE,QAAQ,CAAC,GAAG,aAAE,MAAM,aAAE,OAAO,aAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS;AAAA,EAC/F,WAAW,aAAE,MAAM,cAAc,EAAE,SAAS;AAAA,EAC5C,MAAM,aAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,UAAU,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC3C,CAAC;AAKM,IAAM,uBAAuB,aAAE,OAAO;AAAA,EAC3C,UAAU,aAAE,OAAO;AAAA,EACnB,UAAU,aAAE,OAAO;AAAA,EACnB,gBAAgB,aAAE,MAAM,aAAa;AAAA,EACrC,iBAAiB,aAAE,OAAO,EAAE,SAAS;AAAA,EACrC,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,MAAM,aAAa,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAEnD,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,gBAAgB,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAClC,YAAY,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC9B,OAAO,aAAE,MAAM,aAAa;AAAA,EAC5B,OAAO,mBAAmB,SAAS,EAAE,SAAS;AAAA,EAC9C,YAAY,iBAAiB,SAAS,EAAE,SAAS;AAAA,EACjD,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,YAAY,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3C,WAAW,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC1C,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,aAAa,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC5C,eAAe,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,QAAQ,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,SAAS;AACpD,CAAC;AAKM,IAAM,wBAAwB,aAAE,OAAO;AAAA,EAC5C,OAAO,aAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAC9B,MAAM,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC/C,QAAQ,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EACjD,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,SAAS,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC1C,CAAC;AAgBM,IAAM,4BAA4B;;;AChClC,IAAM,0BAAN,cAAsC,MAAM;AAAA,EACjD,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAKO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC;AAAA,EAET,YAAY,SAAiB,YAAqB;AAChD,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,aAAa;AAAA,EACpB;AACF;AAkCO,SAAS,qBAA+C;AAC7D,QAAM,WAAW,QAAQ,IAAI;AAC7B,QAAM,aAAa,QAAQ,IAAI;AAE/B,MAAI,CAAC,UAAU;AACb,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,YAAY;AACf,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,SAAO,2BAA2B,UAAU,UAAU;AACxD;AAMO,SAAS,2BAA2B,KAAa,OAA6B;AACnF,QAAM,UAAU;AAAA,IACd,gBAAgB;AAAA,IAChB,eAAe,UAAU,KAAK;AAAA,EAChC;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,SAA6D;AACxE,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,WAAW;AAAA,QAC5C,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,QAAQ;AAAA,UAClB,cAAc,QAAQ;AAAA,UACtB,YAAY,QAAQ;AAAA,UACpB,SAAS,QAAQ;AAAA,UACjB,QAAQ,QAAQ;AAAA,QAClB,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,IAEA,MAAM,YACJ,UAC0C;AAC1C,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,gBAAgB;AAAA,QACjD,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,SAAS,IAAI,CAAC,OAAO;AAAA,YAC7B,UAAU,EAAE;AAAA,YACZ,cAAc,EAAE;AAAA,YAChB,YAAY,EAAE;AAAA,YACd,SAAS,EAAE;AAAA,YACX,QAAQ,EAAE;AAAA,UACZ,EAAE;AAAA,QACJ,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,YAAM,SAAU,MAAM,SAAS,KAAK;AACpC,aAAO,OAAO;AAAA,IAChB;AAAA,IAEA,MAAM,UAA+B;AACnC,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,SAAS;AAAA,QAC1C,QAAQ;AAAA,QACR;AAAA,MACF,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,EACF;AACF;;;AF1JA,IAAAA,cAAkB;;;AG3ElB,qBAA6B;;;ACF7B,SAAS,YAAY,KAAqB;AAExC,MAAI,SAAS,KAAK,GAAG,GAAG;AACtB,WAAO;AAAA,EACT;AACA,SAAO,IAAI,QAAQ,gBAAgB,CAAC,GAAG,WAAW,OAAO,YAAY,CAAC;AACxE;AASO,SAAS,gBAAgB,KAAuB;AACrD,MAAI,QAAQ,QAAQ,QAAQ,QAAW;AACrC,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC;AAAA,EAChD;AAEA,MAAI,OAAO,QAAQ,UAAU;AAC3B,UAAM,SAAkC,CAAC;AACzC,eAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC9C,YAAM,WAAW,YAAY,GAAG;AAChC,aAAO,QAAQ,IAAI,gBAAgB,KAAK;AAAA,IAC1C;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ADgFA,SAAS,YAAoB;AAC3B,aAAO,6BAAa,GAAG,MAAM;AAC/B;AAKA,SAAS,WAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAKA,SAAS,eAAe,QAAyC;AAC/D,MAAI;AACJ,MAAI,OAAO,UAAU,QAAW;AAC9B,YAAQ,WAAW,OAAO,KAAK;AAAA,EACjC,WAAW,OAAO,SAAS,QAAW;AACpC,YAAQ,OAAO,OAAO,IAAI;AAAA,EAC5B,OAAO;AACL,YAAQ;AAAA,EACV;AAEA,SAAO;AAAA,IACL;AAAA,IACA,MAAM,OAAO,OAAO,CAAC,GAAG,OAAO,IAAI,IAAI,CAAC;AAAA,IACxC,QAAQ,OAAO,SAAS,CAAC,GAAG,OAAO,MAAM,IAAI,CAAC;AAAA,IAC9C,WAAW,OAAO;AAAA,IAClB,SAAS,OAAO;AAAA,EAClB;AACF;AAMA,eAAsB,aAAa,SAA0C;AAC3E,MAAI;AACF,UAAM,QAAQ,UAAU;AACxB,UAAM,WAAW,KAAK,MAAM,KAAK;AACjC,UAAM,aAAa,gBAAgB,QAAQ;AAC3C,UAAM,QAAQ,qBAAqB,MAAM,UAAU;AAGnD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,6BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAEA,UAAM,YAAY,MAAM,QAAQ,KAAK;AACrC,UAAM,aAAa,eAAe,SAAS;AAC3C,UAAM,SAAS,sBAAsB,MAAM,UAAU;AACrD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AACd,UAAM,eAAe,YAAY,KAAK;AACtC,UAAM,cAA+B;AAAA,MACnC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,qBAAqB,YAAY;AAAA,IAC9C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AExMA,IAAAC,kBAA6B;AAc7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAMA,eAAsB,kBAAkB,SAA+C;AACrF,MAAI;AAEF,UAAM,QAAQA,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,0BAA0B,MAAM,UAAU;AAGxD,UAAM,SAAS,MAAM,QAAQ,KAAK;AAGlC,YAAQ,IAAI,MAAM;AAAA,EACpB,SAAS,OAAO;AAEd,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AC9CA,IAAAC,kBAA6B;AAoB7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAKA,SAASC,YAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAKA,SAASC,aAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAMA,eAAsB,aAAa,SAA0C;AAC3E,MAAI;AAEF,UAAM,QAAQF,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,qBAAqB,MAAM,UAAU;AAGnD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,8BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,UAAM,YAAY,MAAM,QAAQ,KAAK;AAGrC,UAAM,SAAS,sBAAsB,MAAM;AAAA,MACzC,GAAG;AAAA,MACH,OAAOC,YAAW,UAAU,KAAK;AAAA,IACnC,CAAC;AAGD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AAEd,UAAM,eAAeC,aAAY,KAAK;AACtC,UAAM,cAA+B;AAAA,MACnC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,sBAAsB,YAAY;AAAA,IAC/C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;ANsCO,SAAS,gBAAgB,SAAiC;AAE/D,eAAa,OAAO;AACtB;AAwCO,SAAS,qBAAqB,SAAsC;AAEzE,oBAAkB,OAAO;AAC3B;AA8CO,SAAS,gBAAgB,SAAiC;AAC/D,eAAa,OAAO;AACtB;","names":["import_zod","import_node_fs","readStdin","import_node_fs","readStdin","clampScore","formatError"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/schemas.ts","../src/target-client.ts","../src/assertion.ts","../src/case-conversion.ts","../src/prompt-template.ts","../src/runtime.ts"],"sourcesContent":["/**\n * AgentV Evaluation SDK\n *\n * Build custom evaluators for AI agent outputs.\n *\n * @example Custom assertion (simplest way to add evaluation logic)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ answer }) => ({\n * pass: answer.includes('hello'),\n * reasoning: 'Checks greeting',\n * }));\n * ```\n *\n * @example Code grader (full control)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace, answer }) => ({\n * score: trace?.eventCount <= 5 ? 1.0 : 0.5,\n * hits: ['Efficient tool usage'],\n * misses: [],\n * }));\n * ```\n *\n * @example Code grader with target access (requires `target` config in YAML)\n * ```typescript\n * #!/usr/bin/env bun\n * import { defineCodeGrader, createTargetClient } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ question }) => {\n * const target = createTargetClient();\n * if (!target) {\n * return { score: 0, misses: ['Target not available'] };\n * }\n *\n * const response = await target.invoke({\n * question: `Evaluate: ${question}`,\n * systemPrompt: 'Respond with JSON: { \"score\": 0-1 }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.score ?? 0 };\n * });\n * ```\n *\n * @packageDocumentation\n */\n\n// Re-export schemas and types\nexport {\n CodeGraderInputSchema,\n CodeGraderResultSchema,\n TraceSummarySchema,\n MessageSchema,\n ToolCallSchema,\n TokenUsageSchema,\n PromptTemplateInputSchema,\n type CodeGraderInput,\n type CodeGraderResult,\n type TraceSummary,\n type Message,\n type ToolCall,\n type TokenUsage,\n type PromptTemplateInput,\n // Backward-compat aliases (deprecated)\n CodeJudgeInputSchema,\n CodeJudgeResultSchema,\n type CodeJudgeInput,\n type CodeJudgeResult,\n} from './schemas.js';\n\n// Re-export target client\nexport {\n createTargetClient,\n TargetNotAvailableError,\n TargetInvocationError,\n type TargetClient,\n type TargetInfo,\n type TargetInvokeRequest,\n type TargetInvokeResponse,\n} from './target-client.js';\n\n// Re-export Zod for typed config support\nexport { z } from 'zod';\n\n// Re-export assertion types\nexport type {\n AssertionContext,\n AssertionHandler,\n AssertionScore,\n AssertionType,\n} from './assertion.js';\n\nimport { type AssertionHandler, runAssertion } from './assertion.js';\nimport { type PromptTemplateHandler, runPromptTemplate } from './prompt-template.js';\nimport { type CodeGraderHandler, type CodeJudgeHandler, runCodeGrader } from './runtime.js';\n\nexport type { CodeGraderHandler };\n/** @deprecated Use CodeGraderHandler */\nexport type { CodeJudgeHandler };\nexport type { PromptTemplateHandler };\n\n/**\n * Define a code grader evaluator with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Validates the result and outputs JSON to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the input and returns a result\n *\n * @example\n * ```typescript\n * import { defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(({ trace }) => {\n * if (!trace) {\n * return { score: 0.5, reasoning: 'No trace available' };\n * }\n *\n * const efficient = trace.eventCount <= 10;\n * return {\n * score: efficient ? 1.0 : 0.5,\n * hits: efficient ? ['Efficient execution'] : [],\n * misses: efficient ? [] : ['Too many tool calls'],\n * };\n * });\n * ```\n *\n * @example With typed config\n * ```typescript\n * import { defineCodeGrader, z } from '@agentv/eval';\n *\n * const ConfigSchema = z.object({\n * maxToolCalls: z.number().default(10),\n * });\n *\n * export default defineCodeGrader(({ trace, config }) => {\n * const { maxToolCalls } = ConfigSchema.parse(config ?? {});\n * // Use maxToolCalls...\n * });\n * ```\n */\nexport function defineCodeGrader(handler: CodeGraderHandler): void {\n // Run immediately when module is loaded\n runCodeGrader(handler);\n}\n\n/** @deprecated Use defineCodeGrader */\nexport const defineCodeJudge = defineCodeGrader;\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.question}\n * Answer: ${ctx.answer}\n *\n * ${ctx.referenceAnswer ? `Reference: ${ctx.referenceAnswer}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.question}\n * Candidate Answer: ${ctx.answer}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n\n/**\n * Define a custom assertion evaluator with automatic stdin/stdout handling.\n *\n * Assertions are the simplest way to add custom evaluation logic. They receive\n * the full evaluation context and return a pass/fail result with optional\n * granular scoring.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed context\n * 4. Normalizes the result (pass→score, clamp, etc.)\n * 5. Outputs JSON to stdout\n * 6. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that evaluates the context and returns a result\n *\n * @example Simple pass/fail\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ answer }) => ({\n * pass: answer.toLowerCase().includes('hello'),\n * reasoning: 'Checks for greeting',\n * }));\n * ```\n *\n * @example Granular scoring\n * ```typescript\n * import { defineAssertion } from '@agentv/eval';\n *\n * export default defineAssertion(({ answer, trace }) => {\n * const hasContent = answer.length > 0 ? 0.5 : 0;\n * const isEfficient = (trace?.eventCount ?? 0) <= 5 ? 0.5 : 0;\n * return {\n * score: hasContent + isEfficient,\n * hits: [\n * ...(hasContent ? ['Has content'] : []),\n * ...(isEfficient ? ['Efficient'] : []),\n * ],\n * };\n * });\n * ```\n */\nexport function defineAssertion(handler: AssertionHandler): void {\n runAssertion(handler);\n}\n","/**\n * Zod schemas for code grader input/output validation.\n * Provides both compile-time types and runtime validation.\n */\nimport { z } from 'zod';\n\n/**\n * Token usage metrics schema.\n */\nexport const TokenUsageSchema = z.object({\n input: z.number(),\n output: z.number(),\n cached: z.number().optional(),\n});\n\n/**\n * Trace summary schema (camelCase for TypeScript ergonomics).\n */\nexport const TraceSummarySchema = z.object({\n eventCount: z.number(),\n toolNames: z.array(z.string()),\n toolCallsByName: z.record(z.string(), z.number()),\n errorCount: z.number(),\n toolDurations: z.record(z.string(), z.array(z.number())).optional(),\n llmCallCount: z.number().optional(),\n});\n\n/**\n * Tool call schema.\n */\nexport const ToolCallSchema = z.object({\n tool: z.string(),\n input: z.unknown().optional(),\n output: z.unknown().optional(),\n id: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n});\n\n/**\n * Unified message schema for input, expected, and output messages.\n */\nexport const MessageSchema = z.object({\n role: z.enum(['assistant', 'user', 'system', 'tool']),\n content: z.union([z.string(), z.record(z.unknown()), z.array(z.record(z.unknown()))]).optional(),\n toolCalls: z.array(ToolCallSchema).optional(),\n name: z.string().optional(),\n startTime: z.string().optional(),\n endTime: z.string().optional(),\n durationMs: z.number().optional(),\n metadata: z.record(z.unknown()).optional(),\n});\n\n/**\n * Code grader input schema (camelCase, converted from snake_case wire format).\n */\nexport const CodeGraderInputSchema = z.object({\n question: z.string(),\n criteria: z.string(),\n expectedOutput: z.array(MessageSchema),\n referenceAnswer: z.string().optional(),\n answer: z.string(),\n output: z.array(MessageSchema).nullable().optional(),\n /** Path to a temp file containing the output JSON (used for large payloads). */\n outputPath: z.string().optional(),\n guidelineFiles: z.array(z.string()),\n inputFiles: z.array(z.string()),\n input: z.array(MessageSchema),\n trace: TraceSummarySchema.nullable().optional(),\n tokenUsage: TokenUsageSchema.nullable().optional(),\n costUsd: z.number().nullable().optional(),\n durationMs: z.number().nullable().optional(),\n startTime: z.string().nullable().optional(),\n endTime: z.string().nullable().optional(),\n fileChanges: z.string().nullable().optional(),\n workspacePath: z.string().nullable().optional(),\n config: z.record(z.unknown()).nullable().optional(),\n});\n\n/**\n * Code grader result schema (validated before output).\n */\nexport const CodeGraderResultSchema = z.object({\n score: z.number().min(0).max(1),\n hits: z.array(z.string()).optional().default([]),\n misses: z.array(z.string()).optional().default([]),\n reasoning: z.string().optional(),\n /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */\n details: z.record(z.unknown()).optional(),\n});\n\n/**\n * Inferred types from schemas.\n */\nexport type CodeGraderInput = z.infer<typeof CodeGraderInputSchema>;\nexport type CodeGraderResult = z.infer<typeof CodeGraderResultSchema>;\nexport type TraceSummary = z.infer<typeof TraceSummarySchema>;\nexport type Message = z.infer<typeof MessageSchema>;\nexport type ToolCall = z.infer<typeof ToolCallSchema>;\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * Prompt template input schema (camelCase, converted from snake_case wire format).\n * Uses the same schema as CodeGraderInput since the orchestrator sends identical payloads.\n */\nexport const PromptTemplateInputSchema = CodeGraderInputSchema;\n\nexport type PromptTemplateInput = CodeGraderInput;\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderInputSchema */\nexport const CodeJudgeInputSchema = CodeGraderInputSchema;\n/** @deprecated Use CodeGraderResultSchema */\nexport const CodeJudgeResultSchema = CodeGraderResultSchema;\n/** @deprecated Use CodeGraderInput */\nexport type CodeJudgeInput = CodeGraderInput;\n/** @deprecated Use CodeGraderResult */\nexport type CodeJudgeResult = CodeGraderResult;\n","/**\n * Client for invoking configured targets from code-grader scripts.\n *\n * Environment variables (set automatically by AgentV when `target` config is present):\n * - AGENTV_TARGET_PROXY_URL: The URL of the local proxy server\n * - AGENTV_TARGET_PROXY_TOKEN: Bearer token for authentication\n */\n\nimport type { TokenUsage } from './schemas.js';\n\n/**\n * Request to invoke the target\n */\nexport interface TargetInvokeRequest {\n readonly question: string;\n readonly systemPrompt?: string;\n readonly evalCaseId?: string;\n readonly attempt?: number;\n /** Optional target override - use a different target for this invocation */\n readonly target?: string;\n}\n\n/**\n * Response from a target invocation\n */\nexport interface TargetInvokeResponse {\n readonly output: readonly unknown[];\n readonly rawText?: string;\n readonly tokenUsage?: TokenUsage;\n}\n\n/**\n * Information about the target proxy configuration\n */\nexport interface TargetInfo {\n /** Name of the default target being used */\n readonly targetName: string;\n /** Maximum number of calls allowed */\n readonly maxCalls: number;\n /** Current number of calls made */\n readonly callCount: number;\n /** List of all available target names */\n readonly availableTargets: readonly string[];\n}\n\n/**\n * Target client for making target invocations\n */\nexport interface TargetClient {\n /**\n * Invoke the configured target with a prompt.\n * @param request - The question and optional system prompt\n * @returns The target's response with output messages and optional raw text\n */\n invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse>;\n\n /**\n * Invoke the target with multiple requests in sequence.\n * Each request counts toward the max_calls limit.\n * @param requests - Array of target requests\n * @returns Array of target responses\n */\n invokeBatch(requests: readonly TargetInvokeRequest[]): Promise<readonly TargetInvokeResponse[]>;\n\n /**\n * Get information about the target proxy configuration.\n * Returns the default target name, max calls, current call count, and available targets.\n */\n getInfo(): Promise<TargetInfo>;\n}\n\n/**\n * Error thrown when target proxy is not available\n */\nexport class TargetNotAvailableError extends Error {\n constructor(message: string) {\n super(message);\n this.name = 'TargetNotAvailableError';\n }\n}\n\n/**\n * Error thrown when target invocation fails\n */\nexport class TargetInvocationError extends Error {\n readonly statusCode?: number;\n\n constructor(message: string, statusCode?: number) {\n super(message);\n this.name = 'TargetInvocationError';\n this.statusCode = statusCode;\n }\n}\n\n/**\n * Create a target client from environment variables.\n *\n * This function reads the proxy URL and token from environment variables\n * that are automatically set by AgentV when a `target` config block is present\n * on a `code_grader` (or `code_judge`) evaluator.\n *\n * @returns A target client if environment variables are set, otherwise undefined\n * @throws TargetNotAvailableError if token is missing when URL is present\n *\n * @example\n * ```typescript\n * import { createTargetClient, defineCodeGrader } from '@agentv/eval';\n *\n * export default defineCodeGrader(async ({ question, criteria }) => {\n * const target = createTargetClient();\n *\n * if (!target) {\n * // Target not available - no target config on this evaluator\n * return { score: 0.5, reasoning: 'Target not available' };\n * }\n *\n * const response = await target.invoke({\n * question: `Is this answer correct? Question: ${question}, Expected: ${criteria}`,\n * systemPrompt: 'You are an expert evaluator. Respond with JSON: { \"correct\": true/false }'\n * });\n *\n * const result = JSON.parse(response.rawText ?? '{}');\n * return { score: result.correct ? 1.0 : 0.0 };\n * });\n * ```\n */\nexport function createTargetClient(): TargetClient | undefined {\n const proxyUrl = process.env.AGENTV_TARGET_PROXY_URL;\n const proxyToken = process.env.AGENTV_TARGET_PROXY_TOKEN;\n\n if (!proxyUrl) {\n return undefined;\n }\n\n if (!proxyToken) {\n throw new TargetNotAvailableError(\n 'AGENTV_TARGET_PROXY_URL is set but AGENTV_TARGET_PROXY_TOKEN is missing',\n );\n }\n\n return createTargetClientInternal(proxyUrl, proxyToken);\n}\n\n/**\n * Internal: Create a target client with explicit URL and token.\n * Exported for testing only - use createTargetClient() in production.\n */\nexport function createTargetClientInternal(url: string, token: string): TargetClient {\n const headers = {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${token}`,\n };\n\n return {\n async invoke(request: TargetInvokeRequest): Promise<TargetInvokeResponse> {\n const response = await fetch(`${url}/invoke`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n question: request.question,\n systemPrompt: request.systemPrompt,\n evalCaseId: request.evalCaseId,\n attempt: request.attempt,\n target: request.target,\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInvokeResponse;\n },\n\n async invokeBatch(\n requests: readonly TargetInvokeRequest[],\n ): Promise<readonly TargetInvokeResponse[]> {\n const response = await fetch(`${url}/invokeBatch`, {\n method: 'POST',\n headers,\n body: JSON.stringify({\n requests: requests.map((r) => ({\n question: r.question,\n systemPrompt: r.systemPrompt,\n evalCaseId: r.evalCaseId,\n attempt: r.attempt,\n target: r.target,\n })),\n }),\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n const result = (await response.json()) as { responses: TargetInvokeResponse[] };\n return result.responses;\n },\n\n async getInfo(): Promise<TargetInfo> {\n const response = await fetch(`${url}/info`, {\n method: 'GET',\n headers,\n });\n\n if (!response.ok) {\n const errorBody = await response.text();\n let errorMessage: string;\n try {\n const errorJson = JSON.parse(errorBody) as { error?: string };\n errorMessage = errorJson.error ?? `HTTP ${response.status}`;\n } catch {\n errorMessage = errorBody || `HTTP ${response.status}`;\n }\n throw new TargetInvocationError(errorMessage, response.status);\n }\n\n return (await response.json()) as TargetInfo;\n },\n };\n}\n","/**\n * Runtime for custom assertion evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n *\n * Assertions receive the same input as code graders but use a simplified result\n * contract focused on pass/fail with optional score granularity.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n} from './schemas.js';\n\n/**\n * Context provided to assertion handlers.\n * Same shape as CodeGraderInput — assertions receive full evaluation context.\n */\nexport type AssertionContext = CodeGraderInput;\n\n/**\n * Known built-in assertion types. Custom types are extensible via string.\n *\n * Use in EVAL.yaml `assertions` blocks:\n * ```yaml\n * assertions:\n * - type: contains\n * value: \"Paris\"\n * ```\n *\n * Custom types registered via `.agentv/assertions/` or `defineAssertion()`\n * are also valid — the `string & {}` escape hatch provides autocomplete\n * for known types while accepting any string.\n */\nexport type AssertionType =\n // kebab-case (canonical internal form)\n | 'llm-grader'\n | 'code-grader'\n | 'rubrics'\n | 'composite'\n | 'tool-trajectory'\n | 'field-accuracy'\n | 'latency'\n | 'cost'\n | 'token-usage'\n | 'execution-metrics'\n | 'skill-trigger'\n | 'contains'\n | 'contains-any'\n | 'contains-all'\n | 'icontains'\n | 'icontains-any'\n | 'icontains-all'\n | 'starts-with'\n | 'ends-with'\n | 'equals'\n | 'regex'\n | 'is-json'\n // legacy aliases (still accepted)\n | 'llm-judge'\n | 'code-judge'\n | 'llm_judge'\n | 'code_judge'\n | 'llm_grader'\n | 'code_grader'\n | 'tool_trajectory'\n | 'field_accuracy'\n | 'token_usage'\n | 'execution_metrics'\n | 'contains_any'\n | 'contains_all'\n | 'icontains_any'\n | 'icontains_all'\n | 'starts_with'\n | 'ends_with'\n | 'is_json'\n | (string & {});\n\n/**\n * Result returned from an assertion handler.\n *\n * @example Pass with reasoning\n * ```ts\n * { pass: true, reasoning: 'Output contains expected keywords' }\n * ```\n *\n * @example Fail with misses\n * ```ts\n * { pass: false, misses: ['Missing required header'], score: 0.3 }\n * ```\n *\n * @example Granular score (0-1)\n * ```ts\n * { score: 0.75, hits: ['Format correct', 'Content relevant'], misses: ['Missing citation'] }\n * ```\n */\nexport interface AssertionScore {\n /** Explicit pass/fail. If omitted, derived from score (>= 0.5 = pass). */\n readonly pass?: boolean;\n /** Numeric score between 0 and 1. Defaults to 1 if pass=true, 0 if pass=false. */\n readonly score?: number;\n /** Aspects that passed. */\n readonly hits?: readonly string[];\n /** Aspects that failed. */\n readonly misses?: readonly string[];\n /** Human-readable explanation. */\n readonly reasoning?: string;\n /** Optional structured details for domain-specific metrics. */\n readonly details?: Record<string, unknown>;\n}\n\n/**\n * Handler function type for assertions.\n */\nexport type AssertionHandler = (ctx: AssertionContext) => AssertionScore | Promise<AssertionScore>;\n\n/**\n * Read stdin synchronously.\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Normalize an AssertionScore to a CodeGraderResult for wire compatibility.\n */\nfunction normalizeScore(result: AssertionScore): CodeGraderResult {\n let score: number;\n if (result.score !== undefined) {\n score = clampScore(result.score);\n } else if (result.pass !== undefined) {\n score = result.pass ? 1 : 0;\n } else {\n score = 0;\n }\n\n return {\n score,\n hits: result.hits ? [...result.hits] : [],\n misses: result.misses ? [...result.misses] : [],\n reasoning: result.reasoning,\n details: result.details,\n };\n}\n\n/**\n * Run an assertion handler with full stdin/stdout handling.\n * This is the internal implementation called by defineAssertion.\n */\nexport async function runAssertion(handler: AssertionHandler): Promise<void> {\n try {\n const stdin = readStdin();\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n const camelInput = toCamelCaseDeep(rawInput);\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // Lazy file-backed output loading\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n const rawResult = await handler(input);\n const normalized = normalizeScore(rawResult);\n const result = CodeGraderResultSchema.parse(normalized);\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Assertion failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n","/**\n * Case conversion utilities for JSON payloads.\n * Converts between snake_case (wire format) and camelCase (TypeScript).\n */\n\nfunction toCamelCase(str: string): string {\n // Don't convert keys that start with uppercase (proper nouns/tool names)\n if (/^[A-Z]/.test(str)) {\n return str;\n }\n return str.replace(/_([a-z0-9])/g, (_, letter) => letter.toUpperCase());\n}\n\n/**\n * Recursively converts all keys in an object from snake_case to camelCase.\n * Used to map wire payloads into TypeScript-friendly shapes.\n *\n * @param obj - The object to convert (can be any JSON-serializable value)\n * @returns A new object with all keys converted to camelCase\n */\nexport function toCamelCaseDeep(obj: unknown): unknown {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => toCamelCaseDeep(item));\n }\n\n if (typeof obj === 'object') {\n const result: Record<string, unknown> = {};\n for (const [key, value] of Object.entries(obj)) {\n const camelKey = toCamelCase(key);\n result[camelKey] = toCamelCaseDeep(value);\n }\n return result;\n }\n\n return obj;\n}\n","/**\n * Runtime for prompt template evaluators.\n * Handles stdin parsing, validation, error handling, and string output.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport { type PromptTemplateInput, PromptTemplateInputSchema } from './schemas.js';\n\n/**\n * Handler function type for prompt templates.\n * Returns the prompt string to use for evaluation.\n */\nexport type PromptTemplateHandler = (input: PromptTemplateInput) => string | Promise<string>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Run a prompt template handler with full stdin/stdout handling.\n * This is the internal implementation called by definePromptTemplate.\n */\nexport async function runPromptTemplate(handler: PromptTemplateHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = PromptTemplateInputSchema.parse(camelInput);\n\n // 5. Run handler\n const prompt = await handler(input);\n\n // 6. Output raw string (not JSON) - the prompt itself\n console.log(prompt);\n } catch (error) {\n // Output error to stderr and exit with non-zero code\n console.error(error instanceof Error ? error.message : String(error));\n process.exit(1);\n }\n}\n\n/**\n * Define a prompt template with automatic stdin/stdout handling.\n *\n * This function:\n * 1. Reads JSON from stdin (snake_case format)\n * 2. Converts to camelCase and validates with Zod\n * 3. Calls your handler with typed input\n * 4. Outputs the generated prompt string to stdout\n * 5. Handles errors gracefully with proper exit codes\n *\n * @param handler - Function that generates the prompt string from input\n *\n * @example\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => `\n * Question: ${ctx.question}\n * Answer: ${ctx.answer}\n *\n * ${ctx.referenceAnswer ? `Reference: ${ctx.referenceAnswer}` : ''}\n * `);\n * ```\n *\n * @example With conditional logic\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate((ctx) => {\n * const rubric = ctx.config?.rubric as string | undefined;\n * return `\n * Question: ${ctx.question}\n * Candidate Answer: ${ctx.answer}\n * ${rubric ? `\\nEvaluation Criteria:\\n${rubric}` : ''}\n * `;\n * });\n * ```\n *\n * @example Async handler\n * ```typescript\n * import { definePromptTemplate } from '@agentv/eval';\n *\n * export default definePromptTemplate(async (ctx) => {\n * // Async operations are supported\n * return `Question: ${ctx.question}\\nAnswer: ${ctx.answer}`;\n * });\n * ```\n */\nexport function definePromptTemplate(handler: PromptTemplateHandler): void {\n // Run immediately when module is loaded\n runPromptTemplate(handler);\n}\n","/**\n * Runtime for code grader evaluators.\n * Handles stdin parsing, validation, error handling, and output formatting.\n */\nimport { readFileSync } from 'node:fs';\n\nimport { toCamelCaseDeep } from './case-conversion.js';\nimport {\n type CodeGraderInput,\n CodeGraderInputSchema,\n type CodeGraderResult,\n CodeGraderResultSchema,\n} from './schemas.js';\n\n/**\n * Handler function type for code graders.\n */\nexport type CodeGraderHandler = (\n input: CodeGraderInput,\n) => CodeGraderResult | Promise<CodeGraderResult>;\n\n/**\n * Read stdin synchronously (works in both Node.js and Bun).\n */\nfunction readStdin(): string {\n return readFileSync(0, 'utf8');\n}\n\n/**\n * Clamp a value to the range [0, 1].\n */\nfunction clampScore(value: number): number {\n if (Number.isNaN(value) || !Number.isFinite(value)) {\n return 0;\n }\n return Math.max(0, Math.min(1, value));\n}\n\n/**\n * Format an error for output.\n */\nfunction formatError(error: unknown): string {\n if (error instanceof Error) {\n return error.message;\n }\n return String(error);\n}\n\n/**\n * Run a code grader handler with full stdin/stdout handling.\n * This is the internal implementation called by defineCodeGrader.\n */\nexport async function runCodeGrader(handler: CodeGraderHandler): Promise<void> {\n try {\n // 1. Read stdin\n const stdin = readStdin();\n\n // 2. Parse JSON\n const rawInput = JSON.parse(stdin) as Record<string, unknown>;\n\n // 3. Convert snake_case to camelCase\n const camelInput = toCamelCaseDeep(rawInput);\n\n // 4. Validate input with Zod\n const input = CodeGraderInputSchema.parse(camelInput);\n\n // 5. Set up lazy file-backed output loading if applicable\n if (input.outputPath && (input.output === null || input.output === undefined)) {\n let cachedOutput: CodeGraderInput['output'] | undefined;\n const filePath = input.outputPath;\n Object.defineProperty(input, 'output', {\n get() {\n if (cachedOutput === undefined) {\n cachedOutput = JSON.parse(readFileSync(filePath, 'utf8'));\n }\n return cachedOutput;\n },\n configurable: true,\n enumerable: true,\n });\n }\n\n // 6. Run handler\n const rawResult = await handler(input);\n\n // 7. Validate and normalize output\n const result = CodeGraderResultSchema.parse({\n ...rawResult,\n score: clampScore(rawResult.score),\n });\n\n // 8. Output JSON\n console.log(JSON.stringify(result, null, 2));\n } catch (error) {\n // Output failure result\n const errorMessage = formatError(error);\n const errorResult: CodeGraderResult = {\n score: 0,\n hits: [],\n misses: [errorMessage],\n reasoning: `Evaluation failed: ${errorMessage}`,\n };\n console.log(JSON.stringify(errorResult, null, 2));\n process.exit(1);\n }\n}\n\n// ── Backward-compat aliases (deprecated) ────────────────────────────────────────\n/** @deprecated Use CodeGraderHandler */\nexport type CodeJudgeHandler = CodeGraderHandler;\n/** @deprecated Use runCodeGrader */\nexport const runCodeJudge = runCodeGrader;\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACIA,iBAAkB;AAKX,IAAM,mBAAmB,aAAE,OAAO;AAAA,EACvC,OAAO,aAAE,OAAO;AAAA,EAChB,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,OAAO,EAAE,SAAS;AAC9B,CAAC;AAKM,IAAM,qBAAqB,aAAE,OAAO;AAAA,EACzC,YAAY,aAAE,OAAO;AAAA,EACrB,WAAW,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC7B,iBAAiB,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,OAAO,CAAC;AAAA,EAChD,YAAY,aAAE,OAAO;AAAA,EACrB,eAAe,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,MAAM,aAAE,OAAO,CAAC,CAAC,EAAE,SAAS;AAAA,EAClE,cAAc,aAAE,OAAO,EAAE,SAAS;AACpC,CAAC;AAKM,IAAM,iBAAiB,aAAE,OAAO;AAAA,EACrC,MAAM,aAAE,OAAO;AAAA,EACf,OAAO,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC5B,QAAQ,aAAE,QAAQ,EAAE,SAAS;AAAA,EAC7B,IAAI,aAAE,OAAO,EAAE,SAAS;AAAA,EACxB,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAKM,IAAM,gBAAgB,aAAE,OAAO;AAAA,EACpC,MAAM,aAAE,KAAK,CAAC,aAAa,QAAQ,UAAU,MAAM,CAAC;AAAA,EACpD,SAAS,aAAE,MAAM,CAAC,aAAE,OAAO,GAAG,aAAE,OAAO,aAAE,QAAQ,CAAC,GAAG,aAAE,MAAM,aAAE,OAAO,aAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,SAAS;AAAA,EAC/F,WAAW,aAAE,MAAM,cAAc,EAAE,SAAS;AAAA,EAC5C,MAAM,aAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,UAAU,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC3C,CAAC;AAKM,IAAM,wBAAwB,aAAE,OAAO;AAAA,EAC5C,UAAU,aAAE,OAAO;AAAA,EACnB,UAAU,aAAE,OAAO;AAAA,EACnB,gBAAgB,aAAE,MAAM,aAAa;AAAA,EACrC,iBAAiB,aAAE,OAAO,EAAE,SAAS;AAAA,EACrC,QAAQ,aAAE,OAAO;AAAA,EACjB,QAAQ,aAAE,MAAM,aAAa,EAAE,SAAS,EAAE,SAAS;AAAA;AAAA,EAEnD,YAAY,aAAE,OAAO,EAAE,SAAS;AAAA,EAChC,gBAAgB,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAClC,YAAY,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EAC9B,OAAO,aAAE,MAAM,aAAa;AAAA,EAC5B,OAAO,mBAAmB,SAAS,EAAE,SAAS;AAAA,EAC9C,YAAY,iBAAiB,SAAS,EAAE,SAAS;AAAA,EACjD,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,YAAY,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3C,WAAW,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC1C,SAAS,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EACxC,aAAa,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC5C,eAAe,aAAE,OAAO,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,QAAQ,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,SAAS;AACpD,CAAC;AAKM,IAAM,yBAAyB,aAAE,OAAO;AAAA,EAC7C,OAAO,aAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAC9B,MAAM,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC/C,QAAQ,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EACjD,WAAW,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE/B,SAAS,aAAE,OAAO,aAAE,QAAQ,CAAC,EAAE,SAAS;AAC1C,CAAC;AAgBM,IAAM,4BAA4B;AAMlC,IAAM,uBAAuB;AAE7B,IAAM,wBAAwB;;;ACxC9B,IAAM,0BAAN,cAAsC,MAAM;AAAA,EACjD,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAKO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC;AAAA,EAET,YAAY,SAAiB,YAAqB;AAChD,UAAM,OAAO;AACb,SAAK,OAAO;AACZ,SAAK,aAAa;AAAA,EACpB;AACF;AAkCO,SAAS,qBAA+C;AAC7D,QAAM,WAAW,QAAQ,IAAI;AAC7B,QAAM,aAAa,QAAQ,IAAI;AAE/B,MAAI,CAAC,UAAU;AACb,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,YAAY;AACf,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,SAAO,2BAA2B,UAAU,UAAU;AACxD;AAMO,SAAS,2BAA2B,KAAa,OAA6B;AACnF,QAAM,UAAU;AAAA,IACd,gBAAgB;AAAA,IAChB,eAAe,UAAU,KAAK;AAAA,EAChC;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,SAA6D;AACxE,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,WAAW;AAAA,QAC5C,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,QAAQ;AAAA,UAClB,cAAc,QAAQ;AAAA,UACtB,YAAY,QAAQ;AAAA,UACpB,SAAS,QAAQ;AAAA,UACjB,QAAQ,QAAQ;AAAA,QAClB,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,IAEA,MAAM,YACJ,UAC0C;AAC1C,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,gBAAgB;AAAA,QACjD,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB,UAAU,SAAS,IAAI,CAAC,OAAO;AAAA,YAC7B,UAAU,EAAE;AAAA,YACZ,cAAc,EAAE;AAAA,YAChB,YAAY,EAAE;AAAA,YACd,SAAS,EAAE;AAAA,YACX,QAAQ,EAAE;AAAA,UACZ,EAAE;AAAA,QACJ,CAAC;AAAA,MACH,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,YAAM,SAAU,MAAM,SAAS,KAAK;AACpC,aAAO,OAAO;AAAA,IAChB;AAAA,IAEA,MAAM,UAA+B;AACnC,YAAM,WAAW,MAAM,MAAM,GAAG,GAAG,SAAS;AAAA,QAC1C,QAAQ;AAAA,QACR;AAAA,MACF,CAAC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,YAAY,MAAM,SAAS,KAAK;AACtC,YAAI;AACJ,YAAI;AACF,gBAAM,YAAY,KAAK,MAAM,SAAS;AACtC,yBAAe,UAAU,SAAS,QAAQ,SAAS,MAAM;AAAA,QAC3D,QAAQ;AACN,yBAAe,aAAa,QAAQ,SAAS,MAAM;AAAA,QACrD;AACA,cAAM,IAAI,sBAAsB,cAAc,SAAS,MAAM;AAAA,MAC/D;AAEA,aAAQ,MAAM,SAAS,KAAK;AAAA,IAC9B;AAAA,EACF;AACF;;;AFrJA,IAAAA,cAAkB;;;AGhFlB,qBAA6B;;;ACF7B,SAAS,YAAY,KAAqB;AAExC,MAAI,SAAS,KAAK,GAAG,GAAG;AACtB,WAAO;AAAA,EACT;AACA,SAAO,IAAI,QAAQ,gBAAgB,CAAC,GAAG,WAAW,OAAO,YAAY,CAAC;AACxE;AASO,SAAS,gBAAgB,KAAuB;AACrD,MAAI,QAAQ,QAAQ,QAAQ,QAAW;AACrC,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,GAAG,GAAG;AACtB,WAAO,IAAI,IAAI,CAAC,SAAS,gBAAgB,IAAI,CAAC;AAAA,EAChD;AAEA,MAAI,OAAO,QAAQ,UAAU;AAC3B,UAAM,SAAkC,CAAC;AACzC,eAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC9C,YAAM,WAAW,YAAY,GAAG;AAChC,aAAO,QAAQ,IAAI,gBAAgB,KAAK;AAAA,IAC1C;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;;;ADmFA,SAAS,YAAoB;AAC3B,aAAO,6BAAa,GAAG,MAAM;AAC/B;AAKA,SAAS,WAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAKA,SAAS,eAAe,QAA0C;AAChE,MAAI;AACJ,MAAI,OAAO,UAAU,QAAW;AAC9B,YAAQ,WAAW,OAAO,KAAK;AAAA,EACjC,WAAW,OAAO,SAAS,QAAW;AACpC,YAAQ,OAAO,OAAO,IAAI;AAAA,EAC5B,OAAO;AACL,YAAQ;AAAA,EACV;AAEA,SAAO;AAAA,IACL;AAAA,IACA,MAAM,OAAO,OAAO,CAAC,GAAG,OAAO,IAAI,IAAI,CAAC;AAAA,IACxC,QAAQ,OAAO,SAAS,CAAC,GAAG,OAAO,MAAM,IAAI,CAAC;AAAA,IAC9C,WAAW,OAAO;AAAA,IAClB,SAAS,OAAO;AAAA,EAClB;AACF;AAMA,eAAsB,aAAa,SAA0C;AAC3E,MAAI;AACF,UAAM,QAAQ,UAAU;AACxB,UAAM,WAAW,KAAK,MAAM,KAAK;AACjC,UAAM,aAAa,gBAAgB,QAAQ;AAC3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,6BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAEA,UAAM,YAAY,MAAM,QAAQ,KAAK;AACrC,UAAM,aAAa,eAAe,SAAS;AAC3C,UAAM,SAAS,uBAAuB,MAAM,UAAU;AACtD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AACd,UAAM,eAAe,YAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,qBAAqB,YAAY;AAAA,IAC9C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AE3MA,IAAAC,kBAA6B;AAc7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAMA,eAAsB,kBAAkB,SAA+C;AACrF,MAAI;AAEF,UAAM,QAAQA,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,0BAA0B,MAAM,UAAU;AAGxD,UAAM,SAAS,MAAM,QAAQ,KAAK;AAGlC,YAAQ,IAAI,MAAM;AAAA,EACpB,SAAS,OAAO;AAEd,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AC9CA,IAAAC,kBAA6B;AAoB7B,SAASC,aAAoB;AAC3B,aAAO,8BAAa,GAAG,MAAM;AAC/B;AAKA,SAASC,YAAW,OAAuB;AACzC,MAAI,OAAO,MAAM,KAAK,KAAK,CAAC,OAAO,SAAS,KAAK,GAAG;AAClD,WAAO;AAAA,EACT;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAKA,SAASC,aAAY,OAAwB;AAC3C,MAAI,iBAAiB,OAAO;AAC1B,WAAO,MAAM;AAAA,EACf;AACA,SAAO,OAAO,KAAK;AACrB;AAMA,eAAsB,cAAc,SAA2C;AAC7E,MAAI;AAEF,UAAM,QAAQF,WAAU;AAGxB,UAAM,WAAW,KAAK,MAAM,KAAK;AAGjC,UAAM,aAAa,gBAAgB,QAAQ;AAG3C,UAAM,QAAQ,sBAAsB,MAAM,UAAU;AAGpD,QAAI,MAAM,eAAe,MAAM,WAAW,QAAQ,MAAM,WAAW,SAAY;AAC7E,UAAI;AACJ,YAAM,WAAW,MAAM;AACvB,aAAO,eAAe,OAAO,UAAU;AAAA,QACrC,MAAM;AACJ,cAAI,iBAAiB,QAAW;AAC9B,2BAAe,KAAK,UAAM,8BAAa,UAAU,MAAM,CAAC;AAAA,UAC1D;AACA,iBAAO;AAAA,QACT;AAAA,QACA,cAAc;AAAA,QACd,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAGA,UAAM,YAAY,MAAM,QAAQ,KAAK;AAGrC,UAAM,SAAS,uBAAuB,MAAM;AAAA,MAC1C,GAAG;AAAA,MACH,OAAOC,YAAW,UAAU,KAAK;AAAA,IACnC,CAAC;AAGD,YAAQ,IAAI,KAAK,UAAU,QAAQ,MAAM,CAAC,CAAC;AAAA,EAC7C,SAAS,OAAO;AAEd,UAAM,eAAeC,aAAY,KAAK;AACtC,UAAM,cAAgC;AAAA,MACpC,OAAO;AAAA,MACP,MAAM,CAAC;AAAA,MACP,QAAQ,CAAC,YAAY;AAAA,MACrB,WAAW,sBAAsB,YAAY;AAAA,IAC/C;AACA,YAAQ,IAAI,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC;AAChD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF;;;AN6CO,SAAS,iBAAiB,SAAkC;AAEjE,gBAAc,OAAO;AACvB;AAGO,IAAM,kBAAkB;AAwCxB,SAAS,qBAAqB,SAAsC;AAEzE,oBAAkB,OAAO;AAC3B;AA8CO,SAAS,gBAAgB,SAAiC;AAC/D,eAAa,OAAO;AACtB;","names":["import_zod","import_node_fs","readStdin","import_node_fs","readStdin","clampScore","formatError"]}