@agentv/eval 3.13.0 → 3.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1044,461 +1044,6 @@ declare const PromptTemplateInputSchema: z.ZodObject<{
1044
1044
  expectedOutputText?: string | undefined;
1045
1045
  }>;
1046
1046
  type PromptTemplateInput = CodeGraderInput;
1047
- /** @deprecated Use CodeGraderInputSchema */
1048
- declare const CodeJudgeInputSchema: z.ZodObject<{
1049
- criteria: z.ZodString;
1050
- expectedOutput: z.ZodArray<z.ZodObject<{
1051
- role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
1052
- content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
1053
- toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
1054
- tool: z.ZodString;
1055
- input: z.ZodOptional<z.ZodUnknown>;
1056
- output: z.ZodOptional<z.ZodUnknown>;
1057
- id: z.ZodOptional<z.ZodString>;
1058
- startTime: z.ZodOptional<z.ZodString>;
1059
- endTime: z.ZodOptional<z.ZodString>;
1060
- durationMs: z.ZodOptional<z.ZodNumber>;
1061
- }, "strip", z.ZodTypeAny, {
1062
- tool: string;
1063
- input?: unknown;
1064
- output?: unknown;
1065
- id?: string | undefined;
1066
- startTime?: string | undefined;
1067
- endTime?: string | undefined;
1068
- durationMs?: number | undefined;
1069
- }, {
1070
- tool: string;
1071
- input?: unknown;
1072
- output?: unknown;
1073
- id?: string | undefined;
1074
- startTime?: string | undefined;
1075
- endTime?: string | undefined;
1076
- durationMs?: number | undefined;
1077
- }>, "many">>;
1078
- name: z.ZodOptional<z.ZodString>;
1079
- startTime: z.ZodOptional<z.ZodString>;
1080
- endTime: z.ZodOptional<z.ZodString>;
1081
- durationMs: z.ZodOptional<z.ZodNumber>;
1082
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1083
- }, "strip", z.ZodTypeAny, {
1084
- role: "tool" | "assistant" | "user" | "system";
1085
- toolCalls?: {
1086
- tool: string;
1087
- input?: unknown;
1088
- output?: unknown;
1089
- id?: string | undefined;
1090
- startTime?: string | undefined;
1091
- endTime?: string | undefined;
1092
- durationMs?: number | undefined;
1093
- }[] | undefined;
1094
- startTime?: string | undefined;
1095
- endTime?: string | undefined;
1096
- durationMs?: number | undefined;
1097
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1098
- name?: string | undefined;
1099
- metadata?: Record<string, unknown> | undefined;
1100
- }, {
1101
- role: "tool" | "assistant" | "user" | "system";
1102
- toolCalls?: {
1103
- tool: string;
1104
- input?: unknown;
1105
- output?: unknown;
1106
- id?: string | undefined;
1107
- startTime?: string | undefined;
1108
- endTime?: string | undefined;
1109
- durationMs?: number | undefined;
1110
- }[] | undefined;
1111
- startTime?: string | undefined;
1112
- endTime?: string | undefined;
1113
- durationMs?: number | undefined;
1114
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1115
- name?: string | undefined;
1116
- metadata?: Record<string, unknown> | undefined;
1117
- }>, "many">;
1118
- /** Last assistant message content as string. */
1119
- outputText: z.ZodString;
1120
- output: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodObject<{
1121
- role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
1122
- content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
1123
- toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
1124
- tool: z.ZodString;
1125
- input: z.ZodOptional<z.ZodUnknown>;
1126
- output: z.ZodOptional<z.ZodUnknown>;
1127
- id: z.ZodOptional<z.ZodString>;
1128
- startTime: z.ZodOptional<z.ZodString>;
1129
- endTime: z.ZodOptional<z.ZodString>;
1130
- durationMs: z.ZodOptional<z.ZodNumber>;
1131
- }, "strip", z.ZodTypeAny, {
1132
- tool: string;
1133
- input?: unknown;
1134
- output?: unknown;
1135
- id?: string | undefined;
1136
- startTime?: string | undefined;
1137
- endTime?: string | undefined;
1138
- durationMs?: number | undefined;
1139
- }, {
1140
- tool: string;
1141
- input?: unknown;
1142
- output?: unknown;
1143
- id?: string | undefined;
1144
- startTime?: string | undefined;
1145
- endTime?: string | undefined;
1146
- durationMs?: number | undefined;
1147
- }>, "many">>;
1148
- name: z.ZodOptional<z.ZodString>;
1149
- startTime: z.ZodOptional<z.ZodString>;
1150
- endTime: z.ZodOptional<z.ZodString>;
1151
- durationMs: z.ZodOptional<z.ZodNumber>;
1152
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1153
- }, "strip", z.ZodTypeAny, {
1154
- role: "tool" | "assistant" | "user" | "system";
1155
- toolCalls?: {
1156
- tool: string;
1157
- input?: unknown;
1158
- output?: unknown;
1159
- id?: string | undefined;
1160
- startTime?: string | undefined;
1161
- endTime?: string | undefined;
1162
- durationMs?: number | undefined;
1163
- }[] | undefined;
1164
- startTime?: string | undefined;
1165
- endTime?: string | undefined;
1166
- durationMs?: number | undefined;
1167
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1168
- name?: string | undefined;
1169
- metadata?: Record<string, unknown> | undefined;
1170
- }, {
1171
- role: "tool" | "assistant" | "user" | "system";
1172
- toolCalls?: {
1173
- tool: string;
1174
- input?: unknown;
1175
- output?: unknown;
1176
- id?: string | undefined;
1177
- startTime?: string | undefined;
1178
- endTime?: string | undefined;
1179
- durationMs?: number | undefined;
1180
- }[] | undefined;
1181
- startTime?: string | undefined;
1182
- endTime?: string | undefined;
1183
- durationMs?: number | undefined;
1184
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1185
- name?: string | undefined;
1186
- metadata?: Record<string, unknown> | undefined;
1187
- }>, "many">>>;
1188
- /** Path to a temp file containing the output JSON (used for large payloads). */
1189
- outputPath: z.ZodOptional<z.ZodString>;
1190
- inputFiles: z.ZodArray<z.ZodString, "many">;
1191
- input: z.ZodArray<z.ZodObject<{
1192
- role: z.ZodEnum<["assistant", "user", "system", "tool"]>;
1193
- content: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>, z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>, "many">]>>;
1194
- toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
1195
- tool: z.ZodString;
1196
- input: z.ZodOptional<z.ZodUnknown>;
1197
- output: z.ZodOptional<z.ZodUnknown>;
1198
- id: z.ZodOptional<z.ZodString>;
1199
- startTime: z.ZodOptional<z.ZodString>;
1200
- endTime: z.ZodOptional<z.ZodString>;
1201
- durationMs: z.ZodOptional<z.ZodNumber>;
1202
- }, "strip", z.ZodTypeAny, {
1203
- tool: string;
1204
- input?: unknown;
1205
- output?: unknown;
1206
- id?: string | undefined;
1207
- startTime?: string | undefined;
1208
- endTime?: string | undefined;
1209
- durationMs?: number | undefined;
1210
- }, {
1211
- tool: string;
1212
- input?: unknown;
1213
- output?: unknown;
1214
- id?: string | undefined;
1215
- startTime?: string | undefined;
1216
- endTime?: string | undefined;
1217
- durationMs?: number | undefined;
1218
- }>, "many">>;
1219
- name: z.ZodOptional<z.ZodString>;
1220
- startTime: z.ZodOptional<z.ZodString>;
1221
- endTime: z.ZodOptional<z.ZodString>;
1222
- durationMs: z.ZodOptional<z.ZodNumber>;
1223
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1224
- }, "strip", z.ZodTypeAny, {
1225
- role: "tool" | "assistant" | "user" | "system";
1226
- toolCalls?: {
1227
- tool: string;
1228
- input?: unknown;
1229
- output?: unknown;
1230
- id?: string | undefined;
1231
- startTime?: string | undefined;
1232
- endTime?: string | undefined;
1233
- durationMs?: number | undefined;
1234
- }[] | undefined;
1235
- startTime?: string | undefined;
1236
- endTime?: string | undefined;
1237
- durationMs?: number | undefined;
1238
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1239
- name?: string | undefined;
1240
- metadata?: Record<string, unknown> | undefined;
1241
- }, {
1242
- role: "tool" | "assistant" | "user" | "system";
1243
- toolCalls?: {
1244
- tool: string;
1245
- input?: unknown;
1246
- output?: unknown;
1247
- id?: string | undefined;
1248
- startTime?: string | undefined;
1249
- endTime?: string | undefined;
1250
- durationMs?: number | undefined;
1251
- }[] | undefined;
1252
- startTime?: string | undefined;
1253
- endTime?: string | undefined;
1254
- durationMs?: number | undefined;
1255
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1256
- name?: string | undefined;
1257
- metadata?: Record<string, unknown> | undefined;
1258
- }>, "many">;
1259
- trace: z.ZodOptional<z.ZodNullable<z.ZodObject<{
1260
- eventCount: z.ZodNumber;
1261
- toolCalls: z.ZodRecord<z.ZodString, z.ZodNumber>;
1262
- errorCount: z.ZodNumber;
1263
- toolDurations: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodNumber, "many">>>;
1264
- llmCallCount: z.ZodOptional<z.ZodNumber>;
1265
- }, "strip", z.ZodTypeAny, {
1266
- eventCount: number;
1267
- toolCalls: Record<string, number>;
1268
- errorCount: number;
1269
- toolDurations?: Record<string, number[]> | undefined;
1270
- llmCallCount?: number | undefined;
1271
- }, {
1272
- eventCount: number;
1273
- toolCalls: Record<string, number>;
1274
- errorCount: number;
1275
- toolDurations?: Record<string, number[]> | undefined;
1276
- llmCallCount?: number | undefined;
1277
- }>>>;
1278
- tokenUsage: z.ZodOptional<z.ZodNullable<z.ZodObject<{
1279
- input: z.ZodNumber;
1280
- output: z.ZodNumber;
1281
- cached: z.ZodOptional<z.ZodNumber>;
1282
- }, "strip", z.ZodTypeAny, {
1283
- input: number;
1284
- output: number;
1285
- cached?: number | undefined;
1286
- }, {
1287
- input: number;
1288
- output: number;
1289
- cached?: number | undefined;
1290
- }>>>;
1291
- costUsd: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
1292
- durationMs: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
1293
- startTime: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1294
- endTime: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1295
- fileChanges: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1296
- workspacePath: z.ZodOptional<z.ZodNullable<z.ZodString>>;
1297
- config: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1298
- /** First user message content as string. */
1299
- inputText: z.ZodString;
1300
- /** Expected output content as string. */
1301
- expectedOutputText: z.ZodOptional<z.ZodString>;
1302
- }, "strip", z.ZodTypeAny, {
1303
- input: {
1304
- role: "tool" | "assistant" | "user" | "system";
1305
- toolCalls?: {
1306
- tool: string;
1307
- input?: unknown;
1308
- output?: unknown;
1309
- id?: string | undefined;
1310
- startTime?: string | undefined;
1311
- endTime?: string | undefined;
1312
- durationMs?: number | undefined;
1313
- }[] | undefined;
1314
- startTime?: string | undefined;
1315
- endTime?: string | undefined;
1316
- durationMs?: number | undefined;
1317
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1318
- name?: string | undefined;
1319
- metadata?: Record<string, unknown> | undefined;
1320
- }[];
1321
- criteria: string;
1322
- expectedOutput: {
1323
- role: "tool" | "assistant" | "user" | "system";
1324
- toolCalls?: {
1325
- tool: string;
1326
- input?: unknown;
1327
- output?: unknown;
1328
- id?: string | undefined;
1329
- startTime?: string | undefined;
1330
- endTime?: string | undefined;
1331
- durationMs?: number | undefined;
1332
- }[] | undefined;
1333
- startTime?: string | undefined;
1334
- endTime?: string | undefined;
1335
- durationMs?: number | undefined;
1336
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1337
- name?: string | undefined;
1338
- metadata?: Record<string, unknown> | undefined;
1339
- }[];
1340
- outputText: string;
1341
- inputFiles: string[];
1342
- inputText: string;
1343
- output?: {
1344
- role: "tool" | "assistant" | "user" | "system";
1345
- toolCalls?: {
1346
- tool: string;
1347
- input?: unknown;
1348
- output?: unknown;
1349
- id?: string | undefined;
1350
- startTime?: string | undefined;
1351
- endTime?: string | undefined;
1352
- durationMs?: number | undefined;
1353
- }[] | undefined;
1354
- startTime?: string | undefined;
1355
- endTime?: string | undefined;
1356
- durationMs?: number | undefined;
1357
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1358
- name?: string | undefined;
1359
- metadata?: Record<string, unknown> | undefined;
1360
- }[] | null | undefined;
1361
- startTime?: string | null | undefined;
1362
- endTime?: string | null | undefined;
1363
- durationMs?: number | null | undefined;
1364
- outputPath?: string | undefined;
1365
- trace?: {
1366
- eventCount: number;
1367
- toolCalls: Record<string, number>;
1368
- errorCount: number;
1369
- toolDurations?: Record<string, number[]> | undefined;
1370
- llmCallCount?: number | undefined;
1371
- } | null | undefined;
1372
- tokenUsage?: {
1373
- input: number;
1374
- output: number;
1375
- cached?: number | undefined;
1376
- } | null | undefined;
1377
- costUsd?: number | null | undefined;
1378
- fileChanges?: string | null | undefined;
1379
- workspacePath?: string | null | undefined;
1380
- config?: Record<string, unknown> | null | undefined;
1381
- expectedOutputText?: string | undefined;
1382
- }, {
1383
- input: {
1384
- role: "tool" | "assistant" | "user" | "system";
1385
- toolCalls?: {
1386
- tool: string;
1387
- input?: unknown;
1388
- output?: unknown;
1389
- id?: string | undefined;
1390
- startTime?: string | undefined;
1391
- endTime?: string | undefined;
1392
- durationMs?: number | undefined;
1393
- }[] | undefined;
1394
- startTime?: string | undefined;
1395
- endTime?: string | undefined;
1396
- durationMs?: number | undefined;
1397
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1398
- name?: string | undefined;
1399
- metadata?: Record<string, unknown> | undefined;
1400
- }[];
1401
- criteria: string;
1402
- expectedOutput: {
1403
- role: "tool" | "assistant" | "user" | "system";
1404
- toolCalls?: {
1405
- tool: string;
1406
- input?: unknown;
1407
- output?: unknown;
1408
- id?: string | undefined;
1409
- startTime?: string | undefined;
1410
- endTime?: string | undefined;
1411
- durationMs?: number | undefined;
1412
- }[] | undefined;
1413
- startTime?: string | undefined;
1414
- endTime?: string | undefined;
1415
- durationMs?: number | undefined;
1416
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1417
- name?: string | undefined;
1418
- metadata?: Record<string, unknown> | undefined;
1419
- }[];
1420
- outputText: string;
1421
- inputFiles: string[];
1422
- inputText: string;
1423
- output?: {
1424
- role: "tool" | "assistant" | "user" | "system";
1425
- toolCalls?: {
1426
- tool: string;
1427
- input?: unknown;
1428
- output?: unknown;
1429
- id?: string | undefined;
1430
- startTime?: string | undefined;
1431
- endTime?: string | undefined;
1432
- durationMs?: number | undefined;
1433
- }[] | undefined;
1434
- startTime?: string | undefined;
1435
- endTime?: string | undefined;
1436
- durationMs?: number | undefined;
1437
- content?: string | Record<string, unknown> | Record<string, unknown>[] | undefined;
1438
- name?: string | undefined;
1439
- metadata?: Record<string, unknown> | undefined;
1440
- }[] | null | undefined;
1441
- startTime?: string | null | undefined;
1442
- endTime?: string | null | undefined;
1443
- durationMs?: number | null | undefined;
1444
- outputPath?: string | undefined;
1445
- trace?: {
1446
- eventCount: number;
1447
- toolCalls: Record<string, number>;
1448
- errorCount: number;
1449
- toolDurations?: Record<string, number[]> | undefined;
1450
- llmCallCount?: number | undefined;
1451
- } | null | undefined;
1452
- tokenUsage?: {
1453
- input: number;
1454
- output: number;
1455
- cached?: number | undefined;
1456
- } | null | undefined;
1457
- costUsd?: number | null | undefined;
1458
- fileChanges?: string | null | undefined;
1459
- workspacePath?: string | null | undefined;
1460
- config?: Record<string, unknown> | null | undefined;
1461
- expectedOutputText?: string | undefined;
1462
- }>;
1463
- /** @deprecated Use CodeGraderResultSchema */
1464
- declare const CodeJudgeResultSchema: z.ZodObject<{
1465
- score: z.ZodNumber;
1466
- assertions: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
1467
- text: z.ZodString;
1468
- passed: z.ZodBoolean;
1469
- evidence: z.ZodOptional<z.ZodString>;
1470
- }, "strip", z.ZodTypeAny, {
1471
- text: string;
1472
- passed: boolean;
1473
- evidence?: string | undefined;
1474
- }, {
1475
- text: string;
1476
- passed: boolean;
1477
- evidence?: string | undefined;
1478
- }>, "many">>>;
1479
- /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
1480
- details: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1481
- }, "strip", z.ZodTypeAny, {
1482
- score: number;
1483
- assertions: {
1484
- text: string;
1485
- passed: boolean;
1486
- evidence?: string | undefined;
1487
- }[];
1488
- details?: Record<string, unknown> | undefined;
1489
- }, {
1490
- score: number;
1491
- assertions?: {
1492
- text: string;
1493
- passed: boolean;
1494
- evidence?: string | undefined;
1495
- }[] | undefined;
1496
- details?: Record<string, unknown> | undefined;
1497
- }>;
1498
- /** @deprecated Use CodeGraderInput */
1499
- type CodeJudgeInput = CodeGraderInput;
1500
- /** @deprecated Use CodeGraderResult */
1501
- type CodeJudgeResult = CodeGraderResult;
1502
1047
 
1503
1048
  /**
1504
1049
  * Client for invoking configured targets from code-grader scripts.
@@ -1581,7 +1126,7 @@ declare class TargetInvocationError extends Error {
1581
1126
  *
1582
1127
  * This function reads the proxy URL and token from environment variables
1583
1128
  * that are automatically set by AgentV when a `target` config block is present
1584
- * on a `code_grader` (or `code_judge`) evaluator.
1129
+ * on a `code_grader` evaluator.
1585
1130
  *
1586
1131
  * @returns A target client if environment variables are set, otherwise undefined
1587
1132
  * @throws TargetNotAvailableError if token is missing when URL is present
@@ -1632,7 +1177,7 @@ type AssertionContext = EnrichedCodeGraderInput;
1632
1177
  * are also valid — the `string & {}` escape hatch provides autocomplete
1633
1178
  * for known types while accepting any string.
1634
1179
  */
1635
- type AssertionType = 'llm-grader' | 'code-grader' | 'rubrics' | 'composite' | 'tool-trajectory' | 'field-accuracy' | 'latency' | 'cost' | 'token-usage' | 'execution-metrics' | 'skill-trigger' | 'contains' | 'contains-any' | 'contains-all' | 'icontains' | 'icontains-any' | 'icontains-all' | 'starts-with' | 'ends-with' | 'equals' | 'regex' | 'is-json' | 'llm-judge' | 'code-judge' | 'llm_judge' | 'code_judge' | 'llm_grader' | 'code_grader' | 'tool_trajectory' | 'field_accuracy' | 'token_usage' | 'execution_metrics' | 'contains_any' | 'contains_all' | 'icontains_any' | 'icontains_all' | 'starts_with' | 'ends_with' | 'is_json' | (string & {});
1180
+ type AssertionType = 'llm-grader' | 'code-grader' | 'rubrics' | 'composite' | 'tool-trajectory' | 'field-accuracy' | 'latency' | 'cost' | 'token-usage' | 'execution-metrics' | 'skill-trigger' | 'contains' | 'contains-any' | 'contains-all' | 'icontains' | 'icontains-any' | 'icontains-all' | 'starts-with' | 'ends-with' | 'equals' | 'regex' | 'is-json' | 'llm_grader' | 'code_grader' | 'tool_trajectory' | 'field_accuracy' | 'token_usage' | 'execution_metrics' | 'contains_any' | 'contains_all' | 'icontains_any' | 'icontains_all' | 'starts_with' | 'ends_with' | 'is_json' | (string & {});
1636
1181
  /**
1637
1182
  * Result returned from an assertion handler.
1638
1183
  *
@@ -1690,13 +1235,11 @@ type PromptTemplateHandler = (input: EnrichedCodeGraderInput) => string | Promis
1690
1235
  * `expectedOutputText` are always populated before the handler is called.
1691
1236
  */
1692
1237
  type CodeGraderHandler = (input: EnrichedCodeGraderInput) => CodeGraderResult | Promise<CodeGraderResult>;
1693
- /** @deprecated Use CodeGraderHandler */
1694
- type CodeJudgeHandler = CodeGraderHandler;
1695
1238
 
1696
1239
  /**
1697
1240
  * AgentV Evaluation SDK
1698
1241
  *
1699
- * Build custom evaluators for AI agent outputs.
1242
+ * Build custom graders for AI agent outputs.
1700
1243
  *
1701
1244
  * @example Custom assertion (simplest way to add evaluation logic)
1702
1245
  * ```typescript
@@ -1745,7 +1288,7 @@ type CodeJudgeHandler = CodeGraderHandler;
1745
1288
  */
1746
1289
 
1747
1290
  /**
1748
- * Define a code grader evaluator with automatic stdin/stdout handling.
1291
+ * Define a code grader with automatic stdin/stdout handling.
1749
1292
  *
1750
1293
  * This function:
1751
1294
  * 1. Reads JSON from stdin (snake_case format)
@@ -1788,8 +1331,6 @@ type CodeJudgeHandler = CodeGraderHandler;
1788
1331
  * ```
1789
1332
  */
1790
1333
  declare function defineCodeGrader(handler: CodeGraderHandler): void;
1791
- /** @deprecated Use defineCodeGrader */
1792
- declare const defineCodeJudge: typeof defineCodeGrader;
1793
1334
  /**
1794
1335
  * Define a prompt template with automatic stdin/stdout handling.
1795
1336
  *
@@ -1830,7 +1371,7 @@ declare const defineCodeJudge: typeof defineCodeGrader;
1830
1371
  */
1831
1372
  declare function definePromptTemplate(handler: PromptTemplateHandler): void;
1832
1373
  /**
1833
- * Define a custom assertion evaluator with automatic stdin/stdout handling.
1374
+ * Define a custom assertion grader with automatic stdin/stdout handling.
1834
1375
  *
1835
1376
  * Assertions are the simplest way to add custom evaluation logic. They receive
1836
1377
  * the full evaluation context and return a pass/fail result with optional
@@ -1875,4 +1416,4 @@ declare function definePromptTemplate(handler: PromptTemplateHandler): void;
1875
1416
  */
1876
1417
  declare function defineAssertion(handler: AssertionHandler): void;
1877
1418
 
1878
- export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderResult, CodeGraderResultSchema, type CodeJudgeHandler, type CodeJudgeInput, CodeJudgeInputSchema, type CodeJudgeResult, CodeJudgeResultSchema, type EnrichedCodeGraderInput, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineAssertion, defineCodeGrader, defineCodeJudge, definePromptTemplate };
1419
+ export { type AssertionContext, type AssertionHandler, type AssertionScore, type AssertionType, type CodeGraderHandler, type CodeGraderInput, CodeGraderInputSchema, type CodeGraderResult, CodeGraderResultSchema, type EnrichedCodeGraderInput, type Message, MessageSchema, type PromptTemplateHandler, type PromptTemplateInput, PromptTemplateInputSchema, type TargetClient, type TargetInfo, TargetInvocationError, type TargetInvokeRequest, type TargetInvokeResponse, TargetNotAvailableError, type TokenUsage, TokenUsageSchema, type ToolCall, ToolCallSchema, type TraceSummary, TraceSummarySchema, createTargetClient, defineAssertion, defineCodeGrader, definePromptTemplate };
package/dist/index.js CHANGED
@@ -68,8 +68,6 @@ var CodeGraderResultSchema = z.object({
68
68
  details: z.record(z.unknown()).optional()
69
69
  });
70
70
  var PromptTemplateInputSchema = CodeGraderInputSchema;
71
- var CodeJudgeInputSchema = CodeGraderInputSchema;
72
- var CodeJudgeResultSchema = CodeGraderResultSchema;
73
71
 
74
72
  // src/target-client.ts
75
73
  var TargetNotAvailableError = class extends Error {
@@ -369,7 +367,6 @@ async function runCodeGrader(handler) {
369
367
  function defineCodeGrader(handler) {
370
368
  runCodeGrader(handler);
371
369
  }
372
- var defineCodeJudge = defineCodeGrader;
373
370
  function definePromptTemplate(handler) {
374
371
  runPromptTemplate(handler);
375
372
  }
@@ -379,8 +376,6 @@ function defineAssertion(handler) {
379
376
  export {
380
377
  CodeGraderInputSchema,
381
378
  CodeGraderResultSchema,
382
- CodeJudgeInputSchema,
383
- CodeJudgeResultSchema,
384
379
  MessageSchema,
385
380
  PromptTemplateInputSchema,
386
381
  TargetInvocationError,
@@ -391,7 +386,6 @@ export {
391
386
  createTargetClient,
392
387
  defineAssertion,
393
388
  defineCodeGrader,
394
- defineCodeJudge,
395
389
  definePromptTemplate,
396
390
  z2 as z
397
391
  };