@agentv/sdk 4.41.3-next.1 → 4.41.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,562 @@
1
+ // src/schemas.ts
2
+ import { z } from "zod";
3
/** Token counts: `input` and `output` are required, `cached` and `reasoning` are optional. */
const TokenUsageSchema = z.object({
  input: z.number(),
  output: z.number(),
  cached: z.number().optional(),
  reasoning: z.number().optional(),
});

/**
 * Aggregate trace counters. `toolCalls` maps a string key to a number and
 * `toolDurations` maps a string key to an array of numbers.
 */
const TraceSummarySchema = z.object({
  eventCount: z.number(),
  toolCalls: z.record(z.string(), z.number()),
  errorCount: z.number(),
  toolDurations: z.record(z.string(), z.array(z.number())).optional(),
  llmCallCount: z.number().optional(),
});
16
/** Allowed values for the `kind` field of a trace source. */
const TRACE_SOURCE_KINDS = ["agentv_run", "otlp", "phoenix", "langfuse", "pi_session", "imported_transcript", "compact_transcript"];

/** Allowed values for the `type` field of a trace event. */
const TRACE_EVENT_TYPES = ["message", "model_turn", "tool_call", "tool_result", "final_response", "error"];

/** Allowed values for the `status` field of a traced tool. */
const TRACE_TOOL_STATUSES = [
  "ok",
  "error",
  "timeout",
  "cancelled",
  "unknown",
];

/** Allowed values for the `level` field of a redaction state. */
const TRACE_REDACTION_LEVELS = [
  "none",
  "partial",
  "full",
];
35
/** Free-form metadata: string keys mapped to values of any type. */
const MetadataSchema = z.record(z.string(), z.unknown());

// Field builders. Each call returns a fresh schema instance, exactly as the
// inline `z.string().optional()` / `z.array(z.string()).optional()` would.
const optionalText = () => z.string().optional();
const optionalTextList = () => z.array(z.string()).optional();

/** Redaction marker: a required `level` plus optional field list and reason. */
const TraceRedactionStateSchema = z.object({
  level: z.enum(TRACE_REDACTION_LEVELS),
  fields: optionalTextList(),
  reason: optionalText(),
});

/** Error record: only `message` is required. */
const TraceErrorSchema = z.object({
  message: z.string(),
  name: optionalText(),
  code: optionalText(),
  stack: optionalText(),
  metadata: MetadataSchema.optional(),
});

/** Where a trace came from: a required `kind` plus optional locator strings. */
const TraceSourceSchema = z.object({
  kind: z.enum(TRACE_SOURCE_KINDS),
  path: optionalText(),
  url: optionalText(),
  provider: optionalText(),
  format: optionalText(),
  version: optionalText(),
  metadata: MetadataSchema.optional(),
});

/** Session descriptor; every field is optional. */
const TraceSessionSchema = z.object({
  sessionId: optionalText(),
  conversationId: optionalText(),
  cwd: optionalText(),
  startedAt: optionalText(),
  endedAt: optionalText(),
  metadata: MetadataSchema.optional(),
});

/** Branch selection descriptor; every field is optional. */
const TraceBranchSchema = z.object({
  selectedLeafId: optionalText(),
  selectedPathIds: optionalTextList(),
  includedEventIds: optionalTextList(),
  omittedEventIds: optionalTextList(),
  selectionReason: optionalText(),
});

/** Back-reference into the source material; `line` must be a non-negative integer when present. */
const TraceSourceRefSchema = z.object({
  eventId: optionalText(),
  messageId: optionalText(),
  spanId: optionalText(),
  traceId: optionalText(),
  rawKind: optionalText(),
  path: optionalText(),
  line: z.number().int().nonnegative().optional(),
  metadata: MetadataSchema.optional(),
});

/** Raw evidence attached to an event: a required `kind` and an untyped optional `content`. */
const TraceRawEvidenceSchema = z.object({
  kind: z.string(),
  ref: optionalText(),
  mediaType: optionalText(),
  content: z.unknown().optional(),
  redacted: z.boolean().optional(),
  metadata: MetadataSchema.optional(),
});

/** Message payload of an event: a required `role` string and untyped optional `content`. */
const TraceMessageSchema = z.object({
  role: z.string(),
  name: optionalText(),
  content: z.unknown().optional(),
  redaction: TraceRedactionStateSchema.optional(),
  tokenUsage: TokenUsageSchema.optional(),
  metadata: MetadataSchema.optional(),
});

/** Model descriptor of an event; every field is optional. */
const TraceModelSchema = z.object({
  provider: optionalText(),
  name: optionalText(),
  invocationId: optionalText(),
  tokenUsage: TokenUsageSchema.optional(),
  metadata: MetadataSchema.optional(),
});

/** Tool payload of an event: a required `name`, untyped optional `input`/`output`. */
const TraceToolSchema = z.object({
  name: z.string(),
  callId: optionalText(),
  input: z.unknown().optional(),
  output: z.unknown().optional(),
  status: z.enum(TRACE_TOOL_STATUSES).optional(),
  error: TraceErrorSchema.optional(),
  redaction: TraceRedactionStateSchema.optional(),
  metadata: MetadataSchema.optional(),
});
115
// Builds a fresh "integer >= 0" number schema for each field that needs one.
const nonNegativeInt = () => z.number().int().nonnegative();

/**
 * One trace event. `eventId`, `ordinal` (non-negative integer) and `type`
 * are required; every payload and annotation field is optional.
 */
const TraceEventSchema = z.object({
  eventId: z.string(),
  parentEventId: z.string().optional(),
  ordinal: nonNegativeInt(),
  type: z.enum(TRACE_EVENT_TYPES),
  timestamp: z.string().optional(),
  durationMs: z.number().nonnegative().optional(),
  durationInferred: z.boolean().optional(),
  turnIndex: nonNegativeInt().optional(),
  message: TraceMessageSchema.optional(),
  model: TraceModelSchema.optional(),
  tool: TraceToolSchema.optional(),
  error: TraceErrorSchema.optional(),
  sourceRef: TraceSourceRefSchema.optional(),
  rawEvidence: z.array(TraceRawEvidenceSchema).optional(),
  redaction: TraceRedactionStateSchema.optional(),
  metadata: MetadataSchema.optional(),
});

/**
 * A complete trace artifact: required `source`, `session` and `events`,
 * plus optional branch, usage, cost, timing and metadata fields.
 */
const TraceArtifactSchema = z.object({
  source: TraceSourceSchema,
  session: TraceSessionSchema,
  branch: TraceBranchSchema.optional(),
  events: z.array(TraceEventSchema),
  tokenUsage: TokenUsageSchema.optional(),
  costUsd: z.number().optional(),
  durationMs: z.number().optional(),
  startedAt: z.string().optional(),
  endedAt: z.string().optional(),
  metadata: MetadataSchema.optional(),
});
145
/** A tool call: a required `tool` string; all other fields are optional. */
const ToolCallSchema = z.object({
  tool: z.string(),
  input: z.unknown().optional(),
  output: z.unknown().optional(),
  id: z.string().optional(),
  startTime: z.string().optional(),
  endTime: z.string().optional(),
  durationMs: z.number().optional(),
});

/** Text content block, tagged `type: "text"`. */
const ContentTextSchema = z.object({
  type: z.literal("text"),
  text: z.string(),
});

// The image and file blocks share one shape and differ only in the `type` tag.
const mediaBlockShape = (tag) => ({
  type: z.literal(tag),
  media_type: z.string(),
  path: z.string(),
});

/** Image content block, tagged `type: "image"`. */
const ContentImageSchema = z.object(mediaBlockShape("image"));

/** File content block, tagged `type: "file"`. */
const ContentFileSchema = z.object(mediaBlockShape("file"));

/** Any typed content block, discriminated on the `type` field. */
const ContentSchema = z.discriminatedUnion("type", [ContentTextSchema, ContentImageSchema, ContentFileSchema]);
173
/**
 * One element of an array-valued message `content`: either a typed content
 * block (`ContentSchema`) or an arbitrary string-keyed object.
 *
 * The record schemas here spell out the key schema explicitly
 * (`z.record(z.string(), z.unknown())`), matching the two-argument form used
 * by the other record schemas in this file. The single-argument form is not
 * accepted by Zod 4.
 */
var MessageContentBlockSchema = z.union([ContentSchema, z.record(z.string(), z.unknown())]);

/**
 * A conversation message. `role` is required and limited to
 * "assistant" | "user" | "system" | "tool". `content` may be a string, an
 * array of content blocks, or a string-keyed object. All other fields are
 * optional.
 */
var MessageSchema = z.object({
  role: z.enum(["assistant", "user", "system", "tool"]),
  content: z.union([
    z.string(),
    z.array(MessageContentBlockSchema),
    z.record(z.string(), z.unknown())
  ]).optional(),
  toolCalls: z.array(ToolCallSchema).optional(),
  name: z.string().optional(),
  startTime: z.string().optional(),
  endTime: z.string().optional(),
  durationMs: z.number().optional(),
  metadata: z.record(z.string(), z.unknown()).optional()
});
184
// Fields added on top of the summary counters to form a full trace.
const traceDetailShape = {
  messages: z.array(MessageSchema),
  events: z.array(TraceEventSchema),
  tokenUsage: TokenUsageSchema.optional(),
  costUsd: z.number().optional(),
  durationMs: z.number().optional(),
  startTime: z.string().optional(),
  endTime: z.string().optional(),
  metadata: MetadataSchema.optional(),
};

/**
 * Full trace: every field of `TraceSummarySchema` plus the required
 * `messages` and `events` arrays and optional usage, cost and timing fields.
 */
const TraceSchema = TraceSummarySchema.extend(traceDetailShape);
194
/**
 * Input payload validated before a grader, assertion or prompt-template
 * handler is called.
 *
 * Required: `criteria`, `expectedOutput`, `inputFiles`, `input`.
 * `messages` defaults to `[]` when absent. Most remaining fields accept
 * `null` as well as being omitted.
 *
 * Record-typed fields use the explicit two-argument
 * `z.record(z.string(), z.unknown())` form, matching the other record
 * schemas in this file. The single-argument form is not accepted by Zod 4.
 */
var CodeGraderInputSchema = z.object({
  criteria: z.string(),
  expectedOutput: z.array(MessageSchema),
  output: z.string().nullable().optional(),
  messages: z.array(MessageSchema).optional().default([]),
  /** Path to a temp file containing the output JSON (used for large payloads). */
  outputPath: z.string().optional(),
  inputFiles: z.array(z.string()),
  input: z.array(MessageSchema),
  metadata: z.record(z.string(), z.unknown()).nullable().optional(),
  trace: TraceSchema.nullable().optional(),
  traceSummary: TraceSummarySchema.nullable().optional(),
  tokenUsage: TokenUsageSchema.nullable().optional(),
  costUsd: z.number().nullable().optional(),
  durationMs: z.number().nullable().optional(),
  startTime: z.string().nullable().optional(),
  endTime: z.string().nullable().optional(),
  fileChanges: z.string().nullable().optional(),
  workspacePath: z.string().nullable().optional(),
  config: z.record(z.string(), z.unknown()).nullable().optional()
});

/**
 * Result payload a grader must produce: a `score` within [0, 1], an
 * `assertions` list (defaults to `[]`), and optional free-form `details`.
 */
var CodeGraderResultSchema = z.object({
  score: z.number().min(0).max(1),
  assertions: z.array(
    z.object({
      text: z.string(),
      passed: z.boolean(),
      evidence: z.string().optional()
    })
  ).optional().default([]),
  /** Optional structured details for domain-specific metrics (e.g., TP/TN/FP/FN counts, alignments). */
  details: z.record(z.string(), z.unknown()).optional()
});

/** Prompt templates receive exactly the same input shape as code graders. */
var PromptTemplateInputSchema = CodeGraderInputSchema;
228
+
229
+ // src/target-client.ts
230
/**
 * Error whose `name` is "TargetNotAvailableError".
 *
 * @param {string} message - Human-readable description, forwarded to `Error`.
 */
class TargetNotAvailableError extends Error {
  // Own property so `name` shows the specific error type instead of "Error".
  name = "TargetNotAvailableError";

  constructor(message) {
    super(message);
  }
}
236
/**
 * Error whose `name` is "TargetInvocationError" and which carries a
 * `statusCode` value alongside the message.
 *
 * @param {string} message - Human-readable description, forwarded to `Error`.
 * @param {number} [statusCode] - Stored verbatim on the instance.
 */
class TargetInvocationError extends Error {
  statusCode;
  name = "TargetInvocationError";

  constructor(message, statusCode) {
    super(message);
    this.statusCode = statusCode;
  }
}
244
/**
 * Builds a target client from the process environment.
 *
 * Reads `AGENTV_TARGET_PROXY_URL` and `AGENTV_TARGET_PROXY_TOKEN`.
 *
 * @returns The client produced by `createTargetClientInternal`, or
 *   `undefined` when the proxy URL is unset or empty.
 * @throws {TargetNotAvailableError} When the URL is set but the token is not.
 */
function createTargetClient() {
  const {
    AGENTV_TARGET_PROXY_URL: proxyUrl,
    AGENTV_TARGET_PROXY_TOKEN: proxyToken
  } = process.env;

  // No proxy URL means there is no target to talk to.
  if (!proxyUrl) {
    return undefined;
  }
  if (proxyToken) {
    return createTargetClientInternal(proxyUrl, proxyToken);
  }
  throw new TargetNotAvailableError(
    "AGENTV_TARGET_PROXY_URL is set but AGENTV_TARGET_PROXY_TOKEN is missing"
  );
}
257
/**
 * Builds a client that talks to the target proxy over HTTP.
 *
 * Every request carries a JSON `Content-Type` header and an
 * `Authorization: Bearer <token>` header.
 *
 * @param {string} url - Base URL of the proxy. Trailing slashes are ignored,
 *   so "http://host/" and "http://host" address the same endpoints.
 * @param {string} token - Bearer token sent with every request.
 * @returns {{invoke: Function, invokeBatch: Function, getInfo: Function}}
 *   `invoke(request)` POSTs to `/invoke` and resolves to the parsed JSON body.
 *   `invokeBatch(requests)` POSTs to `/invokeBatch` and resolves to the
 *   `responses` property of the parsed JSON body. `getInfo()` GETs `/info`
 *   and resolves to the parsed JSON body.
 *   Each method rejects with `TargetInvocationError` (carrying the HTTP
 *   status) when the response is not ok.
 */
function createTargetClientInternal(url, token) {
  // Strip trailing slashes so the paths below never produce "//invoke".
  const baseUrl = String(url).replace(/\/+$/, "");
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${token}`
  };

  // Only these five fields are forwarded; anything else on the request is dropped.
  const toWireRequest = (request) => ({
    question: request.question,
    systemPrompt: request.systemPrompt,
    evalCaseId: request.evalCaseId,
    attempt: request.attempt,
    target: request.target
  });

  // Turns a non-ok response into a TargetInvocationError. Prefers the `error`
  // property of a JSON body, then the raw body text, then "HTTP <status>".
  async function toInvocationError(response) {
    const errorBody = await response.text();
    let errorMessage;
    try {
      const errorJson = JSON.parse(errorBody);
      errorMessage = errorJson.error ?? `HTTP ${response.status}`;
    } catch {
      errorMessage = errorBody || `HTTP ${response.status}`;
    }
    return new TargetInvocationError(errorMessage, response.status);
  }

  // Performs one request and resolves to the parsed JSON body of an ok response.
  async function requestJson(path, method, payload) {
    const init = { method, headers };
    if (payload !== undefined) {
      init.body = JSON.stringify(payload);
    }
    const response = await fetch(`${baseUrl}${path}`, init);
    if (!response.ok) {
      throw await toInvocationError(response);
    }
    return await response.json();
  }

  return {
    async invoke(request) {
      return await requestJson("/invoke", "POST", toWireRequest(request));
    },
    async invokeBatch(requests) {
      const result = await requestJson("/invokeBatch", "POST", {
        requests: requests.map((r) => toWireRequest(r))
      });
      return result.responses;
    },
    async getInfo() {
      return await requestJson("/info", "GET");
    }
  };
}
336
+
337
+ // src/index.ts
338
+ import { z as z2 } from "zod";
339
+
340
+ // src/assertion.ts
341
+ import { readFileSync } from "node:fs";
342
+
343
+ // src/case-conversion.ts
344
+ function toCamelCase(str) {
345
+ if (/^[A-Z]/.test(str)) {
346
+ return str;
347
+ }
348
+ return str.replace(/_([a-z0-9])/g, (_, letter) => letter.toUpperCase());
349
+ }
350
+ function toCamelCaseDeep(obj) {
351
+ if (obj === null || obj === void 0) {
352
+ return obj;
353
+ }
354
+ if (Array.isArray(obj)) {
355
+ return obj.map((item) => toCamelCaseDeep(item));
356
+ }
357
+ if (typeof obj === "object") {
358
+ const result = {};
359
+ for (const [key, value] of Object.entries(obj)) {
360
+ const camelKey = toCamelCase(key);
361
+ result[camelKey] = toCamelCaseDeep(value);
362
+ }
363
+ return result;
364
+ }
365
+ return obj;
366
+ }
367
+
368
+ // src/deprecation.ts
369
/**
 * Pass-through hook: returns the very same object it was given, untouched.
 *
 * @param {*} input - Parsed handler input.
 * @returns {*} `input` itself.
 */
function enrichInput(input) {
  return input;
}
372
+
373
+ // src/assertion.ts
374
/**
 * Synchronously reads file descriptor 0 (standard input) to the end.
 *
 * @returns {string} Everything read, decoded as UTF-8.
 */
function readStdin() {
  const stdinFd = 0;
  return readFileSync(stdinFd, "utf8");
}
377
/**
 * Forces a score into the closed range [0, 1].
 *
 * @param {*} value - Candidate score.
 * @returns {number} 0 for NaN, ±Infinity and non-number values; otherwise
 *   `value` limited to the range [0, 1].
 */
function clampScore(value) {
  // Number.isFinite is false for NaN as well as for the infinities and for
  // anything that is not a number, so one check covers every rejected case.
  const usable = Number.isFinite(value);
  return usable ? Math.min(1, Math.max(0, value)) : 0;
}
383
/**
 * Produces a printable message for any thrown value.
 *
 * @param {*} error - The caught value.
 * @returns {string} `error.message` for `Error` instances, otherwise
 *   `String(error)`.
 */
function formatError(error) {
  return error instanceof Error ? error.message : String(error);
}
389
/**
 * Converts a handler result into `{ score, assertions, details }`.
 *
 * Score resolution, in priority order:
 *   1. `result.score`, when not `undefined`, passed through `clampScore`;
 *   2. `result.pass`, when not `undefined`: truthy gives 1, falsy gives 0;
 *   3. otherwise 0.
 *
 * @param {{score?: number, pass?: boolean, assertions?: Array, details?: *}} result
 * @returns {{score: number, assertions: Array, details: *}} `assertions` is a
 *   shallow copy (or `[]` when absent); `details` is passed through as-is.
 */
function normalizeScore(result) {
  const { score: rawScore, pass: verdict, assertions: given, details } = result;

  const resolveScore = () => {
    if (rawScore !== undefined) {
      return clampScore(rawScore);
    }
    if (verdict !== undefined) {
      return verdict ? 1 : 0;
    }
    return 0;
  };

  return {
    score: resolveScore(),
    assertions: given ? [...given] : [],
    details
  };
}
404
/**
 * Runs an assertion handler end to end.
 *
 * Success path: read stdin, parse it as JSON, camelCase the keys, validate
 * with `CodeGraderInputSchema`, call `handler(input)`, normalise the result
 * with `normalizeScore`, validate it with `CodeGraderResultSchema`, and print
 * it to stdout as pretty-printed JSON.
 *
 * Failure path: any thrown value is reported on stdout as a score-0 result
 * with a single failed assertion, after which the process exits with code 1.
 *
 * @param {Function} handler - Receives the validated input; may be async.
 * @returns {Promise<void>}
 */
async function runAssertion(handler) {
  try {
    const input = CodeGraderInputSchema.parse(toCamelCaseDeep(JSON.parse(readStdin())));

    const { outputPath } = input;
    if (outputPath && input.output == null) {
      // The output was delivered out of band in a file. Expose it through a
      // getter so the file is read and parsed only if the handler asks for it,
      // and at most once.
      let loaded;
      Object.defineProperty(input, "output", {
        enumerable: true,
        configurable: true,
        get: () => {
          if (loaded === undefined) {
            loaded = JSON.parse(readFileSync(outputPath, "utf8"));
          }
          return loaded;
        }
      });
    }

    enrichInput(input);
    const verdict = normalizeScore(await handler(input));
    const report = CodeGraderResultSchema.parse(verdict);
    console.log(JSON.stringify(report, null, 2));
  } catch (error) {
    const failure = {
      score: 0,
      assertions: [{ text: `Assertion failed: ${formatError(error)}`, passed: false }]
    };
    console.log(JSON.stringify(failure, null, 2));
    process.exit(1);
  }
}
439
+
440
+ // src/prompt-template.ts
441
+ import { readFileSync as readFileSync2 } from "node:fs";
442
/**
 * Synchronously reads file descriptor 0 (standard input) to the end.
 *
 * @returns {string} Everything read, decoded as UTF-8.
 */
function readStdin2() {
  const stdinFd = 0;
  return readFileSync2(stdinFd, "utf8");
}
445
/**
 * Runs a prompt-template handler end to end.
 *
 * Reads stdin, parses it as JSON, camelCases the keys, validates with
 * `PromptTemplateInputSchema`, then prints whatever `handler(input)` resolves
 * to on stdout. On any thrown value the message is written to stderr and the
 * process exits with code 1.
 *
 * @param {Function} handler - Receives the validated input; may be async.
 * @returns {Promise<void>}
 */
async function runPromptTemplate(handler) {
  try {
    const parsed = PromptTemplateInputSchema.parse(toCamelCaseDeep(JSON.parse(readStdin2())));
    enrichInput(parsed);
    const rendered = await handler(parsed);
    console.log(rendered);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(reason);
    process.exit(1);
  }
}
459
+
460
+ // src/runtime.ts
461
+ import { readFileSync as readFileSync3 } from "node:fs";
462
/**
 * Synchronously reads file descriptor 0 (standard input) to the end.
 *
 * @returns {string} Everything read, decoded as UTF-8.
 */
function readStdin3() {
  const stdinFd = 0;
  return readFileSync3(stdinFd, "utf8");
}
465
/**
 * Forces a score into the closed range [0, 1].
 *
 * @param {*} value - Candidate score.
 * @returns {number} 0 for NaN, ±Infinity and non-number values; otherwise
 *   `value` limited to the range [0, 1].
 */
function clampScore2(value) {
  // Number.isFinite already rejects NaN, so no separate NaN test is needed.
  if (!Number.isFinite(value)) {
    return 0;
  }
  const atLeastZero = Math.max(0, value);
  return Math.min(1, atLeastZero);
}
471
/**
 * Produces a printable message for any thrown value.
 *
 * @param {*} error - The caught value.
 * @returns {string} `error.message` for `Error` instances, otherwise
 *   `String(error)`.
 */
function formatError2(error) {
  const isRealError = error instanceof Error;
  return isRealError ? error.message : String(error);
}
477
/**
 * Runs a code-grader handler end to end.
 *
 * Success path: read stdin, parse it as JSON, camelCase the keys, validate
 * with `CodeGraderInputSchema`, call `handler(input)`, clamp the returned
 * `score`, validate the result with `CodeGraderResultSchema`, and print it to
 * stdout as pretty-printed JSON.
 *
 * Failure path: any thrown value is reported on stdout as a score-0 result
 * with a single failed assertion, after which the process exits with code 1.
 *
 * @param {Function} handler - Receives the validated input; may be async.
 * @returns {Promise<void>}
 */
async function runCodeGrader(handler) {
  try {
    const input = CodeGraderInputSchema.parse(toCamelCaseDeep(JSON.parse(readStdin3())));

    const { outputPath } = input;
    if (outputPath && input.output == null) {
      // The output was delivered out of band in a file. Expose it through a
      // getter so the file is read and parsed only if the handler asks for it,
      // and at most once.
      let loaded;
      Object.defineProperty(input, "output", {
        enumerable: true,
        configurable: true,
        get: () => {
          if (loaded === undefined) {
            loaded = JSON.parse(readFileSync3(outputPath, "utf8"));
          }
          return loaded;
        }
      });
    }

    enrichInput(input);
    const graded = await handler(input);
    // The handler's own fields are kept; only `score` is overridden by its clamped value.
    const candidate = { ...graded, score: clampScore2(graded.score) };
    const report = CodeGraderResultSchema.parse(candidate);
    console.log(JSON.stringify(report, null, 2));
  } catch (error) {
    const failure = {
      score: 0,
      assertions: [{ text: `Evaluation failed: ${formatError2(error)}`, passed: false }]
    };
    console.log(JSON.stringify(failure, null, 2));
    process.exit(1);
  }
}
514
+
515
+ // src/index.ts
516
/**
 * Registers and immediately starts a code grader.
 *
 * @param {Function} handler - Passed straight to `runCodeGrader`.
 * @returns {void} The run is started but not awaited.
 */
function defineCodeGrader(handler) {
  // `void` marks the promise as intentionally not awaited.
  void runCodeGrader(handler);
}
519
/**
 * Registers and immediately starts a prompt template.
 *
 * @param {Function} handler - Passed straight to `runPromptTemplate`.
 * @returns {void} The run is started but not awaited.
 */
function definePromptTemplate(handler) {
  // `void` marks the promise as intentionally not awaited.
  void runPromptTemplate(handler);
}
522
/**
 * Registers and immediately starts an assertion.
 *
 * @param {Function} handler - Passed straight to `runAssertion`.
 * @returns {void} The run is started but not awaited.
 */
function defineAssertion(handler) {
  // `void` marks the promise as intentionally not awaited.
  void runAssertion(handler);
}
525
+ export {
526
+ CodeGraderInputSchema,
527
+ CodeGraderResultSchema,
528
+ ContentFileSchema,
529
+ ContentImageSchema,
530
+ ContentSchema,
531
+ ContentTextSchema,
532
+ MessageSchema,
533
+ PromptTemplateInputSchema,
534
+ TRACE_EVENT_TYPES,
535
+ TRACE_REDACTION_LEVELS,
536
+ TRACE_SOURCE_KINDS,
537
+ TRACE_TOOL_STATUSES,
538
+ TargetInvocationError,
539
+ TargetNotAvailableError,
540
+ TokenUsageSchema,
541
+ ToolCallSchema,
542
+ TraceArtifactSchema,
543
+ TraceBranchSchema,
544
+ TraceErrorSchema,
545
+ TraceEventSchema,
546
+ TraceMessageSchema,
547
+ TraceModelSchema,
548
+ TraceRawEvidenceSchema,
549
+ TraceRedactionStateSchema,
550
+ TraceSchema,
551
+ TraceSessionSchema,
552
+ TraceSourceRefSchema,
553
+ TraceSourceSchema,
554
+ TraceSummarySchema,
555
+ TraceToolSchema,
556
+ createTargetClient,
557
+ defineAssertion,
558
+ defineCodeGrader,
559
+ definePromptTemplate,
560
+ z2 as z
561
+ };
562
+ //# sourceMappingURL=index.js.map