@ls-stack/agent-eval 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-TjV5nDMM.mjs → app-hkNNN_jn.mjs} +53 -5
- package/dist/apps/web/dist/assets/index-ChgByJbI.css +1 -0
- package/dist/apps/web/dist/assets/index-CmY0_D5Z.js +113 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-BTtgQLjB.mjs → cli-DrPk66xh.mjs} +13 -4
- package/dist/index.d.mts +466 -78
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +3 -2
- package/dist/{runOrchestration-HaMahl6b.mjs → runOrchestration-DA4Rh5g0.mjs} +2379 -179
- package/dist/{runner-CBDZos0Z.mjs → runner-BzT3B9OF.mjs} +1 -1
- package/dist/{runner-DGVoOyJt.mjs → runner-DTP5Ui4_.mjs} +2 -2
- package/dist/src-CfprG1RW.mjs +3 -0
- package/package.json +3 -3
- package/dist/apps/web/dist/assets/index-ClE28i5w.css +0 -1
- package/dist/apps/web/dist/assets/index-gGumCEnD.js +0 -112
- package/dist/src-Bt5Fz9HS.mjs +0 -3
package/dist/index.d.mts
CHANGED
|
@@ -913,6 +913,20 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
913
913
|
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
914
914
|
}, z$1.core.$strip>>;
|
|
915
915
|
trial: z$1.ZodNumber;
|
|
916
|
+
cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
|
|
917
|
+
type: z$1.ZodLiteral<"value">;
|
|
918
|
+
name: z$1.ZodString;
|
|
919
|
+
namespace: z$1.ZodString;
|
|
920
|
+
key: z$1.ZodString;
|
|
921
|
+
status: z$1.ZodEnum<{
|
|
922
|
+
hit: "hit";
|
|
923
|
+
miss: "miss";
|
|
924
|
+
refresh: "refresh";
|
|
925
|
+
bypass: "bypass";
|
|
926
|
+
}>;
|
|
927
|
+
storedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
928
|
+
age: z$1.ZodOptional<z$1.ZodNumber>;
|
|
929
|
+
}, z$1.core.$strip>>>;
|
|
916
930
|
}, z$1.core.$strip>;
|
|
917
931
|
/** Full case payload including inputs, trace, outputs, and failures. */
|
|
918
932
|
type CaseDetail = z$1.infer<typeof caseDetailSchema>;
|
|
@@ -1363,60 +1377,16 @@ type EvalTitleLike = {
|
|
|
1363
1377
|
*/
|
|
1364
1378
|
declare function getEvalTitle(evalLike: EvalTitleLike): string;
|
|
1365
1379
|
//#endregion
|
|
1366
|
-
//#region ../shared/src/
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
"run.cancelled": "run.cancelled";
|
|
1377
|
-
"run.error": "run.error";
|
|
1378
|
-
}>;
|
|
1379
|
-
/** Server-sent event name emitted by the runner or backend. */
|
|
1380
|
-
type SseEventType = z$1.infer<typeof sseEventTypeSchema>;
|
|
1381
|
-
/** Schema for the SSE envelope used to stream run updates to clients. */
|
|
1382
|
-
declare const sseEnvelopeSchema: z$1.ZodObject<{
|
|
1383
|
-
type: z$1.ZodString;
|
|
1384
|
-
runId: z$1.ZodOptional<z$1.ZodString>;
|
|
1385
|
-
timestamp: z$1.ZodString;
|
|
1386
|
-
payload: z$1.ZodUnknown;
|
|
1387
|
-
}, z$1.core.$strip>;
|
|
1388
|
-
/** Wire format for a streamed event emitted during eval execution. */
|
|
1389
|
-
type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema>;
|
|
1390
|
-
//#endregion
|
|
1391
|
-
//#region ../shared/src/schemas/api.d.ts
|
|
1392
|
-
/** Schema for the API request that starts a new eval run. */
|
|
1393
|
-
declare const createRunRequestSchema: z$1.ZodObject<{
|
|
1394
|
-
target: z$1.ZodObject<{
|
|
1395
|
-
mode: z$1.ZodEnum<{
|
|
1396
|
-
all: "all";
|
|
1397
|
-
evalIds: "evalIds";
|
|
1398
|
-
caseIds: "caseIds";
|
|
1399
|
-
}>;
|
|
1400
|
-
evalIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
1401
|
-
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
1402
|
-
}, z$1.core.$strip>;
|
|
1403
|
-
trials: z$1.ZodNumber;
|
|
1404
|
-
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
1405
|
-
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
1406
|
-
use: "use";
|
|
1407
|
-
bypass: "bypass";
|
|
1408
|
-
refresh: "refresh";
|
|
1409
|
-
}>>;
|
|
1410
|
-
}, z$1.core.$strip>>;
|
|
1411
|
-
}, z$1.core.$strip>;
|
|
1412
|
-
/** Request payload accepted by the run creation endpoint. */
|
|
1413
|
-
type CreateRunRequest = z$1.infer<typeof createRunRequestSchema>;
|
|
1414
|
-
/** Schema for updating a UI-authored manual score on one persisted case. */
|
|
1415
|
-
declare const updateManualScoreRequestSchema: z$1.ZodObject<{
|
|
1416
|
-
value: z$1.ZodNullable<z$1.ZodNumber>;
|
|
1417
|
-
}, z$1.core.$strip>;
|
|
1418
|
-
/** Request payload accepted by the manual score update endpoint. */
|
|
1419
|
-
type UpdateManualScoreRequest = z$1.infer<typeof updateManualScoreRequestSchema>;
|
|
1380
|
+
//#region ../shared/src/utils/getNestedAttribute.d.ts
|
|
1381
|
+
/**
|
|
1382
|
+
* Read a value from `source` by walking a dot-separated path.
|
|
1383
|
+
*
|
|
1384
|
+
* Returns `undefined` when any segment of the path is missing or when an
|
|
1385
|
+
* intermediate value is not a plain object. Used by trace-attribute display,
|
|
1386
|
+
* the LLM calls extractor, and any consumer that needs to look up nested
|
|
1387
|
+
* properties from a span's `attributes` record.
|
|
1388
|
+
*/
|
|
1389
|
+
declare function getNestedAttribute(value: unknown, path: string): unknown;
|
|
1420
1390
|
//#endregion
|
|
1421
1391
|
//#region ../shared/src/schemas/config.d.ts
|
|
1422
1392
|
/** Strategy used to collapse repeated trials into one stored case result. */
|
|
@@ -1426,6 +1396,144 @@ declare const trialSelectionModeSchema: z$1.ZodEnum<{
|
|
|
1426
1396
|
}>;
|
|
1427
1397
|
/** Strategy used to collapse repeated trials into one stored case result. */
|
|
1428
1398
|
type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
|
|
1399
|
+
/** Render formats supported by an LLM-call metric in the UI. */
|
|
1400
|
+
declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
|
|
1401
|
+
string: "string";
|
|
1402
|
+
number: "number";
|
|
1403
|
+
boolean: "boolean";
|
|
1404
|
+
duration: "duration";
|
|
1405
|
+
json: "json";
|
|
1406
|
+
}>;
|
|
1407
|
+
/** Render format applied to an LLM-call metric value. */
|
|
1408
|
+
type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
|
|
1409
|
+
/** Where an LLM-call metric is rendered inside the LLM calls tab. */
|
|
1410
|
+
declare const llmCallMetricPlacementSchema: z$1.ZodEnum<{
|
|
1411
|
+
header: "header";
|
|
1412
|
+
body: "body";
|
|
1413
|
+
}>;
|
|
1414
|
+
/** Placement option for an LLM-call metric. */
|
|
1415
|
+
type LlmCallMetricPlacement = z$1.infer<typeof llmCallMetricPlacementSchema>;
|
|
1416
|
+
/**
|
|
1417
|
+
* Schema for a single user-defined metric attached to LLM call rows.
|
|
1418
|
+
*
|
|
1419
|
+
* Each metric reads `path` from the span's `attributes` and renders the value
|
|
1420
|
+
* with the configured `format` and `numberFormat`. `placements` controls
|
|
1421
|
+
* whether the metric appears as a chip on the collapsed row header, as a row
|
|
1422
|
+
* inside the expanded body, or both. Defaults to `['body']` when omitted.
|
|
1423
|
+
*/
|
|
1424
|
+
declare const llmCallMetricSchema: z$1.ZodObject<{
|
|
1425
|
+
label: z$1.ZodString;
|
|
1426
|
+
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
1427
|
+
path: z$1.ZodString;
|
|
1428
|
+
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1429
|
+
string: "string";
|
|
1430
|
+
number: "number";
|
|
1431
|
+
boolean: "boolean";
|
|
1432
|
+
duration: "duration";
|
|
1433
|
+
json: "json";
|
|
1434
|
+
}>>;
|
|
1435
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1436
|
+
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
1437
|
+
header: "header";
|
|
1438
|
+
body: "body";
|
|
1439
|
+
}>>>;
|
|
1440
|
+
}, z$1.core.$strip>;
|
|
1441
|
+
/** User-defined metric authored in `agent-evals.config.ts`. */
|
|
1442
|
+
type LlmCallMetric = z$1.infer<typeof llmCallMetricSchema>;
|
|
1443
|
+
/** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
|
|
1444
|
+
declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
1445
|
+
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
1446
|
+
attributes: z$1.ZodOptional<z$1.ZodObject<{
|
|
1447
|
+
model: z$1.ZodOptional<z$1.ZodString>;
|
|
1448
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1449
|
+
inputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1450
|
+
outputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1451
|
+
cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1452
|
+
cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1453
|
+
reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1454
|
+
totalTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1455
|
+
cost: z$1.ZodOptional<z$1.ZodString>;
|
|
1456
|
+
inputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1457
|
+
outputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1458
|
+
cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1459
|
+
cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1460
|
+
reasoningCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1461
|
+
steps: z$1.ZodOptional<z$1.ZodString>;
|
|
1462
|
+
finishReason: z$1.ZodOptional<z$1.ZodString>;
|
|
1463
|
+
input: z$1.ZodOptional<z$1.ZodString>;
|
|
1464
|
+
output: z$1.ZodOptional<z$1.ZodString>;
|
|
1465
|
+
reasoning: z$1.ZodOptional<z$1.ZodString>;
|
|
1466
|
+
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
1467
|
+
}, z$1.core.$strip>>;
|
|
1468
|
+
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1469
|
+
label: z$1.ZodString;
|
|
1470
|
+
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
1471
|
+
path: z$1.ZodString;
|
|
1472
|
+
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1473
|
+
string: "string";
|
|
1474
|
+
number: "number";
|
|
1475
|
+
boolean: "boolean";
|
|
1476
|
+
duration: "duration";
|
|
1477
|
+
json: "json";
|
|
1478
|
+
}>>;
|
|
1479
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1480
|
+
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
1481
|
+
header: "header";
|
|
1482
|
+
body: "body";
|
|
1483
|
+
}>>>;
|
|
1484
|
+
}, z$1.core.$strip>>>;
|
|
1485
|
+
}, z$1.core.$strip>;
|
|
1486
|
+
/** Authored LLM calls config accepted from `agent-evals.config.ts`. */
|
|
1487
|
+
type LlmCallsConfigInput = z$1.infer<typeof llmCallsConfigSchema>;
|
|
1488
|
+
/** Resolved LLM-calls config sent to the UI with all defaults applied. */
|
|
1489
|
+
type ResolvedLlmCallsConfig = {
|
|
1490
|
+
kinds: string[];
|
|
1491
|
+
attributes: {
|
|
1492
|
+
model: string;
|
|
1493
|
+
provider: string;
|
|
1494
|
+
inputTokens: string;
|
|
1495
|
+
outputTokens: string;
|
|
1496
|
+
cachedInputTokens: string;
|
|
1497
|
+
cacheCreationInputTokens: string;
|
|
1498
|
+
reasoningTokens: string;
|
|
1499
|
+
totalTokens: string;
|
|
1500
|
+
cost: string;
|
|
1501
|
+
inputCost: string;
|
|
1502
|
+
outputCost: string;
|
|
1503
|
+
cachedInputCost: string;
|
|
1504
|
+
cacheCreationInputCost: string;
|
|
1505
|
+
reasoningCost: string;
|
|
1506
|
+
steps: string;
|
|
1507
|
+
finishReason: string;
|
|
1508
|
+
input: string;
|
|
1509
|
+
output: string;
|
|
1510
|
+
reasoning: string;
|
|
1511
|
+
toolCalls: string;
|
|
1512
|
+
};
|
|
1513
|
+
metrics: ResolvedLlmCallMetric[];
|
|
1514
|
+
};
|
|
1515
|
+
/** Fully-resolved LLM-call metric used by the runner and UI. */
|
|
1516
|
+
type ResolvedLlmCallMetric = {
|
|
1517
|
+
label: string;
|
|
1518
|
+
tooltip?: string;
|
|
1519
|
+
path: string;
|
|
1520
|
+
format: LlmCallMetricFormat;
|
|
1521
|
+
numberFormat?: NumberDisplayOptions;
|
|
1522
|
+
placements: LlmCallMetricPlacement[];
|
|
1523
|
+
};
|
|
1524
|
+
/** Default LLM-calls config the UI uses before the workspace fetch resolves. */
|
|
1525
|
+
declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
|
|
1526
|
+
/**
|
|
1527
|
+
* Resolve the user-authored LLM-calls config to a fully-defaulted shape used
|
|
1528
|
+
* by the UI to derive the LLM calls tab.
|
|
1529
|
+
*
|
|
1530
|
+
* - Missing or empty `kinds` falls back to `['llm']`.
|
|
1531
|
+
* - Missing `attributes.<field>` falls back to the corresponding default
|
|
1532
|
+
* attribute path.
|
|
1533
|
+
* - Missing `metrics[].format` defaults to `'string'`.
|
|
1534
|
+
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
1535
|
+
*/
|
|
1536
|
+
declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
|
|
1429
1537
|
/** Top-level config authored in `agent-evals.config.ts`. */
|
|
1430
1538
|
type AgentEvalsConfig = {
|
|
1431
1539
|
/** Root directory used to resolve all relative paths. Defaults to `process.cwd()`. */workspaceRoot?: string; /** Glob patterns (relative to `workspaceRoot`) used to discover eval files. */
|
|
@@ -1455,6 +1563,32 @@ type AgentEvalsConfig = {
|
|
|
1455
1563
|
* definition taking precedence for matching `key` or `path` entries.
|
|
1456
1564
|
*/
|
|
1457
1565
|
traceDisplay?: TraceDisplayInputConfig;
|
|
1566
|
+
/**
|
|
1567
|
+
* Configuration for the "LLM calls" tab in the case-run drawer.
|
|
1568
|
+
*
|
|
1569
|
+
* Determines which trace spans are treated as LLM calls (`kinds`), how
|
|
1570
|
+
* structured fields like `model` and `usage.inputTokens` are read from
|
|
1571
|
+
* span attributes, and which custom user-defined metrics are surfaced on
|
|
1572
|
+
* each call. All fields are optional and fall back to the documented
|
|
1573
|
+
* defaults; the LLM calls tab is shown automatically when at least one
|
|
1574
|
+
* matching span exists in a case run.
|
|
1575
|
+
*
|
|
1576
|
+
* @example
|
|
1577
|
+
* ```ts
|
|
1578
|
+
* llmCalls: {
|
|
1579
|
+
* kinds: ['llm', 'ai-sdk.generateText'],
|
|
1580
|
+
* attributes: {
|
|
1581
|
+
* cachedInputTokens: 'usage.cache_read_input_tokens',
|
|
1582
|
+
* },
|
|
1583
|
+
* metrics: [
|
|
1584
|
+
* { label: 'Tokens/sec', path: 'tokensPerSecond', format: 'number',
|
|
1585
|
+
* numberFormat: { decimalPlaces: 1 }, placements: ['header', 'body'] },
|
|
1586
|
+
* { label: 'Retries', path: 'retryCount', format: 'number' },
|
|
1587
|
+
* ],
|
|
1588
|
+
* }
|
|
1589
|
+
* ```
|
|
1590
|
+
*/
|
|
1591
|
+
llmCalls?: LlmCallsConfigInput;
|
|
1458
1592
|
/**
|
|
1459
1593
|
* Optional controls for the operation cache. When omitted, the cache is
|
|
1460
1594
|
* enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
|
|
@@ -1463,9 +1597,15 @@ type AgentEvalsConfig = {
|
|
|
1463
1597
|
/** Disable the cache entirely; spans with `cache` options execute as if uncached. */enabled?: boolean; /** Override the directory used to persist cache entries. */
|
|
1464
1598
|
dir?: string;
|
|
1465
1599
|
/**
|
|
1466
|
-
*
|
|
1467
|
-
* non-positive or non-finite values fall back to the default.
|
|
1600
|
+
* Default maximum entries retained for each cache namespace. Defaults to
|
|
1601
|
+
* `100`; non-positive or non-finite values fall back to the default.
|
|
1602
|
+
*/
|
|
1603
|
+
maxEntriesPerNamespace?: number;
|
|
1604
|
+
/**
|
|
1605
|
+
* Exact namespace-specific retention caps. Values override
|
|
1606
|
+
* `maxEntriesPerNamespace` for matching namespaces.
|
|
1468
1607
|
*/
|
|
1608
|
+
maxEntriesByNamespace?: Record<string, number>; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
|
|
1469
1609
|
maxEntriesPerEval?: number;
|
|
1470
1610
|
};
|
|
1471
1611
|
};
|
|
@@ -1509,13 +1649,118 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
1509
1649
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
1510
1650
|
}, z$1.core.$strip>>>;
|
|
1511
1651
|
}, z$1.core.$strip>>;
|
|
1652
|
+
llmCalls: z$1.ZodOptional<z$1.ZodObject<{
|
|
1653
|
+
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
1654
|
+
attributes: z$1.ZodOptional<z$1.ZodObject<{
|
|
1655
|
+
model: z$1.ZodOptional<z$1.ZodString>;
|
|
1656
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1657
|
+
inputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1658
|
+
outputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1659
|
+
cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1660
|
+
cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1661
|
+
reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1662
|
+
totalTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1663
|
+
cost: z$1.ZodOptional<z$1.ZodString>;
|
|
1664
|
+
inputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1665
|
+
outputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1666
|
+
cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1667
|
+
cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1668
|
+
reasoningCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1669
|
+
steps: z$1.ZodOptional<z$1.ZodString>;
|
|
1670
|
+
finishReason: z$1.ZodOptional<z$1.ZodString>;
|
|
1671
|
+
input: z$1.ZodOptional<z$1.ZodString>;
|
|
1672
|
+
output: z$1.ZodOptional<z$1.ZodString>;
|
|
1673
|
+
reasoning: z$1.ZodOptional<z$1.ZodString>;
|
|
1674
|
+
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
1675
|
+
}, z$1.core.$strip>>;
|
|
1676
|
+
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1677
|
+
label: z$1.ZodString;
|
|
1678
|
+
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
1679
|
+
path: z$1.ZodString;
|
|
1680
|
+
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1681
|
+
string: "string";
|
|
1682
|
+
number: "number";
|
|
1683
|
+
boolean: "boolean";
|
|
1684
|
+
duration: "duration";
|
|
1685
|
+
json: "json";
|
|
1686
|
+
}>>;
|
|
1687
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1688
|
+
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
1689
|
+
header: "header";
|
|
1690
|
+
body: "body";
|
|
1691
|
+
}>>>;
|
|
1692
|
+
}, z$1.core.$strip>>>;
|
|
1693
|
+
}, z$1.core.$strip>>;
|
|
1512
1694
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
1513
1695
|
enabled: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1514
1696
|
dir: z$1.ZodOptional<z$1.ZodString>;
|
|
1697
|
+
maxEntriesPerNamespace: z$1.ZodPipe<z$1.ZodTransform<number | undefined, unknown>, z$1.ZodOptional<z$1.ZodNumber>>;
|
|
1698
|
+
maxEntriesByNamespace: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodNumber>>;
|
|
1515
1699
|
maxEntriesPerEval: z$1.ZodPipe<z$1.ZodTransform<number | undefined, unknown>, z$1.ZodOptional<z$1.ZodNumber>>;
|
|
1516
1700
|
}, z$1.core.$strip>>;
|
|
1517
1701
|
}, z$1.core.$strip>;
|
|
1518
1702
|
//#endregion
|
|
1703
|
+
//#region ../shared/src/utils/extractLlmCalls.d.ts
|
|
1704
|
+
/** Resolved value for one user-defined metric on an LLM call row. */
|
|
1705
|
+
type LlmCallMetricValue = {
|
|
1706
|
+
label: string;
|
|
1707
|
+
tooltip: string | undefined;
|
|
1708
|
+
rawValue: unknown;
|
|
1709
|
+
format: LlmCallMetricFormat;
|
|
1710
|
+
numberFormat: NumberDisplayOptions | undefined;
|
|
1711
|
+
placements: LlmCallMetricPlacement[];
|
|
1712
|
+
};
|
|
1713
|
+
/** Single entry rendered as one expandable row in the LLM calls tab. */
|
|
1714
|
+
type LlmCallEntry = {
|
|
1715
|
+
id: string;
|
|
1716
|
+
name: string;
|
|
1717
|
+
kind: string;
|
|
1718
|
+
status: EvalTraceSpan['status'];
|
|
1719
|
+
model: string | null;
|
|
1720
|
+
provider: string | null;
|
|
1721
|
+
inputTokens: number | null;
|
|
1722
|
+
outputTokens: number | null;
|
|
1723
|
+
cachedInputTokens: number | null;
|
|
1724
|
+
cacheCreationInputTokens: number | null;
|
|
1725
|
+
reasoningTokens: number | null;
|
|
1726
|
+
totalTokens: number | null;
|
|
1727
|
+
costUsd: number | null;
|
|
1728
|
+
inputCostUsd: number | null;
|
|
1729
|
+
outputCostUsd: number | null;
|
|
1730
|
+
cachedInputCostUsd: number | null;
|
|
1731
|
+
cacheCreationInputCostUsd: number | null;
|
|
1732
|
+
reasoningCostUsd: number | null; /** Number of inference rounds. Derived from the array length when `stepDetails` is set. */
|
|
1733
|
+
stepCount: number | null; /** Per-step breakdown when the configured `steps` attribute resolves to an array. */
|
|
1734
|
+
stepDetails: unknown[] | null;
|
|
1735
|
+
finishReason: string | null;
|
|
1736
|
+
latencyMs: number | null;
|
|
1737
|
+
input: unknown;
|
|
1738
|
+
output: unknown;
|
|
1739
|
+
reasoning: unknown;
|
|
1740
|
+
toolCalls: unknown;
|
|
1741
|
+
metrics: LlmCallMetricValue[];
|
|
1742
|
+
warnings: EvalTraceSpanWarning[];
|
|
1743
|
+
error: EvalTraceSpanError | null;
|
|
1744
|
+
};
|
|
1745
|
+
/**
|
|
1746
|
+
* Filter `spans` down to LLM calls and project each one to the structured
|
|
1747
|
+
* shape consumed by the LLM calls tab.
|
|
1748
|
+
*
|
|
1749
|
+
* Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
|
|
1750
|
+
* (`model`, token counts, cost, etc.) are read via `getNestedAttribute` from
|
|
1751
|
+
* the configured paths, with safe coercion to `string | null` / `number |
|
|
1752
|
+
* null`. `totalTokens` falls back to a sum of input + output + cached when no
|
|
1753
|
+
* explicit total attribute is present. The `steps` attribute path may resolve
|
|
1754
|
+
* to either a number (rendered as the inference-round count) or an array of
|
|
1755
|
+
* per-step detail objects (rendered as a Steps section in the body, with
|
|
1756
|
+
* `stepCount` derived from the array length). `latencyMs` is `null` while the
|
|
1757
|
+
* span is still running. User-defined `metrics` whose path resolves to
|
|
1758
|
+
* `undefined` are dropped, but `null`, `0`, and `false` are preserved as
|
|
1759
|
+
* legitimate values worth displaying. Original span order is preserved so the
|
|
1760
|
+
* LLM calls tab matches the ordering in the Trace tab.
|
|
1761
|
+
*/
|
|
1762
|
+
declare function extractLlmCalls(spans: EvalTraceSpan[], config: ResolvedLlmCallsConfig): LlmCallEntry[];
|
|
1763
|
+
//#endregion
|
|
1519
1764
|
//#region ../shared/src/schemas/cache.d.ts
|
|
1520
1765
|
/**
|
|
1521
1766
|
* Mode that controls how the cache is consulted for a given run.
|
|
@@ -1535,6 +1780,7 @@ type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
|
1535
1780
|
declare const spanCacheOptionsSchema: z$1.ZodObject<{
|
|
1536
1781
|
key: z$1.ZodUnknown;
|
|
1537
1782
|
namespace: z$1.ZodOptional<z$1.ZodString>;
|
|
1783
|
+
serializeFileBytes: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1538
1784
|
}, z$1.core.$strip>;
|
|
1539
1785
|
/** Options accepted by an `evalTracer.span` call to opt the span into caching. */
|
|
1540
1786
|
type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
|
|
@@ -1545,6 +1791,38 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
|
1545
1791
|
}>;
|
|
1546
1792
|
/** Category of operation stored in the eval cache. */
|
|
1547
1793
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
1794
|
+
/** Status of a cache lookup recorded on a span or case scope. */
|
|
1795
|
+
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
1796
|
+
bypass: "bypass";
|
|
1797
|
+
refresh: "refresh";
|
|
1798
|
+
hit: "hit";
|
|
1799
|
+
miss: "miss";
|
|
1800
|
+
}>;
|
|
1801
|
+
/** Status of a cache lookup recorded on a span or case scope. */
|
|
1802
|
+
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
1803
|
+
/**
|
|
1804
|
+
* Reference to a value-cache lookup performed via `evalTracer.cache(...)`.
|
|
1805
|
+
*
|
|
1806
|
+
* Refs are appended to the active span's `cache.refs` attribute when the call
|
|
1807
|
+
* happens inside a `traceSpan(...)` body, or to the case scope's
|
|
1808
|
+
* `caseCacheRefs` bucket when the call is made directly from the case body.
|
|
1809
|
+
*/
|
|
1810
|
+
declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
1811
|
+
type: z$1.ZodLiteral<"value">;
|
|
1812
|
+
name: z$1.ZodString;
|
|
1813
|
+
namespace: z$1.ZodString;
|
|
1814
|
+
key: z$1.ZodString;
|
|
1815
|
+
status: z$1.ZodEnum<{
|
|
1816
|
+
bypass: "bypass";
|
|
1817
|
+
refresh: "refresh";
|
|
1818
|
+
hit: "hit";
|
|
1819
|
+
miss: "miss";
|
|
1820
|
+
}>;
|
|
1821
|
+
storedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
1822
|
+
age: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1823
|
+
}, z$1.core.$strip>;
|
|
1824
|
+
/** Reference to a value-cache lookup performed via `evalTracer.cache(...)`. */
|
|
1825
|
+
type TraceCacheRef = z$1.infer<typeof traceCacheRefSchema>;
|
|
1548
1826
|
/** Summary of a single persisted cache entry, used by list/delete endpoints. */
|
|
1549
1827
|
declare const cacheListItemSchema: z$1.ZodObject<{
|
|
1550
1828
|
key: z$1.ZodString;
|
|
@@ -1824,6 +2102,93 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
1824
2102
|
/** Persisted per-owner cache file contents. */
|
|
1825
2103
|
type CacheFile = z$1.infer<typeof cacheFileSchema>;
|
|
1826
2104
|
//#endregion
|
|
2105
|
+
//#region ../shared/src/utils/extractCacheHits.d.ts
|
|
2106
|
+
/**
|
|
2107
|
+
* Single cache-hit entry rendered as one row in the case drawer's
|
|
2108
|
+
* "Cache hits" tab.
|
|
2109
|
+
*
|
|
2110
|
+
* `origin === 'span'` rows came from a span's `cache.status` attribute or from
|
|
2111
|
+
* a `cache.refs` ref attached to a span body. `origin === 'caseRoot'` rows
|
|
2112
|
+
* came from `evalTracer.cache(...)` calls made directly from the case body
|
|
2113
|
+
* (no surrounding `traceSpan`), which would otherwise be invisible.
|
|
2114
|
+
*/
|
|
2115
|
+
type CacheHitEntry = {
|
|
2116
|
+
id: string;
|
|
2117
|
+
source: 'span' | 'value';
|
|
2118
|
+
origin: 'span' | 'caseRoot';
|
|
2119
|
+
name: string;
|
|
2120
|
+
namespace: string;
|
|
2121
|
+
key: string;
|
|
2122
|
+
storedAt: string | undefined;
|
|
2123
|
+
age: number | undefined;
|
|
2124
|
+
spanId: string | undefined;
|
|
2125
|
+
};
|
|
2126
|
+
/**
|
|
2127
|
+
* Collect every `status === 'hit'` cache event recorded for a case run.
|
|
2128
|
+
*
|
|
2129
|
+
* Walks `spans` for span-level cache hits (`attributes['cache.status'] ===
|
|
2130
|
+
* 'hit'`) and per-span value-cache refs (`attributes['cache.refs']`), then
|
|
2131
|
+
* appends spanless value-cache refs persisted on the case scope. Non-hit
|
|
2132
|
+
* statuses (`miss`/`refresh`/`bypass`) are skipped — they remain visible
|
|
2133
|
+
* inline in the Trace tab.
|
|
2134
|
+
*/
|
|
2135
|
+
declare function extractCacheHits(spans: EvalTraceSpan[], caseCacheRefs: TraceCacheRef[]): CacheHitEntry[];
|
|
2136
|
+
//#endregion
|
|
2137
|
+
//#region ../shared/src/schemas/sse.d.ts
|
|
2138
|
+
declare const sseEventTypeSchema: z$1.ZodEnum<{
|
|
2139
|
+
"discovery.updated": "discovery.updated";
|
|
2140
|
+
"run.started": "run.started";
|
|
2141
|
+
"run.summary": "run.summary";
|
|
2142
|
+
"case.started": "case.started";
|
|
2143
|
+
"case.updated": "case.updated";
|
|
2144
|
+
"case.finished": "case.finished";
|
|
2145
|
+
"trace.span": "trace.span";
|
|
2146
|
+
"run.finished": "run.finished";
|
|
2147
|
+
"run.cancelled": "run.cancelled";
|
|
2148
|
+
"run.error": "run.error";
|
|
2149
|
+
}>;
|
|
2150
|
+
/** Server-sent event name emitted by the runner or backend. */
|
|
2151
|
+
type SseEventType = z$1.infer<typeof sseEventTypeSchema>;
|
|
2152
|
+
/** Schema for the SSE envelope used to stream run updates to clients. */
|
|
2153
|
+
declare const sseEnvelopeSchema: z$1.ZodObject<{
|
|
2154
|
+
type: z$1.ZodString;
|
|
2155
|
+
runId: z$1.ZodOptional<z$1.ZodString>;
|
|
2156
|
+
timestamp: z$1.ZodString;
|
|
2157
|
+
payload: z$1.ZodUnknown;
|
|
2158
|
+
}, z$1.core.$strip>;
|
|
2159
|
+
/** Wire format for a streamed event emitted during eval execution. */
|
|
2160
|
+
type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema>;
|
|
2161
|
+
//#endregion
|
|
2162
|
+
//#region ../shared/src/schemas/api.d.ts
|
|
2163
|
+
/** Schema for the API request that starts a new eval run. */
|
|
2164
|
+
declare const createRunRequestSchema: z$1.ZodObject<{
|
|
2165
|
+
target: z$1.ZodObject<{
|
|
2166
|
+
mode: z$1.ZodEnum<{
|
|
2167
|
+
all: "all";
|
|
2168
|
+
evalIds: "evalIds";
|
|
2169
|
+
caseIds: "caseIds";
|
|
2170
|
+
}>;
|
|
2171
|
+
evalIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2172
|
+
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2173
|
+
}, z$1.core.$strip>;
|
|
2174
|
+
trials: z$1.ZodNumber;
|
|
2175
|
+
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
2176
|
+
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
2177
|
+
use: "use";
|
|
2178
|
+
bypass: "bypass";
|
|
2179
|
+
refresh: "refresh";
|
|
2180
|
+
}>>;
|
|
2181
|
+
}, z$1.core.$strip>>;
|
|
2182
|
+
}, z$1.core.$strip>;
|
|
2183
|
+
/** Request payload accepted by the run creation endpoint. */
|
|
2184
|
+
type CreateRunRequest = z$1.infer<typeof createRunRequestSchema>;
|
|
2185
|
+
/** Schema for updating a UI-authored manual score on one persisted case. */
|
|
2186
|
+
declare const updateManualScoreRequestSchema: z$1.ZodObject<{
|
|
2187
|
+
value: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2188
|
+
}, z$1.core.$strip>;
|
|
2189
|
+
/** Request payload accepted by the manual score update endpoint. */
|
|
2190
|
+
type UpdateManualScoreRequest = z$1.infer<typeof updateManualScoreRequestSchema>;
|
|
2191
|
+
//#endregion
|
|
1827
2192
|
//#region ../sdk/src/types.d.ts
|
|
1828
2193
|
/** Single authored eval case with its stable identifier and input payload. */
|
|
1829
2194
|
type EvalCase<TInput> = {
|
|
@@ -2077,6 +2442,12 @@ type EvalCaseScope = {
|
|
|
2077
2442
|
*/
|
|
2078
2443
|
replayingDepth: number; /** Runner-provided cache adapter + mode; absent when caching is disabled. */
|
|
2079
2444
|
cacheContext: CacheScopeContext | undefined;
|
|
2445
|
+
/**
|
|
2446
|
+
* Value-cache refs recorded by `evalTracer.cache(...)` calls made with no
|
|
2447
|
+
* active span. Span-bound refs are appended to the owning span's
|
|
2448
|
+
* `cache.refs` attribute instead.
|
|
2449
|
+
*/
|
|
2450
|
+
caseCacheRefs: TraceCacheRef[];
|
|
2080
2451
|
};
|
|
2081
2452
|
/** Error thrown when an eval assertion fails during case execution. */
|
|
2082
2453
|
declare class EvalAssertionError extends Error {
|
|
@@ -2171,43 +2542,46 @@ type CaptureEvalSpanErrorOptions = {
|
|
|
2171
2542
|
level?: CaptureEvalSpanErrorLevel;
|
|
2172
2543
|
};
|
|
2173
2544
|
//#endregion
|
|
2174
|
-
//#region ../sdk/src/cacheRecording.d.ts
|
|
2175
|
-
/** Cache reference appended to the active span by `evalTracer.cache(...)`. */
|
|
2176
|
-
type TraceCacheRef = {
|
|
2177
|
-
type: 'value';
|
|
2178
|
-
name: string;
|
|
2179
|
-
namespace: string;
|
|
2180
|
-
key: string;
|
|
2181
|
-
status: 'hit' | 'miss' | 'refresh' | 'bypass';
|
|
2182
|
-
storedAt?: string;
|
|
2183
|
-
age?: number;
|
|
2184
|
-
};
|
|
2185
|
-
//#endregion
|
|
2186
2545
|
//#region ../sdk/src/valueCache.d.ts
|
|
2187
2546
|
/** Info accepted by `evalTracer.cache(info, fn)` for spanless value caching. */
|
|
2188
2547
|
type TraceCacheInfo = {
|
|
2189
2548
|
/** Display name used for cache listings and the default namespace. */name: string; /** Arbitrary JSON-safe value used to derive the cache key. */
|
|
2190
2549
|
key: unknown; /** Override the default namespace (`${evalId}__${name}`). */
|
|
2191
2550
|
namespace?: string;
|
|
2551
|
+
/**
|
|
2552
|
+
* Include native `Blob`/`File` bytes in the cache key. By default only stable
|
|
2553
|
+
* metadata (`type`, `size`, plus `name`/`lastModified` for `File`) is used.
|
|
2554
|
+
*/
|
|
2555
|
+
serializeFileBytes?: boolean;
|
|
2192
2556
|
};
|
|
2193
2557
|
//#endregion
|
|
2194
2558
|
//#region ../sdk/src/cacheKey.d.ts
|
|
2559
|
+
/** Components folded into a deterministic cache key hash. */
|
|
2195
2560
|
type CacheKeyHashInput = {
|
|
2196
|
-
namespace: string;
|
|
2197
|
-
codeFingerprint: string;
|
|
2561
|
+
/** Cache namespace, usually derived from the eval id and operation name. */namespace: string; /** Eval source fingerprint used to invalidate cache entries on code edits. */
|
|
2562
|
+
codeFingerprint: string; /** User-authored cache key value. */
|
|
2198
2563
|
key: unknown;
|
|
2199
2564
|
};
|
|
2565
|
+
/** Optional controls for cache key hashing. */
|
|
2566
|
+
type CacheKeyHashOptions = {
|
|
2567
|
+
/**
|
|
2568
|
+
* When true, native `Blob` and `File` values are read asynchronously and
|
|
2569
|
+
* hashed by bytes plus stable metadata. Defaults to metadata-only hashing.
|
|
2570
|
+
*/
|
|
2571
|
+
serializeFileBytes?: boolean;
|
|
2572
|
+
};
|
|
2200
2573
|
/**
|
|
2201
2574
|
* Hash the components of a cache key into a deterministic hex digest.
|
|
2202
2575
|
*
|
|
2203
|
-
* Native `Blob` and `File` values
|
|
2204
|
-
*
|
|
2576
|
+
* Native `Blob` and `File` values use stable metadata by default. Pass
|
|
2577
|
+
* `serializeFileBytes: true` to read them asynchronously and include their byte
|
|
2578
|
+
* hash in the key.
|
|
2205
2579
|
*/
|
|
2206
|
-
declare function hashCacheKey(input: CacheKeyHashInput): Promise<string>;
|
|
2580
|
+
declare function hashCacheKey(input: CacheKeyHashInput, options?: CacheKeyHashOptions): Promise<string>;
|
|
2207
2581
|
/**
|
|
2208
2582
|
* Synchronously hash cache key components. This supports JSON-like data and
|
|
2209
2583
|
* in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
|
|
2210
|
-
*
|
|
2584
|
+
* plus stable metadata for native `Blob` and `File` values.
|
|
2211
2585
|
*/
|
|
2212
2586
|
declare function hashCacheKeySync(input: CacheKeyHashInput): string;
|
|
2213
2587
|
//#endregion
|
|
@@ -2296,8 +2670,8 @@ type TraceSpanInfoUncached = TraceSpanInfoBase & {
|
|
|
2296
2670
|
/**
|
|
2297
2671
|
* Info accepted by `evalTracer.span(info, fn)` when opting in to caching.
|
|
2298
2672
|
*
|
|
2299
|
-
* Cached spans return `Promise<unknown>` because the replayed value
|
|
2300
|
-
*
|
|
2673
|
+
* Cached spans return `Promise<unknown>` because the replayed value is revived
|
|
2674
|
+
* from persisted cache data on hit. Narrow the value yourself when you need a
|
|
2301
2675
|
* typed return.
|
|
2302
2676
|
*/
|
|
2303
2677
|
type TraceSpanInfoCached = TraceSpanInfoBase & {
|
|
@@ -2388,9 +2762,23 @@ type EvalRunner = {
|
|
|
2388
2762
|
subscribe(runId: string, listener: (event: SseEnvelope) => void): () => void; /** Subscribe to discovery updates triggered by file changes or manual refresh. */
|
|
2389
2763
|
subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Stop background filesystem watchers owned by this runner instance. */
|
|
2390
2764
|
close(): Promise<void>; /** Resolve the workspace root backing this runner instance. */
|
|
2391
|
-
getWorkspaceRoot(): string;
|
|
2765
|
+
getWorkspaceRoot(): string;
|
|
2766
|
+
/**
|
|
2767
|
+
* Resolved LLM-calls config used by the UI to derive the LLM calls tab.
|
|
2768
|
+
*
|
|
2769
|
+
* Returns the workspace's `llmCalls` config block from
|
|
2770
|
+
* `agent-evals.config.ts` with all defaults applied.
|
|
2771
|
+
*/
|
|
2772
|
+
getLlmCallsConfig(): ResolvedLlmCallsConfig; /** Resolve a persisted artifact path when artifact storage is supported. */
|
|
2392
2773
|
getArtifactPath(artifactId: string): string | undefined; /** Return summaries for every persisted cache entry in the workspace. */
|
|
2393
2774
|
listCache(): Promise<CacheListItem[]>;
|
|
2775
|
+
/**
|
|
2776
|
+
* Return the full persisted cache entry for `namespace` + `key`, including
|
|
2777
|
+
* its recording. Returns `null` when no entry matches. Used by the case
|
|
2778
|
+
* drawer's Cache hits tab to lazily fetch the cached return value when a
|
|
2779
|
+
* row is expanded.
|
|
2780
|
+
*/
|
|
2781
|
+
getCacheEntry(namespace: string, key: string): Promise<CacheEntry | null>;
|
|
2394
2782
|
/**
|
|
2395
2783
|
* Remove cache entries matching `filter`, or all entries when no filter is
|
|
2396
2784
|
* supplied.
|
|
@@ -2445,4 +2833,4 @@ declare function createRunner({
|
|
|
2445
2833
|
*/
|
|
2446
2834
|
declare function runCli(argv: string[]): Promise<void>;
|
|
2447
2835
|
//#endregion
|
|
2448
|
-
export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
2836
|
+
export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|