@ls-stack/agent-eval 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-sK9CjpNI.mjs → app-DI3IUGb_.mjs} +5 -4
- package/dist/apps/web/dist/assets/index-BZ60j9UY.css +1 -0
- package/dist/apps/web/dist/assets/index-CsSDwFI1.js +113 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-DLlRkyLH.mjs → cli-COzPxKg2.mjs} +8 -3
- package/dist/index.d.mts +227 -2
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-DwqX9_T7.mjs → runOrchestration-COFhQvTJ.mjs} +213 -22
- package/dist/{runner-Gtlmvm3w.mjs → runner-nQjuRZGC.mjs} +2 -2
- package/dist/{runner-JrBz8ISs.mjs → runner-sMZXoDp3.mjs} +1 -1
- package/dist/src-OZSs693X.mjs +3 -0
- package/package.json +1 -1
- package/dist/apps/web/dist/assets/index-ChgByJbI.css +0 -1
- package/dist/apps/web/dist/assets/index-CmY0_D5Z.js +0 -113
- package/dist/src-Bx-CV6Wo.mjs +0 -3
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-CmY0_D5Z.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-ChgByJbI.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-CsSDwFI1.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-BZ60j9UY.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as
|
|
1
|
+
import { A as getEvalDisplayStatus, F as runSummarySchema, J as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, dn as getEvalRegistry, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as deriveScopedSummaryFromCases, k as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, q as resolveApiCallsConfig, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-COFhQvTJ.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join, relative, resolve } from "node:path";
|
|
@@ -418,6 +418,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
418
418
|
let localStateDir;
|
|
419
419
|
let cacheStore;
|
|
420
420
|
let llmCallsConfig = resolveLlmCallsConfig(void 0);
|
|
421
|
+
let apiCallsConfig = resolveApiCallsConfig(void 0);
|
|
421
422
|
const evals = /* @__PURE__ */ new Map();
|
|
422
423
|
const runs = /* @__PURE__ */ new Map();
|
|
423
424
|
const lastRunStatusMap = /* @__PURE__ */ new Map();
|
|
@@ -441,6 +442,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
441
442
|
workspaceRoot = config.workspaceRoot ?? process.cwd();
|
|
442
443
|
localStateDir = resolve(workspaceRoot, ".agent-evals");
|
|
443
444
|
llmCallsConfig = resolveLlmCallsConfig(config.llmCalls);
|
|
445
|
+
apiCallsConfig = resolveApiCallsConfig(config.apiCalls);
|
|
444
446
|
await mkdir(localStateDir, { recursive: true });
|
|
445
447
|
await mkdir(join(localStateDir, "runs"), { recursive: true });
|
|
446
448
|
cacheStore = createFsCacheStore({
|
|
@@ -812,6 +814,9 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
812
814
|
getLlmCallsConfig() {
|
|
813
815
|
return llmCallsConfig;
|
|
814
816
|
},
|
|
817
|
+
getApiCallsConfig() {
|
|
818
|
+
return apiCallsConfig;
|
|
819
|
+
},
|
|
815
820
|
getArtifactPath(artifactId_) {
|
|
816
821
|
return resolveArtifactPath(join(localStateDir, "runs"), artifactId_);
|
|
817
822
|
}
|
|
@@ -1045,8 +1050,8 @@ async function commandApp(args) {
|
|
|
1045
1050
|
const { serve } = await import("@hono/node-server");
|
|
1046
1051
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
1047
1052
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
1048
|
-
const appModule = await import("./app-sK9CjpNI.mjs");
|
|
1049
|
-
const runnerModule = await import("./runner-JrBz8ISs.mjs");
|
|
1053
|
+
const appModule = await import("./app-DI3IUGb_.mjs");
|
|
1054
|
+
const runnerModule = await import("./runner-sMZXoDp3.mjs");
|
|
1050
1055
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
1051
1056
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
1052
1057
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -1406,6 +1406,16 @@ declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
|
|
|
1406
1406
|
}>;
|
|
1407
1407
|
/** Render format applied to an LLM-call metric value. */
|
|
1408
1408
|
type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
|
|
1409
|
+
/** Render formats supported by an API-call metric in the UI. */
|
|
1410
|
+
declare const apiCallMetricFormatSchema: z$1.ZodEnum<{
|
|
1411
|
+
string: "string";
|
|
1412
|
+
number: "number";
|
|
1413
|
+
boolean: "boolean";
|
|
1414
|
+
duration: "duration";
|
|
1415
|
+
json: "json";
|
|
1416
|
+
}>;
|
|
1417
|
+
/** Render format applied to an API-call metric value. */
|
|
1418
|
+
type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema>;
|
|
1409
1419
|
/** Where an LLM-call metric is rendered inside the LLM calls tab. */
|
|
1410
1420
|
declare const llmCallMetricPlacementSchema: z$1.ZodEnum<{
|
|
1411
1421
|
header: "header";
|
|
@@ -1413,6 +1423,13 @@ declare const llmCallMetricPlacementSchema: z$1.ZodEnum<{
|
|
|
1413
1423
|
}>;
|
|
1414
1424
|
/** Placement option for an LLM-call metric. */
|
|
1415
1425
|
type LlmCallMetricPlacement = z$1.infer<typeof llmCallMetricPlacementSchema>;
|
|
1426
|
+
/** Where an API-call metric is rendered inside the API calls tab. */
|
|
1427
|
+
declare const apiCallMetricPlacementSchema: z$1.ZodEnum<{
|
|
1428
|
+
header: "header";
|
|
1429
|
+
body: "body";
|
|
1430
|
+
}>;
|
|
1431
|
+
/** Placement option for an API-call metric. */
|
|
1432
|
+
type ApiCallMetricPlacement = z$1.infer<typeof apiCallMetricPlacementSchema>;
|
|
1416
1433
|
/**
|
|
1417
1434
|
* Schema for a single user-defined metric attached to LLM call rows.
|
|
1418
1435
|
*
|
|
@@ -1440,6 +1457,33 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
|
|
|
1440
1457
|
}, z$1.core.$strip>;
|
|
1441
1458
|
/** User-defined metric authored in `agent-evals.config.ts`. */
|
|
1442
1459
|
type LlmCallMetric = z$1.infer<typeof llmCallMetricSchema>;
|
|
1460
|
+
/**
|
|
1461
|
+
* Schema for a single user-defined metric attached to API call rows.
|
|
1462
|
+
*
|
|
1463
|
+
* Each metric reads `path` from the span's `attributes` and renders the value
|
|
1464
|
+
* with the configured `format` and `numberFormat`. `placements` controls
|
|
1465
|
+
* whether the metric appears as a chip on the collapsed row header, as a row
|
|
1466
|
+
* inside the expanded body, or both. Defaults to `['body']` when omitted.
|
|
1467
|
+
*/
|
|
1468
|
+
declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
1469
|
+
label: z$1.ZodString;
|
|
1470
|
+
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
1471
|
+
path: z$1.ZodString;
|
|
1472
|
+
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1473
|
+
string: "string";
|
|
1474
|
+
number: "number";
|
|
1475
|
+
boolean: "boolean";
|
|
1476
|
+
duration: "duration";
|
|
1477
|
+
json: "json";
|
|
1478
|
+
}>>;
|
|
1479
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1480
|
+
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
1481
|
+
header: "header";
|
|
1482
|
+
body: "body";
|
|
1483
|
+
}>>>;
|
|
1484
|
+
}, z$1.core.$strip>;
|
|
1485
|
+
/** User-defined API-call metric authored in `agent-evals.config.ts`. */
|
|
1486
|
+
type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
|
|
1443
1487
|
/** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
|
|
1444
1488
|
declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
1445
1489
|
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -1485,6 +1529,41 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
1485
1529
|
}, z$1.core.$strip>;
|
|
1486
1530
|
/** Authored LLM calls config accepted from `agent-evals.config.ts`. */
|
|
1487
1531
|
type LlmCallsConfigInput = z$1.infer<typeof llmCallsConfigSchema>;
|
|
1532
|
+
/** Schema for the global API calls config block in `agent-evals.config.ts`. */
|
|
1533
|
+
declare const apiCallsConfigSchema: z$1.ZodObject<{
|
|
1534
|
+
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
1535
|
+
attributes: z$1.ZodOptional<z$1.ZodObject<{
|
|
1536
|
+
method: z$1.ZodOptional<z$1.ZodString>;
|
|
1537
|
+
url: z$1.ZodOptional<z$1.ZodString>;
|
|
1538
|
+
statusCode: z$1.ZodOptional<z$1.ZodString>;
|
|
1539
|
+
request: z$1.ZodOptional<z$1.ZodString>;
|
|
1540
|
+
response: z$1.ZodOptional<z$1.ZodString>;
|
|
1541
|
+
requestBody: z$1.ZodOptional<z$1.ZodString>;
|
|
1542
|
+
responseBody: z$1.ZodOptional<z$1.ZodString>;
|
|
1543
|
+
headers: z$1.ZodOptional<z$1.ZodString>;
|
|
1544
|
+
durationMs: z$1.ZodOptional<z$1.ZodString>;
|
|
1545
|
+
error: z$1.ZodOptional<z$1.ZodString>;
|
|
1546
|
+
}, z$1.core.$strip>>;
|
|
1547
|
+
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1548
|
+
label: z$1.ZodString;
|
|
1549
|
+
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
1550
|
+
path: z$1.ZodString;
|
|
1551
|
+
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1552
|
+
string: "string";
|
|
1553
|
+
number: "number";
|
|
1554
|
+
boolean: "boolean";
|
|
1555
|
+
duration: "duration";
|
|
1556
|
+
json: "json";
|
|
1557
|
+
}>>;
|
|
1558
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1559
|
+
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
1560
|
+
header: "header";
|
|
1561
|
+
body: "body";
|
|
1562
|
+
}>>>;
|
|
1563
|
+
}, z$1.core.$strip>>>;
|
|
1564
|
+
}, z$1.core.$strip>;
|
|
1565
|
+
/** Authored API calls config accepted from `agent-evals.config.ts`. */
|
|
1566
|
+
type ApiCallsConfigInput = z$1.infer<typeof apiCallsConfigSchema>;
|
|
1488
1567
|
/** Resolved LLM-calls config sent to the UI with all defaults applied. */
|
|
1489
1568
|
type ResolvedLlmCallsConfig = {
|
|
1490
1569
|
kinds: string[];
|
|
@@ -1512,6 +1591,23 @@ type ResolvedLlmCallsConfig = {
|
|
|
1512
1591
|
};
|
|
1513
1592
|
metrics: ResolvedLlmCallMetric[];
|
|
1514
1593
|
};
|
|
1594
|
+
/** Resolved API-calls config sent to the UI with all defaults applied. */
|
|
1595
|
+
type ResolvedApiCallsConfig = {
|
|
1596
|
+
kinds: string[];
|
|
1597
|
+
attributes: {
|
|
1598
|
+
method: string;
|
|
1599
|
+
url: string;
|
|
1600
|
+
statusCode: string;
|
|
1601
|
+
request: string;
|
|
1602
|
+
response: string;
|
|
1603
|
+
requestBody: string;
|
|
1604
|
+
responseBody: string;
|
|
1605
|
+
headers: string;
|
|
1606
|
+
durationMs: string;
|
|
1607
|
+
error: string;
|
|
1608
|
+
};
|
|
1609
|
+
metrics: ResolvedApiCallMetric[];
|
|
1610
|
+
};
|
|
1515
1611
|
/** Fully-resolved LLM-call metric used by the runner and UI. */
|
|
1516
1612
|
type ResolvedLlmCallMetric = {
|
|
1517
1613
|
label: string;
|
|
@@ -1521,8 +1617,19 @@ type ResolvedLlmCallMetric = {
|
|
|
1521
1617
|
numberFormat?: NumberDisplayOptions;
|
|
1522
1618
|
placements: LlmCallMetricPlacement[];
|
|
1523
1619
|
};
|
|
1620
|
+
/** Fully-resolved API-call metric used by the runner and UI. */
|
|
1621
|
+
type ResolvedApiCallMetric = {
|
|
1622
|
+
label: string;
|
|
1623
|
+
tooltip?: string;
|
|
1624
|
+
path: string;
|
|
1625
|
+
format: ApiCallMetricFormat;
|
|
1626
|
+
numberFormat?: NumberDisplayOptions;
|
|
1627
|
+
placements: ApiCallMetricPlacement[];
|
|
1628
|
+
};
|
|
1524
1629
|
/** Default LLM-calls config the UI uses before the workspace fetch resolves. */
|
|
1525
1630
|
declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
|
|
1631
|
+
/** Default API-calls config the UI uses before the workspace fetch resolves. */
|
|
1632
|
+
declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
|
|
1526
1633
|
/**
|
|
1527
1634
|
* Resolve the user-authored LLM-calls config to a fully-defaulted shape used
|
|
1528
1635
|
* by the UI to derive the LLM calls tab.
|
|
@@ -1534,6 +1641,17 @@ declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
|
|
|
1534
1641
|
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
1535
1642
|
*/
|
|
1536
1643
|
declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
|
|
1644
|
+
/**
|
|
1645
|
+
* Resolve the user-authored API-calls config to a fully-defaulted shape used
|
|
1646
|
+
* by the UI to derive the API calls tab.
|
|
1647
|
+
*
|
|
1648
|
+
* - Missing or empty `kinds` falls back to common API/HTTP span kinds.
|
|
1649
|
+
* - Missing `attributes.<field>` falls back to the corresponding default
|
|
1650
|
+
* attribute path.
|
|
1651
|
+
* - Missing `metrics[].format` defaults to `'string'`.
|
|
1652
|
+
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
1653
|
+
*/
|
|
1654
|
+
declare function resolveApiCallsConfig(input: ApiCallsConfigInput | undefined): ResolvedApiCallsConfig;
|
|
1537
1655
|
/** Top-level config authored in `agent-evals.config.ts`. */
|
|
1538
1656
|
type AgentEvalsConfig = {
|
|
1539
1657
|
/** Root directory used to resolve all relative paths. Defaults to `process.cwd()`. */workspaceRoot?: string; /** Glob patterns (relative to `workspaceRoot`) used to discover eval files. */
|
|
@@ -1589,6 +1707,30 @@ type AgentEvalsConfig = {
|
|
|
1589
1707
|
* ```
|
|
1590
1708
|
*/
|
|
1591
1709
|
llmCalls?: LlmCallsConfigInput;
|
|
1710
|
+
/**
|
|
1711
|
+
* Configuration for the "API calls" tab in the case-run drawer.
|
|
1712
|
+
*
|
|
1713
|
+
* Determines which trace spans are treated as API calls (`kinds`), how
|
|
1714
|
+
* structured fields like `method`, `url`, and `statusCode` are read from
|
|
1715
|
+
* span attributes, and which custom user-defined metrics are surfaced on
|
|
1716
|
+
* each call. All fields are optional and fall back to the documented
|
|
1717
|
+
* defaults; the API calls tab is shown automatically when at least one
|
|
1718
|
+
* matching span exists in a case run.
|
|
1719
|
+
*
|
|
1720
|
+
* @example
|
|
1721
|
+
* ```ts
|
|
1722
|
+
* apiCalls: {
|
|
1723
|
+
* kinds: ['api', 'http.client', 'undici.request'],
|
|
1724
|
+
* attributes: {
|
|
1725
|
+
* statusCode: 'http.status_code',
|
|
1726
|
+
* },
|
|
1727
|
+
* metrics: [
|
|
1728
|
+
* { label: 'Retries', path: 'retryCount', format: 'number' },
|
|
1729
|
+
* ],
|
|
1730
|
+
* }
|
|
1731
|
+
* ```
|
|
1732
|
+
*/
|
|
1733
|
+
apiCalls?: ApiCallsConfigInput;
|
|
1592
1734
|
/**
|
|
1593
1735
|
* Optional controls for the operation cache. When omitted, the cache is
|
|
1594
1736
|
* enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
|
|
@@ -1691,6 +1833,38 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
1691
1833
|
}>>>;
|
|
1692
1834
|
}, z$1.core.$strip>>>;
|
|
1693
1835
|
}, z$1.core.$strip>>;
|
|
1836
|
+
apiCalls: z$1.ZodOptional<z$1.ZodObject<{
|
|
1837
|
+
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
1838
|
+
attributes: z$1.ZodOptional<z$1.ZodObject<{
|
|
1839
|
+
method: z$1.ZodOptional<z$1.ZodString>;
|
|
1840
|
+
url: z$1.ZodOptional<z$1.ZodString>;
|
|
1841
|
+
statusCode: z$1.ZodOptional<z$1.ZodString>;
|
|
1842
|
+
request: z$1.ZodOptional<z$1.ZodString>;
|
|
1843
|
+
response: z$1.ZodOptional<z$1.ZodString>;
|
|
1844
|
+
requestBody: z$1.ZodOptional<z$1.ZodString>;
|
|
1845
|
+
responseBody: z$1.ZodOptional<z$1.ZodString>;
|
|
1846
|
+
headers: z$1.ZodOptional<z$1.ZodString>;
|
|
1847
|
+
durationMs: z$1.ZodOptional<z$1.ZodString>;
|
|
1848
|
+
error: z$1.ZodOptional<z$1.ZodString>;
|
|
1849
|
+
}, z$1.core.$strip>>;
|
|
1850
|
+
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1851
|
+
label: z$1.ZodString;
|
|
1852
|
+
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
1853
|
+
path: z$1.ZodString;
|
|
1854
|
+
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1855
|
+
string: "string";
|
|
1856
|
+
number: "number";
|
|
1857
|
+
boolean: "boolean";
|
|
1858
|
+
duration: "duration";
|
|
1859
|
+
json: "json";
|
|
1860
|
+
}>>;
|
|
1861
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1862
|
+
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
1863
|
+
header: "header";
|
|
1864
|
+
body: "body";
|
|
1865
|
+
}>>>;
|
|
1866
|
+
}, z$1.core.$strip>>>;
|
|
1867
|
+
}, z$1.core.$strip>>;
|
|
1694
1868
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
1695
1869
|
enabled: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1696
1870
|
dir: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -1761,6 +1935,50 @@ type LlmCallEntry = {
|
|
|
1761
1935
|
*/
|
|
1762
1936
|
declare function extractLlmCalls(spans: EvalTraceSpan[], config: ResolvedLlmCallsConfig): LlmCallEntry[];
|
|
1763
1937
|
//#endregion
|
|
1938
|
+
//#region ../shared/src/utils/extractApiCalls.d.ts
|
|
1939
|
+
/** Resolved value for one user-defined metric on an API call row. */
|
|
1940
|
+
type ApiCallMetricValue = {
|
|
1941
|
+
label: string;
|
|
1942
|
+
tooltip: string | undefined;
|
|
1943
|
+
rawValue: unknown;
|
|
1944
|
+
format: ApiCallMetricFormat;
|
|
1945
|
+
numberFormat: NumberDisplayOptions | undefined;
|
|
1946
|
+
placements: ApiCallMetricPlacement[];
|
|
1947
|
+
};
|
|
1948
|
+
/** Single entry rendered as one expandable row in the API calls tab. */
|
|
1949
|
+
type ApiCallEntry = {
|
|
1950
|
+
id: string;
|
|
1951
|
+
name: string;
|
|
1952
|
+
kind: string;
|
|
1953
|
+
status: EvalTraceSpan['status'];
|
|
1954
|
+
method: string | null;
|
|
1955
|
+
url: string | null;
|
|
1956
|
+
statusCode: number | null;
|
|
1957
|
+
latencyMs: number | null;
|
|
1958
|
+
request: unknown;
|
|
1959
|
+
response: unknown;
|
|
1960
|
+
requestBody: unknown;
|
|
1961
|
+
responseBody: unknown;
|
|
1962
|
+
headers: unknown;
|
|
1963
|
+
errorPayload: unknown;
|
|
1964
|
+
metrics: ApiCallMetricValue[];
|
|
1965
|
+
warnings: EvalTraceSpanWarning[];
|
|
1966
|
+
error: EvalTraceSpanError | null;
|
|
1967
|
+
};
|
|
1968
|
+
/**
|
|
1969
|
+
* Filter `spans` down to API calls and project each one to the structured
|
|
1970
|
+
* shape consumed by the API calls tab.
|
|
1971
|
+
*
|
|
1972
|
+
* Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
|
|
1973
|
+
* (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
|
|
1974
|
+
* the configured paths. `durationMs` takes precedence for latency, with a
|
|
1975
|
+
* fallback to the span start/end timestamps. User-defined `metrics` whose path
|
|
1976
|
+
* resolves to `undefined` are dropped, but `null`, `0`, and `false` are
|
|
1977
|
+
* preserved as legitimate values worth displaying. Original span order is
|
|
1978
|
+
* preserved so the API calls tab matches the ordering in the Trace tab.
|
|
1979
|
+
*/
|
|
1980
|
+
declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCallsConfig): ApiCallEntry[];
|
|
1981
|
+
//#endregion
|
|
1764
1982
|
//#region ../shared/src/schemas/cache.d.ts
|
|
1765
1983
|
/**
|
|
1766
1984
|
* Mode that controls how the cache is consulted for a given run.
|
|
@@ -2823,7 +3041,14 @@ type EvalRunner = {
|
|
|
2823
3041
|
* Returns the workspace's `llmCalls` config block from
|
|
2824
3042
|
* `agent-evals.config.ts` with all defaults applied.
|
|
2825
3043
|
*/
|
|
2826
|
-
getLlmCallsConfig(): ResolvedLlmCallsConfig;
|
|
3044
|
+
getLlmCallsConfig(): ResolvedLlmCallsConfig;
|
|
3045
|
+
/**
|
|
3046
|
+
* Resolved API-calls config used by the UI to derive the API calls tab.
|
|
3047
|
+
*
|
|
3048
|
+
* Returns the workspace's `apiCalls` config block from
|
|
3049
|
+
* `agent-evals.config.ts` with all defaults applied.
|
|
3050
|
+
*/
|
|
3051
|
+
getApiCallsConfig(): ResolvedApiCallsConfig; /** Resolve a persisted artifact path when artifact storage is supported. */
|
|
2827
3052
|
getArtifactPath(artifactId: string): string | undefined; /** Return summaries for every persisted cache entry in the workspace. */
|
|
2828
3053
|
listCache(): Promise<CacheListItem[]>;
|
|
2829
3054
|
/**
|
|
@@ -2887,4 +3112,4 @@ declare function createRunner({
|
|
|
2887
3112
|
*/
|
|
2888
3113
|
declare function runCli(argv: string[]): Promise<void>;
|
|
2889
3114
|
//#endregion
|
|
2890
|
-
export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type 
TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, 
traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
3115
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, 
type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, 
runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-DLlRkyLH.mjs";
|
|
3
|
-
import "./src-Bx-CV6Wo.mjs";
|
|
4
|
-
export { DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as getEvalRegistry, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, kt as 
traceDisplayConfigSchema, ln as repoFile, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as defineEval, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-COFhQvTJ.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-COzPxKg2.mjs";
|
|
3
|
+
import "./src-OZSs693X.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, 
traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { F as runSummarySchema, It as columnDefSchema, P as runManifestSchema, S as createRunRequestSchema, nt as evalStatsConfigSchema, pt as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-COFhQvTJ.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { z } from "zod/v4";
|