@ls-stack/agent-eval 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-Bq4Dz6AV.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-b2k20tzL.css">
28
+ <script type="module" crossorigin src="/assets/index-ibhQ_P7i.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-B2GWGl5i.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-Dw9et3_Z.mjs";
2
+ import { t as runCli } from "./cli-B0QmsWCU.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -831,17 +831,13 @@ const columnDefSchema = z.object({
831
831
  const cellValueSchema = z.union([jsonCellSchema, fileRefSchema]);
832
832
  //#endregion
833
833
  //#region ../shared/src/schemas/trace.ts
834
- /** Schema for the semantic categories used to classify trace spans. */
835
- const traceSpanKindSchema = z.enum([
836
- "eval",
837
- "agent",
838
- "llm",
839
- "tool",
840
- "retrieval",
841
- "scorer",
842
- "checkpoint",
843
- "custom"
844
- ]);
834
+ /**
835
+ * Schema for span categories recorded in traces.
836
+ *
837
+ * The value is intentionally open-ended so external tracers can preserve their
838
+ * native span kinds instead of collapsing them into the built-in categories.
839
+ */
840
+ const traceSpanKindSchema = z.string().min(1);
845
841
  /** Schema for the supported presentation formats of trace attributes. */
846
842
  const traceAttributeDisplayFormatSchema = z.enum([
847
843
  "string",
@@ -3616,8 +3612,8 @@ async function commandApp(args) {
3616
3612
  const { serve } = await import("@hono/node-server");
3617
3613
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
3618
3614
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
3619
- const appModule = await import("./app-DXYLqlWb.mjs");
3620
- const runnerModule = await import("./runner-CToL8eJs.mjs");
3615
+ const appModule = await import("./app-CljutWb7.mjs");
3616
+ const runnerModule = await import("./runner-CsSJwWE4.mjs");
3621
3617
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
3622
3618
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
3623
3619
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -122,19 +122,13 @@ declare const cellValueSchema: z.ZodUnion<readonly [z.ZodType<string | number |
122
122
  type CellValue = z.infer<typeof cellValueSchema>;
123
123
  //#endregion
124
124
  //#region ../shared/src/schemas/trace.d.ts
125
- /** Schema for the semantic categories used to classify trace spans. */
126
- declare const traceSpanKindSchema: z.ZodEnum<{
127
- eval: "eval";
128
- agent: "agent";
129
- llm: "llm";
130
- tool: "tool";
131
- retrieval: "retrieval";
132
- scorer: "scorer";
133
- checkpoint: "checkpoint";
134
- custom: "custom";
135
- }>;
136
- /** Semantic category used to classify a trace span in the UI. */
137
- type TraceSpanKind = z.infer<typeof traceSpanKindSchema>;
125
+ /**
126
+ * Schema for span categories recorded in traces.
127
+ *
128
+ * The value is intentionally open-ended so external tracers can preserve their
129
+ * native span kinds instead of collapsing them into the built-in categories.
130
+ */
131
+ declare const traceSpanKindSchema: z.ZodString;
138
132
  /** Schema for the supported presentation formats of trace attributes. */
139
133
  declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
140
134
  string: "string";
@@ -306,16 +300,7 @@ declare const traceSpanSchema: z.ZodObject<{
306
300
  id: z.ZodString;
307
301
  parentId: z.ZodNullable<z.ZodString>;
308
302
  caseId: z.ZodString;
309
- kind: z.ZodEnum<{
310
- eval: "eval";
311
- agent: "agent";
312
- llm: "llm";
313
- tool: "tool";
314
- retrieval: "retrieval";
315
- scorer: "scorer";
316
- checkpoint: "checkpoint";
317
- custom: "custom";
318
- }>;
303
+ kind: z.ZodString;
319
304
  name: z.ZodString;
320
305
  startedAt: z.ZodString;
321
306
  endedAt: z.ZodNullable<z.ZodString>;
@@ -650,16 +635,7 @@ declare const scoreTraceSchema: z.ZodObject<{
650
635
  id: z.ZodString;
651
636
  parentId: z.ZodNullable<z.ZodString>;
652
637
  caseId: z.ZodString;
653
- kind: z.ZodEnum<{
654
- custom: "custom";
655
- eval: "eval";
656
- agent: "agent";
657
- llm: "llm";
658
- tool: "tool";
659
- retrieval: "retrieval";
660
- scorer: "scorer";
661
- checkpoint: "checkpoint";
662
- }>;
638
+ kind: z.ZodString;
663
639
  name: z.ZodString;
664
640
  startedAt: z.ZodString;
665
641
  endedAt: z.ZodNullable<z.ZodString>;
@@ -724,16 +700,7 @@ declare const caseDetailSchema: z.ZodObject<{
724
700
  id: z.ZodString;
725
701
  parentId: z.ZodNullable<z.ZodString>;
726
702
  caseId: z.ZodString;
727
- kind: z.ZodEnum<{
728
- custom: "custom";
729
- eval: "eval";
730
- agent: "agent";
731
- llm: "llm";
732
- tool: "tool";
733
- retrieval: "retrieval";
734
- scorer: "scorer";
735
- checkpoint: "checkpoint";
736
- }>;
703
+ kind: z.ZodString;
737
704
  name: z.ZodString;
738
705
  startedAt: z.ZodString;
739
706
  endedAt: z.ZodNullable<z.ZodString>;
@@ -783,16 +750,7 @@ declare const caseDetailSchema: z.ZodObject<{
783
750
  id: z.ZodString;
784
751
  parentId: z.ZodNullable<z.ZodString>;
785
752
  caseId: z.ZodString;
786
- kind: z.ZodEnum<{
787
- custom: "custom";
788
- eval: "eval";
789
- agent: "agent";
790
- llm: "llm";
791
- tool: "tool";
792
- retrieval: "retrieval";
793
- scorer: "scorer";
794
- checkpoint: "checkpoint";
795
- }>;
753
+ kind: z.ZodString;
796
754
  name: z.ZodString;
797
755
  startedAt: z.ZodString;
798
756
  endedAt: z.ZodNullable<z.ZodString>;
@@ -1491,16 +1449,7 @@ declare const cacheListItemSchema: z.ZodObject<{
1491
1449
  key: z.ZodString;
1492
1450
  namespace: z.ZodString;
1493
1451
  spanName: z.ZodString;
1494
- spanKind: z.ZodEnum<{
1495
- eval: "eval";
1496
- agent: "agent";
1497
- llm: "llm";
1498
- tool: "tool";
1499
- retrieval: "retrieval";
1500
- scorer: "scorer";
1501
- checkpoint: "checkpoint";
1502
- custom: "custom";
1503
- }>;
1452
+ spanKind: z.ZodString;
1504
1453
  storedAt: z.ZodString;
1505
1454
  codeFingerprint: z.ZodString;
1506
1455
  sizeBytes: z.ZodNumber;
@@ -1509,7 +1458,7 @@ declare const cacheListItemSchema: z.ZodObject<{
1509
1458
  type CacheListItem = z.infer<typeof cacheListItemSchema>;
1510
1459
  /** Serialized nested span captured while recording a cached operation. */
1511
1460
  type SerializedCacheSpan = {
1512
- kind: TraceSpanKind;
1461
+ kind: string;
1513
1462
  name: string;
1514
1463
  attributes?: Record<string, unknown>;
1515
1464
  status: 'running' | 'ok' | 'error' | 'cancelled';
@@ -1575,16 +1524,7 @@ declare const cacheEntrySchema: z.ZodObject<{
1575
1524
  key: z.ZodString;
1576
1525
  namespace: z.ZodString;
1577
1526
  spanName: z.ZodString;
1578
- spanKind: z.ZodEnum<{
1579
- eval: "eval";
1580
- agent: "agent";
1581
- llm: "llm";
1582
- tool: "tool";
1583
- retrieval: "retrieval";
1584
- scorer: "scorer";
1585
- checkpoint: "checkpoint";
1586
- custom: "custom";
1587
- }>;
1527
+ spanKind: z.ZodString;
1588
1528
  storedAt: z.ZodString;
1589
1529
  codeFingerprint: z.ZodString;
1590
1530
  recording: z.ZodObject<{
@@ -1619,16 +1559,7 @@ declare const cacheFileSchema: z.ZodObject<{
1619
1559
  key: z.ZodString;
1620
1560
  namespace: z.ZodString;
1621
1561
  spanName: z.ZodString;
1622
- spanKind: z.ZodEnum<{
1623
- eval: "eval";
1624
- agent: "agent";
1625
- llm: "llm";
1626
- tool: "tool";
1627
- retrieval: "retrieval";
1628
- scorer: "scorer";
1629
- checkpoint: "checkpoint";
1630
- custom: "custom";
1631
- }>;
1562
+ spanKind: z.ZodString;
1632
1563
  storedAt: z.ZodString;
1633
1564
  codeFingerprint: z.ZodString;
1634
1565
  recording: z.ZodObject<{
@@ -1703,7 +1634,7 @@ type EvalTraceTree = {
1703
1634
  spans: EvalTraceSpan[];
1704
1635
  rootSpans: EvalTraceSpan[];
1705
1636
  findSpan: (name: string) => EvalTraceSpan | undefined;
1706
- findSpansByKind: (kind: EvalTraceSpan['kind']) => EvalTraceSpan[];
1637
+ findSpansByKind: (kind: string) => EvalTraceSpan[];
1707
1638
  flattenDfs: () => EvalTraceSpan[];
1708
1639
  checkpoints: Map<string, unknown>;
1709
1640
  };
@@ -1963,7 +1894,7 @@ type TraceSpanTimestamp = Date | string;
1963
1894
  type TraceExternalSpanStartInfo = {
1964
1895
  /** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
1965
1896
  parentId?: string | null; /** Semantic category used by the trace UI. */
1966
- kind: EvalTraceSpan['kind']; /** Display name for the span. */
1897
+ kind: string; /** Display name for the span. */
1967
1898
  name: string; /** Span start time. Defaults to now. */
1968
1899
  startedAt?: TraceSpanTimestamp; /** Initial span attributes. Later updates merge into this object. */
1969
1900
  attributes?: Record<string, unknown>;
@@ -1984,7 +1915,7 @@ type TraceExternalSpanEndInfo = TraceExternalSpanUpdateInfo & {
1984
1915
  type TraceExternalSpanRecordInfo = {
1985
1916
  /** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
1986
1917
  parentId?: string | null; /** Semantic category used by the trace UI. */
1987
- kind: EvalTraceSpan['kind']; /** Display name for the span. */
1918
+ kind: string; /** Display name for the span. */
1988
1919
  name: string; /** Span start time. Defaults to now. */
1989
1920
  startedAt?: TraceSpanTimestamp; /** Span end time. Defaults to the start time. */
1990
1921
  endedAt?: TraceSpanTimestamp | null; /** Final span status. Defaults to `ok`. */
@@ -2008,7 +1939,7 @@ declare function recordExternalSpan(info: TraceExternalSpanRecordInfo): string;
2008
1939
  */
2009
1940
  declare const evalSpan: TraceActiveSpan;
2010
1941
  type TraceSpanInfoBase = {
2011
- kind: EvalTraceSpan['kind'];
1942
+ kind: string;
2012
1943
  name: string;
2013
1944
  attributes?: Record<string, unknown>;
2014
1945
  };
@@ -2175,4 +2106,4 @@ declare function createRunner({
2175
2106
  */
2176
2107
  declare function runCli(argv: string[]): Promise<void>;
2177
2108
  //#endregion
2178
- export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TraceSpanKind, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
2109
+ export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { $ as fileRefSchema, A as evalSummarySchema, B as evalChartsConfigSchema, C as assertionFailureSchema, D as evalStatAggregateSchema, E as evalFreshnessStatusSchema, F as evalChartColorSchema, G as traceDisplayConfigSchema, H as traceAttributeDisplayInputSchema, I as evalChartConfigSchema, J as traceSpanSchema, K as traceDisplayInputConfigSchema, L as evalChartMetricSchema, M as evalChartAggregateSchema, N as evalChartAxisSchema, O as evalStatItemSchema, P as evalChartBuiltinMetricSchema, Q as columnKindSchema, R as evalChartTooltipExtraSchema, S as spanCacheOptionsSchema, T as caseRowSchema, U as traceAttributeDisplayPlacementSchema, V as traceAttributeDisplayFormatSchema, W as traceAttributeDisplaySchema, X as columnDefSchema, Y as cellValueSchema, Z as columnFormatSchema, _ as cacheListItemSchema, _t as repoFile, a as sseEnvelopeSchema, at as evalSpan, b as cacheRecordingSchema, c as deriveScopedSummaryFromCases, ct as hashCacheKeySync, d as runManifestSchema, dt as getCurrentScope, et as jsonCellSchema, f as runSummarySchema, ft as incrementEvalOutput, g as cacheFileSchema, gt as setScopeCacheContext, h as cacheEntrySchema, ht as setEvalOutput, i as updateManualScoreRequestSchema, it as buildTraceTree, j as scoreTraceSchema, k as evalStatsConfigSchema, l as deriveStatusFromCaseRows, lt as EvalAssertionError, m as trialSelectionModeSchema, mt as runInEvalScope, n as createRunner, nt as repoFileRefSchema, o as getEvalTitle, ot as evalTracer, p as agentEvalsConfigSchema, pt as isInEvalScope, q as traceSpanKindSchema, r as createRunRequestSchema, rt as runArtifactRefSchema, s as getEvalDisplayStatus, st as hashCacheKey, t as runCli, tt as numberDisplayOptionsSchema, u as deriveStatusFromChildStatuses, ut as evalAssert, v as cacheModeSchema, vt as defineEval, w as caseDetailSchema, x as serializedCacheSpanSchema, y as cacheRecordingOpSchema, yt as getEvalRegistry, z as evalChartTypeSchema } from "./cli-Dw9et3_Z.mjs";
2
- import "./src-CXclO9ZI.mjs";
1
+ import { $ as fileRefSchema, A as evalSummarySchema, B as evalChartsConfigSchema, C as assertionFailureSchema, D as evalStatAggregateSchema, E as evalFreshnessStatusSchema, F as evalChartColorSchema, G as traceDisplayConfigSchema, H as traceAttributeDisplayInputSchema, I as evalChartConfigSchema, J as traceSpanSchema, K as traceDisplayInputConfigSchema, L as evalChartMetricSchema, M as evalChartAggregateSchema, N as evalChartAxisSchema, O as evalStatItemSchema, P as evalChartBuiltinMetricSchema, Q as columnKindSchema, R as evalChartTooltipExtraSchema, S as spanCacheOptionsSchema, T as caseRowSchema, U as traceAttributeDisplayPlacementSchema, V as traceAttributeDisplayFormatSchema, W as traceAttributeDisplaySchema, X as columnDefSchema, Y as cellValueSchema, Z as columnFormatSchema, _ as cacheListItemSchema, _t as repoFile, a as sseEnvelopeSchema, at as evalSpan, b as cacheRecordingSchema, c as deriveScopedSummaryFromCases, ct as hashCacheKeySync, d as runManifestSchema, dt as getCurrentScope, et as jsonCellSchema, f as runSummarySchema, ft as incrementEvalOutput, g as cacheFileSchema, gt as setScopeCacheContext, h as cacheEntrySchema, ht as setEvalOutput, i as updateManualScoreRequestSchema, it as buildTraceTree, j as scoreTraceSchema, k as evalStatsConfigSchema, l as deriveStatusFromCaseRows, lt as EvalAssertionError, m as trialSelectionModeSchema, mt as runInEvalScope, n as createRunner, nt as repoFileRefSchema, o as getEvalTitle, ot as evalTracer, p as agentEvalsConfigSchema, pt as isInEvalScope, q as traceSpanKindSchema, r as createRunRequestSchema, rt as runArtifactRefSchema, s as getEvalDisplayStatus, st as hashCacheKey, t as runCli, tt as numberDisplayOptionsSchema, u as deriveStatusFromChildStatuses, ut as evalAssert, v as cacheModeSchema, vt as defineEval, w as caseDetailSchema, x as serializedCacheSpanSchema, y as cacheRecordingOpSchema, yt as getEvalRegistry, z as evalChartTypeSchema } from "./cli-B0QmsWCU.mjs";
2
+ import "./src-Bivx1C6b.mjs";
3
3
  export { EvalAssertionError, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-Dw9et3_Z.mjs";
2
- import "./src-CXclO9ZI.mjs";
1
+ import { n as createRunner } from "./cli-B0QmsWCU.mjs";
2
+ import "./src-Bivx1C6b.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-kSiHsl91.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-BY-y4OzF.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -0,0 +1,2 @@
1
+ import "./cli-B0QmsWCU.mjs";
2
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -57,8 +57,8 @@
57
57
  "@types/node": "^24.7.2",
58
58
  "typescript": "^5.9.2",
59
59
  "@agent-evals/runner": "0.0.1",
60
- "@agent-evals/sdk": "0.0.1",
61
- "@agent-evals/shared": "0.0.1"
60
+ "@agent-evals/shared": "0.0.1",
61
+ "@agent-evals/sdk": "0.0.1"
62
62
  },
63
63
  "scripts": {
64
64
  "build": "pnpm --filter @agent-evals/web build && tsdown",