@ls-stack/agent-eval 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DXYLqlWb.mjs → app-CljutWb7.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-B2GWGl5i.css +1 -0
- package/dist/apps/web/dist/assets/index-ibhQ_P7i.js +109 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-Dw9et3_Z.mjs → cli-B0QmsWCU.mjs} +9 -13
- package/dist/index.d.mts +20 -89
- package/dist/index.mjs +2 -2
- package/dist/{runner-kSiHsl91.mjs → runner-BY-y4OzF.mjs} +2 -2
- package/dist/{runner-CToL8eJs.mjs → runner-CsSJwWE4.mjs} +1 -1
- package/dist/src-Bivx1C6b.mjs +2 -0
- package/package.json +3 -3
- package/dist/apps/web/dist/assets/index-Bq4Dz6AV.js +0 -109
- package/dist/apps/web/dist/assets/index-b2k20tzL.css +0 -1
- package/dist/src-CXclO9ZI.mjs +0 -2
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-Bq4Dz6AV.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-b2k20tzL.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-ibhQ_P7i.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-B2GWGl5i.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -831,17 +831,13 @@ const columnDefSchema = z.object({
|
|
|
831
831
|
const cellValueSchema = z.union([jsonCellSchema, fileRefSchema]);
|
|
832
832
|
//#endregion
|
|
833
833
|
//#region ../shared/src/schemas/trace.ts
|
|
834
|
-
/**
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
"scorer",
|
|
842
|
-
"checkpoint",
|
|
843
|
-
"custom"
|
|
844
|
-
]);
|
|
834
|
+
/**
|
|
835
|
+
* Schema for span categories recorded in traces.
|
|
836
|
+
*
|
|
837
|
+
* The value is intentionally open-ended so external tracers can preserve their
|
|
838
|
+
* native span kinds instead of collapsing them into the built-in categories.
|
|
839
|
+
*/
|
|
840
|
+
const traceSpanKindSchema = z.string().min(1);
|
|
845
841
|
/** Schema for the supported presentation formats of trace attributes. */
|
|
846
842
|
const traceAttributeDisplayFormatSchema = z.enum([
|
|
847
843
|
"string",
|
|
@@ -3616,8 +3612,8 @@ async function commandApp(args) {
|
|
|
3616
3612
|
const { serve } = await import("@hono/node-server");
|
|
3617
3613
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
3618
3614
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
3619
|
-
const appModule = await import("./app-DXYLqlWb.mjs");
|
|
3620
|
-
const runnerModule = await import("./runner-CToL8eJs.mjs");
|
|
3615
|
+
const appModule = await import("./app-CljutWb7.mjs");
|
|
3616
|
+
const runnerModule = await import("./runner-CsSJwWE4.mjs");
|
|
3621
3617
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
3622
3618
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
3623
3619
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -122,19 +122,13 @@ declare const cellValueSchema: z.ZodUnion<readonly [z.ZodType<string | number |
|
|
|
122
122
|
type CellValue = z.infer<typeof cellValueSchema>;
|
|
123
123
|
//#endregion
|
|
124
124
|
//#region ../shared/src/schemas/trace.d.ts
|
|
125
|
-
/**
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
scorer: "scorer";
|
|
133
|
-
checkpoint: "checkpoint";
|
|
134
|
-
custom: "custom";
|
|
135
|
-
}>;
|
|
136
|
-
/** Semantic category used to classify a trace span in the UI. */
|
|
137
|
-
type TraceSpanKind = z.infer<typeof traceSpanKindSchema>;
|
|
125
|
+
/**
|
|
126
|
+
* Schema for span categories recorded in traces.
|
|
127
|
+
*
|
|
128
|
+
* The value is intentionally open-ended so external tracers can preserve their
|
|
129
|
+
* native span kinds instead of collapsing them into the built-in categories.
|
|
130
|
+
*/
|
|
131
|
+
declare const traceSpanKindSchema: z.ZodString;
|
|
138
132
|
/** Schema for the supported presentation formats of trace attributes. */
|
|
139
133
|
declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
|
|
140
134
|
string: "string";
|
|
@@ -306,16 +300,7 @@ declare const traceSpanSchema: z.ZodObject<{
|
|
|
306
300
|
id: z.ZodString;
|
|
307
301
|
parentId: z.ZodNullable<z.ZodString>;
|
|
308
302
|
caseId: z.ZodString;
|
|
309
|
-
kind: z.ZodEnum<{
|
|
310
|
-
eval: "eval";
|
|
311
|
-
agent: "agent";
|
|
312
|
-
llm: "llm";
|
|
313
|
-
tool: "tool";
|
|
314
|
-
retrieval: "retrieval";
|
|
315
|
-
scorer: "scorer";
|
|
316
|
-
checkpoint: "checkpoint";
|
|
317
|
-
custom: "custom";
|
|
318
|
-
}>;
|
|
303
|
+
kind: z.ZodString;
|
|
319
304
|
name: z.ZodString;
|
|
320
305
|
startedAt: z.ZodString;
|
|
321
306
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -650,16 +635,7 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
650
635
|
id: z.ZodString;
|
|
651
636
|
parentId: z.ZodNullable<z.ZodString>;
|
|
652
637
|
caseId: z.ZodString;
|
|
653
|
-
kind: z.ZodEnum<{
|
|
654
|
-
custom: "custom";
|
|
655
|
-
eval: "eval";
|
|
656
|
-
agent: "agent";
|
|
657
|
-
llm: "llm";
|
|
658
|
-
tool: "tool";
|
|
659
|
-
retrieval: "retrieval";
|
|
660
|
-
scorer: "scorer";
|
|
661
|
-
checkpoint: "checkpoint";
|
|
662
|
-
}>;
|
|
638
|
+
kind: z.ZodString;
|
|
663
639
|
name: z.ZodString;
|
|
664
640
|
startedAt: z.ZodString;
|
|
665
641
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -724,16 +700,7 @@ declare const caseDetailSchema: z.ZodObject<{
|
|
|
724
700
|
id: z.ZodString;
|
|
725
701
|
parentId: z.ZodNullable<z.ZodString>;
|
|
726
702
|
caseId: z.ZodString;
|
|
727
|
-
kind: z.ZodEnum<{
|
|
728
|
-
custom: "custom";
|
|
729
|
-
eval: "eval";
|
|
730
|
-
agent: "agent";
|
|
731
|
-
llm: "llm";
|
|
732
|
-
tool: "tool";
|
|
733
|
-
retrieval: "retrieval";
|
|
734
|
-
scorer: "scorer";
|
|
735
|
-
checkpoint: "checkpoint";
|
|
736
|
-
}>;
|
|
703
|
+
kind: z.ZodString;
|
|
737
704
|
name: z.ZodString;
|
|
738
705
|
startedAt: z.ZodString;
|
|
739
706
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -783,16 +750,7 @@ declare const caseDetailSchema: z.ZodObject<{
|
|
|
783
750
|
id: z.ZodString;
|
|
784
751
|
parentId: z.ZodNullable<z.ZodString>;
|
|
785
752
|
caseId: z.ZodString;
|
|
786
|
-
kind: z.ZodEnum<{
|
|
787
|
-
custom: "custom";
|
|
788
|
-
eval: "eval";
|
|
789
|
-
agent: "agent";
|
|
790
|
-
llm: "llm";
|
|
791
|
-
tool: "tool";
|
|
792
|
-
retrieval: "retrieval";
|
|
793
|
-
scorer: "scorer";
|
|
794
|
-
checkpoint: "checkpoint";
|
|
795
|
-
}>;
|
|
753
|
+
kind: z.ZodString;
|
|
796
754
|
name: z.ZodString;
|
|
797
755
|
startedAt: z.ZodString;
|
|
798
756
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -1491,16 +1449,7 @@ declare const cacheListItemSchema: z.ZodObject<{
|
|
|
1491
1449
|
key: z.ZodString;
|
|
1492
1450
|
namespace: z.ZodString;
|
|
1493
1451
|
spanName: z.ZodString;
|
|
1494
|
-
spanKind: z.ZodEnum<{
|
|
1495
|
-
eval: "eval";
|
|
1496
|
-
agent: "agent";
|
|
1497
|
-
llm: "llm";
|
|
1498
|
-
tool: "tool";
|
|
1499
|
-
retrieval: "retrieval";
|
|
1500
|
-
scorer: "scorer";
|
|
1501
|
-
checkpoint: "checkpoint";
|
|
1502
|
-
custom: "custom";
|
|
1503
|
-
}>;
|
|
1452
|
+
spanKind: z.ZodString;
|
|
1504
1453
|
storedAt: z.ZodString;
|
|
1505
1454
|
codeFingerprint: z.ZodString;
|
|
1506
1455
|
sizeBytes: z.ZodNumber;
|
|
@@ -1509,7 +1458,7 @@ declare const cacheListItemSchema: z.ZodObject<{
|
|
|
1509
1458
|
type CacheListItem = z.infer<typeof cacheListItemSchema>;
|
|
1510
1459
|
/** Serialized nested span captured while recording a cached operation. */
|
|
1511
1460
|
type SerializedCacheSpan = {
|
|
1512
|
-
kind:
|
|
1461
|
+
kind: string;
|
|
1513
1462
|
name: string;
|
|
1514
1463
|
attributes?: Record<string, unknown>;
|
|
1515
1464
|
status: 'running' | 'ok' | 'error' | 'cancelled';
|
|
@@ -1575,16 +1524,7 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
1575
1524
|
key: z.ZodString;
|
|
1576
1525
|
namespace: z.ZodString;
|
|
1577
1526
|
spanName: z.ZodString;
|
|
1578
|
-
spanKind: z.ZodEnum<{
|
|
1579
|
-
eval: "eval";
|
|
1580
|
-
agent: "agent";
|
|
1581
|
-
llm: "llm";
|
|
1582
|
-
tool: "tool";
|
|
1583
|
-
retrieval: "retrieval";
|
|
1584
|
-
scorer: "scorer";
|
|
1585
|
-
checkpoint: "checkpoint";
|
|
1586
|
-
custom: "custom";
|
|
1587
|
-
}>;
|
|
1527
|
+
spanKind: z.ZodString;
|
|
1588
1528
|
storedAt: z.ZodString;
|
|
1589
1529
|
codeFingerprint: z.ZodString;
|
|
1590
1530
|
recording: z.ZodObject<{
|
|
@@ -1619,16 +1559,7 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
1619
1559
|
key: z.ZodString;
|
|
1620
1560
|
namespace: z.ZodString;
|
|
1621
1561
|
spanName: z.ZodString;
|
|
1622
|
-
spanKind: z.ZodEnum<{
|
|
1623
|
-
eval: "eval";
|
|
1624
|
-
agent: "agent";
|
|
1625
|
-
llm: "llm";
|
|
1626
|
-
tool: "tool";
|
|
1627
|
-
retrieval: "retrieval";
|
|
1628
|
-
scorer: "scorer";
|
|
1629
|
-
checkpoint: "checkpoint";
|
|
1630
|
-
custom: "custom";
|
|
1631
|
-
}>;
|
|
1562
|
+
spanKind: z.ZodString;
|
|
1632
1563
|
storedAt: z.ZodString;
|
|
1633
1564
|
codeFingerprint: z.ZodString;
|
|
1634
1565
|
recording: z.ZodObject<{
|
|
@@ -1703,7 +1634,7 @@ type EvalTraceTree = {
|
|
|
1703
1634
|
spans: EvalTraceSpan[];
|
|
1704
1635
|
rootSpans: EvalTraceSpan[];
|
|
1705
1636
|
findSpan: (name: string) => EvalTraceSpan | undefined;
|
|
1706
|
-
findSpansByKind: (kind:
|
|
1637
|
+
findSpansByKind: (kind: string) => EvalTraceSpan[];
|
|
1707
1638
|
flattenDfs: () => EvalTraceSpan[];
|
|
1708
1639
|
checkpoints: Map<string, unknown>;
|
|
1709
1640
|
};
|
|
@@ -1963,7 +1894,7 @@ type TraceSpanTimestamp = Date | string;
|
|
|
1963
1894
|
type TraceExternalSpanStartInfo = {
|
|
1964
1895
|
/** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
|
|
1965
1896
|
parentId?: string | null; /** Semantic category used by the trace UI. */
|
|
1966
|
-
kind:
|
|
1897
|
+
kind: string; /** Display name for the span. */
|
|
1967
1898
|
name: string; /** Span start time. Defaults to now. */
|
|
1968
1899
|
startedAt?: TraceSpanTimestamp; /** Initial span attributes. Later updates merge into this object. */
|
|
1969
1900
|
attributes?: Record<string, unknown>;
|
|
@@ -1984,7 +1915,7 @@ type TraceExternalSpanEndInfo = TraceExternalSpanUpdateInfo & {
|
|
|
1984
1915
|
type TraceExternalSpanRecordInfo = {
|
|
1985
1916
|
/** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
|
|
1986
1917
|
parentId?: string | null; /** Semantic category used by the trace UI. */
|
|
1987
|
-
kind:
|
|
1918
|
+
kind: string; /** Display name for the span. */
|
|
1988
1919
|
name: string; /** Span start time. Defaults to now. */
|
|
1989
1920
|
startedAt?: TraceSpanTimestamp; /** Span end time. Defaults to the start time. */
|
|
1990
1921
|
endedAt?: TraceSpanTimestamp | null; /** Final span status. Defaults to `ok`. */
|
|
@@ -2008,7 +1939,7 @@ declare function recordExternalSpan(info: TraceExternalSpanRecordInfo): string;
|
|
|
2008
1939
|
*/
|
|
2009
1940
|
declare const evalSpan: TraceActiveSpan;
|
|
2010
1941
|
type TraceSpanInfoBase = {
|
|
2011
|
-
kind:
|
|
1942
|
+
kind: string;
|
|
2012
1943
|
name: string;
|
|
2013
1944
|
attributes?: Record<string, unknown>;
|
|
2014
1945
|
};
|
|
@@ -2175,4 +2106,4 @@ declare function createRunner({
|
|
|
2175
2106
|
*/
|
|
2176
2107
|
declare function runCli(argv: string[]): Promise<void>;
|
|
2177
2108
|
//#endregion
|
|
2178
|
-
export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type
|
|
2109
|
+
export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, 
createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { $ as fileRefSchema, A as evalSummarySchema, B as evalChartsConfigSchema, C as assertionFailureSchema, D as evalStatAggregateSchema, E as evalFreshnessStatusSchema, F as evalChartColorSchema, G as traceDisplayConfigSchema, H as traceAttributeDisplayInputSchema, I as evalChartConfigSchema, J as traceSpanSchema, K as traceDisplayInputConfigSchema, L as evalChartMetricSchema, M as evalChartAggregateSchema, N as evalChartAxisSchema, O as evalStatItemSchema, P as evalChartBuiltinMetricSchema, Q as columnKindSchema, R as evalChartTooltipExtraSchema, S as spanCacheOptionsSchema, T as caseRowSchema, U as traceAttributeDisplayPlacementSchema, V as traceAttributeDisplayFormatSchema, W as traceAttributeDisplaySchema, X as columnDefSchema, Y as cellValueSchema, Z as columnFormatSchema, _ as cacheListItemSchema, _t as repoFile, a as sseEnvelopeSchema, at as evalSpan, b as cacheRecordingSchema, c as deriveScopedSummaryFromCases, ct as hashCacheKeySync, d as runManifestSchema, dt as getCurrentScope, et as jsonCellSchema, f as runSummarySchema, ft as incrementEvalOutput, g as cacheFileSchema, gt as setScopeCacheContext, h as cacheEntrySchema, ht as setEvalOutput, i as updateManualScoreRequestSchema, it as buildTraceTree, j as scoreTraceSchema, k as evalStatsConfigSchema, l as deriveStatusFromCaseRows, lt as EvalAssertionError, m as trialSelectionModeSchema, mt as runInEvalScope, n as createRunner, nt as repoFileRefSchema, o as getEvalTitle, ot as evalTracer, p as agentEvalsConfigSchema, pt as isInEvalScope, q as traceSpanKindSchema, r as createRunRequestSchema, rt as runArtifactRefSchema, s as getEvalDisplayStatus, st as hashCacheKey, t as runCli, tt as numberDisplayOptionsSchema, u as deriveStatusFromChildStatuses, ut as evalAssert, v as cacheModeSchema, vt as defineEval, w as caseDetailSchema, x as serializedCacheSpanSchema, y as cacheRecordingOpSchema, yt as getEvalRegistry, z as evalChartTypeSchema } from "./cli-
|
|
2
|
-
import "./src-CXclO9ZI.mjs";
|
|
1
|
+
import { $ as fileRefSchema, A as evalSummarySchema, B as evalChartsConfigSchema, C as assertionFailureSchema, D as evalStatAggregateSchema, E as evalFreshnessStatusSchema, F as evalChartColorSchema, G as traceDisplayConfigSchema, H as traceAttributeDisplayInputSchema, I as evalChartConfigSchema, J as traceSpanSchema, K as traceDisplayInputConfigSchema, L as evalChartMetricSchema, M as evalChartAggregateSchema, N as evalChartAxisSchema, O as evalStatItemSchema, P as evalChartBuiltinMetricSchema, Q as columnKindSchema, R as evalChartTooltipExtraSchema, S as spanCacheOptionsSchema, T as caseRowSchema, U as traceAttributeDisplayPlacementSchema, V as traceAttributeDisplayFormatSchema, W as traceAttributeDisplaySchema, X as columnDefSchema, Y as cellValueSchema, Z as columnFormatSchema, _ as cacheListItemSchema, _t as repoFile, a as sseEnvelopeSchema, at as evalSpan, b as cacheRecordingSchema, c as deriveScopedSummaryFromCases, ct as hashCacheKeySync, d as runManifestSchema, dt as getCurrentScope, et as jsonCellSchema, f as runSummarySchema, ft as incrementEvalOutput, g as cacheFileSchema, gt as setScopeCacheContext, h as cacheEntrySchema, ht as setEvalOutput, i as updateManualScoreRequestSchema, it as buildTraceTree, j as scoreTraceSchema, k as evalStatsConfigSchema, l as deriveStatusFromCaseRows, lt as EvalAssertionError, m as trialSelectionModeSchema, mt as runInEvalScope, n as createRunner, nt as repoFileRefSchema, o as getEvalTitle, ot as evalTracer, p as agentEvalsConfigSchema, pt as isInEvalScope, q as traceSpanKindSchema, r as createRunRequestSchema, rt as runArtifactRefSchema, s as getEvalDisplayStatus, st as hashCacheKey, t as runCli, tt as numberDisplayOptionsSchema, u as deriveStatusFromChildStatuses, ut as evalAssert, v as cacheModeSchema, vt as defineEval, w as caseDetailSchema, x as serializedCacheSpanSchema, y as cacheRecordingOpSchema, yt as getEvalRegistry, z as evalChartTypeSchema } from "./cli-B0QmsWCU.mjs";
|
|
2
|
+
import "./src-Bivx1C6b.mjs";
|
|
3
3
|
export { EvalAssertionError, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-Dw9et3_Z.mjs";
|
|
2
|
-
import "./src-CXclO9ZI.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-B0QmsWCU.mjs";
|
|
2
|
+
import "./src-Bivx1C6b.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-kSiHsl91.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-BY-y4OzF.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -57,8 +57,8 @@
|
|
|
57
57
|
"@types/node": "^24.7.2",
|
|
58
58
|
"typescript": "^5.9.2",
|
|
59
59
|
"@agent-evals/runner": "0.0.1",
|
|
60
|
-
"@agent-evals/
|
|
61
|
-
"@agent-evals/
|
|
60
|
+
"@agent-evals/shared": "0.0.1",
|
|
61
|
+
"@agent-evals/sdk": "0.0.1"
|
|
62
62
|
},
|
|
63
63
|
"scripts": {
|
|
64
64
|
"build": "pnpm --filter @agent-evals/web build && tsdown",
|