@ls-stack/agent-eval 0.16.1 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-B8e-oWYc.mjs → app-DTotEBoY.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-C5IRkeUz.js +118 -0
- package/dist/apps/web/dist/assets/{index-MARPw1bH.css → index-Cn9WoTj5.css} +1 -1
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-BmrtjQj_.mjs → cli-CULTt3Xp.mjs} +64 -13
- package/dist/index.d.mts +586 -8
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +2 -1
- package/dist/{runOrchestration-BDyNrRQT.mjs → runOrchestration-D2okEB3I.mjs} +514 -125
- package/dist/{runner-CsZqhbiA.mjs → runner-BSXZiQIi.mjs} +2 -2
- package/dist/{runner-DABFPXkx.mjs → runner-DyM0Gp8G.mjs} +1 -1
- package/dist/src-CNf3xwVw.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +17 -4
- package/dist/apps/web/dist/assets/index-BZ1TdyEg.js +0 -117
- package/dist/src-CEAJYN_X.mjs +0 -3
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-BmrtjQj_.mjs";
|
|
3
|
-
import "./src-CEAJYN_X.mjs";
|
|
4
|
-
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, 
traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as caseDetailSchema, $t as buildTraceTree, A as getEvalTitle, At as serializedCacheSpanSchema, B as apiCallMetricFormatSchema, Bt as traceSpanKindSchema, C as createRunRequestSchema, Cn as setEvalOutput, Ct as cacheFileSchema, D as extractApiCalls, Dn as defineEval, Dt as cacheRecordingOpSchema, E as extractCacheHits, En as repoFile, Et as cacheOperationTypeSchema, F as runManifestSchema, Ft as traceAttributeDisplayPlacementSchema, G as llmCallMetricPlacementSchema, Gt as columnFormatSchema, H as apiCallMetricSchema, Ht as traceSpanWarningSchema, I as runSummarySchema, It as traceAttributeDisplaySchema, J as resolveApiCallsConfig, Jt as jsonCellSchema, K as llmCallMetricSchema, Kt as columnKindSchema, L as DEFAULT_API_CALLS_CONFIG, Lt as traceDisplayConfigSchema, M as deriveScopedSummaryFromCases, Mt as traceCacheRefSchema, N as deriveStatusFromCaseRows, Nt as traceAttributeDisplayFormatSchema, O as extractLlmCalls, On as getEvalRegistry, Ot as cacheRecordingSchema, P as deriveStatusFromChildStatuses, Pt as traceAttributeDisplayInputSchema, Q as assertionFailureSchema, Qt as z, R as DEFAULT_LLM_CALLS_CONFIG, Rt as traceDisplayInputConfigSchema, Sn as runInExistingEvalScope, St as cacheEntryWithDebugKeySchema, T as sseEnvelopeSchema, Tn as startEvalBackgroundJob, Tt as cacheModeSchema, U as apiCallsConfigSchema, Ut as cellValueSchema, V as apiCallMetricPlacementSchema, Vt as traceSpanSchema, W as llmCallMetricFormatSchema, Wt as columnDefSchema, X as runLogsConfigSchema, Xt as repoFileRefSchema, Y as resolveLlmCallsConfig, Yt as numberDisplayOptionsSchema, Z as trialSelectionModeSchema, Zt as runArtifactRefSchema, _n as isInEvalScope, _t as evalChartTypeSchema, an as deserializeCacheRecording, at as evalSummarySchema, bn as runInEvalRuntimeScope, bt as cacheDebugKeyFileSchema, cn as serializeCacheValue, ct as runLogPhaseSchema, dt as evalChartAxisSchema, en as captureEvalSpanError, et as caseRowSchema, fn as evalAssert, ft as evalChartBuiltinMetricSchema, 
gn as incrementEvalOutput, gt as evalChartTooltipExtraSchema, hn as getEvalCaseInput, ht as evalChartMetricSchema, in as hashCacheKeySync, it as evalStatsConfigSchema, j as getEvalDisplayStatus, jt as spanCacheOptionsSchema, k as getNestedAttribute, kt as cacheStatusSchema, ln as EvalAssertionError, lt as scoreTraceSchema, mn as getCurrentScope, mt as evalChartConfigSchema, nn as evalTracer, nt as evalStatAggregateSchema, on as deserializeCacheValue, ot as runLogEntrySchema, pn as evalLog, pt as evalChartColorSchema, q as llmCallsConfigSchema, qt as fileRefSchema, rn as hashCacheKey, rt as evalStatItemSchema, sn as serializeCacheRecording, st as runLogLevelSchema, tn as evalSpan, tt as evalFreshnessStatusSchema, un as appendToEvalOutput, ut as evalChartAggregateSchema, vn as mergeEvalOutput, vt as evalChartsConfigSchema, w as updateManualScoreRequestSchema, wn as setScopeCacheContext, wt as cacheListItemSchema, xn as runInEvalScope, xt as cacheEntrySchema, yn as nextEvalId, yt as cacheDebugKeyEntrySchema, z as agentEvalsConfigSchema, zt as traceSpanErrorSchema } from "./runOrchestration-D2okEB3I.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-CULTt3Xp.mjs";
|
|
3
|
+
import "./src-CNf3xwVw.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, 
setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as createRunRequestSchema, F as runManifestSchema, I as runSummarySchema,
|
|
1
|
+
import { C as createRunRequestSchema, F as runManifestSchema, I as runSummarySchema, S as createFsCacheStore, Wt as columnDefSchema, dn as configureEvalRunLogs, it as evalStatsConfigSchema, t as executeRun, v as parseEvalMetas, vt as evalChartsConfigSchema, y as loadConfig } from "./runOrchestration-D2okEB3I.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|
|
@@ -82,6 +82,7 @@ async function main() {
|
|
|
82
82
|
const context = await readContext(process.argv[2]);
|
|
83
83
|
process.chdir(context.workspaceRoot);
|
|
84
84
|
const config = await loadConfig();
|
|
85
|
+
configureEvalRunLogs({ captureConsole: config.runLogs?.captureConsole !== false });
|
|
85
86
|
const cacheStore = createFsCacheStore({
|
|
86
87
|
workspaceRoot: context.workspaceRoot,
|
|
87
88
|
dir: config.cache?.dir,
|