@ls-stack/agent-eval 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-ByMLOds2.mjs → app-mBbAN-Gt.mjs} +15 -3
- package/dist/apps/web/dist/assets/index-8VE7b6RK.css +1 -0
- package/dist/apps/web/dist/assets/index-Czer_MdN.js +118 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-DRkwWgTj.mjs → cli-BQwRbqsL.mjs} +75 -4
- package/dist/index.d.mts +342 -90
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-DB0dwGrd.mjs → runOrchestration-ClWYWPen.mjs} +446 -203
- package/dist/{runner-39KGoaM1.mjs → runner-BQn_xf36.mjs} +1 -1
- package/dist/{runner-DRINcaN_.mjs → runner-DbVB66h9.mjs} +2 -2
- package/dist/src-CuirVcPY.mjs +3 -0
- package/package.json +6 -4
- package/skills/agent-eval/SKILL.md +52 -20
- package/dist/apps/web/dist/assets/index-DOXT0Y9V.css +0 -1
- package/dist/apps/web/dist/assets/index-DR2haqvV.js +0 -118
- package/dist/bin.d.mts +0 -1
- package/dist/runChild.d.mts +0 -1
- package/dist/src-BwKm3sKU.mjs +0 -3
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-
|
|
3
|
-
import "./src-
|
|
4
|
-
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as llmCallMetricFormatSchema, $t as traceAttributeDisplayPlacementSchema, A as extractCacheHits, An as advanceEvalTime, At as evalChartBuiltinMetricSchema, B as runManifestSchema, Bn as mergeEvalOutput, Bt as cacheEntryWithDebugKeySchema, Cn as deserializeCacheRecording, Ct as runLogEntrySchema, D as updateManualScoreRequestSchema, Dn as repoFile, Dt as scoreTraceSchema, E as createRunRequestSchema, En as serializeCacheValue, Et as runLogPhaseSchema, F as getEvalTitle, Fn as getCurrentScope, Ft as evalChartTypeSchema, G as apiCallMetricFormatSchema, Gn as setEvalOutput, Gt as cacheRecordingOpSchema, H as DEFAULT_API_CALLS_CONFIG, Hn as runInEvalRuntimeScope, Ht as cacheListItemSchema, I as getEvalDisplayStatus, In as getEvalCaseInput, It as evalChartsConfigSchema, J as apiCallsConfigSchema, Jn as defineEval, Jt as serializedCacheSpanSchema, K as apiCallMetricPlacementSchema, Kn as setScopeCacheContext, Kt as cacheRecordingSchema, L as deriveScopedSummaryFromCases, Ln as getEvalStartTime, Lt as cacheDebugKeyEntrySchema, M as extractLlmCalls, Mt as evalChartConfigSchema, N as applyDerivedCallAttributes, Nn as evalAssert, Nt as evalChartMetricSchema, O as sseEnvelopeSchema, On as evalExpect, Ot as evalChartAggregateSchema, P as getNestedAttribute, Pn as evalLog, Pt as evalChartTooltipExtraSchema, Q as evalDeriveConfigSchema, Qt as traceAttributeDisplayInputSchema, R as deriveStatusFromCaseRows, Rn as incrementEvalOutput, Rt as cacheDebugKeyFileSchema, Sn as hashCacheKeySync, St as evalSummarySchema, Tn as serializeCacheRecording, Tt as runLogLocationSchema, U as DEFAULT_LLM_CALLS_CONFIG, Un as runInEvalScope, Ut as cacheModeSchema, V as runSummarySchema, Vn as nextEvalId, Vt as cacheFileSchema, W as agentEvalsConfigSchema, Wn as runInExistingEvalScope, Wt as cacheOperationTypeSchema, X as evalColumnOverrideSchema, Xt as traceCacheRefSchema, Y as defaultConfigKeySchema, Yn as getEvalRegistry, Yt as spanCacheOptionsSchema, Z as evalColumnsSchema, Zt as traceAttributeDisplayFormatSchema, _n as buildTraceTree, _t as discoveryIssueSchema, an as traceSpanSchema, at as removeDefaultConfigSchema, bn as evalTracer, bt as evalStatItemSchema, cn as columnDefSchema, ct as runLogsConfigSchema, dn as fileRefSchema, dt as buildEvalKey, en as traceAttributeDisplaySchema, et as llmCallMetricPlacementSchema, fn as jsonCellSchema, ft as getCaseRowCaseKey, gn as z, gt as caseRowSchema, hn as runArtifactRefSchema, ht as caseDetailSchema, in as traceSpanKindSchema, it as llmCallsConfigSchema, j as extractApiCalls, jn as appendToEvalOutput, jt as evalChartColorSchema, k as extractCacheEntries, kn as EvalAssertionError, kt as evalChartAxisSchema, ln as columnFormatSchema, lt as trialSelectionModeSchema, mn as repoFileRefSchema, mt as assertionFailureSchema, nn as traceDisplayInputConfigSchema, nt as llmCallPricingRateSchema, on as traceSpanWarningSchema, ot as resolveApiCallsConfig, pn as numberDisplayOptionsSchema, pt as getCaseRowEvalKey, q as apiCallMetricSchema, qn as startEvalBackgroundJob, qt as cacheStatusSchema, rn as traceSpanErrorSchema, rt as llmCallPricingSchema, sn as cellValueSchema, st as resolveLlmCallsConfig, tn as traceDisplayConfigSchema, tt as llmCallMetricSchema, un as columnKindSchema, ut as buildCaseKey, vn as captureEvalSpanError, vt as evalFreshnessStatusSchema, wn as deserializeCacheValue, wt as runLogLevelSchema, xn as hashCacheKey, xt as evalStatsConfigSchema, yn as evalSpan, yt as evalStatAggregateSchema, z as deriveStatusFromChildStatuses, zn as isInEvalScope, zt as cacheEntrySchema } from "./runOrchestration-ClWYWPen.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-BQwRbqsL.mjs";
|
|
3
|
+
import "./src-CuirVcPY.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { B as
|
|
1
|
+
import { B as runManifestSchema, E as createRunRequestSchema, It as evalChartsConfigSchema, Mn as configureEvalRunLogs, T as createFsCacheStore, V as runSummarySchema, cn as columnDefSchema, dt as buildEvalKey, r as getTargetEvals$1, t as executeRun, x as loadConfig, xt as evalStatsConfigSchema, y as parseEvalDiscovery } from "./runOrchestration-ClWYWPen.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|