@ls-stack/agent-eval 0.60.3 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-l3ynaNsb.mjs → app-Dm_9ZTVa.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-CM_zUhl_.css +1 -0
- package/dist/apps/web/dist/assets/{index-CM6MDNqo.js → index-DxZsizjg.js} +76 -76
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-BSVUCUxr.mjs → cli-CPBIcMP-.mjs} +11 -26
- package/dist/index.d.mts +99 -90
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-Bq0Y3y_1.mjs → runExecution-D-CnSRYy.mjs} +18 -2
- package/dist/{runOrchestration-C7qQISz2.mjs → runOrchestration-Basvyp4u.mjs} +1 -1
- package/dist/{runner-C9xNJHt3.mjs → runner-B6UT1K7L.mjs} +1 -1
- package/dist/{runner-DmkSq-QG.mjs → runner-DwNb5TCb.mjs} +2 -2
- package/dist/{src-D5vGo2iv.mjs → src-SixIk0b7.mjs} +2 -2
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +76 -432
- package/dist/apps/web/dist/assets/index-CqWfzcFb.css +0 -1
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BSVUCUxr.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-D5vGo2iv.mjs";
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CPBIcMP-.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-SixIk0b7.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as
|
|
2
|
-
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C7qQISz2.mjs";
|
|
1
|
+
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-Basvyp4u.mjs";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -1214,6 +1214,7 @@ const apiCallsConfigSchema = z.object({
|
|
|
1214
1214
|
attributes: z.object({
|
|
1215
1215
|
method: z.string().optional(),
|
|
1216
1216
|
url: z.string().optional(),
|
|
1217
|
+
routeAlias: z.string().optional(),
|
|
1217
1218
|
statusCode: z.string().optional(),
|
|
1218
1219
|
request: z.string().optional(),
|
|
1219
1220
|
response: z.string().optional(),
|
|
@@ -1278,6 +1279,7 @@ const DEFAULT_API_CALLS_CONFIG = {
|
|
|
1278
1279
|
attributes: {
|
|
1279
1280
|
method: "method",
|
|
1280
1281
|
url: "url",
|
|
1282
|
+
routeAlias: "routeAlias",
|
|
1281
1283
|
statusCode: "statusCode",
|
|
1282
1284
|
request: "request",
|
|
1283
1285
|
response: "response",
|
|
@@ -2494,6 +2496,17 @@ function pickError(span) {
|
|
|
2494
2496
|
if (span.errors && span.errors.length > 0) return span.errors[0] ?? null;
|
|
2495
2497
|
return null;
|
|
2496
2498
|
}
|
|
2499
|
+
/**
 * Drop everything from the first "?" or "#" onward in `value`.
 *
 * @param {string} value - Text to truncate (has `indexOf`/`slice` called on it).
 * @returns {string} The prefix of `value` before the earliest "?" or "#";
 *   `value` itself, unchanged, when it contains neither character.
 */
function stripSearchAndHash(value) {
|
|
2500
|
+
// Collect the positions of "?" and "#", discard the -1 "not found" results, and take the smallest remaining index.
const endIndex = [value.indexOf("?"), value.indexOf("#")].filter((index) => index !== -1).toSorted((a, b) => a - b)[0];
|
|
2501
|
+
// endIndex is undefined when neither delimiter was found.
return endIndex === void 0 ? value : value.slice(0, endIndex);
|
|
2502
|
+
}
|
|
2503
|
+
/**
 * Normalize a route alias into a path that starts with "/" and carries no
 * query string or fragment.
 *
 * @param {string | null} routeAlias - Raw alias. Only `null` is guarded against;
 *   any other non-string value (e.g. `undefined`) would throw on `.trim()`.
 * @returns {string | null} `null` when the input is `null` or blank after
 *   trimming; otherwise the trimmed alias with any "?"/"#" suffix removed and
 *   a leading "/" added if it was missing.
 */
function normalizeRouteAlias(routeAlias) {
|
|
2504
|
+
if (routeAlias === null) return null;
|
|
2505
|
+
const trimmed = routeAlias.trim();
|
|
2506
|
+
// Whitespace-only aliases are treated the same as a missing alias.
if (trimmed.length === 0) return null;
|
|
2507
|
+
const withoutSearch = stripSearchAndHash(trimmed);
|
|
2508
|
+
// An alias consisting only of a query/fragment (e.g. "?x=1") ends up as "/".
return withoutSearch.startsWith("/") ? withoutSearch : `/${withoutSearch}`;
|
|
2509
|
+
}
|
|
2497
2510
|
/**
|
|
2498
2511
|
* Filter `spans` down to API calls and project each one to the structured
|
|
2499
2512
|
* shape consumed by the API calls tab.
|
|
@@ -2525,13 +2538,16 @@ function extractApiCalls(spans, config) {
|
|
|
2525
2538
|
placements: metric.placements
|
|
2526
2539
|
});
|
|
2527
2540
|
}
|
|
2541
|
+
const url = readString$1(attrs, config.attributes.url);
|
|
2542
|
+
const routeAlias = normalizeRouteAlias(readString$1(attrs, config.attributes.routeAlias));
|
|
2528
2543
|
result.push({
|
|
2529
2544
|
id: span.id,
|
|
2530
2545
|
name: span.name,
|
|
2531
2546
|
kind: span.kind,
|
|
2532
2547
|
status: span.status,
|
|
2533
2548
|
method: readString$1(attrs, config.attributes.method),
|
|
2534
|
-
url: readString$1(attrs, config.attributes.url),
|
|
2549
|
+
url,
|
|
2550
|
+
routeAlias,
|
|
2535
2551
|
statusCode: readNumber$1(attrs, config.attributes.statusCode),
|
|
2536
2552
|
durationMs: readNumber$1(attrs, config.attributes.durationMs) ?? computeDurationMs(span),
|
|
2537
2553
|
request: getNestedAttribute(attrs, config.attributes.request),
|
|
@@ -7239,4 +7255,4 @@ function recordAssertionFailure(scope, failure) {
|
|
|
7239
7255
|
});
|
|
7240
7256
|
}
|
|
7241
7257
|
//#endregion
|
|
7242
|
-
export {
|
|
7258
|
+
export { startEvalBackgroundJob as $, manualInputFileValueSchema as A, manualInputDescriptorSchema as At, getCurrentScope as B, hashCacheKey as C, buildCaseKey as Ct, serializeCacheRecording as D, caseRowSchema as Dt, deserializeCacheValue as E, caseDetailSchema as Et, appendToEvalOutput as F, runWithEvalRegistry as Ft, mergeEvalOutput as G, incrementEvalOutput as H, configureEvalRunLogs as I, runInEvalScope as J, nextEvalId as K, evalAssert as L, evalExpect as M, columnDefSchema as Mt, EvalAssertionError as N, defineEval as Nt, serializeCacheValue as O, evalStatAggregateSchema as Ot, EvalRuntimeUsageError as P, getEvalRegistry as Pt, setScopeCacheContext as Q, evalLog as R, evalTracer as S, resolveLlmCallsConfig as St, deserializeCacheRecording as T, getCaseRowCaseKey as Tt, isInEvalScope as U, getEvalCaseInput as V, matchesEvalTags as W, runWithEvalClock as X, runInExistingEvalScope as Y, setEvalOutput as Z, createFsCacheStore as _, validateEvalTagName as _t, isCaseChildParentMessage as a, extractLlmCalls as at, captureEvalSpanError as b, runSummarySchema as bt, resolveArtifactPath as c, applyDerivedCallAttributes as ct, loadEvalModule as d, getEvalDisplayStatus as dt, createRunRequestSchema as et, resolveEvalDefaultConfig as f, deriveScopedSummaryFromCases as ft, createBufferedCacheStore as g, matchesTagsFilter as gt, commitPendingCacheWrites as h, dedupeEvalTags as ht, isCaseChildMessage as i, extractApiCalls as it, readManualInputFile as j, evalChartsConfigSchema as jt, repoFile as k, evalStatsConfigSchema as kt, registerAgentEvalsPackageResolutionHooks as l, getNestedAttribute as lt, buildDeclaredColumnDefs as m, deriveStatusFromChildStatuses as mt, resolveRunnableEvalCases as n, extractCacheEntries as nt, stripTerminalControlCodes as o, simulateLlmCallCost as ot, loadConfig as p, deriveStatusFromCaseRows as pt, runInEvalRuntimeScope as q, runCase as r, extractCacheHits as rt, resolveTracePresentation as s, simulateTokenAllocation as st, filterEvalCases as t, 
updateManualScoreRequestSchema as tt, runWithModuleIsolation as u, getEvalTitle as ut, getCacheRetentionOptions as v, validateTagsFilterExpression as vt, hashCacheKeySync as w, buildEvalKey as wt, evalSpan as x, resolveApiCallsConfig as xt, buildTraceTree as y, runManifestSchema as yt, evalTime as z };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
|
|
2
2
|
import { Result, resultify } from "t-result";
|
|
3
3
|
import { readFile, readdir, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-DwNb5TCb.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-BSVUCUxr.mjs";
|
|
2
|
-
import "./src-D5vGo2iv.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-CPBIcMP-.mjs";
|
|
2
|
+
import "./src-SixIk0b7.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance({ loadEnv = true } = {}) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import "./cli-BSVUCUxr.mjs";
|
|
1
|
+
import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import "./cli-CPBIcMP-.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|