@tangle-network/agent-eval 0.30.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{baseline-BwdCXUS8.d.ts → baseline-4R5deP0N.d.ts} +1 -1
- package/dist/benchmarks/index.d.ts +3 -3
- package/dist/builder-eval/index.d.ts +3 -3
- package/dist/builder-eval/index.js +2 -2
- package/dist/{chunk-R5UQJNKC.js → chunk-4L3WJXQJ.js} +2 -2
- package/dist/{chunk-RUI6SIHY.js → chunk-75ZREHD7.js} +4 -4
- package/dist/{chunk-5AKPEK5L.js → chunk-CXJOVDJR.js} +2 -2
- package/dist/{chunk-K33INZHH.js → chunk-GVQT44CS.js} +2 -2
- package/dist/{chunk-UW4NOOZI.js → chunk-HIO4UIS5.js} +308 -2
- package/dist/chunk-HIO4UIS5.js.map +1 -0
- package/dist/{chunk-4S4BM3QQ.js → chunk-M6RZ5LJN.js} +2 -2
- package/dist/{chunk-NG236HPC.js → chunk-QYJT52YW.js} +1 -1
- package/dist/chunk-QYJT52YW.js.map +1 -0
- package/dist/{chunk-XFZCM5Z3.js → chunk-SMSGXM74.js} +2 -2
- package/dist/{chunk-KTGTIOFD.js → chunk-UBPIXOC4.js} +2 -2
- package/dist/{chunk-DBIGN5MJ.js → chunk-WGXZAQLR.js} +3 -3
- package/dist/{chunk-NLMNWKVM.js → chunk-WSI4K3WB.js} +2 -2
- package/dist/{chunk-PALJO75S.js → chunk-XEL6UP7C.js} +2 -2
- package/dist/{chunk-SZSBQUIJ.js → chunk-Y2CPBYKH.js} +3 -3
- package/dist/{chunk-QHF6EQKK.js → chunk-YTMXBHFM.js} +2 -2
- package/dist/cli.js +3 -3
- package/dist/{control-rJhEDdpy.d.ts → control-BFpqHFV2.d.ts} +5 -5
- package/dist/{control-runtime-BRdQ0wrx.d.ts → control-runtime-BZ_lVLYW.d.ts} +2 -2
- package/dist/control.d.ts +8 -8
- package/dist/control.js +3 -3
- package/dist/{dataset-CiK_3LDr.d.ts → dataset-ueRVTUoY.d.ts} +1 -1
- package/dist/{emitter-BqjeOvJh.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
- package/dist/{errors-BZ9sTdz7.d.ts → errors-mje_cKOs.d.ts} +1 -1
- package/dist/{failure-cluster-D1NZKqYu.d.ts → failure-cluster-Cw65_5FY.d.ts} +1 -1
- package/dist/{feedback-trajectory-j0nJFgC6.d.ts → feedback-trajectory-iATEAHmc.d.ts} +2 -2
- package/dist/governance/index.d.ts +4 -4
- package/dist/{index-Cgt3DKXr.d.ts → index-DPILdKbP.d.ts} +2 -2
- package/dist/{index--fVrWDiR.d.ts → index-TVjRYWRm.d.ts} +1 -1
- package/dist/index.d.ts +108 -38
- package/dist/index.js +159 -14
- package/dist/index.js.map +1 -1
- package/dist/{integrity-BAxLGJ9I.d.ts → integrity-DYR5gWlb.d.ts} +2 -2
- package/dist/knowledge/index.d.ts +3 -3
- package/dist/meta-eval/index.d.ts +4 -4
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +11 -11
- package/dist/optimization.js +8 -8
- package/dist/pipelines/index.d.ts +6 -6
- package/dist/pipelines/index.js +3 -3
- package/dist/prm/index.d.ts +4 -4
- package/dist/{query-BFDT0kX_.d.ts → query-DODUYdPg.d.ts} +1 -1
- package/dist/{release-report-PWhGlpfO.d.ts → release-report-C8r4Vben.d.ts} +3 -3
- package/dist/reporting.d.ts +8 -8
- package/dist/reporting.js +4 -4
- package/dist/{researcher-ClDX3KZx.d.ts → researcher-BmgJ_901.d.ts} +6 -6
- package/dist/rl.d.ts +10 -10
- package/dist/rl.js +6 -6
- package/dist/{rubric-DgSqjqqj.d.ts → rubric-D5tjHNJQ.d.ts} +2 -2
- package/dist/{rubric-predictive-validity-C0uDYwG6.d.ts → rubric-predictive-validity-Bm-CbN46.d.ts} +1 -1
- package/dist/{run-record-CqzahIbx.d.ts → run-record-nYf9x2hU.d.ts} +1 -1
- package/dist/{store-BP5be6s7.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
- package/dist/{summary-report-jrSGb2xZ.d.ts → summary-report-dir7A-eQ.d.ts} +2 -2
- package/dist/{test-graded-scenario-BJ54PDan.d.ts → test-graded-scenario-B2kWEdh9.d.ts} +2 -2
- package/dist/traces.d.ts +533 -10
- package/dist/traces.js +14 -300
- package/dist/traces.js.map +1 -1
- package/dist/{trajectory-BFmveYZt.d.ts → trajectory-CnoBo-JY.d.ts} +1 -1
- package/dist/wire/index.d.ts +6 -6
- package/dist/wire/index.js +3 -3
- package/package.json +12 -21
- package/dist/chunk-NG236HPC.js.map +0 -1
- package/dist/chunk-UW4NOOZI.js.map +0 -1
- package/dist/replay-BX5Fm8en.d.ts +0 -529
- /package/dist/{chunk-R5UQJNKC.js.map → chunk-4L3WJXQJ.js.map} +0 -0
- /package/dist/{chunk-RUI6SIHY.js.map → chunk-75ZREHD7.js.map} +0 -0
- /package/dist/{chunk-5AKPEK5L.js.map → chunk-CXJOVDJR.js.map} +0 -0
- /package/dist/{chunk-K33INZHH.js.map → chunk-GVQT44CS.js.map} +0 -0
- /package/dist/{chunk-4S4BM3QQ.js.map → chunk-M6RZ5LJN.js.map} +0 -0
- /package/dist/{chunk-XFZCM5Z3.js.map → chunk-SMSGXM74.js.map} +0 -0
- /package/dist/{chunk-KTGTIOFD.js.map → chunk-UBPIXOC4.js.map} +0 -0
- /package/dist/{chunk-DBIGN5MJ.js.map → chunk-WGXZAQLR.js.map} +0 -0
- /package/dist/{chunk-NLMNWKVM.js.map → chunk-WSI4K3WB.js.map} +0 -0
- /package/dist/{chunk-PALJO75S.js.map → chunk-XEL6UP7C.js.map} +0 -0
- /package/dist/{chunk-SZSBQUIJ.js.map → chunk-Y2CPBYKH.js.map} +0 -0
- /package/dist/{chunk-QHF6EQKK.js.map → chunk-YTMXBHFM.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -11,7 +11,7 @@ import {
|
|
|
11
11
|
failureClusterView,
|
|
12
12
|
iqr,
|
|
13
13
|
welchsTTest
|
|
14
|
-
} from "./chunk-K33INZHH.js";
|
|
14
|
+
} from "./chunk-GVQT44CS.js";
|
|
15
15
|
import {
|
|
16
16
|
exportTrainingData,
|
|
17
17
|
toNdjson
|
|
@@ -28,7 +28,7 @@ import {
|
|
|
28
28
|
pytestTestParser,
|
|
29
29
|
runTestGradedScenario,
|
|
30
30
|
vitestTestParser
|
|
31
|
-
} from "./chunk-QHF6EQKK.js";
|
|
31
|
+
} from "./chunk-YTMXBHFM.js";
|
|
32
32
|
import {
|
|
33
33
|
classifyEuAiRisk,
|
|
34
34
|
euAiActReport,
|
|
@@ -54,7 +54,7 @@ import {
|
|
|
54
54
|
runProposeReview,
|
|
55
55
|
runProposeReviewAsControlLoop,
|
|
56
56
|
scoreFromEvals
|
|
57
|
-
} from "./chunk-PALJO75S.js";
|
|
57
|
+
} from "./chunk-XEL6UP7C.js";
|
|
58
58
|
import {
|
|
59
59
|
allCriticalPassed,
|
|
60
60
|
objectiveEval,
|
|
@@ -96,14 +96,14 @@ import {
|
|
|
96
96
|
summarizePreferenceMemory,
|
|
97
97
|
trialTraceFromMultiShotTrial,
|
|
98
98
|
withAssignedFeedbackSplit
|
|
99
|
-
} from "./chunk-SZSBQUIJ.js";
|
|
99
|
+
} from "./chunk-Y2CPBYKH.js";
|
|
100
100
|
import {
|
|
101
101
|
RunRecordValidationError,
|
|
102
102
|
isRunRecord,
|
|
103
103
|
parseRunRecordSafe,
|
|
104
104
|
roundTripRunRecord,
|
|
105
105
|
validateRunRecord
|
|
106
|
-
} from "./chunk-NLMNWKVM.js";
|
|
106
|
+
} from "./chunk-WSI4K3WB.js";
|
|
107
107
|
import {
|
|
108
108
|
assertReleaseConfidence,
|
|
109
109
|
bootstrapCi,
|
|
@@ -111,10 +111,10 @@ import {
|
|
|
111
111
|
judgeReplayGate,
|
|
112
112
|
releaseTraceEvidenceFromMultiShotTrials,
|
|
113
113
|
renderReleaseReport
|
|
114
|
-
} from "./chunk-DBIGN5MJ.js";
|
|
114
|
+
} from "./chunk-WGXZAQLR.js";
|
|
115
115
|
import {
|
|
116
116
|
runEvalCampaign
|
|
117
|
-
} from "./chunk-RUI6SIHY.js";
|
|
117
|
+
} from "./chunk-75ZREHD7.js";
|
|
118
118
|
import {
|
|
119
119
|
LlmCallError,
|
|
120
120
|
LlmClient,
|
|
@@ -124,7 +124,7 @@ import {
|
|
|
124
124
|
callLlmJson,
|
|
125
125
|
probeLlm,
|
|
126
126
|
stripFencedJson
|
|
127
|
-
} from "./chunk-4S4BM3QQ.js";
|
|
127
|
+
} from "./chunk-M6RZ5LJN.js";
|
|
128
128
|
import {
|
|
129
129
|
evaluateInterimReleaseConfidence,
|
|
130
130
|
pairedEvalueSequence
|
|
@@ -141,7 +141,7 @@ import {
|
|
|
141
141
|
requiredSampleSize,
|
|
142
142
|
researchReport,
|
|
143
143
|
summaryTable
|
|
144
|
-
} from "./chunk-5AKPEK5L.js";
|
|
144
|
+
} from "./chunk-CXJOVDJR.js";
|
|
145
145
|
import {
|
|
146
146
|
calibrateJudge,
|
|
147
147
|
calibrateJudgeContinuous,
|
|
@@ -160,24 +160,43 @@ import {
|
|
|
160
160
|
verbosityBias,
|
|
161
161
|
weightedMean,
|
|
162
162
|
wilcoxonSignedRank
|
|
163
|
-
} from "./chunk-R5UQJNKC.js";
|
|
163
|
+
} from "./chunk-4L3WJXQJ.js";
|
|
164
164
|
import {
|
|
165
165
|
DEFAULT_REDACTION_RULES,
|
|
166
|
+
DEFAULT_TRACE_ANALYST_BUDGETS,
|
|
166
167
|
FileSystemTraceStore,
|
|
167
168
|
InMemoryTraceStore,
|
|
168
169
|
OTEL_AGENT_EVAL_SCOPE,
|
|
170
|
+
OtlpFileTraceStore,
|
|
169
171
|
REDACTION_VERSION,
|
|
170
172
|
ReplayCache,
|
|
171
173
|
ReplayCacheMissError,
|
|
174
|
+
SpanNotFoundError,
|
|
175
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
176
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
177
|
+
TRACE_ANALYST_SUBAGENT_DESCRIPTION,
|
|
178
|
+
TRACE_ANALYST_TRUNCATION_MARKER_PREFIX,
|
|
172
179
|
TraceFileMissingError,
|
|
180
|
+
TraceNotFoundError,
|
|
173
181
|
analyzeTraces,
|
|
174
182
|
buildTraceAnalystTools,
|
|
183
|
+
buildTraceInsightContext,
|
|
184
|
+
buildTraceInsightPrompt,
|
|
175
185
|
createReplayFetch,
|
|
186
|
+
defaultTraceInsightPanel,
|
|
187
|
+
describeTraceInsightScope,
|
|
188
|
+
domainEvidencePattern,
|
|
176
189
|
exportRunAsOtlp,
|
|
190
|
+
inferDomainKeywords,
|
|
177
191
|
iterateRawCalls,
|
|
192
|
+
planTraceInsightQuestions,
|
|
178
193
|
redactString,
|
|
179
|
-
redactValue
|
|
180
|
-
} from "./chunk-UW4NOOZI.js";
|
|
194
|
+
redactValue,
|
|
195
|
+
scoreTraceInsightReadiness,
|
|
196
|
+
tokenizeDomainWords,
|
|
197
|
+
traceAnalystFunctionGroup,
|
|
198
|
+
traceAnalystOnRunComplete
|
|
199
|
+
} from "./chunk-HIO4UIS5.js";
|
|
181
200
|
import {
|
|
182
201
|
aggregateLlm,
|
|
183
202
|
argHash,
|
|
@@ -201,7 +220,7 @@ import {
|
|
|
201
220
|
RunIntegrityError,
|
|
202
221
|
assertRunCaptured,
|
|
203
222
|
throwIfRunIncomplete
|
|
204
|
-
} from "./chunk-KTGTIOFD.js";
|
|
223
|
+
} from "./chunk-UBPIXOC4.js";
|
|
205
224
|
import {
|
|
206
225
|
FileSystemRawProviderSink,
|
|
207
226
|
InMemoryRawProviderSink,
|
|
@@ -229,7 +248,7 @@ import {
|
|
|
229
248
|
ReplayError,
|
|
230
249
|
ValidationError,
|
|
231
250
|
VerificationError
|
|
232
|
-
} from "./chunk-NG236HPC.js";
|
|
251
|
+
} from "./chunk-QYJT52YW.js";
|
|
233
252
|
import "./chunk-PZ5AY32C.js";
|
|
234
253
|
|
|
235
254
|
// src/run-score.ts
|
|
@@ -3237,6 +3256,107 @@ function suggestionForManifest(input) {
|
|
|
3237
3256
|
return "No action required.";
|
|
3238
3257
|
}
|
|
3239
3258
|
|
|
3259
|
+
// src/integrity/backend-integrity.ts
|
|
3260
|
+
var BackendIntegrityError = class extends AgentEvalError {
|
|
3261
|
+
constructor(message, report) {
|
|
3262
|
+
super("backend_integrity", message);
|
|
3263
|
+
this.report = report;
|
|
3264
|
+
}
|
|
3265
|
+
report;
|
|
3266
|
+
};
|
|
3267
|
+
function isStubRecord(rec) {
|
|
3268
|
+
return rec.tokenUsage.input === 0 && rec.tokenUsage.output === 0;
|
|
3269
|
+
}
|
|
3270
|
+
function isUncostedRecord(rec) {
|
|
3271
|
+
return rec.tokenUsage.output > 0 && rec.costUsd === 0;
|
|
3272
|
+
}
|
|
3273
|
+
function summarizeBackendIntegrity(records) {
|
|
3274
|
+
const totalRecords = records.length;
|
|
3275
|
+
let stubRecords = 0;
|
|
3276
|
+
let realRecords = 0;
|
|
3277
|
+
let uncostedRecords = 0;
|
|
3278
|
+
let totalInputTokens = 0;
|
|
3279
|
+
let totalOutputTokens = 0;
|
|
3280
|
+
let totalCostUsd = 0;
|
|
3281
|
+
for (const rec of records) {
|
|
3282
|
+
totalInputTokens += rec.tokenUsage.input;
|
|
3283
|
+
totalOutputTokens += rec.tokenUsage.output;
|
|
3284
|
+
totalCostUsd += rec.costUsd;
|
|
3285
|
+
if (isStubRecord(rec)) stubRecords++;
|
|
3286
|
+
else realRecords++;
|
|
3287
|
+
if (isUncostedRecord(rec)) uncostedRecords++;
|
|
3288
|
+
}
|
|
3289
|
+
const verdict = totalRecords === 0 ? "stub" : stubRecords === totalRecords ? "stub" : stubRecords === 0 ? "real" : "mixed";
|
|
3290
|
+
const diagnosis = buildDiagnosis({
|
|
3291
|
+
totalRecords,
|
|
3292
|
+
stubRecords,
|
|
3293
|
+
realRecords,
|
|
3294
|
+
uncostedRecords,
|
|
3295
|
+
totalInputTokens,
|
|
3296
|
+
totalOutputTokens,
|
|
3297
|
+
totalCostUsd,
|
|
3298
|
+
verdict
|
|
3299
|
+
});
|
|
3300
|
+
return {
|
|
3301
|
+
totalRecords,
|
|
3302
|
+
stubRecords,
|
|
3303
|
+
realRecords,
|
|
3304
|
+
uncostedRecords,
|
|
3305
|
+
totalInputTokens,
|
|
3306
|
+
totalOutputTokens,
|
|
3307
|
+
totalCostUsd,
|
|
3308
|
+
verdict,
|
|
3309
|
+
diagnosis
|
|
3310
|
+
};
|
|
3311
|
+
}
|
|
3312
|
+
function buildDiagnosis(r) {
|
|
3313
|
+
if (r.totalRecords === 0) {
|
|
3314
|
+
return "no records \u2014 eval produced zero runs; backend likely failed before first turn";
|
|
3315
|
+
}
|
|
3316
|
+
if (r.verdict === "stub") {
|
|
3317
|
+
return [
|
|
3318
|
+
`all ${r.totalRecords} records have zero token usage \u2014 the LLM backend was never called.`,
|
|
3319
|
+
"common causes: --backend sandbox without a sandbox bridge running; stub model returning hard-coded strings;",
|
|
3320
|
+
"auth misconfigured so requests were silently dropped before the LLM. Re-run with --backend tcloud and TANGLE_API_KEY set,",
|
|
3321
|
+
"or boot the cli-bridge / sandbox before invoking the eval."
|
|
3322
|
+
].join(" ");
|
|
3323
|
+
}
|
|
3324
|
+
if (r.verdict === "mixed") {
|
|
3325
|
+
const pct = (r.stubRecords / r.totalRecords * 100).toFixed(0);
|
|
3326
|
+
return [
|
|
3327
|
+
`${r.stubRecords}/${r.totalRecords} records (${pct}%) have zero token usage \u2014 the backend partially failed.`,
|
|
3328
|
+
"common causes: rate-limit cascade (429s after the first N personas);",
|
|
3329
|
+
"transient auth expiry mid-run; provider outage. Treat the affected records as missing data, not agent failures."
|
|
3330
|
+
].join(" ");
|
|
3331
|
+
}
|
|
3332
|
+
if (r.uncostedRecords > 0) {
|
|
3333
|
+
const pct = (r.uncostedRecords / r.totalRecords * 100).toFixed(0);
|
|
3334
|
+
return [
|
|
3335
|
+
`${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens).`,
|
|
3336
|
+
`${r.uncostedRecords} (${pct}%) have output tokens but costUsd=0 \u2014 cost ledger is mis-wired (no input-token`,
|
|
3337
|
+
"propagation from the runtime stream into RunRecord)."
|
|
3338
|
+
].join(" ");
|
|
3339
|
+
}
|
|
3340
|
+
return `${r.totalRecords} records with real LLM activity (in=${r.totalInputTokens}, out=${r.totalOutputTokens} tokens, $${r.totalCostUsd.toFixed(4)}).`;
|
|
3341
|
+
}
|
|
3342
|
+
function assertRealBackend(records, opts = {}) {
|
|
3343
|
+
const report = summarizeBackendIntegrity(records);
|
|
3344
|
+
const allowMixed = opts.allowMixed ?? true;
|
|
3345
|
+
if (report.verdict === "stub") {
|
|
3346
|
+
throw new BackendIntegrityError(
|
|
3347
|
+
`backend-integrity: ran against a stub or unconfigured backend \u2014 ${report.diagnosis}`,
|
|
3348
|
+
report
|
|
3349
|
+
);
|
|
3350
|
+
}
|
|
3351
|
+
if (!allowMixed && report.verdict === "mixed") {
|
|
3352
|
+
throw new BackendIntegrityError(
|
|
3353
|
+
`backend-integrity: partial backend failure rejected \u2014 ${report.diagnosis}`,
|
|
3354
|
+
report
|
|
3355
|
+
);
|
|
3356
|
+
}
|
|
3357
|
+
return report;
|
|
3358
|
+
}
|
|
3359
|
+
|
|
3240
3360
|
// src/judges.ts
|
|
3241
3361
|
function createDomainExpertJudge(domain) {
|
|
3242
3362
|
return async (tc, { scenario, turns }) => {
|
|
@@ -10299,6 +10419,7 @@ export {
|
|
|
10299
10419
|
AnalystRegistry,
|
|
10300
10420
|
AxGepaSteeringOptimizer,
|
|
10301
10421
|
BENCHMARK_SPLIT_SEED,
|
|
10422
|
+
BackendIntegrityError,
|
|
10302
10423
|
BenchmarkRunner,
|
|
10303
10424
|
BudgetBreachError,
|
|
10304
10425
|
BudgetGuard,
|
|
@@ -10320,6 +10441,7 @@ export {
|
|
|
10320
10441
|
DEFAULT_RED_TEAM_CORPUS,
|
|
10321
10442
|
DEFAULT_RUN_SCORE_WEIGHTS,
|
|
10322
10443
|
DEFAULT_SEVERITY_WEIGHTS,
|
|
10444
|
+
DEFAULT_TRACE_ANALYST_BUDGETS,
|
|
10323
10445
|
DEFAULT_TRACE_ANALYST_KINDS,
|
|
10324
10446
|
Dataset,
|
|
10325
10447
|
DockerSandboxDriver,
|
|
@@ -10367,6 +10489,7 @@ export {
|
|
|
10367
10489
|
NoopResearcher,
|
|
10368
10490
|
NotFoundError,
|
|
10369
10491
|
OTEL_AGENT_EVAL_SCOPE,
|
|
10492
|
+
OtlpFileTraceStore,
|
|
10370
10493
|
PairwiseSteeringOptimizer,
|
|
10371
10494
|
ProductClient,
|
|
10372
10495
|
PromptRegistry,
|
|
@@ -10383,10 +10506,17 @@ export {
|
|
|
10383
10506
|
SEMANTIC_CONCEPT_JUDGE_VERSION,
|
|
10384
10507
|
SandboxHarness,
|
|
10385
10508
|
ScenarioRegistry,
|
|
10509
|
+
SpanNotFoundError,
|
|
10386
10510
|
SubprocessSandboxDriver,
|
|
10511
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
10512
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
10513
|
+
TRACE_ANALYST_SUBAGENT_DESCRIPTION,
|
|
10514
|
+
TRACE_ANALYST_TRUNCATION_MARKER_PREFIX,
|
|
10387
10515
|
TRACE_SCHEMA_VERSION,
|
|
10388
10516
|
TokenCounter,
|
|
10389
10517
|
TraceEmitter,
|
|
10518
|
+
TraceFileMissingError,
|
|
10519
|
+
TraceNotFoundError,
|
|
10390
10520
|
TrialTelemetry,
|
|
10391
10521
|
UNIVERSAL_FINDERS,
|
|
10392
10522
|
ValidationError,
|
|
@@ -10399,8 +10529,10 @@ export {
|
|
|
10399
10529
|
allCriticalPassed,
|
|
10400
10530
|
analyzeAntiSlop,
|
|
10401
10531
|
analyzeSeries,
|
|
10532
|
+
analyzeTraces,
|
|
10402
10533
|
argHash,
|
|
10403
10534
|
assertLlmRoute,
|
|
10535
|
+
assertRealBackend,
|
|
10404
10536
|
assertReleaseConfidence,
|
|
10405
10537
|
assertRunCaptured,
|
|
10406
10538
|
assignFeedbackSplit,
|
|
@@ -10415,6 +10547,9 @@ export {
|
|
|
10415
10547
|
bootstrapCi,
|
|
10416
10548
|
buildReflectionPrompt,
|
|
10417
10549
|
buildReviewerPrompt,
|
|
10550
|
+
buildTraceAnalystTools,
|
|
10551
|
+
buildTraceInsightContext,
|
|
10552
|
+
buildTraceInsightPrompt,
|
|
10418
10553
|
buildTraceToolsForGroup,
|
|
10419
10554
|
buildTrajectory,
|
|
10420
10555
|
byteLengthRange,
|
|
@@ -10479,10 +10614,13 @@ export {
|
|
|
10479
10614
|
defaultMultiShotObjectives,
|
|
10480
10615
|
defaultProviderRedactor,
|
|
10481
10616
|
defaultReferenceReplayMatcher,
|
|
10617
|
+
defaultTraceInsightPanel,
|
|
10482
10618
|
deployGateLayer,
|
|
10619
|
+
describeTraceInsightScope,
|
|
10483
10620
|
diffFindings,
|
|
10484
10621
|
discoverPersonas,
|
|
10485
10622
|
distillPlaybook,
|
|
10623
|
+
domainEvidencePattern,
|
|
10486
10624
|
dominates,
|
|
10487
10625
|
estimateCost,
|
|
10488
10626
|
estimateTokens,
|
|
@@ -10526,6 +10664,7 @@ export {
|
|
|
10526
10664
|
httpGithubClient,
|
|
10527
10665
|
inMemoryReferenceReplayStore,
|
|
10528
10666
|
inMemoryReviewStore,
|
|
10667
|
+
inferDomainKeywords,
|
|
10529
10668
|
integrationAsi,
|
|
10530
10669
|
integrationGateEvals,
|
|
10531
10670
|
integrationInvokeFailedPayload,
|
|
@@ -10583,6 +10722,7 @@ export {
|
|
|
10583
10722
|
partialCredit,
|
|
10584
10723
|
passOrthogonality,
|
|
10585
10724
|
pixelDeltaRatio,
|
|
10725
|
+
planTraceInsightQuestions,
|
|
10586
10726
|
politenessPrefixMutator,
|
|
10587
10727
|
positionalBias,
|
|
10588
10728
|
printDriverSummary,
|
|
@@ -10651,6 +10791,7 @@ export {
|
|
|
10651
10791
|
scoreKnowledgeReadiness,
|
|
10652
10792
|
scoreRedTeamOutput,
|
|
10653
10793
|
scoreReferenceReplay,
|
|
10794
|
+
scoreTraceInsightReadiness,
|
|
10654
10795
|
securityJudge,
|
|
10655
10796
|
selectHarnessVariant,
|
|
10656
10797
|
selfPreference,
|
|
@@ -10664,6 +10805,7 @@ export {
|
|
|
10664
10805
|
stripFencedJson,
|
|
10665
10806
|
subjectiveEval,
|
|
10666
10807
|
summarize,
|
|
10808
|
+
summarizeBackendIntegrity,
|
|
10667
10809
|
summarizeHarnessResults,
|
|
10668
10810
|
summarizePreferenceMemory,
|
|
10669
10811
|
summaryTable,
|
|
@@ -10672,8 +10814,11 @@ export {
|
|
|
10672
10814
|
throwIfRunIncomplete,
|
|
10673
10815
|
toLangfuseEnvelope,
|
|
10674
10816
|
toPrometheusText,
|
|
10817
|
+
tokenizeDomainWords,
|
|
10675
10818
|
toolNamesForRun,
|
|
10676
10819
|
toolSpans,
|
|
10820
|
+
traceAnalystFunctionGroup,
|
|
10821
|
+
traceAnalystOnRunComplete,
|
|
10677
10822
|
trialTraceFromMultiShotTrial,
|
|
10678
10823
|
typoMutator,
|
|
10679
10824
|
urlContains,
|