@tangle-network/agent-eval 0.20.12 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +177 -0
- package/README.md +43 -1
- package/dist/{chunk-KWUAAIHR.js → chunk-4W4NCYM2.js} +182 -1
- package/dist/chunk-4W4NCYM2.js.map +1 -0
- package/dist/{chunk-PKCVBYTQ.js → chunk-5IIQKMD5.js} +38 -2
- package/dist/chunk-5IIQKMD5.js.map +1 -0
- package/dist/{chunk-HNJLMAJ2.js → chunk-6KQG5HAH.js} +2 -2
- package/dist/chunk-6M774GY6.js +53 -0
- package/dist/chunk-6M774GY6.js.map +1 -0
- package/dist/{chunk-MCMV7DUL.js → chunk-ARZ6BEV6.js} +2 -2
- package/dist/chunk-IOXMGMHQ.js +1226 -0
- package/dist/chunk-IOXMGMHQ.js.map +1 -0
- package/dist/{chunk-75MCTH7P.js → chunk-KAO3Q65R.js} +198 -3
- package/dist/chunk-KAO3Q65R.js.map +1 -0
- package/dist/chunk-QUKKGHTZ.js +121 -0
- package/dist/chunk-QUKKGHTZ.js.map +1 -0
- package/dist/chunk-SQQLHODJ.js +163 -0
- package/dist/chunk-SQQLHODJ.js.map +1 -0
- package/dist/{chunk-IKFVX537.js → chunk-UAND2LOT.js} +232 -211
- package/dist/chunk-UAND2LOT.js.map +1 -0
- package/dist/{chunk-HKYRWNHV.js → chunk-USHQBPMH.js} +283 -7
- package/dist/chunk-USHQBPMH.js.map +1 -0
- package/dist/cli.js +3 -2
- package/dist/cli.js.map +1 -1
- package/dist/{control-C8NKbF3w.d.ts → control-cxwMOAsy.d.ts} +3 -2
- package/dist/control.d.ts +4 -3
- package/dist/control.js +2 -2
- package/dist/emitter-B2XqDKFU.d.ts +121 -0
- package/dist/{feedback-trajectory-BGQ_ANCN.d.ts → feedback-trajectory-CB0A32o3.d.ts} +2 -1
- package/dist/index.d.ts +16 -302
- package/dist/index.js +70 -62
- package/dist/index.js.map +1 -1
- package/dist/integrity-K2oVlF57.d.ts +210 -0
- package/dist/openapi.json +1 -1
- package/dist/optimization-UVDNKaO6.d.ts +574 -0
- package/dist/optimization.d.ts +7 -144
- package/dist/optimization.js +9 -2
- package/dist/reporting-B82RSv9C.d.ts +593 -0
- package/dist/reporting.d.ts +5 -426
- package/dist/reporting.js +17 -6
- package/dist/{emitter-BYO2nSDA.d.ts → store-u47QaJ9G.d.ts} +1 -91
- package/dist/{multi-shot-optimization-Bvtz294B.d.ts → summary-report-D4p7RlDu.d.ts} +381 -1
- package/dist/traces.d.ts +179 -3
- package/dist/traces.js +35 -4
- package/dist/wire/index.js +3 -2
- package/docs/research-report-methodology.md +170 -0
- package/docs/wire-protocol.md +1 -1
- package/package.json +11 -13
- package/dist/chunk-75MCTH7P.js.map +0 -1
- package/dist/chunk-HKYRWNHV.js.map +0 -1
- package/dist/chunk-IKFVX537.js.map +0 -1
- package/dist/chunk-KWUAAIHR.js.map +0 -1
- package/dist/chunk-ODFINDLQ.js +0 -413
- package/dist/chunk-ODFINDLQ.js.map +0 -1
- package/dist/chunk-PKCVBYTQ.js.map +0 -1
- /package/dist/{chunk-HNJLMAJ2.js.map → chunk-6KQG5HAH.js.map} +0 -0
- /package/dist/{chunk-MCMV7DUL.js.map → chunk-ARZ6BEV6.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
stopOnNoProgress,
|
|
20
20
|
stopOnRepeatedAction,
|
|
21
21
|
subjectiveEval
|
|
22
|
-
} from "./chunk-MCMV7DUL.js";
|
|
22
|
+
} from "./chunk-ARZ6BEV6.js";
|
|
23
23
|
import {
|
|
24
24
|
CallbackResearcher,
|
|
25
25
|
DEFAULT_MUTATION_PRIMITIVES,
|
|
@@ -46,6 +46,7 @@ import {
|
|
|
46
46
|
renderPreferenceMemoryMarkdown,
|
|
47
47
|
replayFeedbackTrajectories,
|
|
48
48
|
replayFeedbackTrajectory,
|
|
49
|
+
runEvalCampaign,
|
|
49
50
|
runMultiShotOptimization,
|
|
50
51
|
runPromptEvolution,
|
|
51
52
|
scalarScore,
|
|
@@ -53,7 +54,7 @@ import {
|
|
|
53
54
|
summarizePreferenceMemory,
|
|
54
55
|
trialTraceFromMultiShotTrial,
|
|
55
56
|
withAssignedFeedbackSplit
|
|
56
|
-
} from "./chunk-HKYRWNHV.js";
|
|
57
|
+
} from "./chunk-USHQBPMH.js";
|
|
57
58
|
import {
|
|
58
59
|
RunRecordValidationError,
|
|
59
60
|
isRunRecord,
|
|
@@ -64,31 +65,36 @@ import {
|
|
|
64
65
|
import {
|
|
65
66
|
assertReleaseConfidence,
|
|
66
67
|
bootstrapCi,
|
|
68
|
+
evaluateInterimReleaseConfidence,
|
|
67
69
|
evaluateReleaseConfidence,
|
|
68
|
-
gainHistogram,
|
|
69
70
|
judgeReplayGate,
|
|
70
|
-
|
|
71
|
+
pairedEvalueSequence,
|
|
71
72
|
releaseTraceEvidenceFromMultiShotTrials,
|
|
72
73
|
renderReleaseReport,
|
|
73
|
-
|
|
74
|
-
} from "./chunk-IKFVX537.js";
|
|
74
|
+
rubricPredictiveValidity
|
|
75
|
+
} from "./chunk-UAND2LOT.js";
|
|
75
76
|
import {
|
|
77
|
+
RESEARCH_REPORT_HARD_PAIR_FLOOR,
|
|
76
78
|
benjaminiHochberg,
|
|
77
79
|
bhAdjust,
|
|
78
80
|
bonferroni,
|
|
79
81
|
cohensD,
|
|
80
82
|
confidenceInterval,
|
|
83
|
+
gainHistogram,
|
|
81
84
|
interRaterReliability,
|
|
82
85
|
mannWhitneyU,
|
|
83
86
|
normalizeScores,
|
|
84
87
|
pairedBootstrap,
|
|
85
88
|
pairedTTest,
|
|
86
89
|
pairedWilcoxon,
|
|
90
|
+
paretoChart,
|
|
87
91
|
partialCredit,
|
|
88
92
|
requiredSampleSize,
|
|
93
|
+
researchReport,
|
|
94
|
+
summaryTable,
|
|
89
95
|
weightedMean,
|
|
90
96
|
wilcoxonSignedRank
|
|
91
|
-
} from "./chunk-ODFINDLQ.js";
|
|
97
|
+
} from "./chunk-IOXMGMHQ.js";
|
|
92
98
|
import {
|
|
93
99
|
DEFAULT_REDACTION_RULES,
|
|
94
100
|
DEFAULT_TRACE_ANALYST_BUDGETS,
|
|
@@ -98,6 +104,8 @@ import {
|
|
|
98
104
|
OTEL_AGENT_EVAL_SCOPE,
|
|
99
105
|
OtlpFileTraceStore,
|
|
100
106
|
REDACTION_VERSION,
|
|
107
|
+
ReplayCache,
|
|
108
|
+
ReplayCacheMissError,
|
|
101
109
|
SpanNotFoundError,
|
|
102
110
|
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
103
111
|
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
@@ -112,6 +120,7 @@ import {
|
|
|
112
120
|
buildTraceAnalystTools,
|
|
113
121
|
buildTraceInsightContext,
|
|
114
122
|
buildTraceInsightPrompt,
|
|
123
|
+
createReplayFetch,
|
|
115
124
|
defaultTraceInsightPanel,
|
|
116
125
|
describeTraceInsightScope,
|
|
117
126
|
domainEvidencePattern,
|
|
@@ -123,6 +132,7 @@ import {
|
|
|
123
132
|
isRetrievalSpan,
|
|
124
133
|
isSandboxSpan,
|
|
125
134
|
isToolSpan,
|
|
135
|
+
iterateRawCalls,
|
|
126
136
|
judgeSpans,
|
|
127
137
|
llmSpans,
|
|
128
138
|
planTraceInsightQuestions,
|
|
@@ -133,20 +143,42 @@ import {
|
|
|
133
143
|
scoreTraceInsightReadiness,
|
|
134
144
|
tokenizeDomainWords,
|
|
135
145
|
toolSpans,
|
|
136
|
-
traceAnalystFunctionGroup
|
|
137
|
-
|
|
146
|
+
traceAnalystFunctionGroup,
|
|
147
|
+
traceAnalystOnRunComplete
|
|
148
|
+
} from "./chunk-4W4NCYM2.js";
|
|
149
|
+
import {
|
|
150
|
+
RunIntegrityError,
|
|
151
|
+
assertRunCaptured,
|
|
152
|
+
throwIfRunIncomplete
|
|
153
|
+
} from "./chunk-QUKKGHTZ.js";
|
|
138
154
|
import {
|
|
139
155
|
TraceEmitter,
|
|
140
156
|
llmSpanFromProvider
|
|
141
|
-
} from "./chunk-PKCVBYTQ.js";
|
|
157
|
+
} from "./chunk-5IIQKMD5.js";
|
|
158
|
+
import {
|
|
159
|
+
canonicalize,
|
|
160
|
+
evaluateHypothesis,
|
|
161
|
+
hashJson,
|
|
162
|
+
signManifest,
|
|
163
|
+
verifyManifest
|
|
164
|
+
} from "./chunk-6M774GY6.js";
|
|
142
165
|
import {
|
|
143
166
|
LlmCallError,
|
|
144
167
|
LlmClient,
|
|
168
|
+
LlmRouteAssertionError,
|
|
169
|
+
assertLlmRoute,
|
|
145
170
|
callLlm,
|
|
146
171
|
callLlmJson,
|
|
147
172
|
probeLlm,
|
|
148
173
|
stripFencedJson
|
|
149
|
-
} from "./chunk-75MCTH7P.js";
|
|
174
|
+
} from "./chunk-KAO3Q65R.js";
|
|
175
|
+
import {
|
|
176
|
+
FileSystemRawProviderSink,
|
|
177
|
+
InMemoryRawProviderSink,
|
|
178
|
+
NoopRawProviderSink,
|
|
179
|
+
defaultProviderRedactor,
|
|
180
|
+
providerFromBaseUrl
|
|
181
|
+
} from "./chunk-SQQLHODJ.js";
|
|
150
182
|
import "./chunk-PZ5AY32C.js";
|
|
151
183
|
|
|
152
184
|
// src/client.ts
|
|
@@ -4847,7 +4879,7 @@ var Dataset = class _Dataset {
|
|
|
4847
4879
|
* Write to disk for contamination-verifiable archives.
|
|
4848
4880
|
*/
|
|
4849
4881
|
toJsonl() {
|
|
4850
|
-
return this.scenarios.slice().sort((a, b) => a.id.localeCompare(b.id)).map((s) => JSON.stringify(canonicalize(s))).join("\n") + "\n";
|
|
4882
|
+
return this.scenarios.slice().sort((a, b) => a.id.localeCompare(b.id)).map((s) => JSON.stringify(canonicalize2(s))).join("\n") + "\n";
|
|
4851
4883
|
}
|
|
4852
4884
|
static fromJsonl(jsonl, manifest) {
|
|
4853
4885
|
const scenarios = [];
|
|
@@ -4860,18 +4892,18 @@ var Dataset = class _Dataset {
|
|
|
4860
4892
|
}
|
|
4861
4893
|
};
|
|
4862
4894
|
async function hashScenarios(scenarios) {
|
|
4863
|
-
const canonical = scenarios.slice().sort((a, b) => a.id.localeCompare(b.id)).map(canonicalize);
|
|
4895
|
+
const canonical = scenarios.slice().sort((a, b) => a.id.localeCompare(b.id)).map(canonicalize2);
|
|
4864
4896
|
const text = JSON.stringify(canonical);
|
|
4865
4897
|
const bytes = new TextEncoder().encode(text);
|
|
4866
4898
|
const digest = await globalThis.crypto.subtle.digest("SHA-256", bytes);
|
|
4867
4899
|
return Array.from(new Uint8Array(digest)).map((b) => b.toString(16).padStart(2, "0")).join("");
|
|
4868
4900
|
}
|
|
4869
|
-
function canonicalize(v) {
|
|
4901
|
+
function canonicalize2(v) {
|
|
4870
4902
|
if (v === null || typeof v !== "object") return v;
|
|
4871
|
-
if (Array.isArray(v)) return v.map(canonicalize);
|
|
4903
|
+
if (Array.isArray(v)) return v.map(canonicalize2);
|
|
4872
4904
|
const keys = Object.keys(v).sort();
|
|
4873
4905
|
const out = {};
|
|
4874
|
-
for (const k of keys) out[k] = canonicalize(v[k]);
|
|
4906
|
+
for (const k of keys) out[k] = canonicalize2(v[k]);
|
|
4875
4907
|
return out;
|
|
4876
4908
|
}
|
|
4877
4909
|
function seededShuffle(items, seed) {
|
|
@@ -6978,51 +7010,6 @@ function attributeStep(op, prmA, prmB) {
|
|
|
6978
7010
|
};
|
|
6979
7011
|
}
|
|
6980
7012
|
|
|
6981
|
-
// src/pre-registration.ts
|
|
6982
|
-
function canonicalize2(v) {
|
|
6983
|
-
if (v === null || typeof v !== "object") return v;
|
|
6984
|
-
if (Array.isArray(v)) return v.map(canonicalize2);
|
|
6985
|
-
const keys = Object.keys(v).sort();
|
|
6986
|
-
const out = {};
|
|
6987
|
-
for (const k of keys) out[k] = canonicalize2(v[k]);
|
|
6988
|
-
return out;
|
|
6989
|
-
}
|
|
6990
|
-
async function hashJson(obj) {
|
|
6991
|
-
const canonical = canonicalize2(obj);
|
|
6992
|
-
const bytes = new TextEncoder().encode(JSON.stringify(canonical));
|
|
6993
|
-
const digest = await globalThis.crypto.subtle.digest("SHA-256", bytes);
|
|
6994
|
-
return Array.from(new Uint8Array(digest)).map((b) => b.toString(16).padStart(2, "0")).join("");
|
|
6995
|
-
}
|
|
6996
|
-
async function signManifest(m) {
|
|
6997
|
-
const hash = await hashJson(m);
|
|
6998
|
-
return { ...m, contentHash: hash, algo: "sha256-content" };
|
|
6999
|
-
}
|
|
7000
|
-
async function verifyManifest(m) {
|
|
7001
|
-
const { contentHash, algo: _algo, ...rest } = m;
|
|
7002
|
-
void _algo;
|
|
7003
|
-
const resigned = await signManifest(rest);
|
|
7004
|
-
return resigned.contentHash === contentHash;
|
|
7005
|
-
}
|
|
7006
|
-
async function evaluateHypothesis(manifest, observed) {
|
|
7007
|
-
if (!await verifyManifest(manifest)) {
|
|
7008
|
-
throw new Error("evaluateHypothesis: manifest content hash mismatch (tampered)");
|
|
7009
|
-
}
|
|
7010
|
-
const reasons = [];
|
|
7011
|
-
const directionOk = manifest.direction === "increase" ? observed.effect > 0 : observed.effect < 0;
|
|
7012
|
-
if (!directionOk) reasons.push("wrong_direction");
|
|
7013
|
-
if (Math.abs(observed.effect) < manifest.minEffect) reasons.push("effect_too_small");
|
|
7014
|
-
if (observed.pValue >= manifest.alpha) reasons.push("not_significant");
|
|
7015
|
-
if (observed.n < manifest.preRegisteredN) reasons.push("undersampled");
|
|
7016
|
-
return {
|
|
7017
|
-
manifest,
|
|
7018
|
-
observedN: observed.n,
|
|
7019
|
-
observedEffect: observed.effect,
|
|
7020
|
-
observedPValue: observed.pValue,
|
|
7021
|
-
confirmed: reasons.length === 0,
|
|
7022
|
-
rejectionReasons: reasons
|
|
7023
|
-
};
|
|
7024
|
-
}
|
|
7025
|
-
|
|
7026
7013
|
// src/self-play.ts
|
|
7027
7014
|
async function runSelfPlay(proposer, scorer, targets, options = {}) {
|
|
7028
7015
|
if (targets.length < 2) throw new Error("runSelfPlay: at least 2 targets required (need a difference to measure)");
|
|
@@ -10481,6 +10468,7 @@ export {
|
|
|
10481
10468
|
FileSystemExperimentStore,
|
|
10482
10469
|
FileSystemFeedbackTrajectoryStore,
|
|
10483
10470
|
FileSystemOutcomeStore,
|
|
10471
|
+
FileSystemRawProviderSink,
|
|
10484
10472
|
FileSystemTraceStore,
|
|
10485
10473
|
HeldOutGate,
|
|
10486
10474
|
HoldoutAuditor,
|
|
@@ -10489,6 +10477,7 @@ export {
|
|
|
10489
10477
|
InMemoryExperimentStore,
|
|
10490
10478
|
InMemoryFeedbackTrajectoryStore,
|
|
10491
10479
|
InMemoryOutcomeStore,
|
|
10480
|
+
InMemoryRawProviderSink,
|
|
10492
10481
|
InMemoryTraceStore,
|
|
10493
10482
|
InMemoryTrialCache,
|
|
10494
10483
|
InMemoryWorkspaceInspector,
|
|
@@ -10497,12 +10486,14 @@ export {
|
|
|
10497
10486
|
LineageRecorder,
|
|
10498
10487
|
LlmCallError,
|
|
10499
10488
|
LlmClient,
|
|
10489
|
+
LlmRouteAssertionError,
|
|
10500
10490
|
LockedJsonlAppender,
|
|
10501
10491
|
MODEL_PRICING,
|
|
10502
10492
|
MetricsCollector,
|
|
10503
10493
|
MultiLayerVerifier,
|
|
10504
10494
|
MutationTelemetry,
|
|
10505
10495
|
Mutex,
|
|
10496
|
+
NoopRawProviderSink,
|
|
10506
10497
|
NoopResearcher,
|
|
10507
10498
|
OTEL_AGENT_EVAL_SCOPE,
|
|
10508
10499
|
OtlpFileTraceStore,
|
|
@@ -10512,7 +10503,11 @@ export {
|
|
|
10512
10503
|
ProjectRegistry,
|
|
10513
10504
|
PromptRegistry,
|
|
10514
10505
|
REDACTION_VERSION,
|
|
10506
|
+
RESEARCH_REPORT_HARD_PAIR_FLOOR,
|
|
10507
|
+
ReplayCache,
|
|
10508
|
+
ReplayCacheMissError,
|
|
10515
10509
|
RunCritic,
|
|
10510
|
+
RunIntegrityError,
|
|
10516
10511
|
RunRecordValidationError,
|
|
10517
10512
|
SEMANTIC_CONCEPT_JUDGE_VERSION,
|
|
10518
10513
|
SandboxHarness,
|
|
@@ -10539,7 +10534,9 @@ export {
|
|
|
10539
10534
|
analyzeSeries,
|
|
10540
10535
|
analyzeTraces,
|
|
10541
10536
|
argHash,
|
|
10537
|
+
assertLlmRoute,
|
|
10542
10538
|
assertReleaseConfidence,
|
|
10539
|
+
assertRunCaptured,
|
|
10543
10540
|
assignFeedbackSplit,
|
|
10544
10541
|
attributeCounterfactuals,
|
|
10545
10542
|
deterministicSplit as benchmarkDeterministicSplit,
|
|
@@ -10563,7 +10560,7 @@ export {
|
|
|
10563
10560
|
callLlm,
|
|
10564
10561
|
callLlmJson,
|
|
10565
10562
|
canaryLeakView,
|
|
10566
|
-
|
|
10563
|
+
canonicalize,
|
|
10567
10564
|
causalAttribution,
|
|
10568
10565
|
checkBehavioralCanary,
|
|
10569
10566
|
checkCanaries,
|
|
@@ -10597,6 +10594,7 @@ export {
|
|
|
10597
10594
|
createFeedbackTrajectory,
|
|
10598
10595
|
createIntentMatchJudge,
|
|
10599
10596
|
createLlmReviewer,
|
|
10597
|
+
createReplayFetch,
|
|
10600
10598
|
createSandboxCodeMutator,
|
|
10601
10599
|
createSandboxPool,
|
|
10602
10600
|
createSemanticConceptJudge,
|
|
@@ -10606,6 +10604,7 @@ export {
|
|
|
10606
10604
|
decideReferenceReplayRunPromotion,
|
|
10607
10605
|
defaultJudges,
|
|
10608
10606
|
defaultMultiShotObjectives,
|
|
10607
|
+
defaultProviderRedactor,
|
|
10609
10608
|
defaultReferenceReplayMatcher,
|
|
10610
10609
|
defaultTraceInsightPanel,
|
|
10611
10610
|
deployGateLayer,
|
|
@@ -10619,6 +10618,7 @@ export {
|
|
|
10619
10618
|
evaluateActionPolicy,
|
|
10620
10619
|
evaluateContract,
|
|
10621
10620
|
evaluateHypothesis,
|
|
10621
|
+
evaluateInterimReleaseConfidence,
|
|
10622
10622
|
evaluateOracles,
|
|
10623
10623
|
evaluateReleaseConfidence,
|
|
10624
10624
|
executeScenario,
|
|
@@ -10670,6 +10670,7 @@ export {
|
|
|
10670
10670
|
isRunRecord,
|
|
10671
10671
|
isSandboxSpan,
|
|
10672
10672
|
isToolSpan,
|
|
10673
|
+
iterateRawCalls,
|
|
10673
10674
|
jestTestParser,
|
|
10674
10675
|
jsonHasKeys,
|
|
10675
10676
|
jsonShape,
|
|
@@ -10698,6 +10699,7 @@ export {
|
|
|
10698
10699
|
objectiveEval,
|
|
10699
10700
|
outputLengthRubric,
|
|
10700
10701
|
pairedBootstrap,
|
|
10702
|
+
pairedEvalueSequence,
|
|
10701
10703
|
pairedTTest,
|
|
10702
10704
|
pairedWilcoxon,
|
|
10703
10705
|
paraphraseRobustness,
|
|
@@ -10720,6 +10722,7 @@ export {
|
|
|
10720
10722
|
probeLlm,
|
|
10721
10723
|
promptBisect,
|
|
10722
10724
|
proposeSynthesisTargets,
|
|
10725
|
+
providerFromBaseUrl,
|
|
10723
10726
|
pytestTestParser,
|
|
10724
10727
|
redTeamDataset,
|
|
10725
10728
|
redTeamReport,
|
|
@@ -10742,17 +10745,20 @@ export {
|
|
|
10742
10745
|
replayScorerOverCorpus,
|
|
10743
10746
|
replayTraceThroughJudge,
|
|
10744
10747
|
requiredSampleSize,
|
|
10748
|
+
researchReport,
|
|
10745
10749
|
resetLockedAppendersForTesting,
|
|
10746
10750
|
resumeBuilderSession,
|
|
10747
10751
|
roundTripRunRecord,
|
|
10748
10752
|
rowCount,
|
|
10749
10753
|
rowWhere,
|
|
10754
|
+
rubricPredictiveValidity,
|
|
10750
10755
|
runAgentControlLoop,
|
|
10751
10756
|
runAssertions,
|
|
10752
10757
|
runBehavioralCanaries,
|
|
10753
10758
|
runCanaries,
|
|
10754
10759
|
runCounterfactual,
|
|
10755
10760
|
runE2EWorkflow,
|
|
10761
|
+
runEvalCampaign,
|
|
10756
10762
|
runExpectations,
|
|
10757
10763
|
runFailureClass,
|
|
10758
10764
|
runHarnessExperiment,
|
|
@@ -10799,6 +10805,7 @@ export {
|
|
|
10799
10805
|
summaryTable,
|
|
10800
10806
|
testJudge,
|
|
10801
10807
|
textInSnapshot,
|
|
10808
|
+
throwIfRunIncomplete,
|
|
10802
10809
|
toLangfuseEnvelope,
|
|
10803
10810
|
toNdjson,
|
|
10804
10811
|
toPrometheusText,
|
|
@@ -10810,6 +10817,7 @@ export {
|
|
|
10810
10817
|
toolSuccessRubric,
|
|
10811
10818
|
toolWasteView,
|
|
10812
10819
|
traceAnalystFunctionGroup,
|
|
10820
|
+
traceAnalystOnRunComplete,
|
|
10813
10821
|
trialTraceFromMultiShotTrial,
|
|
10814
10822
|
typoMutator,
|
|
10815
10823
|
urlContains,
|