npm - @tangle-network/agent-eval - Versions diffs - 0.38.0 → 0.40.1 - Mend

@tangle-network/agent-eval 0.38.0 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/dist/campaign/index.d.ts +695 -0
package/dist/campaign/index.js +741 -0
package/dist/campaign/index.js.map +1 -0
package/dist/chunk-5U2DOJU4.js +565 -0
package/dist/chunk-5U2DOJU4.js.map +1 -0
package/dist/{chunk-KE7TDJUO.js → chunk-AU2JLNSZ.js} +2 -2
package/dist/{chunk-TSPOEDM3.js → chunk-BWZEGTES.js} +2 -5
package/dist/chunk-BWZEGTES.js.map +1 -0
package/dist/{chunk-3HYQXPC2.js → chunk-DMW5VENN.js} +3 -3
package/dist/{chunk-TQL7BAOY.js → chunk-EGIPWXHL.js} +2 -2
package/dist/chunk-GGE4NNQT.js +65 -0
package/dist/chunk-GGE4NNQT.js.map +1 -0
package/dist/{chunk-7PR3WPWE.js → chunk-L7XMNXLO.js} +2 -2
package/dist/{chunk-RL6TERL2.js → chunk-LCIDRYGP.js} +3 -3
package/dist/{chunk-L5UNCDAJ.js → chunk-MAOZCN36.js} +2 -64
package/dist/chunk-MAOZCN36.js.map +1 -0
package/dist/{chunk-LGAPK7NA.js → chunk-NKLGKF2Q.js} +2 -2
package/dist/chunk-TMXPFWC7.js +305 -0
package/dist/chunk-TMXPFWC7.js.map +1 -0
package/dist/{chunk-KHZRNY3F.js → chunk-WP7SY7AI.js} +5 -4
package/dist/chunk-WP7SY7AI.js.map +1 -0
package/dist/chunk-YV7J7X5N.js +313 -0
package/dist/chunk-YV7J7X5N.js.map +1 -0
package/dist/{control-DVrmvM_k.d.ts → control-CmLJk3IG.d.ts} +1 -1
package/dist/control.d.ts +3 -3
package/dist/control.js +2 -2
package/dist/{dataset-ueRVTUoY.d.ts → dataset-BlwAtYYf.d.ts} +1 -1
package/dist/{feedback-trajectory-iATEAHmc.d.ts → feedback-trajectory-Dvy-bt7x.d.ts} +1 -1
package/dist/governance/index.d.ts +133 -5
package/dist/index.d.ts +35 -34
package/dist/index.js +97 -630
package/dist/index.js.map +1 -1
package/dist/multishot/index.d.ts +21 -21
package/dist/multishot/index.js +64 -15
package/dist/multishot/index.js.map +1 -1
package/dist/openapi.json +1 -1
package/dist/optimization.d.ts +2 -2
package/dist/optimization.js +5 -5
package/dist/pipelines/index.js +2 -2
package/dist/red-team-30II1T4o.d.ts +63 -0
package/dist/{release-report-D2ykiLSe.d.ts → release-report-Di84bXD7.d.ts} +5 -2
package/dist/reporting.d.ts +2 -2
package/dist/reporting.js +3 -3
package/dist/rl.js +15 -315
package/dist/rl.js.map +1 -1
package/dist/run-campaign-JYJXYHHL.js +10 -0
package/dist/run-campaign-JYJXYHHL.js.map +1 -0
package/dist/traces.js +7 -5
package/dist/wire/index.d.ts +2 -2
package/docs/design/loop-taxonomy.md +233 -0
package/package.json +33 -24
package/dist/chunk-KHZRNY3F.js.map +0 -1
package/dist/chunk-L5UNCDAJ.js.map +0 -1
package/dist/chunk-TSPOEDM3.js.map +0 -1
package/dist/index-CN2agEaO.d.ts +0 -191
/package/dist/{chunk-KE7TDJUO.js.map → chunk-AU2JLNSZ.js.map} +0 -0
/package/dist/{chunk-3HYQXPC2.js.map → chunk-DMW5VENN.js.map} +0 -0
/package/dist/{chunk-TQL7BAOY.js.map → chunk-EGIPWXHL.js.map} +0 -0
/package/dist/{chunk-7PR3WPWE.js.map → chunk-L7XMNXLO.js.map} +0 -0
/package/dist/{chunk-RL6TERL2.js.map → chunk-LCIDRYGP.js.map} +0 -0
/package/dist/{chunk-LGAPK7NA.js.map → chunk-NKLGKF2Q.js.map} +0 -0

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DVrmvM_k.js';
+export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-CmLJk3IG.js';
 import { R as RunRecord, a as RunSplitTag } from './run-record-BGY6bHRh.js';
 export { e as AGENT_PROFILE_KINDS, A as AgentProfileCell, d as AgentProfileCellInput, f as AgentProfileCellSchemaVersion, g as AgentProfileCellValidationError, h as AgentProfileDimensionValue, i as AgentProfileHarness, j as AgentProfileJson, k as AgentProfileKind, l as AgentProfileSource, m as AgentProfileSourceInput, J as JudgeScoresRecord, c as RunJudgeMetadata, n as RunOutcome, o as RunRecordValidationError, b as RunTokenUsage, S as SandboxAgentProfileLike, p as agentProfileCellHashMaterial, q as agentProfileCellKey, r as assertRunAgentProfileCell, s as buildAgentProfileCell, t as buildSandboxAgentProfileCell, u as groupRunsByAgentProfileCell, v as isRunRecord, w as parseRunRecordSafe, x as requireAgentProfileCell, y as roundTripRunRecord, z as toAgentProfileJson, B as validateAgentProfileCell, C as validateRunRecord, D as verifyAgentProfileCell } from './run-record-BGY6bHRh.js';
 import { AxAIService, AxFunction } from '@ax-llm/ax';
@@ -10,16 +10,16 @@ import { L as LlmClientOptions, m as LlmCallRequest, n as LlmCallResult } from '
 export { C as CallbackResearcher, a as CallbackResearcherOptions, b as CampaignFactoryParams, c as CampaignIntegrityPolicy, d as CampaignRunContext, e as CampaignRunOutcome, f as CampaignRunner, g as CampaignScenario, h as CampaignVariant, E as EvalCampaignOptions, i as EvalCampaignResult, j as ExperimentPlan, k as ExperimentResult, F as FailedRun, l as FailureMode, o as LlmCallError, p as LlmClient, q as LlmMessage, s as LlmRouteAssertionError, t as LlmRouteRequirements, u as LlmUsage, N as NoopResearcher, R as Researcher, S as SteeringChange, v as assertLlmRoute, w as backoffMs, x as callLlm, y as callLlmJson, z as isTransientLlmError, A as probeLlm, r as runEvalCampaign, B as stripFencedJson } from './researcher-DeZ_EArp.js';
 import { TraceAnalysisStore, AnalyzeTracesOptions, OtelExporter, OtelExportConfig, AnalyzeTracesInput, AnalyzeTracesResult } from './traces.js';
 export { AnalyzeTracesTurnSnapshot, DEFAULT_REDACTION_RULES, DEFAULT_TRACE_ANALYST_BUDGETS, DatasetOverview, ExportableSpan, OTEL_AGENT_EVAL_SCOPE, OtlpExport, OtlpFileTraceStore, OtlpFileTraceStoreOptions, OtlpResourceSpans, OtlpSpan, QueryTracesPage, REDACTION_VERSION, RedactionReport, RedactionRule, ReplayCache, ReplayCacheEntry, ReplayCacheMissError, ReplayCacheStats, ReplayFetchOptions, SearchSpanResult, SearchTraceResult, SpanMatchRecord, SpanNotFoundError, TRACE_ANALYST_ACTOR_DESCRIPTION, TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION, TRACE_ANALYST_SUBAGENT_DESCRIPTION, TRACE_ANALYST_TRUNCATION_MARKER_PREFIX, TraceAnalystByteBudgets, TraceAnalystFilters, TraceAnalystHookOptions, TraceAnalystSpan, TraceAnalystSpanKind, TraceAnalystSpanStatus, TraceAnalystTraceSummary, TraceFileMissingError, TraceInsightContext, TraceInsightFinding, TraceInsightPanelRole, TraceInsightPromptInput, TraceInsightQualityGate, TraceInsightQuestion, TraceInsightReadiness, TraceInsightSuite, TraceInsightTask, TraceNotFoundError, ViewSpansResult, ViewTraceOversized, ViewTraceResult, analyzeTraces, buildTraceAnalystTools, buildTraceInsightContext, buildTraceInsightPrompt, createOtelExporter, createOtelTracingStore, createReplayFetch, defaultTraceInsightPanel, describeTraceInsightScope, domainEvidencePattern, exportRunAsOtlp, inferDomainKeywords, iterateRawCalls, otelRunCompleteHook, planTraceInsightQuestions, redactString, redactValue, scoreTraceInsightReadiness, tokenizeDomainWords, traceAnalystFunctionGroup, traceAnalystOnRunComplete } from './traces.js';
-import { t as JudgeInput, u as JudgeFn, v as BenchmarkRunnerConfig, S as Scenario, x as BenchmarkReport, y as ProductClientConfig, C as CheckResult, T as TestResult, z as PersonaConfig, D as DriverResult, A as DriverState, E as CollectedArtifacts, F as ScenarioResult, i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard, G as TurnMetrics, H as ScenarioFile, I as CompletionCriterion } from './release-report-D2ykiLSe.js';
-export { K as ArtifactCheck, L as ArtifactResult, B as BootstrapOptions, a as BootstrapResult, M as CorpusAgreementOptions, N as CorpusAgreementPerDimension, O as CorpusAgreementReport, Q as CorpusScoreRecord, U as EvalResult, W as FeedbackPattern, X as JudgeConfig, J as JudgeReplayGateArgs, Y as JudgeRubric, Z as JudgeScore, P as PairedBootstrapOptions, b as PairedBootstrapResult, _ as PersonaRigor, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, $ as RouteMap, a0 as RubricDimension, a1 as Turn, a2 as TurnResult, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, a3 as bonferroni, n as bootstrapCi, a4 as cohensD, a5 as confidenceInterval, a6 as corpusInterRaterAgreement, a7 as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, a8 as interRaterReliability, p as judgeReplayGate, a9 as mannWhitneyU, aa as normalizeScores, q as pairedBootstrap, ab as pairedMde, ac as pairedTTest, ad as partialCredit, r as releaseTraceEvidenceFromMultiShotTrials, s as renderReleaseReport, ae as requiredSampleSize, af as weightedMean, w as wilcoxonSignedRank } from './release-report-D2ykiLSe.js';
+import { t as JudgeInput, u as JudgeFn, v as BenchmarkRunnerConfig, S as Scenario, x as BenchmarkReport, y as ProductClientConfig, C as CheckResult, T as TestResult, z as PersonaConfig, D as DriverResult, A as DriverState, E as CollectedArtifacts, F as ScenarioResult, i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard, G as TurnMetrics, H as ScenarioFile, I as CompletionCriterion } from './release-report-Di84bXD7.js';
+export { K as ArtifactCheck, L as ArtifactResult, B as BootstrapOptions, a as BootstrapResult, M as CorpusAgreementOptions, N as CorpusAgreementPerDimension, O as CorpusAgreementReport, Q as CorpusScoreRecord, U as EvalResult, W as FeedbackPattern, X as JudgeConfig, J as JudgeReplayGateArgs, Y as JudgeRubric, Z as JudgeScore, P as PairedBootstrapOptions, b as PairedBootstrapResult, _ as PersonaRigor, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, $ as RouteMap, a0 as RubricDimension, a1 as Turn, a2 as TurnResult, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, a3 as bonferroni, n as bootstrapCi, a4 as cohensD, a5 as confidenceInterval, a6 as corpusInterRaterAgreement, a7 as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, a8 as interRaterReliability, p as judgeReplayGate, a9 as mannWhitneyU, aa as normalizeScores, q as pairedBootstrap, ab as pairedMde, ac as pairedTTest, ad as partialCredit, r as releaseTraceEvidenceFromMultiShotTrials, s as renderReleaseReport, ae as requiredSampleSize, af as weightedMean, w as wilcoxonSignedRank } from './release-report-Di84bXD7.js';
 import { TCloud } from '@tangle-network/tcloud';
 import { z } from 'zod';
 import { C as ControlEvalResult } from './control-runtime-BZ_lVLYW.js';
 export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-BZ_lVLYW.js';
 import { A as AgentEvalError } from './errors-mje_cKOs.js';
 export { a as AgentEvalErrorCode, C as CaptureIntegrityError, b as ConfigError, J as JudgeError, N as NotFoundError, R as ReplayError, V as ValidationError, c as VerificationError } from './errors-mje_cKOs.js';
-import { b as FeedbackLabel, n as FeedbackTrajectoryStore, l as FeedbackTrajectory } from './feedback-trajectory-iATEAHmc.js';
-export { F as FeedbackArtifactType, a as FeedbackAttempt, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, m as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-iATEAHmc.js';
+import { b as FeedbackLabel, n as FeedbackTrajectoryStore, l as FeedbackTrajectory } from './feedback-trajectory-Dvy-bt7x.js';
+export { F as FeedbackArtifactType, a as FeedbackAttempt, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, m as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-Dvy-bt7x.js';
 import { A as ActionableSideInfo, h as MultiShotRunner, j as MultiShotScorer, c as MultiShotMutateAdapter, a4 as HeldOutGateConfig, E as EvolvableVariant, m as MultiShotTrialResult, e as MultiShotOptimizationResult, a3 as GateDecision, a5 as Objective, a6 as ParetoResult, V as VariantAggregate, t as TrialResult, o as MutateAdapter, T as TrialCache } from './summary-report-DuZXOk7K.js';
 export { a as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, a7 as Direction, C as GainDistributionBin, F as GainDistributionFigureSpec, H as GainDistributionOptions, a8 as GateEvidence, G as GenerationReport, a9 as HeldOutGate, aa as HeldOutGateRejectionCode, I as InMemoryTrialCache, M as MultiShotGateConfig, b as MultiShotGateResult, d as MultiShotOptimizationConfig, f as MultiShotRun, g as MultiShotRunInput, i as MultiShotScore, k as MultiShotSplit, l as MultiShotTrace, n as MultiShotVariant, J as ParetoFigureSpec, K as ParetoPoint, P as PromptEvolutionConfig, p as PromptEvolutionEvent, q as PromptEvolutionResult, L as RESEARCH_REPORT_HARD_PAIR_FLOOR, R as ReflectionContext, r as ReflectionProposal, N as ResearchReport, O as ResearchReportCandidate, Q as ResearchReportDecision, U as ResearchReportMethodology, W as ResearchReportOptions, X as ResearchReportRecommendation, S as ScenarioAggregate, s as ScoreAdapter, Y as SummaryTable, Z as SummaryTableOptions, _ as SummaryTableRow, u as TrialTrace, v as buildReflectionPrompt, ab as crowdingDistance, w as defaultMultiShotObjectives, ac as dominates, $ as gainHistogram, a0 as paretoChart, ad as paretoFrontier, ae as paretoFrontierWithCrowding, x as parseReflectionResponse, a1 as researchReport, y as runMultiShotOptimization, z as runPromptEvolution, af as scalarScore, a2 as summaryTable, B as trialTraceFromMultiShotTrial } from './summary-report-DuZXOk7K.js';
 export { DataAcquisitionPlan, KnowledgeAcquisitionMode, KnowledgeBundle, KnowledgeFallbackPolicy, KnowledgeFreshness, KnowledgeImportance, KnowledgeReadinessReport, KnowledgeRecommendedAction, KnowledgeRequirement, KnowledgeRequirementCategory, KnowledgeResponsibleSurface, KnowledgeSensitivity, ScoreKnowledgeReadinessOptions, UserQuestion, acquisitionPlansForKnowledgeGaps, blockingKnowledgeEval, knowledgeReadinessTracePayload, scoreKnowledgeReadiness, userQuestionsForKnowledgeGaps } from './knowledge/index.js';
@@ -35,11 +35,12 @@ import { a as BaselineReport } from './baseline-4R5deP0N.js';
 export { B as BaselineOptions, M as MetricSamples, b as MetricVerdict, T as ToolStats, d as ToolUseMetrics, e as ToolUseOptions, f as compareToBaseline, c as computeToolUseMetrics, i as iqr, w as welchsTTest } from './baseline-4R5deP0N.js';
 import { T as Trajectory, a as TrajectoryStep } from './trajectory-CnoBo-JY.js';
 export { b as buildTrajectory } from './trajectory-CnoBo-JY.js';
-import { a as DatasetScenario, c as Dataset } from './dataset-ueRVTUoY.js';
-export { d as DatasetDifficulty, b as DatasetManifest, e as DatasetProvenance, D as DatasetSplit, H as HoldoutLockedError, S as SliceOptions, h as hashScenarios } from './dataset-ueRVTUoY.js';
+import { a as DatasetScenario, b as Dataset } from './dataset-BlwAtYYf.js';
+export { d as DatasetDifficulty, c as DatasetManifest, e as DatasetProvenance, D as DatasetSplit, H as HoldoutLockedError, S as SliceOptions, h as hashScenarios } from './dataset-BlwAtYYf.js';
 export { b as CalibrationResult, c as CandidateScore, a as ContinuousAgreement, C as ContinuousAgreementOptions, d as ContinuousCalibrationResult, G as GoldenItem, P as PositionalBiasResult, S as SelfPreferenceResult, V as VerbosityBiasResult, e as calibrateJudge, f as calibrateJudgeContinuous, g as continuousAgreement, p as positionalBias, s as selfPreference, v as verbosityBias } from './judge-calibration-DilmB3Ml.js';
-export { D as DEFAULT_RED_TEAM_CORPUS, E as EuRiskClass, G as GovernanceContext, a as GovernanceFinding, b as GovernanceReport, R as RedTeamCase, c as RedTeamCategory, d as RedTeamFinding, e as RedTeamPayload, f as RedTeamReport, U as UseCaseSignals, g as classifyEuAiRisk, h as euAiActReport, n as nistAiRmfReport, r as redTeamDataset, i as redTeamReport, j as renderMarkdown, s as scoreRedTeamOutput, k as soc2Report, l as summarize, t as toolNamesForRun } from './index-CN2agEaO.js';
+export { D as DEFAULT_RED_TEAM_CORPUS, R as RedTeamCase, a as RedTeamCategory, b as RedTeamFinding, c as RedTeamPayload, d as RedTeamReport, r as redTeamDataset, e as redTeamReport, s as scoreRedTeamOutput, t as toolNamesForRun } from './red-team-30II1T4o.js';
 import { a as PrmGrader } from './rubric-D5tjHNJQ.js';
+export { EuRiskClass, GovernanceContext, GovernanceFinding, GovernanceReport, UseCaseSignals, classifyEuAiRisk, euAiActReport, nistAiRmfReport, renderMarkdown, soc2Report, summarize } from './governance/index.js';
 export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as benchmarkDeterministicSplit, i as benchmarks } from './index-0pu_fBwZ.js';
 export { I as InterimReleaseConfidence, a as InterimReleaseConfidenceInput, P as PairedEvalueOptions, b as PairedEvalueSequence, c as PairedEvalueStep, S as SequentialDecision, e as evaluateInterimReleaseConfidence, p as pairedEvalueSequence } from './sequential-5iSVfzl2.js';
 import './outcome-store-D6KWmYvj.js';
@@ -1159,7 +1160,7 @@ interface AnalystHooks {
         analyst: Analyst;
         error: Error;
         runId: string;
-    }): AnalystFinding[] | void | Promise<AnalystFinding[] | void>;
+    }): AnalystFinding[] | undefined | Promise<AnalystFinding[] | undefined>;
     /** Once after registry.run() completes. Use for final aggregation, persistence. */
     onComplete?(args: {
         result: AnalystRunResult;
@@ -6311,6 +6312,31 @@ declare function withOtelPipeline(opts?: OtelPipelineOptions): OtelPipelineHandl
  */
 declare function isOtelConfigured(): boolean;
+/**
+ * Traced analyst wrapper — instruments `analyzeTraces` with spans so the
+ * analyst's internal LLM calls (actor + responder turns) appear in the
+ * trace tree. Also wraps each actor turn callback with a span.
+ *
+ * Since the analyst uses @ax-llm/ax internally (an agent framework with
+ * its own turn loop), we cannot wrap individual `tc.chat()` calls without
+ * forking ax. Instead, we wrap at the boundary:
+ *   1. A parent span for the entire analyst run.
+ *   2. Per-turn child spans from the `onTurn` callback (captures code,
+ *      output size, error status).
+ *   3. Summary attributes on the parent (total turns, usage, findings).
+ */
+interface TracedAnalystOptions {
+    /** TraceEmitter for span emission. */
+    emitter: TraceEmitter;
+    /** Parent span id. If omitted, uses emitter stack. */
+    parentSpanId?: string;
+}
+/**
+ * Run `analyzeTraces` wrapped in a parent span with per-turn child spans.
+ */
+declare function tracedAnalyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions, traceOpts: TracedAnalystOptions): Promise<AnalyzeTracesResult>;
 /**
  * Traced judge wrappers — instruments every LLM call inside the judge
  * ensemble with child spans so OTEL sinks see per-judge latency, model,
@@ -6337,31 +6363,6 @@ declare function traceJudge(judge: JudgeFn, judgeName: string, opts: TracedJudge
  */
 declare function traceJudgeEnsemble(judges: JudgeFn[], judgeNames: string[], opts: TracedJudgeOptions): JudgeFn;
-/**
- * Traced analyst wrapper — instruments `analyzeTraces` with spans so the
- * analyst's internal LLM calls (actor + responder turns) appear in the
- * trace tree. Also wraps each actor turn callback with a span.
- *
- * Since the analyst uses @ax-llm/ax internally (an agent framework with
- * its own turn loop), we cannot wrap individual `tc.chat()` calls without
- * forking ax. Instead, we wrap at the boundary:
- *   1. A parent span for the entire analyst run.
- *   2. Per-turn child spans from the `onTurn` callback (captures code,
- *      output size, error status).
- *   3. Summary attributes on the parent (total turns, usage, findings).
- */
-interface TracedAnalystOptions {
-    /** TraceEmitter for span emission. */
-    emitter: TraceEmitter;
-    /** Parent span id. If omitted, uses emitter stack. */
-    parentSpanId?: string;
-}
-/**
- * Run `analyzeTraces` wrapped in a parent span with per-turn child spans.
- */
-declare function tracedAnalyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions, traceOpts: TracedAnalystOptions): Promise<AnalyzeTracesResult>;
 /**
  * Traced mutator wrapper — instruments reflective-mutation LLM calls.
  *