npm - @tangle-network/agent-eval - Versions diffs - 0.37.0 → 0.40.1 - Mend

@tangle-network/agent-eval 0.37.0 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/dist/campaign/index.d.ts +695 -0
package/dist/campaign/index.js +741 -0
package/dist/campaign/index.js.map +1 -0
package/dist/chunk-5U2DOJU4.js +565 -0
package/dist/chunk-5U2DOJU4.js.map +1 -0
package/dist/{chunk-KE7TDJUO.js → chunk-AU2JLNSZ.js} +2 -2
package/dist/{chunk-TSPOEDM3.js → chunk-BWZEGTES.js} +2 -5
package/dist/chunk-BWZEGTES.js.map +1 -0
package/dist/{chunk-3HYQXPC2.js → chunk-DMW5VENN.js} +3 -3
package/dist/{chunk-TQL7BAOY.js → chunk-EGIPWXHL.js} +2 -2
package/dist/chunk-GGE4NNQT.js +65 -0
package/dist/chunk-GGE4NNQT.js.map +1 -0
package/dist/{chunk-7PR3WPWE.js → chunk-L7XMNXLO.js} +2 -2
package/dist/{chunk-RL6TERL2.js → chunk-LCIDRYGP.js} +3 -3
package/dist/{chunk-L5UNCDAJ.js → chunk-MAOZCN36.js} +2 -64
package/dist/chunk-MAOZCN36.js.map +1 -0
package/dist/{chunk-LGAPK7NA.js → chunk-NKLGKF2Q.js} +2 -2
package/dist/chunk-QWV226SL.js +276 -0
package/dist/chunk-QWV226SL.js.map +1 -0
package/dist/chunk-TMXPFWC7.js +305 -0
package/dist/chunk-TMXPFWC7.js.map +1 -0
package/dist/{chunk-KHZRNY3F.js → chunk-WP7SY7AI.js} +5 -4
package/dist/chunk-WP7SY7AI.js.map +1 -0
package/dist/chunk-YV7J7X5N.js +313 -0
package/dist/chunk-YV7J7X5N.js.map +1 -0
package/dist/{control-DVrmvM_k.d.ts → control-CmLJk3IG.d.ts} +1 -1
package/dist/control.d.ts +3 -3
package/dist/control.js +2 -2
package/dist/{dataset-ueRVTUoY.d.ts → dataset-BlwAtYYf.d.ts} +1 -1
package/dist/{feedback-trajectory-iATEAHmc.d.ts → feedback-trajectory-Dvy-bt7x.d.ts} +1 -1
package/dist/governance/index.d.ts +133 -5
package/dist/index.d.ts +35 -34
package/dist/index.js +97 -630
package/dist/index.js.map +1 -1
package/dist/matrix/index.d.ts +2 -109
package/dist/matrix/index.js +5 -270
package/dist/matrix/index.js.map +1 -1
package/dist/multishot/index.d.ts +276 -0
package/dist/multishot/index.js +516 -0
package/dist/multishot/index.js.map +1 -0
package/dist/openapi.json +1 -1
package/dist/optimization.d.ts +2 -2
package/dist/optimization.js +5 -5
package/dist/pipelines/index.js +2 -2
package/dist/red-team-30II1T4o.d.ts +63 -0
package/dist/{release-report-D2ykiLSe.d.ts → release-report-Di84bXD7.d.ts} +5 -2
package/dist/reporting.d.ts +2 -2
package/dist/reporting.js +3 -3
package/dist/rl.js +15 -315
package/dist/rl.js.map +1 -1
package/dist/run-campaign-JYJXYHHL.js +10 -0
package/dist/run-campaign-JYJXYHHL.js.map +1 -0
package/dist/traces.js +7 -5
package/dist/types-DHqkLwEU.d.ts +110 -0
package/dist/wire/index.d.ts +2 -2
package/docs/design/loop-taxonomy.md +233 -0
package/package.json +38 -24
package/dist/chunk-KHZRNY3F.js.map +0 -1
package/dist/chunk-L5UNCDAJ.js.map +0 -1
package/dist/chunk-TSPOEDM3.js.map +0 -1
package/dist/index-CN2agEaO.d.ts +0 -191
/package/dist/{chunk-KE7TDJUO.js.map → chunk-AU2JLNSZ.js.map} +0 -0
/package/dist/{chunk-3HYQXPC2.js.map → chunk-DMW5VENN.js.map} +0 -0
/package/dist/{chunk-TQL7BAOY.js.map → chunk-EGIPWXHL.js.map} +0 -0
/package/dist/{chunk-7PR3WPWE.js.map → chunk-L7XMNXLO.js.map} +0 -0
/package/dist/{chunk-RL6TERL2.js.map → chunk-LCIDRYGP.js.map} +0 -0
/package/dist/{chunk-LGAPK7NA.js.map → chunk-NKLGKF2Q.js.map} +0 -0

package/dist/{dataset-ueRVTUoY.d.ts → dataset-BlwAtYYf.d.ts} RENAMED Viewed

@@ -112,4 +112,4 @@ declare class Dataset {
 }
 declare function hashScenarios(scenarios: DatasetScenario[]): Promise<string>;
-export { type DatasetSplit as D, HoldoutLockedError as H, type SliceOptions as S, type DatasetScenario as a, type DatasetManifest as b, Dataset as c, type DatasetDifficulty as d, type DatasetProvenance as e, hashScenarios as h };
+export { type DatasetSplit as D, HoldoutLockedError as H, type SliceOptions as S, type DatasetScenario as a, Dataset as b, type DatasetManifest as c, type DatasetDifficulty as d, type DatasetProvenance as e, hashScenarios as h };

package/dist/{feedback-trajectory-iATEAHmc.d.ts → feedback-trajectory-Dvy-bt7x.d.ts} RENAMED Viewed

@@ -1,5 +1,5 @@
 import { C as ControlEvalResult, a as ControlRunResult, b as ControlStep } from './control-runtime-BZ_lVLYW.js';
-import { D as DatasetSplit, a as DatasetScenario } from './dataset-ueRVTUoY.js';
+import { D as DatasetSplit, a as DatasetScenario } from './dataset-BlwAtYYf.js';
 type FeedbackArtifactType = 'text' | 'code' | 'plan' | 'research' | 'action' | 'ui' | 'decision' | 'data' | 'other';
 type FeedbackLabelSource = 'user' | 'judge' | 'environment' | 'metric' | 'policy' | 'system';

package/dist/governance/index.d.ts CHANGED Viewed

@@ -1,6 +1,134 @@
-export { E as EuRiskClass, G as GovernanceContext, a as GovernanceFinding, b as GovernanceReport, U as UseCaseSignals, g as classifyEuAiRisk, h as euAiActReport, n as nistAiRmfReport, j as renderMarkdown, k as soc2Report, l as summarize } from '../index-CN2agEaO.js';
-import '../dataset-ueRVTUoY.js';
+import { c as DatasetManifest } from '../dataset-BlwAtYYf.js';
+import { b as CalibrationResult } from '../judge-calibration-DilmB3Ml.js';
+import { O as OutcomeStore } from '../outcome-store-D6KWmYvj.js';
+import { d as RedTeamReport } from '../red-team-30II1T4o.js';
+import { T as TraceStore } from '../store-Db2Bv8Cf.js';
 import '../errors-mje_cKOs.js';
-import '../judge-calibration-DilmB3Ml.js';
-import '../outcome-store-D6KWmYvj.js';
-import '../store-Db2Bv8Cf.js';
+/**
+ * Governance reporting — shared types.
+ *
+ * The framework collects a `GovernanceContext` (traces + outcomes +
+ * dataset manifests + red-team results + judge calibration) and each
+ * specific template (NIST AI RMF, SOC2, EU AI Act) renders a
+ * structured report from it.
+ *
+ * Reports are machine-readable JSON first; human-readable Markdown is a
+ * pure transform on top. External auditors consume the Markdown; CI
+ * consumes the JSON.
+ */
+interface GovernanceContext {
+    /** Legal / org identity for the report. */
+    organization: string;
+    /** System / agent identifier. */
+    systemName: string;
+    /** ISO8601 period the report covers. */
+    periodStart: string;
+    periodEnd: string;
+    /** Versioned dataset manifests used during the period. */
+    datasets: DatasetManifest[];
+    traceStore: TraceStore;
+    outcomeStore?: OutcomeStore;
+    /** Cached red-team results for the period, if available. */
+    redTeam?: RedTeamReport;
+    /** Judge-vs-human calibration results, if measured. */
+    judgeCalibration?: CalibrationResult[];
+    /** Responsible owner for the system — role + name + email. */
+    owner: {
+        role: string;
+        name: string;
+        email: string;
+    };
+}
+interface GovernanceFinding {
+    id: string;
+    severity: 'info' | 'low' | 'medium' | 'high' | 'critical';
+    /** Control reference the finding maps to (e.g. "NIST-AI-RMF:MEASURE-2.1"). */
+    control: string;
+    summary: string;
+    evidence?: string;
+    remediation?: string;
+}
+interface GovernanceReport {
+    framework: 'NIST-AI-RMF' | 'SOC2' | 'EU-AI-ACT';
+    version: string;
+    context: Pick<GovernanceContext, 'organization' | 'systemName' | 'periodStart' | 'periodEnd' | 'owner'>;
+    summary: {
+        findings: number;
+        bySeverity: Record<GovernanceFinding['severity'], number>;
+        overall: 'compliant' | 'compliant-with-findings' | 'non-compliant';
+    };
+    findings: GovernanceFinding[];
+    /** Framework-specific structured payload (mapped controls, risk class, etc.). */
+    payload: Record<string, unknown>;
+    generatedAt: string;
+}
+declare function renderMarkdown(report: GovernanceReport): string;
+declare function summarize(findings: GovernanceFinding[]): GovernanceReport['summary'];
+/**
+ * EU AI Act — risk-class classification + compliance checklist.
+ *
+ * Classification is declarative: caller supplies the domain/use-case
+ * signals (biometric? critical infrastructure? education? employment?
+ * access to services?) and we map to the Act's risk tiers:
+ *   - "unacceptable" (prohibited)
+ *   - "high"        (Annex III — strict obligations)
+ *   - "limited"     (transparency obligations)
+ *   - "minimal"     (voluntary codes of conduct)
+ *
+ * Then the compliance checklist enumerates Article 9 (risk mgmt),
+ * 10 (data + data governance), 11 (technical documentation), 13
+ * (transparency), 14 (human oversight), 15 (accuracy + robustness)
+ * requirements and flags gaps.
+ */
+type EuRiskClass = 'unacceptable' | 'high' | 'limited' | 'minimal';
+interface UseCaseSignals {
+    /** Used for biometric identification in public spaces? (Art. 5 — unacceptable). */
+    biometricPublic?: boolean;
+    /** Social scoring by public authorities? (Art. 5). */
+    socialScoring?: boolean;
+    /** Subliminal manipulation? (Art. 5). */
+    subliminal?: boolean;
+    /** Annex III sector: critical infrastructure / education / employment /
+     *  access to essential services / law enforcement / migration /
+     *  administration of justice / democratic processes? */
+    annexIII?: boolean;
+    /** Interacts directly with natural persons (chatbot, agent)? — limited risk. */
+    chatbot?: boolean;
+    /** Generates synthetic media (image/audio/video/text deepfakes)? — limited risk. */
+    generatesSyntheticMedia?: boolean;
+}
+declare function classifyEuAiRisk(signals: UseCaseSignals): EuRiskClass;
+declare function euAiActReport(ctx: GovernanceContext, signals: UseCaseSignals): Promise<GovernanceReport>;
+/**
+ * NIST AI RMF 1.0 — Govern / Map / Measure / Manage mapping.
+ *
+ * Each subcategory derives its status from concrete framework state:
+ *   MEASURE 2.x: do we have a calibration regime? contamination controls?
+ *   MEASURE 2.7: are red-team results available?
+ *   MANAGE 1.x: are outcome metrics captured? correlation measured?
+ *   GOVERN 1.x: dataset + prompt provenance recorded?
+ *
+ * We ship the mapping and the derivation rules; consumers supply the
+ * GovernanceContext.
+ */
+declare function nistAiRmfReport(ctx: GovernanceContext): Promise<GovernanceReport>;
+/**
+ * SOC 2 — Common Criteria 7 (system operations + change management)
+ * audit trail derived from the trace corpus.
+ *
+ * This is NOT a formal SOC2 report — that requires an external
+ * auditor. What we ship is the machine-readable *evidence* package
+ * that an auditor consumes: run counts, deploy events, access log
+ * summary, anomaly tracking, response-time SLOs.
+ */
+declare function soc2Report(ctx: GovernanceContext): Promise<GovernanceReport>;
+export { type EuRiskClass, type GovernanceContext, type GovernanceFinding, type GovernanceReport, type UseCaseSignals, classifyEuAiRisk, euAiActReport, nistAiRmfReport, renderMarkdown, soc2Report, summarize };

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DVrmvM_k.js';
+export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-CmLJk3IG.js';
 import { R as RunRecord, a as RunSplitTag } from './run-record-BGY6bHRh.js';
 export { e as AGENT_PROFILE_KINDS, A as AgentProfileCell, d as AgentProfileCellInput, f as AgentProfileCellSchemaVersion, g as AgentProfileCellValidationError, h as AgentProfileDimensionValue, i as AgentProfileHarness, j as AgentProfileJson, k as AgentProfileKind, l as AgentProfileSource, m as AgentProfileSourceInput, J as JudgeScoresRecord, c as RunJudgeMetadata, n as RunOutcome, o as RunRecordValidationError, b as RunTokenUsage, S as SandboxAgentProfileLike, p as agentProfileCellHashMaterial, q as agentProfileCellKey, r as assertRunAgentProfileCell, s as buildAgentProfileCell, t as buildSandboxAgentProfileCell, u as groupRunsByAgentProfileCell, v as isRunRecord, w as parseRunRecordSafe, x as requireAgentProfileCell, y as roundTripRunRecord, z as toAgentProfileJson, B as validateAgentProfileCell, C as validateRunRecord, D as verifyAgentProfileCell } from './run-record-BGY6bHRh.js';
 import { AxAIService, AxFunction } from '@ax-llm/ax';
@@ -10,16 +10,16 @@ import { L as LlmClientOptions, m as LlmCallRequest, n as LlmCallResult } from '
 export { C as CallbackResearcher, a as CallbackResearcherOptions, b as CampaignFactoryParams, c as CampaignIntegrityPolicy, d as CampaignRunContext, e as CampaignRunOutcome, f as CampaignRunner, g as CampaignScenario, h as CampaignVariant, E as EvalCampaignOptions, i as EvalCampaignResult, j as ExperimentPlan, k as ExperimentResult, F as FailedRun, l as FailureMode, o as LlmCallError, p as LlmClient, q as LlmMessage, s as LlmRouteAssertionError, t as LlmRouteRequirements, u as LlmUsage, N as NoopResearcher, R as Researcher, S as SteeringChange, v as assertLlmRoute, w as backoffMs, x as callLlm, y as callLlmJson, z as isTransientLlmError, A as probeLlm, r as runEvalCampaign, B as stripFencedJson } from './researcher-DeZ_EArp.js';
 import { TraceAnalysisStore, AnalyzeTracesOptions, OtelExporter, OtelExportConfig, AnalyzeTracesInput, AnalyzeTracesResult } from './traces.js';
 export { AnalyzeTracesTurnSnapshot, DEFAULT_REDACTION_RULES, DEFAULT_TRACE_ANALYST_BUDGETS, DatasetOverview, ExportableSpan, OTEL_AGENT_EVAL_SCOPE, OtlpExport, OtlpFileTraceStore, OtlpFileTraceStoreOptions, OtlpResourceSpans, OtlpSpan, QueryTracesPage, REDACTION_VERSION, RedactionReport, RedactionRule, ReplayCache, ReplayCacheEntry, ReplayCacheMissError, ReplayCacheStats, ReplayFetchOptions, SearchSpanResult, SearchTraceResult, SpanMatchRecord, SpanNotFoundError, TRACE_ANALYST_ACTOR_DESCRIPTION, TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION, TRACE_ANALYST_SUBAGENT_DESCRIPTION, TRACE_ANALYST_TRUNCATION_MARKER_PREFIX, TraceAnalystByteBudgets, TraceAnalystFilters, TraceAnalystHookOptions, TraceAnalystSpan, TraceAnalystSpanKind, TraceAnalystSpanStatus, TraceAnalystTraceSummary, TraceFileMissingError, TraceInsightContext, TraceInsightFinding, TraceInsightPanelRole, TraceInsightPromptInput, TraceInsightQualityGate, TraceInsightQuestion, TraceInsightReadiness, TraceInsightSuite, TraceInsightTask, TraceNotFoundError, ViewSpansResult, ViewTraceOversized, ViewTraceResult, analyzeTraces, buildTraceAnalystTools, buildTraceInsightContext, buildTraceInsightPrompt, createOtelExporter, createOtelTracingStore, createReplayFetch, defaultTraceInsightPanel, describeTraceInsightScope, domainEvidencePattern, exportRunAsOtlp, inferDomainKeywords, iterateRawCalls, otelRunCompleteHook, planTraceInsightQuestions, redactString, redactValue, scoreTraceInsightReadiness, tokenizeDomainWords, traceAnalystFunctionGroup, traceAnalystOnRunComplete } from './traces.js';
-import { t as JudgeInput, u as JudgeFn, v as BenchmarkRunnerConfig, S as Scenario, x as BenchmarkReport, y as ProductClientConfig, C as CheckResult, T as TestResult, z as PersonaConfig, D as DriverResult, A as DriverState, E as CollectedArtifacts, F as ScenarioResult, i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard, G as TurnMetrics, H as ScenarioFile, I as CompletionCriterion } from './release-report-D2ykiLSe.js';
-export { K as ArtifactCheck, L as ArtifactResult, B as BootstrapOptions, a as BootstrapResult, M as CorpusAgreementOptions, N as CorpusAgreementPerDimension, O as CorpusAgreementReport, Q as CorpusScoreRecord, U as EvalResult, W as FeedbackPattern, X as JudgeConfig, J as JudgeReplayGateArgs, Y as JudgeRubric, Z as JudgeScore, P as PairedBootstrapOptions, b as PairedBootstrapResult, _ as PersonaRigor, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, $ as RouteMap, a0 as RubricDimension, a1 as Turn, a2 as TurnResult, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, a3 as bonferroni, n as bootstrapCi, a4 as cohensD, a5 as confidenceInterval, a6 as corpusInterRaterAgreement, a7 as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, a8 as interRaterReliability, p as judgeReplayGate, a9 as mannWhitneyU, aa as normalizeScores, q as pairedBootstrap, ab as pairedMde, ac as pairedTTest, ad as partialCredit, r as releaseTraceEvidenceFromMultiShotTrials, s as renderReleaseReport, ae as requiredSampleSize, af as weightedMean, w as wilcoxonSignedRank } from './release-report-D2ykiLSe.js';
+import { t as JudgeInput, u as JudgeFn, v as BenchmarkRunnerConfig, S as Scenario, x as BenchmarkReport, y as ProductClientConfig, C as CheckResult, T as TestResult, z as PersonaConfig, D as DriverResult, A as DriverState, E as CollectedArtifacts, F as ScenarioResult, i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard, G as TurnMetrics, H as ScenarioFile, I as CompletionCriterion } from './release-report-Di84bXD7.js';
+export { K as ArtifactCheck, L as ArtifactResult, B as BootstrapOptions, a as BootstrapResult, M as CorpusAgreementOptions, N as CorpusAgreementPerDimension, O as CorpusAgreementReport, Q as CorpusScoreRecord, U as EvalResult, W as FeedbackPattern, X as JudgeConfig, J as JudgeReplayGateArgs, Y as JudgeRubric, Z as JudgeScore, P as PairedBootstrapOptions, b as PairedBootstrapResult, _ as PersonaRigor, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, $ as RouteMap, a0 as RubricDimension, a1 as Turn, a2 as TurnResult, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, a3 as bonferroni, n as bootstrapCi, a4 as cohensD, a5 as confidenceInterval, a6 as corpusInterRaterAgreement, a7 as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, a8 as interRaterReliability, p as judgeReplayGate, a9 as mannWhitneyU, aa as normalizeScores, q as pairedBootstrap, ab as pairedMde, ac as pairedTTest, ad as partialCredit, r as releaseTraceEvidenceFromMultiShotTrials, s as renderReleaseReport, ae as requiredSampleSize, af as weightedMean, w as wilcoxonSignedRank } from './release-report-Di84bXD7.js';
 import { TCloud } from '@tangle-network/tcloud';
 import { z } from 'zod';
 import { C as ControlEvalResult } from './control-runtime-BZ_lVLYW.js';
 export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-BZ_lVLYW.js';
 import { A as AgentEvalError } from './errors-mje_cKOs.js';
 export { a as AgentEvalErrorCode, C as CaptureIntegrityError, b as ConfigError, J as JudgeError, N as NotFoundError, R as ReplayError, V as ValidationError, c as VerificationError } from './errors-mje_cKOs.js';
-import { b as FeedbackLabel, n as FeedbackTrajectoryStore, l as FeedbackTrajectory } from './feedback-trajectory-iATEAHmc.js';
-export { F as FeedbackArtifactType, a as FeedbackAttempt, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, m as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-iATEAHmc.js';
+import { b as FeedbackLabel, n as FeedbackTrajectoryStore, l as FeedbackTrajectory } from './feedback-trajectory-Dvy-bt7x.js';
+export { F as FeedbackArtifactType, a as FeedbackAttempt, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, m as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-Dvy-bt7x.js';
 import { A as ActionableSideInfo, h as MultiShotRunner, j as MultiShotScorer, c as MultiShotMutateAdapter, a4 as HeldOutGateConfig, E as EvolvableVariant, m as MultiShotTrialResult, e as MultiShotOptimizationResult, a3 as GateDecision, a5 as Objective, a6 as ParetoResult, V as VariantAggregate, t as TrialResult, o as MutateAdapter, T as TrialCache } from './summary-report-DuZXOk7K.js';
 export { a as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, a7 as Direction, C as GainDistributionBin, F as GainDistributionFigureSpec, H as GainDistributionOptions, a8 as GateEvidence, G as GenerationReport, a9 as HeldOutGate, aa as HeldOutGateRejectionCode, I as InMemoryTrialCache, M as MultiShotGateConfig, b as MultiShotGateResult, d as MultiShotOptimizationConfig, f as MultiShotRun, g as MultiShotRunInput, i as MultiShotScore, k as MultiShotSplit, l as MultiShotTrace, n as MultiShotVariant, J as ParetoFigureSpec, K as ParetoPoint, P as PromptEvolutionConfig, p as PromptEvolutionEvent, q as PromptEvolutionResult, L as RESEARCH_REPORT_HARD_PAIR_FLOOR, R as ReflectionContext, r as ReflectionProposal, N as ResearchReport, O as ResearchReportCandidate, Q as ResearchReportDecision, U as ResearchReportMethodology, W as ResearchReportOptions, X as ResearchReportRecommendation, S as ScenarioAggregate, s as ScoreAdapter, Y as SummaryTable, Z as SummaryTableOptions, _ as SummaryTableRow, u as TrialTrace, v as buildReflectionPrompt, ab as crowdingDistance, w as defaultMultiShotObjectives, ac as dominates, $ as gainHistogram, a0 as paretoChart, ad as paretoFrontier, ae as paretoFrontierWithCrowding, x as parseReflectionResponse, a1 as researchReport, y as runMultiShotOptimization, z as runPromptEvolution, af as scalarScore, a2 as summaryTable, B as trialTraceFromMultiShotTrial } from './summary-report-DuZXOk7K.js';
 export { DataAcquisitionPlan, KnowledgeAcquisitionMode, KnowledgeBundle, KnowledgeFallbackPolicy, KnowledgeFreshness, KnowledgeImportance, KnowledgeReadinessReport, KnowledgeRecommendedAction, KnowledgeRequirement, KnowledgeRequirementCategory, KnowledgeResponsibleSurface, KnowledgeSensitivity, ScoreKnowledgeReadinessOptions, UserQuestion, acquisitionPlansForKnowledgeGaps, blockingKnowledgeEval, knowledgeReadinessTracePayload, scoreKnowledgeReadiness, userQuestionsForKnowledgeGaps } from './knowledge/index.js';
@@ -35,11 +35,12 @@ import { a as BaselineReport } from './baseline-4R5deP0N.js';
 export { B as BaselineOptions, M as MetricSamples, b as MetricVerdict, T as ToolStats, d as ToolUseMetrics, e as ToolUseOptions, f as compareToBaseline, c as computeToolUseMetrics, i as iqr, w as welchsTTest } from './baseline-4R5deP0N.js';
 import { T as Trajectory, a as TrajectoryStep } from './trajectory-CnoBo-JY.js';
 export { b as buildTrajectory } from './trajectory-CnoBo-JY.js';
-import { a as DatasetScenario, c as Dataset } from './dataset-ueRVTUoY.js';
-export { d as DatasetDifficulty, b as DatasetManifest, e as DatasetProvenance, D as DatasetSplit, H as HoldoutLockedError, S as SliceOptions, h as hashScenarios } from './dataset-ueRVTUoY.js';
+import { a as DatasetScenario, b as Dataset } from './dataset-BlwAtYYf.js';
+export { d as DatasetDifficulty, c as DatasetManifest, e as DatasetProvenance, D as DatasetSplit, H as HoldoutLockedError, S as SliceOptions, h as hashScenarios } from './dataset-BlwAtYYf.js';
 export { b as CalibrationResult, c as CandidateScore, a as ContinuousAgreement, C as ContinuousAgreementOptions, d as ContinuousCalibrationResult, G as GoldenItem, P as PositionalBiasResult, S as SelfPreferenceResult, V as VerbosityBiasResult, e as calibrateJudge, f as calibrateJudgeContinuous, g as continuousAgreement, p as positionalBias, s as selfPreference, v as verbosityBias } from './judge-calibration-DilmB3Ml.js';
-export { D as DEFAULT_RED_TEAM_CORPUS, E as EuRiskClass, G as GovernanceContext, a as GovernanceFinding, b as GovernanceReport, R as RedTeamCase, c as RedTeamCategory, d as RedTeamFinding, e as RedTeamPayload, f as RedTeamReport, U as UseCaseSignals, g as classifyEuAiRisk, h as euAiActReport, n as nistAiRmfReport, r as redTeamDataset, i as redTeamReport, j as renderMarkdown, s as scoreRedTeamOutput, k as soc2Report, l as summarize, t as toolNamesForRun } from './index-CN2agEaO.js';
+export { D as DEFAULT_RED_TEAM_CORPUS, R as RedTeamCase, a as RedTeamCategory, b as RedTeamFinding, c as RedTeamPayload, d as RedTeamReport, r as redTeamDataset, e as redTeamReport, s as scoreRedTeamOutput, t as toolNamesForRun } from './red-team-30II1T4o.js';
 import { a as PrmGrader } from './rubric-D5tjHNJQ.js';
+export { EuRiskClass, GovernanceContext, GovernanceFinding, GovernanceReport, UseCaseSignals, classifyEuAiRisk, euAiActReport, nistAiRmfReport, renderMarkdown, soc2Report, summarize } from './governance/index.js';
 export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as benchmarkDeterministicSplit, i as benchmarks } from './index-0pu_fBwZ.js';
 export { I as InterimReleaseConfidence, a as InterimReleaseConfidenceInput, P as PairedEvalueOptions, b as PairedEvalueSequence, c as PairedEvalueStep, S as SequentialDecision, e as evaluateInterimReleaseConfidence, p as pairedEvalueSequence } from './sequential-5iSVfzl2.js';
 import './outcome-store-D6KWmYvj.js';
@@ -1159,7 +1160,7 @@ interface AnalystHooks {
         analyst: Analyst;
         error: Error;
         runId: string;
-    }): AnalystFinding[] | void | Promise<AnalystFinding[] | void>;
+    }): AnalystFinding[] | undefined | Promise<AnalystFinding[] | undefined>;
     /** Once after registry.run() completes. Use for final aggregation, persistence. */
     onComplete?(args: {
         result: AnalystRunResult;
@@ -6311,6 +6312,31 @@ declare function withOtelPipeline(opts?: OtelPipelineOptions): OtelPipelineHandl
  */
 declare function isOtelConfigured(): boolean;
+/**
+ * Traced analyst wrapper — instruments `analyzeTraces` with spans so the
+ * analyst's internal LLM calls (actor + responder turns) appear in the
+ * trace tree. Also wraps each actor turn callback with a span.
+ *
+ * Since the analyst uses @ax-llm/ax internally (an agent framework with
+ * its own turn loop), we cannot wrap individual `tc.chat()` calls without
+ * forking ax. Instead, we wrap at the boundary:
+ *   1. A parent span for the entire analyst run.
+ *   2. Per-turn child spans from the `onTurn` callback (captures code,
+ *      output size, error status).
+ *   3. Summary attributes on the parent (total turns, usage, findings).
+ */
+interface TracedAnalystOptions {
+    /** TraceEmitter for span emission. */
+    emitter: TraceEmitter;
+    /** Parent span id. If omitted, uses emitter stack. */
+    parentSpanId?: string;
+}
+/**
+ * Run `analyzeTraces` wrapped in a parent span with per-turn child spans.
+ */
+declare function tracedAnalyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions, traceOpts: TracedAnalystOptions): Promise<AnalyzeTracesResult>;
 /**
  * Traced judge wrappers — instruments every LLM call inside the judge
  * ensemble with child spans so OTEL sinks see per-judge latency, model,
@@ -6337,31 +6363,6 @@ declare function traceJudge(judge: JudgeFn, judgeName: string, opts: TracedJudge
  */
 declare function traceJudgeEnsemble(judges: JudgeFn[], judgeNames: string[], opts: TracedJudgeOptions): JudgeFn;
-/**
- * Traced analyst wrapper — instruments `analyzeTraces` with spans so the
- * analyst's internal LLM calls (actor + responder turns) appear in the
- * trace tree. Also wraps each actor turn callback with a span.
- *
- * Since the analyst uses @ax-llm/ax internally (an agent framework with
- * its own turn loop), we cannot wrap individual `tc.chat()` calls without
- * forking ax. Instead, we wrap at the boundary:
- *   1. A parent span for the entire analyst run.
- *   2. Per-turn child spans from the `onTurn` callback (captures code,
- *      output size, error status).
- *   3. Summary attributes on the parent (total turns, usage, findings).
- */
-interface TracedAnalystOptions {
-    /** TraceEmitter for span emission. */
-    emitter: TraceEmitter;
-    /** Parent span id. If omitted, uses emitter stack. */
-    parentSpanId?: string;
-}
-/**
- * Run `analyzeTraces` wrapped in a parent span with per-turn child spans.
- */
-declare function tracedAnalyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions, traceOpts: TracedAnalystOptions): Promise<AnalyzeTracesResult>;
 /**
  * Traced mutator wrapper — instruments reflective-mutation LLM calls.
  *