@tangle-network/agent-eval 0.38.0 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/dist/campaign/index.d.ts +695 -0
  2. package/dist/campaign/index.js +741 -0
  3. package/dist/campaign/index.js.map +1 -0
  4. package/dist/chunk-5U2DOJU4.js +565 -0
  5. package/dist/chunk-5U2DOJU4.js.map +1 -0
  6. package/dist/{chunk-KE7TDJUO.js → chunk-AU2JLNSZ.js} +2 -2
  7. package/dist/{chunk-TSPOEDM3.js → chunk-BWZEGTES.js} +2 -5
  8. package/dist/chunk-BWZEGTES.js.map +1 -0
  9. package/dist/{chunk-3HYQXPC2.js → chunk-DMW5VENN.js} +3 -3
  10. package/dist/{chunk-TQL7BAOY.js → chunk-EGIPWXHL.js} +2 -2
  11. package/dist/chunk-GGE4NNQT.js +65 -0
  12. package/dist/chunk-GGE4NNQT.js.map +1 -0
  13. package/dist/{chunk-7PR3WPWE.js → chunk-L7XMNXLO.js} +2 -2
  14. package/dist/{chunk-RL6TERL2.js → chunk-LCIDRYGP.js} +3 -3
  15. package/dist/{chunk-L5UNCDAJ.js → chunk-MAOZCN36.js} +2 -64
  16. package/dist/chunk-MAOZCN36.js.map +1 -0
  17. package/dist/{chunk-LGAPK7NA.js → chunk-NKLGKF2Q.js} +2 -2
  18. package/dist/chunk-TMXPFWC7.js +305 -0
  19. package/dist/chunk-TMXPFWC7.js.map +1 -0
  20. package/dist/{chunk-KHZRNY3F.js → chunk-WP7SY7AI.js} +5 -4
  21. package/dist/chunk-WP7SY7AI.js.map +1 -0
  22. package/dist/chunk-YV7J7X5N.js +313 -0
  23. package/dist/chunk-YV7J7X5N.js.map +1 -0
  24. package/dist/{control-DVrmvM_k.d.ts → control-CmLJk3IG.d.ts} +1 -1
  25. package/dist/control.d.ts +3 -3
  26. package/dist/control.js +2 -2
  27. package/dist/{dataset-ueRVTUoY.d.ts → dataset-BlwAtYYf.d.ts} +1 -1
  28. package/dist/{feedback-trajectory-iATEAHmc.d.ts → feedback-trajectory-Dvy-bt7x.d.ts} +1 -1
  29. package/dist/governance/index.d.ts +133 -5
  30. package/dist/index.d.ts +35 -34
  31. package/dist/index.js +97 -630
  32. package/dist/index.js.map +1 -1
  33. package/dist/multishot/index.d.ts +21 -21
  34. package/dist/multishot/index.js +64 -15
  35. package/dist/multishot/index.js.map +1 -1
  36. package/dist/openapi.json +1 -1
  37. package/dist/optimization.d.ts +2 -2
  38. package/dist/optimization.js +5 -5
  39. package/dist/pipelines/index.js +2 -2
  40. package/dist/red-team-30II1T4o.d.ts +63 -0
  41. package/dist/{release-report-D2ykiLSe.d.ts → release-report-Di84bXD7.d.ts} +5 -2
  42. package/dist/reporting.d.ts +2 -2
  43. package/dist/reporting.js +3 -3
  44. package/dist/rl.js +15 -315
  45. package/dist/rl.js.map +1 -1
  46. package/dist/run-campaign-JYJXYHHL.js +10 -0
  47. package/dist/run-campaign-JYJXYHHL.js.map +1 -0
  48. package/dist/traces.js +7 -5
  49. package/dist/wire/index.d.ts +2 -2
  50. package/docs/design/loop-taxonomy.md +233 -0
  51. package/package.json +33 -24
  52. package/dist/chunk-KHZRNY3F.js.map +0 -1
  53. package/dist/chunk-L5UNCDAJ.js.map +0 -1
  54. package/dist/chunk-TSPOEDM3.js.map +0 -1
  55. package/dist/index-CN2agEaO.d.ts +0 -191
  56. /package/dist/{chunk-KE7TDJUO.js.map → chunk-AU2JLNSZ.js.map} +0 -0
  57. /package/dist/{chunk-3HYQXPC2.js.map → chunk-DMW5VENN.js.map} +0 -0
  58. /package/dist/{chunk-TQL7BAOY.js.map → chunk-EGIPWXHL.js.map} +0 -0
  59. /package/dist/{chunk-7PR3WPWE.js.map → chunk-L7XMNXLO.js.map} +0 -0
  60. /package/dist/{chunk-RL6TERL2.js.map → chunk-LCIDRYGP.js.map} +0 -0
  61. /package/dist/{chunk-LGAPK7NA.js.map → chunk-NKLGKF2Q.js.map} +0 -0
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DVrmvM_k.js';
1
+ export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-CmLJk3IG.js';
2
2
  import { R as RunRecord, a as RunSplitTag } from './run-record-BGY6bHRh.js';
3
3
  export { e as AGENT_PROFILE_KINDS, A as AgentProfileCell, d as AgentProfileCellInput, f as AgentProfileCellSchemaVersion, g as AgentProfileCellValidationError, h as AgentProfileDimensionValue, i as AgentProfileHarness, j as AgentProfileJson, k as AgentProfileKind, l as AgentProfileSource, m as AgentProfileSourceInput, J as JudgeScoresRecord, c as RunJudgeMetadata, n as RunOutcome, o as RunRecordValidationError, b as RunTokenUsage, S as SandboxAgentProfileLike, p as agentProfileCellHashMaterial, q as agentProfileCellKey, r as assertRunAgentProfileCell, s as buildAgentProfileCell, t as buildSandboxAgentProfileCell, u as groupRunsByAgentProfileCell, v as isRunRecord, w as parseRunRecordSafe, x as requireAgentProfileCell, y as roundTripRunRecord, z as toAgentProfileJson, B as validateAgentProfileCell, C as validateRunRecord, D as verifyAgentProfileCell } from './run-record-BGY6bHRh.js';
4
4
  import { AxAIService, AxFunction } from '@ax-llm/ax';
@@ -10,16 +10,16 @@ import { L as LlmClientOptions, m as LlmCallRequest, n as LlmCallResult } from '
10
10
  export { C as CallbackResearcher, a as CallbackResearcherOptions, b as CampaignFactoryParams, c as CampaignIntegrityPolicy, d as CampaignRunContext, e as CampaignRunOutcome, f as CampaignRunner, g as CampaignScenario, h as CampaignVariant, E as EvalCampaignOptions, i as EvalCampaignResult, j as ExperimentPlan, k as ExperimentResult, F as FailedRun, l as FailureMode, o as LlmCallError, p as LlmClient, q as LlmMessage, s as LlmRouteAssertionError, t as LlmRouteRequirements, u as LlmUsage, N as NoopResearcher, R as Researcher, S as SteeringChange, v as assertLlmRoute, w as backoffMs, x as callLlm, y as callLlmJson, z as isTransientLlmError, A as probeLlm, r as runEvalCampaign, B as stripFencedJson } from './researcher-DeZ_EArp.js';
11
11
  import { TraceAnalysisStore, AnalyzeTracesOptions, OtelExporter, OtelExportConfig, AnalyzeTracesInput, AnalyzeTracesResult } from './traces.js';
12
12
  export { AnalyzeTracesTurnSnapshot, DEFAULT_REDACTION_RULES, DEFAULT_TRACE_ANALYST_BUDGETS, DatasetOverview, ExportableSpan, OTEL_AGENT_EVAL_SCOPE, OtlpExport, OtlpFileTraceStore, OtlpFileTraceStoreOptions, OtlpResourceSpans, OtlpSpan, QueryTracesPage, REDACTION_VERSION, RedactionReport, RedactionRule, ReplayCache, ReplayCacheEntry, ReplayCacheMissError, ReplayCacheStats, ReplayFetchOptions, SearchSpanResult, SearchTraceResult, SpanMatchRecord, SpanNotFoundError, TRACE_ANALYST_ACTOR_DESCRIPTION, TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION, TRACE_ANALYST_SUBAGENT_DESCRIPTION, TRACE_ANALYST_TRUNCATION_MARKER_PREFIX, TraceAnalystByteBudgets, TraceAnalystFilters, TraceAnalystHookOptions, TraceAnalystSpan, TraceAnalystSpanKind, TraceAnalystSpanStatus, TraceAnalystTraceSummary, TraceFileMissingError, TraceInsightContext, TraceInsightFinding, TraceInsightPanelRole, TraceInsightPromptInput, TraceInsightQualityGate, TraceInsightQuestion, TraceInsightReadiness, TraceInsightSuite, TraceInsightTask, TraceNotFoundError, ViewSpansResult, ViewTraceOversized, ViewTraceResult, analyzeTraces, buildTraceAnalystTools, buildTraceInsightContext, buildTraceInsightPrompt, createOtelExporter, createOtelTracingStore, createReplayFetch, defaultTraceInsightPanel, describeTraceInsightScope, domainEvidencePattern, exportRunAsOtlp, inferDomainKeywords, iterateRawCalls, otelRunCompleteHook, planTraceInsightQuestions, redactString, redactValue, scoreTraceInsightReadiness, tokenizeDomainWords, traceAnalystFunctionGroup, traceAnalystOnRunComplete } from './traces.js';
13
- import { t as JudgeInput, u as JudgeFn, v as BenchmarkRunnerConfig, S as Scenario, x as BenchmarkReport, y as ProductClientConfig, C as CheckResult, T as TestResult, z as PersonaConfig, D as DriverResult, A as DriverState, E as CollectedArtifacts, F as ScenarioResult, i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard, G as TurnMetrics, H as ScenarioFile, I as CompletionCriterion } from './release-report-D2ykiLSe.js';
14
- export { K as ArtifactCheck, L as ArtifactResult, B as BootstrapOptions, a as BootstrapResult, M as CorpusAgreementOptions, N as CorpusAgreementPerDimension, O as CorpusAgreementReport, Q as CorpusScoreRecord, U as EvalResult, W as FeedbackPattern, X as JudgeConfig, J as JudgeReplayGateArgs, Y as JudgeRubric, Z as JudgeScore, P as PairedBootstrapOptions, b as PairedBootstrapResult, _ as PersonaRigor, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, $ as RouteMap, a0 as RubricDimension, a1 as Turn, a2 as TurnResult, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, a3 as bonferroni, n as bootstrapCi, a4 as cohensD, a5 as confidenceInterval, a6 as corpusInterRaterAgreement, a7 as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, a8 as interRaterReliability, p as judgeReplayGate, a9 as mannWhitneyU, aa as normalizeScores, q as pairedBootstrap, ab as pairedMde, ac as pairedTTest, ad as partialCredit, r as releaseTraceEvidenceFromMultiShotTrials, s as renderReleaseReport, ae as requiredSampleSize, af as weightedMean, w as wilcoxonSignedRank } from './release-report-D2ykiLSe.js';
13
+ import { t as JudgeInput, u as JudgeFn, v as BenchmarkRunnerConfig, S as Scenario, x as BenchmarkReport, y as ProductClientConfig, C as CheckResult, T as TestResult, z as PersonaConfig, D as DriverResult, A as DriverState, E as CollectedArtifacts, F as ScenarioResult, i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard, G as TurnMetrics, H as ScenarioFile, I as CompletionCriterion } from './release-report-Di84bXD7.js';
14
+ export { K as ArtifactCheck, L as ArtifactResult, B as BootstrapOptions, a as BootstrapResult, M as CorpusAgreementOptions, N as CorpusAgreementPerDimension, O as CorpusAgreementReport, Q as CorpusScoreRecord, U as EvalResult, W as FeedbackPattern, X as JudgeConfig, J as JudgeReplayGateArgs, Y as JudgeRubric, Z as JudgeScore, P as PairedBootstrapOptions, b as PairedBootstrapResult, _ as PersonaRigor, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, $ as RouteMap, a0 as RubricDimension, a1 as Turn, a2 as TurnResult, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, a3 as bonferroni, n as bootstrapCi, a4 as cohensD, a5 as confidenceInterval, a6 as corpusInterRaterAgreement, a7 as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, a8 as interRaterReliability, p as judgeReplayGate, a9 as mannWhitneyU, aa as normalizeScores, q as pairedBootstrap, ab as pairedMde, ac as pairedTTest, ad as partialCredit, r as releaseTraceEvidenceFromMultiShotTrials, s as renderReleaseReport, ae as requiredSampleSize, af as weightedMean, w as wilcoxonSignedRank } from './release-report-Di84bXD7.js';
15
15
  import { TCloud } from '@tangle-network/tcloud';
16
16
  import { z } from 'zod';
17
17
  import { C as ControlEvalResult } from './control-runtime-BZ_lVLYW.js';
18
18
  export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-BZ_lVLYW.js';
19
19
  import { A as AgentEvalError } from './errors-mje_cKOs.js';
20
20
  export { a as AgentEvalErrorCode, C as CaptureIntegrityError, b as ConfigError, J as JudgeError, N as NotFoundError, R as ReplayError, V as ValidationError, c as VerificationError } from './errors-mje_cKOs.js';
21
- import { b as FeedbackLabel, n as FeedbackTrajectoryStore, l as FeedbackTrajectory } from './feedback-trajectory-iATEAHmc.js';
22
- export { F as FeedbackArtifactType, a as FeedbackAttempt, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, m as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-iATEAHmc.js';
21
+ import { b as FeedbackLabel, n as FeedbackTrajectoryStore, l as FeedbackTrajectory } from './feedback-trajectory-Dvy-bt7x.js';
22
+ export { F as FeedbackArtifactType, a as FeedbackAttempt, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, m as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-Dvy-bt7x.js';
23
23
  import { A as ActionableSideInfo, h as MultiShotRunner, j as MultiShotScorer, c as MultiShotMutateAdapter, a4 as HeldOutGateConfig, E as EvolvableVariant, m as MultiShotTrialResult, e as MultiShotOptimizationResult, a3 as GateDecision, a5 as Objective, a6 as ParetoResult, V as VariantAggregate, t as TrialResult, o as MutateAdapter, T as TrialCache } from './summary-report-DuZXOk7K.js';
24
24
  export { a as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, a7 as Direction, C as GainDistributionBin, F as GainDistributionFigureSpec, H as GainDistributionOptions, a8 as GateEvidence, G as GenerationReport, a9 as HeldOutGate, aa as HeldOutGateRejectionCode, I as InMemoryTrialCache, M as MultiShotGateConfig, b as MultiShotGateResult, d as MultiShotOptimizationConfig, f as MultiShotRun, g as MultiShotRunInput, i as MultiShotScore, k as MultiShotSplit, l as MultiShotTrace, n as MultiShotVariant, J as ParetoFigureSpec, K as ParetoPoint, P as PromptEvolutionConfig, p as PromptEvolutionEvent, q as PromptEvolutionResult, L as RESEARCH_REPORT_HARD_PAIR_FLOOR, R as ReflectionContext, r as ReflectionProposal, N as ResearchReport, O as ResearchReportCandidate, Q as ResearchReportDecision, U as ResearchReportMethodology, W as ResearchReportOptions, X as ResearchReportRecommendation, S as ScenarioAggregate, s as ScoreAdapter, Y as SummaryTable, Z as SummaryTableOptions, _ as SummaryTableRow, u as TrialTrace, v as buildReflectionPrompt, ab as crowdingDistance, w as defaultMultiShotObjectives, ac as dominates, $ as gainHistogram, a0 as paretoChart, ad as paretoFrontier, ae as paretoFrontierWithCrowding, x as parseReflectionResponse, a1 as researchReport, y as runMultiShotOptimization, z as runPromptEvolution, af as scalarScore, a2 as summaryTable, B as trialTraceFromMultiShotTrial } from './summary-report-DuZXOk7K.js';
25
25
  export { DataAcquisitionPlan, KnowledgeAcquisitionMode, KnowledgeBundle, KnowledgeFallbackPolicy, KnowledgeFreshness, KnowledgeImportance, KnowledgeReadinessReport, KnowledgeRecommendedAction, KnowledgeRequirement, KnowledgeRequirementCategory, KnowledgeResponsibleSurface, KnowledgeSensitivity, ScoreKnowledgeReadinessOptions, UserQuestion, acquisitionPlansForKnowledgeGaps, blockingKnowledgeEval, knowledgeReadinessTracePayload, scoreKnowledgeReadiness, userQuestionsForKnowledgeGaps } from './knowledge/index.js';
@@ -35,11 +35,12 @@ import { a as BaselineReport } from './baseline-4R5deP0N.js';
35
35
  export { B as BaselineOptions, M as MetricSamples, b as MetricVerdict, T as ToolStats, d as ToolUseMetrics, e as ToolUseOptions, f as compareToBaseline, c as computeToolUseMetrics, i as iqr, w as welchsTTest } from './baseline-4R5deP0N.js';
36
36
  import { T as Trajectory, a as TrajectoryStep } from './trajectory-CnoBo-JY.js';
37
37
  export { b as buildTrajectory } from './trajectory-CnoBo-JY.js';
38
- import { a as DatasetScenario, c as Dataset } from './dataset-ueRVTUoY.js';
39
- export { d as DatasetDifficulty, b as DatasetManifest, e as DatasetProvenance, D as DatasetSplit, H as HoldoutLockedError, S as SliceOptions, h as hashScenarios } from './dataset-ueRVTUoY.js';
38
+ import { a as DatasetScenario, b as Dataset } from './dataset-BlwAtYYf.js';
39
+ export { d as DatasetDifficulty, c as DatasetManifest, e as DatasetProvenance, D as DatasetSplit, H as HoldoutLockedError, S as SliceOptions, h as hashScenarios } from './dataset-BlwAtYYf.js';
40
40
  export { b as CalibrationResult, c as CandidateScore, a as ContinuousAgreement, C as ContinuousAgreementOptions, d as ContinuousCalibrationResult, G as GoldenItem, P as PositionalBiasResult, S as SelfPreferenceResult, V as VerbosityBiasResult, e as calibrateJudge, f as calibrateJudgeContinuous, g as continuousAgreement, p as positionalBias, s as selfPreference, v as verbosityBias } from './judge-calibration-DilmB3Ml.js';
41
- export { D as DEFAULT_RED_TEAM_CORPUS, E as EuRiskClass, G as GovernanceContext, a as GovernanceFinding, b as GovernanceReport, R as RedTeamCase, c as RedTeamCategory, d as RedTeamFinding, e as RedTeamPayload, f as RedTeamReport, U as UseCaseSignals, g as classifyEuAiRisk, h as euAiActReport, n as nistAiRmfReport, r as redTeamDataset, i as redTeamReport, j as renderMarkdown, s as scoreRedTeamOutput, k as soc2Report, l as summarize, t as toolNamesForRun } from './index-CN2agEaO.js';
41
+ export { D as DEFAULT_RED_TEAM_CORPUS, R as RedTeamCase, a as RedTeamCategory, b as RedTeamFinding, c as RedTeamPayload, d as RedTeamReport, r as redTeamDataset, e as redTeamReport, s as scoreRedTeamOutput, t as toolNamesForRun } from './red-team-30II1T4o.js';
42
42
  import { a as PrmGrader } from './rubric-D5tjHNJQ.js';
43
+ export { EuRiskClass, GovernanceContext, GovernanceFinding, GovernanceReport, UseCaseSignals, classifyEuAiRisk, euAiActReport, nistAiRmfReport, renderMarkdown, soc2Report, summarize } from './governance/index.js';
43
44
  export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as benchmarkDeterministicSplit, i as benchmarks } from './index-0pu_fBwZ.js';
44
45
  export { I as InterimReleaseConfidence, a as InterimReleaseConfidenceInput, P as PairedEvalueOptions, b as PairedEvalueSequence, c as PairedEvalueStep, S as SequentialDecision, e as evaluateInterimReleaseConfidence, p as pairedEvalueSequence } from './sequential-5iSVfzl2.js';
45
46
  import './outcome-store-D6KWmYvj.js';
@@ -1159,7 +1160,7 @@ interface AnalystHooks {
1159
1160
  analyst: Analyst;
1160
1161
  error: Error;
1161
1162
  runId: string;
1162
- }): AnalystFinding[] | void | Promise<AnalystFinding[] | void>;
1163
+ }): AnalystFinding[] | undefined | Promise<AnalystFinding[] | undefined>;
1163
1164
  /** Once after registry.run() completes. Use for final aggregation, persistence. */
1164
1165
  onComplete?(args: {
1165
1166
  result: AnalystRunResult;
@@ -6311,6 +6312,31 @@ declare function withOtelPipeline(opts?: OtelPipelineOptions): OtelPipelineHandl
6311
6312
  */
6312
6313
  declare function isOtelConfigured(): boolean;
6313
6314
 
6315
+ /**
6316
+ * Traced analyst wrapper — instruments `analyzeTraces` with spans so the
6317
+ * analyst's internal LLM calls (actor + responder turns) appear in the
6318
+ * trace tree. Also wraps each actor turn callback with a span.
6319
+ *
6320
+ * Since the analyst uses @ax-llm/ax internally (an agent framework with
6321
+ * its own turn loop), we cannot wrap individual `tc.chat()` calls without
6322
+ * forking ax. Instead, we wrap at the boundary:
6323
+ * 1. A parent span for the entire analyst run.
6324
+ * 2. Per-turn child spans from the `onTurn` callback (captures code,
6325
+ * output size, error status).
6326
+ * 3. Summary attributes on the parent (total turns, usage, findings).
6327
+ */
6328
+
6329
+ interface TracedAnalystOptions {
6330
+ /** TraceEmitter for span emission. */
6331
+ emitter: TraceEmitter;
6332
+ /** Parent span id. If omitted, uses emitter stack. */
6333
+ parentSpanId?: string;
6334
+ }
6335
+ /**
6336
+ * Run `analyzeTraces` wrapped in a parent span with per-turn child spans.
6337
+ */
6338
+ declare function tracedAnalyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions, traceOpts: TracedAnalystOptions): Promise<AnalyzeTracesResult>;
6339
+
6314
6340
  /**
6315
6341
  * Traced judge wrappers — instruments every LLM call inside the judge
6316
6342
  * ensemble with child spans so OTEL sinks see per-judge latency, model,
@@ -6337,31 +6363,6 @@ declare function traceJudge(judge: JudgeFn, judgeName: string, opts: TracedJudge
6337
6363
  */
6338
6364
  declare function traceJudgeEnsemble(judges: JudgeFn[], judgeNames: string[], opts: TracedJudgeOptions): JudgeFn;
6339
6365
 
6340
- /**
6341
- * Traced analyst wrapper — instruments `analyzeTraces` with spans so the
6342
- * analyst's internal LLM calls (actor + responder turns) appear in the
6343
- * trace tree. Also wraps each actor turn callback with a span.
6344
- *
6345
- * Since the analyst uses @ax-llm/ax internally (an agent framework with
6346
- * its own turn loop), we cannot wrap individual `tc.chat()` calls without
6347
- * forking ax. Instead, we wrap at the boundary:
6348
- * 1. A parent span for the entire analyst run.
6349
- * 2. Per-turn child spans from the `onTurn` callback (captures code,
6350
- * output size, error status).
6351
- * 3. Summary attributes on the parent (total turns, usage, findings).
6352
- */
6353
-
6354
- interface TracedAnalystOptions {
6355
- /** TraceEmitter for span emission. */
6356
- emitter: TraceEmitter;
6357
- /** Parent span id. If omitted, uses emitter stack. */
6358
- parentSpanId?: string;
6359
- }
6360
- /**
6361
- * Run `analyzeTraces` wrapped in a parent span with per-turn child spans.
6362
- */
6363
- declare function tracedAnalyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions, traceOpts: TracedAnalystOptions): Promise<AnalyzeTracesResult>;
6364
-
6365
6366
  /**
6366
6367
  * Traced mutator wrapper — instruments reflective-mutation LLM calls.
6367
6368
  *