@tangle-network/agent-eval 0.61.0 → 0.63.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -8
- package/dist/adapters/http.d.ts +4 -1
- package/dist/adapters/langchain.d.ts +4 -1
- package/dist/adapters/otel.d.ts +4 -4
- package/dist/{agent-profile-9J9hxdm2.d.ts → agent-profile-DzcPHR1Z.d.ts} +1 -1
- package/dist/benchmarks/index.d.ts +2 -2
- package/dist/campaign/index.d.ts +388 -11
- package/dist/campaign/index.js +597 -12
- package/dist/campaign/index.js.map +1 -1
- package/dist/{chunk-GMXHLSLL.js → chunk-4ODZXQV2.js} +81 -98
- package/dist/chunk-4ODZXQV2.js.map +1 -0
- package/dist/{chunk-OLULBECP.js → chunk-7TPYV2ER.js} +27 -1
- package/dist/chunk-7TPYV2ER.js.map +1 -0
- package/dist/chunk-E22YUOAL.js +111 -0
- package/dist/chunk-E22YUOAL.js.map +1 -0
- package/dist/{chunk-SUGME4OT.js → chunk-Z7ZU7IYZ.js} +209 -85
- package/dist/chunk-Z7ZU7IYZ.js.map +1 -0
- package/dist/contract/index.d.ts +9 -9
- package/dist/contract/index.js +4 -3
- package/dist/contract/index.js.map +1 -1
- package/dist/{control-Bf8owbuG.d.ts → control-DxvZeV5X.d.ts} +1 -1
- package/dist/control.d.ts +2 -2
- package/dist/hosted/index.d.ts +4 -4
- package/dist/{index-Bvk35ils.d.ts → index-DsnOpCO6.d.ts} +1 -1
- package/dist/{index-D9dwa00f.d.ts → index-GISRh500.d.ts} +2 -2
- package/dist/index.d.ts +98 -14
- package/dist/index.js +331 -128
- package/dist/index.js.map +1 -1
- package/dist/meta-eval/index.d.ts +2 -2
- package/dist/multishot/index.js.map +1 -1
- package/dist/openapi.json +1 -1
- package/dist/{provenance-D0WeCXt1.d.ts → provenance-cUnovpWV.d.ts} +42 -11
- package/dist/{registry-qmbYT3Eo.d.ts → registry-DPly4_hZ.d.ts} +1 -1
- package/dist/{release-report-DszkgvJ3.d.ts → release-report-DGoeObZT.d.ts} +2 -2
- package/dist/reporting.d.ts +4 -4
- package/dist/{researcher-BaVsy0sW.d.ts → researcher-WJvIpX3L.d.ts} +2 -2
- package/dist/rl.d.ts +6 -6
- package/dist/{rubric-predictive-validity-DgBHWsh7.d.ts → rubric-predictive-validity-D_4BSXGV.d.ts} +1 -1
- package/dist/{run-campaign-HXPJAUZ3.js → run-campaign-5J3ED2UJ.js} +3 -2
- package/dist/{run-record-DgUVo5pw.d.ts → run-record-BgTFzO2r.d.ts} +1 -1
- package/dist/{summary-report-BQvXpvaR.d.ts → summary-report-ByiOUrHj.d.ts} +1 -1
- package/dist/{types-Beb6KPqZ.d.ts → types-c2R2kfmv.d.ts} +45 -12
- package/package.json +1 -1
- package/dist/chunk-GMXHLSLL.js.map +0 -1
- package/dist/chunk-OLULBECP.js.map +0 -1
- package/dist/chunk-SUGME4OT.js.map +0 -1
- /package/dist/{run-campaign-HXPJAUZ3.js.map → run-campaign-5J3ED2UJ.js.map} +0 -0
|
@@ -3,7 +3,7 @@ import { C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfi
|
|
|
3
3
|
import { T as TraceEmitter } from './emitter-DEZwY14K.js';
|
|
4
4
|
import { F as FailureClass } from './schema-m0gsnbt3.js';
|
|
5
5
|
import { T as TraceStore } from './store-CKUAgsJz.js';
|
|
6
|
-
import {
|
|
6
|
+
import { b as RunSplitTag, a as RunTokenUsage, R as RunRecord } from './run-record-BgTFzO2r.js';
|
|
7
7
|
|
|
8
8
|
interface ActionExecutionPolicy {
|
|
9
9
|
allowedTypes?: string[];
|
package/dist/control.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-
|
|
1
|
+
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DxvZeV5X.js';
|
|
2
2
|
export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-DuFBYg7A.js';
|
|
3
3
|
import './feedback-trajectory-8hKC5EOb.js';
|
|
4
4
|
import './dataset-B2kL-fSM.js';
|
|
@@ -6,4 +6,4 @@ import './errors-Dwqw-T_m.js';
|
|
|
6
6
|
import './emitter-DEZwY14K.js';
|
|
7
7
|
import './schema-m0gsnbt3.js';
|
|
8
8
|
import './store-CKUAgsJz.js';
|
|
9
|
-
import './run-record-
|
|
9
|
+
import './run-record-BgTFzO2r.js';
|
package/dist/hosted/index.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
export { E as EvalRunCellScore, d as EvalRunEvent, e as EvalRunGenerationSnapshot, f as EvalRunStatus, g as HOSTED_WIRE_VERSION, H as HostedClient, h as HostedIngestHeaders, a as HostedTenant, i as HostedWireVersion, j as IngestEvalRunsRequest, k as IngestResponse, l as IngestTracesRequest, T as TraceSpanEvent, m as createHostedClient } from '../index-
|
|
2
|
-
import '../types-
|
|
3
|
-
import '../
|
|
4
|
-
import '../run-record-DgUVo5pw.js';
|
|
1
|
+
export { E as EvalRunCellScore, d as EvalRunEvent, e as EvalRunGenerationSnapshot, f as EvalRunStatus, g as HOSTED_WIRE_VERSION, H as HostedClient, h as HostedIngestHeaders, a as HostedTenant, i as HostedWireVersion, j as IngestEvalRunsRequest, k as IngestResponse, l as IngestTracesRequest, T as TraceSpanEvent, m as createHostedClient } from '../index-GISRh500.js';
|
|
2
|
+
import '../types-c2R2kfmv.js';
|
|
3
|
+
import '../run-record-BgTFzO2r.js';
|
|
5
4
|
import '../errors-Dwqw-T_m.js';
|
|
6
5
|
import '../schema-m0gsnbt3.js';
|
|
6
|
+
import '../summary-report-ByiOUrHj.js';
|
|
7
7
|
import '../failure-cluster-CL7IVgkJ.js';
|
|
8
8
|
import '../store-CKUAgsJz.js';
|
|
9
9
|
import '../judge-calibration-DilmB3Ml.js';
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as MutableSurface,
|
|
2
|
-
import { G as GainDistributionBin, P as ParetoFigureSpec } from './summary-report-
|
|
1
|
+
import { M as MutableSurface, j as GateDecision } from './types-c2R2kfmv.js';
|
|
2
|
+
import { G as GainDistributionBin, P as ParetoFigureSpec } from './summary-report-ByiOUrHj.js';
|
|
3
3
|
import { a as ContinuousAgreement } from './judge-calibration-DilmB3Ml.js';
|
|
4
4
|
|
|
5
5
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-
|
|
2
|
-
import { R as RunRecord } from './run-record-
|
|
3
|
-
export { e as AGENT_PROFILE_KINDS, A as AgentProfileCell, d as AgentProfileCellInput, f as AgentProfileCellSchemaVersion, g as AgentProfileCellValidationError, h as AgentProfileDimensionValue, i as AgentProfileHarness, j as AgentProfileJson, k as AgentProfileKind, l as AgentProfileSource, m as AgentProfileSourceInput, J as JudgeScoresRecord, c as RunJudgeMetadata, n as RunOutcome, o as RunRecordValidationError,
|
|
1
|
+
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DxvZeV5X.js';
|
|
2
|
+
import { R as RunRecord } from './run-record-BgTFzO2r.js';
|
|
3
|
+
export { e as AGENT_PROFILE_KINDS, A as AgentProfileCell, d as AgentProfileCellInput, f as AgentProfileCellSchemaVersion, g as AgentProfileCellValidationError, h as AgentProfileDimensionValue, i as AgentProfileHarness, j as AgentProfileJson, k as AgentProfileKind, l as AgentProfileSource, m as AgentProfileSourceInput, J as JudgeScoresRecord, c as RunJudgeMetadata, n as RunOutcome, o as RunRecordValidationError, b as RunSplitTag, a as RunTokenUsage, S as SandboxAgentProfileLike, p as agentProfileCellHashMaterial, q as agentProfileCellKey, r as assertRunAgentProfileCell, s as buildAgentProfileCell, t as buildSandboxAgentProfileCell, u as groupRunsByAgentProfileCell, v as isRunRecord, w as parseRunRecordSafe, x as requireAgentProfileCell, y as roundTripRunRecord, z as toAgentProfileJson, B as validateAgentProfileCell, C as validateRunRecord, D as verifyAgentProfileCell } from './run-record-BgTFzO2r.js';
|
|
4
4
|
import { AxAIService, AxFunction } from '@ax-llm/ax';
|
|
5
|
-
import { d as Severity, M as MultiLayerVerifier, e as VerifyOptions, L as Layer, f as LayerResult, g as VerifyContext } from './researcher-
|
|
6
|
-
export { C as CallbackResearcher, h as CallbackResearcherOptions, i as CampaignFactoryParams, j as CampaignIntegrityPolicy, k as CampaignRunContext, l as CampaignRunOutcome, m as CampaignRunner, n as CampaignScenario, o as CampaignVariant, c as EvalCampaignOptions, b as EvalCampaignResult, E as ExperimentPlan, a as ExperimentResult, p as FailedRun, F as FailureMode, q as Finding, s as LayerStatus, N as NoopResearcher, R as Researcher, S as SteeringChange, V as VerificationReport, t as gradeSemanticStatus, r as runEvalCampaign } from './researcher-
|
|
5
|
+
import { d as Severity, M as MultiLayerVerifier, e as VerifyOptions, L as Layer, f as LayerResult, g as VerifyContext } from './researcher-WJvIpX3L.js';
|
|
6
|
+
export { C as CallbackResearcher, h as CallbackResearcherOptions, i as CampaignFactoryParams, j as CampaignIntegrityPolicy, k as CampaignRunContext, l as CampaignRunOutcome, m as CampaignRunner, n as CampaignScenario, o as CampaignVariant, c as EvalCampaignOptions, b as EvalCampaignResult, E as ExperimentPlan, a as ExperimentResult, p as FailedRun, F as FailureMode, q as Finding, s as LayerStatus, N as NoopResearcher, R as Researcher, S as SteeringChange, V as VerificationReport, t as gradeSemanticStatus, r as runEvalCampaign } from './researcher-WJvIpX3L.js';
|
|
7
7
|
import { R as Run$1, S as Span, a as TraceEvent, A as Artifact$1, B as BudgetLedgerEntry, h as BudgetSpec, L as LlmSpan } from './schema-m0gsnbt3.js';
|
|
8
8
|
export { E as EventKind, i as FAILURE_CLASSES, F as FailureClass, G as GenericSpan, J as JudgeSpan, M as Message, d as RetrievalSpan, g as RunLayer, f as RunStatus, e as SandboxSpan, j as SpanBase, c as SpanKind, k as SpanStatus, l as TRACE_SCHEMA_VERSION, T as ToolSpan, m as isJudgeSpan, n as isLlmSpan, o as isRetrievalSpan, p as isSandboxSpan, q as isToolSpan } from './schema-m0gsnbt3.js';
|
|
9
9
|
import { T as TraceStore, R as RunFilter } from './store-CKUAgsJz.js';
|
|
@@ -16,8 +16,8 @@ import { T as TraceAnalysisStore } from './store-jzKpMl16.js';
|
|
|
16
16
|
export { D as DEFAULT_TRACE_ANALYST_BUDGETS, a as DatasetOverview, Q as QueryTracesPage, S as SearchSpanResult, b as SearchTraceResult, c as SpanMatchRecord, d as TRACE_ANALYST_TRUNCATION_MARKER_PREFIX, e as TraceAnalystByteBudgets, f as TraceAnalystFilters, g as TraceAnalystSpan, h as TraceAnalystSpanKind, i as TraceAnalystSpanStatus, j as TraceAnalystTraceSummary, V as ViewSpansResult, k as ViewTraceOversized, l as ViewTraceResult } from './store-jzKpMl16.js';
|
|
17
17
|
import { b as JudgeFn, J as JudgeInput, B as BenchmarkRunnerConfig, S as Scenario, c as BenchmarkReport, P as ProductClientConfig, C as CheckResult, T as TestResult, d as PersonaConfig, D as DriverResult, e as DriverState, f as CollectedArtifacts, g as ScenarioResult, h as TurnMetrics, i as ScenarioFile, j as CompletionCriterion } from './types-DhqpAi_z.js';
|
|
18
18
|
export { A as ArtifactCheck, k as ArtifactResult, E as EvalResult, F as FeedbackPattern, l as JudgeConfig, m as JudgeRubric, a as JudgeScore, n as PersonaRigor, R as RouteMap, o as RubricDimension, p as Turn, q as TurnResult } from './types-DhqpAi_z.js';
|
|
19
|
-
import { a as Analyst, b as AnalystSeverity, c as AnalystFinding, d as AnalystCost, e as AnalystContext } from './registry-
|
|
20
|
-
export { f as AnalystHooks, g as AnalystInputKind, A as AnalystRegistry, h as AnalystRegistryOptions, i as AnalystRequirements, j as AnalystRunEvent, k as AnalystRunInputs, l as AnalystRunResult, m as AnalystRunSummary, B as BudgetPolicy, C as ChatCallOpts, n as ChatClient, o as ChatRequest, p as ChatResponse, q as ChatTransport, r as CliBridgeTransportOpts, s as CreateChatClientOpts, D as DirectProviderTransportOpts, E as EvidenceRef, M as MockTransportOpts, R as RegistryRunOpts, t as RouterTransportOpts, S as SandboxSdkTransportOpts, u as computeFindingId, v as createChatClient, w as makeFinding } from './registry-
|
|
19
|
+
import { a as Analyst, b as AnalystSeverity, c as AnalystFinding, d as AnalystCost, e as AnalystContext } from './registry-DPly4_hZ.js';
|
|
20
|
+
export { f as AnalystHooks, g as AnalystInputKind, A as AnalystRegistry, h as AnalystRegistryOptions, i as AnalystRequirements, j as AnalystRunEvent, k as AnalystRunInputs, l as AnalystRunResult, m as AnalystRunSummary, B as BudgetPolicy, C as ChatCallOpts, n as ChatClient, o as ChatRequest, p as ChatResponse, q as ChatTransport, r as CliBridgeTransportOpts, s as CreateChatClientOpts, D as DirectProviderTransportOpts, E as EvidenceRef, M as MockTransportOpts, R as RegistryRunOpts, t as RouterTransportOpts, S as SandboxSdkTransportOpts, u as computeFindingId, v as createChatClient, w as makeFinding } from './registry-DPly4_hZ.js';
|
|
21
21
|
import { TCloud } from '@tangle-network/tcloud';
|
|
22
22
|
import { z } from 'zod';
|
|
23
23
|
export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-DuFBYg7A.js';
|
|
@@ -25,11 +25,11 @@ import { A as AgentEvalError } from './errors-Dwqw-T_m.js';
|
|
|
25
25
|
export { a as AgentEvalErrorCode, C as CaptureIntegrityError, b as ConfigError, J as JudgeError, N as NotFoundError, R as ReplayError, V as ValidationError, c as VerificationError } from './errors-Dwqw-T_m.js';
|
|
26
26
|
import { a as FeedbackLabel, F as FeedbackTrajectoryStore, b as FeedbackTrajectory } from './feedback-trajectory-8hKC5EOb.js';
|
|
27
27
|
export { c as FeedbackArtifactType, d as FeedbackAttempt, e as FeedbackLabelKind, f as FeedbackLabelSource, g as FeedbackOptimizerRow, h as FeedbackOutcome, i as FeedbackReplayAdapter, j as FeedbackReplayResult, k as FeedbackSeverity, l as FeedbackSplitPolicy, m as FeedbackTask, n as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-8hKC5EOb.js';
|
|
28
|
-
import { A as AgentProfile } from './agent-profile-
|
|
29
|
-
export { a as BackendIntegrityError, B as BackendIntegrityReport, b as agentProfileHash, c as assertRealBackend, s as summarizeBackendIntegrity } from './agent-profile-
|
|
28
|
+
import { A as AgentProfile } from './agent-profile-DzcPHR1Z.js';
|
|
29
|
+
export { a as BackendIntegrityError, B as BackendIntegrityReport, b as agentProfileHash, c as assertRealBackend, s as summarizeBackendIntegrity } from './agent-profile-DzcPHR1Z.js';
|
|
30
30
|
export { DataAcquisitionPlan, KnowledgeAcquisitionMode, KnowledgeBundle, KnowledgeFallbackPolicy, KnowledgeFreshness, KnowledgeImportance, KnowledgeReadinessReport, KnowledgeRecommendedAction, KnowledgeRequirement, KnowledgeRequirementCategory, KnowledgeResponsibleSurface, KnowledgeSensitivity, ScoreKnowledgeReadinessOptions, UserQuestion, acquisitionPlansForKnowledgeGaps, blockingKnowledgeEval, knowledgeReadinessTracePayload, scoreKnowledgeReadiness, userQuestionsForKnowledgeGaps } from './knowledge/index.js';
|
|
31
|
-
import { i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard } from './release-report-
|
|
32
|
-
export { A as ActionableSideInfo, s as AsiSeverity, B as BootstrapOptions, a as BootstrapResult, C as CliffsMagnitude, t as CorpusAgreementOptions, u as CorpusAgreementPerDimension, v as CorpusAgreementReport, x as CorpusScoreRecord, J as JudgeReplayGateArgs, P as PairedBootstrapOptions, b as PairedBootstrapResult, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, V as Verdict, W as WeightedCompositeInput, y as WeightedCompositeResult, l as assertReleaseConfidence, m as benjaminiHochberg, z as bonferroni, n as bootstrapCi, D as cliffsDelta, E as cohensD, F as confidenceInterval, G as corpusInterRaterAgreement, H as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, I as interRaterReliability, K as interpretCliffs, p as judgeReplayGate, L as mannWhitneyU, M as normalizeScores, q as pairedBootstrap, N as pairedMde, O as pairedTTest, Q as partialCredit, r as renderReleaseReport, S as requiredSampleSize, T as weightedComposite, U as weightedMean, w as wilcoxonSignedRank } from './release-report-
|
|
31
|
+
import { i as ReleaseConfidenceThresholds, g as ReleaseConfidenceScorecard } from './release-report-DGoeObZT.js';
|
|
32
|
+
export { A as ActionableSideInfo, s as AsiSeverity, B as BootstrapOptions, a as BootstrapResult, C as CliffsMagnitude, t as CorpusAgreementOptions, u as CorpusAgreementPerDimension, v as CorpusAgreementReport, x as CorpusScoreRecord, J as JudgeReplayGateArgs, P as PairedBootstrapOptions, b as PairedBootstrapResult, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, h as ReleaseConfidenceStatus, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, V as Verdict, W as WeightedCompositeInput, y as WeightedCompositeResult, l as assertReleaseConfidence, m as benjaminiHochberg, z as bonferroni, n as bootstrapCi, D as cliffsDelta, E as cohensD, F as confidenceInterval, G as corpusInterRaterAgreement, H as corpusInterRaterAgreementFromJudgeScores, o as evaluateReleaseConfidence, I as interRaterReliability, K as interpretCliffs, p as judgeReplayGate, L as mannWhitneyU, M as normalizeScores, q as pairedBootstrap, N as pairedMde, O as pairedTTest, Q as partialCredit, r as renderReleaseReport, S as requiredSampleSize, T as weightedComposite, U as weightedMean, w as wilcoxonSignedRank } from './release-report-DGoeObZT.js';
|
|
33
33
|
import { S as SandboxDriver, H as HarnessConfig, a as SandboxHarnessResult } from './test-graded-scenario-BdVaPyHT.js';
|
|
34
34
|
export { D as DockerSandboxDriver, c as SandboxHarness, d as SandboxResult, e as SubprocessSandboxDriver, f as SubprocessSandboxDriverOptions, g as TestGradedRunOptions, b as TestGradedRunResult, T as TestGradedScenario, h as TestOutputParser, i as composeParsers, j as jestTestParser, p as pytestTestParser, r as runTestGradedScenario, v as vitestTestParser } from './test-graded-scenario-BdVaPyHT.js';
|
|
35
35
|
import { T as TraceEmitter } from './emitter-DEZwY14K.js';
|
|
@@ -49,8 +49,8 @@ export { b as CalibrationResult, c as CandidateScore, a as ContinuousAgreement,
|
|
|
49
49
|
export { D as DEFAULT_RED_TEAM_CORPUS, R as RedTeamCase, a as RedTeamCategory, b as RedTeamFinding, c as RedTeamPayload, d as RedTeamReport, r as redTeamDataset, e as redTeamReport, s as scoreRedTeamOutput, t as toolNamesForRun } from './red-team-DW9Ca_tj.js';
|
|
50
50
|
import { a as PrmGrader } from './rubric-BOfxn4ja.js';
|
|
51
51
|
export { EuRiskClass, GovernanceContext, GovernanceFinding, GovernanceReport, UseCaseSignals, classifyEuAiRisk, euAiActReport, nistAiRmfReport, renderMarkdown, soc2Report, summarize } from './governance/index.js';
|
|
52
|
-
export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as benchmarkDeterministicSplit, i as benchmarks } from './index-
|
|
53
|
-
export { G as GainDistributionBin, a as GainDistributionFigureSpec, b as GainDistributionOptions, m as GateDecision, n as GateEvidence, H as HeldOutGate, o as HeldOutGateConfig, q as HeldOutGateRejectionCode, P as ParetoFigureSpec, c as ParetoPoint, R as RESEARCH_REPORT_HARD_PAIR_FLOOR, d as ResearchReport, e as ResearchReportCandidate, f as ResearchReportDecision, g as ResearchReportMethodology, h as ResearchReportOptions, i as ResearchReportRecommendation, S as SummaryTable, j as SummaryTableOptions, k as SummaryTableRow, l as gainHistogram, p as paretoChart, r as researchReport, s as summaryTable } from './summary-report-
|
|
52
|
+
export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as benchmarkDeterministicSplit, i as benchmarks } from './index-DsnOpCO6.js';
|
|
53
|
+
export { G as GainDistributionBin, a as GainDistributionFigureSpec, b as GainDistributionOptions, m as GateDecision, n as GateEvidence, H as HeldOutGate, o as HeldOutGateConfig, q as HeldOutGateRejectionCode, P as ParetoFigureSpec, c as ParetoPoint, R as RESEARCH_REPORT_HARD_PAIR_FLOOR, d as ResearchReport, e as ResearchReportCandidate, f as ResearchReportDecision, g as ResearchReportMethodology, h as ResearchReportOptions, i as ResearchReportRecommendation, S as SummaryTable, j as SummaryTableOptions, k as SummaryTableRow, l as gainHistogram, p as paretoChart, r as researchReport, s as summaryTable } from './summary-report-ByiOUrHj.js';
|
|
54
54
|
export { I as InterimReleaseConfidence, a as InterimReleaseConfidenceInput, P as PairedEvalueOptions, b as PairedEvalueSequence, c as PairedEvalueStep, S as SequentialDecision, e as evaluateInterimReleaseConfidence, p as pairedEvalueSequence } from './sequential-5iSVfzl2.js';
|
|
55
55
|
import './outcome-store-D6KWmYvj.js';
|
|
56
56
|
|
|
@@ -802,6 +802,90 @@ declare const KNOWLEDGE_POISONING_KIND_SPEC: TraceAnalystKindSpec;
|
|
|
802
802
|
*/
|
|
803
803
|
declare const DEFAULT_TRACE_ANALYST_KINDS: readonly TraceAnalystKindSpec[];
|
|
804
804
|
|
|
805
|
+
/**
|
|
806
|
+
* Skill-usage analyst — a DETERMINISTIC `Analyst` over a Claude/Codex skill
|
|
807
|
+
* library + its trace corpus. Unlike the trace-store kinds (failure-mode,
|
|
808
|
+
* improvement, ...) this kind calls no LLM: it mines real usage and skill
|
|
809
|
+
* structure and emits findings by rule.
|
|
810
|
+
*
|
|
811
|
+
* It exists because the naive "Skill-tool invocation count" lies low — it
|
|
812
|
+
* misses orchestrated sub-dispatch (a leaf skill run BY /pursue or /governor
|
|
813
|
+
* logs under the parent), slash-command entry, local-script bypass, and
|
|
814
|
+
* on-disk artifacts. The 2026-05-30 skill audit found 39/53 skills at zero
|
|
815
|
+
* direct invocations, yet only one was a genuine cut: the rest were
|
|
816
|
+
* measurement-invisible or discovery-limited. This analyst encodes that
|
|
817
|
+
* lesson as a multi-signal usage model so a cheap repeatable pass can keep
|
|
818
|
+
* the library honest, and so the expensive audit workflow's verdicts can
|
|
819
|
+
* GEPA-distill it toward agreement (see `gold/skill-verdicts.gold.jsonl`).
|
|
820
|
+
*
|
|
821
|
+
* Report-building (`buildSkillUsageReport`, an fs scan) is separated from
|
|
822
|
+
* finding emission (`SkillUsageAnalyst.analyze`, pure) so the slow scan runs
|
|
823
|
+
* once at the registry boundary and the rule logic stays unit-testable.
|
|
824
|
+
*/
|
|
825
|
+
|
|
826
|
+
type SkillKind = 'public' | 'private';
|
|
827
|
+
/** One skill's multi-signal usage + structure. All counts are deterministic. */
|
|
828
|
+
interface SkillUsageRecord {
|
|
829
|
+
name: string;
|
|
830
|
+
kind: SkillKind;
|
|
831
|
+
/** Absolute path to the skill's SKILL.md. */
|
|
832
|
+
path: string;
|
|
833
|
+
lines: number;
|
|
834
|
+
/** `"skill":"<name>"` Skill-tool invocations across the trace corpus. */
|
|
835
|
+
directInvocations: number;
|
|
836
|
+
/** `<command-name>/<name>` slash invocations across the trace corpus. */
|
|
837
|
+
slashInvocations: number;
|
|
838
|
+
/** Sibling skills whose SKILL.md dispatches to this one (`/<name>`). Proxy
|
|
839
|
+
* for orchestrated sub-dispatch the per-skill counter cannot see. */
|
|
840
|
+
inboundRefs: number;
|
|
841
|
+
/** On-disk artifacts attributable to the skill (e.g. `.evolve/<name>/**`). */
|
|
842
|
+
artifactCount: number;
|
|
843
|
+
/** Tangle-private reference count in the body (leak signal for public skills). */
|
|
844
|
+
tanglePrivateRefs: number;
|
|
845
|
+
hasReferencesDir: boolean;
|
|
846
|
+
hasEvalsDir: boolean;
|
|
847
|
+
/** Body mentions `skill-runs.jsonl` (visible to /reflect + /governor). */
|
|
848
|
+
logsRuns: boolean;
|
|
849
|
+
/** Description carries an explicit `Triggers:` clause / trigger phrases. */
|
|
850
|
+
hasTriggerPhrases: boolean;
|
|
851
|
+
}
|
|
852
|
+
interface SkillUsageReport {
|
|
853
|
+
generatedFromTraces: number;
|
|
854
|
+
records: SkillUsageRecord[];
|
|
855
|
+
}
|
|
856
|
+
interface SkillUsageScanConfig {
|
|
857
|
+
/** Dirs holding `*.jsonl` transcripts (Claude `~/.claude/projects`, Codex sessions). */
|
|
858
|
+
transcriptDirs: string[];
|
|
859
|
+
/** Skill roots to scan; each dir directly under `root` with a `SKILL.md` is a skill. */
|
|
860
|
+
skillRoots: {
|
|
861
|
+
root: string;
|
|
862
|
+
kind: SkillKind;
|
|
863
|
+
}[];
|
|
864
|
+
/** Roots scanned for `<root>/.evolve/<skill>` artifact dirs. */
|
|
865
|
+
artifactRoots?: string[];
|
|
866
|
+
/** Token-prefixed mappings: skill name → extra artifact subpaths under an artifactRoot
|
|
867
|
+
* (e.g. reflect → `.evolve/reflections`). Catches non-eponymous artifact dirs. */
|
|
868
|
+
artifactAliases?: Record<string, string[]>;
|
|
869
|
+
/** Cap files read per transcript dir (bounds a huge corpus); 0 = unbounded. */
|
|
870
|
+
maxTranscriptsPerDir?: number;
|
|
871
|
+
}
|
|
872
|
+
/** Scan the corpus + skill roots into a {@link SkillUsageReport}. Deterministic. */
|
|
873
|
+
declare function buildSkillUsageReport(config: SkillUsageScanConfig): SkillUsageReport;
|
|
874
|
+
/** Pure rule pass over a report → findings. Exported for direct/unit use. */
|
|
875
|
+
declare function emitSkillUsageFindings(report: SkillUsageReport, producedAt: string): AnalystFinding[];
|
|
876
|
+
declare class SkillUsageAnalyst implements Analyst<SkillUsageReport> {
|
|
877
|
+
readonly id = "skill-usage";
|
|
878
|
+
readonly description = "Deterministic multi-signal skill-usage analysis: flags dead skills, measurement-invisible (orchestrated) usage, discovery gaps, public-repo leaks, bloat, missing evals, and missing run-logging.";
|
|
879
|
+
readonly inputKind: "custom";
|
|
880
|
+
readonly cost: {
|
|
881
|
+
kind: "deterministic";
|
|
882
|
+
est_usd_per_run: number;
|
|
883
|
+
};
|
|
884
|
+
readonly version = "1.0.0";
|
|
885
|
+
analyze(input: SkillUsageReport, ctx: AnalystContext): Promise<AnalystFinding[]>;
|
|
886
|
+
}
|
|
887
|
+
declare const SKILL_USAGE_ANALYST: SkillUsageAnalyst;
|
|
888
|
+
|
|
805
889
|
/**
|
|
806
890
|
* Pre-curated tool subsets for analyst kinds.
|
|
807
891
|
*
|
|
@@ -5493,4 +5577,4 @@ declare function traceJudge(judge: JudgeFn, judgeName: string, opts: TracedJudge
|
|
|
5493
5577
|
*/
|
|
5494
5578
|
declare function traceJudgeEnsemble(judges: JudgeFn[], judgeNames: string[], opts: TracedJudgeOptions): JudgeFn;
|
|
5495
5579
|
|
|
5496
|
-
export { ANALYST_SEVERITIES, type ActiveLearningOptions, type AdapterRun, AgentDriver, type AgentDriverConfig, AgentEvalError, AgentProfile, type AlignmentOp, Analyst, AnalystContext, AnalystCost, AnalystFinding, AnalystSeverity, AnalyzeTracesInput, AnalyzeTracesOptions, AnalyzeTracesResult, type AntiSlopConfig, type AntiSlopIssue, type AntiSlopReport, Artifact$1 as Artifact, type Artifact as ArtifactCheckArtifact, type ArtifactEventLike, type ArtifactValidator, type AssertCrossFamilyOptions, type AssertSingleBackendOptions, type AutoPrClient, AxGepaSteeringOptimizer, type AxSteeringOptimizerConfig, type BackendDescriptor, BaselineReport, BehaviorAssertion, BenchmarkReport, BenchmarkRunner, BenchmarkRunnerConfig, type BisectOptions, type BisectResult, type BisectStep, BudgetBreachError, BudgetGuard, BudgetLedgerEntry, BudgetSpec, CallExpectation, type CanaryAlert, type CanaryKind, type CanaryLeak, type CanaryOptions, type CanaryReport, type CanarySeverity, type CandidateScenario, type CausalAttributionReport, type CellVerdict, CheckResult, CollectedArtifacts, type CommandRunner, CompletionCriterion, type CompletionRequirement, type CompletionVerdict, type ConceptComplexity, type ConceptFinding, type ConceptSpec, type ConceptWeightStrategy, type ContinuityCheck, type ContinuityCheckResult, type ContinuityReport, type ContinuitySnapshotPair, type ContractMetric, type ContractReport, ConvergenceTracker, type CorrectnessChecker, type CostEntry, type CostSummary, CostTracker, type CounterfactualContext, type CounterfactualMutation, type CounterfactualResult, type CounterfactualRunner, type CreateDefaultReviewerOptions, type CreateSandboxPoolOpts, type CreateTraceAnalystKindOpts, CrossFamilyError, type CrossTraceDiff, type CrossTraceDiffOptions, D1ExperimentStore, type D1ExperimentStoreOptions, type D1Like, type D1PreparedStatementLike, DEFAULT_AGENT_SLOS, DEFAULT_COMPLEXITY_WEIGHTS, DEFAULT_FINDERS, DEFAULT_HARNESS_OBJECTIVES, DEFAULT_MUTATION_PRIMITIVES, DEFAULT_MUTATORS, DEFAULT_PR_REVIEW_SCORE_WEIGHTS, DEFAULT_RUN_SCORE_WEIGHTS, DEFAULT_SEVERITY_WEIGHTS, DEFAULT_TRACE_ANALYST_KINDS, Dataset, DatasetScenario, type DecideNextUserTurnOpts, type DeployFamily, type DeployGateLayerInput, type DeployRunResult, type DeployRunner, type DiffPolicy, type DiffScorecardOptions, type DirEntry, type Direction, type DiscoverPersonasOptions, type DiscoveredPersona, DriverResult, DriverState, DualAgentBench, type DualAgentBenchConfig, type DualAgentReport, type DualAgentRound, type DualAgentScenario, type DualAgentScenarioResult, ERROR_COUNT_PATTERNS, type ErrorCountPattern, type EvolutionRound, type ExecutorConfig, type Expectation, type Experiment, type Run as ExperimentRun, type ExperimentStore, ExperimentTracker, type ExportedRewardModel, type ExtractOptions, type ExtractResult, FAILURE_MODE_KIND_SPEC, FINDING_SUBJECT_GRAMMAR_PROMPT, FINDING_SUBJECT_KINDS, type FactorContribution, type FactorialCell, FeedbackLabel, FeedbackTrajectory, FeedbackTrajectoryStore, type FileChange, FileSystemExperimentStore, type FileSystemExperimentStoreOptions, type FindingSubject, type FindingSubjectKind, FindingSubjectStringSchema, type FindingsDiff, FindingsStore, type FlowAction, type FlowLayerEnv, type FlowLayerFactoryInput, type FlowRunner, type FlowRunnerStepResult, type FlowSpec, type FlowStep, type GhCliClientOptions, type GoldenSeverity, type GoldenSpec, type HarnessAdapter, HarnessConfig, type HarnessExperimentConfig, type HarnessExperimentResult, type HarnessIntervention, type HarnessRunRequest, type HarnessRunResult, type HarnessScenario, type HarnessSelection, type HarnessVariant, type HarnessVariantReport, HoldoutAuditor, type HostedJudgeConfig, type HostedJudgeDimension, type HostedJudgeRequest, type HostedJudgeResponse, type HostedRunCriticConfig, type HostedRunScoreRequest, type HostedRunScoreResponse, type HttpGithubClientOptions, type HypothesisManifest, type HypothesisResult, IMPROVEMENT_KIND_SPEC, INTENT_MATCH_JUDGE_VERSION, type ImageData, InMemoryExperimentStore, InMemoryWorkspaceInspector, type InferenceScorer, type InspectorContext, type IntentMatchInput, type IntentMatchOptions, type IntentMatchResult, type InteractionContribution, type JudgeAdapterOpts, type JudgeFamily, type JudgeFleetOptions, JudgeFn, JudgeInput, type JudgeReplayResult, type JudgeRetryOutcome, type JudgeRetryPolicy, JudgeRunner, KIND_EXPECTED_SUBJECTS, KNOWLEDGE_GAP_KIND_SPEC, KNOWLEDGE_POISONING_KIND_SPEC, type KeywordConceptSpec, type KeywordCoverageFinding, type KeywordCoverageOptions, type KeywordCoverageResult, type LangfuseEnvelope, type LangfuseGeneration, type LangfuseScore, Layer, LayerResult, type LiveProofArtifact, type LiveProofConfig, type LiveProofContext, type LiveProofResult, LlmClientOptions, type LlmCorrectnessCheckerOpts, LlmSpan, LockedJsonlAppender, MODEL_PRICING, type MatchResult, type MatcherResult, type MeasurementPolicy, type MergeOptions, MetricsCollector, type MuffledFinder, type MuffledFinding, MultiLayerVerifier, type MultiToolchainLayerConfig, type Mutator, Mutex, type Objective, type Oracle, type OracleObservation, type OracleReport, type OracleResult, type OrthogonalityInput, type OrthogonalityResult, OtelExportConfig, OtelExporter, type OtelPipelineHandle, type OtelPipelineOptions, PairwiseSteeringOptimizer, type ParaphraseRobustnessScenarioInput, type ParaphraseRobustnessScenarioResult, type ParetoResult, type PersistedFinding, PersonaConfig, type Playbook, type PlaybookEntry, type PoolSlot, type PrReviewAuditCase, type PrReviewBenchmarkSummary, type PrReviewComment, type PrReviewMatchedFinding, type PrReviewOutcome, type PrReviewReferenceFinding, type PrReviewScore, type PrReviewScoreWeights, type PrReviewSeverity, type PrReviewSource, type ProducedProposal, type ProducedState, ProductClient, ProductClientConfig, type PromptHandle, PromptRegistry, type ProposalEventLike, type ProposeAutomatedPullRequestInput, type ProposeAutomatedPullRequestResult, RAW_FINDING_SCHEMA_PROMPT, type RawAnalystFinding, RawAnalystFindingSchema, type RecordRunsOptions, type ReferenceMatchResult, type ReferenceReplayAdapter, type ReferenceReplayAdapterFn, type ReferenceReplayAdapterLike, type ReferenceReplayAggregate, type ReferenceReplayCandidate, type ReferenceReplayCase, type ReferenceReplayCaseRun, type ReferenceReplayExecutionScenario, type ReferenceReplayItem, type ReferenceReplayMatch, type ReferenceReplayMatchStrategy, type ReferenceReplayMatcher, type ReferenceReplayPromotionDecision, type ReferenceReplayPromotionPolicy, type ReferenceReplayRun, type ReferenceReplayRunContext, type ReferenceReplayRunOptions, type ReferenceReplayRunStore, type ReferenceReplayScenario, type ReferenceReplayScenarioScore, type ReferenceReplayScore, type ReferenceReplayScoreOptions, type ReferenceReplaySplit, type ReferenceReplaySplitComparison, type ReferenceReplaySteeringRowsOptions, type ReflectionContext, type ReflectionProposal, ReleaseConfidenceScorecard, ReleaseConfidenceThresholds, type RepoRef, type RequirementCheck, type ReviewerMemoryEntry, type ReviewerOutput, type ReviewerPromptInput, type ReviewerSoftFailDefaults, type ReviewerVerificationSummary, type RobustnessResult, Run$1 as Run, type RunCommandInput, type RunCommandResult, type RunConfig, RunCritic, type RunCriticAdapterOpts, type RunCriticOptions, type RunDiff, RunFilter, RunRecord, type RunScore, type RunScoreWeights, type RunTrace, type RuntimeEventLike, SEMANTIC_CONCEPT_JUDGE_VERSION, SandboxDriver, SandboxHarnessResult, type SandboxJudgeKind, type SandboxJudgeResult, type SandboxJudgeSpec, type SandboxPool, type SatisfiedBy, type ScanOptions, Scenario, type ScenarioCost, ScenarioFile, ScenarioRegistry, ScenarioResult, type Scorecard, type ScorecardCell, type ScorecardCellDiff, type ScorecardDiff, type ScorecardEntry, type ScorecardLogLine, type ScoredTarget, type SelfPlayOptions, type SelfPlayProposer, type SelfPlayScorer, type SemanticConceptJudgeAdapterOpts, type SemanticConceptJudgeInput, type SemanticConceptJudgeOptions, type SemanticConceptJudgeResult, type SeriesConvergenceOptions, type SeriesConvergenceResult, Severity, type SignedManifest, type SignedManifestAlgo, type SingleBackendDivergence, SingleBackendError, type SingleBackendField, type SingleBackendReport, type Slo, type SloCheckResult, type SloComparator, type SloReport, type SloSeverity, type SlopCategory, type SlotFactory, Span, type SteeringBundle, type SteeringDelta, type SteeringOptimizationResult, type SteeringOptimizationRow, type SteeringOptimizationSelector, type SteeringOptimizerBackend, type SteeringOptimizerConfig, type SteeringRolePrompt, type StepAttribution, type SynthesisReason, type SynthesisTarget, type TaskGold, TestResult, type ThresholdContract, TokenCounter, type TokenSpec, type ToolCallEventLike, TraceAnalysisStore, type TraceAnalystAdapterOpts, type TraceAnalystGolden, type TraceAnalystKindSpec, TraceEmitter, TraceEvent, TraceStore, type TraceToolGroupName, type TracedAnalystOptions, type TracedJudgeOptions, Trajectory, TrajectoryStep, type TrialTrace, TurnMetrics, UNIVERSAL_FINDERS, type ValidationContext, type ValidationIssue, type ValidationResult, type VerifierAdapterOpts, VerifyContext, VerifyOptions, type VisualDiffOptions, type VisualDiffResult, type ViteDeployRunnerInput, type WorkflowTopology, type WorkspaceAssertion, type WorkspaceAssertionResult, type WorkspaceInspector, type WorkspaceSnapshot, type WranglerDeployRunnerInput, adversarialJudge, aggregatePrReviewScore, aggregateRunScore, analyzeAntiSlop, analyzeSeries, appendScorecard, assertCrossFamily, assertSingleBackend, attributeCounterfactuals, bisect, buildDriverSystemPrompt, buildReflectionPrompt, buildReviewerPrompt, buildTraceToolsForGroup, byteLengthRange, canaryLeakView, canonicalize, causalAttribution, checkBehavioralCanary, checkCanaries, checkSlos, clamp01, codeExecutionJudge, coherenceJudge, collectionPreserved, commentsForSource, commitBisect, compareReferenceReplay, compilerJudge, composeValidators, containsAll, createAntiSlopJudge, createCustomJudge, createDefaultReviewer, createDomainExpertJudge, createIntentMatchJudge, createJudgeAdapter, createLlmCorrectnessChecker, createRunCriticAdapter, createSandboxPool, createSemanticConceptJudge, createSemanticConceptJudgeAdapter, createTraceAnalystAdapter, createTraceAnalystKind, createVerifierAdapter, crossTraceDiff, crowdingDistance, decideNextUserTurn, decideReferenceReplayPromotion, decideReferenceReplayRunPromotion, defaultIsMaterial, defaultJudges, defaultReferenceReplayMatcher, deployGateLayer, diffFindings, diffScorecard, discoverPersonas, distillPlaybook, dominates, estimateCost, estimateTokens, evaluateContract, evaluateHypothesis, evaluateOracles, executeScenario, expectAgent, exportRewardModel, extractAssetUrls, extractErrorCount, extractProducedState, fileContains, fileExists, findAutoMatchNoExpectation, findConstructorCwdDropped, findFallbackToPass, findLiteralTruePass, findSkipCountsAsPass, flowLayer, formatBenchmarkReport, formatDriverReport, formatFindings, formatScorecardDiff, ghCliClient, precision as goldenPrecision, hashContent, hashJson, htmlContainsElement, httpGithubClient, inMemoryReferenceReplayStore, isModelPriced, isOtelConfigured, jsonHasKeys, jsonShape, jsonlReferenceReplayStore, judgeFamily, keyPreserved, liftSeverity, linterJudge, loadScorecard, loadScorerFromGrader, localCommandRunner, lowercaseMutator, matchGoldens, mergeLayerResults, mergeSteeringBundle, multiToolchainLayer, notBlocked, paraphraseRobustness, paraphraseRobustnessScenarios, paretoFrontier, paretoFrontierWithCrowding, parseCorrectnessResponse, parseFindingSubject, parseRawFinding, parseReflectionResponse, passOrthogonality, pixelDeltaRatio, politenessPrefixMutator, printDriverSummary, promptBisect, proposeAutomatedPullRequest, proposeSynthesisTargets, recordRuns, recordRunsToScorecard, referenceReplayRunsToSteeringRows, referenceReplayScenarioToRunScore, regexMatch, regexMatches, renderFindingSubject, renderMarkdownReport, renderPlaybookMarkdown, renderPriorFindings, renderSteeringText, replayScorerOverCorpus, replayTraceThroughJudge, resetLockedAppendersForTesting, resolveModelPricing, rowCount, rowWhere, runAssertions, runBehavioralCanaries, runCanaries, runCounterfactual, runE2EWorkflow, runExpectations, runHarnessExperiment, runIntentMatchJudge, runJudgeFleet, runKeywordCoverageJudge, runKeywordCoverageJudgeUrl, runLiveProof, runReferenceReplay, runSelfPlay, runSemanticConceptJudge, scalarScore, scanForMuffledGates, scoreContinuity, scorePrReviewComments, scorePrReviewSource, scoreReferenceReplay, securityJudge, selectHarnessVariant, sentenceReorderMutator, signManifest, statusAdvanced, summarizeHarnessResults, summarizePrReviewBenchmark, testJudge, textInSnapshot, toLangfuseEnvelope, toPrometheusText, traceJudge, traceJudgeEnsemble, tracedAnalyzeTraces, typoMutator, urlContains, verifyCompletion, verifyManifest, visualDiff, viteDeployRunner, weightedRecall, whitespaceCollapseMutator, withJudgeRetry, withOtelPipeline, wranglerDeployRunner };
|
|
5580
|
+
export { ANALYST_SEVERITIES, type ActiveLearningOptions, type AdapterRun, AgentDriver, type AgentDriverConfig, AgentEvalError, AgentProfile, type AlignmentOp, Analyst, AnalystContext, AnalystCost, AnalystFinding, AnalystSeverity, AnalyzeTracesInput, AnalyzeTracesOptions, AnalyzeTracesResult, type AntiSlopConfig, type AntiSlopIssue, type AntiSlopReport, Artifact$1 as Artifact, type Artifact as ArtifactCheckArtifact, type ArtifactEventLike, type ArtifactValidator, type AssertCrossFamilyOptions, type AssertSingleBackendOptions, type AutoPrClient, AxGepaSteeringOptimizer, type AxSteeringOptimizerConfig, type BackendDescriptor, BaselineReport, BehaviorAssertion, BenchmarkReport, BenchmarkRunner, BenchmarkRunnerConfig, type BisectOptions, type BisectResult, type BisectStep, BudgetBreachError, BudgetGuard, BudgetLedgerEntry, BudgetSpec, CallExpectation, type CanaryAlert, type CanaryKind, type CanaryLeak, type CanaryOptions, type CanaryReport, type CanarySeverity, type CandidateScenario, type CausalAttributionReport, type CellVerdict, CheckResult, CollectedArtifacts, type CommandRunner, CompletionCriterion, type CompletionRequirement, type CompletionVerdict, type ConceptComplexity, type ConceptFinding, type ConceptSpec, type ConceptWeightStrategy, type ContinuityCheck, type ContinuityCheckResult, type ContinuityReport, type ContinuitySnapshotPair, type ContractMetric, type ContractReport, ConvergenceTracker, type CorrectnessChecker, type CostEntry, type CostSummary, CostTracker, type CounterfactualContext, type CounterfactualMutation, type CounterfactualResult, type CounterfactualRunner, type CreateDefaultReviewerOptions, type CreateSandboxPoolOpts, type CreateTraceAnalystKindOpts, CrossFamilyError, type CrossTraceDiff, type CrossTraceDiffOptions, D1ExperimentStore, type D1ExperimentStoreOptions, type D1Like, type D1PreparedStatementLike, DEFAULT_AGENT_SLOS, DEFAULT_COMPLEXITY_WEIGHTS, DEFAULT_FINDERS, DEFAULT_HARNESS_OBJECTIVES, DEFAULT_MUTATION_PRIMITIVES, DEFAULT_MUTATORS, DEFAULT_PR_REVIEW_SCORE_WEIGHTS, DEFAULT_RUN_SCORE_WEIGHTS, DEFAULT_SEVERITY_WEIGHTS, DEFAULT_TRACE_ANALYST_KINDS, Dataset, DatasetScenario, type DecideNextUserTurnOpts, type DeployFamily, type DeployGateLayerInput, type DeployRunResult, type DeployRunner, type DiffPolicy, type DiffScorecardOptions, type DirEntry, type Direction, type DiscoverPersonasOptions, type DiscoveredPersona, DriverResult, DriverState, DualAgentBench, type DualAgentBenchConfig, type DualAgentReport, type DualAgentRound, type DualAgentScenario, type DualAgentScenarioResult, ERROR_COUNT_PATTERNS, type ErrorCountPattern, type EvolutionRound, type ExecutorConfig, type Expectation, type Experiment, type Run as ExperimentRun, type ExperimentStore, ExperimentTracker, type ExportedRewardModel, type ExtractOptions, type ExtractResult, FAILURE_MODE_KIND_SPEC, FINDING_SUBJECT_GRAMMAR_PROMPT, FINDING_SUBJECT_KINDS, type FactorContribution, type FactorialCell, FeedbackLabel, FeedbackTrajectory, FeedbackTrajectoryStore, type FileChange, FileSystemExperimentStore, type FileSystemExperimentStoreOptions, type FindingSubject, type FindingSubjectKind, FindingSubjectStringSchema, type FindingsDiff, FindingsStore, type FlowAction, type FlowLayerEnv, type FlowLayerFactoryInput, type FlowRunner, type FlowRunnerStepResult, type FlowSpec, type FlowStep, type GhCliClientOptions, type GoldenSeverity, type GoldenSpec, type HarnessAdapter, HarnessConfig, type HarnessExperimentConfig, type HarnessExperimentResult, type HarnessIntervention, type HarnessRunRequest, type HarnessRunResult, type HarnessScenario, type HarnessSelection, type HarnessVariant, type HarnessVariantReport, HoldoutAuditor, type HostedJudgeConfig, type HostedJudgeDimension, type HostedJudgeRequest, type HostedJudgeResponse, type HostedRunCriticConfig, type HostedRunScoreRequest, type HostedRunScoreResponse, type HttpGithubClientOptions, type HypothesisManifest, type HypothesisResult, IMPROVEMENT_KIND_SPEC, INTENT_MATCH_JUDGE_VERSION, type ImageData, InMemoryExperimentStore, InMemoryWorkspaceInspector, type InferenceScorer, type InspectorContext, type IntentMatchInput, type IntentMatchOptions, type IntentMatchResult, type InteractionContribution, type JudgeAdapterOpts, type JudgeFamily, type JudgeFleetOptions, JudgeFn, JudgeInput, type JudgeReplayResult, type JudgeRetryOutcome, type JudgeRetryPolicy, JudgeRunner, KIND_EXPECTED_SUBJECTS, KNOWLEDGE_GAP_KIND_SPEC, KNOWLEDGE_POISONING_KIND_SPEC, type KeywordConceptSpec, type KeywordCoverageFinding, type KeywordCoverageOptions, type KeywordCoverageResult, type LangfuseEnvelope, type LangfuseGeneration, type LangfuseScore, Layer, LayerResult, type LiveProofArtifact, type LiveProofConfig, type LiveProofContext, type LiveProofResult, LlmClientOptions, type LlmCorrectnessCheckerOpts, LlmSpan, LockedJsonlAppender, MODEL_PRICING, type MatchResult, type MatcherResult, type MeasurementPolicy, type MergeOptions, MetricsCollector, type MuffledFinder, type MuffledFinding, MultiLayerVerifier, type MultiToolchainLayerConfig, type Mutator, Mutex, type Objective, type Oracle, type OracleObservation, type OracleReport, type OracleResult, type OrthogonalityInput, type OrthogonalityResult, OtelExportConfig, OtelExporter, type OtelPipelineHandle, type OtelPipelineOptions, PairwiseSteeringOptimizer, type ParaphraseRobustnessScenarioInput, type ParaphraseRobustnessScenarioResult, type ParetoResult, type PersistedFinding, PersonaConfig, type Playbook, type PlaybookEntry, type PoolSlot, type PrReviewAuditCase, type PrReviewBenchmarkSummary, type PrReviewComment, type PrReviewMatchedFinding, type PrReviewOutcome, type PrReviewReferenceFinding, type PrReviewScore, type PrReviewScoreWeights, type PrReviewSeverity, type PrReviewSource, type ProducedProposal, type ProducedState, ProductClient, ProductClientConfig, type PromptHandle, PromptRegistry, type ProposalEventLike, type ProposeAutomatedPullRequestInput, type ProposeAutomatedPullRequestResult, RAW_FINDING_SCHEMA_PROMPT, type RawAnalystFinding, RawAnalystFindingSchema, type RecordRunsOptions, type ReferenceMatchResult, type ReferenceReplayAdapter, type ReferenceReplayAdapterFn, type ReferenceReplayAdapterLike, type ReferenceReplayAggregate, type ReferenceReplayCandidate, type ReferenceReplayCase, type ReferenceReplayCaseRun, type ReferenceReplayExecutionScenario, type ReferenceReplayItem, type ReferenceReplayMatch, type ReferenceReplayMatchStrategy, type ReferenceReplayMatcher, type ReferenceReplayPromotionDecision, type ReferenceReplayPromotionPolicy, type ReferenceReplayRun, type ReferenceReplayRunContext, type ReferenceReplayRunOptions, type ReferenceReplayRunStore, type ReferenceReplayScenario, type ReferenceReplayScenarioScore, type ReferenceReplayScore, type ReferenceReplayScoreOptions, type ReferenceReplaySplit, type ReferenceReplaySplitComparison, type ReferenceReplaySteeringRowsOptions, type ReflectionContext, type ReflectionProposal, ReleaseConfidenceScorecard, ReleaseConfidenceThresholds, type RepoRef, type RequirementCheck, type ReviewerMemoryEntry, type ReviewerOutput, type ReviewerPromptInput, type ReviewerSoftFailDefaults, type ReviewerVerificationSummary, type RobustnessResult, Run$1 as Run, type RunCommandInput, type RunCommandResult, type RunConfig, RunCritic, type RunCriticAdapterOpts, type RunCriticOptions, type RunDiff, RunFilter, RunRecord, type RunScore, type RunScoreWeights, type RunTrace, type RuntimeEventLike, SEMANTIC_CONCEPT_JUDGE_VERSION, SKILL_USAGE_ANALYST, SandboxDriver, SandboxHarnessResult, type SandboxJudgeKind, type SandboxJudgeResult, type SandboxJudgeSpec, type SandboxPool, type SatisfiedBy, type ScanOptions, Scenario, type ScenarioCost, ScenarioFile, ScenarioRegistry, ScenarioResult, type Scorecard, type ScorecardCell, type ScorecardCellDiff, type ScorecardDiff, type ScorecardEntry, type ScorecardLogLine, type ScoredTarget, type SelfPlayOptions, type SelfPlayProposer, type SelfPlayScorer, type SemanticConceptJudgeAdapterOpts, type SemanticConceptJudgeInput, type SemanticConceptJudgeOptions, type SemanticConceptJudgeResult, type SeriesConvergenceOptions, type SeriesConvergenceResult, Severity, type SignedManifest, type SignedManifestAlgo, type SingleBackendDivergence, SingleBackendError, type SingleBackendField, type SingleBackendReport, SkillUsageAnalyst, type SkillUsageRecord, type SkillUsageReport, type SkillUsageScanConfig, type Slo, type SloCheckResult, type SloComparator, type SloReport, type SloSeverity, type SlopCategory, type SlotFactory, Span, type SteeringBundle, type SteeringDelta, type SteeringOptimizationResult, type SteeringOptimizationRow, type SteeringOptimizationSelector, type SteeringOptimizerBackend, type SteeringOptimizerConfig, type SteeringRolePrompt, type StepAttribution, type SynthesisReason, type SynthesisTarget, type TaskGold, TestResult, type ThresholdContract, TokenCounter, type TokenSpec, type ToolCallEventLike, TraceAnalysisStore, type TraceAnalystAdapterOpts, type TraceAnalystGolden, type TraceAnalystKindSpec, TraceEmitter, TraceEvent, TraceStore, type TraceToolGroupName, type TracedAnalystOptions, type TracedJudgeOptions, Trajectory, TrajectoryStep, type TrialTrace, TurnMetrics, UNIVERSAL_FINDERS, type ValidationContext, type ValidationIssue, type ValidationResult, type VerifierAdapterOpts, VerifyContext, VerifyOptions, type VisualDiffOptions, type VisualDiffResult, type ViteDeployRunnerInput, type WorkflowTopology, type WorkspaceAssertion, type WorkspaceAssertionResult, type WorkspaceInspector, type WorkspaceSnapshot, type WranglerDeployRunnerInput, adversarialJudge, aggregatePrReviewScore, aggregateRunScore, analyzeAntiSlop, analyzeSeries, appendScorecard, assertCrossFamily, assertSingleBackend, attributeCounterfactuals, bisect, buildDriverSystemPrompt, buildReflectionPrompt, buildReviewerPrompt, buildSkillUsageReport, buildTraceToolsForGroup, byteLengthRange, canaryLeakView, canonicalize, causalAttribution, checkBehavioralCanary, checkCanaries, checkSlos, clamp01, codeExecutionJudge, coherenceJudge, collectionPreserved, commentsForSource, commitBisect, compareReferenceReplay, compilerJudge, composeValidators, containsAll, createAntiSlopJudge, createCustomJudge, createDefaultReviewer, createDomainExpertJudge, createIntentMatchJudge, createJudgeAdapter, createLlmCorrectnessChecker, createRunCriticAdapter, createSandboxPool, createSemanticConceptJudge, createSemanticConceptJudgeAdapter, createTraceAnalystAdapter, createTraceAnalystKind, createVerifierAdapter, crossTraceDiff, crowdingDistance, decideNextUserTurn, decideReferenceReplayPromotion, decideReferenceReplayRunPromotion, defaultIsMaterial, defaultJudges, defaultReferenceReplayMatcher, deployGateLayer, diffFindings, diffScorecard, discoverPersonas, distillPlaybook, dominates, emitSkillUsageFindings, estimateCost, estimateTokens, evaluateContract, evaluateHypothesis, evaluateOracles, executeScenario, expectAgent, exportRewardModel, extractAssetUrls, extractErrorCount, extractProducedState, fileContains, fileExists, findAutoMatchNoExpectation, findConstructorCwdDropped, findFallbackToPass, findLiteralTruePass, findSkipCountsAsPass, flowLayer, formatBenchmarkReport, formatDriverReport, formatFindings, formatScorecardDiff, ghCliClient, precision as goldenPrecision, hashContent, hashJson, htmlContainsElement, httpGithubClient, inMemoryReferenceReplayStore, isModelPriced, isOtelConfigured, jsonHasKeys, jsonShape, jsonlReferenceReplayStore, judgeFamily, keyPreserved, liftSeverity, linterJudge, loadScorecard, loadScorerFromGrader, localCommandRunner, lowercaseMutator, matchGoldens, mergeLayerResults, mergeSteeringBundle, multiToolchainLayer, notBlocked, paraphraseRobustness, paraphraseRobustnessScenarios, paretoFrontier, paretoFrontierWithCrowding, parseCorrectnessResponse, parseFindingSubject, parseRawFinding, parseReflectionResponse, passOrthogonality, pixelDeltaRatio, politenessPrefixMutator, printDriverSummary, promptBisect, proposeAutomatedPullRequest, proposeSynthesisTargets, recordRuns, recordRunsToScorecard, referenceReplayRunsToSteeringRows, referenceReplayScenarioToRunScore, regexMatch, regexMatches, renderFindingSubject, renderMarkdownReport, renderPlaybookMarkdown, renderPriorFindings, renderSteeringText, replayScorerOverCorpus, replayTraceThroughJudge, resetLockedAppendersForTesting, resolveModelPricing, rowCount, rowWhere, runAssertions, runBehavioralCanaries, runCanaries, runCounterfactual, runE2EWorkflow, runExpectations, runHarnessExperiment, runIntentMatchJudge, runJudgeFleet, runKeywordCoverageJudge, runKeywordCoverageJudgeUrl, runLiveProof, runReferenceReplay, runSelfPlay, runSemanticConceptJudge, scalarScore, scanForMuffledGates, scoreContinuity, scorePrReviewComments, scorePrReviewSource, scoreReferenceReplay, securityJudge, selectHarnessVariant, sentenceReorderMutator, signManifest, statusAdvanced, summarizeHarnessResults, summarizePrReviewBenchmark, testJudge, textInSnapshot, toLangfuseEnvelope, toPrometheusText, traceJudge, traceJudgeEnsemble, tracedAnalyzeTraces, typoMutator, urlContains, verifyCompletion, verifyManifest, visualDiff, viteDeployRunner, weightedRecall, whitespaceCollapseMutator, withJudgeRetry, withOtelPipeline, wranglerDeployRunner };
|