npm - @tangle-network/agent-eval - Versions diffs - 0.49.0 → 0.50.1 - Mend

@tangle-network/agent-eval 0.49.0 → 0.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/CHANGELOG.md +135 -0
package/README.md +235 -331
package/dist/adapters/http.d.ts +1 -1
package/dist/adapters/langchain.d.ts +1 -1
package/dist/adapters/otel.d.ts +8 -2
package/dist/campaign/index.d.ts +3 -3
package/dist/{chunk-PD3MH6WU.js → chunk-5KSDYBYH.js} +2 -2
package/dist/{chunk-MNL6LXGQ.js → chunk-EGIPWXHL.js} +2 -98
package/dist/chunk-EGIPWXHL.js.map +1 -0
package/dist/{chunk-OYI6RZJK.js → chunk-FQK2CCIM.js} +1 -1
package/dist/chunk-FQK2CCIM.js.map +1 -0
package/dist/chunk-MAZ26DC7.js +99 -0
package/dist/chunk-MAZ26DC7.js.map +1 -0
package/dist/chunk-SHTXZ4O2.js +113 -0
package/dist/chunk-SHTXZ4O2.js.map +1 -0
package/dist/{chunk-KQ26DYTQ.js → chunk-UBQGWD3O.js} +2 -2
package/dist/contract/index.d.ts +206 -9
package/dist/contract/index.js +751 -3
package/dist/contract/index.js.map +1 -1
package/dist/governance/index.d.ts +1 -1
package/dist/hosted/index.d.ts +8 -192
package/dist/hosted/index.js +1 -1
package/dist/index-BRxz6qov.d.ts +409 -0
package/dist/index.d.ts +18 -462
package/dist/index.js +14 -106
package/dist/index.js.map +1 -1
package/dist/meta-eval/index.d.ts +3 -3
package/dist/openapi.json +1 -1
package/dist/{outcome-store-BxJ3DQKJ.d.ts → outcome-store-D6KWmYvj.d.ts} +1 -1
package/dist/registry-8KAs18kY.d.ts +457 -0
package/dist/{release-report-DBB8lB1P.d.ts → release-report-DSu0DWy8.d.ts} +3 -296
package/dist/reporting.d.ts +6 -4
package/dist/reporting.js +6 -4
package/dist/{researcher-CHMO56K0.d.ts → researcher-LZD0qHEa.d.ts} +1 -1
package/dist/rl.d.ts +9 -8
package/dist/rl.js +3 -2
package/dist/rl.js.map +1 -1
package/dist/{rubric-predictive-validity-CJ08tGwq.d.ts → rubric-predictive-validity-ByZEC3BX.d.ts} +1 -1
package/dist/{run-improvement-loop-B-L8GgpW.d.ts → run-improvement-loop-BPMjNKMJ.d.ts} +2 -2
package/dist/sequential-5iSVfzl2.d.ts +139 -0
package/dist/store-CJbzDxZ2.d.ts +220 -0
package/dist/{sequential-CbFH___X.d.ts → summary-report-B7gNRX-r.d.ts} +1 -139
package/dist/traces.d.ts +3 -220
package/dist/{types-8u72Gc76.d.ts → types-Dbj5gu8n.d.ts} +1 -1
package/dist/types-DhqpAi_z.d.ts +296 -0
package/docs/concepts.md +20 -0
package/docs/customer-journeys.md +208 -0
package/docs/insight-report.md +337 -0
package/package.json +1 -1
package/dist/chunk-MNL6LXGQ.js.map +0 -1
package/dist/chunk-OYI6RZJK.js.map +0 -1
/package/dist/{chunk-PD3MH6WU.js.map → chunk-5KSDYBYH.js.map} +0 -0
/package/dist/{chunk-KQ26DYTQ.js.map → chunk-UBQGWD3O.js.map} +0 -0

package/dist/{release-report-DBB8lB1P.d.ts → release-report-DSu0DWy8.d.ts} RENAMED Viewed

@@ -1,302 +1,9 @@
 import { C as ContinuousAgreementOptions, a as ContinuousAgreement } from './judge-calibration-DilmB3Ml.js';
-import { TCloud } from '@tangle-network/tcloud';
+import { a as JudgeScore } from './types-DhqpAi_z.js';
 import { D as DatasetSplit, c as DatasetManifest, a as DatasetScenario } from './dataset-BlwAtYYf.js';
-import { w as GateDecision } from './sequential-CbFH___X.js';
+import { m as GateDecision } from './summary-report-B7gNRX-r.js';
 import { R as RunRecord, a as RunSplitTag } from './run-record-BGY6bHRh.js';
-interface Scenario {
-    id: string;
-    persona: string;
-    label: string;
-    thesis: string;
-    dimensions: string[];
-    turns: Turn[];
-    artifactChecks: ArtifactCheck[];
-    systemPromptAppend?: string;
-}
-interface Turn {
-    user: string;
-    expectedBehaviors: string[];
-    adversarial?: boolean;
-    feedbackType?: 'correction' | 'rejection' | 'vague' | 'contradictory' | 'escalation';
-}
-interface ArtifactCheck {
-    type: 'vault_file_exists' | 'vault_file_contains' | 'block_extracted' | 'code_valid' | 'generation_produced' | 'tool_created' | string;
-    target: string;
-    contains?: string;
-    minCount?: number;
-    description: string;
-}
-interface JudgeConfig {
-    model: string;
-    temperature: number;
-    rubric: JudgeRubric;
-}
-interface JudgeRubric {
-    name: string;
-    description: string;
-    dimensions: RubricDimension[];
-}
-interface RubricDimension {
-    name: string;
-    description: string;
-    anchor_low: string;
-    anchor_high: string;
-    weight: number;
-}
-interface ScenarioResult {
-    scenarioId: string;
-    persona: string;
-    turns: TurnResult[];
-    artifactResults: ArtifactResult[];
-    judgeScores: JudgeScore[];
-    judgeErrors: number;
-    overallScore: number;
-    totalDurationMs: number;
-    artifacts: CollectedArtifacts;
-}
-interface TurnResult {
-    turnIndex: number;
-    userMessage: string;
-    agentResponse: string;
-    durationMs: number;
-    blocksExtracted: {
-        type: string;
-        title: string;
-    }[];
-    containsCode: boolean;
-    containsToolCall: boolean;
-}
-interface ArtifactResult {
-    check: ArtifactCheck;
-    passed: boolean;
-    detail?: string;
-}
-interface JudgeScore {
-    judgeName: string;
-    dimension: string;
-    score: number;
-    reasoning: string;
-    evidence?: string;
-}
-interface CollectedArtifacts {
-    vaultFiles: {
-        path: string;
-        content: string;
-    }[];
-    blocksExtracted: {
-        type: string;
-        fields: Record<string, string>;
-    }[];
-    codeBlocks: {
-        language: string;
-        code: string;
-    }[];
-    toolCalls: string[];
-}
-interface BenchmarkReport {
-    timestamp: string;
-    generation: number;
-    promptVersion: string;
-    scenarioCount: number;
-    results: ScenarioResult[];
-    summary: {
-        overallAvg: number;
-        byPersona: Record<string, {
-            avg: number;
-            passed: number;
-            total: number;
-        }>;
-        byDimension: Record<string, {
-            avg: number;
-            scores: number[];
-        }>;
-        weakest: {
-            scenario: string;
-            score: number;
-            reason: string;
-        }[];
-        strongest: {
-            scenario: string;
-            score: number;
-            reason: string;
-        }[];
-    };
-}
-interface RouteMap {
-    signup?: string;
-    login?: string;
-    workspaces?: string;
-    threads?: string;
-    chat?: string;
-    tasks?: string;
-    events?: string;
-    approvals?: string;
-    vault?: string;
-    generations?: string;
-    [key: string]: string | undefined;
-}
-interface ProductClientConfig {
-    baseUrl: string;
-    routes: RouteMap;
-}
-interface ScenarioFile {
-    id: string;
-    category: string;
-    persona: string;
-    label: string;
-    thesis: string;
-    isControl?: boolean;
-    rubric?: {
-        dimensions: {
-            name: string;
-            description: string;
-            weight: number;
-        }[];
-    };
-    turns: Turn[];
-    artifactChecks: ArtifactCheck[];
-}
-interface CompletionCriterion {
-    name: string;
-    check: (state: DriverState) => boolean;
-    progress?: (state: DriverState) => number;
-}
-interface FeedbackPattern {
-    trigger: string;
-    response: string;
-}
-/**
- * How hard the simulated user pushes back. The driver LLM scales its tone
- * and follow-up aggression to this:
- *   cooperative — forgiving early adopter; accepts reasonable answers.
- *   demanding   — experienced professional; rejects vague or hedged answers.
- *   relentless  — senior partner reviewing for a client who will litigate;
- *                 interrogates every claim, accepts nothing undefended.
- */
-type PersonaRigor = 'cooperative' | 'demanding' | 'relentless';
-interface PersonaConfig {
-    id: string;
-    role: string;
-    goal: string;
-    completionCriteria: CompletionCriterion[];
-    feedbackPatterns?: FeedbackPattern[];
-    maxTurns: number;
-    driverModel?: string;
-    /** How adversarial the simulated user is. Defaults to 'demanding'. */
-    rigor?: PersonaRigor;
-    /**
-     * Domain expertise the simulated user holds — quoted into the driver
-     * prompt so it challenges the agent with authority instead of vague
-     * dissatisfaction. e.g. "a 15-year M&A partner who knows GAAP
-     * working-capital mechanics cold".
-     */
-    expertise?: string;
-    /**
-     * Substantive issues a senior professional in this role would
-     * interrogate — traps the scenario hides, claims that must be defended.
-     * The driver probes these without revealing them verbatim; the agent
-     * must surface them on its own.
-     */
-    pressurePoints?: string[];
-    /**
-     * Curveballs the driver may inject once the agent is coasting — changed
-     * facts, a hostile counterparty position, a new constraint. Forces the
-     * agent to re-derive rather than recite.
-     */
-    curveballs?: string[];
-}
-interface DriverState {
-    tasks: number;
-    events: number;
-    proposals: {
-        pending: number;
-        approved: number;
-        rejected: number;
-    };
-    vaultFiles: string[];
-    codeBlocks: number;
-    generations: number;
-}
-interface TurnMetrics {
-    turn: number;
-    timestamp: string;
-    tasks: number;
-    events: number;
-    proposals: {
-        pending: number;
-        approved: number;
-        rejected: number;
-    };
-    vaultFiles: number;
-    responseLatencyMs: number;
-    responseChars: number;
-    codeBlocksProduced: number;
-    blocksExtracted: number;
-    qualityScore?: number;
-    inputTokens: number;
-    outputTokens: number;
-    estimatedCostUsd: number;
-    totalCostUsd: number;
-    completionPercent: number;
-}
-interface DriverResult {
-    personaId: string;
-    /** True when the simulated user professionally signed off (driver said DONE). */
-    completed: boolean;
-    /** Turn at which the simulated user signed off, or null if it never did. */
-    turnsToCompletion: number | null;
-    /**
-     * Turn at which nominal completionCriteria were first all met, or null.
-     * Distinct from turnsToCompletion: criteria can be met while the
-     * simulated professional is still unsatisfied with the work's rigor.
-     */
-    criteriaMetAtTurn: number | null;
-    totalTurns: number;
-    metrics: TurnMetrics[];
-    finalState: DriverState;
-    convergenceCurve: number[];
-    totalCostUsd: number;
-    finalQualityScore: number | null;
-}
-interface BenchmarkRunnerConfig {
-    scenarios: Scenario[];
-    judges: JudgeFn[];
-    systemPrompt: string;
-    model?: string;
-    judgeModel?: string;
-    passThreshold?: number;
-    generation?: number;
-    promptVersion?: string;
-}
-interface JudgeInput {
-    scenario: Scenario;
-    turns: TurnResult[];
-    artifacts: CollectedArtifacts;
-}
-type JudgeFn = (tc: TCloud, input: JudgeInput) => Promise<JudgeScore[]>;
-interface TestResult {
-    name: string;
-    passed: boolean;
-    duration: number;
-    detail?: string;
-    checks: CheckResult[];
-}
-interface CheckResult {
-    name: string;
-    passed: boolean;
-    expected: string;
-    actual: string;
-}
-interface EvalResult {
-    scenario: string;
-    status: 'pass' | 'fail' | 'skip';
-    duration: number;
-    detail?: string;
-    artifact?: string;
-}
 /**
  * Release confidence gate.
  *
@@ -731,4 +438,4 @@ interface RenderReleaseReportOptions {
 }
 declare function renderReleaseReport(scorecard: ReleaseConfidenceScorecard, options?: RenderReleaseReportOptions): string;
-export { type PersonaRigor as $, type CollectedArtifacts as A, type BootstrapOptions as B, type CheckResult as C, type DriverResult as D, type ScenarioResult as E, type TurnMetrics as F, type ScenarioFile as G, type CompletionCriterion as H, type ActionableSideInfo as I, type JudgeReplayGateArgs as J, type ArtifactCheck as K, type ArtifactResult as L, type AsiSeverity as M, type CorpusAgreementOptions as N, type CorpusAgreementPerDimension as O, type PairedBootstrapOptions as P, type CorpusAgreementReport as Q, type ReleaseConfidenceAxis as R, type Scenario as S, type TestResult as T, type CorpusScoreRecord as U, type Verdict as V, type EvalResult as W, type FeedbackPattern as X, type JudgeConfig as Y, type JudgeRubric as Z, type JudgeScore as _, type BootstrapResult as a, type RouteMap as a0, type RubricDimension as a1, type Turn as a2, type TurnResult as a3, bonferroni as a4, cohensD as a5, confidenceInterval as a6, corpusInterRaterAgreement as a7, corpusInterRaterAgreementFromJudgeScores as a8, interRaterReliability as a9, mannWhitneyU as aa, normalizeScores as ab, pairedMde as ac, pairedTTest as ad, partialCredit as ae, requiredSampleSize as af, weightedMean as ag, type PairedBootstrapResult as b, type ReleaseConfidenceAxisName as c, type ReleaseConfidenceInput as d, type ReleaseConfidenceIssue as e, type ReleaseConfidenceMetrics as f, type ReleaseConfidenceScorecard as g, type ReleaseConfidenceStatus as h, type ReleaseConfidenceThresholds as i, type ReleaseTraceEvidence as j, type RenderReleaseReportOptions as k, assertReleaseConfidence as l, benjaminiHochberg as m, bootstrapCi as n, evaluateReleaseConfidence as o, judgeReplayGate as p, pairedBootstrap as q, renderReleaseReport as r, type JudgeInput as s, type JudgeFn as t, type BenchmarkRunnerConfig as u, type BenchmarkReport as v, wilcoxonSignedRank as w, type ProductClientConfig as x, type PersonaConfig as y, type DriverState as z };
+export { type ActionableSideInfo as A, type BootstrapOptions as B, type CorpusAgreementOptions as C, corpusInterRaterAgreement as D, corpusInterRaterAgreementFromJudgeScores as E, interRaterReliability as F, mannWhitneyU as G, normalizeScores as H, pairedMde as I, type JudgeReplayGateArgs as J, pairedTTest as K, partialCredit as L, requiredSampleSize as M, weightedMean as N, type PairedBootstrapOptions as P, type ReleaseConfidenceAxis as R, type Verdict as V, type BootstrapResult as a, type PairedBootstrapResult as b, type ReleaseConfidenceAxisName as c, type ReleaseConfidenceInput as d, type ReleaseConfidenceIssue as e, type ReleaseConfidenceMetrics as f, type ReleaseConfidenceScorecard as g, type ReleaseConfidenceStatus as h, type ReleaseConfidenceThresholds as i, type ReleaseTraceEvidence as j, type RenderReleaseReportOptions as k, assertReleaseConfidence as l, benjaminiHochberg as m, bootstrapCi as n, evaluateReleaseConfidence as o, judgeReplayGate as p, pairedBootstrap as q, renderReleaseReport as r, type AsiSeverity as s, type CorpusAgreementPerDimension as t, type CorpusAgreementReport as u, type CorpusScoreRecord as v, wilcoxonSignedRank as w, bonferroni as x, cohensD as y, confidenceInterval as z };

package/dist/reporting.d.ts CHANGED Viewed

@@ -1,10 +1,12 @@
-export { R as RubricOutcomePair, a as RubricPredictiveValidityInput, b as RubricPredictiveValidityReport, c as RubricRanking, r as rubricPredictiveValidity } from './rubric-predictive-validity-CJ08tGwq.js';
-export { B as BootstrapOptions, a as BootstrapResult, J as JudgeReplayGateArgs, P as PairedBootstrapOptions, b as PairedBootstrapResult, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, g as ReleaseConfidenceScorecard, h as ReleaseConfidenceStatus, i as ReleaseConfidenceThresholds, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, n as bootstrapCi, o as evaluateReleaseConfidence, p as judgeReplayGate, q as pairedBootstrap, r as renderReleaseReport, w as wilcoxonSignedRank } from './release-report-DBB8lB1P.js';
-export { G as GainDistributionBin, a as GainDistributionFigureSpec, b as GainDistributionOptions, I as InterimReleaseConfidence, c as InterimReleaseConfidenceInput, P as PairedEvalueOptions, d as PairedEvalueSequence, e as PairedEvalueStep, f as ParetoFigureSpec, g as ParetoPoint, R as RESEARCH_REPORT_HARD_PAIR_FLOOR, h as ResearchReport, i as ResearchReportCandidate, j as ResearchReportDecision, k as ResearchReportMethodology, l as ResearchReportOptions, m as ResearchReportRecommendation, S as SequentialDecision, n as SummaryTable, o as SummaryTableOptions, p as SummaryTableRow, q as evaluateInterimReleaseConfidence, r as gainHistogram, s as pairedEvalueSequence, t as paretoChart, u as researchReport, v as summaryTable } from './sequential-CbFH___X.js';
+export { R as RubricOutcomePair, a as RubricPredictiveValidityInput, b as RubricPredictiveValidityReport, c as RubricRanking, r as rubricPredictiveValidity } from './rubric-predictive-validity-ByZEC3BX.js';
+export { B as BootstrapOptions, a as BootstrapResult, J as JudgeReplayGateArgs, P as PairedBootstrapOptions, b as PairedBootstrapResult, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, g as ReleaseConfidenceScorecard, h as ReleaseConfidenceStatus, i as ReleaseConfidenceThresholds, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, V as Verdict, l as assertReleaseConfidence, m as benjaminiHochberg, n as bootstrapCi, o as evaluateReleaseConfidence, p as judgeReplayGate, q as pairedBootstrap, r as renderReleaseReport, w as wilcoxonSignedRank } from './release-report-DSu0DWy8.js';
+export { I as InterimReleaseConfidence, a as InterimReleaseConfidenceInput, P as PairedEvalueOptions, b as PairedEvalueSequence, c as PairedEvalueStep, S as SequentialDecision, e as evaluateInterimReleaseConfidence, p as pairedEvalueSequence } from './sequential-5iSVfzl2.js';
+export { G as GainDistributionBin, a as GainDistributionFigureSpec, b as GainDistributionOptions, P as ParetoFigureSpec, c as ParetoPoint, R as RESEARCH_REPORT_HARD_PAIR_FLOOR, d as ResearchReport, e as ResearchReportCandidate, f as ResearchReportDecision, g as ResearchReportMethodology, h as ResearchReportOptions, i as ResearchReportRecommendation, S as SummaryTable, j as SummaryTableOptions, k as SummaryTableRow, l as gainHistogram, p as paretoChart, r as researchReport, s as summaryTable } from './summary-report-B7gNRX-r.js';
 import './run-record-BGY6bHRh.js';
 import './errors-mje_cKOs.js';
-import './outcome-store-BxJ3DQKJ.js';
+import './outcome-store-D6KWmYvj.js';
 import './judge-calibration-DilmB3Ml.js';
+import './types-DhqpAi_z.js';
 import '@tangle-network/tcloud';
 import './dataset-BlwAtYYf.js';
 import './failure-cluster-Cw65_5FY.js';

package/dist/reporting.js CHANGED Viewed

@@ -4,19 +4,21 @@ import {
   evaluateReleaseConfidence,
   judgeReplayGate,
   renderReleaseReport
-} from "./chunk-KQ26DYTQ.js";
+} from "./chunk-UBQGWD3O.js";
 import {
   rubricPredictiveValidity
 } from "./chunk-YRZ4M5GS.js";
 import {
-  RESEARCH_REPORT_HARD_PAIR_FLOOR,
   evaluateInterimReleaseConfidence,
+  pairedEvalueSequence
+} from "./chunk-MAZ26DC7.js";
+import {
+  RESEARCH_REPORT_HARD_PAIR_FLOOR,
   gainHistogram,
-  pairedEvalueSequence,
   paretoChart,
   researchReport,
   summaryTable
-} from "./chunk-MNL6LXGQ.js";
+} from "./chunk-EGIPWXHL.js";
 import {
   benjaminiHochberg,
   pairedBootstrap,

package/dist/{researcher-CHMO56K0.d.ts → researcher-LZD0qHEa.d.ts} RENAMED Viewed

@@ -1,6 +1,6 @@
 import { a as RunSplitTag, b as RunTokenUsage, c as RunJudgeMetadata, J as JudgeScoresRecord, A as AgentProfileCell, d as AgentProfileCellInput, R as RunRecord } from './run-record-BGY6bHRh.js';
 import { L as LlmClientOptions, a as LlmRouteRequirements } from './llm-client-BXVRUZyX.js';
-import { l as ResearchReportOptions, h as ResearchReport, w as GateDecision } from './sequential-CbFH___X.js';
+import { h as ResearchReportOptions, d as ResearchReport, m as GateDecision } from './summary-report-B7gNRX-r.js';
 import { T as TraceEmitter, R as RunCompleteHook } from './emitter-DP_cSSiw.js';
 import { R as RunIntegrityExpectations, a as RunIntegrityReport } from './integrity-CTDhR1Sg.js';
 import { R as RawProviderSink } from './raw-provider-sink-C46HDghv.js';

package/dist/rl.d.ts CHANGED Viewed

@@ -1,18 +1,19 @@
 import { R as RunRecord, a as RunSplitTag } from './run-record-BGY6bHRh.js';
-import { d as CampaignResult } from './types-8u72Gc76.js';
-import { V as VerificationReport, R as Researcher, F as FailureMode, S as SteeringChange, E as ExperimentPlan, a as ExperimentResult, b as EvalCampaignResult, c as EvalCampaignOptions } from './researcher-CHMO56K0.js';
-export { r as runEvalCampaign } from './researcher-CHMO56K0.js';
+import { j as CampaignResult } from './types-Dbj5gu8n.js';
+import { V as VerificationReport, R as Researcher, F as FailureMode, S as SteeringChange, E as ExperimentPlan, a as ExperimentResult, b as EvalCampaignResult, c as EvalCampaignOptions } from './researcher-LZD0qHEa.js';
+export { r as runEvalCampaign } from './researcher-LZD0qHEa.js';
 import { S as Span, T as TraceStore } from './store-Db2Bv8Cf.js';
-import { O as OutcomeStore } from './outcome-store-BxJ3DQKJ.js';
-export { D as DeploymentOutcome, F as FileSystemOutcomeStore, a as FileSystemOutcomeStoreOptions, I as InMemoryOutcomeStore } from './outcome-store-BxJ3DQKJ.js';
-import { b as RubricPredictiveValidityReport } from './rubric-predictive-validity-CJ08tGwq.js';
-import { I as InterimReleaseConfidence } from './sequential-CbFH___X.js';
+import { O as OutcomeStore } from './outcome-store-D6KWmYvj.js';
+export { D as DeploymentOutcome, F as FileSystemOutcomeStore, b as FileSystemOutcomeStoreOptions, I as InMemoryOutcomeStore } from './outcome-store-D6KWmYvj.js';
+import { b as RubricPredictiveValidityReport } from './rubric-predictive-validity-ByZEC3BX.js';
+import { I as InterimReleaseConfidence } from './sequential-5iSVfzl2.js';
 import './errors-mje_cKOs.js';
 import './llm-client-BXVRUZyX.js';
 import './raw-provider-sink-C46HDghv.js';
+import './summary-report-B7gNRX-r.js';
+import './failure-cluster-Cw65_5FY.js';
 import './emitter-DP_cSSiw.js';
 import './integrity-CTDhR1Sg.js';
-import './failure-cluster-Cw65_5FY.js';
 /**
  * Test-time compute scaling curves.

package/dist/rl.js CHANGED Viewed

@@ -10,14 +10,15 @@ import {
 } from "./chunk-3RF76KTD.js";
 import {
   runEvalCampaign
-} from "./chunk-PD3MH6WU.js";
+} from "./chunk-5KSDYBYH.js";
 import "./chunk-BWZEGTES.js";
 import {
   rubricPredictiveValidity
 } from "./chunk-YRZ4M5GS.js";
 import {
   evaluateInterimReleaseConfidence
-} from "./chunk-MNL6LXGQ.js";
+} from "./chunk-MAZ26DC7.js";
+import "./chunk-EGIPWXHL.js";
 import {
   benjaminiHochberg,
   wilcoxonSignedRank