@tangle-network/agent-eval 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/README.md +4 -5
- package/dist/{baseline-4R5deP0N.d.ts → baseline-BwdCXUS8.d.ts} +1 -1
- package/dist/builder-eval/index.d.ts +3 -3
- package/dist/builder-eval/index.js +1 -1
- package/dist/{chunk-WWYCWKUM.js → chunk-3CKU6VGU.js} +2 -2
- package/dist/{chunk-2A5XJB43.js → chunk-5AKPEK5L.js} +3 -3
- package/dist/chunk-5AKPEK5L.js.map +1 -0
- package/dist/{chunk-RAF443UI.js → chunk-DBIGN5MJ.js} +2 -2
- package/dist/{chunk-JLZQWFV3.js → chunk-K33INZHH.js} +2 -2
- package/dist/chunk-K33INZHH.js.map +1 -0
- package/dist/{chunk-NU65VQ7M.js → chunk-MAZ26DC7.js} +1 -1
- package/dist/chunk-MAZ26DC7.js.map +1 -0
- package/dist/{chunk-LSH4MMOZ.js → chunk-NCRFYPS3.js} +1 -1
- package/dist/chunk-NCRFYPS3.js.map +1 -0
- package/dist/{chunk-ZN274SWR.js → chunk-PALJO75S.js} +2 -2
- package/dist/{chunk-OWLAAMME.js → chunk-QHF6EQKK.js} +3 -2
- package/dist/chunk-QHF6EQKK.js.map +1 -0
- package/dist/chunk-R5UQJNKC.js +722 -0
- package/dist/chunk-R5UQJNKC.js.map +1 -0
- package/dist/{chunk-SESZDQPX.js → chunk-RUI6SIHY.js} +3 -3
- package/dist/chunk-RUI6SIHY.js.map +1 -0
- package/dist/{chunk-WHZMVFUV.js → chunk-SZSBQUIJ.js} +2 -2
- package/dist/chunk-SZSBQUIJ.js.map +1 -0
- package/dist/chunk-UW4NOOZI.js +1561 -0
- package/dist/chunk-UW4NOOZI.js.map +1 -0
- package/dist/{chunk-4F5DQN55.js → chunk-VSMTAMNK.js} +1 -1
- package/dist/chunk-VSMTAMNK.js.map +1 -0
- package/dist/{chunk-5LBB5B3Z.js → chunk-XFZCM5Z3.js} +1 -1
- package/dist/chunk-XFZCM5Z3.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/{control-CBShYYA6.d.ts → control-rJhEDdpy.d.ts} +4 -4
- package/dist/{control-runtime-BuJHoLg0.d.ts → control-runtime-BRdQ0wrx.d.ts} +3 -2
- package/dist/control.d.ts +5 -5
- package/dist/control.js +2 -2
- package/dist/{emitter-DP_cSSiw.d.ts → emitter-BqjeOvJh.d.ts} +1 -1
- package/dist/{failure-cluster-C2EGSDiT.d.ts → failure-cluster-D1NZKqYu.d.ts} +2 -3
- package/dist/{feedback-trajectory-DfFdrraJ.d.ts → feedback-trajectory-j0nJFgC6.d.ts} +1 -1
- package/dist/governance/index.d.ts +2 -2
- package/dist/{index-D3iBCjdF.d.ts → index-Cgt3DKXr.d.ts} +2 -2
- package/dist/index.d.ts +1279 -468
- package/dist/index.js +1992 -1259
- package/dist/index.js.map +1 -1
- package/dist/{integrity-DK2EBVZC.d.ts → integrity-BAxLGJ9I.d.ts} +2 -2
- package/dist/knowledge/index.d.ts +3 -3
- package/dist/knowledge/index.js +2 -2
- package/dist/meta-eval/index.d.ts +1 -1
- package/dist/{multi-layer-verifier-LkP3LVKj.d.ts → multi-layer-verifier-BNi4-8lR.d.ts} +2 -2
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +8 -8
- package/dist/optimization.js +5 -5
- package/dist/pipelines/index.d.ts +6 -6
- package/dist/pipelines/index.js +2 -2
- package/dist/prm/index.d.ts +4 -4
- package/dist/{query-DODUYdPg.d.ts → query-BFDT0kX_.d.ts} +1 -1
- package/dist/{release-report-wfUySN5F.d.ts → release-report-PWhGlpfO.d.ts} +1 -1
- package/dist/replay-BX5Fm8en.d.ts +529 -0
- package/dist/reporting.d.ts +5 -5
- package/dist/reporting.js +5 -5
- package/dist/{researcher-bGkI7vCl.d.ts → researcher-ClDX3KZx.d.ts} +13 -14
- package/dist/rl.d.ts +29 -47
- package/dist/rl.js +5 -5
- package/dist/rl.js.map +1 -1
- package/dist/{rubric-D5tjHNJQ.d.ts → rubric-DgSqjqqj.d.ts} +2 -2
- package/dist/{sequential-Dgz1n51-.d.ts → sequential-5iSVfzl2.d.ts} +2 -2
- package/dist/{store-Db2Bv8Cf.d.ts → store-BP5be6s7.d.ts} +1 -1
- package/dist/{summary-report-DZVXOCK_.d.ts → summary-report-jrSGb2xZ.d.ts} +5 -5
- package/dist/{test-graded-scenario-B2kWEdh9.d.ts → test-graded-scenario-BJ54PDan.d.ts} +2 -2
- package/dist/traces.d.ts +9 -311
- package/dist/traces.js +16 -987
- package/dist/traces.js.map +1 -1
- package/dist/{trajectory-CnoBo-JY.d.ts → trajectory-BFmveYZt.d.ts} +1 -1
- package/dist/wire/index.d.ts +4 -4
- package/dist/wire/index.js +1 -1
- package/docs/research-report-methodology.md +4 -4
- package/docs/three-package-architecture.md +12 -24
- package/package.json +1 -1
- package/dist/chunk-2A5XJB43.js.map +0 -1
- package/dist/chunk-4F5DQN55.js.map +0 -1
- package/dist/chunk-5LBB5B3Z.js.map +0 -1
- package/dist/chunk-I4MBDTY5.js +0 -272
- package/dist/chunk-I4MBDTY5.js.map +0 -1
- package/dist/chunk-JLZQWFV3.js.map +0 -1
- package/dist/chunk-K2TPS5LB.js +0 -569
- package/dist/chunk-K2TPS5LB.js.map +0 -1
- package/dist/chunk-LSH4MMOZ.js.map +0 -1
- package/dist/chunk-NU65VQ7M.js.map +0 -1
- package/dist/chunk-OWLAAMME.js.map +0 -1
- package/dist/chunk-SESZDQPX.js.map +0 -1
- package/dist/chunk-WHZMVFUV.js.map +0 -1
- package/dist/replay-BL96gCEP.d.ts +0 -226
- /package/dist/{chunk-WWYCWKUM.js.map → chunk-3CKU6VGU.js.map} +0 -0
- /package/dist/{chunk-RAF443UI.js.map → chunk-DBIGN5MJ.js.map} +0 -0
- /package/dist/{chunk-ZN274SWR.js.map → chunk-PALJO75S.js.map} +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { C as CaptureIntegrityError } from './errors-BZ9sTdz7.js';
|
|
2
|
-
import { T as TraceStore } from './store-Db2Bv8Cf.js';
|
|
2
|
+
import { T as TraceStore } from './store-BP5be6s7.js';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* RawProviderSink — first-class persistence for the actual HTTP-level
|
|
@@ -208,4 +208,4 @@ declare function assertRunCaptured(store: TraceStore, runId: string, expectation
|
|
|
208
208
|
/** Strict mode: throws `RunIntegrityError` when the report isn't ok. */
|
|
209
209
|
declare function throwIfRunIncomplete(report: RunIntegrityReport): void;
|
|
210
210
|
|
|
211
|
-
export { FileSystemRawProviderSink as F, InMemoryRawProviderSink as I, NoopRawProviderSink as N, type ProviderRedactor as P, type RawProviderSink as R, type RunIntegrityExpectations as a, type RunIntegrityReport as b, type
|
|
211
|
+
export { FileSystemRawProviderSink as F, InMemoryRawProviderSink as I, NoopRawProviderSink as N, type ProviderRedactor as P, type RawProviderSink as R, type RunIntegrityExpectations as a, type RunIntegrityReport as b, type FileSystemRawProviderSinkOptions as c, type InMemoryRawProviderSinkOptions as d, type RawProviderDirection as e, type RawProviderEvent as f, type RawProviderSinkFilter as g, RunIntegrityError as h, type RunIntegrityIssue as i, type RunIntegrityIssueCode as j, assertRunCaptured as k, defaultProviderRedactor as l, providerFromBaseUrl as p, throwIfRunIncomplete as t };
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { j as ControlSeverity, C as ControlEvalResult } from '../control-runtime-BuJHoLg0.js';
|
|
2
|
-
import { T as TraceEmitter } from '../emitter-DP_cSSiw.js';
|
|
3
|
-
import '../store-Db2Bv8Cf.js';
|
|
1
|
+
import { j as ControlSeverity, C as ControlEvalResult } from '../control-runtime-BRdQ0wrx.js';
|
|
2
|
+
import { T as TraceEmitter } from '../emitter-BqjeOvJh.js';
|
|
3
|
+
import '../store-BP5be6s7.js';
|
|
4
4
|
|
|
5
5
|
type KnowledgeRequirementCategory = 'user_specific' | 'company_specific' | 'domain_specific' | 'codebase_specific' | 'market_specific' | 'regulatory' | 'tool_api' | 'credential_or_secret' | 'runtime_environment' | 'preference' | 'historical_context';
|
|
6
6
|
type KnowledgeAcquisitionMode = 'ask_user' | 'search_web' | 'query_connector' | 'inspect_repo' | 'run_command' | 'infer_low_confidence' | 'not_available';
|
package/dist/knowledge/index.js
CHANGED
|
@@ -4,8 +4,8 @@ import {
|
|
|
4
4
|
knowledgeReadinessTracePayload,
|
|
5
5
|
scoreKnowledgeReadiness,
|
|
6
6
|
userQuestionsForKnowledgeGaps
|
|
7
|
-
} from "../chunk-WWYCWKUM.js";
|
|
8
|
-
import "../chunk-LSH4MMOZ.js";
|
|
7
|
+
} from "../chunk-3CKU6VGU.js";
|
|
8
|
+
import "../chunk-NCRFYPS3.js";
|
|
9
9
|
import "../chunk-TVVP3ZZQ.js";
|
|
10
10
|
import "../chunk-PZ5AY32C.js";
|
|
11
11
|
export {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { R as Run, T as TraceStore } from '../store-Db2Bv8Cf.js';
|
|
1
|
+
import { R as Run, T as TraceStore } from '../store-BP5be6s7.js';
|
|
2
2
|
import { a as OutcomeFilter, O as OutcomeStore } from '../outcome-store-D6KWmYvj.js';
|
|
3
3
|
export { D as DeploymentOutcome, F as FileSystemOutcomeStore, b as FileSystemOutcomeStoreOptions, I as InMemoryOutcomeStore } from '../outcome-store-D6KWmYvj.js';
|
|
4
4
|
export { R as RubricOutcomePair, a as RubricPredictiveValidityInput, b as RubricPredictiveValidityReport, c as RubricRanking, r as rubricPredictiveValidity } from '../rubric-predictive-validity-C0uDYwG6.js';
|
|
@@ -51,7 +51,7 @@ interface LayerResult {
|
|
|
51
51
|
* diagnostic name; null = "diagnostic not applicable / not measured."
|
|
52
52
|
* Renderers that know the keys can display them; ones that don't,
|
|
53
53
|
* ignore. Free-form on purpose — consumers type the value shape in
|
|
54
|
-
* their own namespace.
|
|
54
|
+
* their own namespace.
|
|
55
55
|
*/
|
|
56
56
|
diagnostics?: Record<string, number | null>;
|
|
57
57
|
/** Any rich per-layer detail — rendered as-is by consumers that know the layer. */
|
|
@@ -138,4 +138,4 @@ declare class MultiLayerVerifier<Env = unknown> {
|
|
|
138
138
|
run(opts: VerifyOptions<Env>): Promise<VerificationReport>;
|
|
139
139
|
}
|
|
140
140
|
|
|
141
|
-
export { type Finding as F, type Layer as L, MultiLayerVerifier as M, type Severity as S, type VerificationReport as V, type
|
|
141
|
+
export { type Finding as F, type Layer as L, MultiLayerVerifier as M, type Severity as S, type VerificationReport as V, type VerifyOptions as a, type LayerResult as b, type VerifyContext as c, type LayerStatus as d, gradeSemanticStatus as g };
|
package/dist/openapi.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"openapi": "3.1.0",
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "@tangle-network/agent-eval — wire protocol",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.28.0",
|
|
6
6
|
"description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
|
|
7
7
|
"contact": {
|
|
8
8
|
"name": "Tangle Network",
|
package/dist/optimization.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
export { C as CallbackResearcher, a as CallbackResearcherOptions, b as CampaignFactoryParams, c as CampaignIntegrityPolicy, d as CampaignRunContext, e as CampaignRunOutcome, f as CampaignRunner, g as CampaignScenario, h as CampaignVariant, E as EvalCampaignOptions, i as EvalCampaignResult, j as ExperimentPlan, k as ExperimentResult, F as FailedRun, l as FailureMode, N as NoopResearcher, R as Researcher, S as SteeringChange, r as runEvalCampaign } from './researcher-bGkI7vCl.js';
|
|
2
|
-
export { F as FeedbackArtifactType, a as FeedbackAttempt, b as FeedbackLabel, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, l as FeedbackTrajectory, m as FeedbackTrajectoryFilter, n as FeedbackTrajectoryStore, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-DfFdrraJ.js';
|
|
3
|
-
export { A as ActionableSideInfo, a as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, E as EvolvableVariant, G as GenerationReport, I as InMemoryTrialCache, M as MultiShotGateConfig, b as MultiShotGateResult, c as MultiShotMutateAdapter, d as MultiShotOptimizationConfig, e as MultiShotOptimizationResult, f as MultiShotRun, g as MultiShotRunInput, h as MultiShotRunner, i as MultiShotScore, j as MultiShotScorer, k as MultiShotSplit, l as MultiShotTrace, m as MultiShotTrialResult, n as MultiShotVariant, o as MutateAdapter, P as PromptEvolutionConfig, p as PromptEvolutionEvent, q as PromptEvolutionResult, R as ReflectionContext, r as ReflectionProposal, S as ScenarioAggregate, s as ScoreAdapter, T as TrialCache, t as TrialResult, u as TrialTrace, V as VariantAggregate, v as buildReflectionPrompt, w as defaultMultiShotObjectives, x as parseReflectionResponse, y as runMultiShotOptimization, z as runPromptEvolution, B as trialTraceFromMultiShotTrial } from './summary-report-DZVXOCK_.js';
|
|
1
|
+
export { C as CallbackResearcher, a as CallbackResearcherOptions, b as CampaignFactoryParams, c as CampaignIntegrityPolicy, d as CampaignRunContext, e as CampaignRunOutcome, f as CampaignRunner, g as CampaignScenario, h as CampaignVariant, E as EvalCampaignOptions, i as EvalCampaignResult, j as ExperimentPlan, k as ExperimentResult, F as FailedRun, l as FailureMode, N as NoopResearcher, R as Researcher, S as SteeringChange, r as runEvalCampaign } from './researcher-ClDX3KZx.js';
|
|
2
|
+
export { F as FeedbackArtifactType, a as FeedbackAttempt, b as FeedbackLabel, c as FeedbackLabelKind, d as FeedbackLabelSource, e as FeedbackOptimizerRow, f as FeedbackOutcome, g as FeedbackReplayAdapter, h as FeedbackReplayResult, i as FeedbackSeverity, j as FeedbackSplitPolicy, k as FeedbackTask, l as FeedbackTrajectory, m as FeedbackTrajectoryFilter, n as FeedbackTrajectoryStore, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-j0nJFgC6.js';
|
|
3
|
+
export { A as ActionableSideInfo, a as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, E as EvolvableVariant, G as GenerationReport, I as InMemoryTrialCache, M as MultiShotGateConfig, b as MultiShotGateResult, c as MultiShotMutateAdapter, d as MultiShotOptimizationConfig, e as MultiShotOptimizationResult, f as MultiShotRun, g as MultiShotRunInput, h as MultiShotRunner, i as MultiShotScore, j as MultiShotScorer, k as MultiShotSplit, l as MultiShotTrace, m as MultiShotTrialResult, n as MultiShotVariant, o as MutateAdapter, P as PromptEvolutionConfig, p as PromptEvolutionEvent, q as PromptEvolutionResult, R as ReflectionContext, r as ReflectionProposal, S as ScenarioAggregate, s as ScoreAdapter, T as TrialCache, t as TrialResult, u as TrialTrace, V as VariantAggregate, v as buildReflectionPrompt, w as defaultMultiShotObjectives, x as parseReflectionResponse, y as runMultiShotOptimization, z as runPromptEvolution, B as trialTraceFromMultiShotTrial } from './summary-report-jrSGb2xZ.js';
|
|
4
4
|
import './errors-BZ9sTdz7.js';
|
|
5
|
-
import './integrity-DK2EBVZC.js';
|
|
6
|
-
import './store-Db2Bv8Cf.js';
|
|
5
|
+
import './integrity-BAxLGJ9I.js';
|
|
6
|
+
import './store-BP5be6s7.js';
|
|
7
7
|
import './run-record-CqzahIbx.js';
|
|
8
|
-
import './emitter-DP_cSSiw.js';
|
|
9
|
-
import './control-runtime-BuJHoLg0.js';
|
|
8
|
+
import './emitter-BqjeOvJh.js';
|
|
9
|
+
import './control-runtime-BRdQ0wrx.js';
|
|
10
10
|
import './dataset-CiK_3LDr.js';
|
|
11
|
-
import './failure-cluster-C2EGSDiT.js';
|
|
11
|
+
import './failure-cluster-D1NZKqYu.js';
|
package/dist/optimization.js
CHANGED
|
@@ -25,18 +25,18 @@ import {
|
|
|
25
25
|
summarizePreferenceMemory,
|
|
26
26
|
trialTraceFromMultiShotTrial,
|
|
27
27
|
withAssignedFeedbackSplit
|
|
28
|
-
} from "./chunk-WHZMVFUV.js";
|
|
28
|
+
} from "./chunk-SZSBQUIJ.js";
|
|
29
29
|
import "./chunk-NLMNWKVM.js";
|
|
30
30
|
import {
|
|
31
31
|
runEvalCampaign
|
|
32
|
-
} from "./chunk-SESZDQPX.js";
|
|
32
|
+
} from "./chunk-RUI6SIHY.js";
|
|
33
33
|
import "./chunk-4S4BM3QQ.js";
|
|
34
|
-
import "./chunk-2A5XJB43.js";
|
|
35
|
-
import "./chunk-
|
|
34
|
+
import "./chunk-5AKPEK5L.js";
|
|
35
|
+
import "./chunk-R5UQJNKC.js";
|
|
36
36
|
import "./chunk-KTGTIOFD.js";
|
|
37
37
|
import "./chunk-PC4UYEBM.js";
|
|
38
38
|
import "./chunk-TVVP3ZZQ.js";
|
|
39
|
-
import "./chunk-4F5DQN55.js";
|
|
39
|
+
import "./chunk-VSMTAMNK.js";
|
|
40
40
|
import "./chunk-NG236HPC.js";
|
|
41
41
|
import "./chunk-PZ5AY32C.js";
|
|
42
42
|
export {
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { g as BudgetSpec, T as TraceStore,
|
|
2
|
-
export { a as FailureCluster, F as FailureClusterReport, f as failureClusterView } from '../failure-cluster-C2EGSDiT.js';
|
|
3
|
-
import { a as TrajectoryStep } from '../trajectory-CnoBo-JY.js';
|
|
4
|
-
import { B as BaselineOptions, a as BaselineReport } from '../baseline-4R5deP0N.js';
|
|
5
|
-
export { c as computeToolUseMetrics } from '../baseline-4R5deP0N.js';
|
|
6
|
-
import { l as llmSpans } from '../query-DODUYdPg.js';
|
|
1
|
+
import { g as BudgetSpec, T as TraceStore, l as RunFilter, R as Run, a as ToolSpan } from '../store-BP5be6s7.js';
|
|
2
|
+
export { a as FailureCluster, F as FailureClusterReport, f as failureClusterView } from '../failure-cluster-D1NZKqYu.js';
|
|
3
|
+
import { a as TrajectoryStep } from '../trajectory-BFmveYZt.js';
|
|
4
|
+
import { B as BaselineOptions, a as BaselineReport } from '../baseline-BwdCXUS8.js';
|
|
5
|
+
export { c as computeToolUseMetrics } from '../baseline-BwdCXUS8.js';
|
|
6
|
+
import { l as llmSpans } from '../query-BFDT0kX_.js';
|
|
7
7
|
|
|
8
8
|
/**
|
|
9
9
|
* BudgetBreachView — aggregates breach events across the corpus.
|
package/dist/pipelines/index.js
CHANGED
|
@@ -2,13 +2,13 @@ import {
|
|
|
2
2
|
compareToBaseline,
|
|
3
3
|
computeToolUseMetrics,
|
|
4
4
|
failureClusterView
|
|
5
|
-
} from "../chunk-JLZQWFV3.js";
|
|
5
|
+
} from "../chunk-K33INZHH.js";
|
|
6
6
|
import {
|
|
7
7
|
buildTrajectory
|
|
8
8
|
} from "../chunk-RZTMDUO7.js";
|
|
9
9
|
import {
|
|
10
10
|
interRaterReliability
|
|
11
|
-
} from "../chunk-
|
|
11
|
+
} from "../chunk-R5UQJNKC.js";
|
|
12
12
|
import {
|
|
13
13
|
aggregateLlm,
|
|
14
14
|
argHash,
|
package/dist/prm/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { P as PrmGradedTrace, S as StepRubric, a as PrmGrader } from '../rubric-D5tjHNJQ.js';
|
|
2
|
-
export { G as GradedStep, b as StepContext, i as isPrmVerdict } from '../rubric-D5tjHNJQ.js';
|
|
3
|
-
import { S as Span, T as TraceStore } from '../store-Db2Bv8Cf.js';
|
|
4
|
-
import '../trajectory-CnoBo-JY.js';
|
|
1
|
+
import { P as PrmGradedTrace, S as StepRubric, a as PrmGrader } from '../rubric-DgSqjqqj.js';
|
|
2
|
+
export { G as GradedStep, b as StepContext, i as isPrmVerdict } from '../rubric-DgSqjqqj.js';
|
|
3
|
+
import { S as Span, T as TraceStore } from '../store-BP5be6s7.js';
|
|
4
|
+
import '../trajectory-BFmveYZt.js';
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* Export PRM-graded traces as training data for downstream reward-model
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { L as LlmSpan, T as TraceStore, J as JudgeSpan, R as Run, F as FailureClass, a as ToolSpan } from './store-Db2Bv8Cf.js';
|
|
1
|
+
import { L as LlmSpan, T as TraceStore, J as JudgeSpan, R as Run, F as FailureClass, a as ToolSpan } from './store-BP5be6s7.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Typed query helpers over TraceStore.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { D as DatasetSplit, b as DatasetManifest, a as DatasetScenario } from './dataset-CiK_3LDr.js';
|
|
2
|
-
import { a3 as GateDecision, A as ActionableSideInfo, m as MultiShotTrialResult } from './summary-report-DZVXOCK_.js';
|
|
2
|
+
import { a3 as GateDecision, A as ActionableSideInfo, m as MultiShotTrialResult } from './summary-report-jrSGb2xZ.js';
|
|
3
3
|
import { R as RunRecord, a as RunSplitTag } from './run-record-CqzahIbx.js';
|
|
4
4
|
|
|
5
5
|
/**
|