@tangle-network/agent-eval 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +134 -0
- package/README.md +13 -3
- package/dist/benchmarks/index.d.ts +2 -2
- package/dist/{chunk-UAND2LOT.js → chunk-7EAUOUQS.js} +4 -247
- package/dist/chunk-7EAUOUQS.js.map +1 -0
- package/dist/chunk-AXHNWLIX.js +246 -0
- package/dist/chunk-AXHNWLIX.js.map +1 -0
- package/dist/chunk-EXGR4XEM.js +283 -0
- package/dist/chunk-EXGR4XEM.js.map +1 -0
- package/dist/chunk-LZKIOBG2.js +2026 -0
- package/dist/chunk-LZKIOBG2.js.map +1 -0
- package/dist/{chunk-YUFXO3TU.js → chunk-QBW3YBTR.js} +1 -1
- package/dist/chunk-QBW3YBTR.js.map +1 -0
- package/dist/{chunk-ARZ6BEV6.js → chunk-V5QSWN7L.js} +2 -2
- package/dist/{chunk-USHQBPMH.js → chunk-VQQSPGSM.js} +7 -283
- package/dist/chunk-VQQSPGSM.js.map +1 -0
- package/dist/{control-cxwMOAsy.d.ts → control-DvkH87qJ.d.ts} +2 -2
- package/dist/control.d.ts +3 -3
- package/dist/control.js +2 -2
- package/dist/{optimization-UVDNKaO6.d.ts → eval-campaign-Ds5QljIh.d.ts} +4 -5
- package/dist/{feedback-trajectory-CB0A32o3.d.ts → feedback-trajectory-c43WGtTX.d.ts} +1 -1
- package/dist/{index-c5saLbKD.d.ts → index-DDTlbHEK.d.ts} +1 -1
- package/dist/index-ekBXweiQ.d.ts +1894 -0
- package/dist/index.d.ts +18 -154
- package/dist/index.js +125 -25
- package/dist/index.js.map +1 -1
- package/dist/{integrity-K2oVlF57.d.ts → integrity-Cr5YodSY.d.ts} +1 -1
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +5 -5
- package/dist/optimization.js +7 -5
- package/dist/reporting.d.ts +294 -4
- package/dist/reporting.js +6 -4
- package/dist/rl.d.ts +8 -0
- package/dist/rl.js +113 -0
- package/dist/rl.js.map +1 -0
- package/dist/{run-record-CX_jcAyr.d.ts → run-record-DNiOMBrZ.d.ts} +10 -1
- package/dist/sequential-DgU2mFsE.d.ts +304 -0
- package/dist/{summary-report-D4p7RlDu.d.ts → summary-report-Ce1r4EYo.d.ts} +2 -2
- package/dist/traces.d.ts +2 -2
- package/dist/traces.js +5 -5
- package/docs/auto-research-loop-end-to-end.md +186 -0
- package/docs/three-package-architecture.md +180 -0
- package/package.json +6 -1
- package/dist/chunk-UAND2LOT.js.map +0 -1
- package/dist/chunk-USHQBPMH.js.map +0 -1
- package/dist/chunk-YUFXO3TU.js.map +0 -1
- package/dist/reporting-B82RSv9C.d.ts +0 -593
- /package/dist/{chunk-ARZ6BEV6.js.map → chunk-V5QSWN7L.js.map} +0 -0
package/dist/control.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-
|
|
1
|
+
export { H as ControlActionFailureMode, J as ControlActionOutcome, K as ControlBudget, L as ControlContext, M as ControlDecision, G as ControlEvalResult, N as ControlRunResult, O as ControlRuntimeConfig, Q as ControlRuntimeError, E as ControlSeverity, R as ControlStep, S as ControlStopPolicies, T as StopDecision, U as allCriticalPassed, V as objectiveEval, W as runAgentControlLoop, X as stopOnNoProgress, Y as stopOnRepeatedAction, Z as subjectiveEval } from './feedback-trajectory-c43WGtTX.js';
|
|
2
|
+
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DvkH87qJ.js';
|
|
3
3
|
import './dataset-B9qvlm_o.js';
|
|
4
4
|
import './emitter-B2XqDKFU.js';
|
|
5
5
|
import './store-u47QaJ9G.js';
|
|
6
|
-
import './run-record-
|
|
6
|
+
import './run-record-DNiOMBrZ.js';
|
package/dist/control.js
CHANGED
|
@@ -10,8 +10,8 @@ import {
|
|
|
10
10
|
stopOnNoProgress,
|
|
11
11
|
stopOnRepeatedAction,
|
|
12
12
|
subjectiveEval
|
|
13
|
-
} from "./chunk-
|
|
14
|
-
import "./chunk-
|
|
13
|
+
} from "./chunk-V5QSWN7L.js";
|
|
14
|
+
import "./chunk-QBW3YBTR.js";
|
|
15
15
|
import "./chunk-5IIQKMD5.js";
|
|
16
16
|
import "./chunk-PZ5AY32C.js";
|
|
17
17
|
export {
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as GateDecision, F as ResearchReportOptions, H as ResearchReport } from './summary-report-Ce1r4EYo.js';
|
|
2
|
+
import { R as RunRecord, a as RunSplitTag, b as RunTokenUsage, c as RunJudgeMetadata } from './run-record-DNiOMBrZ.js';
|
|
3
|
+
import { R as RawProviderSink, P as ProviderRedactor, a as RunIntegrityExpectations, b as RunIntegrityReport } from './integrity-Cr5YodSY.js';
|
|
2
4
|
import { T as TraceEmitter, R as RunCompleteHook } from './emitter-B2XqDKFU.js';
|
|
3
5
|
import { T as TraceStore } from './store-u47QaJ9G.js';
|
|
4
|
-
import { a as RunRecord, R as RunSplitTag, e as RunTokenUsage, b as RunJudgeMetadata } from './run-record-CX_jcAyr.js';
|
|
5
|
-
import { k as GateDecision, $ as ResearchReportOptions, X as ResearchReport } from './summary-report-D4p7RlDu.js';
|
|
6
|
-
import './feedback-trajectory-CB0A32o3.js';
|
|
7
6
|
|
|
8
7
|
/**
|
|
9
8
|
* LLM client with graceful degrade.
|
|
@@ -571,4 +570,4 @@ interface EvalCampaignResult {
|
|
|
571
570
|
}
|
|
572
571
|
declare function runEvalCampaign<V>(opts: EvalCampaignOptions<V>): Promise<EvalCampaignResult>;
|
|
573
572
|
|
|
574
|
-
export { CallbackResearcher as C, type EvalCampaignOptions as E, type FailedRun as F, type LlmClientOptions as L, NoopResearcher as N, type Researcher as R, type SteeringChange as S, type CallbackResearcherOptions as a, type CampaignFactoryParams as b, type CampaignIntegrityPolicy as c, type CampaignRunContext as d, type CampaignRunOutcome as e, type CampaignRunner as f, type CampaignScenario as g, type CampaignVariant as h, type EvalCampaignResult as i, type ExperimentPlan as j, type ExperimentResult as k, type FailureMode as l, LlmCallError as m, type LlmCallRequest as n, type LlmCallResult as o, LlmClient as p, type LlmMessage as q,
|
|
573
|
+
export { CallbackResearcher as C, type EvalCampaignOptions as E, type FailedRun as F, type LlmClientOptions as L, NoopResearcher as N, type Researcher as R, type SteeringChange as S, type CallbackResearcherOptions as a, type CampaignFactoryParams as b, type CampaignIntegrityPolicy as c, type CampaignRunContext as d, type CampaignRunOutcome as e, type CampaignRunner as f, type CampaignScenario as g, type CampaignVariant as h, type EvalCampaignResult as i, type ExperimentPlan as j, type ExperimentResult as k, type FailureMode as l, LlmCallError as m, type LlmCallRequest as n, type LlmCallResult as o, LlmClient as p, type LlmMessage as q, runEvalCampaign as r, LlmRouteAssertionError as s, type LlmRouteRequirements as t, type LlmUsage as u, assertLlmRoute as v, callLlm as w, callLlmJson as x, probeLlm as y, stripFencedJson as z };
|
|
@@ -343,4 +343,4 @@ declare function controlRunToFeedbackTrajectory<TState, TAction, TActionResult>(
|
|
|
343
343
|
createdAt?: string;
|
|
344
344
|
}): FeedbackTrajectory;
|
|
345
345
|
|
|
346
|
-
export {
|
|
346
|
+
export { replayFeedbackTrajectory as A, serializeFeedbackTrajectoriesJsonl as B, summarizePreferenceMemory as C, withAssignedFeedbackSplit as D, type ControlSeverity as E, type FeedbackArtifactType as F, type ControlEvalResult as G, type ControlActionFailureMode as H, InMemoryFeedbackTrajectoryStore as I, type ControlActionOutcome as J, type ControlBudget as K, type ControlContext as L, type ControlDecision as M, type ControlRunResult as N, type ControlRuntimeConfig as O, type PreferenceMemoryEntry as P, type ControlRuntimeError as Q, type ControlStep as R, type ControlStopPolicies as S, type StopDecision as T, allCriticalPassed as U, objectiveEval as V, runAgentControlLoop as W, stopOnNoProgress as X, stopOnRepeatedAction as Y, subjectiveEval as Z, type FeedbackAttempt as a, type FeedbackLabel as b, type FeedbackLabelKind as c, type FeedbackLabelSource as d, type FeedbackOptimizerRow as e, type FeedbackOutcome as f, type FeedbackReplayAdapter as g, type FeedbackReplayResult as h, type FeedbackSeverity as i, type FeedbackSplitPolicy as j, type FeedbackTask as k, type FeedbackTrajectory as l, type FeedbackTrajectoryFilter as m, type FeedbackTrajectoryStore as n, FileSystemFeedbackTrajectoryStore as o, type ProposedSideEffect as p, assignFeedbackSplit as q, controlRunToFeedbackTrajectory as r, createFeedbackTrajectory as s, feedbackTrajectoriesToDatasetScenarios as t, feedbackTrajectoriesToOptimizerRows as u, feedbackTrajectoryToDatasetScenario as v, feedbackTrajectoryToOptimizerRow as w, parseFeedbackTrajectoriesJsonl as x, renderPreferenceMemoryMarkdown as y, replayFeedbackTrajectories as z };
|