npm - @tangle-network/agent-eval - Versions diffs - 0.22.0 → 0.23.0 - Mend

@tangle-network/agent-eval 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/CHANGELOG.md +134 -0
package/README.md +13 -3
package/dist/benchmarks/index.d.ts +2 -2
package/dist/{chunk-UAND2LOT.js → chunk-7EAUOUQS.js} +4 -247
package/dist/chunk-7EAUOUQS.js.map +1 -0
package/dist/chunk-AXHNWLIX.js +246 -0
package/dist/chunk-AXHNWLIX.js.map +1 -0
package/dist/chunk-EXGR4XEM.js +283 -0
package/dist/chunk-EXGR4XEM.js.map +1 -0
package/dist/chunk-LZKIOBG2.js +2026 -0
package/dist/chunk-LZKIOBG2.js.map +1 -0
package/dist/{chunk-YUFXO3TU.js → chunk-QBW3YBTR.js} +1 -1
package/dist/chunk-QBW3YBTR.js.map +1 -0
package/dist/{chunk-ARZ6BEV6.js → chunk-V5QSWN7L.js} +2 -2
package/dist/{chunk-USHQBPMH.js → chunk-VQQSPGSM.js} +7 -283
package/dist/chunk-VQQSPGSM.js.map +1 -0
package/dist/{control-cxwMOAsy.d.ts → control-DvkH87qJ.d.ts} +2 -2
package/dist/control.d.ts +3 -3
package/dist/control.js +2 -2
package/dist/{optimization-UVDNKaO6.d.ts → eval-campaign-Ds5QljIh.d.ts} +4 -5
package/dist/{feedback-trajectory-CB0A32o3.d.ts → feedback-trajectory-c43WGtTX.d.ts} +1 -1
package/dist/{index-c5saLbKD.d.ts → index-DDTlbHEK.d.ts} +1 -1
package/dist/index-ekBXweiQ.d.ts +1894 -0
package/dist/index.d.ts +18 -154
package/dist/index.js +125 -25
package/dist/index.js.map +1 -1
package/dist/{integrity-K2oVlF57.d.ts → integrity-Cr5YodSY.d.ts} +1 -1
package/dist/openapi.json +1 -1
package/dist/optimization.d.ts +5 -5
package/dist/optimization.js +7 -5
package/dist/reporting.d.ts +294 -4
package/dist/reporting.js +6 -4
package/dist/rl.d.ts +8 -0
package/dist/rl.js +113 -0
package/dist/rl.js.map +1 -0
package/dist/{run-record-CX_jcAyr.d.ts → run-record-DNiOMBrZ.d.ts} +10 -1
package/dist/sequential-DgU2mFsE.d.ts +304 -0
package/dist/{summary-report-D4p7RlDu.d.ts → summary-report-Ce1r4EYo.d.ts} +2 -2
package/dist/traces.d.ts +2 -2
package/dist/traces.js +5 -5
package/docs/auto-research-loop-end-to-end.md +186 -0
package/docs/three-package-architecture.md +180 -0
package/package.json +6 -1
package/dist/chunk-UAND2LOT.js.map +0 -1
package/dist/chunk-USHQBPMH.js.map +0 -1
package/dist/chunk-YUFXO3TU.js.map +0 -1
package/dist/reporting-B82RSv9C.d.ts +0 -593
/package/dist/{chunk-ARZ6BEV6.js.map → chunk-V5QSWN7L.js.map} +0 -0

package/dist/control.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
-export { d as ControlActionFailureMode, e as ControlActionOutcome, f as ControlBudget, g as ControlContext, h as ControlDecision, c as ControlEvalResult, i as ControlRunResult, j as ControlRuntimeConfig, k as ControlRuntimeError, C as ControlSeverity, l as ControlStep, m as ControlStopPolicies, S as StopDecision, B as allCriticalPassed, M as objectiveEval, T as runAgentControlLoop, V as stopOnNoProgress, W as stopOnRepeatedAction, X as subjectiveEval } from './feedback-trajectory-CB0A32o3.js';
-export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-cxwMOAsy.js';
+export { H as ControlActionFailureMode, J as ControlActionOutcome, K as ControlBudget, L as ControlContext, M as ControlDecision, G as ControlEvalResult, N as ControlRunResult, O as ControlRuntimeConfig, Q as ControlRuntimeError, E as ControlSeverity, R as ControlStep, S as ControlStopPolicies, T as StopDecision, U as allCriticalPassed, V as objectiveEval, W as runAgentControlLoop, X as stopOnNoProgress, Y as stopOnRepeatedAction, Z as subjectiveEval } from './feedback-trajectory-c43WGtTX.js';
+export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-DvkH87qJ.js';
 import './dataset-B9qvlm_o.js';
 import './emitter-B2XqDKFU.js';
 import './store-u47QaJ9G.js';
-import './run-record-CX_jcAyr.js';
+import './run-record-DNiOMBrZ.js';

package/dist/control.js CHANGED Viewed

@@ -10,8 +10,8 @@ import {
   stopOnNoProgress,
   stopOnRepeatedAction,
   subjectiveEval
-} from "./chunk-ARZ6BEV6.js";
-import "./chunk-YUFXO3TU.js";
+} from "./chunk-V5QSWN7L.js";
+import "./chunk-QBW3YBTR.js";
 import "./chunk-5IIQKMD5.js";
 import "./chunk-PZ5AY32C.js";
 export {

package/dist/{optimization-UVDNKaO6.d.ts → eval-campaign-Ds5QljIh.d.ts} RENAMED Viewed

@@ -1,9 +1,8 @@
-import { d as RawProviderSink, P as ProviderRedactor, g as RunIntegrityExpectations, j as RunIntegrityReport } from './integrity-K2oVlF57.js';
+import { C as GateDecision, F as ResearchReportOptions, H as ResearchReport } from './summary-report-Ce1r4EYo.js';
+import { R as RunRecord, a as RunSplitTag, b as RunTokenUsage, c as RunJudgeMetadata } from './run-record-DNiOMBrZ.js';
+import { R as RawProviderSink, P as ProviderRedactor, a as RunIntegrityExpectations, b as RunIntegrityReport } from './integrity-Cr5YodSY.js';
 import { T as TraceEmitter, R as RunCompleteHook } from './emitter-B2XqDKFU.js';
 import { T as TraceStore } from './store-u47QaJ9G.js';
-import { a as RunRecord, R as RunSplitTag, e as RunTokenUsage, b as RunJudgeMetadata } from './run-record-CX_jcAyr.js';
-import { k as GateDecision, $ as ResearchReportOptions, X as ResearchReport } from './summary-report-D4p7RlDu.js';
-import './feedback-trajectory-CB0A32o3.js';
 /**
  * LLM client with graceful degrade.
@@ -571,4 +570,4 @@ interface EvalCampaignResult {
 }
 declare function runEvalCampaign<V>(opts: EvalCampaignOptions<V>): Promise<EvalCampaignResult>;
-export { CallbackResearcher as C, type EvalCampaignOptions as E, type FailedRun as F, type LlmClientOptions as L, NoopResearcher as N, type Researcher as R, type SteeringChange as S, type CallbackResearcherOptions as a, type CampaignFactoryParams as b, type CampaignIntegrityPolicy as c, type CampaignRunContext as d, type CampaignRunOutcome as e, type CampaignRunner as f, type CampaignScenario as g, type CampaignVariant as h, type EvalCampaignResult as i, type ExperimentPlan as j, type ExperimentResult as k, type FailureMode as l, LlmCallError as m, type LlmCallRequest as n, type LlmCallResult as o, LlmClient as p, type LlmMessage as q, LlmRouteAssertionError as r, type LlmRouteRequirements as s, type LlmUsage as t, assertLlmRoute as u, callLlm as v, callLlmJson as w, probeLlm as x, runEvalCampaign as y, stripFencedJson as z };
+export { CallbackResearcher as C, type EvalCampaignOptions as E, type FailedRun as F, type LlmClientOptions as L, NoopResearcher as N, type Researcher as R, type SteeringChange as S, type CallbackResearcherOptions as a, type CampaignFactoryParams as b, type CampaignIntegrityPolicy as c, type CampaignRunContext as d, type CampaignRunOutcome as e, type CampaignRunner as f, type CampaignScenario as g, type CampaignVariant as h, type EvalCampaignResult as i, type ExperimentPlan as j, type ExperimentResult as k, type FailureMode as l, LlmCallError as m, type LlmCallRequest as n, type LlmCallResult as o, LlmClient as p, type LlmMessage as q, runEvalCampaign as r, LlmRouteAssertionError as s, type LlmRouteRequirements as t, type LlmUsage as u, assertLlmRoute as v, callLlm as w, callLlmJson as x, probeLlm as y, stripFencedJson as z };

package/dist/{feedback-trajectory-CB0A32o3.d.ts → feedback-trajectory-c43WGtTX.d.ts} RENAMED Viewed

@@ -343,4 +343,4 @@ declare function controlRunToFeedbackTrajectory<TState, TAction, TActionResult>(
     createdAt?: string;
 }): FeedbackTrajectory;
-export { type ProposedSideEffect as A, allCriticalPassed as B, type ControlSeverity as C, assignFeedbackSplit as D, controlRunToFeedbackTrajectory as E, type FeedbackLabel as F, createFeedbackTrajectory as G, feedbackTrajectoriesToDatasetScenarios as H, InMemoryFeedbackTrajectoryStore as I, feedbackTrajectoriesToOptimizerRows as J, feedbackTrajectoryToDatasetScenario as K, feedbackTrajectoryToOptimizerRow as L, objectiveEval as M, parseFeedbackTrajectoriesJsonl as N, renderPreferenceMemoryMarkdown as O, type PreferenceMemoryEntry as P, replayFeedbackTrajectories as Q, replayFeedbackTrajectory as R, type StopDecision as S, runAgentControlLoop as T, serializeFeedbackTrajectoriesJsonl as U, stopOnNoProgress as V, stopOnRepeatedAction as W, subjectiveEval as X, summarizePreferenceMemory as Y, withAssignedFeedbackSplit as Z, type FeedbackTrajectoryStore as a, type FeedbackTrajectory as b, type ControlEvalResult as c, type ControlActionFailureMode as d, type ControlActionOutcome as e, type ControlBudget as f, type ControlContext as g, type ControlDecision as h, type ControlRunResult as i, type ControlRuntimeConfig as j, type ControlRuntimeError as k, type ControlStep as l, type ControlStopPolicies as m, type FeedbackArtifactType as n, type FeedbackAttempt as o, type FeedbackLabelKind as p, type FeedbackLabelSource as q, type FeedbackOptimizerRow as r, type FeedbackOutcome as s, type FeedbackReplayAdapter as t, type FeedbackReplayResult as u, type FeedbackSeverity as v, type FeedbackSplitPolicy as w, type FeedbackTask as x, type FeedbackTrajectoryFilter as y, FileSystemFeedbackTrajectoryStore as z };
+export { replayFeedbackTrajectory as A, serializeFeedbackTrajectoriesJsonl as B, summarizePreferenceMemory as C, withAssignedFeedbackSplit as D, type ControlSeverity as E, type FeedbackArtifactType as F, type ControlEvalResult as G, type ControlActionFailureMode as H, InMemoryFeedbackTrajectoryStore as I, type ControlActionOutcome as J, type ControlBudget as K, type ControlContext as L, type ControlDecision as M, type ControlRunResult as N, type ControlRuntimeConfig as O, type PreferenceMemoryEntry as P, type ControlRuntimeError as Q, type ControlStep as R, type ControlStopPolicies as S, type StopDecision as T, allCriticalPassed as U, objectiveEval as V, runAgentControlLoop as W, stopOnNoProgress as X, stopOnRepeatedAction as Y, subjectiveEval as Z, type FeedbackAttempt as a, type FeedbackLabel as b, type FeedbackLabelKind as c, type FeedbackLabelSource as d, type FeedbackOptimizerRow as e, type FeedbackOutcome as f, type FeedbackReplayAdapter as g, type FeedbackReplayResult as h, type FeedbackSeverity as i, type FeedbackSplitPolicy as j, type FeedbackTask as k, type FeedbackTrajectory as l, type FeedbackTrajectoryFilter as m, type FeedbackTrajectoryStore as n, FileSystemFeedbackTrajectoryStore as o, type ProposedSideEffect as p, assignFeedbackSplit as q, controlRunToFeedbackTrajectory as r, createFeedbackTrajectory as s, feedbackTrajectoriesToDatasetScenarios as t, feedbackTrajectoriesToOptimizerRows as u, feedbackTrajectoryToDatasetScenario as v, feedbackTrajectoryToOptimizerRow as w, parseFeedbackTrajectoriesJsonl as x, renderPreferenceMemoryMarkdown as y, replayFeedbackTrajectories as z };

package/dist/{index-c5saLbKD.d.ts → index-DDTlbHEK.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { R as RunSplitTag } from './run-record-CX_jcAyr.js';
+import { a as RunSplitTag } from './run-record-DNiOMBrZ.js';
 /**
  * Shared types for the reference benchmark wrappers under