@tangle-network/agent-eval 0.23.1 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. package/CHANGELOG.md +80 -0
  2. package/README.md +141 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  20. package/dist/chunk-6QDKWHLS.js.map +1 -0
  21. package/dist/chunk-I4MBDTY5.js +272 -0
  22. package/dist/chunk-I4MBDTY5.js.map +1 -0
  23. package/dist/chunk-K2TPS5LB.js +569 -0
  24. package/dist/chunk-K2TPS5LB.js.map +1 -0
  25. package/dist/chunk-KKHDIONI.js +414 -0
  26. package/dist/chunk-KKHDIONI.js.map +1 -0
  27. package/dist/chunk-KMPRBJK4.js +74 -0
  28. package/dist/chunk-KMPRBJK4.js.map +1 -0
  29. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  30. package/dist/chunk-KTGTIOFD.js.map +1 -0
  31. package/dist/chunk-LSH4MMOZ.js +838 -0
  32. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  33. package/dist/chunk-NG236HPC.js +57 -0
  34. package/dist/chunk-NG236HPC.js.map +1 -0
  35. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  36. package/dist/chunk-NLMNWKVM.js.map +1 -0
  37. package/dist/chunk-NU65VQ7M.js +99 -0
  38. package/dist/chunk-NU65VQ7M.js.map +1 -0
  39. package/dist/chunk-OHEPNJQN.js +554 -0
  40. package/dist/chunk-OHEPNJQN.js.map +1 -0
  41. package/dist/chunk-OWLAAMME.js +250 -0
  42. package/dist/chunk-OWLAAMME.js.map +1 -0
  43. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  44. package/dist/chunk-PC4UYEBM.js.map +1 -0
  45. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  46. package/dist/chunk-RAF443UI.js.map +1 -0
  47. package/dist/chunk-RZTMDUO7.js +49 -0
  48. package/dist/chunk-RZTMDUO7.js.map +1 -0
  49. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  50. package/dist/chunk-SESZDQPX.js.map +1 -0
  51. package/dist/{chunk-6KQG5HAH.js → chunk-SY6WAAAD.js} +84 -71
  52. package/dist/chunk-SY6WAAAD.js.map +1 -0
  53. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  54. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  55. package/dist/{chunk-VQQSPGSM.js → chunk-VRJVTXRV.js} +169 -111
  56. package/dist/chunk-VRJVTXRV.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +1866 -3151
  80. package/dist/index.js +5457 -7809
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +1 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +409 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-TDPn1cxq.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-CUOiGcGv.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-BXGs_9V0.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +22 -22
  125. package/dist/wire/index.js +4 -3
  126. package/package.json +44 -18
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-5IIQKMD5.js.map +0 -1
  129. package/dist/chunk-6KQG5HAH.js.map +0 -1
  130. package/dist/chunk-6M774GY6.js.map +0 -1
  131. package/dist/chunk-7EAUOUQS.js.map +0 -1
  132. package/dist/chunk-AXHNWLIX.js.map +0 -1
  133. package/dist/chunk-EXGR4XEM.js.map +0 -1
  134. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  135. package/dist/chunk-KAO3Q65R.js.map +0 -1
  136. package/dist/chunk-LZKIOBG2.js +0 -2026
  137. package/dist/chunk-LZKIOBG2.js.map +0 -1
  138. package/dist/chunk-QBW3YBTR.js.map +0 -1
  139. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  140. package/dist/chunk-SQQLHODJ.js.map +0 -1
  141. package/dist/chunk-V5QSWN7L.js +0 -1310
  142. package/dist/chunk-V5QSWN7L.js.map +0 -1
  143. package/dist/chunk-VQQSPGSM.js.map +0 -1
  144. package/dist/chunk-XPHOZPOM.js +0 -1947
  145. package/dist/chunk-XPHOZPOM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
package/dist/feedback-trajectory-c43WGtTX.d.ts (deleted)
@@ -1,346 +0,0 @@
1
- import { D as DatasetSplit, a as DatasetScenario } from './dataset-B9qvlm_o.js';
2
- import { T as TraceEmitter } from './emitter-B2XqDKFU.js';
3
- import { F as FailureClass, T as TraceStore } from './store-u47QaJ9G.js';
4
-
5
- /**
6
- * Policy-based agent control runtime.
7
- *
8
- * This is the minimal reusable loop behind driver-agent patterns:
9
- *
10
- * observe state -> validate -> decide next action -> act -> observe -> ...
11
- *
12
- * It deliberately does not model named "topologies". Direct execution,
13
- * critic/revise, driver intervention, specialist calls, and human escalation
14
- * are all just actions chosen by the control policy.
15
- */
16
-
17
- type ControlSeverity = 'info' | 'warning' | 'error' | 'critical';
18
- type ControlActionFailureMode = 'continue' | 'stop';
19
- interface ControlEvalResult {
20
- /** Stable validator or judge id. */
21
- id: string;
22
- /** Whether this check passed. */
23
- passed: boolean;
24
- /** Optional normalized score. 1 = best, 0 = worst. */
25
- score?: number;
26
- /** Objective validators should usually be "error" or "critical" when failed. */
27
- severity?: ControlSeverity;
28
- /** Human-readable result. */
29
- detail?: string;
30
- /** Small evidence string or pointer. Avoid large payloads. */
31
- evidence?: string;
32
- /** True when the result came from deterministic state, not LLM judgment. */
33
- objective?: boolean;
34
- /** Structured details for downstream control policies and reports. */
35
- metadata?: Record<string, unknown>;
36
- }
37
- interface ControlBudget {
38
- maxSteps: number;
39
- maxWallMs?: number;
40
- maxCostUsd?: number;
41
- }
42
- interface ControlStopPolicies<TState, TAction> {
43
- /**
44
- * Stop after N consecutive steps with no state fingerprint change and
45
- * less than `minScoreDelta` score movement. Disabled when omitted.
46
- */
47
- maxNoProgressSteps?: number;
48
- /**
49
- * Stop after the same action fingerprint is selected N consecutive
50
- * times. Disabled when omitted.
51
- */
52
- maxRepeatedActions?: number;
53
- /** Minimum score movement that counts as progress. Default 0.001. */
54
- minScoreDelta?: number;
55
- /** Override the default JSON/string fingerprint for state comparisons. */
56
- stateFingerprint?: (state: TState) => string;
57
- /** Override the default JSON/string fingerprint for repeated-action checks. */
58
- actionFingerprint?: (action: TAction) => string;
59
- }
60
- interface ControlContext<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
61
- intent: string;
62
- state: TState;
63
- evals: TEval[];
64
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
65
- budget: ControlBudget;
66
- stepIndex: number;
67
- wallMs: number;
68
- spentCostUsd: number;
69
- remainingCostUsd?: number;
70
- abortSignal: AbortSignal;
71
- emitter?: TraceEmitter;
72
- }
73
- type ControlDecision<TAction> = {
74
- type: 'continue';
75
- action: TAction;
76
- reason?: string;
77
- } | {
78
- type: 'stop';
79
- reason: string;
80
- pass?: boolean;
81
- score?: number;
82
- };
83
- interface StopDecision {
84
- stop: boolean;
85
- pass: boolean;
86
- reason: string;
87
- score?: number;
88
- failureClass?: FailureClass;
89
- }
90
- interface ControlActionOutcome<TActionResult> {
91
- ok: boolean;
92
- result?: TActionResult;
93
- error?: string;
94
- costUsd?: number;
95
- durationMs: number;
96
- }
97
- interface ControlRuntimeError {
98
- phase: 'observe' | 'validate' | 'decide' | 'act' | 'stop-policy' | 'on-step' | 'trace';
99
- stepIndex: number;
100
- message: string;
101
- }
102
- interface ControlStep<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
103
- index: number;
104
- decision: ControlDecision<TAction>;
105
- beforeState: TState;
106
- afterState: TState;
107
- evalsBefore: TEval[];
108
- evalsAfter: TEval[];
109
- actionOutcome?: ControlActionOutcome<TActionResult>;
110
- startedAt: string;
111
- endedAt: string;
112
- }
113
- interface ControlRunResult<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
114
- intent: string;
115
- pass: boolean;
116
- completed: boolean;
117
- reason: string;
118
- score?: number;
119
- steps: ControlStep<TState, TAction, TActionResult, TEval>[];
120
- finalState: TState | undefined;
121
- finalEvals: TEval[];
122
- wallMs: number;
123
- spentCostUsd: number;
124
- runId: string | null;
125
- failureClass?: FailureClass;
126
- runtimeErrors: ControlRuntimeError[];
127
- stoppedBy: 'policy' | 'stop-policy' | 'budget' | 'abort' | 'runtime-error';
128
- }
129
- interface ControlRuntimeConfig<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult> {
130
- intent: string;
131
- budget?: Partial<ControlBudget>;
132
- signal?: AbortSignal;
133
- /** Defaults to `continue`: action failures are recorded, then the policy gets another chance. */
134
- actionFailure?: ControlActionFailureMode;
135
- /**
136
- * Extract cost from an action result. Used for `maxCostUsd` budget
137
- * enforcement and trace budget ledger emission.
138
- */
139
- getActionCostUsd?: (ctx: {
140
- action: TAction;
141
- result: TActionResult;
142
- state: TState;
143
- evals: TEval[];
144
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
145
- }) => number | undefined;
146
- /** Read typed task/product state. Prefer structured state over transcript-only context. */
147
- observe: (ctx: {
148
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
149
- abortSignal: AbortSignal;
150
- }) => Promise<TState> | TState;
151
- /** Objective validators first, subjective judges only where objective state is insufficient. */
152
- validate: (ctx: {
153
- intent: string;
154
- state: TState;
155
- history: ControlStep<TState, TAction, TActionResult, TEval>[];
156
- abortSignal: AbortSignal;
157
- }) => Promise<TEval[]> | TEval[];
158
- /** Choose the next control action. Can call a worker, ask user, run critic, inspect state, or stop. */
159
- decide: (ctx: ControlContext<TState, TAction, TActionResult, TEval>) => Promise<ControlDecision<TAction>> | ControlDecision<TAction>;
160
- /** Execute the action selected by the policy. */
161
- act: (action: TAction, ctx: ControlContext<TState, TAction, TActionResult, TEval>) => Promise<TActionResult> | TActionResult;
162
- /** Final stopping policy. Called before decide and after each action. */
163
- shouldStop?: (ctx: ControlContext<TState, TAction, TActionResult, TEval>) => Promise<StopDecision> | StopDecision;
164
- /** Optional hook for tracing or live progress updates. */
165
- onStep?: (step: ControlStep<TState, TAction, TActionResult, TEval>) => Promise<void> | void;
166
- /** Optional generic stuck-loop policies. Custom `shouldStop` still runs first. */
167
- stopPolicies?: ControlStopPolicies<TState, TAction>;
168
- /** Optional trace sink. Emits one run plus one span per control step. */
169
- store?: TraceStore;
170
- scenarioId?: string;
171
- projectId?: string;
172
- variantId?: string;
173
- }
174
- declare function runAgentControlLoop<TState, TAction, TActionResult, TEval extends ControlEvalResult = ControlEvalResult>(config: ControlRuntimeConfig<TState, TAction, TActionResult, TEval>): Promise<ControlRunResult<TState, TAction, TActionResult, TEval>>;
175
- declare function stopOnNoProgress<TState, TAction>(maxNoProgressSteps: number, options?: Omit<ControlStopPolicies<TState, TAction>, 'maxNoProgressSteps'>): ControlStopPolicies<TState, TAction>;
176
- declare function stopOnRepeatedAction<TState, TAction>(maxRepeatedActions: number, options?: Omit<ControlStopPolicies<TState, TAction>, 'maxRepeatedActions'>): ControlStopPolicies<TState, TAction>;
177
- declare function objectiveEval(input: Omit<ControlEvalResult, 'objective'>): ControlEvalResult;
178
- declare function subjectiveEval(input: Omit<ControlEvalResult, 'objective'>): ControlEvalResult;
179
- declare function allCriticalPassed(evals: ControlEvalResult[]): boolean;
180
-
181
- type FeedbackArtifactType = 'text' | 'code' | 'plan' | 'research' | 'action' | 'ui' | 'decision' | 'data' | 'other';
182
- type FeedbackLabelSource = 'user' | 'judge' | 'environment' | 'metric' | 'policy' | 'system';
183
- type FeedbackLabelKind = 'approve' | 'reject' | 'select' | 'edit' | 'rank' | 'rate' | 'comment' | 'metric_outcome' | 'policy_block' | 'revision_request';
184
- type FeedbackSeverity = 'info' | 'warning' | 'error' | 'critical';
185
- interface FeedbackTask {
186
- intent: string;
187
- context?: unknown;
188
- }
189
- interface ProposedSideEffect {
190
- type: string;
191
- risk?: 'low' | 'medium' | 'high';
192
- costUsd?: number;
193
- externalSideEffect?: boolean;
194
- requiresApproval?: boolean;
195
- metadata?: Record<string, unknown>;
196
- }
197
- interface FeedbackLabel {
198
- id?: string;
199
- source: FeedbackLabelSource;
200
- kind: FeedbackLabelKind;
201
- value: unknown;
202
- reason?: string;
203
- severity?: FeedbackSeverity;
204
- createdAt: string;
205
- metadata?: Record<string, unknown>;
206
- }
207
- interface FeedbackAttempt {
208
- id: string;
209
- stepIndex: number;
210
- artifactType: FeedbackArtifactType;
211
- artifact: unknown;
212
- options?: unknown[];
213
- proposedAction?: ProposedSideEffect;
214
- evals?: ControlEvalResult[];
215
- feedback?: FeedbackLabel[];
216
- createdAt: string;
217
- metadata?: Record<string, unknown>;
218
- }
219
- interface FeedbackOutcome {
220
- success?: boolean;
221
- score?: number;
222
- metrics?: Record<string, number>;
223
- costUsd?: number;
224
- detail?: string;
225
- observedAt?: string;
226
- metadata?: Record<string, unknown>;
227
- }
228
- interface FeedbackTrajectory {
229
- id: string;
230
- projectId?: string;
231
- scenarioId?: string;
232
- task: FeedbackTask;
233
- attempts: FeedbackAttempt[];
234
- labels: FeedbackLabel[];
235
- outcome?: FeedbackOutcome;
236
- split?: DatasetSplit;
237
- tags?: Record<string, string>;
238
- createdAt: string;
239
- updatedAt?: string;
240
- metadata?: Record<string, unknown>;
241
- }
242
- interface FeedbackTrajectoryStore {
243
- save(trajectory: FeedbackTrajectory): Promise<void>;
244
- get(id: string): Promise<FeedbackTrajectory | null>;
245
- list(filter?: FeedbackTrajectoryFilter): Promise<FeedbackTrajectory[]>;
246
- appendAttempt(id: string, attempt: FeedbackAttempt): Promise<FeedbackTrajectory>;
247
- appendLabel(id: string, label: FeedbackLabel, attemptId?: string): Promise<FeedbackTrajectory>;
248
- }
249
- interface FeedbackTrajectoryFilter {
250
- projectId?: string;
251
- scenarioId?: string;
252
- split?: DatasetSplit;
253
- tag?: [string, string];
254
- }
255
- interface FeedbackSplitPolicy {
256
- trainPct?: number;
257
- devPct?: number;
258
- testPct?: number;
259
- holdoutPct?: number;
260
- }
261
- interface PreferenceMemoryEntry {
262
- instruction: string;
263
- rationale: string;
264
- weight: number;
265
- sourceTrajectoryId: string;
266
- sourceLabelId?: string;
267
- category?: string;
268
- }
269
- interface FeedbackOptimizerRow {
270
- scenarioId: string;
271
- trajectoryId: string;
272
- labelKinds: FeedbackLabelKind[];
273
- score?: number;
274
- metadata?: Record<string, unknown>;
275
- }
276
- interface FeedbackReplayResult {
277
- trajectoryId: string;
278
- pass: boolean;
279
- score?: number;
280
- labels: FeedbackLabel[];
281
- outcome?: FeedbackOutcome;
282
- metadata?: Record<string, unknown>;
283
- }
284
- interface FeedbackReplayAdapter {
285
- replay(trajectory: FeedbackTrajectory): Promise<Omit<FeedbackReplayResult, 'trajectoryId'>> | Omit<FeedbackReplayResult, 'trajectoryId'>;
286
- }
287
- declare class InMemoryFeedbackTrajectoryStore implements FeedbackTrajectoryStore {
288
- private readonly trajectories;
289
- save(trajectory: FeedbackTrajectory): Promise<void>;
290
- get(id: string): Promise<FeedbackTrajectory | null>;
291
- list(filter?: FeedbackTrajectoryFilter): Promise<FeedbackTrajectory[]>;
292
- appendAttempt(id: string, attempt: FeedbackAttempt): Promise<FeedbackTrajectory>;
293
- appendLabel(id: string, label: FeedbackLabel, attemptId?: string): Promise<FeedbackTrajectory>;
294
- }
295
- declare class FileSystemFeedbackTrajectoryStore implements FeedbackTrajectoryStore {
296
- private readonly dir;
297
- private readonly memory;
298
- private loaded;
299
- constructor(options: {
300
- dir: string;
301
- });
302
- save(trajectory: FeedbackTrajectory): Promise<void>;
303
- get(id: string): Promise<FeedbackTrajectory | null>;
304
- list(filter?: FeedbackTrajectoryFilter): Promise<FeedbackTrajectory[]>;
305
- appendAttempt(id: string, attempt: FeedbackAttempt): Promise<FeedbackTrajectory>;
306
- appendLabel(id: string, label: FeedbackLabel, attemptId?: string): Promise<FeedbackTrajectory>;
307
- private append;
308
- private load;
309
- }
310
- declare function createFeedbackTrajectory(input: {
311
- id?: string;
312
- projectId?: string;
313
- scenarioId?: string;
314
- task: FeedbackTask;
315
- attempts?: FeedbackAttempt[];
316
- labels?: FeedbackLabel[];
317
- outcome?: FeedbackOutcome;
318
- split?: DatasetSplit;
319
- tags?: Record<string, string>;
320
- createdAt?: string;
321
- metadata?: Record<string, unknown>;
322
- }): FeedbackTrajectory;
323
- declare function assignFeedbackSplit(trajectory: Pick<FeedbackTrajectory, 'id' | 'projectId' | 'scenarioId' | 'task'>, policy?: FeedbackSplitPolicy): DatasetSplit;
324
- declare function withAssignedFeedbackSplit(trajectory: FeedbackTrajectory, policy?: FeedbackSplitPolicy): FeedbackTrajectory;
325
- declare function feedbackTrajectoryToDatasetScenario(trajectory: FeedbackTrajectory): DatasetScenario;
326
- declare function feedbackTrajectoriesToDatasetScenarios(trajectories: FeedbackTrajectory[]): DatasetScenario[];
327
- declare function feedbackTrajectoryToOptimizerRow(trajectory: FeedbackTrajectory): FeedbackOptimizerRow;
328
- declare function feedbackTrajectoriesToOptimizerRows(trajectories: FeedbackTrajectory[]): FeedbackOptimizerRow[];
329
- declare function replayFeedbackTrajectory(trajectory: FeedbackTrajectory, adapter: FeedbackReplayAdapter): Promise<FeedbackReplayResult>;
330
- declare function replayFeedbackTrajectories(trajectories: FeedbackTrajectory[], adapter: FeedbackReplayAdapter): Promise<FeedbackReplayResult[]>;
331
- declare function summarizePreferenceMemory(trajectories: FeedbackTrajectory[], options?: {
332
- maxEntries?: number;
333
- }): PreferenceMemoryEntry[];
334
- declare function renderPreferenceMemoryMarkdown(entries: PreferenceMemoryEntry[]): string;
335
- declare function serializeFeedbackTrajectoriesJsonl(trajectories: FeedbackTrajectory[]): string;
336
- declare function parseFeedbackTrajectoriesJsonl(jsonl: string): FeedbackTrajectory[];
337
- declare function controlRunToFeedbackTrajectory<TState, TAction, TActionResult>(run: ControlRunResult<TState, TAction, TActionResult>, options?: {
338
- projectId?: string;
339
- scenarioId?: string;
340
- artifactType?: FeedbackArtifactType;
341
- artifactFromStep?: (step: ControlStep<TState, TAction, TActionResult>) => unknown;
342
- proposedActionFromStep?: (step: ControlStep<TState, TAction, TActionResult>) => ProposedSideEffect | undefined;
343
- createdAt?: string;
344
- }): FeedbackTrajectory;
345
-
346
- export { replayFeedbackTrajectory as A, serializeFeedbackTrajectoriesJsonl as B, summarizePreferenceMemory as C, withAssignedFeedbackSplit as D, type ControlSeverity as E, type FeedbackArtifactType as F, type ControlEvalResult as G, type ControlActionFailureMode as H, InMemoryFeedbackTrajectoryStore as I, type ControlActionOutcome as J, type ControlBudget as K, type ControlContext as L, type ControlDecision as M, type ControlRunResult as N, type ControlRuntimeConfig as O, type PreferenceMemoryEntry as P, type ControlRuntimeError as Q, type ControlStep as R, type ControlStopPolicies as S, type StopDecision as T, allCriticalPassed as U, objectiveEval as V, runAgentControlLoop as W, stopOnNoProgress as X, stopOnRepeatedAction as Y, subjectiveEval as Z, type FeedbackAttempt as a, type FeedbackLabel as b, type FeedbackLabelKind as c, type FeedbackLabelSource as d, type FeedbackOptimizerRow as e, type FeedbackOutcome as f, type FeedbackReplayAdapter as g, type FeedbackReplayResult as h, type FeedbackSeverity as i, type FeedbackSplitPolicy as j, type FeedbackTask as k, type FeedbackTrajectory as l, type FeedbackTrajectoryFilter as m, type FeedbackTrajectoryStore as n, FileSystemFeedbackTrajectoryStore as o, type ProposedSideEffect as p, assignFeedbackSplit as q, controlRunToFeedbackTrajectory as r, createFeedbackTrajectory as s, feedbackTrajectoriesToDatasetScenarios as t, feedbackTrajectoriesToOptimizerRows as u, feedbackTrajectoryToDatasetScenario as v, feedbackTrajectoryToOptimizerRow as w, parseFeedbackTrajectoriesJsonl as x, renderPreferenceMemoryMarkdown as y, replayFeedbackTrajectories as z };