@tangle-network/agent-eval 0.72.0 → 0.72.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/dist/adapters/http.d.ts +1 -1
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/adapters/otel.d.ts +3 -2
- package/dist/agent-profile-DYRboYWu.d.ts +364 -0
- package/dist/analyst/index.d.ts +221 -0
- package/dist/analyst/index.js +371 -0
- package/dist/analyst/index.js.map +1 -0
- package/dist/analyst-t7zZS3TV.d.ts +88 -0
- package/dist/campaign/index.d.ts +485 -9
- package/dist/campaign/index.js +597 -22
- package/dist/campaign/index.js.map +1 -1
- package/dist/chunk-7W4SM7FD.js +1075 -0
- package/dist/chunk-7W4SM7FD.js.map +1 -0
- package/dist/{chunk-AIWHLG7J.js → chunk-GJJNJVIR.js} +11 -11
- package/dist/chunk-JHA3ZGSO.js +1496 -0
- package/dist/chunk-JHA3ZGSO.js.map +1 -0
- package/dist/{chunk-4QJN7RDX.js → chunk-JYE3WOTE.js} +55 -7
- package/dist/{chunk-4QJN7RDX.js.map → chunk-JYE3WOTE.js.map} +1 -1
- package/dist/chunk-LB2UOI5F.js +412 -0
- package/dist/chunk-LB2UOI5F.js.map +1 -0
- package/dist/{chunk-ODGETRTM.js → chunk-VUINJM5M.js} +234 -1415
- package/dist/chunk-VUINJM5M.js.map +1 -0
- package/dist/chunk-WYIHD6EB.js +1044 -0
- package/dist/chunk-WYIHD6EB.js.map +1 -0
- package/dist/{chunk-UD6EF73X.js → chunk-XPILG2CA.js} +119 -2
- package/dist/chunk-XPILG2CA.js.map +1 -0
- package/dist/contract/index.d.ts +17 -13
- package/dist/contract/index.js +13 -7
- package/dist/contract/index.js.map +1 -1
- package/dist/{control-DxvZeV5X.d.ts → control-BgA6BYTm.d.ts} +1 -1
- package/dist/control.d.ts +2 -2
- package/dist/{feedback-trajectory-8hKC5EOb.d.ts → feedback-trajectory-B3rErRsh.d.ts} +1 -1
- package/dist/harness-optimizer-EnEnQPsr.d.ts +106 -0
- package/dist/hosted/index.d.ts +223 -2
- package/dist/index.d.ts +49 -1323
- package/dist/index.js +353 -2496
- package/dist/index.js.map +1 -1
- package/dist/{index-BGBrVS24.d.ts → insight-report-Df3lxYXM.d.ts} +1 -221
- package/dist/kind-factory-DW9XWPvM.d.ts +172 -0
- package/dist/multi-layer-verifier-DlWCXuxL.d.ts +141 -0
- package/dist/openapi.json +1 -1
- package/dist/pareto-E-pembql.d.ts +81 -0
- package/dist/{provenance-C69gLUXH.d.ts → provenance-B-TFszPW.d.ts} +131 -4
- package/dist/redact-B40YG2M_.d.ts +45 -0
- package/dist/registry-DuVYiTvw.d.ts +128 -0
- package/dist/{researcher-WJvIpX3L.d.ts → researcher-C_KJyIGg.d.ts} +1 -141
- package/dist/rl.d.ts +4 -3
- package/dist/rl.js +4 -4
- package/dist/run-critic-BAIjX99r.d.ts +56 -0
- package/dist/{run-improvement-loop-Bzamo6GB.d.ts → run-improvement-loop-BqYH2vCR.d.ts} +25 -1
- package/dist/semantic-concept-judge-CV9Wlx4t.d.ts +650 -0
- package/dist/{store-jzKpMl16.d.ts → store-GmBE2pZZ.d.ts} +1 -1
- package/dist/traces.d.ts +371 -308
- package/dist/traces.js +43 -18
- package/dist/{types-CnmZ2bkP.d.ts → types-Bba0vl1V.d.ts} +1 -1
- package/dist/{registry-BGKyX6bw.d.ts → types-CRD68aH7.d.ts} +3 -128
- package/dist/wire/index.d.ts +1 -1
- package/dist/workflow/index.d.ts +494 -0
- package/dist/workflow/index.js +2177 -0
- package/dist/workflow/index.js.map +1 -0
- package/docs/design/self-improvement-roadmap.md +106 -0
- package/package.json +36 -12
- package/dist/agent-profile-DzcPHR1Z.d.ts +0 -114
- package/dist/chunk-ODGETRTM.js.map +0 -1
- package/dist/chunk-SL55X4VN.js +0 -186
- package/dist/chunk-SL55X4VN.js.map +0 -1
- package/dist/chunk-UD6EF73X.js.map +0 -1
- /package/dist/{chunk-AIWHLG7J.js.map → chunk-GJJNJVIR.js.map} +0 -0
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
import { W as WorkflowTopology } from '../harness-optimizer-EnEnQPsr.js';
|
|
2
|
+
import { b as RunSplitTag, a as RunTokenUsage, R as RunRecord } from '../run-record-BgTFzO2r.js';
|
|
3
|
+
import { c as AnalystFinding, h as AnalystSeverity, E as EvidenceRef } from '../types-CRD68aH7.js';
|
|
4
|
+
import { F as FailureClusterInsight } from '../insight-report-Df3lxYXM.js';
|
|
5
|
+
import { a as VerificationReport, L as LayerResult } from '../multi-layer-verifier-DlWCXuxL.js';
|
|
6
|
+
import { F as FailureClusterReport } from '../failure-cluster-CL7IVgkJ.js';
|
|
7
|
+
import { R as RedactionRule, a as RedactionReport } from '../redact-B40YG2M_.js';
|
|
8
|
+
import { D as DatasetSplit } from '../dataset-B2kL-fSM.js';
|
|
9
|
+
import { a as FeedbackTrajectory } from '../feedback-trajectory-B3rErRsh.js';
|
|
10
|
+
import { a as PairedBootstrapResult } from '../statistics-B7yCbi9i.js';
|
|
11
|
+
import '../pareto-E-pembql.js';
|
|
12
|
+
import '../run-critic-BAIjX99r.js';
|
|
13
|
+
import '../schema-m0gsnbt3.js';
|
|
14
|
+
import '../store-CKUAgsJz.js';
|
|
15
|
+
import '../errors-Dwqw-T_m.js';
|
|
16
|
+
import '../store-GmBE2pZZ.js';
|
|
17
|
+
import '../types-Croy5h7V.js';
|
|
18
|
+
import '@tangle-network/tcloud';
|
|
19
|
+
import '../llm-client-DbjLfz-K.js';
|
|
20
|
+
import '../raw-provider-sink-C46HDghv.js';
|
|
21
|
+
import '../summary-report-ByiOUrHj.js';
|
|
22
|
+
import '../judge-calibration-DilmB3Ml.js';
|
|
23
|
+
import '../control-runtime-DuFBYg7A.js';
|
|
24
|
+
import '../emitter-DEZwY14K.js';
|
|
25
|
+
|
|
26
|
+
type WorkflowTraceVersion = 'workflow-trace-v1';
|
|
27
|
+
type WorkflowTraceEventKind = 'workflow.started' | 'workflow.phase' | 'workflow.log' | 'workflow.parallel.started' | 'workflow.parallel.ended' | 'workflow.pipeline.started' | 'workflow.pipeline.ended' | 'workflow.branch.started' | 'workflow.branch.ended' | 'workflow.branch.failed' | 'workflow.agent.started' | 'workflow.agent.ended' | 'workflow.agent.failed' | 'workflow.loop.started' | 'workflow.loop.ended' | 'workflow.loop.failed' | 'workflow.verifier.started' | 'workflow.verifier.ended' | 'workflow.verifier.failed' | 'workflow.analyst.started' | 'workflow.analyst.ended' | 'workflow.analyst.failed' | 'workflow.reviewer.started' | 'workflow.reviewer.ended' | 'workflow.reviewer.failed' | 'workflow.failed' | 'workflow.ended';
|
|
28
|
+
interface WorkflowTraceEvent {
|
|
29
|
+
kind: WorkflowTraceEventKind;
|
|
30
|
+
runId: string;
|
|
31
|
+
timestamp: number;
|
|
32
|
+
payload: Record<string, unknown>;
|
|
33
|
+
}
|
|
34
|
+
interface WorkflowTraceArtifact {
|
|
35
|
+
kind: string;
|
|
36
|
+
uri: string;
|
|
37
|
+
contentType?: string;
|
|
38
|
+
sha256?: string;
|
|
39
|
+
metadata?: Record<string, unknown>;
|
|
40
|
+
}
|
|
41
|
+
interface WorkflowTraceExportLinks {
|
|
42
|
+
traceArtifactUri?: string;
|
|
43
|
+
exportBundleUri?: string;
|
|
44
|
+
partnerReportUri?: string;
|
|
45
|
+
intelligenceRunUri?: string;
|
|
46
|
+
}
|
|
47
|
+
interface WorkflowTraceEnvelope {
|
|
48
|
+
traceVersion: WorkflowTraceVersion;
|
|
49
|
+
runId: string;
|
|
50
|
+
topology?: WorkflowTopology;
|
|
51
|
+
events: WorkflowTraceEvent[];
|
|
52
|
+
artifacts?: WorkflowTraceArtifact[];
|
|
53
|
+
metadata?: Record<string, unknown>;
|
|
54
|
+
}
|
|
55
|
+
interface WorkflowTraceSummary {
|
|
56
|
+
runId: string;
|
|
57
|
+
startedAt?: number;
|
|
58
|
+
endedAt?: number;
|
|
59
|
+
durationMs: number;
|
|
60
|
+
costUsd: number;
|
|
61
|
+
tokenUsage: RunTokenUsage;
|
|
62
|
+
phaseCount: number;
|
|
63
|
+
branchCount: number;
|
|
64
|
+
failedBranchCount: number;
|
|
65
|
+
agentCalls: number;
|
|
66
|
+
loopCalls: number;
|
|
67
|
+
verifierCalls: number;
|
|
68
|
+
analystCalls: number;
|
|
69
|
+
reviewerCalls: number;
|
|
70
|
+
agentFailures: number;
|
|
71
|
+
loopFailures: number;
|
|
72
|
+
verifierFailures: number;
|
|
73
|
+
analystFailures: number;
|
|
74
|
+
reviewerFailures: number;
|
|
75
|
+
eventCount: number;
|
|
76
|
+
failed: boolean;
|
|
77
|
+
failureMessage?: string;
|
|
78
|
+
}
|
|
79
|
+
interface WorkflowTraceProjectionMetadata {
|
|
80
|
+
experimentId: string;
|
|
81
|
+
candidateId: string;
|
|
82
|
+
seed: number;
|
|
83
|
+
model: string;
|
|
84
|
+
promptHash: string;
|
|
85
|
+
configHash: string;
|
|
86
|
+
commitSha: string;
|
|
87
|
+
splitTag: RunSplitTag;
|
|
88
|
+
scenarioId?: string;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
declare const WORKFLOW_TRACE_EVENT_KINDS: readonly WorkflowTraceEventKind[];
|
|
92
|
+
declare function validateWorkflowTraceEventKind(kind: string): WorkflowTraceEventKind;
|
|
93
|
+
declare function validateWorkflowTraceEventPayload(kind: WorkflowTraceEventKind, payload: Record<string, unknown>): Record<string, unknown>;
|
|
94
|
+
|
|
95
|
+
type WorkflowFeedbackPackVersion = 'workflow-feedback-pack-v1';
|
|
96
|
+
type WorkflowFeedbackSeverity = AnalystSeverity;
|
|
97
|
+
interface WorkflowToolUsageSummary {
|
|
98
|
+
totalCalls: number;
|
|
99
|
+
erroredCalls: number;
|
|
100
|
+
byTool: Record<string, {
|
|
101
|
+
calls: number;
|
|
102
|
+
errors: number;
|
|
103
|
+
}>;
|
|
104
|
+
}
|
|
105
|
+
interface WorkflowVerifierFindingSummary {
|
|
106
|
+
severity: WorkflowFeedbackSeverity;
|
|
107
|
+
message: string;
|
|
108
|
+
evidence?: string;
|
|
109
|
+
detail?: Record<string, unknown>;
|
|
110
|
+
}
|
|
111
|
+
interface WorkflowVerifierLayerSummary {
|
|
112
|
+
layer: string;
|
|
113
|
+
status: LayerResult['status'];
|
|
114
|
+
score?: number;
|
|
115
|
+
durationMs: number;
|
|
116
|
+
reason?: string;
|
|
117
|
+
findings: WorkflowVerifierFindingSummary[];
|
|
118
|
+
diagnostics?: Record<string, number | null>;
|
|
119
|
+
}
|
|
120
|
+
interface WorkflowVerifierSummary {
|
|
121
|
+
allPass: boolean;
|
|
122
|
+
blendedScore: number;
|
|
123
|
+
durationMs: number;
|
|
124
|
+
failedLayers: string[];
|
|
125
|
+
layers: WorkflowVerifierLayerSummary[];
|
|
126
|
+
}
|
|
127
|
+
interface WorkflowFailureClusterInput {
|
|
128
|
+
id: string;
|
|
129
|
+
name: string;
|
|
130
|
+
share?: number;
|
|
131
|
+
runCount?: number;
|
|
132
|
+
exemplars?: readonly string[];
|
|
133
|
+
suggestedFix?: string;
|
|
134
|
+
metadata?: Record<string, unknown>;
|
|
135
|
+
}
|
|
136
|
+
interface WorkflowFailureClusterSummary {
|
|
137
|
+
id: string;
|
|
138
|
+
name: string;
|
|
139
|
+
share: number;
|
|
140
|
+
runCount?: number;
|
|
141
|
+
exemplars: string[];
|
|
142
|
+
suggestedFix?: string;
|
|
143
|
+
source: 'failure-cluster-view' | 'insight-report' | 'custom';
|
|
144
|
+
metadata?: Record<string, unknown>;
|
|
145
|
+
}
|
|
146
|
+
interface WorkflowAnalystFindingSummary {
|
|
147
|
+
findingId: string;
|
|
148
|
+
analystId: string;
|
|
149
|
+
severity: WorkflowFeedbackSeverity;
|
|
150
|
+
area: string;
|
|
151
|
+
claim: string;
|
|
152
|
+
confidence: number;
|
|
153
|
+
subject?: string;
|
|
154
|
+
recommendedAction?: string;
|
|
155
|
+
evidenceRefs: EvidenceRef[];
|
|
156
|
+
}
|
|
157
|
+
interface WorkflowAnalystFeedbackPack {
|
|
158
|
+
schemaVersion: WorkflowFeedbackPackVersion;
|
|
159
|
+
runId: string;
|
|
160
|
+
generatedAt: string;
|
|
161
|
+
summary: WorkflowTraceSummary;
|
|
162
|
+
verifier?: WorkflowVerifierSummary;
|
|
163
|
+
toolUsage: WorkflowToolUsageSummary;
|
|
164
|
+
failureClusters: WorkflowFailureClusterSummary[];
|
|
165
|
+
findings: WorkflowAnalystFindingSummary[];
|
|
166
|
+
recommendations: string[];
|
|
167
|
+
driverContextLines: string[];
|
|
168
|
+
}
|
|
169
|
+
interface WorkflowFeedbackPackLimits {
|
|
170
|
+
findings?: number;
|
|
171
|
+
clusters?: number;
|
|
172
|
+
layerFindings?: number;
|
|
173
|
+
recommendations?: number;
|
|
174
|
+
contextLines?: number;
|
|
175
|
+
}
|
|
176
|
+
interface BuildWorkflowAnalystFeedbackPackOptions {
|
|
177
|
+
envelope: WorkflowTraceEnvelope | unknown;
|
|
178
|
+
verifier?: VerificationReport;
|
|
179
|
+
analystFindings?: readonly AnalystFinding[];
|
|
180
|
+
failureClusters?: FailureClusterReport | FailureClusterInsight | readonly WorkflowFailureClusterInput[];
|
|
181
|
+
generatedAt?: string;
|
|
182
|
+
limits?: WorkflowFeedbackPackLimits;
|
|
183
|
+
}
|
|
184
|
+
declare function buildWorkflowAnalystFeedbackPack(options: BuildWorkflowAnalystFeedbackPackOptions): WorkflowAnalystFeedbackPack;
|
|
185
|
+
declare function renderWorkflowFeedbackPack(pack: WorkflowAnalystFeedbackPack, options?: {
|
|
186
|
+
maxChars?: number;
|
|
187
|
+
}): string;
|
|
188
|
+
|
|
189
|
+
interface WorkflowTraceSanitizationReport extends RedactionReport {
|
|
190
|
+
hashedArgs: number;
|
|
191
|
+
truncatedStrings: number;
|
|
192
|
+
droppedPayloadKeys: Record<string, number>;
|
|
193
|
+
droppedArtifactContents: number;
|
|
194
|
+
}
|
|
195
|
+
interface SanitizeWorkflowTraceEnvelopeOptions {
|
|
196
|
+
rules?: readonly RedactionRule[];
|
|
197
|
+
maxStringLength?: number;
|
|
198
|
+
hashSalt?: string;
|
|
199
|
+
approvedArtifactUris?: readonly string[];
|
|
200
|
+
approvedArtifactKinds?: readonly string[];
|
|
201
|
+
}
|
|
202
|
+
interface SanitizedWorkflowTraceEnvelopeResult {
|
|
203
|
+
envelope: WorkflowTraceEnvelope;
|
|
204
|
+
report: WorkflowTraceSanitizationReport;
|
|
205
|
+
}
|
|
206
|
+
declare function sanitizeWorkflowTraceEnvelope(input: WorkflowTraceEnvelope | unknown, options?: SanitizeWorkflowTraceEnvelopeOptions): SanitizedWorkflowTraceEnvelopeResult;
|
|
207
|
+
|
|
208
|
+
interface WorkflowPhaseGraphNode {
|
|
209
|
+
id: string;
|
|
210
|
+
title: string;
|
|
211
|
+
startedAt?: number;
|
|
212
|
+
endedAt?: number;
|
|
213
|
+
eventCount: number;
|
|
214
|
+
branchCount: number;
|
|
215
|
+
failedBranchCount: number;
|
|
216
|
+
agentCalls: number;
|
|
217
|
+
loopCalls: number;
|
|
218
|
+
verifierCalls: number;
|
|
219
|
+
analystCalls: number;
|
|
220
|
+
reviewerCalls: number;
|
|
221
|
+
agentFailures: number;
|
|
222
|
+
loopFailures: number;
|
|
223
|
+
verifierFailures: number;
|
|
224
|
+
analystFailures: number;
|
|
225
|
+
reviewerFailures: number;
|
|
226
|
+
costUsd: number;
|
|
227
|
+
tokenUsage: RunTokenUsage;
|
|
228
|
+
}
|
|
229
|
+
interface WorkflowPhaseGraphBranch {
|
|
230
|
+
id: string;
|
|
231
|
+
operation: string;
|
|
232
|
+
branchIndex: number;
|
|
233
|
+
phase: string | null;
|
|
234
|
+
status: 'started' | 'ended' | 'failed';
|
|
235
|
+
startedAt?: number;
|
|
236
|
+
endedAt?: number;
|
|
237
|
+
durationMs?: number;
|
|
238
|
+
stageCount?: number;
|
|
239
|
+
stageIndex?: number;
|
|
240
|
+
message?: string;
|
|
241
|
+
code?: string;
|
|
242
|
+
}
|
|
243
|
+
interface WorkflowPhaseGraph {
|
|
244
|
+
nodes: WorkflowPhaseGraphNode[];
|
|
245
|
+
branches: WorkflowPhaseGraphBranch[];
|
|
246
|
+
}
|
|
247
|
+
declare function workflowPhaseGraph(events: readonly WorkflowTraceEvent[]): WorkflowPhaseGraph;
|
|
248
|
+
|
|
249
|
+
interface WorkflowDelegateTraceSummary {
|
|
250
|
+
index: number | null;
|
|
251
|
+
label: string | null;
|
|
252
|
+
phase: string | null;
|
|
253
|
+
costUsd: number | null;
|
|
254
|
+
tokenUsage: RunTokenUsage | null;
|
|
255
|
+
trace: unknown;
|
|
256
|
+
}
|
|
257
|
+
interface WorkflowDelegateFailureSummary {
|
|
258
|
+
index: number | null;
|
|
259
|
+
label: string | null;
|
|
260
|
+
phase: string | null;
|
|
261
|
+
durationMs: number | null;
|
|
262
|
+
message: string | null;
|
|
263
|
+
code: string | null;
|
|
264
|
+
}
|
|
265
|
+
interface WorkflowCheckpointTraceSummary extends WorkflowDelegateTraceSummary {
|
|
266
|
+
output: unknown;
|
|
267
|
+
}
|
|
268
|
+
interface WorkflowExecutionSummary extends WorkflowTraceSummary {
|
|
269
|
+
source?: string;
|
|
270
|
+
eventKinds: Record<string, number>;
|
|
271
|
+
phases: string[];
|
|
272
|
+
phaseGraph: WorkflowPhaseGraph;
|
|
273
|
+
agentRuns: WorkflowDelegateTraceSummary[];
|
|
274
|
+
loopRuns: WorkflowDelegateTraceSummary[];
|
|
275
|
+
verifierOutputs: WorkflowCheckpointTraceSummary[];
|
|
276
|
+
analystOutputs: WorkflowCheckpointTraceSummary[];
|
|
277
|
+
reviewerOutputs: WorkflowCheckpointTraceSummary[];
|
|
278
|
+
agentFailureDetails: WorkflowDelegateFailureSummary[];
|
|
279
|
+
loopFailureDetails: WorkflowDelegateFailureSummary[];
|
|
280
|
+
verifierFailureDetails: WorkflowDelegateFailureSummary[];
|
|
281
|
+
analystFailureDetails: WorkflowDelegateFailureSummary[];
|
|
282
|
+
reviewerFailureDetails: WorkflowDelegateFailureSummary[];
|
|
283
|
+
}
|
|
284
|
+
interface SummarizeWorkflowExecutionOptions {
|
|
285
|
+
source?: string;
|
|
286
|
+
}
|
|
287
|
+
declare function summarizeWorkflowExecution(input: WorkflowTraceEnvelope | unknown, options?: SummarizeWorkflowExecutionOptions): WorkflowExecutionSummary;
|
|
288
|
+
|
|
289
|
+
type WorkflowTraceIntelligenceEnvelopeVersion = 'workflow-trace-intelligence-envelope-v1';
|
|
290
|
+
type WorkflowTraceExportGrantScope = 'workflow-trace:export' | 'workflow-trace:read' | '*';
|
|
291
|
+
type WorkflowTraceExportGrantSubject = 'product' | 'partner' | 'tenant';
|
|
292
|
+
interface WorkflowTraceExportGrant {
|
|
293
|
+
grantId: string;
|
|
294
|
+
subject: WorkflowTraceExportGrantSubject;
|
|
295
|
+
subjectId: string;
|
|
296
|
+
scopes: readonly WorkflowTraceExportGrantScope[];
|
|
297
|
+
grantedAt?: string;
|
|
298
|
+
expiresAt?: string;
|
|
299
|
+
metadata?: Record<string, unknown>;
|
|
300
|
+
}
|
|
301
|
+
interface WorkflowTraceHashEvidence {
|
|
302
|
+
path: string;
|
|
303
|
+
sha256: string;
|
|
304
|
+
shape?: unknown;
|
|
305
|
+
}
|
|
306
|
+
interface WorkflowTraceArtifactEvidence {
|
|
307
|
+
kind: string;
|
|
308
|
+
uri: string;
|
|
309
|
+
contentType?: string;
|
|
310
|
+
sha256?: string;
|
|
311
|
+
}
|
|
312
|
+
interface WorkflowTraceCompactEvidence {
|
|
313
|
+
eventKinds: Record<string, number>;
|
|
314
|
+
phases: string[];
|
|
315
|
+
toolNames: string[];
|
|
316
|
+
redactedHashes: WorkflowTraceHashEvidence[];
|
|
317
|
+
artifacts: WorkflowTraceArtifactEvidence[];
|
|
318
|
+
failureMessage?: string;
|
|
319
|
+
}
|
|
320
|
+
interface WorkflowTraceIntelligenceEnvelope {
|
|
321
|
+
schemaVersion: WorkflowTraceIntelligenceEnvelopeVersion;
|
|
322
|
+
destination: string;
|
|
323
|
+
generatedAt: string;
|
|
324
|
+
productId: string;
|
|
325
|
+
partnerId?: string;
|
|
326
|
+
runId: string;
|
|
327
|
+
grantIds: string[];
|
|
328
|
+
traceEnvelope: WorkflowTraceEnvelope;
|
|
329
|
+
summary: WorkflowExecutionSummary;
|
|
330
|
+
compactEvidence: WorkflowTraceCompactEvidence;
|
|
331
|
+
sanitization: SanitizedWorkflowTraceEnvelopeResult['report'];
|
|
332
|
+
links?: WorkflowTraceExportLinks;
|
|
333
|
+
}
|
|
334
|
+
interface BuildWorkflowTraceIntelligenceEnvelopeOptions {
|
|
335
|
+
envelope: WorkflowTraceEnvelope | unknown;
|
|
336
|
+
productId: string;
|
|
337
|
+
partnerId?: string;
|
|
338
|
+
grants: readonly WorkflowTraceExportGrant[];
|
|
339
|
+
generatedAt?: string;
|
|
340
|
+
destination?: string;
|
|
341
|
+
sanitize?: SanitizeWorkflowTraceEnvelopeOptions;
|
|
342
|
+
links?: WorkflowTraceExportLinks;
|
|
343
|
+
metadata?: Record<string, unknown>;
|
|
344
|
+
}
|
|
345
|
+
declare function buildWorkflowTraceIntelligenceEnvelope(options: BuildWorkflowTraceIntelligenceEnvelopeOptions): WorkflowTraceIntelligenceEnvelope;
|
|
346
|
+
declare function validateWorkflowTraceIntelligenceEnvelope(input: unknown): WorkflowTraceIntelligenceEnvelope;
|
|
347
|
+
|
|
348
|
+
interface WorkflowTraceRunRecordOptions extends WorkflowTraceProjectionMetadata {
|
|
349
|
+
runId?: string;
|
|
350
|
+
score?: number;
|
|
351
|
+
raw?: Record<string, number>;
|
|
352
|
+
failureMode?: string;
|
|
353
|
+
judgeMetadata?: RunRecord['judgeMetadata'];
|
|
354
|
+
agentProfile?: RunRecord['agentProfile'];
|
|
355
|
+
}
|
|
356
|
+
declare function workflowTraceToRunRecord(input: WorkflowTraceEnvelope | unknown, options: WorkflowTraceRunRecordOptions): RunRecord;
|
|
357
|
+
|
|
358
|
+
interface WorkflowTraceTrajectoryOptions {
|
|
359
|
+
projectId?: string;
|
|
360
|
+
scenarioId?: string;
|
|
361
|
+
task: string;
|
|
362
|
+
split?: DatasetSplit;
|
|
363
|
+
tags?: Record<string, string>;
|
|
364
|
+
score?: number;
|
|
365
|
+
success?: boolean;
|
|
366
|
+
metadata?: Record<string, unknown>;
|
|
367
|
+
}
|
|
368
|
+
declare function workflowTraceToFeedbackTrajectory(input: WorkflowTraceEnvelope | unknown, options: WorkflowTraceTrajectoryOptions): FeedbackTrajectory;
|
|
369
|
+
|
|
370
|
+
type WorkflowPartnerReportVersion = 'workflow-partner-report-v1';
|
|
371
|
+
interface WorkflowPartnerFinding {
|
|
372
|
+
source: 'analyst' | 'verifier' | 'failure-cluster';
|
|
373
|
+
severity: AnalystSeverity;
|
|
374
|
+
area: string;
|
|
375
|
+
claim: string;
|
|
376
|
+
evidence: EvidenceRef[];
|
|
377
|
+
recommendedAction?: string;
|
|
378
|
+
metadata?: Record<string, unknown>;
|
|
379
|
+
}
|
|
380
|
+
interface WorkflowPartnerReport {
|
|
381
|
+
schemaVersion: WorkflowPartnerReportVersion;
|
|
382
|
+
runId: string;
|
|
383
|
+
generatedAt: string;
|
|
384
|
+
summary: WorkflowTraceSummary;
|
|
385
|
+
docsApiGaps: WorkflowPartnerFinding[];
|
|
386
|
+
prReadyFindings: WorkflowPartnerFinding[];
|
|
387
|
+
failureClusters: ReturnType<typeof buildWorkflowAnalystFeedbackPack>['failureClusters'];
|
|
388
|
+
recommendations: string[];
|
|
389
|
+
traceArtifacts: WorkflowTraceEnvelope['artifacts'];
|
|
390
|
+
links?: WorkflowTraceExportLinks;
|
|
391
|
+
exportBundle: {
|
|
392
|
+
traceEnvelope: WorkflowTraceEnvelope;
|
|
393
|
+
sanitization: SanitizedWorkflowTraceEnvelopeResult['report'];
|
|
394
|
+
feedbackPack: ReturnType<typeof buildWorkflowAnalystFeedbackPack>;
|
|
395
|
+
trajectory: ReturnType<typeof workflowTraceToFeedbackTrajectory>;
|
|
396
|
+
runRecord?: RunRecord;
|
|
397
|
+
};
|
|
398
|
+
}
|
|
399
|
+
interface BuildWorkflowPartnerReportOptions extends Omit<BuildWorkflowAnalystFeedbackPackOptions, 'envelope'> {
|
|
400
|
+
envelope: WorkflowTraceEnvelope | unknown;
|
|
401
|
+
sanitize?: SanitizeWorkflowTraceEnvelopeOptions;
|
|
402
|
+
trajectory: WorkflowTraceTrajectoryOptions;
|
|
403
|
+
runRecord?: WorkflowTraceRunRecordOptions;
|
|
404
|
+
links?: WorkflowTraceExportLinks;
|
|
405
|
+
}
|
|
406
|
+
declare function buildWorkflowPartnerReport(options: BuildWorkflowPartnerReportOptions): WorkflowPartnerReport;
|
|
407
|
+
declare function validateWorkflowPartnerReport(input: unknown): WorkflowPartnerReport;
|
|
408
|
+
declare function renderWorkflowPartnerReport(report: WorkflowPartnerReport, options?: {
|
|
409
|
+
maxFindings?: number;
|
|
410
|
+
}): string;
|
|
411
|
+
|
|
412
|
+
type WorkflowDriverPromotionDecisionVersion = 'workflow-driver-promotion-v1';
|
|
413
|
+
type WorkflowDriverPromotionRejectionCode = 'missing_baseline_records' | 'missing_candidate_records' | 'missing_holdout_pairs' | 'few_pairs' | 'insufficient_lift' | 'cost_ceiling';
|
|
414
|
+
interface WorkflowDriverPromotionPair {
|
|
415
|
+
key: string;
|
|
416
|
+
scenarioId: string;
|
|
417
|
+
seed: number;
|
|
418
|
+
baselineRunId: string;
|
|
419
|
+
candidateRunId: string;
|
|
420
|
+
baselineScore: number;
|
|
421
|
+
candidateScore: number;
|
|
422
|
+
delta: number;
|
|
423
|
+
}
|
|
424
|
+
interface WorkflowDriverPromotionEvidence {
|
|
425
|
+
pairedRuns: number;
|
|
426
|
+
expectedScenarioIds: string[];
|
|
427
|
+
pairedScenarioIds: string[];
|
|
428
|
+
missingScenarioIds: string[];
|
|
429
|
+
baselineMean: number;
|
|
430
|
+
candidateMean: number;
|
|
431
|
+
lift: number;
|
|
432
|
+
liftCi: {
|
|
433
|
+
low: number;
|
|
434
|
+
high: number;
|
|
435
|
+
};
|
|
436
|
+
bootstrap: PairedBootstrapResult;
|
|
437
|
+
confidence: number;
|
|
438
|
+
resamples: number;
|
|
439
|
+
statistic: 'mean' | 'median';
|
|
440
|
+
deltaThreshold: number;
|
|
441
|
+
baselineMedianCostUsd: number;
|
|
442
|
+
candidateMedianCostUsd: number;
|
|
443
|
+
pairs: WorkflowDriverPromotionPair[];
|
|
444
|
+
}
|
|
445
|
+
interface WorkflowDriverPromotionDecision {
|
|
446
|
+
schemaVersion: WorkflowDriverPromotionDecisionVersion;
|
|
447
|
+
generatedAt: string;
|
|
448
|
+
baselineStrategyId: string;
|
|
449
|
+
candidateStrategyId: string;
|
|
450
|
+
promote: boolean;
|
|
451
|
+
rejectionCode: WorkflowDriverPromotionRejectionCode | null;
|
|
452
|
+
reason: string;
|
|
453
|
+
evidence: WorkflowDriverPromotionEvidence;
|
|
454
|
+
}
|
|
455
|
+
interface DecideWorkflowDriverPromotionOptions {
|
|
456
|
+
records: readonly RunRecord[] | readonly unknown[];
|
|
457
|
+
baselineStrategyId?: string;
|
|
458
|
+
candidateStrategyId?: string;
|
|
459
|
+
expectedScenarioIds?: readonly string[];
|
|
460
|
+
minPairedHoldoutRuns?: number;
|
|
461
|
+
deltaThreshold?: number;
|
|
462
|
+
confidence?: number;
|
|
463
|
+
resamples?: number;
|
|
464
|
+
seed?: number;
|
|
465
|
+
statistic?: 'mean' | 'median';
|
|
466
|
+
costPerRunCeiling?: number;
|
|
467
|
+
generatedAt?: string;
|
|
468
|
+
}
|
|
469
|
+
declare function decideWorkflowDriverPromotion(options: DecideWorkflowDriverPromotionOptions): WorkflowDriverPromotionDecision;
|
|
470
|
+
|
|
471
|
+
interface WorkflowTraceEnvelopeFromEventsOptions {
|
|
472
|
+
runId?: string;
|
|
473
|
+
topology?: WorkflowTopology;
|
|
474
|
+
artifacts?: readonly WorkflowTraceArtifact[];
|
|
475
|
+
metadata?: Record<string, unknown>;
|
|
476
|
+
}
|
|
477
|
+
interface WorkflowRuntimeResultLike {
|
|
478
|
+
runId?: string;
|
|
479
|
+
meta?: unknown;
|
|
480
|
+
output?: unknown;
|
|
481
|
+
events: readonly unknown[];
|
|
482
|
+
}
|
|
483
|
+
interface WorkflowRuntimeResultToTraceEnvelopeOptions extends Omit<WorkflowTraceEnvelopeFromEventsOptions, 'runId'> {
|
|
484
|
+
runId?: string;
|
|
485
|
+
includeOutputInMetadata?: boolean;
|
|
486
|
+
}
|
|
487
|
+
declare function workflowEventsToTraceEnvelope(events: readonly unknown[], options?: WorkflowTraceEnvelopeFromEventsOptions): WorkflowTraceEnvelope;
|
|
488
|
+
declare function workflowRuntimeResultToTraceEnvelope(result: WorkflowRuntimeResultLike, options?: WorkflowRuntimeResultToTraceEnvelopeOptions): WorkflowTraceEnvelope;
|
|
489
|
+
|
|
490
|
+
declare function validateWorkflowTraceEvent(input: unknown): WorkflowTraceEvent;
|
|
491
|
+
declare function validateWorkflowTraceEnvelope(input: unknown): WorkflowTraceEnvelope;
|
|
492
|
+
declare function summarizeWorkflowTrace(input: WorkflowTraceEnvelope | unknown): WorkflowTraceSummary;
|
|
493
|
+
|
|
494
|
+
export { type BuildWorkflowAnalystFeedbackPackOptions, type BuildWorkflowPartnerReportOptions, type BuildWorkflowTraceIntelligenceEnvelopeOptions, type DecideWorkflowDriverPromotionOptions, type SanitizeWorkflowTraceEnvelopeOptions, type SanitizedWorkflowTraceEnvelopeResult, type SummarizeWorkflowExecutionOptions, WORKFLOW_TRACE_EVENT_KINDS, type WorkflowAnalystFeedbackPack, type WorkflowAnalystFindingSummary, type WorkflowCheckpointTraceSummary, type WorkflowDelegateTraceSummary, type WorkflowDriverPromotionDecision, type WorkflowDriverPromotionDecisionVersion, type WorkflowDriverPromotionEvidence, type WorkflowDriverPromotionPair, type WorkflowDriverPromotionRejectionCode, type WorkflowExecutionSummary, type WorkflowFailureClusterInput, type WorkflowFailureClusterSummary, type WorkflowFeedbackPackLimits, type WorkflowFeedbackPackVersion, type WorkflowFeedbackSeverity, type WorkflowPartnerFinding, type WorkflowPartnerReport, type WorkflowPartnerReportVersion, type WorkflowPhaseGraph, type WorkflowPhaseGraphBranch, type WorkflowPhaseGraphNode, type WorkflowRuntimeResultLike, type WorkflowRuntimeResultToTraceEnvelopeOptions, type WorkflowToolUsageSummary, WorkflowTopology, type WorkflowTraceArtifact, type WorkflowTraceArtifactEvidence, type WorkflowTraceCompactEvidence, type WorkflowTraceEnvelope, type WorkflowTraceEnvelopeFromEventsOptions, type WorkflowTraceEvent, type WorkflowTraceEventKind, type WorkflowTraceExportGrant, type WorkflowTraceExportGrantScope, type WorkflowTraceExportGrantSubject, type WorkflowTraceExportLinks, type WorkflowTraceHashEvidence, type WorkflowTraceIntelligenceEnvelope, type WorkflowTraceIntelligenceEnvelopeVersion, type WorkflowTraceProjectionMetadata, type WorkflowTraceRunRecordOptions, type WorkflowTraceSanitizationReport, type WorkflowTraceSummary, type WorkflowTraceTrajectoryOptions, type WorkflowTraceVersion, type WorkflowVerifierFindingSummary, type WorkflowVerifierLayerSummary, type WorkflowVerifierSummary, buildWorkflowAnalystFeedbackPack, buildWorkflowPartnerReport, buildWorkflowTraceIntelligenceEnvelope, decideWorkflowDriverPromotion, renderWorkflowFeedbackPack, renderWorkflowPartnerReport, sanitizeWorkflowTraceEnvelope, summarizeWorkflowExecution, summarizeWorkflowTrace, validateWorkflowPartnerReport, validateWorkflowTraceEnvelope, validateWorkflowTraceEvent, validateWorkflowTraceEventKind, validateWorkflowTraceEventPayload, validateWorkflowTraceIntelligenceEnvelope, workflowEventsToTraceEnvelope, workflowPhaseGraph, workflowRuntimeResultToTraceEnvelope, workflowTraceToFeedbackTrajectory, workflowTraceToRunRecord };
|