@tangle-network/agent-eval 0.71.0 → 0.72.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/dist/adapters/http.d.ts +1 -1
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/adapters/otel.d.ts +3 -2
- package/dist/agent-profile-DYRboYWu.d.ts +364 -0
- package/dist/analyst/index.d.ts +221 -0
- package/dist/analyst/index.js +371 -0
- package/dist/analyst/index.js.map +1 -0
- package/dist/analyst-t7zZS3TV.d.ts +88 -0
- package/dist/campaign/index.d.ts +485 -9
- package/dist/campaign/index.js +618 -30
- package/dist/campaign/index.js.map +1 -1
- package/dist/chunk-7W4SM7FD.js +1075 -0
- package/dist/chunk-7W4SM7FD.js.map +1 -0
- package/dist/{chunk-AIWHLG7J.js → chunk-GJJNJVIR.js} +11 -11
- package/dist/chunk-JHA3ZGSO.js +1496 -0
- package/dist/chunk-JHA3ZGSO.js.map +1 -0
- package/dist/{chunk-VMAYE3LM.js → chunk-JYE3WOTE.js} +57 -9
- package/dist/{chunk-VMAYE3LM.js.map → chunk-JYE3WOTE.js.map} +1 -1
- package/dist/chunk-LB2UOI5F.js +412 -0
- package/dist/chunk-LB2UOI5F.js.map +1 -0
- package/dist/{chunk-ODGETRTM.js → chunk-VUINJM5M.js} +234 -1415
- package/dist/chunk-VUINJM5M.js.map +1 -0
- package/dist/chunk-WYIHD6EB.js +1044 -0
- package/dist/chunk-WYIHD6EB.js.map +1 -0
- package/dist/{chunk-6QZUCFKM.js → chunk-XPILG2CA.js} +120 -3
- package/dist/chunk-XPILG2CA.js.map +1 -0
- package/dist/{chunk-6XQIEUQ2.js → chunk-ZPSKPT3V.js} +5 -3
- package/dist/{chunk-6XQIEUQ2.js.map → chunk-ZPSKPT3V.js.map} +1 -1
- package/dist/contract/index.d.ts +17 -13
- package/dist/contract/index.js +14 -8
- package/dist/contract/index.js.map +1 -1
- package/dist/{control-DxvZeV5X.d.ts → control-BgA6BYTm.d.ts} +1 -1
- package/dist/control.d.ts +2 -2
- package/dist/{feedback-trajectory-8hKC5EOb.d.ts → feedback-trajectory-B3rErRsh.d.ts} +1 -1
- package/dist/harness-optimizer-EnEnQPsr.d.ts +106 -0
- package/dist/hosted/index.d.ts +223 -2
- package/dist/index.d.ts +49 -1323
- package/dist/index.js +339 -2627
- package/dist/index.js.map +1 -1
- package/dist/{index-BGBrVS24.d.ts → insight-report-Df3lxYXM.d.ts} +1 -221
- package/dist/kind-factory-DW9XWPvM.d.ts +172 -0
- package/dist/multi-layer-verifier-DlWCXuxL.d.ts +141 -0
- package/dist/openapi.json +1 -1
- package/dist/pareto-E-pembql.d.ts +81 -0
- package/dist/{provenance-C69gLUXH.d.ts → provenance-B-TFszPW.d.ts} +131 -4
- package/dist/redact-B40YG2M_.d.ts +45 -0
- package/dist/registry-DuVYiTvw.d.ts +128 -0
- package/dist/{researcher-WJvIpX3L.d.ts → researcher-C_KJyIGg.d.ts} +1 -141
- package/dist/rl.d.ts +4 -3
- package/dist/rl.js +4 -4
- package/dist/{run-campaign-BVY3RGAZ.js → run-campaign-OVEZF24D.js} +2 -2
- package/dist/run-critic-BAIjX99r.d.ts +56 -0
- package/dist/{run-improvement-loop-Bzamo6GB.d.ts → run-improvement-loop-BqYH2vCR.d.ts} +25 -1
- package/dist/semantic-concept-judge-CV9Wlx4t.d.ts +650 -0
- package/dist/{store-jzKpMl16.d.ts → store-GmBE2pZZ.d.ts} +1 -1
- package/dist/traces.d.ts +371 -308
- package/dist/traces.js +43 -18
- package/dist/{types-CnmZ2bkP.d.ts → types-Bba0vl1V.d.ts} +1 -1
- package/dist/{registry-BGKyX6bw.d.ts → types-CRD68aH7.d.ts} +3 -128
- package/dist/wire/index.d.ts +1 -1
- package/dist/workflow/index.d.ts +494 -0
- package/dist/workflow/index.js +2177 -0
- package/dist/workflow/index.js.map +1 -0
- package/docs/design/self-improvement-roadmap.md +106 -0
- package/package.json +36 -12
- package/dist/agent-profile-DzcPHR1Z.d.ts +0 -114
- package/dist/chunk-6QZUCFKM.js.map +0 -1
- package/dist/chunk-ODGETRTM.js.map +0 -1
- package/dist/chunk-PQV2TKC3.js +0 -27
- package/dist/chunk-PQV2TKC3.js.map +0 -1
- /package/dist/{chunk-AIWHLG7J.js.map → chunk-GJJNJVIR.js.map} +0 -0
- /package/dist/{run-campaign-BVY3RGAZ.js.map → run-campaign-OVEZF24D.js.map} +0 -0
|
@@ -0,0 +1,2177 @@
|
|
|
1
|
+
import {
|
|
2
|
+
pairedBootstrap
|
|
3
|
+
} from "../chunk-ITBRCT73.js";
|
|
4
|
+
import {
|
|
5
|
+
DEFAULT_REDACTION_RULES,
|
|
6
|
+
redactString
|
|
7
|
+
} from "../chunk-GGE4NNQT.js";
|
|
8
|
+
import {
|
|
9
|
+
validateRunRecord
|
|
10
|
+
} from "../chunk-F3SRAAZO.js";
|
|
11
|
+
import "../chunk-VSMTAMNK.js";
|
|
12
|
+
import {
|
|
13
|
+
ValidationError
|
|
14
|
+
} from "../chunk-3BFEG2F6.js";
|
|
15
|
+
import "../chunk-PZ5AY32C.js";
|
|
16
|
+
|
|
17
|
+
// src/workflow/event-schema.ts
|
|
18
|
+
// Ordered list of every workflow trace event kind accepted by the validators.
var WORKFLOW_EVENT_KINDS = [
  "workflow.started",
  "workflow.phase",
  "workflow.log",
  // Fan-out operations only report a start and an end.
  ...["parallel", "pipeline"].flatMap((operation) => [
    `workflow.${operation}.started`,
    `workflow.${operation}.ended`
  ]),
  // Branches and the delegate scopes report started / ended / failed.
  ...["branch", "agent", "loop", "verifier", "analyst", "reviewer"].flatMap(
    (scope) => ["started", "ended", "failed"].map((stage) => `workflow.${scope}.${stage}`)
  ),
  "workflow.failed",
  "workflow.ended"
];
// Same array exposed under the trace-oriented name.
var WORKFLOW_TRACE_EVENT_KINDS = WORKFLOW_EVENT_KINDS;
// Set form of the list, used for membership checks.
var WORKFLOW_EVENT_KIND_SET = new Set(WORKFLOW_EVENT_KINDS);
// Values accepted for a branch payload's `operation` field.
var BRANCH_OPERATIONS = /* @__PURE__ */ new Set(["parallel", "pipeline"]);
|
|
50
|
+
/**
 * Checks that `kind` is one of the known workflow trace event kinds.
 *
 * @param kind Candidate event kind.
 * @returns The same `kind` value when it is known.
 * @throws ValidationError when `kind` is not in WORKFLOW_EVENT_KIND_SET.
 */
function validateWorkflowTraceEventKind(kind) {
  const isKnown = WORKFLOW_EVENT_KIND_SET.has(kind);
  if (isKnown) return kind;
  throw new ValidationError(`unknown workflow trace event kind: ${kind}`);
}
|
|
56
|
+
/**
 * Validates the payload of a workflow trace event against the fields its kind
 * requires.
 *
 * @param kind Event kind; selects which payload fields are checked.
 * @param payload Payload record to validate.
 * @returns The same `payload` object once every check has passed.
 * @throws ValidationError when a field is missing or malformed, or when `kind`
 *   is not a recognized event kind.
 */
function validateWorkflowTraceEventPayload(kind, payload) {
  switch (kind) {
    case "workflow.started":
      requireRecord(payload.meta, `${kind}.payload.meta`);
      requireInteger(payload.depth, `${kind}.payload.depth`, { min: 0 });
      requireRecord(payload.caps, `${kind}.payload.caps`);
      return payload;
    case "workflow.phase":
      requireString(payload.title, `${kind}.payload.title`);
      return payload;
    case "workflow.log":
      requireString(payload.message, `${kind}.payload.message`);
      optionalString(payload.phase, `${kind}.payload.phase`);
      return payload;
    case "workflow.parallel.started":
      requireInteger(payload.branchCount, `${kind}.payload.branchCount`, { min: 0 });
      optionalString(payload.phase, `${kind}.payload.phase`);
      return payload;
    case "workflow.parallel.ended":
      requireInteger(payload.branchCount, `${kind}.payload.branchCount`, { min: 0 });
      requireNonNegativeNumber(payload.durationMs, `${kind}.payload.durationMs`);
      optionalString(payload.phase, `${kind}.payload.phase`);
      return payload;
    case "workflow.pipeline.started":
      requireInteger(payload.itemCount, `${kind}.payload.itemCount`, { min: 0 });
      requireInteger(payload.stageCount, `${kind}.payload.stageCount`, { min: 1 });
      optionalString(payload.phase, `${kind}.payload.phase`);
      return payload;
    case "workflow.pipeline.ended":
      requireInteger(payload.itemCount, `${kind}.payload.itemCount`, { min: 0 });
      requireInteger(payload.stageCount, `${kind}.payload.stageCount`, { min: 1 });
      requireNonNegativeNumber(payload.durationMs, `${kind}.payload.durationMs`);
      optionalString(payload.phase, `${kind}.payload.phase`);
      return payload;
    case "workflow.branch.started":
      validateBranchPayload(kind, payload, { terminal: false });
      return payload;
    case "workflow.branch.ended":
      validateBranchPayload(kind, payload, { terminal: true });
      return payload;
    case "workflow.branch.failed":
      validateBranchPayload(kind, payload, { terminal: true });
      requireString(payload.message, `${kind}.payload.message`);
      optionalString(payload.code, `${kind}.payload.code`);
      if (payload.stageIndex !== void 0) {
        requireInteger(payload.stageIndex, `${kind}.payload.stageIndex`, { min: 0 });
      }
      return payload;
    case "workflow.agent.started":
      validateIndexedPayload(kind, payload);
      requireInteger(payload.promptChars, `${kind}.payload.promptChars`, { min: 0 });
      optionalRecord(payload.metadata, `${kind}.payload.metadata`);
      return payload;
    case "workflow.agent.ended":
      validateDelegateEndedPayload(kind, payload);
      return payload;
    case "workflow.agent.failed":
      validateDelegateFailedPayload(kind, payload);
      return payload;
    case "workflow.loop.started":
    case "workflow.verifier.started":
    case "workflow.analyst.started":
    case "workflow.reviewer.started":
      validateIndexedPayload(kind, payload);
      optionalRecord(payload.metadata, `${kind}.payload.metadata`);
      return payload;
    case "workflow.loop.ended":
    case "workflow.verifier.ended":
    case "workflow.analyst.ended":
    case "workflow.reviewer.ended":
      validateDelegateEndedPayload(kind, payload);
      return payload;
    case "workflow.loop.failed":
    case "workflow.verifier.failed":
    case "workflow.analyst.failed":
    case "workflow.reviewer.failed":
      validateDelegateFailedPayload(kind, payload);
      return payload;
    case "workflow.failed":
      requireString(payload.message, `${kind}.payload.message`);
      optionalString(payload.code, `${kind}.payload.code`);
      optionalString(payload.phase, `${kind}.payload.phase`);
      return payload;
    case "workflow.ended":
      requireNonNegativeNumber(payload.durationMs, `${kind}.payload.durationMs`);
      requireNonNegativeNumber(payload.costUsd, `${kind}.payload.costUsd`);
      validateTokenUsage(payload.tokenUsage, `${kind}.payload.tokenUsage`);
      requireInteger(payload.agentCalls, `${kind}.payload.agentCalls`, { min: 0 });
      requireInteger(payload.loopCalls, `${kind}.payload.loopCalls`, { min: 0 });
      return payload;
    default:
      // Without this branch an unrecognized kind fell out of the switch and
      // the function returned undefined, which reads as "validated" to a
      // caller that did not check the kind first.
      throw new ValidationError(`unknown workflow trace event kind: ${kind}`);
  }
}
|
|
148
|
+
/**
 * Validates the fields shared by branch events.
 *
 * @param kind Event kind, used as the prefix of error paths.
 * @param payload Branch payload to check.
 * @param options `terminal: true` additionally requires `durationMs`.
 * @throws ValidationError on the first field that fails its check.
 */
function validateBranchPayload(kind, payload, options) {
  const at = (field) => `${kind}.payload.${field}`;
  const operation = requireString(payload.operation, at("operation"));
  const knownOperation = BRANCH_OPERATIONS.has(operation);
  if (!knownOperation) {
    throw new ValidationError(`${kind}.payload.operation: expected parallel or pipeline`);
  }
  requireInteger(payload.branchIndex, at("branchIndex"), { min: 0 });
  optionalString(payload.phase, at("phase"));
  // stageCount is optional, but must be a positive integer when present.
  if (payload.stageCount !== void 0) {
    requireInteger(payload.stageCount, at("stageCount"), { min: 1 });
  }
  if (!options.terminal) return;
  requireNonNegativeNumber(payload.durationMs, at("durationMs"));
}
|
|
162
|
+
/**
 * Validates the fields common to indexed events: a non-negative integer
 * `index` plus optional `label` and `phase` strings.
 *
 * @param kind Event kind, used as the prefix of error paths.
 * @param payload Payload to check.
 */
function validateIndexedPayload(kind, payload) {
  const base = `${kind}.payload`;
  requireInteger(payload.index, `${base}.index`, { min: 0 });
  for (const field of ["label", "phase"]) {
    optionalString(payload[field], `${base}.${field}`);
  }
}
|
|
167
|
+
/**
 * Validates an "ended" delegate payload: the indexed fields plus
 * `durationMs`, `costUsd` and `tokenUsage`.
 *
 * @param kind Event kind, used as the prefix of error paths.
 * @param payload Payload to check.
 */
function validateDelegateEndedPayload(kind, payload) {
  const base = `${kind}.payload`;
  validateIndexedPayload(kind, payload);
  requireNonNegativeNumber(payload.durationMs, `${base}.durationMs`);
  requireNonNegativeNumber(payload.costUsd, `${base}.costUsd`);
  validateTokenUsage(payload.tokenUsage, `${base}.tokenUsage`);
}
|
|
173
|
+
/**
 * Validates a "failed" delegate payload: the indexed fields plus
 * `durationMs`, a required `message` and an optional `code`.
 *
 * @param kind Event kind, used as the prefix of error paths.
 * @param payload Payload to check.
 */
function validateDelegateFailedPayload(kind, payload) {
  const base = `${kind}.payload`;
  validateIndexedPayload(kind, payload);
  requireNonNegativeNumber(payload.durationMs, `${base}.durationMs`);
  requireString(payload.message, `${base}.message`);
  optionalString(payload.code, `${base}.code`);
}
|
|
179
|
+
/**
 * Validates a token-usage record: `input` and `output` are required
 * non-negative finite numbers, `cached` is checked only when present.
 *
 * @param value Candidate token-usage record.
 * @param path Path used in error messages.
 */
function validateTokenUsage(value, path) {
  const usage = requireRecord(value, path);
  requireNonNegativeNumber(usage.input, `${path}.input`);
  requireNonNegativeNumber(usage.output, `${path}.output`);
  if (usage.cached === void 0) return;
  requireNonNegativeNumber(usage.cached, `${path}.cached`);
}
|
|
187
|
+
/**
 * Requires `value` to be a non-null, non-array object.
 *
 * @param value Candidate value.
 * @param path Path used in the error message.
 * @returns `value` unchanged.
 * @throws ValidationError when `value` is not such an object.
 */
function requireRecord(value, path) {
  const isObject = typeof value === "object" && value !== null;
  if (isObject && !Array.isArray(value)) return value;
  throw new ValidationError(`${path}: expected object`);
}
|
|
193
|
+
/**
 * Applies requireRecord only when `value` is not `undefined`.
 *
 * @param value Candidate value; `undefined` is accepted as "absent".
 * @param path Path used in the error message.
 */
function optionalRecord(value, path) {
  if (value === void 0) return;
  requireRecord(value, path);
}
|
|
196
|
+
/**
 * Requires `value` to be a non-empty string.
 *
 * @param value Candidate value.
 * @param path Path used in the error message.
 * @returns `value` unchanged.
 * @throws ValidationError when `value` is not a string or is empty.
 */
function requireString(value, path) {
  const isNonEmpty = typeof value === "string" && value.length > 0;
  if (isNonEmpty) return value;
  throw new ValidationError(`${path}: expected non-empty string`);
}
|
|
202
|
+
/**
 * Applies requireString only when `value` is not `undefined`.
 *
 * @param value Candidate value; `undefined` is accepted as "absent".
 * @param path Path used in the error message.
 */
function optionalString(value, path) {
  if (value === void 0) return;
  requireString(value, path);
}
|
|
205
|
+
/**
 * Requires `value` to be a finite number that is not below zero.
 *
 * @param value Candidate value.
 * @param path Path used in the error message.
 * @returns `value` unchanged.
 * @throws ValidationError otherwise.
 */
function requireNonNegativeNumber(value, path) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  if (Number.isFinite(value) && value >= 0) return value;
  throw new ValidationError(`${path}: expected finite non-negative number`);
}
|
|
211
|
+
/**
 * Requires `value` to be an integer, optionally bounded below.
 *
 * @param value Candidate value.
 * @param path Path used in the error message.
 * @param options `min`, when defined, is the smallest accepted value.
 * @returns `value` unchanged.
 * @throws ValidationError when `value` is not an integer or is below `min`.
 */
function requireInteger(value, path, options = {}) {
  // Number.isInteger is false for every non-number, so no typeof check is needed.
  if (!Number.isInteger(value)) {
    throw new ValidationError(`${path}: expected integer`);
  }
  const lowerBound = options.min;
  const belowBound = lowerBound !== void 0 && value < lowerBound;
  if (belowBound) {
    throw new ValidationError(`${path}: expected integer >= ${lowerBound}`);
  }
  return value;
}
|
|
220
|
+
|
|
221
|
+
// src/workflow/schema.ts
|
|
222
|
+
// Version tag a workflow trace envelope must carry in its `traceVersion` field.
var TRACE_VERSION = "workflow-trace-v1";
|
|
223
|
+
/**
 * Validates a single workflow trace event.
 *
 * @param input Candidate event.
 * @returns A new object holding only `kind`, `runId`, `timestamp` and `payload`.
 * @throws ValidationError when any of those fields is missing or malformed.
 */
function validateWorkflowTraceEvent(input) {
  const raw = expectRecord(input, "event");
  const rawKind = expectString(raw.kind, "event.kind");
  const kind = validateWorkflowTraceEventKind(rawKind);
  const runId = expectString(raw.runId, "event.runId");
  const timestamp = expectFinite(raw.timestamp, "event.timestamp");
  const rawPayload = expectRecord(raw.payload, "event.payload");
  const payload = validateWorkflowTraceEventPayload(kind, rawPayload);
  return { kind, runId, timestamp, payload };
}
|
|
234
|
+
/**
 * Validates a workflow trace envelope: version tag, run id, a non-empty event
 * list whose events all share the envelope's run id, and the lifecycle rules.
 *
 * @param input Candidate envelope.
 * @returns A new envelope containing the validated events. `topology` is
 *   passed through as-is; `artifacts` and `metadata` are validated when present.
 * @throws ValidationError on the first rule that fails.
 */
function validateWorkflowTraceEnvelope(input) {
  const raw = expectRecord(input, "workflow trace envelope");
  if (raw.traceVersion !== TRACE_VERSION) {
    throw new ValidationError(`workflow traceVersion must be ${TRACE_VERSION}`);
  }
  const runId = expectString(raw.runId, "workflow trace runId");
  const hasEvents = Array.isArray(raw.events) && raw.events.length > 0;
  if (!hasEvents) {
    throw new ValidationError("workflow trace envelope must include at least one event");
  }
  const events = raw.events.map((event) => validateWorkflowTraceEvent(event));
  const stray = events.find((event) => event.runId !== runId);
  if (stray !== void 0) {
    throw new ValidationError(`workflow trace event runId ${stray.runId} does not match ${runId}`);
  }
  validateWorkflowTraceLifecycle(events);
  // Keys are added in a fixed order so the result's key order stays stable.
  const envelope = { traceVersion: TRACE_VERSION, runId };
  if (raw.topology !== void 0) envelope.topology = raw.topology;
  envelope.events = events;
  if (raw.artifacts !== void 0) envelope.artifacts = validateArtifacts(raw.artifacts);
  if (raw.metadata !== void 0) envelope.metadata = expectRecord(raw.metadata, "metadata");
  return envelope;
}
|
|
259
|
+
/**
 * Enforces ordering rules on an event list: the first event is
 * `workflow.started`, timestamps never decrease, and exactly one terminal
 * event (`workflow.ended` or `workflow.failed`) exists and comes last.
 *
 * @param events Events to check, in trace order.
 * @throws ValidationError on the first rule that fails.
 */
function validateWorkflowTraceLifecycle(events) {
  if (events[0]?.kind !== "workflow.started") {
    throw new ValidationError("workflow trace first event must be workflow.started");
  }
  let previous = events[0];
  for (const [index, event] of events.entries()) {
    if (index === 0) continue;
    if (event.timestamp < previous.timestamp) {
      throw new ValidationError(`workflow trace timestamps must be nondecreasing at event ${index}`);
    }
    previous = event;
  }
  const isTerminal = ({ kind }) => kind === "workflow.ended" || kind === "workflow.failed";
  const terminalEvents = events.filter(isTerminal);
  if (terminalEvents.length !== 1) {
    throw new ValidationError(
      `workflow trace must include exactly one terminal event, got ${terminalEvents.length}`
    );
  }
  // Only one terminal event exists, so matching kinds means it is the last event.
  const [terminal] = terminalEvents;
  if (events.at(-1)?.kind !== terminal?.kind) {
    throw new ValidationError("workflow trace terminal event must be last");
  }
}
|
|
282
|
+
/**
 * Validates a workflow trace envelope and condenses it into summary figures.
 *
 * Duration, cost, token usage and agent/loop call counts come from the
 * `workflow.ended` payload when it holds finite numbers; otherwise they fall
 * back to values derived from the event list (or zero for cost).
 *
 * @param input Candidate workflow trace envelope.
 * @returns Summary record for the run.
 * @throws ValidationError when the envelope is invalid.
 */
function summarizeWorkflowTrace(input) {
  const { runId, events } = validateWorkflowTraceEnvelope(input);

  // Tally events per kind once instead of re-filtering the list per metric.
  const countsByKind = new Map();
  for (const { kind } of events) {
    countsByKind.set(kind, (countsByKind.get(kind) ?? 0) + 1);
  }
  const tally = (...kinds) => kinds.reduce((total, kind) => total + (countsByKind.get(kind) ?? 0), 0);
  const firstOfKind = (kind) => events.find((event) => event.kind === kind);

  const started = firstOfKind("workflow.started");
  const ended = firstOfKind("workflow.ended");
  const failed = firstOfKind("workflow.failed");
  const endedPayload = ended?.payload ?? {};
  const failureText = failed?.payload.message;

  return {
    runId,
    startedAt: started?.timestamp,
    endedAt: ended?.timestamp,
    durationMs: finiteOr(endedPayload.durationMs, durationFromEvents(events)),
    costUsd: finiteOr(endedPayload.costUsd, 0),
    tokenUsage: tokenUsageOf(endedPayload.tokenUsage),
    phaseCount: tally("workflow.phase"),
    branchCount: tally("workflow.branch.ended"),
    failedBranchCount: tally("workflow.branch.failed"),
    agentCalls: finiteOr(
      endedPayload.agentCalls,
      tally("workflow.agent.ended", "workflow.agent.failed")
    ),
    loopCalls: finiteOr(
      endedPayload.loopCalls,
      tally("workflow.loop.ended", "workflow.loop.failed")
    ),
    verifierCalls: tally("workflow.verifier.ended", "workflow.verifier.failed"),
    analystCalls: tally("workflow.analyst.ended", "workflow.analyst.failed"),
    reviewerCalls: tally("workflow.reviewer.ended", "workflow.reviewer.failed"),
    agentFailures: tally("workflow.agent.failed"),
    loopFailures: tally("workflow.loop.failed"),
    verifierFailures: tally("workflow.verifier.failed"),
    analystFailures: tally("workflow.analyst.failed"),
    reviewerFailures: tally("workflow.reviewer.failed"),
    eventCount: events.length,
    failed: failed !== void 0,
    failureMessage: typeof failureText === "string" ? failureText : void 0
  };
}
|
|
328
|
+
/**
 * Counts the events whose `kind` is one of the given kinds.
 *
 * @param events Events to scan.
 * @param kinds Kinds to match.
 * @returns Number of matching events.
 */
function countEvents(events, ...kinds) {
  let matches = 0;
  for (const { kind } of events) {
    if (kinds.includes(kind)) matches += 1;
  }
  return matches;
}
|
|
332
|
+
/**
 * Validates the optional `artifacts` list of a workflow trace envelope.
 *
 * @param value Candidate artifact list.
 * @returns New artifact records holding `kind`, `uri` and, when present on
 *   the input, `contentType`, `sha256` and `metadata`.
 * @throws ValidationError when `value` is not an array or an entry is malformed.
 */
function validateArtifacts(value) {
  if (!Array.isArray(value)) throw new ValidationError("workflow artifacts must be an array");
  return value.map((artifact, index) => {
    const where = `artifacts[${index}]`;
    const raw = expectRecord(artifact, where);
    const validated = {
      kind: expectString(raw.kind, `${where}.kind`),
      uri: expectString(raw.uri, `${where}.uri`)
    };
    if (raw.contentType !== void 0) {
      validated.contentType = expectString(raw.contentType, `${where}.contentType`);
    }
    if (raw.sha256 !== void 0) {
      validated.sha256 = expectString(raw.sha256, `${where}.sha256`);
    }
    if (raw.metadata !== void 0) {
      validated.metadata = expectRecord(raw.metadata, `${where}.metadata`);
    }
    return validated;
  });
}
|
|
345
|
+
/**
 * Derives a duration from the first and last event timestamps.
 *
 * @param events Events in trace order.
 * @returns `last - first`, floored at 0; 0 when either timestamp is not a
 *   finite number.
 */
function durationFromEvents(events) {
  const start = events[0]?.timestamp;
  const end = events.at(-1)?.timestamp;
  const bothFinite = Number.isFinite(start) && Number.isFinite(end);
  return bothFinite ? Math.max(0, end - start) : 0;
}
|
|
351
|
+
/**
 * Builds a token-usage record from a loosely-typed value.
 *
 * @param value Candidate record; anything that is not a truthy object is
 *   treated as empty.
 * @returns `{ input, output }` with non-finite values replaced by 0, plus
 *   `cached` only when it is a finite number.
 */
function tokenUsageOf(value) {
  const source = value && typeof value === "object" ? value : {};
  const cached = finiteOrUndefined(source.cached);
  const usage = {
    input: finiteOr(source.input, 0),
    output: finiteOr(source.output, 0)
  };
  if (cached !== void 0) usage.cached = cached;
  return usage;
}
|
|
360
|
+
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 *
 * @param value Candidate number.
 * @param fallback Value returned for anything that is not a finite number.
 */
function finiteOr(value, fallback) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  if (Number.isFinite(value)) return value;
  return fallback;
}
|
|
363
|
+
/**
 * Returns `value` when it is a finite number, otherwise `undefined`.
 *
 * @param value Candidate number.
 */
function finiteOrUndefined(value) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  if (Number.isFinite(value)) return value;
  return void 0;
}
|
|
366
|
+
/**
 * Requires `value` to be a non-null, non-array object.
 *
 * @param value Candidate value.
 * @param path Path used in the error message.
 * @returns `value` unchanged.
 * @throws ValidationError when `value` is not such an object.
 */
function expectRecord(value, path) {
  const rejected = value === null || typeof value !== "object" || Array.isArray(value);
  if (rejected) {
    throw new ValidationError(`${path}: expected object`);
  }
  return value;
}
|
|
372
|
+
/**
 * Requires `value` to be a non-empty string.
 *
 * @param value Candidate value.
 * @param path Path used in the error message.
 * @returns `value` unchanged.
 * @throws ValidationError when `value` is not a string or is empty.
 */
function expectString(value, path) {
  const rejected = typeof value !== "string" || value === "";
  if (rejected) {
    throw new ValidationError(`${path}: expected non-empty string`);
  }
  return value;
}
|
|
378
|
+
/**
 * Requires `value` to be a finite number.
 *
 * @param value Candidate value.
 * @param path Path used in the error message.
 * @returns `value` unchanged.
 * @throws ValidationError when `value` is not a finite number.
 */
function expectFinite(value, path) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  if (Number.isFinite(value)) return value;
  throw new ValidationError(`${path}: expected finite number`);
}
|
|
384
|
+
|
|
385
|
+
// src/workflow/feedback-pack.ts
|
|
386
|
+
// Schema tag written into every feedback pack as `schemaVersion`.
var PACK_VERSION = "workflow-feedback-pack-v1";
// Default caps on how many items of each kind a feedback pack keeps.
var DEFAULT_LIMITS = {
  "findings": 12,
  "clusters": 8,
  "layerFindings": 5,
  "recommendations": 10,
  "contextLines": 24
};
|
|
394
|
+
/**
 * Assembles a feedback pack for one workflow run from its trace envelope and
 * optional verifier result, failure clusters and analyst findings.
 *
 * @param options Inputs: `envelope` (required), plus optional `verifier`,
 *   `failureClusters`, `analystFindings`, `limits` (overrides for
 *   DEFAULT_LIMITS) and `generatedAt` (defaults to the current ISO timestamp).
 * @returns The pack, including `driverContextLines` rendered from it.
 * @throws ValidationError when the envelope is invalid.
 */
function buildWorkflowAnalystFeedbackPack(options) {
  const limits = Object.assign({}, DEFAULT_LIMITS, options.limits);
  const envelope = validateWorkflowTraceEnvelope(options.envelope);
  const summary = summarizeWorkflowTrace(envelope);
  let verifier;
  if (options.verifier) verifier = summarizeVerifier(options.verifier, limits);
  const toolUsage = summarizeToolUsage(envelope);

  // Largest clusters first, then cap.
  const rankedClusters = normalizeFailureClusters(options.failureClusters);
  rankedClusters.sort((left, right) => right.share - left.share);
  const failureClusters = rankedClusters.slice(0, limits.clusters);

  const rankedFindings = summarizeFindings(options.analystFindings ?? []);
  rankedFindings.sort(compareFindings);
  const findings = rankedFindings.slice(0, limits.findings);

  // Collect candidate recommendations in priority order, then dedupe and cap.
  const candidates = [...recommendFromVerifier(verifier)];
  for (const cluster of failureClusters) {
    if (cluster.suggestedFix) candidates.push(cluster.suggestedFix);
  }
  for (const finding of findings) {
    if (finding.recommendedAction) candidates.push(finding.recommendedAction);
  }
  if (summary.failed && summary.failureMessage) {
    candidates.push(`Inspect workflow failure: ${summary.failureMessage}`);
  }
  const recommendations = uniqueStrings(candidates).slice(0, limits.recommendations);

  // Keys are added in a fixed order so the pack's key order stays stable.
  const pack = {
    schemaVersion: PACK_VERSION,
    runId: envelope.runId,
    generatedAt: options.generatedAt ?? (/* @__PURE__ */ new Date()).toISOString(),
    summary
  };
  if (verifier) pack.verifier = verifier;
  Object.assign(pack, {
    toolUsage,
    failureClusters,
    findings,
    recommendations,
    driverContextLines: []
  });
  const driverContextLines = renderDriverContextLines(pack).slice(0, limits.contextLines);
  return { ...pack, driverContextLines };
}
|
|
427
|
+
/**
 * Renders a feedback pack as plain text, one item per line.
 *
 * @param pack Feedback pack to render.
 * @param options `maxChars`, when defined, caps the result length in UTF-16
 *   code units; longer output is cut and ends with an ellipsis.
 * @returns The rendered text.
 */
function renderWorkflowFeedbackPack(pack, options = {}) {
  const lines = [
    `Workflow feedback pack for ${pack.runId}`,
    `status=${pack.summary.failed ? "failed" : "completed"} durationMs=${pack.summary.durationMs} costUsd=${pack.summary.costUsd.toFixed(6)} tokens=${pack.summary.tokenUsage.input}/${pack.summary.tokenUsage.output} events=${pack.summary.eventCount}`,
    renderDelegateFailureCounts(pack.summary),
    pack.verifier ? `verifier=${pack.verifier.allPass ? "pass" : "fail"} blendedScore=${pack.verifier.blendedScore.toFixed(3)} failedLayers=${pack.verifier.failedLayers.join(",") || "none"}` : void 0,
    pack.toolUsage.totalCalls > 0 ? `tools=${pack.toolUsage.totalCalls} errors=${pack.toolUsage.erroredCalls} byTool=${formatToolUsage(pack.toolUsage)}` : void 0,
    pack.failureClusters.length > 0 ? "Failure clusters:" : void 0,
    ...pack.failureClusters.map(
      (cluster) => `- ${cluster.name} share=${cluster.share.toFixed(3)} exemplars=${cluster.exemplars.join(",") || "none"}${cluster.suggestedFix ? ` fix=${cluster.suggestedFix}` : ""}`
    ),
    pack.findings.length > 0 ? "Analyst findings:" : void 0,
    ...pack.findings.map(
      (finding) => `- ${finding.severity} ${finding.area}: ${finding.claim}${finding.recommendedAction ? ` action=${finding.recommendedAction}` : ""}`
    ),
    pack.recommendations.length > 0 ? "Recommended next moves:" : void 0,
    ...pack.recommendations.map((recommendation) => `- ${recommendation}`)
  ].filter((line) => Boolean(line));
  const rendered = lines.join("\n");
  const maxChars = options.maxChars;
  if (maxChars === void 0 || rendered.length <= maxChars) return rendered;
  // Cutting by code unit can land between the two halves of a surrogate pair
  // (e.g. an emoji in a finding's claim). Drop a dangling high surrogate so
  // the output never ends in half a character; the result only gets shorter.
  const truncate = (text, end) => {
    const kept = text.slice(0, Math.max(0, end));
    const tail = kept.charCodeAt(kept.length - 1);
    return tail >= 0xd800 && tail <= 0xdbff ? kept.slice(0, -1) : kept;
  };
  // With room for at most one unit there is no space for the ellipsis.
  if (maxChars <= 1) return truncate(rendered, maxChars);
  return `${truncate(rendered, maxChars - 1)}\u2026`;
}
|
|
451
|
+
/**
 * Renders the non-zero delegate failure counters of a summary.
 *
 * @param summary Summary exposing `agentFailures`, `loopFailures`,
 *   `verifierFailures`, `analystFailures` and `reviewerFailures`.
 * @returns `delegateFailures=kind:count,...` in that fixed order, or
 *   `undefined` when no counter is above zero.
 */
function renderDelegateFailureCounts(summary) {
  const parts = [];
  for (const kind of ["agent", "loop", "verifier", "analyst", "reviewer"]) {
    const count = summary[`${kind}Failures`];
    if (count > 0) parts.push(`${kind}:${count}`);
  }
  if (parts.length === 0) return void 0;
  return `delegateFailures=${parts.join(",")}`;
}
|
|
463
|
+
/**
 * Condenses a verifier result into the shape stored in a feedback pack.
 *
 * @param verifier Verifier result with `allPass`, `blendedScore`,
 *   `durationMs` and `layers`.
 * @param limits `layerFindings` caps the findings kept per layer.
 * @returns Summary with per-layer records and the names of layers whose
 *   status is `fail`, `error` or `timeout`.
 */
function summarizeVerifier(verifier, limits) {
  const FAILING_STATUSES = ["fail", "error", "timeout"];

  const toFinding = (finding) => {
    const entry = {
      severity: verifierSeverity(finding.severity),
      message: finding.message
    };
    if (finding.evidence) entry.evidence = finding.evidence;
    if (finding.detail) entry.detail = finding.detail;
    return entry;
  };

  // Keys are added in a fixed order so each layer record's key order stays stable.
  const toLayer = (layer) => {
    const entry = { layer: layer.layer, status: layer.status };
    if (layer.score !== void 0) entry.score = layer.score;
    entry.durationMs = layer.durationMs;
    if (layer.reason) entry.reason = layer.reason;
    // Every finding is converted before the cap is applied.
    entry.findings = layer.findings.map(toFinding).slice(0, limits.layerFindings);
    if (layer.diagnostics) entry.diagnostics = layer.diagnostics;
    return entry;
  };

  const layers = verifier.layers.map(toLayer);
  const failedLayers = [];
  for (const layer of verifier.layers) {
    if (FAILING_STATUSES.includes(layer.status)) failedLayers.push(layer.layer);
  }
  return {
    allPass: verifier.allPass,
    blendedScore: verifier.blendedScore,
    durationMs: verifier.durationMs,
    failedLayers,
    layers
  };
}
|
|
488
|
+
/**
 * Aggregates tool usage reported anywhere in an envelope's event payloads.
 *
 * Each payload is inspected three ways: a `toolUsage` roll-up, a `toolCalls`
 * list, and the payload itself as a single tool call.
 *
 * @param envelope Envelope whose `events` are scanned.
 * @returns `{ totalCalls, erroredCalls, byTool }`.
 */
function summarizeToolUsage(envelope) {
  const totals = { totalCalls: 0, erroredCalls: 0, byTool: {} };
  envelope.events.forEach(({ payload }) => {
    collectToolUsagePayload(totals, payload.toolUsage);
    collectToolCalls(totals, payload.toolCalls);
    collectSingleTool(totals, payload);
  });
  return totals;
}
|
|
497
|
+
/**
 * Folds a `toolUsage` roll-up into `summary`.
 *
 * Per-tool entries under `byTool` are used when at least one of them carries
 * a non-zero count; otherwise the roll-up's own `totalCalls` /
 * `erroredCalls` are added instead.
 *
 * @param summary Accumulator mutated in place.
 * @param value Candidate roll-up; ignored unless it is a record.
 */
function collectToolUsagePayload(summary, value) {
  if (!isRecord(value)) return;
  let sawPerToolCounts = false;
  if (isRecord(value.byTool)) {
    for (const tool of Object.keys(value.byTool)) {
      const raw = value.byTool[tool];
      if (!isRecord(raw)) continue;
      const calls = finiteNumber(raw.calls);
      const errors = finiteNumber(raw.errors);
      addTool(summary, tool, calls, errors);
      if (calls > 0 || errors > 0) sawPerToolCounts = true;
    }
  }
  // Per-tool counts already updated the totals through addTool.
  if (sawPerToolCounts) return;
  summary.totalCalls += finiteNumber(value.totalCalls);
  summary.erroredCalls += finiteNumber(value.erroredCalls);
}
|
|
517
|
+
/**
 * Folds a list of individual tool-call records into `summary`.
 *
 * A call is named by `toolName`, falling back to `name`; unnamed or
 * non-record entries are skipped. A call counts as errored when its `status`
 * is "error", `error` is defined, or `ok` / `success` is `false`.
 *
 * @param summary Accumulator mutated in place.
 * @param value Candidate list; ignored unless it is an array.
 */
function collectToolCalls(summary, value) {
  if (!Array.isArray(value)) return;
  value.forEach((call) => {
    if (!isRecord(call)) return;
    const name = stringValue(call.toolName) ?? stringValue(call.name);
    if (!name) return;
    const errored = [
      call.status === "error",
      call.error !== void 0,
      call.ok === false,
      call.success === false
    ].some(Boolean);
    addTool(summary, name, 1, errored ? 1 : 0);
  });
}
|
|
527
|
+
/**
 * Treats an event payload as a single tool call when it carries a `toolName`.
 *
 * The call counts as errored when `status` is "error", `error` is defined,
 * or `ok` / `success` is `false`.
 *
 * @param summary Accumulator mutated in place.
 * @param payload Event payload to inspect.
 */
function collectSingleTool(summary, payload) {
  const name = stringValue(payload.toolName);
  if (!name) return;
  const errored = [
    payload.status === "error",
    payload.error !== void 0,
    payload.ok === false,
    payload.success === false
  ].some(Boolean);
  addTool(summary, name, 1, errored ? 1 : 0);
}
|
|
533
|
+
/**
 * Adds call and error counts for one tool to `summary`.
 *
 * @param summary Accumulator with `totalCalls`, `erroredCalls` and `byTool`;
 *   mutated in place.
 * @param name Tool name used as the key in `summary.byTool`.
 * @param calls Calls to add.
 * @param errors Errored calls to add.
 */
function addTool(summary, name, calls, errors) {
  if (calls === 0 && errors === 0) return;
  const byTool = summary.byTool;
  // Tool names come from trace payloads. A plain lookup would resolve names
  // such as "constructor" or "__proto__" to inherited objects and then write
  // counters onto them, so only own properties count as existing entries.
  if (Object.hasOwn(byTool, name)) {
    byTool[name].calls += calls;
    byTool[name].errors += errors;
  } else {
    // defineProperty creates a real own entry even for "__proto__", where a
    // plain assignment would swap the object's prototype instead.
    Object.defineProperty(byTool, name, {
      value: { calls, errors },
      enumerable: true,
      writable: true,
      configurable: true
    });
  }
  summary.totalCalls += calls;
  summary.erroredCalls += errors;
}
|
|
542
|
+
/**
 * Normalizes failure clusters from any of three input shapes.
 *
 * - An array of clusters is tagged `source: "custom"`.
 * - A report whose first cluster has a string `failureClass` is tagged
 *   `source: "failure-cluster-view"`; its share is
 *   `runCount / totalFailures`.
 * - Any other report is tagged `source: "insight-report"`.
 *
 * @param input Cluster array, report object, or a falsy value.
 * @returns Normalized clusters; empty when `input` is falsy or the report
 *   has no clusters.
 */
function normalizeFailureClusters(input) {
  if (!input) return [];
  if (Array.isArray(input)) {
    return input.map((cluster) => ({
      id: cluster.id,
      name: cluster.name,
      share: clamp01(cluster.share ?? 0),
      ...cluster.runCount !== void 0 ? { runCount: cluster.runCount } : {},
      exemplars: [...cluster.exemplars ?? []],
      ...cluster.suggestedFix ? { suggestedFix: cluster.suggestedFix } : {},
      source: "custom",
      ...cluster.metadata ? { metadata: cluster.metadata } : {}
    }));
  }
  const report = input;
  const clusters = report.clusters ?? [];
  // A report without clusters used to reach `report.clusters.map` in the
  // insight branch and throw on `undefined`; nothing to normalize here.
  if (clusters.length === 0) return [];
  const first = clusters[0];
  if (isRecord(first) && typeof first.failureClass === "string") {
    return clusters.map((cluster) => ({
      id: [
        cluster.failureClass,
        cluster.toolName ?? "",
        cluster.argPrefix ?? "",
        cluster.dimension ?? ""
      ].join("|"),
      name: [
        cluster.failureClass,
        cluster.toolName ? `tool:${cluster.toolName}` : void 0,
        cluster.dimension ? `dimension:${cluster.dimension}` : void 0
      ].filter(Boolean).join(" "),
      share: report.totalFailures > 0 ? cluster.runCount / report.totalFailures : 0,
      runCount: cluster.runCount,
      exemplars: [cluster.exampleRunId],
      source: "failure-cluster-view",
      metadata: {
        scenarioIds: cluster.scenarioIds,
        exampleError: cluster.exampleError,
        argPrefix: cluster.argPrefix
      }
    }));
  }
  return clusters.map((cluster) => ({
    id: cluster.id,
    name: cluster.name,
    share: clamp01(cluster.share),
    // Same guard as the array branch: a missing list becomes an empty one.
    exemplars: [...cluster.exemplars ?? []],
    ...cluster.suggestedFix ? { suggestedFix: cluster.suggestedFix } : {},
    source: "insight-report"
  }));
}
|
|
594
|
+
/**
 * Projects analyst findings (snake_case fields) onto camelCase summaries.
 *
 * `subject` and `recommendedAction` are only present when the source finding
 * has a truthy value for them; `confidence` is passed through `clamp01`.
 *
 * @param {object[]} findings - Findings to summarize.
 * @returns {object[]} One summary per finding, in input order.
 */
function summarizeFindings(findings) {
  return findings.map((finding) => {
    const summary = {
      findingId: finding.finding_id,
      analystId: finding.analyst_id,
      severity: finding.severity,
      area: finding.area,
      claim: finding.claim,
      confidence: clamp01(finding.confidence)
    };
    if (finding.subject) {
      summary.subject = finding.subject;
    }
    if (finding.recommended_action) {
      summary.recommendedAction = finding.recommended_action;
    }
    summary.evidenceRefs = finding.evidence_refs;
    return summary;
  });
}
|
|
607
|
+
/**
 * Builds one "fix this layer" recommendation per failed verifier layer.
 *
 * The detail text is the layer's `reason`, else the message of its first
 * finding, else its `status`.
 *
 * @param {object|undefined|null} verifier - Verifier result with `allPass`,
 *   `layers` and `failedLayers`.
 * @returns {string[]} Recommendations; empty when there is no verifier or all
 *   layers pass.
 */
function recommendFromVerifier(verifier) {
  if (!verifier) return [];
  if (verifier.allPass) return [];
  const failing = verifier.layers.filter((entry) => verifier.failedLayers.includes(entry.layer));
  return failing.map((entry) => {
    const headline = entry.findings[0]?.message;
    const explanation = entry.reason ?? headline ?? entry.status;
    return `Fix verifier layer "${entry.layer}": ${explanation}`;
  });
}
|
|
615
|
+
/**
 * Renders a feedback pack and returns its non-blank lines, each trimmed.
 *
 * @param {object} pack - Feedback pack handed to `renderWorkflowFeedbackPack`.
 * @returns {string[]} Trimmed, non-empty lines in rendered order.
 */
function renderDriverContextLines(pack) {
  const rendered = renderWorkflowFeedbackPack(pack);
  const lines = [];
  for (const rawLine of rendered.split("\n")) {
    const line = rawLine.trim();
    if (line) lines.push(line);
  }
  return lines;
}
|
|
618
|
+
/**
 * Sort comparator for findings: higher severity rank first, and for equal
 * ranks the higher confidence first.
 *
 * @param {object} left - First finding.
 * @param {object} right - Second finding.
 * @returns {number} Negative when `left` sorts first, positive when `right` does.
 */
function compareFindings(left, right) {
  const bySeverity = severityRank(right.severity) - severityRank(left.severity);
  return bySeverity !== 0 ? bySeverity : right.confidence - left.confidence;
}
|
|
623
|
+
/**
 * Translates a verifier severity label into the finding severity scale.
 *
 * "critical" and "info" map to themselves, "major" to "high", "minor" to
 * "low"; every other value maps to "medium".
 *
 * @param {unknown} severity - Verifier severity label.
 * @returns {string} Finding severity.
 */
function verifierSeverity(severity) {
  const translation = new Map([
    ["critical", "critical"],
    ["major", "high"],
    ["minor", "low"],
    ["info", "info"]
  ]);
  return translation.get(severity) ?? "medium";
}
|
|
637
|
+
/**
 * Maps a finding severity to a numeric rank (higher means more severe).
 *
 * @param {unknown} severity - One of "critical", "high", "medium", "low", "info".
 * @returns {number} 5 down to 1 for the known severities, 0 for anything else.
 */
function severityRank(severity) {
  switch (severity) {
    case "critical":
      return 5;
    case "high":
      return 4;
    case "medium":
      return 3;
    case "low":
      return 2;
    case "info":
      return 1;
    default:
      // Unknown severities rank lowest. Returning undefined here would make
      // rank subtraction yield NaN and leave sort comparators inconsistent.
      return 0;
  }
}
|
|
651
|
+
/**
 * Trims each string and returns the distinct non-empty results, keeping the
 * order of first appearance.
 *
 * @param {Iterable<string>} values - Strings to de-duplicate.
 * @returns {string[]} Unique trimmed strings.
 */
function uniqueStrings(values) {
  // A Set iterates in insertion order, so first occurrences keep their position.
  const distinct = new Set();
  for (const raw of values) {
    const text = raw.trim();
    if (text.length > 0) distinct.add(text);
  }
  return [...distinct];
}
|
|
662
|
+
/**
 * Serializes per-tool usage as comma-separated `tool:calls/errors` entries,
 * ordered by tool name using `localeCompare`.
 *
 * @param {object} summary - Usage summary with a `byTool` record.
 * @returns {string} Compact usage string; empty when no tools are recorded.
 */
function formatToolUsage(summary) {
  const ordered = Object.entries(summary.byTool);
  ordered.sort((left, right) => left[0].localeCompare(right[0]));
  const parts = [];
  for (const [tool, usage] of ordered) {
    parts.push(`${tool}:${usage.calls}/${usage.errors}`);
  }
  return parts.join(",");
}
|
|
665
|
+
/**
 * Returns the value when it is a finite number greater than zero, else 0.
 *
 * @param {unknown} value - Candidate count.
 * @returns {number} The positive finite value, or 0.
 */
function finiteNumber(value) {
  if (typeof value !== "number") return 0;
  if (!Number.isFinite(value)) return 0;
  return value > 0 ? value : 0;
}
|
|
668
|
+
/**
 * Clamps a number into the closed interval [0, 1].
 *
 * @param {unknown} value - Candidate number.
 * @returns {number} The clamped value, or 0 when `value` is not a finite number.
 */
function clamp01(value) {
  return Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : 0;
}
|
|
672
|
+
/**
 * Tells whether a value is a non-null object that is not an array.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for non-array objects.
 */
function isRecord(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
675
|
+
/**
 * Returns the value when it is a non-empty string.
 *
 * @param {unknown} value - Candidate string.
 * @returns {string|undefined} The string, or undefined for empty or non-string input.
 */
function stringValue(value) {
  if (typeof value !== "string" || value === "") return void 0;
  return value;
}
|
|
678
|
+
|
|
679
|
+
// src/workflow/sanitize.ts
|
|
680
|
+
import { createHash } from "crypto";
|
|
681
|
+
// Maximum sanitized string length used when `options.maxStringLength` is not supplied; longer strings are truncated by sanitizeString.
var DEFAULT_MAX_STRING_LENGTH = 600;
|
|
682
|
+
// Whole-key, case-insensitive match for credential-style keys; sanitizeRecord replaces their values with a `[redacted:<key>]` placeholder.
var SECRET_KEY_RE = /^(authorization|cookie|set-cookie|x-api-key|api[-_]?key|token|access[-_]?token|refresh[-_]?token|secret|password|passwd|session|credential|credentials)$/i;
|
|
683
|
+
// Whole-key, case-insensitive match for argument-style keys; sanitizeRecord replaces their values with a hash descriptor (see hashedValue).
var ARG_KEY_RE = /^(args|arguments|rawargs|raw_args|toolargs|tool_args|inputargs|input_args)$/i;
|
|
684
|
+
// Keys that isFileContentKey always treats as file content, regardless of the other keys in the record.
var FILE_CONTENT_KEY_RE = /^(filecontent|filecontents|file_content|file_contents|contents)$/i;
|
|
685
|
+
// Keys whose presence in a record makes sanitizeRecord treat sibling `content` / `source` / `diff` keys as file content.
var FILE_HINT_KEY_RE = /^(path|filepath|file_path|filename|file|uri)$/i;
|
|
686
|
+
/**
 * Validates a workflow trace envelope and returns a sanitized copy together
 * with a report of what was redacted, hashed, truncated or dropped.
 *
 * @param {unknown} input - Envelope passed to `validateWorkflowTraceEnvelope`.
 * @param {object} [options] - Optional `rules`, `maxStringLength`, `hashSalt`,
 *   `approvedArtifactUris` and `approvedArtifactKinds`.
 * @returns {{envelope: object, report: object}} Sanitized envelope and report.
 */
function sanitizeWorkflowTraceEnvelope(input, options = {}) {
  const envelope = validateWorkflowTraceEnvelope(input);
  const report = emptyReport();
  // Shared context: every helper below writes its counters into `report`.
  const ctx = {
    rules: [...options.rules ?? DEFAULT_REDACTION_RULES],
    maxStringLength: options.maxStringLength ?? DEFAULT_MAX_STRING_LENGTH,
    hashSalt: options.hashSalt ?? "",
    approvedArtifactUris: new Set(options.approvedArtifactUris ?? []),
    approvedArtifactKinds: new Set(options.approvedArtifactKinds ?? []),
    report
  };
  const sanitized = {
    traceVersion: envelope.traceVersion,
    runId: envelope.runId
  };
  if (envelope.topology) {
    sanitized.topology = sanitizeValue(envelope.topology, ctx);
  }
  sanitized.events = envelope.events.map((event) => sanitizeWorkflowTraceEvent(event, ctx));
  if (envelope.artifacts) {
    sanitized.artifacts = sanitizeArtifacts(envelope.artifacts, ctx);
  }
  if (envelope.metadata) {
    sanitized.metadata = sanitizeRecord(envelope.metadata, ctx);
  }
  return { envelope: sanitized, report };
}
|
|
709
|
+
/**
 * Copies an event's `kind`, `runId` and `timestamp` and sanitizes its payload.
 *
 * @param {object} event - Trace event.
 * @param {object} ctx - Sanitization context.
 * @returns {object} New event object with a sanitized `payload`.
 */
function sanitizeWorkflowTraceEvent(event, ctx) {
  const { kind, runId, timestamp } = event;
  const payload = sanitizeRecord(event.payload, ctx);
  return { kind, runId, timestamp, payload };
}
|
|
717
|
+
/**
 * Sanitizes artifact descriptors.
 *
 * An artifact is "approved" when its uri or kind is in the context's approved
 * sets; that flag is forwarded to `sanitizeRecord` for the artifact metadata.
 * `uri` and `contentType` go through `sanitizeString`; `sha256` is copied.
 *
 * @param {object[]} artifacts - Artifact descriptors.
 * @param {object} ctx - Sanitization context.
 * @returns {object[]} Sanitized artifact descriptors.
 */
function sanitizeArtifacts(artifacts, ctx) {
  return artifacts.map((artifact) => {
    const artifactApproved = ctx.approvedArtifactUris.has(artifact.uri) || ctx.approvedArtifactKinds.has(artifact.kind);
    // Metadata is sanitized before uri/contentType, matching the order in
    // which the report counters were originally updated.
    let metadata;
    if (artifact.metadata) {
      metadata = sanitizeRecord(artifact.metadata, ctx, { artifactApproved });
    }
    const clean = {
      kind: artifact.kind,
      uri: sanitizeString(artifact.uri, ctx)
    };
    if (artifact.contentType) {
      clean.contentType = sanitizeString(artifact.contentType, ctx);
    }
    if (artifact.sha256) {
      clean.sha256 = artifact.sha256;
    }
    if (metadata) {
      clean.metadata = metadata;
    }
    return clean;
  });
}
|
|
730
|
+
/**
 * Sanitizes one record key by key.
 *
 * Per key, the first matching rule wins:
 *  1. secret-style key: value replaced by `[redacted:<key>]` and counted in
 *     `droppedPayloadKeys`;
 *  2. argument-style key: value replaced by a hash descriptor, `hashedArgs` incremented;
 *  3. file-content key (unless `options.artifactApproved`): value replaced by a
 *     hash descriptor, `droppedArtifactContents` incremented;
 *  4. otherwise the value is sanitized recursively.
 *
 * @param {object} record - Record to sanitize.
 * @param {object} ctx - Sanitization context.
 * @param {object} [options] - `artifactApproved` disables rule 3.
 * @returns {object} New sanitized record.
 */
function sanitizeRecord(record, ctx, options = {}) {
  const fileHinted = Object.keys(record).some((name) => FILE_HINT_KEY_RE.test(name));
  const sanitized = {};
  for (const [key, value] of Object.entries(record)) {
    if (SECRET_KEY_RE.test(key)) {
      sanitized[key] = `[redacted:${key}]`;
      increment(ctx.report.droppedPayloadKeys, key);
    } else if (ARG_KEY_RE.test(key)) {
      sanitized[key] = hashedValue(value, ctx);
      ctx.report.hashedArgs += 1;
    } else if (!options.artifactApproved && isFileContentKey(key, fileHinted)) {
      sanitized[key] = hashedValue(value, ctx);
      ctx.report.droppedArtifactContents += 1;
    } else {
      sanitized[key] = sanitizeValue(value, ctx);
    }
  }
  return sanitized;
}
|
|
753
|
+
/**
 * Sanitizes an arbitrary value: strings via `sanitizeString`, arrays element
 * by element, non-null non-array objects via `sanitizeRecord`; everything
 * else is returned unchanged.
 *
 * @param {unknown} value - Value to sanitize.
 * @param {object} ctx - Sanitization context.
 * @returns {unknown} Sanitized value.
 */
function sanitizeValue(value, ctx) {
  if (Array.isArray(value)) {
    return value.map((entry) => sanitizeValue(entry, ctx));
  }
  switch (typeof value) {
    case "string":
      return sanitizeString(value, ctx);
    case "object":
      // Arrays were handled above, so only null needs excluding here.
      return value === null ? value : sanitizeRecord(value, ctx);
    default:
      return value;
  }
}
|
|
759
|
+
/**
 * Redacts a string with the context's rules, merges the redaction counts into
 * the shared report, and truncates the result to `ctx.maxStringLength`.
 *
 * A truncated string keeps `maxStringLength - 1` characters followed by an
 * ellipsis character, and increments `truncatedStrings`.
 *
 * @param {string} value - String to sanitize.
 * @param {object} ctx - Sanitization context.
 * @returns {string} Redacted and possibly truncated string.
 */
function sanitizeString(value, ctx) {
  const redacted = redactString(value, ctx.rules);
  const totals = ctx.report;
  totals.redactionCount += redacted.report.redactionCount;
  Object.entries(redacted.report.byRule).forEach(([rule, count]) => {
    totals.byRule[rule] = (totals.byRule[rule] ?? 0) + count;
  });
  const text = redacted.output;
  const limit = ctx.maxStringLength;
  if (text.length > limit) {
    totals.truncatedStrings += 1;
    return `${text.slice(0, Math.max(0, limit - 1))}\u2026`;
  }
  return text;
}
|
|
769
|
+
/**
 * Builds the descriptor that stands in for a removed value: a `redacted`
 * marker, a salted SHA-256 of the value, and a coarse description of its shape.
 *
 * @param {unknown} value - Value being removed.
 * @param {object} ctx - Sanitization context supplying `hashSalt`.
 * @returns {{redacted: true, sha256: string, shape: object}} Replacement descriptor.
 */
function hashedValue(value, ctx) {
  const sha256 = sha256Stable(value, ctx.hashSalt);
  const shape = valueShape(value);
  return { redacted: true, sha256, shape };
}
|
|
776
|
+
/**
 * Computes a hex SHA-256 over the salt followed by the stable serialization
 * of the value.
 *
 * @param {unknown} value - Value to fingerprint.
 * @param {string} salt - Salt fed to the hash before the value.
 * @returns {string} Lowercase hex digest.
 */
function sha256Stable(value, salt) {
  // stableStringify falls through to JSON.stringify, which returns undefined
  // (not a string) for undefined, functions and symbols. Hash.update() throws
  // on undefined, so substitute a fixed token. Digests for every value that
  // already serialized to a string are unchanged.
  const serialized = stableStringify(value) ?? "undefined";
  return createHash("sha256").update(salt).update(serialized).digest("hex");
}
|
|
779
|
+
/**
 * Serializes a value to JSON-like text with object keys emitted in sorted
 * order, so equal records give equal text regardless of key insertion order.
 *
 * Anything that is neither an array nor a record is handed to `JSON.stringify`.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string|undefined} Serialized text (whatever `JSON.stringify`
 *   yields for primitives).
 */
function stableStringify(value) {
  if (Array.isArray(value)) {
    const items = value.map((item) => stableStringify(item));
    return `[${items.join(",")}]`;
  }
  if (!isRecord2(value)) {
    return JSON.stringify(value);
  }
  const sortedKeys = Object.keys(value).sort();
  const fields = sortedKeys.map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`);
  return `{${fields.join(",")}}`;
}
|
|
786
|
+
/**
 * Describes a value's shape without exposing its contents: arrays report
 * their length, records report their sorted key names, and anything else
 * reports its `typeof`.
 *
 * @param {unknown} value - Value to describe.
 * @returns {object} Shape descriptor with a `type` field.
 */
function valueShape(value) {
  if (Array.isArray(value)) {
    return { type: "array", length: value.length };
  }
  if (!isRecord2(value)) {
    return { type: typeof value };
  }
  const keys = Object.keys(value);
  keys.sort();
  return { type: "object", keys };
}
|
|
796
|
+
/**
 * Decides whether a key holds file content.
 *
 * Keys matching `FILE_CONTENT_KEY_RE` always qualify; `content`, `source` and
 * `diff` (case-insensitive) qualify only when the record also has a file hint key.
 *
 * @param {string} key - Key under inspection.
 * @param {boolean} hasFileHint - Whether the record has a file hint key.
 * @returns {boolean} Whether the key's value should be treated as file content.
 */
function isFileContentKey(key, hasFileHint) {
  const alwaysContent = FILE_CONTENT_KEY_RE.test(key);
  if (alwaysContent) return true;
  const ambiguousKey = /^(content|source|diff)$/i;
  return hasFileHint && ambiguousKey.test(key);
}
|
|
800
|
+
/**
 * Creates a fresh sanitization report with every counter at zero and both
 * per-key tallies empty.
 *
 * @returns {object} New report object.
 */
function emptyReport() {
  const report = {
    redactionCount: 0,
    byRule: {},
    hashedArgs: 0,
    truncatedStrings: 0,
    droppedPayloadKeys: {},
    droppedArtifactContents: 0
  };
  return report;
}
|
|
810
|
+
/**
 * Adds one to the counter stored under `key`, starting from 0 when absent.
 *
 * @param {object} record - Mutable tally keyed by name.
 * @param {string} key - Counter to bump.
 * @returns {void}
 */
function increment(record, key) {
  const previous = record[key] ?? 0;
  record[key] = previous + 1;
}
|
|
813
|
+
/**
 * Tells whether a value is a non-null object that is not an array.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for non-array objects.
 */
function isRecord2(value) {
  if (typeof value !== "object") return false;
  return value !== null && !Array.isArray(value);
}
|
|
816
|
+
|
|
817
|
+
// src/workflow/trace-event-fields.ts
|
|
818
|
+
/**
 * Returns the value when it is a non-null, non-array object, else null.
 *
 * @param {unknown} value - Candidate record.
 * @returns {object|null} The same object, or null.
 */
function objectRecord(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  return value;
}
|
|
821
|
+
/**
 * Reads `record[key]` and returns it only when it is a string (empty
 * strings included).
 *
 * @param {object} record - Record to read from.
 * @param {string} key - Field name.
 * @returns {string|null} The string value, or null.
 */
function stringField(record, key) {
  const candidate = record[key];
  if (typeof candidate !== "string") return null;
  return candidate;
}
|
|
825
|
+
/**
 * Reads `record[key]` and returns it only when it is a finite number.
 *
 * @param {object} record - Record to read from.
 * @param {string} key - Field name.
 * @returns {number|null} The finite number, or null.
 */
function numberField(record, key) {
  const candidate = record[key];
  if (typeof candidate !== "number") return null;
  return Number.isFinite(candidate) ? candidate : null;
}
|
|
829
|
+
/**
 * Extracts a token usage triple from an untyped value.
 *
 * The value must be a record with finite numeric `input` and `output`;
 * `cached` is included only when it is also a finite number.
 *
 * @param {unknown} value - Candidate token usage.
 * @returns {{input: number, output: number, cached?: number}|null} Usage, or
 *   null when the value is not a record or lacks input/output.
 */
function tokenUsageField(value) {
  const record = objectRecord(value);
  if (!record) return null;
  const input = numberField(record, "input");
  const output = numberField(record, "output");
  if (input === null || output === null) return null;
  const usage = { input, output };
  const cached = numberField(record, "cached");
  if (cached !== null) usage.cached = cached;
  return usage;
}
|
|
842
|
+
|
|
843
|
+
// src/workflow/phase-graph.ts
|
|
844
|
+
/**
 * Folds a list of workflow events into per-phase nodes and a list of branches.
 *
 * Every event with a phase title updates that phase's node. Branch start
 * events open a branch; branch ended/failed events close the most recent
 * matching open branch, or create a branch on the spot when none is open.
 *
 * @param {object[]} events - Workflow trace events in order.
 * @returns {{nodes: object[], branches: object[]}} Snapshot of phases and branches.
 */
function workflowPhaseGraph(events) {
  const nodesByTitle = new Map();
  const branches = [];
  for (const event of events) {
    const title = phaseTitleForEvent(event);
    if (title) {
      observePhaseEvent(phaseNode(nodesByTitle, title), event);
    }
    switch (event.kind) {
      case "workflow.branch.started":
        branches.push(branchStarted(event, branches.length));
        break;
      case "workflow.branch.ended":
      case "workflow.branch.failed": {
        let branch = openBranchFor(branches, event);
        if (!branch) {
          // Terminal event without a matching start: synthesize the branch.
          branch = branchStarted(event, branches.length);
          branches.push(branch);
        }
        observeBranchTerminal(branch, event);
        break;
      }
    }
  }
  return {
    nodes: [...nodesByTitle.values()].map(readonlyPhaseNode),
    branches: branches.map(readonlyBranch)
  };
}
|
|
865
|
+
/**
 * Returns the accumulator node for a phase title, creating and registering a
 * zeroed node on first use.
 *
 * New nodes get the id `phase-<n>`, where n is the number of nodes that
 * existed before this one was added.
 *
 * @param {Map<string, object>} nodes - Nodes keyed by phase title.
 * @param {string} title - Phase title.
 * @returns {object} Mutable phase node.
 */
function phaseNode(nodes, title) {
  let node = nodes.get(title);
  if (!node) {
    node = { id: `phase-${nodes.size}`, title };
    const zeroedCounters = [
      "eventCount",
      "branchCount",
      "failedBranchCount",
      "agentCalls",
      "loopCalls",
      "verifierCalls",
      "analystCalls",
      "reviewerCalls",
      "agentFailures",
      "loopFailures",
      "verifierFailures",
      "analystFailures",
      "reviewerFailures",
      "costUsd"
    ];
    for (const counter of zeroedCounters) {
      node[counter] = 0;
    }
    node.tokenUsage = { input: 0, output: 0 };
    nodes.set(title, node);
  }
  return node;
}
|
|
890
|
+
/**
 * Accumulates one event into its phase node.
 *
 * Always bumps `eventCount` and widens the node's started/ended timestamps.
 * Ended/failed events then bump the matching counters (a failed
 * agent/loop/verifier/analyst/reviewer event counts as both a call and a
 * failure; branch ended and branch failed have separate counters). Any finite
 * `costUsd` and well-formed `tokenUsage` on the payload are added to the totals.
 *
 * @param {object} node - Mutable phase node.
 * @param {object} event - Workflow event belonging to the phase.
 * @returns {void}
 */
function observePhaseEvent(node, event) {
  node.eventCount += 1;
  node.startedAt = minDefined(node.startedAt, event.timestamp);
  node.endedAt = maxDefined(node.endedAt, event.timestamp);
  // Event kind -> node counters that grow by one for that kind.
  const countersByKind = new Map([
    ["workflow.branch.ended", ["branchCount"]],
    ["workflow.branch.failed", ["failedBranchCount"]],
    ["workflow.agent.ended", ["agentCalls"]],
    ["workflow.agent.failed", ["agentCalls", "agentFailures"]],
    ["workflow.loop.ended", ["loopCalls"]],
    ["workflow.loop.failed", ["loopCalls", "loopFailures"]],
    ["workflow.verifier.ended", ["verifierCalls"]],
    ["workflow.verifier.failed", ["verifierCalls", "verifierFailures"]],
    ["workflow.analyst.ended", ["analystCalls"]],
    ["workflow.analyst.failed", ["analystCalls", "analystFailures"]],
    ["workflow.reviewer.ended", ["reviewerCalls"]],
    ["workflow.reviewer.failed", ["reviewerCalls", "reviewerFailures"]]
  ]);
  for (const counter of countersByKind.get(event.kind) ?? []) {
    node[counter] += 1;
  }
  const costUsd = numberField(event.payload, "costUsd");
  if (costUsd !== null) {
    node.costUsd += costUsd;
  }
  const tokenUsage = tokenUsageField(event.payload.tokenUsage);
  if (tokenUsage) {
    addTokenUsage(node.tokenUsage, tokenUsage);
  }
}
|
|
942
|
+
/**
 * Finds the phase title an event belongs to: `payload.title` for
 * "workflow.phase" events, `payload.phase` for every other kind.
 *
 * @param {object} event - Workflow event.
 * @returns {string|null} Phase title, or null when the field is not a string.
 */
function phaseTitleForEvent(event) {
  const field = event.kind === "workflow.phase" ? "title" : "phase";
  return stringField(event.payload, field);
}
|
|
945
|
+
/**
 * Creates a branch record in the "started" state from an event.
 *
 * Missing payload fields fall back to operation "unknown", branchIndex -1
 * and phase null. `stageCount` is set only when the payload has a finite one.
 *
 * @param {object} event - Event supplying payload and timestamp.
 * @param {number} index - Ordinal used in the branch id (`branch-<index>`).
 * @returns {object} Mutable branch record.
 */
function branchStarted(event, index) {
  const { payload, timestamp } = event;
  const branch = {
    id: `branch-${index}`,
    operation: stringField(payload, "operation") ?? "unknown",
    branchIndex: numberField(payload, "branchIndex") ?? -1,
    phase: stringField(payload, "phase"),
    status: "started"
  };
  const stages = numberField(payload, "stageCount");
  if (stages !== null) {
    branch.stageCount = stages;
  }
  branch.startedAt = timestamp;
  return branch;
}
|
|
958
|
+
/**
 * Finds the most recently added branch that is still "started" and has the
 * same operation, branch index and phase as the event.
 *
 * @param {object[]} branches - Branch records in creation order.
 * @param {object} event - Terminal branch event.
 * @returns {object|null} Matching open branch, or null.
 */
function openBranchFor(branches, event) {
  const wanted = {
    operation: stringField(event.payload, "operation") ?? "unknown",
    branchIndex: numberField(event.payload, "branchIndex") ?? -1,
    phase: stringField(event.payload, "phase")
  };
  // Scan newest-first so a re-run of the same branch closes its latest start.
  let cursor = branches.length;
  while (cursor-- > 0) {
    const candidate = branches[cursor];
    if (candidate?.status !== "started") continue;
    if (candidate.operation !== wanted.operation) continue;
    if (candidate.branchIndex !== wanted.branchIndex) continue;
    if (candidate.phase !== wanted.phase) continue;
    return candidate;
  }
  return null;
}
|
|
970
|
+
/**
 * Applies a terminal (ended/failed) event to a branch record.
 *
 * Sets status and `endedAt`; takes `durationMs` from the payload when finite,
 * otherwise derives it from the timestamps (never negative) when the branch
 * has a start time. Copies over finite `stageCount`/`stageIndex` and
 * non-empty `message`/`code`.
 *
 * @param {object} branch - Mutable branch record.
 * @param {object} event - Terminal branch event.
 * @returns {void}
 */
function observeBranchTerminal(branch, event) {
  const { payload, timestamp } = event;
  branch.status = event.kind === "workflow.branch.failed" ? "failed" : "ended";
  branch.endedAt = timestamp;
  const reportedDuration = numberField(payload, "durationMs");
  if (reportedDuration !== null) {
    branch.durationMs = reportedDuration;
  } else if (branch.startedAt !== void 0) {
    branch.durationMs = Math.max(0, timestamp - branch.startedAt);
  }
  for (const field of ["stageCount", "stageIndex"]) {
    const numeric = numberField(payload, field);
    if (numeric !== null) branch[field] = numeric;
  }
  for (const field of ["message", "code"]) {
    const text = stringField(payload, field);
    if (text) branch[field] = text;
  }
}
|
|
988
|
+
/**
 * Produces the output copy of a phase node: all counters, cost and token
 * usage, plus `startedAt`/`endedAt` when they are defined.
 *
 * Only the top level is copied; `tokenUsage` is the same object as on the
 * source node.
 *
 * @param {object} node - Mutable phase node.
 * @returns {object} New object with the node's fields.
 */
function readonlyPhaseNode(node) {
  const alwaysCopied = [
    "id",
    "title",
    "eventCount",
    "branchCount",
    "failedBranchCount",
    "agentCalls",
    "loopCalls",
    "verifierCalls",
    "analystCalls",
    "reviewerCalls",
    "agentFailures",
    "loopFailures",
    "verifierFailures",
    "analystFailures",
    "reviewerFailures",
    "costUsd",
    "tokenUsage"
  ];
  const snapshot = {};
  for (const field of alwaysCopied) {
    snapshot[field] = node[field];
  }
  for (const field of ["startedAt", "endedAt"]) {
    if (node[field] !== void 0) snapshot[field] = node[field];
  }
  return snapshot;
}
|
|
1012
|
+
/**
 * Produces the output copy of a branch record: the identifying fields are
 * always present, the timing/stage/failure fields only when defined.
 *
 * @param {object} branch - Mutable branch record.
 * @returns {object} New object with the branch's fields.
 */
function readonlyBranch(branch) {
  const { id, operation, branchIndex, phase, status } = branch;
  const snapshot = { id, operation, branchIndex, phase, status };
  const optionalFields = ["startedAt", "endedAt", "durationMs", "stageCount", "stageIndex", "message", "code"];
  for (const field of optionalFields) {
    if (branch[field] !== void 0) snapshot[field] = branch[field];
  }
  return snapshot;
}
|
|
1029
|
+
/**
 * Minimum of two numbers where the running value may still be undefined.
 *
 * @param {number|undefined} current - Running minimum, if any.
 * @param {number} next - New observation.
 * @returns {number} `next` when there is no running value, else the smaller one.
 */
function minDefined(current, next) {
  if (current === void 0) return next;
  return Math.min(current, next);
}
|
|
1032
|
+
/**
 * Maximum of two numbers where the running value may still be undefined.
 *
 * @param {number|undefined} current - Running maximum, if any.
 * @param {number} next - New observation.
 * @returns {number} `next` when there is no running value, else the larger one.
 */
function maxDefined(current, next) {
  if (current === void 0) return next;
  return Math.max(current, next);
}
|
|
1035
|
+
/**
 * Adds one token usage record into a running total, in place.
 *
 * `cached` is only touched when the incoming record defines it, so a total
 * that never saw cached tokens keeps no `cached` field.
 *
 * @param {object} target - Running total (mutated).
 * @param {object} value - Usage to add.
 * @returns {void}
 */
function addTokenUsage(target, value) {
  const { input, output, cached } = value;
  target.input += input;
  target.output += output;
  if (cached !== void 0) {
    target.cached = (target.cached ?? 0) + cached;
  }
}
|
|
1040
|
+
|
|
1041
|
+
// src/workflow/summary.ts
|
|
1042
|
+
/**
 * Validates an envelope and extends the base trace summary with event-kind
 * counts, phase titles, the phase graph, and per-role run, output and failure
 * details.
 *
 * @param {unknown} input - Envelope passed to `validateWorkflowTraceEnvelope`.
 * @param {object} [options] - `source` is copied onto the summary when defined.
 * @returns {object} Execution summary.
 */
function summarizeWorkflowExecution(input, options = {}) {
  const envelope = validateWorkflowTraceEnvelope(input);
  const { events } = envelope;
  const summary = { ...summarizeWorkflowTrace(envelope) };
  if (options.source !== void 0) {
    summary.source = options.source;
  }
  summary.eventKinds = workflowEventKinds(events);
  summary.phases = workflowPhaseTitles(events);
  summary.phaseGraph = workflowPhaseGraph(events);
  // Successful runs and checkpoint outputs come from the "*.ended" events.
  summary.agentRuns = workflowDelegateTraceSummaries(events, "workflow.agent.ended");
  summary.loopRuns = workflowDelegateTraceSummaries(events, "workflow.loop.ended");
  summary.verifierOutputs = workflowCheckpointTraceSummaries(events, "workflow.verifier.ended");
  summary.analystOutputs = workflowCheckpointTraceSummaries(events, "workflow.analyst.ended");
  summary.reviewerOutputs = workflowCheckpointTraceSummaries(events, "workflow.reviewer.ended");
  // Failure details come from the "*.failed" events.
  summary.agentFailureDetails = workflowDelegateFailureSummaries(events, "workflow.agent.failed");
  summary.loopFailureDetails = workflowDelegateFailureSummaries(events, "workflow.loop.failed");
  summary.verifierFailureDetails = workflowDelegateFailureSummaries(events, "workflow.verifier.failed");
  summary.analystFailureDetails = workflowDelegateFailureSummaries(events, "workflow.analyst.failed");
  summary.reviewerFailureDetails = workflowDelegateFailureSummaries(events, "workflow.reviewer.failed");
  return summary;
}
|
|
1072
|
+
/**
 * Counts how many events of each kind occur.
 *
 * @param {object[]} events - Workflow events.
 * @returns {object} Record mapping event kind to occurrence count.
 */
function workflowEventKinds(events) {
  const counts = {};
  events.forEach((event) => {
    const seenSoFar = counts[event.kind] ?? 0;
    counts[event.kind] = seenSoFar + 1;
  });
  return counts;
}
|
|
1078
|
+
/**
 * Lists the distinct phase titles referenced by the events, in order of
 * first appearance.
 *
 * "workflow.phase" events contribute `payload.title`; all other events
 * contribute `payload.phase`. Missing and empty titles are ignored.
 *
 * @param {object[]} events - Workflow events.
 * @returns {string[]} Unique phase titles.
 */
function workflowPhaseTitles(events) {
  // A Set iterates in insertion order, giving first-appearance ordering.
  const ordered = new Set();
  for (const event of events) {
    const field = event.kind === "workflow.phase" ? "title" : "phase";
    const title = stringField(event.payload, field);
    if (title) ordered.add(title);
  }
  return [...ordered];
}
|
|
1089
|
+
/**
 * Summarizes every event of the given "ended" kind: index, label, phase,
 * cost, token usage and the raw `trace` (null when the payload has none).
 *
 * @param {object[]} events - Workflow events.
 * @param {string} endedKind - Event kind to select.
 * @returns {object[]} One summary per matching event, in event order.
 */
function workflowDelegateTraceSummaries(events, endedKind) {
  const summaries = [];
  events.forEach((event) => {
    if (event.kind !== endedKind) return;
    const { payload } = event;
    summaries.push({
      index: numberField(payload, "index"),
      label: stringField(payload, "label"),
      phase: stringField(payload, "phase"),
      costUsd: numberField(payload, "costUsd"),
      tokenUsage: tokenUsageField(payload.tokenUsage),
      trace: payload.trace ?? null
    });
  });
  return summaries;
}
|
|
1099
|
+
/**
 * Same as `workflowDelegateTraceSummaries`, with an extra `output` field
 * extracted from each summary's trace by `checkpointOutput`.
 *
 * @param {object[]} events - Workflow events.
 * @param {string} endedKind - Event kind to select.
 * @returns {object[]} Delegate summaries extended with `output`.
 */
function workflowCheckpointTraceSummaries(events, endedKind) {
  const delegates = workflowDelegateTraceSummaries(events, endedKind);
  return delegates.map((entry) => {
    const output = checkpointOutput(entry.trace);
    return { ...entry, output };
  });
}
|
|
1105
|
+
/**
 * Summarizes every event of the given "failed" kind: index, label, phase,
 * duration, message and code. Missing or mistyped fields are null.
 *
 * @param {object[]} events - Workflow events.
 * @param {string} failedKind - Event kind to select.
 * @returns {object[]} One failure summary per matching event, in event order.
 */
function workflowDelegateFailureSummaries(events, failedKind) {
  return events.flatMap((event) => {
    if (event.kind !== failedKind) return [];
    const { payload } = event;
    return [
      {
        index: numberField(payload, "index"),
        label: stringField(payload, "label"),
        phase: stringField(payload, "phase"),
        durationMs: numberField(payload, "durationMs"),
        message: stringField(payload, "message"),
        code: stringField(payload, "code")
      }
    ];
  });
}
|
|
1115
|
+
/**
 * Extracts the checkpoint output from a trace value.
 *
 * For a record, an own `checkpointOutput` property wins, then an own
 * `output` property; otherwise the trace itself is returned.
 *
 * @param {unknown} trace - Trace value from a checkpoint event.
 * @returns {unknown} The extracted output.
 */
function checkpointOutput(trace) {
  const record = objectRecord(trace);
  if (!record) return trace;
  for (const field of ["checkpointOutput", "output"]) {
    if (Object.hasOwn(record, field)) return record[field];
  }
  return trace;
}
|
|
1123
|
+
|
|
1124
|
+
// src/workflow/intelligence-export.ts
|
|
1125
|
+
// schemaVersion stamped on built intelligence envelopes and required by validateWorkflowTraceIntelligenceEnvelope.
var ENVELOPE_VERSION = "workflow-trace-intelligence-envelope-v1";
|
|
1126
|
+
// Destination recorded on a built envelope when `options.destination` is not supplied.
var DEFAULT_DESTINATION = "intelligence.tangle.tools";
|
|
1127
|
+
/**
 * Builds the export envelope for a workflow trace.
 *
 * Steps, in order: validate the product/partner ids, resolve the active
 * export grants as of `generatedAt`, validate the trace envelope, attach
 * `options.metadata` under `metadata.intelligenceExport` when given, sanitize
 * the envelope, then summarize the sanitized copy.
 *
 * @param {object} options - `productId`, `grants` and `envelope`, plus optional
 *   `partnerId`, `generatedAt`, `metadata`, `sanitize`, `destination`, `links`.
 * @returns {object} Intelligence envelope.
 * @throws {ValidationError} Raised by `activeExportGrantIds` when no active
 *   export grant applies.
 */
function buildWorkflowTraceIntelligenceEnvelope(options) {
  const productId = requireNonEmpty(options.productId, "productId");
  let partnerId;
  if (options.partnerId !== void 0) {
    partnerId = requireNonEmpty(options.partnerId, "partnerId");
  }
  const generatedAt = options.generatedAt ?? (/* @__PURE__ */ new Date()).toISOString();
  const grantIds = activeExportGrantIds({
    grants: options.grants,
    productId,
    partnerId,
    nowMs: Date.parse(generatedAt)
  });
  const base = validateWorkflowTraceEnvelope(options.envelope);
  let sourceEnvelope = base;
  if (options.metadata !== void 0) {
    // Export metadata is attached before sanitizing so it is sanitized too.
    sourceEnvelope = {
      ...base,
      metadata: {
        ...base.metadata ?? {},
        intelligenceExport: options.metadata
      }
    };
  }
  const sanitized = sanitizeWorkflowTraceEnvelope(sourceEnvelope, options.sanitize);
  const traceEnvelope = sanitized.envelope;
  const summary = summarizeWorkflowExecution(traceEnvelope);
  const result = {
    schemaVersion: ENVELOPE_VERSION,
    destination: options.destination ?? DEFAULT_DESTINATION,
    generatedAt,
    productId
  };
  if (partnerId) {
    result.partnerId = partnerId;
  }
  result.runId = traceEnvelope.runId;
  result.grantIds = grantIds;
  result.traceEnvelope = traceEnvelope;
  result.summary = summary;
  result.compactEvidence = compactEvidence(traceEnvelope, summary);
  result.sanitization = sanitized.report;
  if (options.links) {
    result.links = options.links;
  }
  return result;
}
|
|
1162
|
+
/**
 * Validates an untrusted intelligence envelope and returns a normalized copy.
 *
 * Besides field-level checks, the envelope's `runId` must equal the embedded
 * trace envelope's, and `compactEvidence` must equal the evidence recomputed
 * from that trace envelope. The returned `summary` and `compactEvidence` are
 * the recomputed ones, not the supplied ones.
 *
 * @param {unknown} input - Candidate envelope.
 * @returns {object} Validated envelope.
 * @throws {ValidationError} On a wrong `schemaVersion` or a `runId` mismatch
 *   (the helper validators may raise as well).
 */
function validateWorkflowTraceIntelligenceEnvelope(input) {
  const obj = expectRecord2(input, "workflow intelligence envelope");
  if (obj.schemaVersion !== ENVELOPE_VERSION) {
    throw new ValidationError(`workflow intelligence schemaVersion must be ${ENVELOPE_VERSION}`);
  }
  const destination = expectString2(obj.destination, "destination");
  const generatedAt = expectString2(obj.generatedAt, "generatedAt");
  const productId = expectString2(obj.productId, "productId");
  let partnerId;
  if (obj.partnerId !== void 0) {
    partnerId = expectString2(obj.partnerId, "partnerId");
  }
  const runId = expectString2(obj.runId, "runId");
  const grantIds = expectStringArray(obj.grantIds, "grantIds");
  const traceEnvelope = validateWorkflowTraceEnvelope(obj.traceEnvelope);
  if (traceEnvelope.runId !== runId) {
    throw new ValidationError(`workflow intelligence runId ${runId} does not match trace envelope`);
  }
  // Recompute the derived parts and require the supplied evidence to match.
  const summary = summarizeWorkflowExecution(traceEnvelope);
  const suppliedCompact = validateCompactEvidence(obj.compactEvidence);
  const expectedCompact = compactEvidence(traceEnvelope, summary);
  assertCompactEvidenceEqual(suppliedCompact, expectedCompact);
  const sanitization = validateSanitizationReport(obj.sanitization);
  const validated = {
    schemaVersion: ENVELOPE_VERSION,
    destination,
    generatedAt,
    productId
  };
  if (partnerId) {
    validated.partnerId = partnerId;
  }
  validated.runId = runId;
  validated.grantIds = grantIds;
  validated.traceEnvelope = traceEnvelope;
  validated.summary = summary;
  validated.compactEvidence = expectedCompact;
  validated.sanitization = sanitization;
  if (obj.links !== void 0) {
    validated.links = validateLinks(obj.links);
  }
  return validated;
}
|
|
1197
|
+
/**
 * Selects the ids of grants that authorize exporting workflow traces.
 *
 * A grant qualifies when it matches the product or partner, carries the
 * "workflow-trace:export" or "*" scope, has not expired at `nowMs` (the
 * current time is used when `nowMs` is not finite), and has a non-empty `grantId`.
 *
 * @param {object} args - `grants`, `productId`, optional `partnerId`, `nowMs`.
 * @returns {string[]} Ids of qualifying grants, in input order.
 * @throws {ValidationError} When no grants are supplied or none qualifies.
 */
function activeExportGrantIds(args) {
  const { grants, productId, partnerId } = args;
  if (!Array.isArray(grants) || grants.length === 0) {
    throw new ValidationError("workflow intelligence export requires at least one opt-in grant");
  }
  const nowMs = Number.isFinite(args.nowMs) ? args.nowMs : Date.now();
  const matchesSubject = (grant) => grantMatchesSubject(grant, productId, partnerId);
  const allowsExport = (grant) => grant.scopes.includes("workflow-trace:export") || grant.scopes.includes("*");
  const stillActive = (grant) => !grantExpired(grant, nowMs);
  const ids = grants
    .filter(matchesSubject)
    .filter(allowsExport)
    .filter(stillActive)
    .map((grant) => grant.grantId)
    .filter((id) => id.length > 0);
  if (ids.length === 0) {
    throw new ValidationError(
      "workflow intelligence export requires an active workflow-trace:export grant for the product or partner"
    );
  }
  return ids;
}
|
|
1210
|
+
/**
 * Decides whether a grant applies to the exporting product or partner.
 *
 * "product" grants must name the product and "partner" grants must name the
 * partner. Grants with any other subject kind match when their `subjectId`
 * equals either id.
 *
 * @param {object} grant - Grant with `subject` and `subjectId`.
 * @param {string} productId - Exporting product id.
 * @param {string|undefined} partnerId - Exporting partner id, if any.
 * @returns {boolean} Whether the grant covers this export.
 */
function grantMatchesSubject(grant, productId, partnerId) {
  const { subject, subjectId } = grant;
  if (subject === "product") return subjectId === productId;
  if (subject === "partner") return partnerId !== void 0 && subjectId === partnerId;
  // An absent partnerId must never match an absent subjectId; without this
  // guard, a grant with no subject information would authorize any
  // partner-less export via `undefined === undefined`.
  return subjectId === productId || (partnerId !== void 0 && subjectId === partnerId);
}
|
|
1215
|
+
/**
 * Tells whether a grant has expired at the given instant.
 *
 * Grants without `expiresAt` never expire. A grant whose `expiresAt` cannot
 * be parsed as a date is treated as expired: an unverifiable expiry must not
 * keep a consent grant alive indefinitely.
 *
 * @param {object} grant - Grant with an optional `expiresAt` date string.
 * @param {number} nowMs - Reference time in epoch milliseconds.
 * @returns {boolean} True when the grant is expired or its expiry is unreadable.
 */
function grantExpired(grant, nowMs) {
  if (!grant.expiresAt) return false;
  const expiresAt = Date.parse(grant.expiresAt);
  // Date.parse yields NaN for unparsable input; fail closed in that case.
  if (!Number.isFinite(expiresAt)) return true;
  return expiresAt <= nowMs;
}
|
|
1220
|
+
/**
 * Builds the compact evidence digest of an envelope: event-kind counts,
 * phases, tool names, redacted-hash descriptors and a trimmed artifact list,
 * plus the summary's failure message when it has one.
 *
 * @param {object} envelope - Trace envelope (already sanitized by the callers
 *   in this module).
 * @param {object} summary - Execution summary of the same envelope.
 * @returns {object} Compact evidence record.
 */
function compactEvidence(envelope, summary) {
  const evidence = {
    eventKinds: summary.eventKinds,
    phases: summary.phases,
    toolNames: toolNames(envelope.events),
    redactedHashes: redactedHashes(envelope)
  };
  evidence.artifacts = (envelope.artifacts ?? []).map((artifact) => {
    const entry = { kind: artifact.kind, uri: artifact.uri };
    if (artifact.contentType) entry.contentType = artifact.contentType;
    if (artifact.sha256) entry.sha256 = artifact.sha256;
    return entry;
  });
  if (summary.failureMessage) {
    evidence.failureMessage = summary.failureMessage;
  }
  return evidence;
}
|
|
1235
|
+
/**
 * Collects the distinct tool names mentioned anywhere in the event payloads,
 * returned in default sort order.
 *
 * @param {object[]} events - Workflow events.
 * @returns {string[]} Sorted unique tool names.
 */
function toolNames(events) {
  const found = new Set();
  for (const event of events) {
    collectToolNames(event.payload, found);
  }
  const sorted = Array.from(found);
  sorted.sort();
  return sorted;
}
|
|
1240
|
+
/**
 * Recursively walks a payload value and adds every tool name it finds to `names`.
 *
 * A record contributes its `toolName` (or, failing that, `name`) when
 * `looksLikeToolRecord` accepts it, and every key of a record-valued
 * `byTool` field. All array elements and record values are visited.
 *
 * @param {unknown} value - Payload fragment to inspect.
 * @param {Set<string>} names - Accumulator of tool names (mutated).
 * @returns {void}
 */
function collectToolNames(value, names) {
  if (Array.isArray(value)) {
    for (const entry of value) {
      collectToolNames(entry, names);
    }
    return;
  }
  if (!isRecord3(value)) return;
  const label = stringValue2(value.toolName) ?? stringValue2(value.name);
  if (label && looksLikeToolRecord(value)) {
    names.add(label);
  }
  if (isRecord3(value.byTool)) {
    Object.keys(value.byTool).forEach((tool) => names.add(tool));
  }
  Object.values(value).forEach((child) => collectToolNames(child, names));
}
|
|
1255
|
+
/**
 * Heuristic: does this record carry any property typical of a tool call?
 *
 * @param {object} value - Record to inspect.
 * @returns {boolean} True when at least one marker property is present.
 */
function looksLikeToolRecord(value) {
  const markers = ["toolName", "toolArgs", "args", "status", "error", "success"];
  return markers.some((marker) => marker in value);
}
|
|
1258
|
+
/**
 * Gather hash evidence for every redacted value in an envelope.
 *
 * Scans event payloads first, then artifact metadata, then the
 * envelope-level metadata, recording the path of each hit.
 *
 * @param {object} envelope - Trace envelope.
 * @returns {Array<object>} Hash evidence entries in scan order.
 */
function redactedHashes(envelope) {
  const found = [];
  const scanEach = (items, field, prefix) => {
    items.forEach((item, index) => {
      collectHashEvidence(item[field], `${prefix}[${index}].${field}`, found);
    });
  };
  scanEach(envelope.events, "payload", "events");
  scanEach(envelope.artifacts ?? [], "metadata", "artifacts");
  collectHashEvidence(envelope.metadata, "metadata", found);
  return found;
}
|
|
1270
|
+
/**
 * Recursively find redaction markers under `value` and append them to `out`.
 *
 * A record with `redacted === true` and a string `sha256` is recorded
 * (with its `shape` when defined) and not descended into. Any other
 * record or array is walked, extending `path` per key or index.
 *
 * @param {unknown} value - Fragment to scan.
 * @param {string} path - Path of `value` within the envelope.
 * @param {Array<object>} out - Accumulator, mutated in place.
 * @returns {void}
 */
function collectHashEvidence(value, path, out) {
  if (Array.isArray(value)) {
    value.forEach((item, index) => {
      collectHashEvidence(item, `${path}[${index}]`, out);
    });
    return;
  }
  if (!isRecord3(value)) return;

  const isRedactedLeaf = value.redacted === true && typeof value.sha256 === "string";
  if (!isRedactedLeaf) {
    Object.entries(value).forEach(([key, child]) => {
      collectHashEvidence(child, `${path}.${key}`, out);
    });
    return;
  }

  const entry = { path, sha256: value.sha256 };
  if (value.shape !== void 0) entry.shape = value.shape;
  out.push(entry);
}
|
|
1290
|
+
/**
 * Validate an untrusted compact-evidence value and return a normalized copy.
 *
 * Fields are checked in a fixed order (eventKinds, phases, toolNames,
 * redactedHashes, artifacts, failureMessage); the first invalid field
 * raises a ValidationError through the `expect*` helpers.
 *
 * @param {unknown} value - Candidate compact evidence.
 * @returns {object} Validated compact evidence.
 */
function validateCompactEvidence(value) {
  const source = expectRecord2(value, "compactEvidence");

  const eventKinds = expectNumberRecord(source.eventKinds, "compactEvidence.eventKinds");
  const phases = expectStringArray(source.phases, "compactEvidence.phases");
  const toolNameList = expectStringArray(source.toolNames, "compactEvidence.toolNames");

  const hashes = expectArray(source.redactedHashes, "compactEvidence.redactedHashes").map(
    (item, index) => {
      const itemPath = `compactEvidence.redactedHashes[${index}]`;
      const record = expectRecord2(item, itemPath);
      const entry = {
        path: expectString2(record.path, `${itemPath}.path`),
        sha256: expectString2(record.sha256, `${itemPath}.sha256`)
      };
      if (record.shape !== void 0) entry.shape = record.shape;
      return entry;
    }
  );

  const artifacts = expectArray(source.artifacts, "compactEvidence.artifacts").map(
    (item, index) => {
      const itemPath = `compactEvidence.artifacts[${index}]`;
      const record = expectRecord2(item, itemPath);
      const entry = {
        kind: expectString2(record.kind, `${itemPath}.kind`),
        uri: expectString2(record.uri, `${itemPath}.uri`)
      };
      if (record.contentType !== void 0) {
        entry.contentType = expectString2(record.contentType, `${itemPath}.contentType`);
      }
      if (record.sha256 !== void 0) {
        entry.sha256 = expectString2(record.sha256, `${itemPath}.sha256`);
      }
      return entry;
    }
  );

  const validated = {
    eventKinds,
    phases,
    toolNames: toolNameList,
    redactedHashes: hashes,
    artifacts
  };
  if (source.failureMessage !== void 0) {
    validated.failureMessage = expectString2(
      source.failureMessage,
      "compactEvidence.failureMessage"
    );
  }
  return validated;
}
|
|
1325
|
+
/**
 * Assert that two compact-evidence objects agree field by field.
 *
 * @param {object} actual - Evidence supplied by the caller.
 * @param {object} expected - Evidence recomputed from the trace envelope.
 * @returns {void}
 * @throws {ValidationError} On the first field that differs.
 */
function assertCompactEvidenceEqual(actual, expected) {
  const fieldChecks = [
    ["eventKinds", assertNumberRecordEqual],
    ["phases", assertStringArrayEqual],
    ["toolNames", assertStringArrayEqual],
    ["redactedHashes", assertJsonArrayEqual],
    ["artifacts", assertJsonArrayEqual]
  ];
  for (const [field, check] of fieldChecks) {
    check(actual[field], expected[field], `compactEvidence.${field}`);
  }
  if (actual.failureMessage === expected.failureMessage) return;
  throw new ValidationError("compactEvidence.failureMessage does not match trace envelope");
}
|
|
1339
|
+
/**
 * Validate a sanitization report and return a normalized copy.
 *
 * Scalar counters must be non-negative finite numbers; `byRule` and
 * `droppedPayloadKeys` must be records of such numbers.
 *
 * @param {unknown} value - Candidate sanitization report.
 * @returns {object} Validated report.
 */
function validateSanitizationReport(value) {
  const source = expectRecord2(value, "sanitization");
  const counter = (field) => expectNonNegativeNumber(source[field], `sanitization.${field}`);
  const counterMap = (field) => expectNumberRecord(source[field], `sanitization.${field}`);

  // Evaluated in declaration order so the first invalid field is reported.
  const redactionCount = counter("redactionCount");
  const byRule = counterMap("byRule");
  const hashedArgs = counter("hashedArgs");
  const truncatedStrings = counter("truncatedStrings");
  const droppedPayloadKeys = counterMap("droppedPayloadKeys");
  const droppedArtifactContents = counter("droppedArtifactContents");

  return {
    redactionCount,
    byRule,
    hashedArgs,
    truncatedStrings,
    droppedPayloadKeys,
    droppedArtifactContents
  };
}
|
|
1359
|
+
/**
 * Validate the optional `links` record.
 *
 * Each known link is copied only when present, and must then be a
 * non-empty string. Unknown keys are dropped.
 *
 * @param {unknown} value - Candidate links object.
 * @returns {object} Links containing only the validated known keys.
 */
function validateLinks(value) {
  const source = expectRecord2(value, "links");
  const knownLinks = [
    "traceArtifactUri",
    "exportBundleUri",
    "partnerReportUri",
    "intelligenceRunUri"
  ];
  const links = {};
  for (const field of knownLinks) {
    if (source[field] === void 0) continue;
    links[field] = expectString2(source[field], `links.${field}`);
  }
  return links;
}
|
|
1368
|
+
/**
 * Return `value` when it is a non-empty string, otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} field - Field name used in the error message.
 * @returns {string} The validated string.
 * @throws {ValidationError} When `value` is not a non-empty string.
 */
function requireNonEmpty(value, field) {
  const isNonEmptyString = typeof value === "string" && value.length > 0;
  if (isNonEmptyString) return value;
  throw new ValidationError(`workflow intelligence ${field} must be a non-empty string`);
}
|
|
1374
|
+
/**
 * Return `value` when it is a non-null, non-array object, otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {object} The same value.
 * @throws {ValidationError} When `value` is not a record.
 */
function expectRecord2(value, path) {
  if (isRecord3(value)) return value;
  throw new ValidationError(`${path}: expected object`);
}
|
|
1378
|
+
/**
 * Return `value` when it is an array, otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {Array<unknown>} The same array.
 * @throws {ValidationError} When `value` is not an array.
 */
function expectArray(value, path) {
  if (Array.isArray(value)) return value;
  throw new ValidationError(`${path}: expected array`);
}
|
|
1382
|
+
/**
 * Validate that `value` is an array whose items are all non-empty strings.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {string[]} A new array of the validated strings.
 */
function expectStringArray(value, path) {
  const items = expectArray(value, path);
  const checkItem = (item, index) => expectString2(item, `${path}[${index}]`);
  return items.map(checkItem);
}
|
|
1385
|
+
/**
 * Validate that `value` is a record whose values are all non-negative
 * finite numbers, returning a fresh copy.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {Object<string, number>} Copy of the validated record.
 */
function expectNumberRecord(value, path) {
  const source = expectRecord2(value, path);
  return Object.entries(source).reduce((copy, [key, item]) => {
    copy[key] = expectNonNegativeNumber(item, `${path}.${key}`);
    return copy;
  }, {});
}
|
|
1393
|
+
/**
 * Return `value` when it is a non-empty string, otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {string} The validated string.
 * @throws {ValidationError} When `value` is not a non-empty string.
 */
function expectString2(value, path) {
  const isNonEmptyString = typeof value === "string" && value.length > 0;
  if (isNonEmptyString) return value;
  throw new ValidationError(`${path}: expected non-empty string`);
}
|
|
1399
|
+
/**
 * Return `value` when it is a finite number that is not below zero,
 * otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {number} The validated number.
 * @throws {ValidationError} For non-numbers, NaN, infinities and negatives.
 */
function expectNonNegativeNumber(value, path) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  if (Number.isFinite(value) && value >= 0) return value;
  throw new ValidationError(`${path}: expected non-negative number`);
}
|
|
1405
|
+
/**
 * Assert that two number records have the same keys and the same values.
 *
 * @param {Object<string, number>} actual - Record supplied by the caller.
 * @param {Object<string, number>} expected - Record derived from the trace.
 * @param {string} path - Path used in the error message.
 * @returns {void}
 * @throws {ValidationError} When key sets or any value differ.
 */
function assertNumberRecordEqual(actual, expected, path) {
  const actualKeys = Object.keys(actual).sort();
  const expectedKeys = Object.keys(expected).sort();
  assertStringArrayEqual(actualKeys, expectedKeys, path);

  const mismatchedKey = expectedKeys.find((key) => actual[key] !== expected[key]);
  if (mismatchedKey !== void 0) {
    throw new ValidationError(`${path}.${mismatchedKey} does not match trace envelope`);
  }
}
|
|
1415
|
+
/**
 * Assert that two arrays have equal length and strictly equal items
 * at every position.
 *
 * @param {Array<unknown>} actual - Array supplied by the caller.
 * @param {Array<unknown>} expected - Array derived from the trace.
 * @param {string} path - Path used in the error message.
 * @returns {void}
 * @throws {ValidationError} When the arrays differ.
 */
function assertStringArrayEqual(actual, expected, path) {
  const sameLength = actual.length === expected.length;
  const identical = sameLength && actual.every((item, index) => item === expected[index]);
  if (identical) return;
  throw new ValidationError(`${path} does not match trace envelope`);
}
|
|
1420
|
+
/**
 * Assert that two arrays have equal length and that items at the same
 * position serialize to the same JSON text.
 *
 * @param {Array<unknown>} actual - Array supplied by the caller.
 * @param {Array<unknown>} expected - Array derived from the trace.
 * @param {string} path - Path used in the error message.
 * @returns {void}
 * @throws {ValidationError} When the arrays differ.
 */
function assertJsonArrayEqual(actual, expected, path) {
  const sameJson = (item, index) => JSON.stringify(item) === JSON.stringify(expected[index]);
  const identical = actual.length === expected.length && actual.every(sameJson);
  if (identical) return;
  throw new ValidationError(`${path} does not match trace envelope`);
}
|
|
1425
|
+
/**
 * Return `value` when it is a non-empty string, otherwise `undefined`.
 *
 * @param {unknown} value - Value to check.
 * @returns {string|undefined}
 */
function stringValue2(value) {
  if (typeof value !== "string") return void 0;
  return value.length > 0 ? value : void 0;
}
|
|
1428
|
+
/**
 * Report whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value - Value to check.
 * @returns {boolean}
 */
function isRecord3(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
1431
|
+
|
|
1432
|
+
// src/workflow/run-record.ts
|
|
1433
|
+
/**
 * Convert a workflow trace envelope into a validated RunRecord.
 *
 * The score defaults to 0 for a failed trace and 1 otherwise, and is
 * clamped to [0, 1]. Caller-supplied `options.raw` metrics are kept only
 * when finite, and the workflow_* counters from the summary override them.
 *
 * @param {unknown} input - Trace envelope, validated before use.
 * @param {object} options - Record identity (experimentId, candidateId,
 *   seed, model, hashes, splitTag, ...) and optional score/raw overrides.
 * @returns {object} The result of validateRunRecord.
 */
function workflowTraceToRunRecord(input, options) {
  const envelope = validateWorkflowTraceEnvelope(input);
  const summary = summarizeWorkflowTrace(envelope);
  const fallbackScore = summary.failed ? 0 : 1;
  const score = clampScore(options.score ?? fallbackScore);

  const workflowMetrics = {
    score,
    workflow_failed: summary.failed ? 1 : 0,
    workflow_events: summary.eventCount,
    workflow_phases: summary.phaseCount,
    workflow_branches: summary.branchCount,
    workflow_branch_failures: summary.failedBranchCount,
    workflow_agent_calls: summary.agentCalls,
    workflow_loop_calls: summary.loopCalls,
    workflow_verifier_calls: summary.verifierCalls,
    workflow_analyst_calls: summary.analystCalls,
    workflow_reviewer_calls: summary.reviewerCalls,
    workflow_agent_failures: summary.agentFailures,
    workflow_loop_failures: summary.loopFailures,
    workflow_verifier_failures: summary.verifierFailures,
    workflow_analyst_failures: summary.analystFailures,
    workflow_reviewer_failures: summary.reviewerFailures
  };
  // Caller metrics first so the workflow counters win on key collisions.
  const raw = { ...finiteOnly(options.raw ?? {}), ...workflowMetrics };

  const scoreKey = options.splitTag === "holdout" ? "holdoutScore" : "searchScore";
  const outcome = { [scoreKey]: score, raw };

  const record = {
    runId: options.runId ?? envelope.runId,
    experimentId: options.experimentId,
    candidateId: options.candidateId,
    seed: options.seed,
    model: options.model,
    promptHash: options.promptHash,
    configHash: options.configHash,
    commitSha: options.commitSha,
    wallMs: summary.durationMs,
    costUsd: summary.costUsd,
    tokenUsage: summary.tokenUsage
  };
  if (options.judgeMetadata) record.judgeMetadata = options.judgeMetadata;
  record.outcome = outcome;
  record.failureMode = options.failureMode ?? (summary.failed ? "workflow_failed" : void 0);
  record.splitTag = options.splitTag;
  if (options.scenarioId) record.scenarioId = options.scenarioId;
  if (options.agentProfile) record.agentProfile = options.agentProfile;

  return validateRunRecord(record);
}
|
|
1477
|
+
/**
 * Copy only the entries of `values` whose value is a finite number.
 *
 * @param {object} values - Source record.
 * @returns {Object<string, number>} New record with the finite entries.
 */
function finiteOnly(values) {
  const kept = {};
  const finiteEntries = Object.entries(values).filter(([, value]) => Number.isFinite(value));
  for (const [key, value] of finiteEntries) {
    kept[key] = value;
  }
  return kept;
}
|
|
1484
|
+
/**
 * Clamp a score into the range [0, 1]; anything that is not a finite
 * number becomes 0.
 *
 * @param {unknown} value - Candidate score.
 * @returns {number} Score within [0, 1].
 */
function clampScore(value) {
  if (!Number.isFinite(value)) return 0;
  const cappedAtOne = Math.min(1, value);
  return Math.max(0, cappedAtOne);
}
|
|
1488
|
+
|
|
1489
|
+
// src/workflow/trajectory.ts
|
|
1490
|
+
/**
 * Convert a workflow trace envelope into a feedback trajectory.
 *
 * `createdAt` and `updatedAt` come from the first and last event
 * timestamps. Success defaults to the inverse of the summary's failure
 * flag unless `options.success` is given.
 *
 * @param {unknown} input - Trace envelope, validated before use.
 * @param {object} options - projectId, scenarioId, task, and optional
 *   success, score, metadata, split and tags.
 * @returns {object} Feedback trajectory.
 */
function workflowTraceToFeedbackTrajectory(input, options) {
  const envelope = validateWorkflowTraceEnvelope(input);
  const summary = summarizeWorkflowTrace(envelope);
  const { events } = envelope;
  const createdAt = iso(events[0]?.timestamp);
  const updatedAt = iso(events.at(-1)?.timestamp);

  const task = {
    intent: options.task,
    context: {
      topology: envelope.topology,
      metadata: envelope.metadata
    }
  };
  const attempts = workflowEventsToAttempts(events);

  const metrics = {
    workflow_events: summary.eventCount,
    workflow_phases: summary.phaseCount,
    workflow_branches: summary.branchCount,
    workflow_branch_failures: summary.failedBranchCount,
    workflow_agent_calls: summary.agentCalls,
    workflow_loop_calls: summary.loopCalls,
    workflow_verifier_calls: summary.verifierCalls,
    workflow_analyst_calls: summary.analystCalls,
    workflow_reviewer_calls: summary.reviewerCalls,
    workflow_agent_failures: summary.agentFailures,
    workflow_loop_failures: summary.loopFailures,
    workflow_verifier_failures: summary.verifierFailures,
    workflow_analyst_failures: summary.analystFailures,
    workflow_reviewer_failures: summary.reviewerFailures,
    workflow_tokens_input: summary.tokenUsage.input,
    workflow_tokens_output: summary.tokenUsage.output
  };
  const outcome = {
    success: options.success ?? !summary.failed,
    score: options.score,
    costUsd: summary.costUsd,
    observedAt: updatedAt,
    detail: summary.failureMessage,
    metrics,
    // Caller metadata is spread last, so it can override durationMs.
    metadata: {
      durationMs: summary.durationMs,
      ...options.metadata ?? {}
    }
  };

  return {
    id: envelope.runId,
    projectId: options.projectId,
    scenarioId: options.scenarioId,
    task,
    attempts,
    labels: [],
    outcome,
    split: options.split,
    tags: options.tags,
    createdAt,
    updatedAt,
    metadata: {
      traceVersion: envelope.traceVersion,
      artifacts: envelope.artifacts
    }
  };
}
|
|
1547
|
+
/**
 * Turn workflow events into trajectory attempts.
 *
 * Only events whose kind maps to an artifact type are kept. `stepIndex`
 * counts kept events, not positions in `events`.
 *
 * @param {Iterable<object>} events - Workflow events.
 * @returns {Array<object>} One attempt per mapped event, in order.
 */
function workflowEventsToAttempts(events) {
  const attempts = [];
  for (const event of events) {
    const artifactType = artifactTypeForWorkflowEvent(event.kind);
    if (!artifactType) continue;

    const { payload } = event;
    const stepIndex = attempts.length;
    const metadata = {
      eventKind: event.kind,
      label: payload.label,
      phase: payload.phase,
      costUsd: payload.costUsd,
      tokenUsage: payload.tokenUsage
    };
    if (event.kind.endsWith(".failed")) {
      metadata.failed = true;
      metadata.message = payload.message;
      metadata.code = payload.code;
    }

    attempts.push({
      id: `${event.runId}:${stepIndex}`,
      stepIndex,
      artifactType,
      // Prefer the nested trace when the payload carries one.
      artifact: payload.trace ?? payload,
      createdAt: iso(event.timestamp),
      metadata
    });
  }
  return attempts;
}
|
|
1571
|
+
/**
 * Map a workflow event kind to the artifact type of its attempt.
 *
 * Agent end/fail events are "action", analyst ones are "data", and
 * loop/verifier/reviewer ones are "decision". Every other kind is null.
 *
 * @param {unknown} kind - Event kind.
 * @returns {"action"|"data"|"decision"|null}
 */
function artifactTypeForWorkflowEvent(kind) {
  const typeByKind = /* @__PURE__ */ new Map([
    ["workflow.agent.ended", "action"],
    ["workflow.agent.failed", "action"],
    ["workflow.analyst.ended", "data"],
    ["workflow.analyst.failed", "data"],
    ["workflow.loop.ended", "decision"],
    ["workflow.verifier.ended", "decision"],
    ["workflow.reviewer.ended", "decision"],
    ["workflow.loop.failed", "decision"],
    ["workflow.verifier.failed", "decision"],
    ["workflow.reviewer.failed", "decision"]
  ]);
  return typeByKind.get(kind) ?? null;
}
|
|
1590
|
+
/**
 * Format an epoch-millisecond timestamp as an ISO-8601 string, using
 * the Unix epoch when the timestamp is not a finite number.
 *
 * @param {unknown} timestamp - Epoch milliseconds.
 * @returns {string} ISO-8601 timestamp.
 */
function iso(timestamp) {
  const epochMs = Number.isFinite(timestamp) ? timestamp : 0;
  return new Date(epochMs).toISOString();
}
|
|
1593
|
+
|
|
1594
|
+
// src/workflow/partner-report.ts
|
|
1595
|
+
// Schema version tag: written as `schemaVersion` by buildWorkflowPartnerReport
// and required to match by validateWorkflowPartnerReport.
var REPORT_VERSION = "workflow-partner-report-v1";
|
|
1596
|
+
/**
 * Build a partner-facing workflow report from a trace envelope.
 *
 * The envelope is sanitized first, and every derived artifact (feedback
 * pack, trajectory, optional run record) is built from the sanitized copy.
 * Findings from the analyst, the verifier layers and the failure clusters
 * are merged, sorted with comparePartnerFindings, then filtered into the
 * docs/API and PR-ready lists.
 *
 * @param {object} options - envelope, sanitize, trajectory, and optional
 *   runRecord, links and generatedAt; also forwarded to the feedback-pack
 *   builder with the sanitized envelope substituted.
 * @returns {object} Partner report including its export bundle.
 */
function buildWorkflowPartnerReport(options) {
  const sanitized = sanitizeWorkflowTraceEnvelope(options.envelope, options.sanitize);
  const safeEnvelope = sanitized.envelope;
  const feedbackPack = buildWorkflowAnalystFeedbackPack({
    ...options,
    envelope: safeEnvelope
  });
  const trajectory = workflowTraceToFeedbackTrajectory(safeEnvelope, options.trajectory);

  let runRecord;
  if (options.runRecord) {
    // The record always carries the sanitized envelope's runId.
    runRecord = workflowTraceToRunRecord(safeEnvelope, {
      ...options.runRecord,
      runId: safeEnvelope.runId
    });
  }

  const fromAnalyst = (finding) => {
    const entry = {
      source: "analyst",
      severity: finding.severity,
      area: finding.area,
      claim: finding.claim,
      evidence: finding.evidenceRefs
    };
    if (finding.recommendedAction) entry.recommendedAction = finding.recommendedAction;
    const metadata = {
      findingId: finding.findingId,
      analystId: finding.analystId,
      confidence: finding.confidence
    };
    if (finding.subject) metadata.subject = finding.subject;
    entry.metadata = metadata;
    return entry;
  };

  const fromVerifier = (layer, finding) => {
    const entry = {
      source: "verifier",
      severity: finding.severity,
      area: layer.layer,
      claim: finding.message,
      evidence: finding.evidence ? [{ kind: "artifact", uri: finding.evidence }] : []
    };
    if (layer.reason) {
      entry.recommendedAction = `Fix verifier layer "${layer.layer}": ${layer.reason}`;
    }
    entry.metadata = {
      status: layer.status,
      score: layer.score
    };
    return entry;
  };

  const fromCluster = (cluster) => {
    const entry = {
      source: "failure-cluster",
      severity: "medium",
      area: "failure-cluster",
      claim: `${cluster.name} affects ${(cluster.share * 100).toFixed(1)}% of failed workflow runs`,
      evidence: cluster.exemplars.map((runId) => ({
        kind: "artifact",
        uri: `run://${runId}`
      }))
    };
    if (cluster.suggestedFix) entry.recommendedAction = cluster.suggestedFix;
    entry.metadata = {
      clusterId: cluster.id,
      runCount: cluster.runCount,
      source: cluster.source
    };
    return entry;
  };

  const analystFindings = feedbackPack.findings.map(fromAnalyst);
  const verifierLayers = feedbackPack.verifier?.layers ?? [];
  const verifierFindings = verifierLayers.flatMap(
    (layer) => layer.findings.map((finding) => fromVerifier(layer, finding))
  );
  const clusterFindings = feedbackPack.failureClusters.map(fromCluster);

  const allFindings = [...analystFindings, ...verifierFindings, ...clusterFindings];
  allFindings.sort(comparePartnerFindings);

  const exportBundle = {
    traceEnvelope: safeEnvelope,
    sanitization: sanitized.report,
    feedbackPack,
    trajectory
  };
  if (runRecord) exportBundle.runRecord = runRecord;

  const report = {
    schemaVersion: REPORT_VERSION,
    runId: safeEnvelope.runId,
    generatedAt: options.generatedAt ?? feedbackPack.generatedAt,
    summary: feedbackPack.summary,
    docsApiGaps: allFindings.filter(isDocsApiGap),
    prReadyFindings: allFindings.filter(isPrReadyFinding),
    failureClusters: feedbackPack.failureClusters,
    recommendations: feedbackPack.recommendations,
    traceArtifacts: safeEnvelope.artifacts
  };
  if (options.links) report.links = options.links;
  report.exportBundle = exportBundle;
  return report;
}
|
|
1674
|
+
/**
 * Validate an untrusted partner report and return a normalized copy.
 *
 * Beyond shape checks, the report's summary, the feedback pack's summary
 * and the trace artifacts are all recomputed from the bundled trace
 * envelope and must match it.
 *
 * @param {unknown} input - Candidate partner report.
 * @returns {object} Validated report; `summary` is the recomputed one.
 * @throws {ValidationError} On any shape or consistency violation.
 */
function validateWorkflowPartnerReport(input) {
  const source = expectRecord3(input, "workflow partner report");
  if (source.schemaVersion !== REPORT_VERSION) {
    throw new ValidationError(`workflow partner report schemaVersion must be ${REPORT_VERSION}`);
  }

  const runId = expectString3(source.runId, "runId");
  const generatedAt = expectString3(source.generatedAt, "generatedAt");
  const exportBundle = validateExportBundle(source.exportBundle, runId);

  // The envelope is the source of truth for both summaries.
  const expectedSummary = summarizeWorkflowTrace(exportBundle.traceEnvelope);
  const reportedSummary = expectRecord3(source.summary, "summary");
  assertJsonEqual(reportedSummary, expectedSummary, "summary");
  assertJsonEqual(
    exportBundle.feedbackPack.summary,
    expectedSummary,
    "exportBundle.feedbackPack.summary"
  );

  const traceArtifacts = validateOptionalArtifacts(source.traceArtifacts, "traceArtifacts");
  assertJsonEqual(
    traceArtifacts ?? [],
    exportBundle.traceEnvelope.artifacts ?? [],
    "traceArtifacts"
  );

  const docsApiGaps = expectArray2(source.docsApiGaps, "docsApiGaps");
  const prReadyFindings = expectArray2(source.prReadyFindings, "prReadyFindings");
  const failureClusters = expectArray2(source.failureClusters, "failureClusters");
  const recommendations = expectStringArray2(source.recommendations, "recommendations");

  const report = {
    schemaVersion: REPORT_VERSION,
    runId,
    generatedAt,
    summary: expectedSummary,
    docsApiGaps,
    prReadyFindings,
    failureClusters,
    recommendations,
    traceArtifacts
  };
  if (source.links !== void 0) report.links = validateLinks2(source.links);
  report.exportBundle = exportBundle;
  return report;
}
|
|
1715
|
+
/**
 * Render a partner report as plain text, one item per line.
 *
 * Each section (failure clusters, docs/API gaps, PR-ready findings,
 * recommendations) gets a header when it has entries, and lists at most
 * `options.maxFindings` of them (default 8).
 *
 * @param {object} report - Partner report to render.
 * @param {{maxFindings?: number}} [options] - Rendering options.
 * @returns {string} Newline-joined report text.
 */
function renderWorkflowPartnerReport(report, options = {}) {
  const limit = options.maxFindings ?? 8;
  const out = [];

  // Header depends on the full list; items are capped at `limit`.
  const addSection = (title, entries, format) => {
    if (entries.length > 0) out.push(title);
    for (const entry of entries.slice(0, limit)) out.push(format(entry));
  };
  const formatFinding = (finding) => `- ${finding.severity} ${finding.area}: ${finding.claim}`;

  out.push(`Workflow partner report for ${report.runId}`);
  out.push(
    [
      `status=${report.summary.failed ? "failed" : "completed"}`,
      "scoreEvidence",
      `events=${report.summary.eventCount}`,
      `agents=${report.summary.agentCalls}`,
      `verifier=${report.summary.verifierCalls}`,
      `analyst=${report.summary.analystCalls}`,
      `reviewer=${report.summary.reviewerCalls}`,
      `costUsd=${report.summary.costUsd.toFixed(6)}`
    ].join(" ")
  );
  addSection(
    "Failure clusters:",
    report.failureClusters,
    (cluster) => `- ${cluster.name} share=${cluster.share.toFixed(3)} exemplars=${cluster.exemplars.join(",")}`
  );
  addSection("Docs/API gaps:", report.docsApiGaps, formatFinding);
  addSection("PR-ready findings:", report.prReadyFindings, formatFinding);
  addSection("Recommendations:", report.recommendations, (recommendation) => `- ${recommendation}`);

  return out.filter((line) => Boolean(line)).join("\n");
}
|
|
1733
|
+
/**
 * Validate the export bundle of a partner report.
 *
 * The trace envelope, feedback pack, trajectory and optional run record
 * must all carry the report's `runId`. The feedback pack and trajectory
 * get shape checks on their top-level collections only.
 *
 * @param {unknown} value - Candidate export bundle.
 * @param {string} runId - Run id the report claims.
 * @returns {object} Bundle with a validated envelope and run record; the
 *   other members are returned as supplied.
 * @throws {ValidationError} On any shape or run-id mismatch.
 */
function validateExportBundle(value, runId) {
  const bundle = expectRecord3(value, "exportBundle");

  const traceEnvelope = validateWorkflowTraceEnvelope(bundle.traceEnvelope);
  if (traceEnvelope.runId !== runId) {
    throw new ValidationError("exportBundle.traceEnvelope.runId must match report runId");
  }

  const packPath = "exportBundle.feedbackPack";
  const feedbackPack = expectRecord3(bundle.feedbackPack, packPath);
  if (feedbackPack.schemaVersion !== "workflow-feedback-pack-v1") {
    throw new ValidationError(
      "exportBundle.feedbackPack.schemaVersion must be workflow-feedback-pack-v1"
    );
  }
  if (feedbackPack.runId !== runId) {
    throw new ValidationError("exportBundle.feedbackPack.runId must match report runId");
  }
  expectRecord3(feedbackPack.toolUsage, `${packPath}.toolUsage`);
  expectArray2(feedbackPack.failureClusters, `${packPath}.failureClusters`);
  expectArray2(feedbackPack.findings, `${packPath}.findings`);
  expectStringArray2(feedbackPack.recommendations, `${packPath}.recommendations`);
  expectStringArray2(feedbackPack.driverContextLines, `${packPath}.driverContextLines`);

  const trajectoryPath = "exportBundle.trajectory";
  const trajectory = expectRecord3(bundle.trajectory, trajectoryPath);
  if (trajectory.id !== runId) {
    throw new ValidationError("exportBundle.trajectory.id must match report runId");
  }
  expectArray2(trajectory.attempts, `${trajectoryPath}.attempts`);
  expectArray2(trajectory.labels, `${trajectoryPath}.labels`);

  let runRecord;
  if (bundle.runRecord !== void 0) {
    runRecord = validateWorkflowRunRecord(bundle.runRecord, traceEnvelope);
  }
  expectRecord3(bundle.sanitization, "exportBundle.sanitization");

  const validated = {
    traceEnvelope,
    sanitization: bundle.sanitization,
    feedbackPack: bundle.feedbackPack,
    trajectory: bundle.trajectory
  };
  if (runRecord) validated.runRecord = runRecord;
  return validated;
}
|
|
1769
|
+
/**
 * Validate a run record and check it against its trace envelope.
 *
 * The record's `runId` must match the envelope's, and its
 * `outcome.raw.workflow_events` must equal the envelope's event count.
 *
 * @param {unknown} value - Candidate run record.
 * @param {object} envelope - Validated trace envelope.
 * @returns {object} The validated run record.
 * @throws {ValidationError} When the record disagrees with the envelope.
 */
function validateWorkflowRunRecord(value, envelope) {
  const record = validateRunRecord(value);
  const { eventCount } = summarizeWorkflowTrace(envelope);

  const sameRun = record.runId === envelope.runId;
  if (!sameRun) {
    throw new ValidationError("exportBundle.runRecord.runId must match trace envelope runId");
  }
  const sameEventCount = record.outcome.raw.workflow_events === eventCount;
  if (!sameEventCount) {
    throw new ValidationError("exportBundle.runRecord outcome does not match trace event count");
  }
  return record;
}
|
|
1780
|
+
/**
 * Validate the optional `links` record of a partner report.
 *
 * Each known link is copied only when present, and must then be a
 * non-empty string. Unknown keys are dropped.
 *
 * @param {unknown} value - Candidate links object.
 * @returns {object} Links containing only the validated known keys.
 */
function validateLinks2(value) {
  const source = expectRecord3(value, "links");
  const knownLinks = [
    "traceArtifactUri",
    "exportBundleUri",
    "partnerReportUri",
    "intelligenceRunUri"
  ];
  const links = {};
  for (const field of knownLinks) {
    if (source[field] === void 0) continue;
    links[field] = expectString3(source[field], `links.${field}`);
  }
  return links;
}
|
|
1789
|
+
/**
 * Validate an optional artifact list.
 *
 * `undefined` passes through unchanged. Otherwise every item needs a
 * non-empty `kind` and `uri`; `contentType`, `sha256` and `metadata` are
 * validated only when present.
 *
 * @param {unknown} value - Candidate artifact list or undefined.
 * @param {string} path - Path used as the error message prefix.
 * @returns {Array<object>|undefined} Normalized artifacts.
 */
function validateOptionalArtifacts(value, path) {
  if (value === void 0) return void 0;

  const validateArtifact = (item, index) => {
    const itemPath = `${path}[${index}]`;
    const source = expectRecord3(item, itemPath);
    const artifact = {
      kind: expectString3(source.kind, `${itemPath}.kind`),
      uri: expectString3(source.uri, `${itemPath}.uri`)
    };
    if (source.contentType !== void 0) {
      artifact.contentType = expectString3(source.contentType, `${itemPath}.contentType`);
    }
    if (source.sha256 !== void 0) {
      artifact.sha256 = expectString3(source.sha256, `${itemPath}.sha256`);
    }
    if (source.metadata !== void 0) {
      artifact.metadata = expectRecord3(source.metadata, `${itemPath}.metadata`);
    }
    return artifact;
  };

  return expectArray2(value, path).map(validateArtifact);
}
|
|
1803
|
+
/**
 * Return `value` when it is a non-null, non-array object, otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {object} The same value.
 * @throws {ValidationError} When `value` is not a record.
 */
function expectRecord3(value, path) {
  const isPlainRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (isPlainRecord) return value;
  throw new ValidationError(`${path}: expected object`);
}
|
|
1809
|
+
/**
 * Return `value` when it is an array, otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {Array<unknown>} The same array.
 * @throws {ValidationError} When `value` is not an array.
 */
function expectArray2(value, path) {
  if (Array.isArray(value)) return value;
  throw new ValidationError(`${path}: expected array`);
}
|
|
1813
|
+
/**
 * Return `value` when it is a non-empty string, otherwise throw.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {string} The validated string.
 * @throws {ValidationError} When `value` is not a non-empty string.
 */
function expectString3(value, path) {
  const isNonEmptyString = typeof value === "string" && value.length > 0;
  if (isNonEmptyString) return value;
  throw new ValidationError(`${path}: expected non-empty string`);
}
|
|
1819
|
+
/**
 * Validate that `value` is an array whose items are all non-empty strings.
 *
 * @param {unknown} value - Value to check.
 * @param {string} path - Path used as the error message prefix.
 * @returns {string[]} A new array of the validated strings.
 */
function expectStringArray2(value, path) {
  const items = expectArray2(value, path);
  const checkItem = (item, index) => expectString3(item, `${path}[${index}]`);
  return items.map(checkItem);
}
|
|
1822
|
+
/**
 * Assert that two values serialize to the same JSON once their object
 * keys are put in a canonical order.
 *
 * @param {unknown} actual - Value supplied by the caller.
 * @param {unknown} expected - Value derived from the trace envelope.
 * @param {string} path - Path used in the error message.
 * @returns {void}
 * @throws {ValidationError} When the canonical forms differ.
 */
function assertJsonEqual(actual, expected, path) {
  const actualText = JSON.stringify(stableJson(actual));
  const expectedText = JSON.stringify(stableJson(expected));
  if (actualText === expectedText) return;
  throw new ValidationError(`${path} does not match trace envelope`);
}
|
|
1827
|
+
/**
 * Return a deep copy of `value` whose object keys are in a canonical
 * order, so that `JSON.stringify` output does not depend on insertion order.
 *
 * Keys are ordered by UTF-16 code unit rather than `localeCompare`.
 * Locale-aware comparison depends on the runtime locale and reports
 * canonically equivalent but distinct keys (for example "\u00e9" and
 * "e\u0301") as equal, which left their order to insertion order.
 *
 * @param {unknown} value - JSON-like value.
 * @returns {unknown} Copy with canonically ordered keys; primitives and
 *   falsy values are returned unchanged.
 */
function stableJson(value) {
  if (Array.isArray(value)) return value.map((item) => stableJson(item));
  if (!value || typeof value !== "object") return value;
  const byCodeUnit = ([left], [right]) => {
    if (left < right) return -1;
    return left > right ? 1 : 0;
  };
  return Object.fromEntries(
    Object.entries(value).sort(byCodeUnit).map(([key, child]) => [key, stableJson(child)])
  );
}
|
|
1834
|
+
/**
 * Report whether a finding concerns documentation or the API surface.
 *
 * The finding's area, claim, recommended action and string
 * `metadata.subject` are joined, lower-cased and searched for a
 * docs/API keyword as a whole word.
 *
 * @param {object} finding - Partner finding.
 * @returns {boolean} True when a docs/API keyword is present.
 */
function isDocsApiGap(finding) {
  const subject = finding.metadata?.subject;
  const parts = [
    finding.area,
    finding.claim,
    finding.recommendedAction,
    typeof subject === "string" ? subject : void 0
  ];
  const text = parts.filter(Boolean).join(" ").toLowerCase();
  return /\b(api|sdk|docs?|documentation|reference|integration|example|quickstart)\b/.test(text);
}
|
|
1843
|
+
/**
 * Report whether a finding is actionable enough for a pull request:
 * it has a recommended action, or its severity is critical or high.
 *
 * @param {object} finding - Partner finding.
 * @returns {boolean}
 */
function isPrReadyFinding(finding) {
  if (finding.recommendedAction !== void 0) return true;
  return ["critical", "high"].includes(finding.severity);
}
|
|
1846
|
+
/**
 * Sort comparator for partner findings: higher severity first, then
 * by area using locale comparison.
 *
 * @param {object} left - First finding.
 * @param {object} right - Second finding.
 * @returns {number} Comparator result for Array.prototype.sort.
 */
function comparePartnerFindings(left, right) {
  const bySeverity = severityRank2(right.severity) - severityRank2(left.severity);
  return bySeverity !== 0 ? bySeverity : left.area.localeCompare(right.area);
}
|
|
1851
|
+
/**
 * Map a severity label to a numeric rank (higher is more severe).
 *
 * Unrecognized labels rank 0, below "info". Without that fallback the
 * function returned `undefined`, so comparators subtracting two ranks
 * got NaN and sorted findings with unknown severities inconsistently.
 *
 * @param {unknown} severity - Severity label.
 * @returns {number} Rank from 5 ("critical") down to 1 ("info"), or 0.
 */
function severityRank2(severity) {
  switch (severity) {
    case "critical":
      return 5;
    case "high":
      return 4;
    case "medium":
      return 3;
    case "low":
      return 2;
    case "info":
      return 1;
    default:
      return 0;
  }
}
|
|
1865
|
+
|
|
1866
|
+
// src/workflow/promotion-gate.ts
|
|
1867
|
+
// Version tag for workflow driver promotion decisions.
// NOTE(review): its use is outside this view; presumably stamped on the
// decision object — confirm.
var DECISION_VERSION = "workflow-driver-promotion-v1";
|
|
1868
|
+
// Baseline candidateId used by decideWorkflowDriverPromotion when
// options.baselineStrategyId is not provided.
var DEFAULT_BASELINE_STRATEGY = "reviewer-loop-v1";
|
|
1869
|
+
// Candidate candidateId used by decideWorkflowDriverPromotion when
// options.candidateStrategyId is not provided.
var DEFAULT_CANDIDATE_STRATEGY = "workflow-driver-v1";
|
|
1870
|
+
/**
 * Decides whether the candidate workflow strategy should be promoted over the
 * baseline, based on paired holdout run records.
 *
 * Records are validated, split by `candidateId` into baseline and candidate
 * groups, reduced to scored holdout runs, and paired via `pairHoldoutRuns`.
 * Evidence is computed once and attached to whichever decision is returned.
 *
 * Rejections are checked in this order; the first that applies wins:
 *   1. `missing_baseline_records`  - no scored holdout records for the baseline
 *   2. `missing_candidate_records` - no scored holdout records for the candidate
 *   3. `missing_holdout_pairs`     - an expected scenario has no pair
 *   4. `few_pairs`                 - fewer pairs than `minPairedHoldoutRuns`
 *   5. `insufficient_lift`         - CI lower bound is not above `deltaThreshold`
 *   6. `cost_ceiling`              - finite candidate median cost exceeds the ceiling
 *
 * @param {object} options
 * @param {Array<object>} options.records Run records; each is passed through `validateRunRecord`.
 * @param {string} [options.baselineStrategyId] Defaults to `DEFAULT_BASELINE_STRATEGY`.
 * @param {string} [options.candidateStrategyId] Defaults to `DEFAULT_CANDIDATE_STRATEGY`.
 * @param {number} [options.confidence=0.95]
 * @param {number} [options.resamples=2000]
 * @param {string} [options.statistic="mean"]
 * @param {number} [options.deltaThreshold=0]
 * @param {number} [options.minPairedHoldoutRuns=3]
 * @param {string[]} [options.expectedScenarioIds] When omitted, derived from the holdout records.
 * @param {number} [options.costPerRunCeiling]
 * @param {*} [options.seed] Forwarded to evidence building.
 * @param {string} [options.generatedAt] Forwarded to `decision`.
 * @returns {object} The decision object built by `decision`.
 * @throws {ValidationError} Propagated from option/record validation and pairing.
 */
function decideWorkflowDriverPromotion(options) {
  const records = options.records.map(validateRunRecord);
  const baselineStrategyId = options.baselineStrategyId ?? DEFAULT_BASELINE_STRATEGY;
  const candidateStrategyId = options.candidateStrategyId ?? DEFAULT_CANDIDATE_STRATEGY;
  const confidence = options.confidence ?? 0.95;
  const resamples = options.resamples ?? 2e3;
  const statistic = options.statistic ?? "mean";
  const deltaThreshold = options.deltaThreshold ?? 0;
  const minPairedHoldoutRuns = options.minPairedHoldoutRuns ?? 3;
  validateOptions({ confidence, resamples, deltaThreshold, minPairedHoldoutRuns }, options);

  // Single pass split. The two tests are independent on purpose: if both
  // strategy ids are equal, a record belongs to both groups.
  const baseline = [];
  const candidate = [];
  for (const record of records) {
    if (record.candidateId === baselineStrategyId) baseline.push(record);
    if (record.candidateId === candidateStrategyId) candidate.push(record);
  }
  const baselineHoldout = baseline.filter(isScoredHoldout);
  const candidateHoldout = candidate.filter(isScoredHoldout);

  const expectedScenarioIds = expectedScenarios(
    options.expectedScenarioIds,
    baselineHoldout.concat(candidateHoldout)
  );
  const expectedScenarioIdSet = new Set(expectedScenarioIds);
  // An empty expectation set means "do not restrict pairs by scenario".
  const restrictToExpected = expectedScenarioIdSet.size !== 0;
  const pairs = pairHoldoutRuns(baselineHoldout, candidateHoldout).filter(
    (pair) => !restrictToExpected || expectedScenarioIdSet.has(pair.scenarioId)
  );

  const pairedScenarioIdSet = new Set(pairs.map((pair) => pair.scenarioId));
  const pairedScenarioIds = Array.from(pairedScenarioIdSet).sort();
  const missingScenarioIds = expectedScenarioIds.filter((id) => !pairedScenarioIdSet.has(id));

  const evidence = buildEvidence({
    pairs,
    expectedScenarioIds,
    pairedScenarioIds,
    missingScenarioIds,
    baseline,
    candidate,
    confidence,
    resamples,
    statistic,
    deltaThreshold,
    seed: options.seed
  });

  // Every outcome shares the same envelope; only the verdict fields vary.
  const conclude = (promote, rejectionCode, reason) =>
    decision({
      options,
      baselineStrategyId,
      candidateStrategyId,
      evidence,
      promote,
      rejectionCode,
      reason
    });

  if (baselineHoldout.length === 0) {
    return conclude(
      false,
      "missing_baseline_records",
      `missing_baseline_records: no holdout RunRecords for baseline "${baselineStrategyId}"`
    );
  }
  if (candidateHoldout.length === 0) {
    return conclude(
      false,
      "missing_candidate_records",
      `missing_candidate_records: no holdout RunRecords for candidate "${candidateStrategyId}"`
    );
  }
  if (missingScenarioIds.length > 0) {
    return conclude(
      false,
      "missing_holdout_pairs",
      `missing_holdout_pairs: no paired baseline/candidate holdout record for scenario(s) [${missingScenarioIds.join(", ")}]`
    );
  }
  if (pairs.length < minPairedHoldoutRuns) {
    return conclude(
      false,
      "few_pairs",
      `few_pairs: ${pairs.length} paired holdout run(s) < min ${minPairedHoldoutRuns}`
    );
  }
  // Negated comparison so a NaN lower bound is treated as "does not clear".
  if (!(evidence.liftCi.low > deltaThreshold)) {
    return conclude(
      false,
      "insufficient_lift",
      `insufficient_lift: heldout ${statistic} lift=${fmt(evidence.lift)} CI=[${fmt(evidence.liftCi.low)}, ${fmt(evidence.liftCi.high)}] does not clear threshold ${fmt(deltaThreshold)}`
    );
  }
  // The ceiling only applies when one was configured and the median cost is finite.
  const ceilingConfigured = options.costPerRunCeiling !== void 0;
  if (
    ceilingConfigured &&
    Number.isFinite(evidence.candidateMedianCostUsd) &&
    evidence.candidateMedianCostUsd > options.costPerRunCeiling
  ) {
    return conclude(
      false,
      "cost_ceiling",
      `cost_ceiling: candidate median cost $${fmt(evidence.candidateMedianCostUsd)} exceeds ceiling $${fmt(options.costPerRunCeiling)}`
    );
  }
  return conclude(
    true,
    null,
    `promote: ${candidateStrategyId} beats ${baselineStrategyId} on paired heldout workflows lift=${fmt(evidence.lift)} CI=[${fmt(evidence.liftCi.low)}, ${fmt(evidence.liftCi.high)}] over ${pairs.length} pair(s)`
  );
}
|
|
1983
|
+
/**
 * Validates the inputs of the workflow promotion gate.
 *
 * @param {{ confidence: number, resamples: number, deltaThreshold: number, minPairedHoldoutRuns: number }} normalized
 *   Option values after defaults have been applied.
 * @param {{ records: Array<unknown>, costPerRunCeiling?: number }} options
 *   The caller-supplied options; `records` and `costPerRunCeiling` are read here.
 * @returns {void}
 * @throws {ValidationError} When `records` is empty; `confidence` is not
 *   strictly between 0 and 1 (NaN included); `resamples` or
 *   `minPairedHoldoutRuns` is not a positive integer; `deltaThreshold` is not
 *   finite; or `costPerRunCeiling` is given but not a positive finite number.
 */
function validateOptions(normalized, options) {
  if (options.records.length === 0) {
    throw new ValidationError("workflow promotion gate requires at least one RunRecord");
  }
  // Expressed as a negated range test: NaN fails every comparison, so the
  // form `c <= 0 || c >= 1` would let a NaN confidence slip through.
  if (!(normalized.confidence > 0 && normalized.confidence < 1)) {
    throw new ValidationError("workflow promotion gate confidence must be in (0,1)");
  }
  if (!Number.isInteger(normalized.resamples) || normalized.resamples <= 0) {
    throw new ValidationError("workflow promotion gate resamples must be a positive integer");
  }
  if (!Number.isFinite(normalized.deltaThreshold)) {
    throw new ValidationError("workflow promotion gate deltaThreshold must be finite");
  }
  if (!Number.isInteger(normalized.minPairedHoldoutRuns) || normalized.minPairedHoldoutRuns < 1) {
    throw new ValidationError(
      "workflow promotion gate minPairedHoldoutRuns must be a positive integer"
    );
  }
  if (
    options.costPerRunCeiling !== void 0 &&
    (!Number.isFinite(options.costPerRunCeiling) || options.costPerRunCeiling <= 0)
  ) {
    throw new ValidationError("workflow promotion gate costPerRunCeiling must be positive");
  }
}
|
|
2005
|
+
/**
 * Tells whether a run record is a holdout run that carries a numeric score.
 *
 * `record.outcome` is only inspected for records tagged "holdout".
 *
 * @param {{ splitTag?: string, outcome?: { holdoutScore?: unknown } }} record
 * @returns {boolean} True when `splitTag` is "holdout" and
 *   `outcome.holdoutScore` has type "number".
 */
function isScoredHoldout(record) {
  if (record.splitTag !== "holdout") {
    return false;
  }
  return typeof record.outcome.holdoutScore === "number";
}
|
|
2008
|
+
/**
 * Resolves the list of scenario ids the gate expects to see paired.
 *
 * Uses `requested` when it is not null/undefined; otherwise falls back to the
 * `scenarioId` of each record. Entries rejected by `isString` are dropped, and
 * the remainder is de-duplicated and sorted with the default array sort.
 *
 * @param {Array<unknown> | null | undefined} requested
 * @param {Array<{ scenarioId?: unknown }>} records
 * @returns {string[]}
 */
function expectedScenarios(requested, records) {
  const candidates = requested ?? records.map((record) => record.scenarioId);
  const unique = new Set(candidates.filter(isString));
  return Array.from(unique).sort();
}
|
|
2012
|
+
/**
 * Pairs each candidate holdout run with the baseline run sharing its pair key.
 *
 * Baseline runs are indexed by key via `indexByPairKey`; candidate runs with
 * no baseline counterpart are skipped. The result is sorted by pair key using
 * `localeCompare`.
 *
 * @param {Array<object>} baseline Baseline holdout run records.
 * @param {Array<object>} candidate Candidate holdout run records.
 * @returns {Array<{ key: string, scenarioId: *, seed: *, baselineRunId: *, candidateRunId: *, baselineScore: number, candidateScore: number, delta: number }>}
 *   One entry per matched pair; `delta` is candidate score minus baseline score.
 * @throws {ValidationError} Propagated from `indexByPairKey` / `holdoutPairKey`.
 */
function pairHoldoutRuns(baseline, candidate) {
  const baselineIndex = indexByPairKey(baseline, "baseline");
  const matched = candidate.flatMap((run) => {
    const key = holdoutPairKey(run, "candidate");
    const counterpart = baselineIndex.get(key);
    if (!counterpart) {
      return [];
    }
    const baselineScore = counterpart.outcome.holdoutScore;
    const candidateScore = run.outcome.holdoutScore;
    return [
      {
        key,
        scenarioId: run.scenarioId,
        seed: run.seed,
        baselineRunId: counterpart.runId,
        candidateRunId: run.runId,
        baselineScore,
        candidateScore,
        delta: candidateScore - baselineScore
      }
    ];
  });
  return matched.sort((a, b) => a.key.localeCompare(b.key));
}
|
|
2034
|
+
/**
 * Builds a Map from holdout pair key to run record.
 *
 * @param {Iterable<object>} records Run records to index.
 * @param {string} side Label ("baseline" / "candidate") used in error messages
 *   and forwarded to `holdoutPairKey`.
 * @returns {Map<string, object>}
 * @throws {ValidationError} When two records produce the same pair key, or
 *   when `holdoutPairKey` rejects a record.
 */
function indexByPairKey(records, side) {
  const index = new Map();
  for (const entry of records) {
    const pairKey = holdoutPairKey(entry, side);
    if (!index.has(pairKey)) {
      index.set(pairKey, entry);
      continue;
    }
    throw new ValidationError(
      `workflow promotion gate duplicate ${side} holdout pair key: ${pairKey}`
    );
  }
  return index;
}
|
|
2047
|
+
/**
 * Builds the key used to match a baseline run with a candidate run: the
 * record's `scenarioId` and `seed` joined by "::".
 *
 * @param {{ scenarioId?: string, seed?: unknown, runId?: unknown }} record
 * @param {string} side Label used in the error message.
 * @returns {string}
 * @throws {ValidationError} When `record.scenarioId` is falsy.
 */
function holdoutPairKey(record, side) {
  const { scenarioId } = record;
  if (scenarioId) {
    return `${scenarioId}::${record.seed}`;
  }
  throw new ValidationError(
    `workflow promotion gate ${side} holdout RunRecord ${record.runId} is missing scenarioId`
  );
}
|
|
2055
|
+
/**
 * Assembles the evidence object attached to a promotion decision.
 *
 * Runs `pairedBootstrap` over the paired baseline/candidate scores and
 * combines its result with score means, median costs and the gate settings.
 * `seed` is forwarded to the bootstrap only when it is not `undefined`.
 *
 * @param {object} args
 * @param {Array<{ baselineScore: number, candidateScore: number }>} args.pairs
 * @param {string[]} args.expectedScenarioIds
 * @param {string[]} args.pairedScenarioIds
 * @param {string[]} args.missingScenarioIds
 * @param {Array<{ costUsd?: number }>} args.baseline All baseline records.
 * @param {Array<{ costUsd?: number }>} args.candidate All candidate records.
 * @param {number} args.confidence
 * @param {number} args.resamples
 * @param {string} args.statistic "mean" selects `bootstrap.mean` as the lift;
 *   any other value selects `bootstrap.median`.
 * @param {number} args.deltaThreshold
 * @param {*} [args.seed]
 * @returns {object} Evidence record; `pairs` is a shallow copy of `args.pairs`.
 */
function buildEvidence(args) {
  const baselineScores = [];
  const candidateScores = [];
  for (const pair of args.pairs) {
    baselineScores.push(pair.baselineScore);
    candidateScores.push(pair.candidateScore);
  }

  const bootstrapOptions = {
    confidence: args.confidence,
    resamples: args.resamples,
    statistic: args.statistic
  };
  if (args.seed !== undefined) {
    bootstrapOptions.seed = args.seed;
  }
  const bootstrap = pairedBootstrap(baselineScores, candidateScores, bootstrapOptions);

  const costOf = (record) => record.costUsd;
  return {
    pairedRuns: args.pairs.length,
    expectedScenarioIds: args.expectedScenarioIds,
    pairedScenarioIds: args.pairedScenarioIds,
    missingScenarioIds: args.missingScenarioIds,
    baselineMean: mean(baselineScores),
    candidateMean: mean(candidateScores),
    lift: args.statistic === "mean" ? bootstrap.mean : bootstrap.median,
    liftCi: { low: bootstrap.low, high: bootstrap.high },
    bootstrap,
    confidence: args.confidence,
    resamples: args.resamples,
    statistic: args.statistic,
    deltaThreshold: args.deltaThreshold,
    baselineMedianCostUsd: medianFinite(args.baseline.map(costOf)),
    candidateMedianCostUsd: medianFinite(args.candidate.map(costOf)),
    pairs: args.pairs.slice()
  };
}
|
|
2083
|
+
/**
 * Wraps a verdict and its evidence into the promotion decision record.
 *
 * `generatedAt` comes from `args.options.generatedAt` when that is not
 * null/undefined; otherwise the current time as an ISO-8601 string is used.
 *
 * @param {object} args
 * @param {{ generatedAt?: string }} args.options
 * @param {string} args.baselineStrategyId
 * @param {string} args.candidateStrategyId
 * @param {boolean} args.promote
 * @param {string | null} args.rejectionCode
 * @param {string} args.reason
 * @param {object} args.evidence
 * @returns {{ schemaVersion: string, generatedAt: string, baselineStrategyId: string, candidateStrategyId: string, promote: boolean, rejectionCode: string | null, reason: string, evidence: object }}
 */
function decision(args) {
  const { options, baselineStrategyId, candidateStrategyId, promote, rejectionCode, reason, evidence } = args;
  const generatedAt = options.generatedAt ?? new Date().toISOString();
  return {
    schemaVersion: DECISION_VERSION,
    generatedAt,
    baselineStrategyId,
    candidateStrategyId,
    promote,
    rejectionCode,
    reason,
    evidence
  };
}
|
|
2095
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values
 * @returns {number} The mean, or NaN for an empty list.
 */
function mean(values) {
  const count = values.length;
  if (count === 0) {
    return Number.NaN;
  }
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / count;
}
|
|
2099
|
+
/**
 * Median of the finite numbers in a list.
 *
 * Entries for which `Number.isFinite` is false (NaN, +/-Infinity, non-numbers)
 * are ignored. The input array is not modified.
 *
 * @param {Array<unknown>} values
 * @returns {number} The median (average of the two middle values for an even
 *   count), or NaN when no finite entries remain.
 */
function medianFinite(values) {
  const ordered = values.filter((value) => Number.isFinite(value));
  ordered.sort((a, b) => a - b);
  const count = ordered.length;
  if (count === 0) {
    return Number.NaN;
  }
  const upper = Math.trunc(count / 2);
  if (count % 2 !== 0) {
    return ordered[upper];
  }
  return (ordered[upper - 1] + ordered[upper]) / 2;
}
|
|
2105
|
+
/**
 * Formats a value for use in decision reason strings.
 *
 * @param {unknown} value
 * @returns {string} Four fixed decimals for finite numbers; otherwise the
 *   value converted with `String` (e.g. "NaN", "Infinity", "undefined").
 */
function fmt(value) {
  return Number.isFinite(value) ? value.toFixed(4) : String(value);
}
|
|
2109
|
+
/**
 * Predicate for non-empty primitive strings.
 *
 * @param {unknown} value
 * @returns {boolean} True only when `value` has type "string" and is not "".
 */
function isString(value) {
  if (typeof value !== "string") {
    return false;
  }
  return value !== "";
}
|
|
2112
|
+
|
|
2113
|
+
// src/workflow/runtime-adapter.ts
|
|
2114
|
+
/**
 * Wraps a list of workflow trace events into a "workflow-trace-v1" envelope.
 *
 * The first event is passed through `validateWorkflowTraceEvent` and supplies
 * the run id unless `options.runId` is given. `topology`, `artifacts` and
 * `metadata` are included only when the corresponding option is truthy. The
 * assembled envelope is returned through `validateWorkflowTraceEnvelope`.
 *
 * @param {Array<object>} events Non-empty array of trace events.
 * @param {object} [options]
 * @param {string} [options.runId]
 * @param {*} [options.topology]
 * @param {*} [options.artifacts]
 * @param {*} [options.metadata]
 * @returns {*} Whatever `validateWorkflowTraceEnvelope` returns for the envelope.
 * @throws {ValidationError} When `events` is not a non-empty array; validators
 *   may also throw.
 */
function workflowEventsToTraceEnvelope(events, options = {}) {
  const hasEvents = Array.isArray(events) && events.length > 0;
  if (!hasEvents) {
    throw new ValidationError("workflow trace events must be a non-empty array");
  }
  const firstEvent = validateWorkflowTraceEvent(events[0]);

  // Keys are added in a fixed order: traceVersion, runId, topology, events,
  // artifacts, metadata.
  const envelope = {
    traceVersion: "workflow-trace-v1",
    runId: options.runId ?? firstEvent.runId
  };
  if (options.topology) {
    envelope.topology = options.topology;
  }
  envelope.events = events;
  if (options.artifacts) {
    envelope.artifacts = options.artifacts;
  }
  if (options.metadata) {
    envelope.metadata = options.metadata;
  }
  return validateWorkflowTraceEnvelope(envelope);
}
|
|
2129
|
+
/**
 * Converts a workflow runtime result into a trace envelope.
 *
 * Metadata is derived with `runtimeResultMetadata`, then `result.events` is
 * handed to `workflowEventsToTraceEnvelope`. The run id is `options.runId`
 * when provided, otherwise `result.runId`. `topology` and `artifacts` are
 * forwarded only when truthy; `metadata` only when the derived value is truthy.
 *
 * @param {{ events: Array<object>, runId?: string, meta?: *, output?: * }} result
 * @param {object} [options]
 * @param {string} [options.runId]
 * @param {*} [options.topology]
 * @param {*} [options.artifacts]
 * @param {*} [options.metadata]
 * @param {boolean} [options.includeOutputInMetadata]
 * @returns {*} The envelope produced by `workflowEventsToTraceEnvelope`.
 * @throws {ValidationError} When `result` is falsy or not an object.
 */
function workflowRuntimeResultToTraceEnvelope(result, options = {}) {
  const isObject = Boolean(result) && typeof result === "object";
  if (!isObject) {
    throw new ValidationError("workflow runtime result must be an object");
  }
  const metadata = runtimeResultMetadata(result, options);
  const events = result.events;

  const envelopeOptions = { runId: options.runId ?? result.runId };
  if (options.topology) {
    envelopeOptions.topology = options.topology;
  }
  if (options.artifacts) {
    envelopeOptions.artifacts = options.artifacts;
  }
  if (metadata) {
    envelopeOptions.metadata = metadata;
  }
  return workflowEventsToTraceEnvelope(events, envelopeOptions);
}
|
|
2141
|
+
/**
 * Builds the envelope metadata for a runtime result.
 *
 * Collects `result.meta` (when defined) and, if
 * `options.includeOutputInMetadata` is truthy, `result.output` (when defined)
 * under a `runtimeResult` key, merged on top of `options.metadata`.
 *
 * @param {{ meta?: *, output?: * }} result
 * @param {{ metadata?: object, includeOutputInMetadata?: boolean }} options
 * @returns {object | undefined} `undefined` when there is neither runtime
 *   metadata nor caller metadata; otherwise the merged metadata object, in
 *   which a collected `runtimeResult` overrides a same-named caller key.
 */
function runtimeResultMetadata(result, options) {
  const wantsOutput = options.includeOutputInMetadata && result.output !== undefined;
  const runtimeResult = {
    ...(result.meta !== undefined ? { meta: result.meta } : {}),
    ...(wantsOutput ? { output: result.output } : {})
  };
  const hasRuntimeMetadata = Object.keys(runtimeResult).length > 0;
  if (!hasRuntimeMetadata && !options.metadata) {
    return undefined;
  }
  const merged = { ...(options.metadata ?? {}) };
  if (hasRuntimeMetadata) {
    merged.runtimeResult = runtimeResult;
  }
  return merged;
}
|
|
2154
|
+
export {
|
|
2155
|
+
WORKFLOW_TRACE_EVENT_KINDS,
|
|
2156
|
+
buildWorkflowAnalystFeedbackPack,
|
|
2157
|
+
buildWorkflowPartnerReport,
|
|
2158
|
+
buildWorkflowTraceIntelligenceEnvelope,
|
|
2159
|
+
decideWorkflowDriverPromotion,
|
|
2160
|
+
renderWorkflowFeedbackPack,
|
|
2161
|
+
renderWorkflowPartnerReport,
|
|
2162
|
+
sanitizeWorkflowTraceEnvelope,
|
|
2163
|
+
summarizeWorkflowExecution,
|
|
2164
|
+
summarizeWorkflowTrace,
|
|
2165
|
+
validateWorkflowPartnerReport,
|
|
2166
|
+
validateWorkflowTraceEnvelope,
|
|
2167
|
+
validateWorkflowTraceEvent,
|
|
2168
|
+
validateWorkflowTraceEventKind,
|
|
2169
|
+
validateWorkflowTraceEventPayload,
|
|
2170
|
+
validateWorkflowTraceIntelligenceEnvelope,
|
|
2171
|
+
workflowEventsToTraceEnvelope,
|
|
2172
|
+
workflowPhaseGraph,
|
|
2173
|
+
workflowRuntimeResultToTraceEnvelope,
|
|
2174
|
+
workflowTraceToFeedbackTrajectory,
|
|
2175
|
+
workflowTraceToRunRecord
|
|
2176
|
+
};
|
|
2177
|
+
//# sourceMappingURL=index.js.map
|