@agwab/pi-workflow 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/artifact-graph-runtime.d.ts +1 -1
- package/dist/artifact-graph-runtime.js +10 -5
- package/dist/artifact-graph-schema.js +127 -5
- package/dist/compiler.js +52 -19
- package/dist/dynamic-generated-task-runtime.js +3 -1
- package/dist/dynamic-profiles.d.ts +1 -1
- package/dist/engine-run-graph.d.ts +3 -0
- package/dist/engine-run-graph.js +194 -4
- package/dist/engine.d.ts +5 -0
- package/dist/engine.js +389 -41
- package/dist/extension.d.ts +2 -1
- package/dist/extension.js +30 -8
- package/dist/index.d.ts +11 -3
- package/dist/index.js +6 -1
- package/dist/prompt-json.d.ts +7 -0
- package/dist/prompt-json.js +13 -0
- package/dist/roles.d.ts +1 -1
- package/dist/roles.js +5 -8
- package/dist/store.d.ts +20 -1
- package/dist/store.js +139 -35
- package/dist/strings.d.ts +11 -0
- package/dist/strings.js +24 -0
- package/dist/subagent-backend.js +710 -40
- package/dist/types.d.ts +107 -1
- package/dist/verification-ontology.d.ts +31 -0
- package/dist/verification-ontology.js +66 -0
- package/dist/workflow-artifact-tool.js +5 -6
- package/dist/workflow-artifacts.d.ts +7 -0
- package/dist/workflow-artifacts.js +55 -4
- package/dist/workflow-fetch-cache-extension.d.ts +1 -0
- package/dist/workflow-fetch-cache-extension.js +57 -9
- package/dist/workflow-metrics.d.ts +113 -0
- package/dist/workflow-metrics.js +272 -0
- package/dist/workflow-output-artifacts.js +5 -3
- package/dist/workflow-partial-output.d.ts +45 -0
- package/dist/workflow-partial-output.js +205 -0
- package/dist/workflow-progress-health.js +42 -10
- package/dist/workflow-runtime.js +10 -1
- package/dist/workflow-view.js +3 -1
- package/dist/workflow-web-source-extension.js +194 -52
- package/dist/workflow-web-source.d.ts +2 -1
- package/dist/workflow-web-source.js +109 -30
- package/docs/usage.md +76 -29
- package/node_modules/@agwab/pi-subagent/README.md +3 -3
- package/node_modules/@agwab/pi-subagent/api.mjs +1 -0
- package/node_modules/@agwab/pi-subagent/docs/usage.md +63 -12
- package/node_modules/@agwab/pi-subagent/package.json +2 -2
- package/node_modules/@agwab/pi-subagent/src/api.ts +54 -1
- package/node_modules/@agwab/pi-subagent/src/artifacts/registry.ts +9 -4
- package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +8 -0
- package/node_modules/@agwab/pi-subagent/src/core/constants.ts +9 -0
- package/node_modules/@agwab/pi-subagent/src/core/validation.ts +21 -0
- package/node_modules/@agwab/pi-subagent/src/index.ts +1046 -576
- package/node_modules/@agwab/pi-subagent/src/orchestrate/async.ts +279 -156
- package/node_modules/@agwab/pi-subagent/src/orchestrate/interrupt.ts +165 -89
- package/node_modules/@agwab/pi-subagent/src/orchestrate/reconcile.ts +111 -65
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run-ref.ts +219 -0
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +88 -8
- package/node_modules/@agwab/pi-subagent/src/orchestrate/status.ts +614 -298
- package/node_modules/@agwab/pi-subagent/src/panel.ts +1356 -560
- package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +53 -5
- package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +13 -6
- package/package.json +2 -2
- package/skills/workflow-guide/SKILL.md +1 -0
- package/src/artifact-graph-runtime.ts +19 -13
- package/src/artifact-graph-schema.ts +143 -3
- package/src/cli.mjs +52 -0
- package/src/compiler.ts +63 -18
- package/src/dynamic-generated-task-runtime.ts +3 -1
- package/src/dynamic-profiles.ts +1 -1
- package/src/engine-run-graph.ts +246 -4
- package/src/engine.ts +545 -38
- package/src/extension.ts +36 -6
- package/src/index.ts +52 -1
- package/src/prompt-json.ts +13 -0
- package/src/roles.ts +6 -9
- package/src/store.ts +194 -42
- package/src/strings.ts +38 -0
- package/src/subagent-backend.ts +921 -62
- package/src/types.ts +116 -2
- package/src/verification-ontology.ts +88 -0
- package/src/workflow-artifact-tool.ts +5 -7
- package/src/workflow-artifacts.ts +83 -3
- package/src/workflow-fetch-cache-extension.ts +78 -13
- package/src/workflow-metrics.ts +478 -0
- package/src/workflow-output-artifacts.ts +5 -3
- package/src/workflow-partial-output.ts +299 -0
- package/src/workflow-progress-health.ts +47 -15
- package/src/workflow-runtime.ts +18 -2
- package/src/workflow-view.ts +2 -1
- package/src/workflow-web-source-extension.ts +654 -232
- package/src/workflow-web-source.ts +153 -39
- package/workflows/README.md +7 -25
- package/workflows/deep-research/batched-verification.spec.json +253 -0
- package/workflows/deep-research/helpers/batch-verification-candidates.mjs +136 -0
- package/workflows/deep-research/helpers/claim-evidence-gate.mjs +229 -36
- package/workflows/deep-research/helpers/final-audit-packet.mjs +1 -4
- package/workflows/deep-research/helpers/normalize-input-packet.mjs +81 -2
- package/workflows/deep-research/helpers/render-executive.mjs +40 -26
- package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +89 -15
- package/workflows/deep-research/helpers/shadow-select-verification.mjs +229 -0
- package/workflows/deep-research/helpers/verification-ontology.mjs +77 -0
- package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +3 -3
- package/workflows/deep-research/schemas/deep-research-research-questions-control.schema.json +38 -0
- package/workflows/deep-research/schemas/deep-research-sanitize-claims-control.schema.json +63 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-batch-control.schema.json +47 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +13 -3
- package/workflows/deep-research/spec.json +32 -12
- package/workflows/impact-review/spec.json +3 -3
- package/workflows/spec-review/helpers/spec-review-pipeline.mjs +1 -8
- package/dist/dynamic-loader.d.ts +0 -25
- package/dist/dynamic-loader.js +0 -13
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stderr +0 -0
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stdout +0 -13
- package/src/dynamic-loader.ts +0 -49
- package/workflows/impact-review/schemas/docs-release-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/security-performance-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/state-data-impact-control.schema.json +0 -42
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
TaskRunStatus,
|
|
3
|
+
WorkflowRunRecord,
|
|
4
|
+
WorkflowRunStatus,
|
|
5
|
+
WorkflowRunType,
|
|
6
|
+
WorkflowTaskRunRecord,
|
|
7
|
+
WorkflowTaskUsageValues,
|
|
8
|
+
} from "./types.js";
|
|
9
|
+
|
|
10
|
+
/** Schema version written to the `schemaVersion` field of a metrics export. */
export const WORKFLOW_METRICS_SCHEMA_VERSION = 1;

/** Pricing-model tag written to the `pricingModelVersion` field of a metrics export. */
export const WORKFLOW_METRICS_PRICING_MODEL_VERSION = "provider-reported-v1";

/** Literal type of `WORKFLOW_METRICS_SCHEMA_VERSION`. */
export type WorkflowMetricsSchemaVersion = typeof WORKFLOW_METRICS_SCHEMA_VERSION;

/** Literal type of `WORKFLOW_METRICS_PRICING_MODEL_VERSION`. */
export type WorkflowMetricsPricingModelVersion =
  typeof WORKFLOW_METRICS_PRICING_MODEL_VERSION;

/** The single pricing source a metrics export can declare. */
export type WorkflowMetricsPricingSource = "provider-reported";

/** A numeric metric, or `null` when no finite number is available. */
export type WorkflowMetricValue = number | null;
|
|
19
|
+
|
|
20
|
+
/**
 * Token and cost figures for a single task or for a group of tasks.
 * Every numeric figure is `null` when no finite value could be reported.
 */
export interface WorkflowUsageMetrics {
  inputTokens: WorkflowMetricValue;
  outputTokens: WorkflowMetricValue;
  totalTokens: WorkflowMetricValue;
  cachedInputTokens: WorkflowMetricValue;
  cacheCreationInputTokens: WorkflowMetricValue;
  cacheReadInputTokens: WorkflowMetricValue;
  reasoningTokens: WorkflowMetricValue;
  /**
   * Cost exactly as the provider reported it. It is deliberately never
   * computed from token counts or model names.
   */
  costUsd: WorkflowMetricValue;
  /** Number of usage attempts counted for the task(s). */
  attempts: number;
  /** True when usage data is unavailable for at least one contributing task. */
  unavailable: boolean;
  /** True when any figure is missing or flagged incomplete. */
  incomplete: boolean;
  /** IDs of the contributing tasks whose usage is unavailable. */
  unavailableTaskIds: string[];
  /** IDs of the contributing tasks whose usage is incomplete. */
  incompleteTaskIds: string[];
}
|
|
39
|
+
|
|
40
|
+
/**
 * Launch and execution durations for a single task or for a group of tasks.
 * Every duration is `null` when no finite value could be reported.
 */
export interface WorkflowLaunchTimingMetrics {
  launchWaitMs: WorkflowMetricValue;
  launchDurationMs: WorkflowMetricValue;
  executionMs: WorkflowMetricValue;
  totalMs: WorkflowMetricValue;
  launchSlotReleaseDelayMs: WorkflowMetricValue;
  /** Number of timing attempts counted for the task(s). */
  attempts: number;
  /** True when timing data is unavailable for at least one contributing task. */
  unavailable: boolean;
  /** True when any duration is missing or flagged incomplete. */
  incomplete: boolean;
  /** IDs of the contributing tasks whose timing is unavailable. */
  unavailableTaskIds: string[];
  /** IDs of the contributing tasks whose timing is incomplete. */
  incompleteTaskIds: string[];
}
|
|
52
|
+
|
|
53
|
+
/** Retry and resume counters for a single task or for a group of tasks. */
export interface WorkflowRetryMetrics {
  /** Launch retry attempts, including those recorded on resume events. */
  launchRetries: number;
  /** Output retry attempts, including those recorded on resume events. */
  outputRetries: number;
  /** Number of resume events. */
  resumeEvents: number;
  /** Sum of `launchRetries`, `outputRetries` and `resumeEvents`. */
  totalRetryEvents: number;
  /** How many tasks have a non-zero `totalRetryEvents` (0 or 1 for a single task). */
  tasksWithRetries: number;
}
|
|
60
|
+
|
|
61
|
+
/** Number of tasks in each status, plus the overall number of tasks counted. */
export interface WorkflowTaskStatusCounts {
  pending: number;
  running: number;
  blocked: number;
  completed: number;
  failed: number;
  skipped: number;
  interrupted: number;
  /** Total number of tasks counted, regardless of status. */
  total: number;
}
|
|
71
|
+
|
|
72
|
+
/** Aggregated metrics for a set of tasks (the whole run or one stage). */
export interface WorkflowRunMetricsRollup {
  /** Number of tasks in the set. */
  taskCount: number;
  /** Per-status task counts for the set. */
  statusCounts: WorkflowTaskStatusCounts;
  /** Summed usage figures for the set. */
  usage: WorkflowUsageMetrics;
  /** Summed launch-timing figures for the set. */
  launchTiming: WorkflowLaunchTimingMetrics;
  /** Summed retry counters for the set. */
  retries: WorkflowRetryMetrics;
}
|
|
79
|
+
|
|
80
|
+
/** Metrics and identifying fields exported for one task of a run. */
export interface WorkflowTaskMetrics {
  taskId: string;
  specId: string;
  displayName: string;
  agent: string;
  status: TaskRunStatus;
  statusDetail: string;
  /** Stage the task belongs to, or `null` when the record has none. */
  stageId: string | null;
  /** Task kind, or `null` when the record has none. */
  kind: string | null;
  /** Provider name from the usage record, or `null` when blank or absent. */
  provider: string | null;
  /** Model from the usage record, else from the task runtime; `null` when blank or absent. */
  model: string | null;
  /** Thinking setting from the usage record, else from the task runtime; `null` when blank or absent. */
  thinking: string | null;
  usage: WorkflowUsageMetrics;
  launchTiming: WorkflowLaunchTimingMetrics;
  retries: WorkflowRetryMetrics;
}
|
|
96
|
+
|
|
97
|
+
/** A rollup restricted to the tasks that share one `stageId`. */
export interface WorkflowStageMetrics extends WorkflowRunMetricsRollup {
  /** Stage identifier, or `null` for tasks that have no stage. */
  stageId: string | null;
}
|
|
100
|
+
|
|
101
|
+
/** Run-level summary of which tasks have unavailable or incomplete metrics. */
export interface WorkflowRunMetricsMetadata {
  usageUnavailableTaskIds: string[];
  usageIncompleteTaskIds: string[];
  launchTimingUnavailableTaskIds: string[];
  launchTimingIncompleteTaskIds: string[];
  /** True when either the usage totals or the launch-timing totals are incomplete. */
  incomplete: boolean;
  /** True when either the usage totals or the launch-timing totals are unavailable. */
  unavailable: boolean;
}
|
|
109
|
+
|
|
110
|
+
/** The complete metrics export for one workflow run. */
export interface WorkflowRunMetrics {
  schemaVersion: WorkflowMetricsSchemaVersion;
  pricingModelVersion: WorkflowMetricsPricingModelVersion;
  pricingSource: WorkflowMetricsPricingSource;
  /** Always `true`: costs in this export are provider-reported. */
  costsAreProviderReported: true;
  /** Identifying fields copied from the run record. */
  run: {
    runId: string;
    /** Present only when the run record has a name. */
    name?: string;
    type: WorkflowRunType;
    status: WorkflowRunStatus;
    createdAt: string;
    updatedAt: string;
  };
  /** Rollup over every task of the run. */
  totals: WorkflowRunMetricsRollup;
  /** One rollup per distinct stage id. */
  byStage: WorkflowStageMetrics[];
  /** One entry per task. */
  byTask: WorkflowTaskMetrics[];
  metadata: WorkflowRunMetricsMetadata;
}
|
|
128
|
+
|
|
129
|
+
/** Name of a numeric usage field. */
type UsageMetricKey = keyof WorkflowTaskUsageValues;

/** Name of a numeric launch-timing field. */
type TimingMetricKey =
  | "launchWaitMs"
  | "launchDurationMs"
  | "executionMs"
  | "totalMs"
  | "launchSlotReleaseDelayMs";

/** Usage fields that are read per task and summed in rollups, in output order. */
const USAGE_METRIC_KEYS: UsageMetricKey[] = [
  "inputTokens",
  "outputTokens",
  "totalTokens",
  "cachedInputTokens",
  "cacheCreationInputTokens",
  "cacheReadInputTokens",
  "reasoningTokens",
  "costUsd",
];

/** Launch-timing fields that are read per task and summed in rollups, in output order. */
const TIMING_METRIC_KEYS: TimingMetricKey[] = [
  "launchWaitMs",
  "launchDurationMs",
  "executionMs",
  "totalMs",
  "launchSlotReleaseDelayMs",
];
|
|
155
|
+
|
|
156
|
+
/**
 * Report whether `key` is an own (non-inherited) property of `record`.
 *
 * @param record - Object to inspect.
 * @param key - Property name to look for.
 * @returns `true` only when the property is defined directly on `record`.
 */
function hasOwnValue(record: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(record, key);
}
|
|
159
|
+
|
|
160
|
+
/**
 * Read one numeric metric from a loosely shaped record.
 *
 * @param record - Source object; may be absent.
 * @param key - Name of the metric to read.
 * @returns The stored value when it is an own property holding a finite
 *   number, otherwise `null`.
 */
function metricValue(
  record: object | undefined,
  key: string,
): WorkflowMetricValue {
  if (record && hasOwnValue(record, key)) {
    const candidate = (record as Record<string, unknown>)[key];
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Normalize an optional label for export.
 *
 * @param value - Any value.
 * @returns The value unchanged (not trimmed) when it is a string containing
 *   at least one non-whitespace character, otherwise `null`.
 */
function metricString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  return value.trim().length > 0 ? value : null;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Add up metric values, treating any gap as "no total".
 *
 * @param values - Values to sum; `null` marks a missing value.
 * @returns `{ value: <sum>, incomplete: false }` when every value is present;
 *   `{ value: null, incomplete: true }` when the list is empty or contains a
 *   `null`.
 */
function sumMetricValues(values: WorkflowMetricValue[]): {
  value: WorkflowMetricValue;
  incomplete: boolean;
} {
  // A partial sum would understate the real total, so report nothing instead.
  if (values.length === 0 || values.includes(null)) {
    return { value: null, incomplete: true };
  }
  let sum = 0;
  for (const entry of values) {
    if (entry !== null) sum += entry;
  }
  return { value: sum, incomplete: false };
}
|
|
185
|
+
|
|
186
|
+
/**
 * Count the usage attempts recorded for a task.
 *
 * @param task - Task run record.
 * @returns The aggregate's `attempts` value when present, otherwise the length
 *   of the per-attempt list, otherwise 0.
 */
function usageAttempts(task: WorkflowTaskRunRecord): number {
  const usage = task.usage;
  const aggregated = usage?.aggregate?.attempts;
  if (aggregated != null) return aggregated;
  return usage?.attempts?.length ?? 0;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Count the timing attempts recorded for a task.
 *
 * @param task - Task run record.
 * @returns The aggregate's `attempts` value when present, otherwise the length
 *   of the per-attempt list, otherwise 0.
 */
function timingAttempts(task: WorkflowTaskRunRecord): number {
  const timing = task.timing;
  const aggregated = timing?.aggregate?.attempts;
  if (aggregated != null) return aggregated;
  return timing?.attempts?.length ?? 0;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Derive the exported usage metrics for one task.
 *
 * Figures are read from `usage.aggregate` when it exists, otherwise from the
 * usage record itself. Usage counts as unavailable when the task has no usage
 * record or any recorded attempt is flagged unavailable. It counts as
 * incomplete when it is unavailable, when the record or its aggregate is
 * flagged incomplete, or when any figure is missing.
 *
 * @param task - Task run record.
 * @returns Usage metrics whose task-id lists contain this task's id when the
 *   corresponding flag is set.
 */
function taskUsageMetrics(task: WorkflowTaskRunRecord): WorkflowUsageMetrics {
  const { usage, taskId } = task;
  const unavailable =
    usage === undefined ||
    usage.attempts?.some((attempt) => attempt.unavailable) === true;

  const figures = usage?.aggregate ?? usage;
  const readings = new Map<UsageMetricKey, WorkflowMetricValue>();
  for (const key of USAGE_METRIC_KEYS) {
    readings.set(key, metricValue(figures, key));
  }
  const read = (key: UsageMetricKey): WorkflowMetricValue =>
    readings.get(key) ?? null;

  const hasGap = USAGE_METRIC_KEYS.some((key) => read(key) === null);
  const incomplete =
    unavailable ||
    usage?.incomplete === true ||
    usage?.aggregate?.incomplete === true ||
    hasGap;

  return {
    inputTokens: read("inputTokens"),
    outputTokens: read("outputTokens"),
    totalTokens: read("totalTokens"),
    cachedInputTokens: read("cachedInputTokens"),
    cacheCreationInputTokens: read("cacheCreationInputTokens"),
    cacheReadInputTokens: read("cacheReadInputTokens"),
    reasoningTokens: read("reasoningTokens"),
    costUsd: read("costUsd"),
    attempts: usageAttempts(task),
    unavailable,
    incomplete,
    unavailableTaskIds: unavailable ? [taskId] : [],
    incompleteTaskIds: incomplete ? [taskId] : [],
  };
}
|
|
224
|
+
|
|
225
|
+
/**
 * Derive the exported launch-timing metrics for one task.
 *
 * `launchSlotReleaseDelayMs` is always read from the timing record itself;
 * the other durations are read from `timing.aggregate` when it exists,
 * otherwise from the timing record. Timing counts as unavailable when the
 * task has no timing record, and as incomplete when it is unavailable, when
 * the aggregate is flagged incomplete, or when any duration is missing.
 *
 * @param task - Task run record.
 * @returns Timing metrics whose task-id lists contain this task's id when the
 *   corresponding flag is set.
 */
function taskLaunchTimingMetrics(
  task: WorkflowTaskRunRecord,
): WorkflowLaunchTimingMetrics {
  const { timing, taskId } = task;
  const unavailable = timing === undefined;
  const aggregated = timing?.aggregate ?? timing;

  const readings = new Map<TimingMetricKey, WorkflowMetricValue>();
  for (const key of TIMING_METRIC_KEYS) {
    const origin = key === "launchSlotReleaseDelayMs" ? timing : aggregated;
    readings.set(key, metricValue(origin, key));
  }
  const read = (key: TimingMetricKey): WorkflowMetricValue =>
    readings.get(key) ?? null;

  const hasGap = TIMING_METRIC_KEYS.some((key) => read(key) === null);
  const incomplete =
    unavailable || timing?.aggregate?.incomplete === true || hasGap;

  return {
    launchWaitMs: read("launchWaitMs"),
    launchDurationMs: read("launchDurationMs"),
    executionMs: read("executionMs"),
    totalMs: read("totalMs"),
    launchSlotReleaseDelayMs: read("launchSlotReleaseDelayMs"),
    attempts: timingAttempts(task),
    unavailable,
    incomplete,
    unavailableTaskIds: unavailable ? [taskId] : [],
    incompleteTaskIds: incomplete ? [taskId] : [],
  };
}
|
|
257
|
+
|
|
258
|
+
/**
 * Total one retry counter across all of a task's resume events.
 *
 * @param task - Task run record; a missing `resumeEvents` list counts as empty.
 * @param key - Which counter on each resume event to add up.
 * @returns The sum of the counter over all events, skipping events where it
 *   is not a finite number.
 */
function sumResumeRetryAttempts(
  task: WorkflowTaskRunRecord,
  key: "launchRetryAttempts" | "outputRetryAttempts",
): number {
  let sum = 0;
  for (const event of task.resumeEvents ?? []) {
    const attempts = event[key];
    if (typeof attempts === "number" && Number.isFinite(attempts)) {
      sum += attempts;
    }
  }
  return sum;
}
|
|
269
|
+
|
|
270
|
+
/**
 * Derive the retry counters for one task.
 *
 * Launch and output retries each combine the task's current retry record
 * (`launchRetry.attempts` / `outputRetry.attempts`, defaulting to 0) with the
 * matching counters stored on its resume events.
 *
 * @param task - Task run record.
 * @returns Retry counters; `tasksWithRetries` is 1 when any retry or resume
 *   event was recorded, otherwise 0.
 */
function taskRetryMetrics(task: WorkflowTaskRunRecord): WorkflowRetryMetrics {
  const currentLaunchRetries = task.launchRetry?.attempts ?? 0;
  const currentOutputRetries = task.outputRetry?.attempts ?? 0;

  const launchRetries =
    currentLaunchRetries + sumResumeRetryAttempts(task, "launchRetryAttempts");
  const outputRetries =
    currentOutputRetries + sumResumeRetryAttempts(task, "outputRetryAttempts");
  const resumeEvents = task.resumeEvents?.length ?? 0;
  const totalRetryEvents = launchRetries + outputRetries + resumeEvents;

  let tasksWithRetries = 0;
  if (totalRetryEvents > 0) tasksWithRetries = 1;

  return {
    launchRetries,
    outputRetries,
    resumeEvents,
    totalRetryEvents,
    tasksWithRetries,
  };
}
|
|
287
|
+
|
|
288
|
+
/**
 * Create a fresh status-count table with every counter at zero.
 *
 * @returns A new object on every call, so callers may mutate it freely.
 */
function emptyStatusCounts(): WorkflowTaskStatusCounts {
  const zeroed: WorkflowTaskStatusCounts = {
    pending: 0,
    running: 0,
    blocked: 0,
    completed: 0,
    failed: 0,
    skipped: 0,
    interrupted: 0,
    total: 0,
  };
  return zeroed;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Sum the usage metrics of a set of tasks.
 *
 * Each figure is summed independently and becomes `null` when any task lacks
 * it (or when the set is empty). Task-id lists are concatenated in task order.
 *
 * @param tasks - Per-task metrics to combine.
 * @returns Combined usage; `incomplete` is true when any task is incomplete
 *   or any figure could not be summed.
 */
function rollupUsage(tasks: WorkflowTaskMetrics[]): WorkflowUsageMetrics {
  const sums = new Map<UsageMetricKey, ReturnType<typeof sumMetricValues>>();
  for (const key of USAGE_METRIC_KEYS) {
    sums.set(key, sumMetricValues(tasks.map((task) => task.usage[key])));
  }
  const total = (key: UsageMetricKey): WorkflowMetricValue =>
    sums.get(key)?.value ?? null;

  const unavailableTaskIds: string[] = [];
  const incompleteTaskIds: string[] = [];
  let attempts = 0;
  for (const { usage } of tasks) {
    unavailableTaskIds.push(...usage.unavailableTaskIds);
    incompleteTaskIds.push(...usage.incompleteTaskIds);
    attempts += usage.attempts;
  }

  const anySumIncomplete = [...sums.values()].some((sum) => sum.incomplete);

  return {
    inputTokens: total("inputTokens"),
    outputTokens: total("outputTokens"),
    totalTokens: total("totalTokens"),
    cachedInputTokens: total("cachedInputTokens"),
    cacheCreationInputTokens: total("cacheCreationInputTokens"),
    cacheReadInputTokens: total("cacheReadInputTokens"),
    reasoningTokens: total("reasoningTokens"),
    costUsd: total("costUsd"),
    attempts,
    unavailable: unavailableTaskIds.length > 0,
    incomplete: incompleteTaskIds.length > 0 || anySumIncomplete,
    unavailableTaskIds,
    incompleteTaskIds,
  };
}
|
|
332
|
+
|
|
333
|
+
/**
 * Sum the launch-timing metrics of a set of tasks.
 *
 * Each duration is summed independently and becomes `null` when any task
 * lacks it (or when the set is empty). Task-id lists are concatenated in task
 * order.
 *
 * @param tasks - Per-task metrics to combine.
 * @returns Combined timing; `incomplete` is true when any task is incomplete
 *   or any duration could not be summed.
 */
function rollupLaunchTiming(
  tasks: WorkflowTaskMetrics[],
): WorkflowLaunchTimingMetrics {
  const sums = new Map<TimingMetricKey, ReturnType<typeof sumMetricValues>>();
  for (const key of TIMING_METRIC_KEYS) {
    sums.set(key, sumMetricValues(tasks.map((task) => task.launchTiming[key])));
  }
  const total = (key: TimingMetricKey): WorkflowMetricValue =>
    sums.get(key)?.value ?? null;

  const unavailableTaskIds: string[] = [];
  const incompleteTaskIds: string[] = [];
  let attempts = 0;
  for (const { launchTiming } of tasks) {
    unavailableTaskIds.push(...launchTiming.unavailableTaskIds);
    incompleteTaskIds.push(...launchTiming.incompleteTaskIds);
    attempts += launchTiming.attempts;
  }

  const anySumIncomplete = [...sums.values()].some((sum) => sum.incomplete);

  return {
    launchWaitMs: total("launchWaitMs"),
    launchDurationMs: total("launchDurationMs"),
    executionMs: total("executionMs"),
    totalMs: total("totalMs"),
    launchSlotReleaseDelayMs: total("launchSlotReleaseDelayMs"),
    attempts,
    unavailable: unavailableTaskIds.length > 0,
    incomplete: incompleteTaskIds.length > 0 || anySumIncomplete,
    unavailableTaskIds,
    incompleteTaskIds,
  };
}
|
|
366
|
+
|
|
367
|
+
/**
 * Sum the retry counters of a set of tasks.
 *
 * @param tasks - Per-task metrics to combine.
 * @returns Combined counters. `totalRetryEvents` is recomputed from the three
 *   summed counters; `tasksWithRetries` is the sum of the per-task flags.
 */
function rollupRetries(tasks: WorkflowTaskMetrics[]): WorkflowRetryMetrics {
  let launchRetries = 0;
  let outputRetries = 0;
  let resumeEvents = 0;
  let tasksWithRetries = 0;
  for (const { retries } of tasks) {
    launchRetries += retries.launchRetries;
    outputRetries += retries.outputRetries;
    resumeEvents += retries.resumeEvents;
    tasksWithRetries += retries.tasksWithRetries;
  }
  return {
    launchRetries,
    outputRetries,
    resumeEvents,
    totalRetryEvents: launchRetries + outputRetries + resumeEvents,
    tasksWithRetries,
  };
}
|
|
391
|
+
|
|
392
|
+
/**
 * Count tasks per status.
 *
 * @param tasks - Per-task metrics to count.
 * @returns A fresh count table. `total` always equals `tasks.length`. A task
 *   whose status has no counter in the table (for example a value written by
 *   a different version of the record format) is included in `total` only.
 */
function statusCounts(tasks: WorkflowTaskMetrics[]): WorkflowTaskStatusCounts {
  const counts = emptyStatusCounts();
  for (const task of tasks) {
    const status: string = task.status;
    // Incrementing a missing counter would create a NaN property, and a
    // status of "total" would double-count; both would corrupt the export.
    if (status !== "total" && Object.hasOwn(counts, status)) {
      counts[task.status] += 1;
    }
    counts.total += 1;
  }
  return counts;
}
|
|
400
|
+
|
|
401
|
+
/**
 * Build the complete rollup (counts, usage, timing, retries) for a task set.
 *
 * @param tasks - Per-task metrics to combine.
 * @returns The rollup; `taskCount` is the number of tasks supplied.
 */
function rollupTasks(tasks: WorkflowTaskMetrics[]): WorkflowRunMetricsRollup {
  const taskCount = tasks.length;
  const perStatus = statusCounts(tasks);
  const usage = rollupUsage(tasks);
  const launchTiming = rollupLaunchTiming(tasks);
  const retries = rollupRetries(tasks);
  return {
    taskCount,
    statusCounts: perStatus,
    usage,
    launchTiming,
    retries,
  };
}
|
|
410
|
+
|
|
411
|
+
/**
 * Group tasks by `stageId` and roll each group up.
 *
 * Tasks are bucketed in a single pass using an insertion-ordered `Map`, so
 * the cost is linear in the number of tasks instead of one scan per stage.
 *
 * @param tasks - Per-task metrics; `stageId` may be `null`.
 * @returns One rollup per distinct stage id, ordered by the first appearance
 *   of each stage in `tasks`. Tasks keep their relative order within a stage.
 */
function stageMetrics(tasks: WorkflowTaskMetrics[]): WorkflowStageMetrics[] {
  const tasksByStage = new Map<string | null, WorkflowTaskMetrics[]>();
  for (const task of tasks) {
    const bucket = tasksByStage.get(task.stageId);
    if (bucket === undefined) {
      tasksByStage.set(task.stageId, [task]);
    } else {
      bucket.push(task);
    }
  }
  return Array.from(tasksByStage, ([stageId, stageTasks]) => ({
    stageId,
    ...rollupTasks(stageTasks),
  }));
}
|
|
421
|
+
|
|
422
|
+
/**
 * Convert one task run record into its exported metrics entry.
 *
 * Identity fields are copied as-is; a missing `stageId` or `kind` becomes
 * `null`. `model` and `thinking` prefer the usage record and fall back to
 * the task runtime; blank strings are exported as `null`.
 *
 * @param task - Task run record.
 * @returns The metrics entry for the task.
 */
function taskMetrics(task: WorkflowTaskRunRecord): WorkflowTaskMetrics {
  const provider = metricString(task.usage?.provider);
  const model = metricString(task.usage?.model ?? task.runtime.model);
  const thinking = metricString(task.usage?.thinking ?? task.runtime.thinking);
  const usage = taskUsageMetrics(task);
  const launchTiming = taskLaunchTimingMetrics(task);
  const retries = taskRetryMetrics(task);
  return {
    taskId: task.taskId,
    specId: task.specId,
    displayName: task.displayName,
    agent: task.agent,
    status: task.status,
    statusDetail: task.statusDetail,
    stageId: task.stageId ?? null,
    kind: task.kind ?? null,
    provider,
    model,
    thinking,
    usage,
    launchTiming,
    retries,
  };
}
|
|
440
|
+
|
|
441
|
+
/**
 * Produce the metrics export for a persisted workflow run record.
 *
 * The function only reads the record it is given: it does not infer pricing
 * and does not modify the run. The result is built from plain objects,
 * arrays, strings, numbers, booleans and `null`, and is the same for the
 * same input.
 *
 * @param run - Persisted run record, including its tasks.
 * @returns Run header, totals, per-stage and per-task metrics, plus metadata
 *   listing the tasks whose usage or timing is unavailable or incomplete.
 */
export function buildWorkflowRunMetrics(
  run: WorkflowRunRecord,
): WorkflowRunMetrics {
  const byTask = run.tasks.map((task) => taskMetrics(task));
  const totals = rollupTasks(byTask);
  const { usage, launchTiming } = totals;

  // `name` is emitted only when the record has one, and stays right after
  // `runId` so the key order of the export does not change.
  const header: WorkflowRunMetrics["run"] = {
    runId: run.runId,
    ...(run.name === undefined ? {} : { name: run.name }),
    type: run.type,
    status: run.status,
    createdAt: run.createdAt,
    updatedAt: run.updatedAt,
  };

  // The id lists are copied so metadata does not share arrays with `totals`.
  const metadata: WorkflowRunMetricsMetadata = {
    usageUnavailableTaskIds: usage.unavailableTaskIds.slice(),
    usageIncompleteTaskIds: usage.incompleteTaskIds.slice(),
    launchTimingUnavailableTaskIds: launchTiming.unavailableTaskIds.slice(),
    launchTimingIncompleteTaskIds: launchTiming.incompleteTaskIds.slice(),
    incomplete: usage.incomplete || launchTiming.incomplete,
    unavailable: usage.unavailable || launchTiming.unavailable,
  };

  return {
    schemaVersion: WORKFLOW_METRICS_SCHEMA_VERSION,
    pricingModelVersion: WORKFLOW_METRICS_PRICING_MODEL_VERSION,
    pricingSource: "provider-reported",
    costsAreProviderReported: true,
    run: header,
    totals,
    byStage: stageMetrics(byTask),
    byTask,
    metadata,
  };
}
|
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
type StructuredContract,
|
|
8
8
|
type StructuredContractIssue,
|
|
9
9
|
} from "./workflow-artifacts.js";
|
|
10
|
+
import { stripWorkflowPartialOutputSections } from "./workflow-partial-output.js";
|
|
10
11
|
import {
|
|
11
12
|
validateJsonSchema,
|
|
12
13
|
type JsonSchema,
|
|
@@ -157,10 +158,11 @@ export function parseWorkflowOutput(
|
|
|
157
158
|
raw: string,
|
|
158
159
|
options: ParseWorkflowOutputOptions = {},
|
|
159
160
|
): ParsedWorkflowOutput {
|
|
161
|
+
const protocolRaw = stripWorkflowPartialOutputSections(raw);
|
|
160
162
|
const issues: WorkflowOutputIssue[] = [];
|
|
161
163
|
const requirements = sectionRequirements(options);
|
|
162
|
-
const sections = collectSections(
|
|
163
|
-
validateSectionLayout(
|
|
164
|
+
const sections = collectSections(protocolRaw, requirements);
|
|
165
|
+
validateSectionLayout(protocolRaw, sections, issues, requirements);
|
|
164
166
|
|
|
165
167
|
const control = parseControlSection(
|
|
166
168
|
sectionText(sections, SECTION_CONTROL),
|
|
@@ -181,7 +183,7 @@ export function parseWorkflowOutput(
|
|
|
181
183
|
validateControlJsonSchema(control, issues, options.controlJsonSchema);
|
|
182
184
|
|
|
183
185
|
return buildParsedOutput(
|
|
184
|
-
|
|
186
|
+
protocolRaw,
|
|
185
187
|
issues,
|
|
186
188
|
{ control, analysis, refs },
|
|
187
189
|
requirements,
|