@agwab/pi-workflow 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/README.md +3 -1
  2. package/dist/artifact-graph-runtime.d.ts +1 -1
  3. package/dist/artifact-graph-runtime.js +10 -5
  4. package/dist/artifact-graph-schema.js +127 -5
  5. package/dist/compiler.js +52 -19
  6. package/dist/dynamic-generated-task-runtime.js +3 -1
  7. package/dist/dynamic-profiles.d.ts +1 -1
  8. package/dist/engine-run-graph.d.ts +3 -0
  9. package/dist/engine-run-graph.js +194 -4
  10. package/dist/engine.d.ts +5 -0
  11. package/dist/engine.js +389 -41
  12. package/dist/extension.d.ts +2 -1
  13. package/dist/extension.js +30 -8
  14. package/dist/index.d.ts +11 -3
  15. package/dist/index.js +6 -1
  16. package/dist/prompt-json.d.ts +7 -0
  17. package/dist/prompt-json.js +13 -0
  18. package/dist/roles.d.ts +1 -1
  19. package/dist/roles.js +5 -8
  20. package/dist/store.d.ts +20 -1
  21. package/dist/store.js +139 -35
  22. package/dist/strings.d.ts +11 -0
  23. package/dist/strings.js +24 -0
  24. package/dist/subagent-backend.js +710 -40
  25. package/dist/types.d.ts +107 -1
  26. package/dist/verification-ontology.d.ts +31 -0
  27. package/dist/verification-ontology.js +66 -0
  28. package/dist/workflow-artifact-tool.js +5 -6
  29. package/dist/workflow-artifacts.d.ts +7 -0
  30. package/dist/workflow-artifacts.js +55 -4
  31. package/dist/workflow-fetch-cache-extension.d.ts +1 -0
  32. package/dist/workflow-fetch-cache-extension.js +57 -9
  33. package/dist/workflow-metrics.d.ts +113 -0
  34. package/dist/workflow-metrics.js +272 -0
  35. package/dist/workflow-output-artifacts.js +5 -3
  36. package/dist/workflow-partial-output.d.ts +45 -0
  37. package/dist/workflow-partial-output.js +205 -0
  38. package/dist/workflow-progress-health.js +42 -10
  39. package/dist/workflow-runtime.js +10 -1
  40. package/dist/workflow-view.js +3 -1
  41. package/dist/workflow-web-source-extension.js +194 -52
  42. package/dist/workflow-web-source.d.ts +2 -1
  43. package/dist/workflow-web-source.js +109 -30
  44. package/docs/usage.md +76 -29
  45. package/node_modules/@agwab/pi-subagent/README.md +3 -3
  46. package/node_modules/@agwab/pi-subagent/api.mjs +1 -0
  47. package/node_modules/@agwab/pi-subagent/docs/usage.md +63 -12
  48. package/node_modules/@agwab/pi-subagent/package.json +2 -2
  49. package/node_modules/@agwab/pi-subagent/src/api.ts +54 -1
  50. package/node_modules/@agwab/pi-subagent/src/artifacts/registry.ts +9 -4
  51. package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +8 -0
  52. package/node_modules/@agwab/pi-subagent/src/core/constants.ts +9 -0
  53. package/node_modules/@agwab/pi-subagent/src/core/validation.ts +21 -0
  54. package/node_modules/@agwab/pi-subagent/src/index.ts +1046 -576
  55. package/node_modules/@agwab/pi-subagent/src/orchestrate/async.ts +279 -156
  56. package/node_modules/@agwab/pi-subagent/src/orchestrate/interrupt.ts +165 -89
  57. package/node_modules/@agwab/pi-subagent/src/orchestrate/reconcile.ts +111 -65
  58. package/node_modules/@agwab/pi-subagent/src/orchestrate/run-ref.ts +219 -0
  59. package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +88 -8
  60. package/node_modules/@agwab/pi-subagent/src/orchestrate/status.ts +614 -298
  61. package/node_modules/@agwab/pi-subagent/src/panel.ts +1356 -560
  62. package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +53 -5
  63. package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +13 -6
  64. package/package.json +2 -2
  65. package/skills/workflow-guide/SKILL.md +1 -0
  66. package/src/artifact-graph-runtime.ts +19 -13
  67. package/src/artifact-graph-schema.ts +143 -3
  68. package/src/cli.mjs +52 -0
  69. package/src/compiler.ts +63 -18
  70. package/src/dynamic-generated-task-runtime.ts +3 -1
  71. package/src/dynamic-profiles.ts +1 -1
  72. package/src/engine-run-graph.ts +246 -4
  73. package/src/engine.ts +545 -38
  74. package/src/extension.ts +36 -6
  75. package/src/index.ts +52 -1
  76. package/src/prompt-json.ts +13 -0
  77. package/src/roles.ts +6 -9
  78. package/src/store.ts +194 -42
  79. package/src/strings.ts +38 -0
  80. package/src/subagent-backend.ts +921 -62
  81. package/src/types.ts +116 -2
  82. package/src/verification-ontology.ts +88 -0
  83. package/src/workflow-artifact-tool.ts +5 -7
  84. package/src/workflow-artifacts.ts +83 -3
  85. package/src/workflow-fetch-cache-extension.ts +78 -13
  86. package/src/workflow-metrics.ts +478 -0
  87. package/src/workflow-output-artifacts.ts +5 -3
  88. package/src/workflow-partial-output.ts +299 -0
  89. package/src/workflow-progress-health.ts +47 -15
  90. package/src/workflow-runtime.ts +18 -2
  91. package/src/workflow-view.ts +2 -1
  92. package/src/workflow-web-source-extension.ts +654 -232
  93. package/src/workflow-web-source.ts +153 -39
  94. package/workflows/README.md +7 -25
  95. package/workflows/deep-research/batched-verification.spec.json +253 -0
  96. package/workflows/deep-research/helpers/batch-verification-candidates.mjs +136 -0
  97. package/workflows/deep-research/helpers/claim-evidence-gate.mjs +229 -36
  98. package/workflows/deep-research/helpers/final-audit-packet.mjs +1 -4
  99. package/workflows/deep-research/helpers/normalize-input-packet.mjs +81 -2
  100. package/workflows/deep-research/helpers/render-executive.mjs +40 -26
  101. package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +89 -15
  102. package/workflows/deep-research/helpers/shadow-select-verification.mjs +229 -0
  103. package/workflows/deep-research/helpers/verification-ontology.mjs +77 -0
  104. package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +3 -3
  105. package/workflows/deep-research/schemas/deep-research-research-questions-control.schema.json +38 -0
  106. package/workflows/deep-research/schemas/deep-research-sanitize-claims-control.schema.json +63 -0
  107. package/workflows/deep-research/schemas/deep-research-verify-claims-batch-control.schema.json +47 -0
  108. package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +13 -3
  109. package/workflows/deep-research/spec.json +32 -12
  110. package/workflows/impact-review/spec.json +3 -3
  111. package/workflows/spec-review/helpers/spec-review-pipeline.mjs +1 -8
  112. package/dist/dynamic-loader.d.ts +0 -25
  113. package/dist/dynamic-loader.js +0 -13
  114. package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stderr +0 -0
  115. package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stdout +0 -13
  116. package/src/dynamic-loader.ts +0 -49
  117. package/workflows/impact-review/schemas/docs-release-impact-control.schema.json +0 -42
  118. package/workflows/impact-review/schemas/security-performance-impact-control.schema.json +0 -42
  119. package/workflows/impact-review/schemas/state-data-impact-control.schema.json +0 -42
@@ -0,0 +1,478 @@
1
+ import type {
2
+ TaskRunStatus,
3
+ WorkflowRunRecord,
4
+ WorkflowRunStatus,
5
+ WorkflowRunType,
6
+ WorkflowTaskRunRecord,
7
+ WorkflowTaskUsageValues,
8
+ } from "./types.js";
9
+
10
+ export const WORKFLOW_METRICS_SCHEMA_VERSION = 1;
11
+ export const WORKFLOW_METRICS_PRICING_MODEL_VERSION = "provider-reported-v1";
12
+
13
+ export type WorkflowMetricsSchemaVersion =
14
+ typeof WORKFLOW_METRICS_SCHEMA_VERSION;
15
+ export type WorkflowMetricsPricingModelVersion =
16
+ typeof WORKFLOW_METRICS_PRICING_MODEL_VERSION;
17
+ export type WorkflowMetricsPricingSource = "provider-reported";
18
+ export type WorkflowMetricValue = number | null;
19
+
20
/**
 * Token and cost usage for one task, or the sum over a group of tasks.
 *
 * Each numeric metric is `null` when no finite value was available for it.
 * In a rollup, a single missing task value makes the summed metric `null`.
 */
export interface WorkflowUsageMetrics {
  inputTokens: WorkflowMetricValue;
  outputTokens: WorkflowMetricValue;
  totalTokens: WorkflowMetricValue;
  cachedInputTokens: WorkflowMetricValue;
  cacheCreationInputTokens: WorkflowMetricValue;
  cacheReadInputTokens: WorkflowMetricValue;
  reasoningTokens: WorkflowMetricValue;
  /**
   * Provider-reported cost only. This helper intentionally never derives cost
   * from token counts or model names.
   */
  costUsd: WorkflowMetricValue;
  /** Number of usage attempts (summed across tasks in a rollup). */
  attempts: number;
  /** True when at least one contributing task has unavailable usage data. */
  unavailable: boolean;
  /** True when any metric is missing or a contributing record is incomplete. */
  incomplete: boolean;
  /** Ids of the tasks whose usage data is unavailable. */
  unavailableTaskIds: string[];
  /** Ids of the tasks whose usage data is incomplete. */
  incompleteTaskIds: string[];
}
39
+
40
/**
 * Launch and execution timing for one task, or the sum over a group of tasks.
 *
 * Each duration is `null` when no finite value was available for it. In a
 * rollup, a single missing task value makes the summed duration `null`.
 */
export interface WorkflowLaunchTimingMetrics {
  launchWaitMs: WorkflowMetricValue;
  launchDurationMs: WorkflowMetricValue;
  executionMs: WorkflowMetricValue;
  totalMs: WorkflowMetricValue;
  launchSlotReleaseDelayMs: WorkflowMetricValue;
  /** Number of timing attempts (summed across tasks in a rollup). */
  attempts: number;
  /** True when at least one contributing task has no timing record. */
  unavailable: boolean;
  /** True when any duration is missing or a contributing record is incomplete. */
  incomplete: boolean;
  /** Ids of the tasks whose timing data is unavailable. */
  unavailableTaskIds: string[];
  /** Ids of the tasks whose timing data is incomplete. */
  incompleteTaskIds: string[];
}
52
+
53
/** Retry and resume counters for one task, or summed over a group of tasks. */
export interface WorkflowRetryMetrics {
  /** Launch retry attempts, including those recorded on resume events. */
  launchRetries: number;
  /** Output retry attempts, including those recorded on resume events. */
  outputRetries: number;
  /** Number of resume events. */
  resumeEvents: number;
  /** `launchRetries + outputRetries + resumeEvents`. */
  totalRetryEvents: number;
  /** Count of tasks with at least one retry event (0 or 1 for a single task). */
  tasksWithRetries: number;
}
60
+
61
/** Number of tasks in each status, plus the overall number of tasks counted. */
export interface WorkflowTaskStatusCounts {
  pending: number;
  running: number;
  blocked: number;
  completed: number;
  failed: number;
  skipped: number;
  interrupted: number;
  /** Total number of tasks counted. */
  total: number;
}
71
+
72
/** Aggregated metrics for a group of tasks (a whole run or a single stage). */
export interface WorkflowRunMetricsRollup {
  /** Number of tasks in the group. */
  taskCount: number;
  statusCounts: WorkflowTaskStatusCounts;
  usage: WorkflowUsageMetrics;
  launchTiming: WorkflowLaunchTimingMetrics;
  retries: WorkflowRetryMetrics;
}
79
+
80
/** Metrics for one task: identity and status fields plus its metric groups. */
export interface WorkflowTaskMetrics {
  taskId: string;
  specId: string;
  displayName: string;
  agent: string;
  status: TaskRunStatus;
  statusDetail: string;
  /** Stage the task belongs to; `null` when the task record has none. */
  stageId: string | null;
  /** Task kind; `null` when the task record has none. */
  kind: string | null;
  /** Provider from the usage record; `null` when absent or blank. */
  provider: string | null;
  /** Model from the usage record, else the task runtime; `null` when blank. */
  model: string | null;
  /** Thinking setting from the usage record, else the task runtime. */
  thinking: string | null;
  usage: WorkflowUsageMetrics;
  launchTiming: WorkflowLaunchTimingMetrics;
  retries: WorkflowRetryMetrics;
}
96
+
97
/** A rollup restricted to the tasks that share one stage id. */
export interface WorkflowStageMetrics extends WorkflowRunMetricsRollup {
  /** Stage id shared by the rolled-up tasks; `null` groups tasks without one. */
  stageId: string | null;
}
100
+
101
/** Run-level summary of which tasks have unavailable or incomplete metrics. */
export interface WorkflowRunMetricsMetadata {
  usageUnavailableTaskIds: string[];
  usageIncompleteTaskIds: string[];
  launchTimingUnavailableTaskIds: string[];
  launchTimingIncompleteTaskIds: string[];
  /** True when either the usage or the launch-timing totals are incomplete. */
  incomplete: boolean;
  /** True when either the usage or the launch-timing totals are unavailable. */
  unavailable: boolean;
}
109
+
110
/** Complete metrics export for one workflow run. */
export interface WorkflowRunMetrics {
  schemaVersion: WorkflowMetricsSchemaVersion;
  pricingModelVersion: WorkflowMetricsPricingModelVersion;
  pricingSource: WorkflowMetricsPricingSource;
  /** Always `true`: costs are provider-reported, never derived. */
  costsAreProviderReported: true;
  /** Identity and lifecycle fields copied from the run record. */
  run: {
    runId: string;
    name?: string;
    type: WorkflowRunType;
    status: WorkflowRunStatus;
    createdAt: string;
    updatedAt: string;
  };
  /** Rollup over every task in the run. */
  totals: WorkflowRunMetricsRollup;
  /** One rollup per distinct stage id. */
  byStage: WorkflowStageMetrics[];
  /** Per-task metrics, in the order of the run's task list. */
  byTask: WorkflowTaskMetrics[];
  metadata: WorkflowRunMetricsMetadata;
}
128
+
129
/** Name of a numeric usage field on a task usage record. */
type UsageMetricKey = keyof WorkflowTaskUsageValues;

/** Name of a numeric timing field reported in launch-timing metrics. */
type TimingMetricKey =
  | "launchWaitMs"
  | "launchDurationMs"
  | "executionMs"
  | "totalMs"
  | "launchSlotReleaseDelayMs";

/**
 * Usage fields extracted and summed by this module. The order here is also
 * the order in which the values are read.
 */
const USAGE_METRIC_KEYS: UsageMetricKey[] = [
  "inputTokens",
  "outputTokens",
  "totalTokens",
  "cachedInputTokens",
  "cacheCreationInputTokens",
  "cacheReadInputTokens",
  "reasoningTokens",
  "costUsd",
];

/** Timing fields extracted and summed by this module, in read order. */
const TIMING_METRIC_KEYS: TimingMetricKey[] = [
  "launchWaitMs",
  "launchDurationMs",
  "executionMs",
  "totalMs",
  "launchSlotReleaseDelayMs",
];
155
+
156
/**
 * Report whether `key` is an own (non-inherited) property of `record`.
 *
 * @param record - Object to inspect.
 * @param key - Property name to look for.
 * @returns `true` when `record` itself defines `key`, even if its value is
 *   `undefined`.
 */
function hasOwnValue(record: object, key: string): boolean {
  const definedOnRecord = Object.hasOwn(record, key);
  return definedOnRecord;
}
159
+
160
/**
 * Read a numeric metric from a loosely typed record.
 *
 * @param record - Source object; may be missing.
 * @param key - Name of the metric to read.
 * @returns The stored value when it is an own property holding a finite
 *   number; `null` in every other case (missing record, missing key,
 *   non-number, `NaN`, or an infinity).
 */
function metricValue(
  record: object | undefined,
  key: string,
): WorkflowMetricValue {
  if (record && hasOwnValue(record, key)) {
    const candidate = (record as Record<string, unknown>)[key];
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      return candidate;
    }
  }
  return null;
}
168
+
169
/**
 * Normalize an optional label to either a usable string or `null`.
 *
 * @param value - Candidate value of any type.
 * @returns `value` unchanged (not trimmed) when it is a string containing at
 *   least one non-whitespace character; otherwise `null`.
 */
function metricString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  if (!value.trim()) return null;
  return value;
}
172
+
173
/**
 * Add up a list of metric readings, treating any gap as "unknown total".
 *
 * @param values - Readings to add; `null` marks a missing reading.
 * @returns `{ value: null, incomplete: true }` when the list is empty or
 *   contains a `null`; otherwise the sum with `incomplete: false`.
 */
function sumMetricValues(values: WorkflowMetricValue[]): {
  value: WorkflowMetricValue;
  incomplete: boolean;
} {
  const hasGap = values.some((entry) => entry === null);
  if (values.length === 0 || hasGap) {
    return { value: null, incomplete: true };
  }
  // No null entries remain at this point, so the cast only drops `| null`.
  const total = values.reduce<number>(
    (sum, entry) => sum + (entry as number),
    0,
  );
  return { value: total, incomplete: false };
}
185
+
186
/**
 * Number of usage attempts recorded for a task.
 *
 * Prefers the aggregate's `attempts` counter, falls back to the length of the
 * per-attempt list, and yields 0 when neither is present.
 */
function usageAttempts(task: WorkflowTaskRunRecord): number {
  const usage = task.usage;
  const aggregated = usage?.aggregate?.attempts;
  if (aggregated != null) return aggregated;
  return usage?.attempts?.length ?? 0;
}
189
+
190
/**
 * Number of timing attempts recorded for a task.
 *
 * Prefers the aggregate's `attempts` counter, falls back to the length of the
 * per-attempt list, and yields 0 when neither is present.
 */
function timingAttempts(task: WorkflowTaskRunRecord): number {
  const timing = task.timing;
  const aggregated = timing?.aggregate?.attempts;
  if (aggregated != null) return aggregated;
  return timing?.attempts?.length ?? 0;
}
193
+
194
/**
 * Extract usage metrics for a single task.
 *
 * Values are read from the usage aggregate when one exists, otherwise from
 * the usage record itself. Usage counts as unavailable when the task has no
 * usage record or any attempt is flagged unavailable. It counts as incomplete
 * when it is unavailable, when the record or its aggregate is flagged
 * incomplete, or when any individual metric could not be read.
 *
 * @param task - Persisted task record to read.
 * @returns Usage metrics; the id lists hold this task's id when the matching
 *   flag is set and are empty otherwise.
 */
function taskUsageMetrics(task: WorkflowTaskRunRecord): WorkflowUsageMetrics {
  const record = task.usage;
  const valueSource = record?.aggregate ?? record;
  const unavailable =
    record === undefined
      ? true
      : record.attempts?.some((attempt) => attempt.unavailable) === true;

  const values = {} as Record<UsageMetricKey, WorkflowMetricValue>;
  let anyMissing = false;
  for (const key of USAGE_METRIC_KEYS) {
    const reading = metricValue(valueSource, key);
    values[key] = reading;
    if (reading === null) anyMissing = true;
  }

  const incomplete =
    unavailable ||
    record?.incomplete === true ||
    record?.aggregate?.incomplete === true ||
    anyMissing;

  return {
    inputTokens: values.inputTokens,
    outputTokens: values.outputTokens,
    totalTokens: values.totalTokens,
    cachedInputTokens: values.cachedInputTokens,
    cacheCreationInputTokens: values.cacheCreationInputTokens,
    cacheReadInputTokens: values.cacheReadInputTokens,
    reasoningTokens: values.reasoningTokens,
    costUsd: values.costUsd,
    attempts: usageAttempts(task),
    unavailable,
    incomplete,
    unavailableTaskIds: unavailable ? [task.taskId] : [],
    incompleteTaskIds: incomplete ? [task.taskId] : [],
  };
}
224
+
225
/**
 * Extract launch-timing metrics for a single task.
 *
 * `launchSlotReleaseDelayMs` is always read from the timing record itself.
 * Every other duration is read from the timing aggregate when one exists,
 * otherwise from the timing record. Timing counts as unavailable when the
 * task has no timing record. It counts as incomplete when it is unavailable,
 * when the aggregate is flagged incomplete, or when any duration could not be
 * read.
 *
 * @param task - Persisted task record to read.
 * @returns Timing metrics; the id lists hold this task's id when the matching
 *   flag is set and are empty otherwise.
 */
function taskLaunchTimingMetrics(
  task: WorkflowTaskRunRecord,
): WorkflowLaunchTimingMetrics {
  const record = task.timing;
  const aggregateSource = record?.aggregate ?? record;
  const unavailable = record === undefined;

  const values = {} as Record<TimingMetricKey, WorkflowMetricValue>;
  let anyMissing = false;
  for (const key of TIMING_METRIC_KEYS) {
    const source =
      key === "launchSlotReleaseDelayMs" ? record : aggregateSource;
    const reading = metricValue(source, key);
    values[key] = reading;
    if (reading === null) anyMissing = true;
  }

  const incomplete =
    unavailable || record?.aggregate?.incomplete === true || anyMissing;

  return {
    launchWaitMs: values.launchWaitMs,
    launchDurationMs: values.launchDurationMs,
    executionMs: values.executionMs,
    totalMs: values.totalMs,
    launchSlotReleaseDelayMs: values.launchSlotReleaseDelayMs,
    attempts: timingAttempts(task),
    unavailable,
    incomplete,
    unavailableTaskIds: unavailable ? [task.taskId] : [],
    incompleteTaskIds: incomplete ? [task.taskId] : [],
  };
}
257
+
258
/**
 * Total one retry counter across all of a task's resume events.
 *
 * @param task - Persisted task record; a missing `resumeEvents` list counts
 *   as empty.
 * @param key - Which per-event counter to add up.
 * @returns Sum of the counter over all events, skipping events where it is
 *   not a finite number.
 */
function sumResumeRetryAttempts(
  task: WorkflowTaskRunRecord,
  key: "launchRetryAttempts" | "outputRetryAttempts",
): number {
  let total = 0;
  for (const event of task.resumeEvents ?? []) {
    const attempts = event[key];
    if (typeof attempts === "number" && Number.isFinite(attempts)) {
      total += attempts;
    }
  }
  return total;
}
269
+
270
/**
 * Compute retry and resume counters for a single task.
 *
 * Launch and output retries combine the task's own retry attempts (0 when the
 * retry record is absent) with the matching counters on its resume events.
 *
 * @param task - Persisted task record to read.
 * @returns Counters for this task; `tasksWithRetries` is 1 when any retry or
 *   resume event was recorded and 0 otherwise.
 */
function taskRetryMetrics(task: WorkflowTaskRunRecord): WorkflowRetryMetrics {
  const ownLaunchAttempts = task.launchRetry?.attempts ?? 0;
  const ownOutputAttempts = task.outputRetry?.attempts ?? 0;

  const launchRetries =
    ownLaunchAttempts + sumResumeRetryAttempts(task, "launchRetryAttempts");
  const outputRetries =
    ownOutputAttempts + sumResumeRetryAttempts(task, "outputRetryAttempts");
  const resumeEvents = task.resumeEvents?.length ?? 0;

  const totalRetryEvents = launchRetries + outputRetries + resumeEvents;
  const hadRetries = totalRetryEvents > 0;

  return {
    launchRetries,
    outputRetries,
    resumeEvents,
    totalRetryEvents,
    tasksWithRetries: hadRetries ? 1 : 0,
  };
}
287
+
288
/**
 * Create a fresh status tally with every counter, including `total`, at zero.
 *
 * A new object is returned on every call so callers can mutate it freely.
 */
function emptyStatusCounts(): WorkflowTaskStatusCounts {
  const zeroed: WorkflowTaskStatusCounts = {
    pending: 0,
    running: 0,
    blocked: 0,
    completed: 0,
    failed: 0,
    skipped: 0,
    interrupted: 0,
    total: 0,
  };
  return zeroed;
}
300
+
301
/**
 * Combine per-task usage metrics into one rollup.
 *
 * Each metric is summed across tasks; a metric is `null` when the task list
 * is empty or any task lacks that metric. The rollup is unavailable when any
 * task is unavailable. It is incomplete when any task is incomplete or any
 * summed metric is incomplete.
 *
 * @param tasks - Per-task metrics to combine.
 * @returns Summed usage, with task ids concatenated in task order.
 */
function rollupUsage(tasks: WorkflowTaskMetrics[]): WorkflowUsageMetrics {
  const sums = {} as Record<
    UsageMetricKey,
    ReturnType<typeof sumMetricValues>
  >;
  let anySumIncomplete = false;
  for (const key of USAGE_METRIC_KEYS) {
    const summed = sumMetricValues(tasks.map((task) => task.usage[key]));
    sums[key] = summed;
    if (summed.incomplete) anySumIncomplete = true;
  }

  const unavailableTaskIds: string[] = [];
  const incompleteTaskIds: string[] = [];
  let attempts = 0;
  for (const { usage } of tasks) {
    for (const id of usage.unavailableTaskIds) unavailableTaskIds.push(id);
    for (const id of usage.incompleteTaskIds) incompleteTaskIds.push(id);
    attempts += usage.attempts;
  }

  return {
    inputTokens: sums.inputTokens.value,
    outputTokens: sums.outputTokens.value,
    totalTokens: sums.totalTokens.value,
    cachedInputTokens: sums.cachedInputTokens.value,
    cacheCreationInputTokens: sums.cacheCreationInputTokens.value,
    cacheReadInputTokens: sums.cacheReadInputTokens.value,
    reasoningTokens: sums.reasoningTokens.value,
    costUsd: sums.costUsd.value,
    attempts,
    unavailable: unavailableTaskIds.length > 0,
    incomplete: incompleteTaskIds.length > 0 || anySumIncomplete,
    unavailableTaskIds,
    incompleteTaskIds,
  };
}
332
+
333
/**
 * Combine per-task launch-timing metrics into one rollup.
 *
 * Each duration is summed across tasks; a duration is `null` when the task
 * list is empty or any task lacks that duration. The rollup is unavailable
 * when any task is unavailable. It is incomplete when any task is incomplete
 * or any summed duration is incomplete.
 *
 * @param tasks - Per-task metrics to combine.
 * @returns Summed timing, with task ids concatenated in task order.
 */
function rollupLaunchTiming(
  tasks: WorkflowTaskMetrics[],
): WorkflowLaunchTimingMetrics {
  const sums = {} as Record<
    TimingMetricKey,
    ReturnType<typeof sumMetricValues>
  >;
  let anySumIncomplete = false;
  for (const key of TIMING_METRIC_KEYS) {
    const summed = sumMetricValues(
      tasks.map((task) => task.launchTiming[key]),
    );
    sums[key] = summed;
    if (summed.incomplete) anySumIncomplete = true;
  }

  const unavailableTaskIds: string[] = [];
  const incompleteTaskIds: string[] = [];
  let attempts = 0;
  for (const { launchTiming } of tasks) {
    for (const id of launchTiming.unavailableTaskIds) {
      unavailableTaskIds.push(id);
    }
    for (const id of launchTiming.incompleteTaskIds) {
      incompleteTaskIds.push(id);
    }
    attempts += launchTiming.attempts;
  }

  return {
    launchWaitMs: sums.launchWaitMs.value,
    launchDurationMs: sums.launchDurationMs.value,
    executionMs: sums.executionMs.value,
    totalMs: sums.totalMs.value,
    launchSlotReleaseDelayMs: sums.launchSlotReleaseDelayMs.value,
    attempts,
    unavailable: unavailableTaskIds.length > 0,
    incomplete: incompleteTaskIds.length > 0 || anySumIncomplete,
    unavailableTaskIds,
    incompleteTaskIds,
  };
}
366
+
367
/**
 * Combine per-task retry counters into one rollup.
 *
 * @param tasks - Per-task metrics to combine.
 * @returns Summed launch retries, output retries and resume events, their
 *   combined total, and the number of tasks that had any retry event.
 */
function rollupRetries(tasks: WorkflowTaskMetrics[]): WorkflowRetryMetrics {
  let launchRetries = 0;
  let outputRetries = 0;
  let resumeEvents = 0;
  let tasksWithRetries = 0;
  for (const { retries } of tasks) {
    launchRetries += retries.launchRetries;
    outputRetries += retries.outputRetries;
    resumeEvents += retries.resumeEvents;
    tasksWithRetries += retries.tasksWithRetries;
  }
  return {
    launchRetries,
    outputRetries,
    resumeEvents,
    totalRetryEvents: launchRetries + outputRetries + resumeEvents,
    tasksWithRetries,
  };
}
391
+
392
/**
 * Tally tasks by status.
 *
 * Every task increments `total`. The per-status counter is incremented only
 * when the task's status names one of the known counters. Run records are
 * loaded from persisted data, so an unexpected status string could otherwise
 * create a stray `NaN` property on the result, and a status of `"total"`
 * would double-count.
 *
 * @param tasks - Per-task metrics to tally.
 * @returns Counts per known status plus the overall task count.
 */
function statusCounts(tasks: WorkflowTaskMetrics[]): WorkflowTaskStatusCounts {
  const counts = emptyStatusCounts();
  for (const task of tasks) {
    counts.total += 1;
    // Widen to string so the runtime guard can compare against any value.
    const status: string = task.status;
    if (status !== "total" && Object.hasOwn(counts, status)) {
      counts[task.status] += 1;
    }
  }
  return counts;
}
400
+
401
/**
 * Build the full rollup (task count, status tally, usage, launch timing and
 * retries) for a group of tasks.
 *
 * @param tasks - Per-task metrics belonging to the group.
 * @returns Aggregated metrics for the group.
 */
function rollupTasks(tasks: WorkflowTaskMetrics[]): WorkflowRunMetricsRollup {
  const tally = statusCounts(tasks);
  const usage = rollupUsage(tasks);
  const launchTiming = rollupLaunchTiming(tasks);
  const retries = rollupRetries(tasks);
  return {
    taskCount: tasks.length,
    statusCounts: tally,
    usage,
    launchTiming,
    retries,
  };
}
410
+
411
/**
 * Group tasks by stage id and roll each group up.
 *
 * Stages appear in the order their first task appears in `tasks`, and tasks
 * keep their relative order within a stage. Tasks whose `stageId` is `null`
 * form a group of their own. Grouping is done in a single pass with a `Map`
 * (which iterates in insertion order) rather than rescanning the task list
 * once per stage.
 *
 * @param tasks - Per-task metrics for the whole run.
 * @returns One rollup per distinct stage id.
 */
function stageMetrics(tasks: WorkflowTaskMetrics[]): WorkflowStageMetrics[] {
  const tasksByStage = new Map<string | null, WorkflowTaskMetrics[]>();
  for (const task of tasks) {
    const bucket = tasksByStage.get(task.stageId);
    if (bucket === undefined) {
      tasksByStage.set(task.stageId, [task]);
    } else {
      bucket.push(task);
    }
  }
  return Array.from(tasksByStage, ([stageId, stageTasks]) => ({
    stageId,
    ...rollupTasks(stageTasks),
  }));
}
421
+
422
/**
 * Convert one persisted task record into its metrics entry.
 *
 * Identity and status fields are copied as-is. A missing `stageId` or `kind`
 * becomes `null`. `model` and `thinking` prefer the usage record and fall
 * back to the task runtime; they and `provider` become `null` when not a
 * non-blank string.
 *
 * @param task - Persisted task record to read.
 * @returns Metrics entry for the task.
 */
function taskMetrics(task: WorkflowTaskRunRecord): WorkflowTaskMetrics {
  const recordedUsage = task.usage;
  const provider = metricString(recordedUsage?.provider);
  const model = metricString(recordedUsage?.model ?? task.runtime.model);
  const thinking = metricString(
    recordedUsage?.thinking ?? task.runtime.thinking,
  );
  return {
    taskId: task.taskId,
    specId: task.specId,
    displayName: task.displayName,
    agent: task.agent,
    status: task.status,
    statusDetail: task.statusDetail,
    stageId: task.stageId ?? null,
    kind: task.kind ?? null,
    provider,
    model,
    thinking,
    usage: taskUsageMetrics(task),
    launchTiming: taskLaunchTimingMetrics(task),
    retries: taskRetryMetrics(task),
  };
}
440
+
441
/**
 * Produce a deterministic, JSON-serializable metrics export for a persisted
 * workflow run record.
 *
 * The function is pure: it reads only the record it is given, never infers
 * pricing, and leaves the run untouched.
 *
 * @param run - Persisted run record to summarize.
 * @returns Run identity, run-wide totals, per-stage and per-task metrics, and
 *   metadata listing the tasks with unavailable or incomplete data. The
 *   `name` key is present only when the run has a name.
 */
export function buildWorkflowRunMetrics(
  run: WorkflowRunRecord,
): WorkflowRunMetrics {
  const byTask = run.tasks.map((task) => taskMetrics(task));
  const totals = rollupTasks(byTask);
  const { usage, launchTiming } = totals;

  // Keep `name` out of the export entirely when the run has none.
  const optionalName = run.name === undefined ? {} : { name: run.name };

  // The id lists are copied so metadata does not alias the totals' arrays.
  const metadata: WorkflowRunMetricsMetadata = {
    usageUnavailableTaskIds: [...usage.unavailableTaskIds],
    usageIncompleteTaskIds: [...usage.incompleteTaskIds],
    launchTimingUnavailableTaskIds: [...launchTiming.unavailableTaskIds],
    launchTimingIncompleteTaskIds: [...launchTiming.incompleteTaskIds],
    incomplete: usage.incomplete || launchTiming.incomplete,
    unavailable: usage.unavailable || launchTiming.unavailable,
  };

  return {
    schemaVersion: WORKFLOW_METRICS_SCHEMA_VERSION,
    pricingModelVersion: WORKFLOW_METRICS_PRICING_MODEL_VERSION,
    pricingSource: "provider-reported",
    costsAreProviderReported: true,
    run: {
      runId: run.runId,
      ...optionalName,
      type: run.type,
      status: run.status,
      createdAt: run.createdAt,
      updatedAt: run.updatedAt,
    },
    totals,
    byStage: stageMetrics(byTask),
    byTask,
    metadata,
  };
}
@@ -7,6 +7,7 @@ import {
7
7
  type StructuredContract,
8
8
  type StructuredContractIssue,
9
9
  } from "./workflow-artifacts.js";
10
+ import { stripWorkflowPartialOutputSections } from "./workflow-partial-output.js";
10
11
  import {
11
12
  validateJsonSchema,
12
13
  type JsonSchema,
@@ -157,10 +158,11 @@ export function parseWorkflowOutput(
157
158
  raw: string,
158
159
  options: ParseWorkflowOutputOptions = {},
159
160
  ): ParsedWorkflowOutput {
161
+ const protocolRaw = stripWorkflowPartialOutputSections(raw);
160
162
  const issues: WorkflowOutputIssue[] = [];
161
163
  const requirements = sectionRequirements(options);
162
- const sections = collectSections(raw, requirements);
163
- validateSectionLayout(raw, sections, issues, requirements);
164
+ const sections = collectSections(protocolRaw, requirements);
165
+ validateSectionLayout(protocolRaw, sections, issues, requirements);
164
166
 
165
167
  const control = parseControlSection(
166
168
  sectionText(sections, SECTION_CONTROL),
@@ -181,7 +183,7 @@ export function parseWorkflowOutput(
181
183
  validateControlJsonSchema(control, issues, options.controlJsonSchema);
182
184
 
183
185
  return buildParsedOutput(
184
- raw,
186
+ protocolRaw,
185
187
  issues,
186
188
  { control, analysis, refs },
187
189
  requirements,