@agwab/pi-workflow 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/artifact-graph-runtime.d.ts +1 -1
- package/dist/artifact-graph-runtime.js +10 -5
- package/dist/artifact-graph-schema.js +127 -5
- package/dist/compiler.js +52 -19
- package/dist/dynamic-generated-task-runtime.js +3 -1
- package/dist/dynamic-profiles.d.ts +1 -1
- package/dist/engine-run-graph.d.ts +3 -0
- package/dist/engine-run-graph.js +194 -4
- package/dist/engine.d.ts +5 -0
- package/dist/engine.js +389 -41
- package/dist/extension.d.ts +2 -1
- package/dist/extension.js +30 -8
- package/dist/index.d.ts +11 -3
- package/dist/index.js +6 -1
- package/dist/prompt-json.d.ts +7 -0
- package/dist/prompt-json.js +13 -0
- package/dist/roles.d.ts +1 -1
- package/dist/roles.js +5 -8
- package/dist/store.d.ts +20 -1
- package/dist/store.js +139 -35
- package/dist/strings.d.ts +11 -0
- package/dist/strings.js +24 -0
- package/dist/subagent-backend.js +710 -40
- package/dist/types.d.ts +107 -1
- package/dist/verification-ontology.d.ts +31 -0
- package/dist/verification-ontology.js +66 -0
- package/dist/workflow-artifact-tool.js +5 -6
- package/dist/workflow-artifacts.d.ts +7 -0
- package/dist/workflow-artifacts.js +55 -4
- package/dist/workflow-fetch-cache-extension.d.ts +1 -0
- package/dist/workflow-fetch-cache-extension.js +57 -9
- package/dist/workflow-metrics.d.ts +113 -0
- package/dist/workflow-metrics.js +272 -0
- package/dist/workflow-output-artifacts.js +5 -3
- package/dist/workflow-partial-output.d.ts +45 -0
- package/dist/workflow-partial-output.js +205 -0
- package/dist/workflow-progress-health.js +42 -10
- package/dist/workflow-runtime.js +10 -1
- package/dist/workflow-view.js +3 -1
- package/dist/workflow-web-source-extension.js +194 -52
- package/dist/workflow-web-source.d.ts +2 -1
- package/dist/workflow-web-source.js +109 -30
- package/docs/usage.md +76 -29
- package/node_modules/@agwab/pi-subagent/README.md +3 -3
- package/node_modules/@agwab/pi-subagent/api.mjs +1 -0
- package/node_modules/@agwab/pi-subagent/docs/usage.md +63 -12
- package/node_modules/@agwab/pi-subagent/package.json +2 -2
- package/node_modules/@agwab/pi-subagent/src/api.ts +54 -1
- package/node_modules/@agwab/pi-subagent/src/artifacts/registry.ts +9 -4
- package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +8 -0
- package/node_modules/@agwab/pi-subagent/src/core/constants.ts +9 -0
- package/node_modules/@agwab/pi-subagent/src/core/validation.ts +21 -0
- package/node_modules/@agwab/pi-subagent/src/index.ts +1046 -576
- package/node_modules/@agwab/pi-subagent/src/orchestrate/async.ts +279 -156
- package/node_modules/@agwab/pi-subagent/src/orchestrate/interrupt.ts +165 -89
- package/node_modules/@agwab/pi-subagent/src/orchestrate/reconcile.ts +111 -65
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run-ref.ts +219 -0
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +88 -8
- package/node_modules/@agwab/pi-subagent/src/orchestrate/status.ts +614 -298
- package/node_modules/@agwab/pi-subagent/src/panel.ts +1356 -560
- package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +53 -5
- package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +13 -6
- package/package.json +2 -2
- package/skills/workflow-guide/SKILL.md +1 -0
- package/src/artifact-graph-runtime.ts +19 -13
- package/src/artifact-graph-schema.ts +143 -3
- package/src/cli.mjs +52 -0
- package/src/compiler.ts +63 -18
- package/src/dynamic-generated-task-runtime.ts +3 -1
- package/src/dynamic-profiles.ts +1 -1
- package/src/engine-run-graph.ts +246 -4
- package/src/engine.ts +545 -38
- package/src/extension.ts +36 -6
- package/src/index.ts +52 -1
- package/src/prompt-json.ts +13 -0
- package/src/roles.ts +6 -9
- package/src/store.ts +194 -42
- package/src/strings.ts +38 -0
- package/src/subagent-backend.ts +921 -62
- package/src/types.ts +116 -2
- package/src/verification-ontology.ts +88 -0
- package/src/workflow-artifact-tool.ts +5 -7
- package/src/workflow-artifacts.ts +83 -3
- package/src/workflow-fetch-cache-extension.ts +78 -13
- package/src/workflow-metrics.ts +478 -0
- package/src/workflow-output-artifacts.ts +5 -3
- package/src/workflow-partial-output.ts +299 -0
- package/src/workflow-progress-health.ts +47 -15
- package/src/workflow-runtime.ts +18 -2
- package/src/workflow-view.ts +2 -1
- package/src/workflow-web-source-extension.ts +654 -232
- package/src/workflow-web-source.ts +153 -39
- package/workflows/README.md +7 -25
- package/workflows/deep-research/batched-verification.spec.json +253 -0
- package/workflows/deep-research/helpers/batch-verification-candidates.mjs +136 -0
- package/workflows/deep-research/helpers/claim-evidence-gate.mjs +229 -36
- package/workflows/deep-research/helpers/final-audit-packet.mjs +1 -4
- package/workflows/deep-research/helpers/normalize-input-packet.mjs +81 -2
- package/workflows/deep-research/helpers/render-executive.mjs +40 -26
- package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +89 -15
- package/workflows/deep-research/helpers/shadow-select-verification.mjs +229 -0
- package/workflows/deep-research/helpers/verification-ontology.mjs +77 -0
- package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +3 -3
- package/workflows/deep-research/schemas/deep-research-research-questions-control.schema.json +38 -0
- package/workflows/deep-research/schemas/deep-research-sanitize-claims-control.schema.json +63 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-batch-control.schema.json +47 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +13 -3
- package/workflows/deep-research/spec.json +32 -12
- package/workflows/impact-review/spec.json +3 -3
- package/workflows/spec-review/helpers/spec-review-pipeline.mjs +1 -8
- package/dist/dynamic-loader.d.ts +0 -25
- package/dist/dynamic-loader.js +0 -13
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stderr +0 -0
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stdout +0 -13
- package/src/dynamic-loader.ts +0 -49
- package/workflows/impact-review/schemas/docs-release-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/security-performance-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/state-data-impact-control.schema.json +0 -42
package/dist/types.d.ts
CHANGED
|
@@ -167,6 +167,10 @@ export interface ArtifactGraphStageSpec {
|
|
|
167
167
|
from?: string | string[] | {
|
|
168
168
|
source: string;
|
|
169
169
|
path: string;
|
|
170
|
+
streaming?: {
|
|
171
|
+
enabled: true;
|
|
172
|
+
minChunk?: number;
|
|
173
|
+
};
|
|
170
174
|
};
|
|
171
175
|
after?: string | string[];
|
|
172
176
|
sourcePolicy?: "success" | "partial" | "require-success";
|
|
@@ -177,6 +181,7 @@ export interface ArtifactGraphStageSpec {
|
|
|
177
181
|
inputPolicy?: {
|
|
178
182
|
requiredReads?: string[];
|
|
179
183
|
enforcement?: "fail";
|
|
184
|
+
artifactAccess?: "enabled" | "none";
|
|
180
185
|
};
|
|
181
186
|
output?: {
|
|
182
187
|
controlSchema?: string;
|
|
@@ -188,6 +193,9 @@ export interface ArtifactGraphStageSpec {
|
|
|
188
193
|
minItems?: number;
|
|
189
194
|
};
|
|
190
195
|
maxDigestChars?: number;
|
|
196
|
+
partial?: {
|
|
197
|
+
paths: string[];
|
|
198
|
+
};
|
|
191
199
|
};
|
|
192
200
|
each?: Record<string, unknown>;
|
|
193
201
|
stages?: ArtifactGraphStageSpec[];
|
|
@@ -433,8 +441,12 @@ export interface CompiledArtifactGraphTask {
|
|
|
433
441
|
controlSchema?: string;
|
|
434
442
|
controlSchemaPath?: string;
|
|
435
443
|
maxDigestChars?: number;
|
|
444
|
+
partial?: {
|
|
445
|
+
paths: string[];
|
|
446
|
+
};
|
|
436
447
|
};
|
|
437
448
|
requiredReads: string[];
|
|
449
|
+
artifactAccess: "enabled" | "none";
|
|
438
450
|
sourceProjection?: {
|
|
439
451
|
include?: string[];
|
|
440
452
|
maxChars?: number;
|
|
@@ -484,6 +496,13 @@ export interface CompiledTask {
|
|
|
484
496
|
branchId?: string;
|
|
485
497
|
outputProfile?: string;
|
|
486
498
|
};
|
|
499
|
+
foreachGenerated?: {
|
|
500
|
+
placeholderSpecId: string;
|
|
501
|
+
itemHash?: string;
|
|
502
|
+
itemSourceSpecId?: string;
|
|
503
|
+
itemSourceKind?: "control" | "partial";
|
|
504
|
+
itemRef?: string;
|
|
505
|
+
};
|
|
487
506
|
loopChild?: CompiledLoopChildTaskRef;
|
|
488
507
|
loopPlaceholder?: {
|
|
489
508
|
loopId: string;
|
|
@@ -496,6 +515,80 @@ export interface CompiledTask {
|
|
|
496
515
|
}
|
|
497
516
|
export type TaskRunStatus = "pending" | "running" | "blocked" | "completed" | "failed" | "skipped" | "interrupted";
|
|
498
517
|
export type WorkflowRunStatus = "running" | "blocked" | "completed" | "failed" | "interrupted";
|
|
518
|
+
export interface WorkflowTaskUsageValues {
|
|
519
|
+
inputTokens?: number | null;
|
|
520
|
+
outputTokens?: number | null;
|
|
521
|
+
totalTokens?: number | null;
|
|
522
|
+
cachedInputTokens?: number | null;
|
|
523
|
+
cacheCreationInputTokens?: number | null;
|
|
524
|
+
cacheReadInputTokens?: number | null;
|
|
525
|
+
reasoningTokens?: number | null;
|
|
526
|
+
costUsd?: number | null;
|
|
527
|
+
}
|
|
528
|
+
export interface WorkflowTaskUsageAttemptRecord extends WorkflowTaskUsageValues {
|
|
529
|
+
source: string;
|
|
530
|
+
capturedAt: string;
|
|
531
|
+
provider?: string;
|
|
532
|
+
model?: string;
|
|
533
|
+
thinking?: ThinkingLevel | string;
|
|
534
|
+
backendRunId?: string;
|
|
535
|
+
backendAttemptId?: string;
|
|
536
|
+
unavailable?: true;
|
|
537
|
+
raw?: unknown;
|
|
538
|
+
}
|
|
539
|
+
export interface WorkflowTaskUsageAggregateRecord extends WorkflowTaskUsageValues {
|
|
540
|
+
attempts: number;
|
|
541
|
+
incomplete?: boolean;
|
|
542
|
+
}
|
|
543
|
+
export interface WorkflowTaskUsageRecord extends WorkflowTaskUsageValues {
|
|
544
|
+
source: "pi-subagent";
|
|
545
|
+
capturedAt: string;
|
|
546
|
+
provider?: string;
|
|
547
|
+
model?: string;
|
|
548
|
+
thinking?: ThinkingLevel | string;
|
|
549
|
+
incomplete?: boolean;
|
|
550
|
+
aggregate?: WorkflowTaskUsageAggregateRecord;
|
|
551
|
+
attempts?: WorkflowTaskUsageAttemptRecord[];
|
|
552
|
+
}
|
|
553
|
+
export interface WorkflowTaskTimingAttemptRecord {
|
|
554
|
+
source: string;
|
|
555
|
+
capturedAt: string;
|
|
556
|
+
backendRunId?: string;
|
|
557
|
+
backendAttemptId?: string;
|
|
558
|
+
launchQueuedAt?: string;
|
|
559
|
+
launchStartedAt?: string;
|
|
560
|
+
launchCompletedAt?: string;
|
|
561
|
+
launchWaitMs?: number;
|
|
562
|
+
launchDurationMs?: number;
|
|
563
|
+
executionStartedAt?: string;
|
|
564
|
+
executionCompletedAt?: string;
|
|
565
|
+
executionMs?: number | null;
|
|
566
|
+
totalMs?: number;
|
|
567
|
+
}
|
|
568
|
+
export interface WorkflowTaskTimingAggregateRecord {
|
|
569
|
+
attempts: number;
|
|
570
|
+
launchWaitMs?: number | null;
|
|
571
|
+
launchDurationMs?: number | null;
|
|
572
|
+
executionMs?: number | null;
|
|
573
|
+
totalMs?: number | null;
|
|
574
|
+
incomplete?: boolean;
|
|
575
|
+
}
|
|
576
|
+
export interface WorkflowTaskTimingRecord {
|
|
577
|
+
source: "pi-workflow";
|
|
578
|
+
capturedAt: string;
|
|
579
|
+
launchQueuedAt?: string;
|
|
580
|
+
launchStartedAt?: string;
|
|
581
|
+
launchCompletedAt?: string;
|
|
582
|
+
launchWaitMs?: number;
|
|
583
|
+
launchDurationMs?: number;
|
|
584
|
+
launchSlotReleaseDelayMs?: number;
|
|
585
|
+
executionStartedAt?: string;
|
|
586
|
+
executionCompletedAt?: string;
|
|
587
|
+
executionMs?: number | null;
|
|
588
|
+
totalMs?: number;
|
|
589
|
+
aggregate?: WorkflowTaskTimingAggregateRecord;
|
|
590
|
+
attempts?: WorkflowTaskTimingAttemptRecord[];
|
|
591
|
+
}
|
|
499
592
|
export interface WorkflowTaskRunRecord {
|
|
500
593
|
taskId: string;
|
|
501
594
|
specId: string;
|
|
@@ -534,6 +627,8 @@ export interface WorkflowTaskRunRecord {
|
|
|
534
627
|
startedAt?: string;
|
|
535
628
|
completedAt?: string;
|
|
536
629
|
elapsedMs?: number;
|
|
630
|
+
usage?: WorkflowTaskUsageRecord;
|
|
631
|
+
timing?: WorkflowTaskTimingRecord;
|
|
537
632
|
exitCode?: number;
|
|
538
633
|
files: {
|
|
539
634
|
systemPrompt: string;
|
|
@@ -562,6 +657,13 @@ export interface WorkflowTaskRunRecord {
|
|
|
562
657
|
branchId?: string;
|
|
563
658
|
outputProfile?: string;
|
|
564
659
|
};
|
|
660
|
+
foreachGenerated?: {
|
|
661
|
+
placeholderSpecId: string;
|
|
662
|
+
itemHash?: string;
|
|
663
|
+
itemSourceSpecId?: string;
|
|
664
|
+
itemSourceKind?: "control" | "partial";
|
|
665
|
+
itemRef?: string;
|
|
666
|
+
};
|
|
565
667
|
launchRetry?: {
|
|
566
668
|
attempts: number;
|
|
567
669
|
maxAttempts?: number;
|
|
@@ -654,7 +756,11 @@ export interface WorkflowIndexRecord {
|
|
|
654
756
|
rootRunId?: string;
|
|
655
757
|
round?: number;
|
|
656
758
|
fanout?: unknown[];
|
|
657
|
-
|
|
759
|
+
/**
|
|
760
|
+
* Deprecated compatibility projection. New index writes omit task rows;
|
|
761
|
+
* consumers that need task-level details should load runJson/run.json.
|
|
762
|
+
*/
|
|
763
|
+
tasks?: Array<{
|
|
658
764
|
taskId: string;
|
|
659
765
|
displayName: string;
|
|
660
766
|
agent: string;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
export declare const VERIFICATION_STATUS: Readonly<{
|
|
2
|
+
readonly VERIFIED: "verified";
|
|
3
|
+
readonly PARTIALLY_SUPPORTED: "partially_supported";
|
|
4
|
+
readonly UNSUPPORTED: "unsupported";
|
|
5
|
+
readonly CONFLICTING: "conflicting";
|
|
6
|
+
readonly VERIFICATION_BLOCKED: "verification_blocked";
|
|
7
|
+
readonly UNVERIFIED: "unverified";
|
|
8
|
+
}>;
|
|
9
|
+
export type VerificationStatus = (typeof VERIFICATION_STATUS)[keyof typeof VERIFICATION_STATUS];
|
|
10
|
+
export type TerminalVerificationStatus = Exclude<VerificationStatus, (typeof VERIFICATION_STATUS)["UNVERIFIED"]>;
|
|
11
|
+
export declare const VERIFICATION_STATUS_VALUES: readonly ["verified", "partially_supported", "unsupported", "conflicting", "verification_blocked"];
|
|
12
|
+
export declare const VERIFICATION_STATUS_BUCKETS: Readonly<{
|
|
13
|
+
readonly verified: "verified";
|
|
14
|
+
readonly partially_supported: "partiallySupported";
|
|
15
|
+
readonly unsupported: "unsupported";
|
|
16
|
+
readonly conflicting: "conflicting";
|
|
17
|
+
readonly verification_blocked: "verificationBlocked";
|
|
18
|
+
}>;
|
|
19
|
+
export declare const VERIFICATION_STATUS_LABELS: Readonly<{
|
|
20
|
+
readonly verified: "verified";
|
|
21
|
+
readonly partially_supported: "partially supported";
|
|
22
|
+
readonly unsupported: "unsupported";
|
|
23
|
+
readonly conflicting: "conflicting";
|
|
24
|
+
readonly verification_blocked: "verification blocked";
|
|
25
|
+
readonly unverified: "unverified";
|
|
26
|
+
}>;
|
|
27
|
+
export declare function canonicalVerificationStatus(status: unknown): VerificationStatus;
|
|
28
|
+
export declare function verificationStatusBucket(status: unknown): string;
|
|
29
|
+
export declare function isVerifiedStatus(status: unknown): boolean;
|
|
30
|
+
export declare function isVerificationBlockedStatus(status: unknown): boolean;
|
|
31
|
+
export declare function isNonVerifiedTerminalStatus(status: unknown): boolean;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
export const VERIFICATION_STATUS = Object.freeze({
|
|
2
|
+
VERIFIED: "verified",
|
|
3
|
+
PARTIALLY_SUPPORTED: "partially_supported",
|
|
4
|
+
UNSUPPORTED: "unsupported",
|
|
5
|
+
CONFLICTING: "conflicting",
|
|
6
|
+
VERIFICATION_BLOCKED: "verification_blocked",
|
|
7
|
+
UNVERIFIED: "unverified",
|
|
8
|
+
});
|
|
9
|
+
export const VERIFICATION_STATUS_VALUES = Object.freeze([
|
|
10
|
+
VERIFICATION_STATUS.VERIFIED,
|
|
11
|
+
VERIFICATION_STATUS.PARTIALLY_SUPPORTED,
|
|
12
|
+
VERIFICATION_STATUS.UNSUPPORTED,
|
|
13
|
+
VERIFICATION_STATUS.CONFLICTING,
|
|
14
|
+
VERIFICATION_STATUS.VERIFICATION_BLOCKED,
|
|
15
|
+
]);
|
|
16
|
+
export const VERIFICATION_STATUS_BUCKETS = Object.freeze({
|
|
17
|
+
[VERIFICATION_STATUS.VERIFIED]: "verified",
|
|
18
|
+
[VERIFICATION_STATUS.PARTIALLY_SUPPORTED]: "partiallySupported",
|
|
19
|
+
[VERIFICATION_STATUS.UNSUPPORTED]: "unsupported",
|
|
20
|
+
[VERIFICATION_STATUS.CONFLICTING]: "conflicting",
|
|
21
|
+
[VERIFICATION_STATUS.VERIFICATION_BLOCKED]: "verificationBlocked",
|
|
22
|
+
});
|
|
23
|
+
export const VERIFICATION_STATUS_LABELS = Object.freeze({
|
|
24
|
+
[VERIFICATION_STATUS.VERIFIED]: "verified",
|
|
25
|
+
[VERIFICATION_STATUS.PARTIALLY_SUPPORTED]: "partially supported",
|
|
26
|
+
[VERIFICATION_STATUS.UNSUPPORTED]: "unsupported",
|
|
27
|
+
[VERIFICATION_STATUS.CONFLICTING]: "conflicting",
|
|
28
|
+
[VERIFICATION_STATUS.VERIFICATION_BLOCKED]: "verification blocked",
|
|
29
|
+
[VERIFICATION_STATUS.UNVERIFIED]: "unverified",
|
|
30
|
+
});
|
|
31
|
+
export function canonicalVerificationStatus(status) {
|
|
32
|
+
const text = String(status ?? "").trim();
|
|
33
|
+
if (!text)
|
|
34
|
+
return VERIFICATION_STATUS.UNVERIFIED;
|
|
35
|
+
if (text === "partiallySupported") {
|
|
36
|
+
return VERIFICATION_STATUS.PARTIALLY_SUPPORTED;
|
|
37
|
+
}
|
|
38
|
+
if (text === "verificationBlocked" || text === "blocked") {
|
|
39
|
+
return VERIFICATION_STATUS.VERIFICATION_BLOCKED;
|
|
40
|
+
}
|
|
41
|
+
return Object.values(VERIFICATION_STATUS).includes(text)
|
|
42
|
+
? text
|
|
43
|
+
: VERIFICATION_STATUS.UNVERIFIED;
|
|
44
|
+
}
|
|
45
|
+
export function verificationStatusBucket(status) {
|
|
46
|
+
const canonical = canonicalVerificationStatus(status);
|
|
47
|
+
return canonical in VERIFICATION_STATUS_BUCKETS
|
|
48
|
+
? VERIFICATION_STATUS_BUCKETS[canonical]
|
|
49
|
+
: "other";
|
|
50
|
+
}
|
|
51
|
+
export function isVerifiedStatus(status) {
|
|
52
|
+
return canonicalVerificationStatus(status) === VERIFICATION_STATUS.VERIFIED;
|
|
53
|
+
}
|
|
54
|
+
export function isVerificationBlockedStatus(status) {
|
|
55
|
+
return (canonicalVerificationStatus(status) ===
|
|
56
|
+
VERIFICATION_STATUS.VERIFICATION_BLOCKED);
|
|
57
|
+
}
|
|
58
|
+
const NON_VERIFIED_TERMINAL_STATUSES = new Set([
|
|
59
|
+
VERIFICATION_STATUS.PARTIALLY_SUPPORTED,
|
|
60
|
+
VERIFICATION_STATUS.UNSUPPORTED,
|
|
61
|
+
VERIFICATION_STATUS.CONFLICTING,
|
|
62
|
+
VERIFICATION_STATUS.VERIFICATION_BLOCKED,
|
|
63
|
+
]);
|
|
64
|
+
export function isNonVerifiedTerminalStatus(status) {
|
|
65
|
+
return NON_VERIFIED_TERMINAL_STATUSES.has(canonicalVerificationStatus(status));
|
|
66
|
+
}
|
|
@@ -23,7 +23,7 @@ const WORKFLOW_ARTIFACT_KIND_SET = new Set(WORKFLOW_ARTIFACT_KINDS);
|
|
|
23
23
|
const DEFAULT_MAX_BYTES = 50 * 1024;
|
|
24
24
|
const DEFAULT_MAX_LINES = 2000;
|
|
25
25
|
const SOURCE_NAME_PATTERN = /^[A-Za-z0-9_.:-]+$/;
|
|
26
|
-
const SIMPLE_JSON_PATH_PATTERN = /^(\$|\$(\.[A-Za-z0-9_-]+
|
|
26
|
+
const SIMPLE_JSON_PATH_PATTERN = /^(\$|\$(\.[A-Za-z0-9_-]+)+)$/;
|
|
27
27
|
const JSON_PATH_SEGMENT_ALIASES = {
|
|
28
28
|
axes: "researchAxes",
|
|
29
29
|
claimVerdicts: "claimVerdictLedger",
|
|
@@ -226,14 +226,13 @@ async function readProjectedWorkflowArtifact(options) {
|
|
|
226
226
|
path: effectivePath,
|
|
227
227
|
});
|
|
228
228
|
const serialized = JSON.stringify(sliced.value, null, 2);
|
|
229
|
-
const
|
|
230
|
-
|
|
231
|
-
: serialized;
|
|
229
|
+
const maxChars = options.maxChars ?? DEFAULT_MAX_BYTES;
|
|
230
|
+
const preview = serialized.length > maxChars ? serialized.slice(0, maxChars) : serialized;
|
|
232
231
|
const projection = {
|
|
233
232
|
path: effectivePath,
|
|
234
233
|
valueType: jsonValueType(resolved),
|
|
235
234
|
...(options.maxItems === undefined ? {} : { maxItems: options.maxItems }),
|
|
236
|
-
|
|
235
|
+
maxChars,
|
|
237
236
|
...(sliced.totalItems === undefined
|
|
238
237
|
? {}
|
|
239
238
|
: { totalItems: sliced.totalItems }),
|
|
@@ -501,7 +500,7 @@ function normalizeProjectionPath(value) {
|
|
|
501
500
|
if (path === undefined)
|
|
502
501
|
return undefined;
|
|
503
502
|
if (!SIMPLE_JSON_PATH_PATTERN.test(path)) {
|
|
504
|
-
throw new Error("path must be $ or a simple dot JSON path like $.claims.items");
|
|
503
|
+
throw new Error("path must be $ or a simple dot JSON path like $.claims.items; array selectors are not supported");
|
|
505
504
|
}
|
|
506
505
|
return path;
|
|
507
506
|
}
|
|
@@ -4,6 +4,13 @@ export interface WorkflowTelemetrySummary {
|
|
|
4
4
|
taskCount: number;
|
|
5
5
|
wallClockMs: number | null;
|
|
6
6
|
statusCounts: StatusCounts;
|
|
7
|
+
completion: {
|
|
8
|
+
health: "clean" | "repaired" | "incomplete";
|
|
9
|
+
clean: boolean;
|
|
10
|
+
repaired: boolean;
|
|
11
|
+
repairEvents: number;
|
|
12
|
+
contextLimitFailures: number;
|
|
13
|
+
};
|
|
7
14
|
retryCounts: {
|
|
8
15
|
output: number;
|
|
9
16
|
launch: number;
|
|
@@ -25,10 +25,21 @@ export function summarizeWorkflowTelemetry(run, options = {}) {
|
|
|
25
25
|
stage.durationMs += taskDurationMs(task);
|
|
26
26
|
stage.outputBytes += taskOutputBytes;
|
|
27
27
|
}
|
|
28
|
+
const repairEvents = accumulator.outputRetries +
|
|
29
|
+
accumulator.launchRetries +
|
|
30
|
+
accumulator.resumeEvents;
|
|
31
|
+
const health = completionHealth(tasks, repairEvents, accumulator);
|
|
28
32
|
return {
|
|
29
33
|
taskCount: tasks.length,
|
|
30
34
|
wallClockMs: durationBetween(run.createdAt, run.updatedAt),
|
|
31
35
|
statusCounts,
|
|
36
|
+
completion: {
|
|
37
|
+
health,
|
|
38
|
+
clean: health === "clean",
|
|
39
|
+
repaired: health === "repaired",
|
|
40
|
+
repairEvents,
|
|
41
|
+
contextLimitFailures: accumulator.contextLimitFailures,
|
|
42
|
+
},
|
|
32
43
|
retryCounts: {
|
|
33
44
|
output: accumulator.outputRetries,
|
|
34
45
|
launch: accumulator.launchRetries,
|
|
@@ -50,12 +61,15 @@ function createWorkflowTelemetryAccumulator() {
|
|
|
50
61
|
launchRetries: 0,
|
|
51
62
|
resumeEvents: 0,
|
|
52
63
|
resumedTasks: 0,
|
|
64
|
+
contextLimitFailures: 0,
|
|
53
65
|
retryReasons: { output: {}, launch: {} },
|
|
54
66
|
resumeStatusCounts: {},
|
|
55
67
|
outputRepairCounts: { sameSession: 0, newSession: 0, unknown: 0 },
|
|
56
68
|
};
|
|
57
69
|
}
|
|
58
70
|
function accumulateTaskReliability(task, accumulator) {
|
|
71
|
+
if (taskHasContextLimitFailure(task))
|
|
72
|
+
accumulator.contextLimitFailures += 1;
|
|
59
73
|
const currentOutputAttempts = positiveCount(task.outputRetry?.attempts);
|
|
60
74
|
accumulator.outputRetries += currentOutputAttempts;
|
|
61
75
|
if (currentOutputAttempts > 0) {
|
|
@@ -76,15 +90,29 @@ function accumulateTaskReliability(task, accumulator) {
|
|
|
76
90
|
for (const event of resumeEvents)
|
|
77
91
|
accumulateResumeEvent(event, accumulator);
|
|
78
92
|
}
|
|
93
|
+
function completionHealth(tasks, repairEvents, accumulator) {
|
|
94
|
+
const allCompleted = tasks.length > 0 && tasks.every((task) => task.status === "completed");
|
|
95
|
+
if (!allCompleted)
|
|
96
|
+
return "incomplete";
|
|
97
|
+
return repairEvents === 0 && accumulator.contextLimitFailures === 0
|
|
98
|
+
? "clean"
|
|
99
|
+
: "repaired";
|
|
100
|
+
}
|
|
79
101
|
function accumulateResumeEvent(event, accumulator) {
|
|
80
102
|
accumulator.resumeStatusCounts[event.fromStatus] =
|
|
81
103
|
(accumulator.resumeStatusCounts[event.fromStatus] ?? 0) + 1;
|
|
104
|
+
if (resumeEventHasContextLimitFailure(event))
|
|
105
|
+
accumulator.contextLimitFailures += 1;
|
|
82
106
|
const previousOutputAttempts = positiveCount(event.outputRetryAttempts);
|
|
83
107
|
accumulator.outputRetries += previousOutputAttempts;
|
|
84
|
-
if (previousOutputAttempts
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
108
|
+
if (previousOutputAttempts > 0) {
|
|
109
|
+
countReason(accumulator.retryReasons.output, event.outputRetryReason);
|
|
110
|
+
countRepairMode(accumulator.outputRepairCounts, event.outputRetryRepairMode);
|
|
111
|
+
}
|
|
112
|
+
const previousLaunchAttempts = positiveCount(event.launchRetryAttempts);
|
|
113
|
+
accumulator.launchRetries += previousLaunchAttempts;
|
|
114
|
+
if (previousLaunchAttempts > 0)
|
|
115
|
+
countReason(accumulator.retryReasons.launch, event.launchRetryReason);
|
|
88
116
|
}
|
|
89
117
|
function positiveCount(value) {
|
|
90
118
|
if (value === undefined || !Number.isFinite(value))
|
|
@@ -103,6 +131,29 @@ function countRepairMode(counts, mode) {
|
|
|
103
131
|
else
|
|
104
132
|
counts.unknown += 1;
|
|
105
133
|
}
|
|
134
|
+
function taskHasContextLimitFailure(task) {
|
|
135
|
+
return [
|
|
136
|
+
task.statusDetail,
|
|
137
|
+
task.lastMessage,
|
|
138
|
+
task.outputRetry?.reason,
|
|
139
|
+
task.outputRetry?.message,
|
|
140
|
+
task.launchRetry?.reason,
|
|
141
|
+
task.launchRetry?.message,
|
|
142
|
+
].some(isContextLimitText);
|
|
143
|
+
}
|
|
144
|
+
function resumeEventHasContextLimitFailure(event) {
|
|
145
|
+
return [
|
|
146
|
+
event.fromStatusDetail,
|
|
147
|
+
event.lastMessage,
|
|
148
|
+
event.outputRetryReason,
|
|
149
|
+
event.launchRetryReason,
|
|
150
|
+
].some(isContextLimitText);
|
|
151
|
+
}
|
|
152
|
+
function isContextLimitText(value) {
|
|
153
|
+
const text = value?.toLowerCase() ?? "";
|
|
154
|
+
return (text.includes("context_or_request_too_large") ||
|
|
155
|
+
/context (window|length)|maximum context|request too large|token limit/.test(text));
|
|
156
|
+
}
|
|
106
157
|
export function buildSourceContextPacket(run, options = {}) {
|
|
107
158
|
const maxPreviewChars = Math.max(0, Math.floor(options.maxPreviewChars ?? 1200));
|
|
108
159
|
const maxStructuredChars = normalizeOptionalCharCap(options.maxStructuredChars);
|
|
@@ -5,7 +5,7 @@ import { pathToFileURL } from "node:url";
|
|
|
5
5
|
export const WORKFLOW_FETCH_CONTENT_CACHE_SCHEMA = "workflow-fetch-content-cache-v1";
|
|
6
6
|
export const WORKFLOW_FETCH_CONTENT_CACHE_EVENT_SCHEMA = "workflow-fetch-content-cache-event-v1";
|
|
7
7
|
export function registerWorkflowFetchCacheExtension(pi, config, webAccessExtension, storage) {
|
|
8
|
-
|
|
8
|
+
const capturedFetchDataByResponseId = new Map();
|
|
9
9
|
const adapter = new Proxy(pi, {
|
|
10
10
|
get(target, property, receiver) {
|
|
11
11
|
if (property === "registerTool") {
|
|
@@ -19,22 +19,25 @@ export function registerWorkflowFetchCacheExtension(pi, config, webAccessExtensi
|
|
|
19
19
|
execute: async (toolCallId, params, signal, onUpdate) => {
|
|
20
20
|
const cacheKey = cacheKeyForParams(params);
|
|
21
21
|
if (!cacheKey) {
|
|
22
|
-
return await tool.execute(toolCallId, params, signal, onUpdate);
|
|
22
|
+
return capFetchContentInlineResult(await tool.execute(toolCallId, params, signal, onUpdate), config.maxInlineChars);
|
|
23
23
|
}
|
|
24
24
|
const hit = await readCacheRecord(config, cacheKey.key);
|
|
25
25
|
if (hit) {
|
|
26
26
|
await recordCacheEvent(config, "hit", cacheKey);
|
|
27
|
-
return materializeCacheHit(pi, storage, hit);
|
|
27
|
+
return capFetchContentInlineResult(materializeCacheHit(pi, storage, hit), config.maxInlineChars);
|
|
28
28
|
}
|
|
29
29
|
await recordCacheEvent(config, "miss", cacheKey);
|
|
30
|
-
capturedFetchData = undefined;
|
|
31
30
|
const result = await tool.execute(toolCallId, params, signal, onUpdate);
|
|
32
|
-
const
|
|
33
|
-
|
|
31
|
+
const responseId = stringValue(result.details?.responseId);
|
|
32
|
+
const storedData = responseId
|
|
33
|
+
? capturedFetchDataByResponseId.get(responseId)
|
|
34
|
+
: undefined;
|
|
35
|
+
if (responseId)
|
|
36
|
+
capturedFetchDataByResponseId.delete(responseId);
|
|
34
37
|
const writeReason = cacheWriteSkipReason(result, storedData);
|
|
35
38
|
if (writeReason) {
|
|
36
39
|
await recordCacheEvent(config, "skip", cacheKey, writeReason);
|
|
37
|
-
return result;
|
|
40
|
+
return capFetchContentInlineResult(result, config.maxInlineChars);
|
|
38
41
|
}
|
|
39
42
|
await writeCacheRecord(config, {
|
|
40
43
|
schema: WORKFLOW_FETCH_CONTENT_CACHE_SCHEMA,
|
|
@@ -46,7 +49,7 @@ export function registerWorkflowFetchCacheExtension(pi, config, webAccessExtensi
|
|
|
46
49
|
storedData: storedData,
|
|
47
50
|
});
|
|
48
51
|
await recordCacheEvent(config, "write", cacheKey);
|
|
49
|
-
return withCacheDetails(result, { hit: false });
|
|
52
|
+
return capFetchContentInlineResult(withCacheDetails(result, { hit: false }), config.maxInlineChars);
|
|
50
53
|
},
|
|
51
54
|
});
|
|
52
55
|
};
|
|
@@ -54,7 +57,10 @@ export function registerWorkflowFetchCacheExtension(pi, config, webAccessExtensi
|
|
|
54
57
|
if (property === "appendEntry") {
|
|
55
58
|
return (type, data) => {
|
|
56
59
|
if (type === "web-search-results" && isFetchStoredData(data)) {
|
|
57
|
-
|
|
60
|
+
const cloned = cloneJsonObject(data);
|
|
61
|
+
const responseId = stringValue(cloned?.id);
|
|
62
|
+
if (responseId && cloned)
|
|
63
|
+
capturedFetchDataByResponseId.set(responseId, cloned);
|
|
58
64
|
}
|
|
59
65
|
return pi.appendEntry?.(type, data);
|
|
60
66
|
};
|
|
@@ -176,6 +182,45 @@ function withCacheDetails(result, options) {
|
|
|
176
182
|
},
|
|
177
183
|
};
|
|
178
184
|
}
|
|
185
|
+
function capFetchContentInlineResult(result, maxInlineChars) {
|
|
186
|
+
const maxChars = normalizeInlineCharCap(maxInlineChars);
|
|
187
|
+
if (maxChars === undefined || !Array.isArray(result.content))
|
|
188
|
+
return result;
|
|
189
|
+
let truncated = false;
|
|
190
|
+
const content = result.content.map((entry) => {
|
|
191
|
+
if (entry.type !== "text" || typeof entry.text !== "string")
|
|
192
|
+
return entry;
|
|
193
|
+
if (entry.text.length <= maxChars)
|
|
194
|
+
return entry;
|
|
195
|
+
truncated = true;
|
|
196
|
+
return {
|
|
197
|
+
...entry,
|
|
198
|
+
text: entry.text.slice(0, maxChars) +
|
|
199
|
+
`\n\n[Workflow inline fetch content capped at ${maxChars} chars; full source content remains in workflow source cache.]`,
|
|
200
|
+
};
|
|
201
|
+
});
|
|
202
|
+
if (!truncated)
|
|
203
|
+
return result;
|
|
204
|
+
return {
|
|
205
|
+
...result,
|
|
206
|
+
content,
|
|
207
|
+
details: {
|
|
208
|
+
...(result.details ?? {}),
|
|
209
|
+
truncated: true,
|
|
210
|
+
workflowInlineContentCap: {
|
|
211
|
+
type: "fetch_content",
|
|
212
|
+
maxChars,
|
|
213
|
+
truncated: true,
|
|
214
|
+
},
|
|
215
|
+
},
|
|
216
|
+
};
|
|
217
|
+
}
|
|
218
|
+
function normalizeInlineCharCap(value) {
|
|
219
|
+
if (value === undefined || !Number.isFinite(value))
|
|
220
|
+
return undefined;
|
|
221
|
+
const cap = Math.floor(value);
|
|
222
|
+
return cap > 0 ? cap : undefined;
|
|
223
|
+
}
|
|
179
224
|
function cacheWriteSkipReason(result, storedData) {
|
|
180
225
|
if (!storedData)
|
|
181
226
|
return "missing-stored-data";
|
|
@@ -227,6 +272,9 @@ function cloneJsonObject(value) {
|
|
|
227
272
|
return undefined;
|
|
228
273
|
return JSON.parse(JSON.stringify(value));
|
|
229
274
|
}
|
|
275
|
+
function stringValue(value) {
|
|
276
|
+
return typeof value === "string" && value ? value : undefined;
|
|
277
|
+
}
|
|
230
278
|
function isFetchStoredData(value) {
|
|
231
279
|
return isRecord(value) && value.type === "fetch" && Array.isArray(value.urls);
|
|
232
280
|
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import type { TaskRunStatus, WorkflowRunRecord, WorkflowRunStatus, WorkflowRunType } from "./types.js";
|
|
2
|
+
export declare const WORKFLOW_METRICS_SCHEMA_VERSION = 1;
|
|
3
|
+
export declare const WORKFLOW_METRICS_PRICING_MODEL_VERSION = "provider-reported-v1";
|
|
4
|
+
export type WorkflowMetricsSchemaVersion = typeof WORKFLOW_METRICS_SCHEMA_VERSION;
|
|
5
|
+
export type WorkflowMetricsPricingModelVersion = typeof WORKFLOW_METRICS_PRICING_MODEL_VERSION;
|
|
6
|
+
export type WorkflowMetricsPricingSource = "provider-reported";
|
|
7
|
+
export type WorkflowMetricValue = number | null;
|
|
8
|
+
/**
 * Token-usage and cost figures together with bookkeeping about how complete
 * those figures are.
 *
 * Every token and cost figure is a `WorkflowMetricValue`, so it may be `null`.
 * NOTE(review): the precise difference between `unavailable` and `incomplete`
 * is defined by the implementation, which this declaration does not show.
 */
export interface WorkflowUsageMetrics {
    // Token counts.
    inputTokens: WorkflowMetricValue;
    outputTokens: WorkflowMetricValue;
    totalTokens: WorkflowMetricValue;
    cachedInputTokens: WorkflowMetricValue;
    cacheCreationInputTokens: WorkflowMetricValue;
    cacheReadInputTokens: WorkflowMetricValue;
    reasoningTokens: WorkflowMetricValue;
    /**
     * Cost exactly as the provider reported it. It is deliberately never
     * derived from token counts or model names.
     */
    costUsd: WorkflowMetricValue;
    // Availability bookkeeping.
    attempts: number;
    unavailable: boolean;
    incomplete: boolean;
    unavailableTaskIds: string[];
    incompleteTaskIds: string[];
}
|
|
27
|
+
/**
 * Launch and execution timing figures together with bookkeeping about how
 * complete those figures are.
 *
 * Every timing figure is a `WorkflowMetricValue`, so it may be `null`.
 * NOTE(review): the `Ms` suffix suggests milliseconds — confirm against the
 * implementation.
 */
export interface WorkflowLaunchTimingMetrics {
    // Timing figures.
    launchWaitMs: WorkflowMetricValue;
    launchDurationMs: WorkflowMetricValue;
    executionMs: WorkflowMetricValue;
    totalMs: WorkflowMetricValue;
    launchSlotReleaseDelayMs: WorkflowMetricValue;
    // Availability bookkeeping.
    attempts: number;
    unavailable: boolean;
    incomplete: boolean;
    unavailableTaskIds: string[];
    incompleteTaskIds: string[];
}
|
|
39
|
+
/**
 * Retry-related counters. All fields are plain numbers.
 *
 * NOTE(review): how `totalRetryEvents` relates to the individual counters is
 * decided by the implementation — confirm there before relying on it.
 */
export interface WorkflowRetryMetrics {
    launchRetries: number;
    outputRetries: number;
    resumeEvents: number;
    totalRetryEvents: number;
    tasksWithRetries: number;
}
|
|
46
|
+
/**
 * Task counters keyed by status name, plus a `total`.
 *
 * NOTE(review): the status keys presumably mirror the `TaskRunStatus` values —
 * verify against that type.
 */
export interface WorkflowTaskStatusCounts {
    pending: number;
    running: number;
    blocked: number;
    completed: number;
    failed: number;
    skipped: number;
    interrupted: number;
    total: number;
}
|
|
56
|
+
/**
 * Aggregated metrics over a group of tasks: a task count, per-status
 * counters, and the usage, launch-timing and retry figures.
 */
export interface WorkflowRunMetricsRollup {
    taskCount: number;
    statusCounts: WorkflowTaskStatusCounts;
    usage: WorkflowUsageMetrics;
    launchTiming: WorkflowLaunchTimingMetrics;
    retries: WorkflowRetryMetrics;
}
|
|
63
|
+
/**
 * Metrics for a single task: identifying fields, descriptive fields that may
 * be `null`, and the task's usage, launch-timing and retry figures.
 */
export interface WorkflowTaskMetrics {
    // Identity and status.
    taskId: string;
    specId: string;
    displayName: string;
    agent: string;
    status: TaskRunStatus;
    statusDetail: string;
    // Descriptive fields; each is `null` when no value is present.
    stageId: string | null;
    kind: string | null;
    provider: string | null;
    model: string | null;
    thinking: string | null;
    // Measurements.
    usage: WorkflowUsageMetrics;
    launchTiming: WorkflowLaunchTimingMetrics;
    retries: WorkflowRetryMetrics;
}
|
|
79
|
+
/**
 * A `WorkflowRunMetricsRollup` tagged with the stage it belongs to.
 *
 * NOTE(review): a `null` `stageId` presumably groups tasks that have no
 * stage — confirm against the implementation.
 */
export interface WorkflowStageMetrics extends WorkflowRunMetricsRollup {
    stageId: string | null;
}
|
|
82
|
+
/**
 * Run-level summary of metric completeness: task-id lists for unavailable or
 * incomplete usage and launch-timing data, plus two overall flags.
 *
 * NOTE(review): how the overall flags are derived from the id lists is
 * decided by the implementation — confirm there.
 */
export interface WorkflowRunMetricsMetadata {
    // Usage data gaps.
    usageUnavailableTaskIds: string[];
    usageIncompleteTaskIds: string[];
    // Launch-timing data gaps.
    launchTimingUnavailableTaskIds: string[];
    launchTimingIncompleteTaskIds: string[];
    // Overall flags.
    incomplete: boolean;
    unavailable: boolean;
}
|
|
90
|
+
/**
 * Top-level metrics export for one workflow run: version and pricing
 * markers, a summary of the run itself, and metrics as run totals, per stage
 * and per task, followed by completeness metadata.
 */
export interface WorkflowRunMetrics {
    // Version and pricing markers.
    schemaVersion: WorkflowMetricsSchemaVersion;
    pricingModelVersion: WorkflowMetricsPricingModelVersion;
    pricingSource: WorkflowMetricsPricingSource;
    /** Typed as the literal `true`; no other value is allowed. */
    costsAreProviderReported: true;
    /** Summary of the run; `name` is the only optional field. */
    run: {
        runId: string;
        name?: string;
        type: WorkflowRunType;
        status: WorkflowRunStatus;
        createdAt: string;
        updatedAt: string;
    };
    // Metrics at three levels of detail.
    totals: WorkflowRunMetricsRollup;
    byStage: WorkflowStageMetrics[];
    byTask: WorkflowTaskMetrics[];
    metadata: WorkflowRunMetricsMetadata;
}
|
|
108
|
+
/**
 * Produce a deterministic, JSON-serializable metrics export from a persisted
 * workflow run record.
 *
 * The helper is pure by design: it reads nothing but the record it is given,
 * performs no pricing inference, and leaves the run unmodified.
 *
 * @param run Persisted workflow run record to summarize.
 * @returns The metrics export for that run.
 */
export declare function buildWorkflowRunMetrics(run: WorkflowRunRecord): WorkflowRunMetrics;
|