@agwab/pi-workflow 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/artifact-graph-runtime.d.ts +1 -1
- package/dist/artifact-graph-runtime.js +10 -5
- package/dist/artifact-graph-schema.js +127 -5
- package/dist/compiler.js +52 -19
- package/dist/dynamic-generated-task-runtime.js +3 -1
- package/dist/dynamic-profiles.d.ts +1 -1
- package/dist/engine-run-graph.d.ts +3 -0
- package/dist/engine-run-graph.js +194 -4
- package/dist/engine.d.ts +5 -0
- package/dist/engine.js +389 -41
- package/dist/extension.d.ts +2 -1
- package/dist/extension.js +30 -8
- package/dist/index.d.ts +11 -3
- package/dist/index.js +6 -1
- package/dist/prompt-json.d.ts +7 -0
- package/dist/prompt-json.js +13 -0
- package/dist/roles.d.ts +1 -1
- package/dist/roles.js +5 -8
- package/dist/store.d.ts +20 -1
- package/dist/store.js +139 -35
- package/dist/strings.d.ts +11 -0
- package/dist/strings.js +24 -0
- package/dist/subagent-backend.js +710 -40
- package/dist/types.d.ts +107 -1
- package/dist/verification-ontology.d.ts +31 -0
- package/dist/verification-ontology.js +66 -0
- package/dist/workflow-artifact-tool.js +5 -6
- package/dist/workflow-artifacts.d.ts +7 -0
- package/dist/workflow-artifacts.js +55 -4
- package/dist/workflow-fetch-cache-extension.d.ts +1 -0
- package/dist/workflow-fetch-cache-extension.js +57 -9
- package/dist/workflow-metrics.d.ts +113 -0
- package/dist/workflow-metrics.js +272 -0
- package/dist/workflow-output-artifacts.js +5 -3
- package/dist/workflow-partial-output.d.ts +45 -0
- package/dist/workflow-partial-output.js +205 -0
- package/dist/workflow-progress-health.js +42 -10
- package/dist/workflow-runtime.js +10 -1
- package/dist/workflow-view.js +3 -1
- package/dist/workflow-web-source-extension.js +194 -52
- package/dist/workflow-web-source.d.ts +2 -1
- package/dist/workflow-web-source.js +109 -30
- package/docs/usage.md +76 -29
- package/node_modules/@agwab/pi-subagent/README.md +3 -3
- package/node_modules/@agwab/pi-subagent/api.mjs +1 -0
- package/node_modules/@agwab/pi-subagent/docs/usage.md +63 -12
- package/node_modules/@agwab/pi-subagent/package.json +2 -2
- package/node_modules/@agwab/pi-subagent/src/api.ts +54 -1
- package/node_modules/@agwab/pi-subagent/src/artifacts/registry.ts +9 -4
- package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +8 -0
- package/node_modules/@agwab/pi-subagent/src/core/constants.ts +9 -0
- package/node_modules/@agwab/pi-subagent/src/core/validation.ts +21 -0
- package/node_modules/@agwab/pi-subagent/src/index.ts +1046 -576
- package/node_modules/@agwab/pi-subagent/src/orchestrate/async.ts +279 -156
- package/node_modules/@agwab/pi-subagent/src/orchestrate/interrupt.ts +165 -89
- package/node_modules/@agwab/pi-subagent/src/orchestrate/reconcile.ts +111 -65
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run-ref.ts +219 -0
- package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +88 -8
- package/node_modules/@agwab/pi-subagent/src/orchestrate/status.ts +614 -298
- package/node_modules/@agwab/pi-subagent/src/panel.ts +1356 -560
- package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +53 -5
- package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +13 -6
- package/package.json +2 -2
- package/skills/workflow-guide/SKILL.md +1 -0
- package/src/artifact-graph-runtime.ts +19 -13
- package/src/artifact-graph-schema.ts +143 -3
- package/src/cli.mjs +52 -0
- package/src/compiler.ts +63 -18
- package/src/dynamic-generated-task-runtime.ts +3 -1
- package/src/dynamic-profiles.ts +1 -1
- package/src/engine-run-graph.ts +246 -4
- package/src/engine.ts +545 -38
- package/src/extension.ts +36 -6
- package/src/index.ts +52 -1
- package/src/prompt-json.ts +13 -0
- package/src/roles.ts +6 -9
- package/src/store.ts +194 -42
- package/src/strings.ts +38 -0
- package/src/subagent-backend.ts +921 -62
- package/src/types.ts +116 -2
- package/src/verification-ontology.ts +88 -0
- package/src/workflow-artifact-tool.ts +5 -7
- package/src/workflow-artifacts.ts +83 -3
- package/src/workflow-fetch-cache-extension.ts +78 -13
- package/src/workflow-metrics.ts +478 -0
- package/src/workflow-output-artifacts.ts +5 -3
- package/src/workflow-partial-output.ts +299 -0
- package/src/workflow-progress-health.ts +47 -15
- package/src/workflow-runtime.ts +18 -2
- package/src/workflow-view.ts +2 -1
- package/src/workflow-web-source-extension.ts +654 -232
- package/src/workflow-web-source.ts +153 -39
- package/workflows/README.md +7 -25
- package/workflows/deep-research/batched-verification.spec.json +253 -0
- package/workflows/deep-research/helpers/batch-verification-candidates.mjs +136 -0
- package/workflows/deep-research/helpers/claim-evidence-gate.mjs +229 -36
- package/workflows/deep-research/helpers/final-audit-packet.mjs +1 -4
- package/workflows/deep-research/helpers/normalize-input-packet.mjs +81 -2
- package/workflows/deep-research/helpers/render-executive.mjs +40 -26
- package/workflows/deep-research/helpers/sanitize-verification-candidates.mjs +89 -15
- package/workflows/deep-research/helpers/shadow-select-verification.mjs +229 -0
- package/workflows/deep-research/helpers/verification-ontology.mjs +77 -0
- package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +3 -3
- package/workflows/deep-research/schemas/deep-research-research-questions-control.schema.json +38 -0
- package/workflows/deep-research/schemas/deep-research-sanitize-claims-control.schema.json +63 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-batch-control.schema.json +47 -0
- package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +13 -3
- package/workflows/deep-research/spec.json +32 -12
- package/workflows/impact-review/spec.json +3 -3
- package/workflows/spec-review/helpers/spec-review-pipeline.mjs +1 -8
- package/dist/dynamic-loader.d.ts +0 -25
- package/dist/dynamic-loader.js +0 -13
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stderr +0 -0
- package/skills/workflow-guide/scaffolds/dag-required-reads/spec.json.validate.stdout +0 -13
- package/src/dynamic-loader.ts +0 -49
- package/workflows/impact-review/schemas/docs-release-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/security-performance-impact-control.schema.json +0 -42
- package/workflows/impact-review/schemas/state-data-impact-control.schema.json +0 -42
|
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
|
|
|
2
2
|
import { appendFile, mkdir, readFile, readdir, rename, writeFile, } from "node:fs/promises";
|
|
3
3
|
import { isIP } from "node:net";
|
|
4
4
|
import { dirname, resolve } from "node:path";
|
|
5
|
+
import { compactStrings } from "./strings.js";
|
|
5
6
|
export const WORKFLOW_WEB_SOURCE_CACHE_SCHEMA = "workflow-web-source-cache-v1";
|
|
6
7
|
export const WORKFLOW_WEB_SOURCE_INDEX_SCHEMA = "workflow-web-source-index-v1";
|
|
7
8
|
export const WORKFLOW_WEB_SOURCE_INDEX_EVENT_SCHEMA = "workflow-web-source-index-event-v1";
|
|
@@ -22,7 +23,7 @@ export const DEFAULT_WORKFLOW_WEB_SECURITY_POLICY = {
|
|
|
22
23
|
allowPrivateHosts: false,
|
|
23
24
|
cacheRawProviderPayloads: false,
|
|
24
25
|
};
|
|
25
|
-
const SENSITIVE_QUERY_PARAM_PATTERN = /(^|[-_])(access[-_]?token|auth|code|credential|key|password|secret|session|signature|sig|token)([-_]|$)/i;
|
|
26
|
+
const SENSITIVE_QUERY_PARAM_PATTERN = /(^|[-_])(access[-_]?token|auth|code|credential|key|password|secret|session|session[-_]?id|sessionid|signature|sig|sid|jwt|token)([-_]|$)/i;
|
|
26
27
|
const PRIVATE_HOST_PATTERNS = [
|
|
27
28
|
/^localhost$/i,
|
|
28
29
|
/^127\./,
|
|
@@ -165,7 +166,7 @@ export function createWorkflowWebSource(options) {
|
|
|
165
166
|
redactedUrl,
|
|
166
167
|
urlKey: sourceUrlCacheKey(options.url),
|
|
167
168
|
domain,
|
|
168
|
-
...(options.title ? { title: options.title } : {}),
|
|
169
|
+
...(options.title ? { title: redactInlineSecrets(options.title) } : {}),
|
|
169
170
|
...(options.provider ? { provider: options.provider } : {}),
|
|
170
171
|
contentHash,
|
|
171
172
|
text: options.text,
|
|
@@ -355,15 +356,12 @@ export function extractTextFromToolResult(result) {
|
|
|
355
356
|
const content = result.content;
|
|
356
357
|
if (!Array.isArray(content))
|
|
357
358
|
return "";
|
|
358
|
-
return content
|
|
359
|
-
.map((entry) => {
|
|
359
|
+
return compactStrings(content.map((entry) => {
|
|
360
360
|
if (!isRecord(entry))
|
|
361
361
|
return "";
|
|
362
362
|
const text = entry.text;
|
|
363
363
|
return typeof text === "string" ? text : "";
|
|
364
|
-
})
|
|
365
|
-
.filter(Boolean)
|
|
366
|
-
.join("\n\n");
|
|
364
|
+
}), { trim: false, unique: false }).join("\n\n");
|
|
367
365
|
}
|
|
368
366
|
export function extractTitleFromToolResult(result) {
|
|
369
367
|
if (!isRecord(result))
|
|
@@ -446,7 +444,7 @@ function sourceToIndexEntry(source) {
|
|
|
446
444
|
redactedUrl: source.redactedUrl,
|
|
447
445
|
...(source.urlKey ? { urlKey: source.urlKey } : {}),
|
|
448
446
|
domain: source.domain,
|
|
449
|
-
...(source.title ? { title: source.title } : {}),
|
|
447
|
+
...(source.title ? { title: redactInlineSecrets(source.title) } : {}),
|
|
450
448
|
contentHash: source.contentHash,
|
|
451
449
|
textChars: source.textChars,
|
|
452
450
|
...(source.provider ? { provider: source.provider } : {}),
|
|
@@ -526,19 +524,32 @@ function snippetForTerms(options) {
|
|
|
526
524
|
return right.score - left.score;
|
|
527
525
|
return right.matchedTerms.length - left.matchedTerms.length;
|
|
528
526
|
})[0];
|
|
529
|
-
const
|
|
530
|
-
|
|
527
|
+
const consumed = consumeAnchoredSnippet({
|
|
528
|
+
text: options.text,
|
|
529
|
+
anchorStart: best.anchorStart,
|
|
530
|
+
anchorEnd: best.anchorEnd,
|
|
531
|
+
maxChars: options.maxChars,
|
|
532
|
+
budget: options.budget,
|
|
533
|
+
});
|
|
534
|
+
const returnedWindowNorm = normalizeForSearch(options.text.slice(consumed.sourceStart, consumed.sourceEnd)).normalized;
|
|
535
|
+
const matchedTerms = needles
|
|
536
|
+
.filter((term) => returnedWindowNorm.includes(term.normalized))
|
|
537
|
+
.map((term) => term.raw);
|
|
538
|
+
const missingTerms = needles
|
|
539
|
+
.filter((term) => !returnedWindowNorm.includes(term.normalized))
|
|
540
|
+
.map((term) => term.raw);
|
|
531
541
|
return {
|
|
532
|
-
status:
|
|
542
|
+
status: consumed.status,
|
|
533
543
|
matchType: "terms",
|
|
534
|
-
quote: consumed.
|
|
535
|
-
startOffset:
|
|
536
|
-
endOffset:
|
|
537
|
-
visibleChars: consumed.
|
|
538
|
-
matchedTerms
|
|
539
|
-
missingTerms
|
|
540
|
-
coverageRatio:
|
|
544
|
+
quote: consumed.quote || undefined,
|
|
545
|
+
startOffset: consumed.sourceStart,
|
|
546
|
+
endOffset: consumed.sourceEnd,
|
|
547
|
+
visibleChars: consumed.visibleChars,
|
|
548
|
+
matchedTerms,
|
|
549
|
+
missingTerms,
|
|
550
|
+
coverageRatio: matchedTerms.length / Math.max(1, needles.length),
|
|
541
551
|
candidateOnly: true,
|
|
552
|
+
truncated: consumed.truncated || undefined,
|
|
542
553
|
};
|
|
543
554
|
}
|
|
544
555
|
function scoreTermWindow(text, matchStart, matchEnd, maxChars, terms) {
|
|
@@ -559,6 +570,8 @@ function scoreTermWindow(text, matchStart, matchEnd, maxChars, terms) {
|
|
|
559
570
|
return {
|
|
560
571
|
start,
|
|
561
572
|
end,
|
|
573
|
+
anchorStart: matchStart,
|
|
574
|
+
anchorEnd: matchEnd,
|
|
562
575
|
matchedTerms,
|
|
563
576
|
missingTerms,
|
|
564
577
|
score: matchedTerms.length * 1_000 + occurrenceScore,
|
|
@@ -631,20 +644,76 @@ const SOURCE_READ_STOPWORDS = new Set([
|
|
|
631
644
|
"without",
|
|
632
645
|
]);
|
|
633
646
|
function snippetForMatch(options) {
|
|
634
|
-
const
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
647
|
+
const consumed = consumeAnchoredSnippet({
|
|
648
|
+
text: options.text,
|
|
649
|
+
anchorStart: options.start,
|
|
650
|
+
anchorEnd: options.end,
|
|
651
|
+
maxChars: options.maxChars,
|
|
652
|
+
budget: options.budget,
|
|
653
|
+
});
|
|
641
654
|
return {
|
|
642
|
-
status:
|
|
655
|
+
status: consumed.status,
|
|
643
656
|
matchType: options.matchType,
|
|
644
|
-
quote: consumed.
|
|
657
|
+
quote: consumed.quote || undefined,
|
|
645
658
|
startOffset: options.start,
|
|
646
659
|
endOffset: options.end,
|
|
660
|
+
visibleChars: consumed.visibleChars,
|
|
661
|
+
truncated: consumed.truncated || undefined,
|
|
662
|
+
};
|
|
663
|
+
}
|
|
664
|
+
function consumeAnchoredSnippet(options) {
|
|
665
|
+
const maxChars = Math.max(0, Math.floor(options.maxChars));
|
|
666
|
+
const remainingBefore = Math.max(0, options.budget.limit - options.budget.used);
|
|
667
|
+
const visibleLimit = Math.max(0, Math.min(maxChars, remainingBefore));
|
|
668
|
+
const anchorStart = Math.max(0, Math.min(options.text.length, Math.floor(options.anchorStart)));
|
|
669
|
+
const anchorEnd = Math.max(anchorStart, Math.min(options.text.length, Math.floor(options.anchorEnd)));
|
|
670
|
+
const anchorLength = Math.max(0, anchorEnd - anchorStart);
|
|
671
|
+
if (visibleLimit <= 0) {
|
|
672
|
+
return {
|
|
673
|
+
status: "truncated",
|
|
674
|
+
quote: "",
|
|
675
|
+
visibleChars: 0,
|
|
676
|
+
sourceStart: anchorStart,
|
|
677
|
+
sourceEnd: anchorStart,
|
|
678
|
+
truncated: true,
|
|
679
|
+
};
|
|
680
|
+
}
|
|
681
|
+
let sourceStart;
|
|
682
|
+
let sourceEnd;
|
|
683
|
+
let status = "matched";
|
|
684
|
+
if (anchorLength > visibleLimit) {
|
|
685
|
+
sourceStart = anchorStart;
|
|
686
|
+
sourceEnd = Math.min(options.text.length, sourceStart + visibleLimit);
|
|
687
|
+
status = "truncated";
|
|
688
|
+
}
|
|
689
|
+
else {
|
|
690
|
+
const slack = Math.max(0, visibleLimit - anchorLength);
|
|
691
|
+
sourceStart = Math.max(0, anchorStart - Math.floor(slack / 2));
|
|
692
|
+
sourceEnd = Math.min(options.text.length, sourceStart + visibleLimit);
|
|
693
|
+
if (sourceEnd < anchorEnd) {
|
|
694
|
+
sourceEnd = anchorEnd;
|
|
695
|
+
sourceStart = Math.max(0, sourceEnd - visibleLimit);
|
|
696
|
+
}
|
|
697
|
+
else if (sourceEnd === options.text.length) {
|
|
698
|
+
sourceStart = Math.max(0, sourceEnd - visibleLimit);
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
const raw = redactInlineSecrets(options.text.slice(sourceStart, sourceEnd));
|
|
702
|
+
const consumed = consumeWorkflowWebVisibleBudget(options.budget, raw, visibleLimit);
|
|
703
|
+
// Redaction can expand secrets. Promote only when the redacted anchor
|
|
704
|
+
// itself no longer fits; clipping trailing context can remain a match.
|
|
705
|
+
const redactedThroughAnchorLength = consumed.truncated
|
|
706
|
+
? redactInlineSecrets(options.text.slice(sourceStart, Math.min(sourceEnd, anchorEnd))).length
|
|
707
|
+
: 0;
|
|
708
|
+
const anchorTruncated = status === "truncated" || redactedThroughAnchorLength > visibleLimit;
|
|
709
|
+
const truncated = status === "truncated" || consumed.truncated;
|
|
710
|
+
return {
|
|
711
|
+
status: anchorTruncated ? "truncated" : status,
|
|
712
|
+
quote: consumed.text,
|
|
647
713
|
visibleChars: consumed.text.length,
|
|
714
|
+
sourceStart,
|
|
715
|
+
sourceEnd,
|
|
716
|
+
truncated,
|
|
648
717
|
};
|
|
649
718
|
}
|
|
650
719
|
function normalizeForSearch(text) {
|
|
@@ -672,7 +741,15 @@ function normalizeForSearch(text) {
|
|
|
672
741
|
map.push(index);
|
|
673
742
|
}
|
|
674
743
|
}
|
|
675
|
-
|
|
744
|
+
while (normalized.startsWith(" ")) {
|
|
745
|
+
normalized = normalized.slice(1);
|
|
746
|
+
map.shift();
|
|
747
|
+
}
|
|
748
|
+
while (normalized.endsWith(" ")) {
|
|
749
|
+
normalized = normalized.slice(0, -1);
|
|
750
|
+
map.pop();
|
|
751
|
+
}
|
|
752
|
+
return { normalized, map };
|
|
676
753
|
}
|
|
677
754
|
function nearbySnippet(text, needle, maxChars) {
|
|
678
755
|
const index = text.indexOf(needle);
|
|
@@ -769,7 +846,9 @@ function sourceIndexEntryFromUnknown(value) {
|
|
|
769
846
|
redactedUrl: value.redactedUrl,
|
|
770
847
|
...(typeof value.urlKey === "string" ? { urlKey: value.urlKey } : {}),
|
|
771
848
|
domain: value.domain,
|
|
772
|
-
...(typeof value.title === "string"
|
|
849
|
+
...(typeof value.title === "string"
|
|
850
|
+
? { title: redactInlineSecrets(value.title) }
|
|
851
|
+
: {}),
|
|
773
852
|
contentHash: value.contentHash,
|
|
774
853
|
textChars: Number(value.textChars),
|
|
775
854
|
...(typeof value.provider === "string" ? { provider: value.provider } : {}),
|
|
@@ -907,7 +986,7 @@ function redactInlineSecrets(value) {
|
|
|
907
986
|
function redactInlineSecretsNoUrls(value) {
|
|
908
987
|
return value
|
|
909
988
|
.replace(/(authorization|cookie|set-cookie):\s*[^\n\r]+/gi, "$1: REDACTED")
|
|
910
|
-
.replace(/(token|secret|password|api[-_]?key)=([^\s&]+)/gi, "$1=REDACTED")
|
|
989
|
+
.replace(/(token|secret|password|api[-_]?key|jwt|sid|sessionid|session[-_]?id)=([^\s&]+)/gi, "$1=REDACTED")
|
|
911
990
|
.replace(/\/Users\/[^\s:'")]+/g, "/Users/REDACTED");
|
|
912
991
|
}
|
|
913
992
|
function isRecord(value) {
|
package/docs/usage.md
CHANGED
|
@@ -114,9 +114,11 @@ For reusable workflow authoring, `workflow-guide` includes validated scaffold bu
|
|
|
114
114
|
| `/workflow show <run-id-or-workflow-name>` | If the ref starts with `workflow_`, show run details; otherwise show the raw workflow spec. |
|
|
115
115
|
| `/workflow logs <run-id> [task-id] [lines]` | Print captured logs for a workflow task. Defaults to `task-1`. |
|
|
116
116
|
| `/workflow wait <run-id> [timeout-ms]` | Poll until the run finishes or the optional timeout elapses. |
|
|
117
|
+
| `/workflow stop <run-id>` | Interrupt a non-terminal run, best-effort interrupt active subagents, mark unfinished tasks interrupted, and stop the local supervisor watch. Use `/workflow resume <run-id>` if you want to restart unfinished work later. |
|
|
117
118
|
| `/workflow resume <run-id>` | Resume a failed, interrupted, or resumable blocked run (including dynamic approval blocked in headless mode): completed tasks are preserved; failed/interrupted/skipped or resumable blocked tasks reset to pending and reschedule. Loop workflows are not supported yet. |
|
|
119
|
+
| `/workflow stop <run-id>` | Stop a non-terminal run: best-effort interrupt of active subagent workers, then mark unfinished tasks `interrupted`. Completed task artifacts are preserved, and the stopped run can be restarted later with `/workflow resume` (resumed tasks start fresh sessions). |
|
|
118
120
|
|
|
119
|
-
Not implemented: `/workflow continue` and `/workflow delegate`. Use `status`, `show`, `logs`, `wait`, `resume`, and `pi-workflow inspect` for text/CLI inspection. The standalone CLI also offers `pi-workflow supervise <run-id>|--all` to drive scheduling from outside a Pi session (unfinished failed/interrupted or resumable blocked runs within the last 7 days are announced at session start with resume hints).
|
|
121
|
+
Not implemented: `/workflow continue` and `/workflow delegate`. Use `status`, `show`, `logs`, `wait`, and `pi-workflow inspect` for text/CLI inspection, and `stop` and `resume` to control a run. The standalone CLI also offers `pi-workflow supervise <run-id>|--all` to drive scheduling from outside a Pi session. Unfinished failed/interrupted or resumable blocked runs from the last 7 days are announced at session start with resume hints.
|
|
120
122
|
|
|
121
123
|
### Workflow board controls
|
|
122
124
|
|
|
@@ -198,13 +200,30 @@ For lower-latency runs, pass `--thinking low` explicitly:
|
|
|
198
200
|
|
|
199
201
|
This is an opt-in fast mode. Package defaults remain conservative until a separate holdout evaluation provides enough evidence to change them. Current evidence is limited but encouraging for explicit fast runs: the 2026-07-02 `deep-research` combined gate on P1/P2/P3-style prompts resolved non-support tasks to `low`, completed selected valid runs in about 15-17 minutes, passed the strict gate 9/9, and had zero source-ref join failures across those 9 runs. Treat this as a speed option, not proof that every workflow should default to `low`.
|
|
200
202
|
|
|
203
|
+
### Opt-in batched verification for deep-research
|
|
204
|
+
|
|
205
|
+
`deep-research` still verifies one claim per verifier task by default. For controlled runs where verifier batching is acceptable, use the explicit path-ref variant:
|
|
206
|
+
|
|
207
|
+
```text
|
|
208
|
+
/workflow validate ./workflows/deep-research/batched-verification.spec.json
|
|
209
|
+
/workflow run ./workflows/deep-research/batched-verification.spec.json "Research this repository and verify the key claims."
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
This path-ref variant keeps the same planner/research/normalization/audit/final stages, but feeds `verify-claims` from `verification-batches` and requires each verifier task to return one `results[]` row per claim id. It is not registered as an official bundled workflow name and does not change package defaults. Treat speed/cost results as task-specific: claim a win only when the run's audit reports zero missing/duplicate/invalid verifier rows, zero sourceRef join failures, and no verified-floor regression.
|
|
213
|
+
|
|
214
|
+
### Verification outcome ontology
|
|
215
|
+
|
|
216
|
+
The package exports a small verification outcome vocabulary for workflows that verify source-backed claims: `verified`, `partially_supported`, `unsupported`, `conflicting`, and `verification_blocked`. Bundled workflow helpers must use bundle-local shims that stay in parity with the package export, because helper imports are bundled from the workflow spec directory. `verification_blocked` means the verifier could not evaluate the claim because required evidence, source access, tool execution, or policy constraints blocked verification. It is not a weaker form of `verified`, never counts toward verified floors, and should remain visible in audit summaries so operators can decide whether to rerun, change source access, or treat the claim as unresolved.
|
|
217
|
+
|
|
218
|
+
Adopt this vocabulary only for evidence-verification outcomes. Do not force it onto workflow-control, finding-disposition, or ship-readiness verdicts such as `KEEP`, `DROP`, `READY`, or `NEEDS_WORK`. Deep-diff-review revival is not part of this ontology change.
|
|
219
|
+
|
|
201
220
|
### Run-scoped web-source cache
|
|
202
221
|
|
|
203
222
|
Prefer normalized workflow web tools in new workflows:
|
|
204
223
|
|
|
205
224
|
- `workflow_web_search` returns compact candidate cards.
|
|
206
225
|
- `workflow_web_fetch_source` caches one or more URLs and returns compact source cards with `sourceRef` values; pass `urls: [...]` or `sources: [{ url, title }]` to batch several fetches in one tool call.
|
|
207
|
-
- `workflow_web_source_read` reads narrow exact/fuzzy/term-matched evidence snippets by `sourceRef`; pass `queries: [...]` or `reads: [...]` to batch several snippets from the same source in one tool call, or `claim` + distinctive `terms` when the exact quote is unknown. Term/claim reads return candidate metadata (`matchedTerms`, `missingTerms`, `coverageRatio`) rather than a proof verdict.
|
|
226
|
+
- `workflow_web_source_read` reads narrow exact/fuzzy/term-matched evidence snippets by `sourceRef`; pass `queries: [...]` or `reads: [...]` to batch several snippets from the same source in one tool call, or `claim` + distinctive `terms` when the exact quote is unknown. Term/claim reads return candidate metadata (`matchedTerms`, `missingTerms`, `coverageRatio`) rather than a proof verdict. Snippet windows are anchored to the match. A result may report `status: "truncated"` when the per-task visible budget or `maxChars` clips the window (when the match itself is longer than the window, the returned quote starts at the match), or `status: "budget_exhausted"` when no visible budget remains. Both statuses include a `next` hint suggesting smaller queries or a fresh task.
|
|
208
227
|
|
|
209
228
|
The normalized cache is stored under the workflow run directory:
|
|
210
229
|
|
|
@@ -214,7 +233,11 @@ The normalized cache is stored under the workflow run directory:
|
|
|
214
233
|
|
|
215
234
|
Do not instruct agents to read that directory directly; source cards intentionally expose only opaque refs and short previews. The cache also writes an append-only index ledger plus same-URL fetch locks/negative-cache files so duplicate lookup and deterministic terminal failures can recover across parallel worker processes. Custom extension `fetch_content` providers are treated as trusted fetchers and are disabled under the default private-host policy; use the default safe fetch path or opt into trusted private-host behavior only for controlled providers. Legacy workflow tasks that still use `fetch_content` keep the older run-scoped file cache under `.pi/workflows/<run-id>/source-cache/fetch-content/`. Set `PI_WORKFLOW_FETCH_CONTENT_CACHE=0` to disable that legacy fetch cache for a run.
|
|
216
235
|
|
|
217
|
-
|
|
236
|
+
To reduce worker context pressure for legacy `fetch_content` tasks, the bundled
|
|
237
|
+
workflow fetch wrapper caps inline response text while preserving full stored
|
|
238
|
+
source content. Override with `PI_WORKFLOW_FETCH_CONTENT_INLINE_CHARS=<n>` or
|
|
239
|
+
disable the inline cap with `PI_WORKFLOW_FETCH_CONTENT_INLINE_CHARS=0` when you
|
|
240
|
+
intentionally need the provider's full inline response.
|
|
218
241
|
|
|
219
242
|
## Bundled workflows
|
|
220
243
|
|
|
@@ -284,7 +307,15 @@ Dynamic workflows keep JSON as the source of truth while allowing trusted bundle
|
|
|
284
307
|
}
|
|
285
308
|
```
|
|
286
309
|
|
|
287
|
-
Controller/helper/nested workflow refs must be bundle-local `./...` paths. Nested workflow specs are intentionally self-contained at their own directory level: refs inside a nested spec may point to files in that nested spec's subtree, but not to parent-level shared files via
|
|
310
|
+
Controller/helper/nested workflow refs must be bundle-local `./...` paths. Nested workflow specs are intentionally self-contained at their own directory level: refs inside a nested spec may point to files in that nested spec's subtree, but not to parent-level shared files via `../` — put shared helpers/schemas under each nested workflow subtree or expose them through the parent controller/helper layer. Controller/helper code is trusted Node.js code for orchestration and timeout isolation, not a security sandbox.
|
|
311
|
+
|
|
312
|
+
Controller context rules:
|
|
313
|
+
|
|
314
|
+
- Generated agents are real workflow tasks: `ctx.agent({ id, agent, prompt, tools })` inserts a deterministic `stageId.id` task into `compiled.json` and `run.json` and persists a request hash in `dynamic/events.jsonl`; replay fails closed if the same id later changes request shape.
|
|
315
|
+
- On resume, controllers must re-issue previously recorded `ctx.agent`, `ctx.helper`, and `ctx.workflow` operations in the same order before issuing new operations; omitted or out-of-order replay fails closed with an explicit replay-invariant error.
|
|
316
|
+
- Use `ctx.parallel([() => ctx.agent(...), ...])` for dynamic fan-out; the runtime records queued sibling generation ops before the controller suspends, and non-suspension operation failures make the controller fail closed. Generated dependency cycles are rejected.
|
|
317
|
+
- `ctx.helper(name, input)` can call only helpers declared in `dynamic.helpers`; pure/retry-safe helpers may set `idempotent: true` so a crash after `helper.started` but before `helper.completed` can retry the helper instead of permanently failing closed.
|
|
318
|
+
- `ctx.workflow(name, input)` can call only nested specs declared in `dynamic.workflows`.
|
|
288
319
|
|
|
289
320
|
Dynamic outputs should be compact typed artifacts. The controller returns normal workflow sections through `{ control, analysis, refs }`; generated child agents must return the same `<control>`, `<analysis>`, `<refs>` protocol as other artifact-graph tasks. When a controller result includes `outputTasks`/`outputTaskIds` (the built-in decision loop sets this from accepted `synthesize` actions), downstream `from: "<dynamic-stage>"` reducers also receive those exported task artifacts as stable sources such as `<dynamic-stage>.output`. Runtime state is stored under `.pi/workflows/<run-id>/dynamic/`:
|
|
290
321
|
|
|
@@ -301,13 +332,7 @@ Budgets bound controller behavior (`maxAgents`, `maxConcurrency`, `maxRuntimeMs`
|
|
|
301
332
|
|
|
302
333
|
### DAG authoring
|
|
303
334
|
|
|
304
|
-
Top-level `artifactGraph.stages` is DAG-capable by default. A nested `type: "dag"` is a workflow/control container, not a leaf subagent task: it must contain child `stages` and should not have its own prompt. The runtime lowers public graph relationships onto the internal dependency scheduler while preserving artifact/data boundaries.
|
|
305
|
-
|
|
306
|
-
Keep these layers distinct:
|
|
307
|
-
|
|
308
|
-
- **Workflow layer**: graph/control/data-dependency semantics such as `id`, `from`, `after`, `sourcePolicy`, `sourceProjection`, scheduling, and artifacts.
|
|
309
|
-
- **Subagent layer**: model-backed execution patterns such as `single`, `foreach`, `reduce`, and loop child stages.
|
|
310
|
-
- **Support layer**: deterministic local helper execution through `support: { uses, options }`.
|
|
335
|
+
Top-level `artifactGraph.stages` is DAG-capable by default. A nested `type: "dag"` is a workflow/control container, not a leaf subagent task: it must contain child `stages` and should not have its own prompt. The runtime lowers public graph relationships onto the internal dependency scheduler while preserving artifact/data boundaries. Keep the authoring layers described under "Stage model" distinct when composing DAGs.
|
|
311
336
|
|
|
312
337
|
DAG rules:
|
|
313
338
|
|
|
@@ -405,6 +430,28 @@ Use workflow-local JSON Schema files when the control plane needs stronger valid
|
|
|
405
430
|
|
|
406
431
|
The built-in validator supports the subset used by bundled workflows: `type`, `required`, `properties`, `items`, `enum`, `const`, length/item/number bounds, `additionalProperties`, and simple `allOf`/`anyOf`/`oneOf`. Unsupported keywords such as `$ref`, `$defs`, `definitions`, and `pattern` are rejected when the workflow is loaded.
|
|
407
432
|
|
|
433
|
+
### Opt-in partial output for streaming foreach
|
|
434
|
+
|
|
435
|
+
A producer stage can declare stable array paths that may be published before terminal completion:
|
|
436
|
+
|
|
437
|
+
```json
|
|
438
|
+
"output": {
|
|
439
|
+
"partial": { "paths": ["$.items"] }
|
|
440
|
+
}
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
A downstream `foreach` may then opt in on the matching `from` edge:
|
|
444
|
+
|
|
445
|
+
```json
|
|
446
|
+
"from": {
|
|
447
|
+
"source": "plan",
|
|
448
|
+
"path": "$.items",
|
|
449
|
+
"streaming": { "enabled": true, "minChunk": 2 }
|
|
450
|
+
}
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
The runtime accepts partial items only for declared paths. Published partial items must be final/stable JSON objects with a non-empty string `id`; the producer may emit them as `<partial-control>{"schema":"workflow-partial-output-v1","path":"$.items","items":[...]}</partial-control>` before the final workflow output. If the final `control.json` later changes a published item with the same id, the streaming foreach placeholder task becomes blocked (fail-closed). Downstream reducers still wait for the foreach placeholder plus all generated item tasks, so partial output overlaps item work without relaxing final fan-in gates.
|
|
454
|
+
|
|
408
455
|
## Support helpers
|
|
409
456
|
|
|
410
457
|
A support node runs local helper code inline instead of launching a subagent. It is declared by adding a `support` object; it does not use a separate `type` value:
|
|
@@ -542,6 +589,15 @@ Authoring checklist:
|
|
|
542
589
|
7. Add JSON output contracts for model-produced data that later stages depend on.
|
|
543
590
|
8. Run `/workflow validate <workflow-or-file>` before using the workflow.
|
|
544
591
|
|
|
592
|
+
### Roles
|
|
593
|
+
|
|
594
|
+
A workflow can declare reusable role context under top-level `roles`. Compiled role text is injected into subagent task prompts as a `# Role Context` block, and `/workflow roles <workflow>` shows the compiled result per role. Role fields:
|
|
595
|
+
|
|
596
|
+
- `fromAgent`: extract sections from a discoverable Pi agent's markdown body. By default only safe knowledge sections are included (`Core Principles`, `Domain Expertise`, `Safety Review`, `Rules`, `Research Manifest`); orchestration and output-format sections are always excluded.
|
|
597
|
+
- `includeSections` / `excludeSections`: override which agent sections are extracted.
|
|
598
|
+
- `prompt`: literal role text, appended after any extracted agent sections.
|
|
599
|
+
- `maxChars`: compiled role budget (default 12000). Longer content is truncated and flagged in `/workflow roles` output.
|
|
600
|
+
|
|
545
601
|
### Tool allowlists
|
|
546
602
|
|
|
547
603
|
Workflow `tools` are still the child-worker allowlist. Entries can be strings:
|
|
@@ -578,26 +634,17 @@ Scope order is agent frontmatter fallback < `defaults.tools` < stage `tools`: th
|
|
|
578
634
|
- Write-capable workflows should use managed worktrees in git repositories.
|
|
579
635
|
- In non-git workspaces with `worktreePolicy: "off"`, writes mutate the live directory.
|
|
580
636
|
- No backend fallback exists. The compiled backend/strategy is fixed for the run.
|
|
637
|
+
- Subagent process launches are gated per Pi process to avoid boot storms: at most `max(2, floor(cpu cores / 2))` concurrent launches, overridable with the `PI_WORKFLOW_MAX_CONCURRENT_LAUNCHES` environment variable. Queued tasks report a waiting message in their status. Deterministic boot failures (extension load or configuration errors) fail fast instead of consuming transient-failure retries.
|
|
581
638
|
- External content, source files, and web pages used by workflow workers are untrusted data, not instructions.
|
|
582
639
|
|
|
583
640
|
## Web tools
|
|
584
641
|
|
|
585
642
|
New workflows should use `workflow_web_search`, `workflow_web_fetch_source`, and
|
|
586
|
-
`workflow_web_source_read
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
candidate evidence and include matched/missing term metadata; they are not a
|
|
595
|
-
verdict by themselves. The bundled `pi-web-access` adapter remains
|
|
596
|
-
available as the default compatibility provider for this release scope.
|
|
597
|
-
|
|
598
|
-
Legacy workflows that use `web_search`, `fetch_content`, `get_search_content`, or
|
|
599
|
-
`code_search` still use the bundled `pi-web-access` dependency packaged with
|
|
600
|
-
pi-workflow. Object-form custom tool `extensions` are merged with built-in
|
|
601
|
-
mappings and deduplicated for the subagent launch. Web calls can still fail when
|
|
602
|
-
network access, provider credentials, browser state, or quota are unavailable;
|
|
603
|
-
research workflows should report those limits instead of guessing.
|
|
643
|
+
`workflow_web_source_read` — tool semantics, batching forms, and the run-scoped
|
|
644
|
+
cache are documented under "Run-scoped web-source cache" above. The bundled
|
|
645
|
+
`pi-web-access` adapter remains the default compatibility provider for this
|
|
646
|
+
release scope.
|
|
647
|
+
|
|
648
|
+
- Legacy workflows that use `web_search`, `fetch_content`, `get_search_content`, or `code_search` still use the bundled `pi-web-access` dependency packaged with pi-workflow.
|
|
649
|
+
- Object-form custom tool `extensions` are merged with built-in mappings and deduplicated for the subagent launch.
|
|
650
|
+
- Web calls can still fail when network access, provider credentials, browser state, or quota are unavailable; research workflows should report those limits instead of guessing.
|
|
@@ -38,7 +38,6 @@ Run this check in a sandboxed worker and report the artifact paths.
|
|
|
38
38
|
Start a background audit and let me inspect it in /subagent panel.
|
|
39
39
|
```
|
|
40
40
|
|
|
41
|
-
|
|
42
41
|
## What it does
|
|
43
42
|
|
|
44
43
|
Tool: `subagent`
|
|
@@ -121,9 +120,11 @@ Existing run:
|
|
|
121
120
|
{ "action": "status", "runId": "run_..." }
|
|
122
121
|
```
|
|
123
122
|
|
|
123
|
+
Recent runs can be addressed by `runId` even when they were launched from another cwd; legacy records still resolve from the explicit or current cwd.
|
|
124
|
+
|
|
124
125
|
### Panel
|
|
125
126
|
|
|
126
|
-
Inspect runs, attempts, artifacts, and log tails in a live TUI.
|
|
127
|
+
Inspect runs, attempts, artifacts, and log tails in a live TUI. The panel defaults to the current Pi session, can switch to the current cwd or to all indexed runs, and includes status filters plus a scrollable detail pane. By default it shows active runs and recent terminal runs (press `m` in the panel to show more), and it counts stale or malformed run pointers without exposing raw session ids.
|
|
127
128
|
|
|
128
129
|
Open the run monitor:
|
|
129
130
|
|
|
@@ -147,4 +148,3 @@ const status = await getSubagentStatus({ runId: run.runId });
|
|
|
147
148
|
## Detailed docs
|
|
148
149
|
|
|
149
150
|
- [`docs/usage.md`](./docs/usage.md) — full argument reference, code API, `action` behavior, backend selection, sandbox/worktree behavior, artifacts, and validation notes.
|
|
150
|
-
|
|
@@ -9,4 +9,5 @@ export const getSubagentLogs = api.getSubagentLogs;
|
|
|
9
9
|
export const waitForSubagent = api.waitForSubagent;
|
|
10
10
|
export const interruptSubagent = api.interruptSubagent;
|
|
11
11
|
export const reconcileSubagentRun = api.reconcileSubagentRun;
|
|
12
|
+
export const recordSubagentChildEvent = api.recordSubagentChildEvent;
|
|
12
13
|
export const SubagentValidationError = api.SubagentValidationError;
|
|
@@ -33,20 +33,23 @@ Every call has an `action`. The default is `run`, so omitting `action` starts a
|
|
|
33
33
|
| `action` | Purpose | Key parameters |
|
|
34
34
|
|---|---|---|
|
|
35
35
|
| `run` (default) | Start a new subagent run, or launch independent runs in parallel. | `agent`/`task` or `tasks`; plus `sandbox`, `worktree`, `model`, `async`, etc. |
|
|
36
|
-
| `status` | Read a run's current state. | `runId`, optional `attemptId` |
|
|
37
|
-
| `logs` | Read a run's captured logs. | `runId`, optional `attemptId` |
|
|
38
|
-
| `wait` | Block until a run finishes. | `runId`, optional `timeoutMs`, `pollIntervalMs` |
|
|
39
|
-
| `interrupt` | Signal a process-backed run. | `runId`, optional `attemptId`, `signal`, `escalateAfterMs`, `killAfterMs`, `reason` |
|
|
40
|
-
| `mark-background` | Mark a run as not needed before the final answer. | `runId` |
|
|
41
|
-
| `reconcile` | Re-read durable artifacts and repair stale/orphaned state when possible. | `runId` |
|
|
36
|
+
| `status` | Read a run's current state. | `runId`, optional `cwd`, `attemptId` |
|
|
37
|
+
| `logs` | Read a run's captured logs. | `runId`, optional `cwd`, `attemptId` |
|
|
38
|
+
| `wait` | Block until a run finishes. | `runId`, optional `cwd`, `timeoutMs`, `pollIntervalMs` |
|
|
39
|
+
| `interrupt` | Signal a process-backed run. | `runId`, optional `cwd`, `attemptId`, `signal`, `escalateAfterMs`, `killAfterMs`, `reason` |
|
|
40
|
+
| `mark-background` | Mark a run as not needed before the final answer. | `runId`, optional `cwd` |
|
|
41
|
+
| `reconcile` | Re-read durable artifacts and repair stale/orphaned state when possible. | `runId`, optional `cwd` |
|
|
42
42
|
|
|
43
|
-
State is file-based under `.pi/agent/runs/<run-id>/`. `status`/`logs`/`wait` read those files; `interrupt` sends a real OS signal; `mark-background` updates run metadata; `reconcile` repairs local metadata from durable attempt artifacts without relaunching work.
|
|
43
|
+
State is file-based under `.pi/agent/runs/<run-id>/`. `status`/`logs`/`wait` read those files; `interrupt` sends a real OS signal; `mark-background` updates run metadata; `reconcile` repairs local metadata from durable attempt artifacts without relaunching work. Recent runs also write a global locator pointer, so existing-run actions can often resolve a `runId` even when `cwd` is omitted or the run was launched from another cwd.
|
|
44
|
+
|
|
45
|
+
Parent orchestrators may record descendant state with `recordSubagentChildEvent`, which appends `child.*` events to the parent run's `events.jsonl` (`child.started`, `child.failed`, `child.completed`, or `child.cancelled`). `status` and `/subagent panel` aggregate those into `childSummary`, including failure counts, active child run IDs, and the latest child failure. This keeps parent status distinct from descendant failures and makes retry attempts distinguishable from newly started child work.
|
|
44
46
|
|
|
45
47
|
Model:
|
|
46
48
|
|
|
47
49
|
```text
|
|
48
50
|
run = one subagent execution
|
|
49
51
|
attempt = one launch attempt
|
|
52
|
+
child = descendant work reported by an orchestrator through child.* events
|
|
50
53
|
correlationId = optional external trace label
|
|
51
54
|
```
|
|
52
55
|
|
|
@@ -68,6 +71,7 @@ import {
|
|
|
68
71
|
waitForSubagent,
|
|
69
72
|
interruptSubagent,
|
|
70
73
|
reconcileSubagentRun,
|
|
74
|
+
recordSubagentChildEvent,
|
|
71
75
|
} from "@agwab/pi-subagent/api";
|
|
72
76
|
|
|
73
77
|
const run = await runSubagent({
|
|
@@ -83,9 +87,17 @@ const logs = await getSubagentLogs({ cwd: process.cwd(), runId: run.runId });
|
|
|
83
87
|
await waitForSubagent({ cwd: process.cwd(), runId: run.runId, timeoutMs: 300000 });
|
|
84
88
|
await interruptSubagent({ cwd: process.cwd(), runId: run.runId, reason: "caller cancelled" });
|
|
85
89
|
await reconcileSubagentRun({ cwd: process.cwd(), runId: run.runId });
|
|
90
|
+
await recordSubagentChildEvent({
|
|
91
|
+
cwd: process.cwd(),
|
|
92
|
+
runId: run.runId,
|
|
93
|
+
event: "failed",
|
|
94
|
+
childRunId: "run_child_123",
|
|
95
|
+
childTaskId: "task-4",
|
|
96
|
+
failureKind: "model",
|
|
97
|
+
});
|
|
86
98
|
```
|
|
87
99
|
|
|
88
|
-
`runSubagent` accepts the same run options as the tool, plus an optional `signal`. Existing-run helpers accept `
|
|
100
|
+
`runSubagent` accepts the same run options as the tool, plus an optional `signal`. Existing-run helpers accept `runId`, optional `cwd`, optional `attemptId`, and optional `runsDir`; when `cwd` is omitted they use the global locator index first and fall back to the current cwd for legacy records. The API is intentionally object-only and does not expose the lower-level runner internals.
|
|
89
101
|
|
|
90
102
|
The code API is ESM-only. Import `@agwab/pi-subagent/api`; do not deep-import internal files such as `src/orchestrate/*` because only documented package subpaths are public.
|
|
91
103
|
|
|
@@ -128,6 +140,21 @@ Use `concurrency` to cap parallel fan-out:
|
|
|
128
140
|
}
|
|
129
141
|
```
|
|
130
142
|
|
|
143
|
+
For synchronous parallel fan-out, `failFast:true` stops scheduling additional siblings after the first failed result. Add `cancelSiblingsOnFailure:true` to also abort siblings that are already running:
|
|
144
|
+
|
|
145
|
+
```json
|
|
146
|
+
{
|
|
147
|
+
"failFast": true,
|
|
148
|
+
"cancelSiblingsOnFailure": true,
|
|
149
|
+
"tasks": [
|
|
150
|
+
{ "task": "Run check A." },
|
|
151
|
+
{ "task": "Run check B." }
|
|
152
|
+
]
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The parallel response includes `totalTasks`, `startedCount`, `skippedCount`, and `failFastTriggered` so callers can distinguish skipped siblings from completed/failed runs. Async parallel launches return once children are started, so fail-fast decisions for later runtime failures must be handled by the parent/workflow layer.
|
|
157
|
+
|
|
131
158
|
Chain/sequential execution is intentionally not supported by this engine. If step B needs output from step A, keep that sequencing in the parent agent or a workflow layer.
|
|
132
159
|
|
|
133
160
|
## Async and existing runs
|
|
@@ -177,14 +204,26 @@ Interrupt a process-backed run:
|
|
|
177
204
|
|
|
178
205
|
`interrupt` is conservative. It can signal runs with registered process metadata. Unsupported or already-terminal runs return explicit status rather than pretending cancellation succeeded.
|
|
179
206
|
|
|
207
|
+
### Existing-run resolution
|
|
208
|
+
|
|
209
|
+
For `status`, `logs`, `wait`, `interrupt`, `mark-background`, and `reconcile`, the lookup order is:
|
|
210
|
+
|
|
211
|
+
1. Use the explicit `cwd`/`runsDir` when provided.
|
|
212
|
+
2. Otherwise, check the current cwd's `.pi/agent/runs` for legacy/local records.
|
|
213
|
+
3. Otherwise, resolve `runId` through the global locator index and read the pointed-to run directory.
|
|
214
|
+
|
|
215
|
+
The locator index is only a pointer for finding runs across cwd boundaries. `run.json`, `events.jsonl`, and attempt `result.json` files remain the source of truth.
|
|
216
|
+
|
|
180
217
|
## Common run options
|
|
181
218
|
|
|
182
219
|
| Option | Use |
|
|
183
220
|
|---|---|
|
|
184
|
-
| `cwd` | Run from a specific project directory. Existing-run actions
|
|
221
|
+
| `cwd` | Run from a specific project directory. Existing-run actions accept `cwd` to force a registry location; if omitted, recent runs can be found by global locator and older runs fall back to the current cwd. |
|
|
185
222
|
| `timeoutMs` | Limit worker execution time for `run`; limit polling duration for `action: "wait"`. Omit it for no runtime kill deadline; `wait` alone defaults to 60s polling. |
|
|
186
223
|
| `visible` | Use a visible tmux-backed worker (`visible: true`). |
|
|
187
224
|
| `concurrency` | Cap parallel run fan-out. |
|
|
225
|
+
| `failFast` | For synchronous parallel runs, stop scheduling new siblings after the first failed result. |
|
|
226
|
+
| `cancelSiblingsOnFailure` | For synchronous parallel runs, abort already-running siblings after the first failed result; implies fail-fast scheduling. |
|
|
188
227
|
| `model` | Select a Pi model/provider for model-backed workers. |
|
|
189
228
|
| `thinking` / `thinkingLevel` / `reasoningLevel` | Set the reasoning level. |
|
|
190
229
|
| `tools` | Tool allowlist. With a named agent this may only narrow agent-declared tools; it cannot expand authority. For agentless runs it sets the full tool allowlist. |
|
|
@@ -247,8 +286,8 @@ There are three inputs for worktree isolation, in order of preference:
|
|
|
247
286
|
| Input | When to use |
|
|
248
287
|
|---|---|
|
|
249
288
|
| `worktree` | Primary switch. `true` to isolate; or a string path for an explicit worktree location. |
|
|
250
|
-
| `workspace` | Advanced. `"shared"
|
|
251
|
-
| `worktreePolicy` | Advanced. `"auto"
|
|
289
|
+
| `workspace` | Advanced. `"shared" \| "worktree" \| "auto"`, or `{ mode, path }` for an explicit path. |
|
|
290
|
+
| `worktreePolicy` | Advanced. `"auto" \| "required" \| "never"` to force or forbid isolation. |
|
|
252
291
|
|
|
253
292
|
Most calls only need `worktree`:
|
|
254
293
|
|
|
@@ -389,6 +428,8 @@ Runs write durable evidence under:
|
|
|
389
428
|
|
|
390
429
|
`run.json` records a `parentSessionId` field: the Pi session id of the session that launched the run, injected from the tool context (not a model-settable argument). Consumers (e.g. status panels) can use it to scope a shared per-`cwd` runs directory to the session that owns each run. The field is omitted when no session id is available, and older records simply lack it.
|
|
391
430
|
|
|
431
|
+
Recent runs also write a small locator file under Pi's global subagent-run index. A locator contains the `runId`, absolute `cwd`, optional `runsDir`, optional `parentSessionId`, optional `correlationId`, and `updatedAt`. It is not authoritative evidence and can become stale if the pointed-to run directory is moved or deleted; use `run.json`, `events.jsonl`, and attempt `result.json` as the source of truth.
|
|
432
|
+
|
|
392
433
|
Older `schemaVersion: 1` artifacts under `<run-id>/<task-id>/` are still readable for compatibility.
|
|
393
434
|
|
|
394
435
|
Tool responses return compact status and artifact references rather than raw logs.
|
|
@@ -399,7 +440,17 @@ Tool responses return compact status and artifact references rather than raw log
|
|
|
399
440
|
/subagent panel
|
|
400
441
|
```
|
|
401
442
|
|
|
402
|
-
The panel shows
|
|
443
|
+
The panel shows run/attempt details, workspace/artifact paths, dependency metadata, event tail, and log tail. It has three scopes:
|
|
444
|
+
|
|
445
|
+
- `session`: runs whose `run.json.parentSessionId` matches the current Pi session. This is the default when a session id is available.
|
|
446
|
+
- `cwd`: runs under the current workspace's `.pi/agent/runs`, including legacy records that lack `parentSessionId`.
|
|
447
|
+
- `all`: the global locator index plus current-cwd legacy records.
|
|
448
|
+
|
|
449
|
+
Status filters are `all`, `running`, `completed`, and `failed`. With the `all` status filter, the default list shows every active run plus only the most recent terminal runs: 20 in the `session` and `cwd` scopes, 50 in the `all` scope. The `completed` and `failed` filters use the same recent-terminal cap; `running` is uncapped. The header reports `shown/total`; when older matching runs are hidden, press `m` in the panel to show more (no separate command is needed). The panel keeps a fixed-height layout, uses an internally scrollable detail pane, and never renders raw `parentSessionId` values.
|
|
450
|
+
|
|
451
|
+
Stale or malformed locators are counted in the header and skipped. Active runs whose process metadata is dead and whose heartbeat/update timestamp is stale are rendered read-only as `failed` with failure `stale`; the panel does not mutate or delete records. Use `action:"reconcile"` to repair local registry state from durable artifacts when possible.
|
|
452
|
+
|
|
453
|
+
The panel is for human inspection; existing-run tool actions remain the programmatic interface.
|
|
403
454
|
|
|
404
455
|
## Development validation
|
|
405
456
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agwab/pi-subagent",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.2",
|
|
4
4
|
"description": "Minimal subagent runtime for Pi.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"private": false,
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
"image": "https://raw.githubusercontent.com/AgwaB/pi-subagent/main/assets/subagent-panel.png"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
|
-
"check:scripts": "for d in scripts test
|
|
40
|
+
"check:scripts": "for d in scripts test; do [ -d \"$d\" ] && find \"$d\" -name '*.mjs' -print; done | xargs -n1 node --check",
|
|
41
41
|
"check:resolver": "node ./test/checks/resolver.mjs",
|
|
42
42
|
"check:api": "node ./test/checks/api.mjs",
|
|
43
43
|
"check:artifacts": "node ./test/checks/artifacts.mjs",
|