pi-crew 0.2.23 → 0.2.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Bug #20: Infinite Retry Loop - Mock Tasks Never Complete
|
|
2
|
+
|
|
3
|
+
## Symptom
|
|
4
|
+
When running tests with `PI_TEAMS_MOCK_CHILD_PI=json-success`, tasks were stuck in an infinite loop:
|
|
5
|
+
- Task 01_explore ran repeatedly (100+ times)
|
|
6
|
+
- Each run completed quickly but the task status stayed "needs_attention"
|
|
7
|
+
- The DAG scheduler kept re-scheduling the same task
|
|
8
|
+
|
|
9
|
+
## Root Cause
|
|
10
|
+
The DAG-based task scheduler in `team-runner.ts` uses `completedIds` to determine which tasks are "done" and can unblock downstream tasks. Before this fix, however, it only counted tasks whose status was `"completed"` as done.
|
|
11
|
+
|
|
12
|
+
When a task has `yield.enabled` but the worker doesn't call `submit_result`, the task ends with status `"needs_attention"` instead of `"completed"`. This is a terminal state (and is treated as such in other places), but the DAG scheduler didn't recognize it as complete.
|
|
13
|
+
|
|
14
|
+
As a result:
|
|
15
|
+
1. Task 01_explore returns "needs_attention"
|
|
16
|
+
2. The DAG still thinks 01_explore is NOT completed
|
|
17
|
+
3. The DAG returns all tasks (including 01_explore) as "ready"
|
|
18
|
+
4. 01_explore gets re-scheduled, creating an infinite loop
|
|
19
|
+
|
|
20
|
+
## Fix
|
|
21
|
+
In `src/runtime/team-runner.ts`, change the `completedIds` computation to also treat `"needs_attention"` as a completed state:
|
|
22
|
+
|
|
23
|
+
```typescript
|
|
24
|
+
// Before
|
|
25
|
+
const completedIds = new Set(tasks.filter((t) => t.status === "completed").map((t) => t.id));
|
|
26
|
+
|
|
27
|
+
// After
|
|
28
|
+
const completedIds = new Set(tasks.filter((t) => t.status === "completed" || t.status === "needs_attention").map((t) => t.id));
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
The same status filter (`"completed"` or `"needs_attention"`) was applied in three places in `team-runner.ts`:
|
|
32
|
+
- Line 411: DAG completion check
|
|
33
|
+
- Line 422: taskResults for workflow context
|
|
34
|
+
- Line 574: taskResults for phase advancement
|
|
35
|
+
|
|
36
|
+
## Why This Works
|
|
37
|
+
- "needs_attention" is already in the `terminalStatuses` set (used for workflow phase advancement)
|
|
38
|
+
- The task graph scheduler already treats "needs_attention" as a terminal state
|
|
39
|
+
- The missing piece was the DAG-based dependency check; the two `taskResults` filters were updated to match
|
|
40
|
+
|
|
41
|
+
## Verification
|
|
42
|
+
Run a test with the mock:
|
|
43
|
+
```bash
|
|
44
|
+
PI_TEAMS_MOCK_CHILD_PI=json-success PI_TEAMS_EXECUTE_WORKERS=1 node --test test/unit/agent-runtime-files.test.ts
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Expected: the test completes in ~3 seconds with 1 pass, 0 failures, and 0 skipped.
|
package/package.json
CHANGED
|
@@ -408,7 +408,7 @@ async function executeTeamRunCore(
|
|
|
408
408
|
// DAG-based execution plan: when tasks have explicit dependsOn, use the
|
|
409
409
|
// topological wave planner to determine ready tasks. Fall back to the
|
|
410
410
|
// existing task-graph-scheduler when no explicit deps exist (backward compat).
|
|
411
|
-
const completedIds = new Set(tasks.filter((t) => t.status === "completed").map((t) => t.id));
|
|
411
|
+
const completedIds = new Set(tasks.filter((t) => t.status === "completed" || t.status === "needs_attention").map((t) => t.id));
|
|
412
412
|
const dagReady = dagReadyTaskIds(tasks, completedIds);
|
|
413
413
|
const effectiveReady = dagReady ?? snapshot.ready;
|
|
414
414
|
|
|
@@ -419,7 +419,7 @@ async function executeTeamRunCore(
|
|
|
419
419
|
const wfContext: PhaseGuardContext = {
|
|
420
420
|
completedArtifacts,
|
|
421
421
|
previousPhaseStatus,
|
|
422
|
-
taskResults: tasks.filter((t) => t.status === "completed").map((t) => ({ taskId: t.id, status: t.status, outputPath: t.resultArtifact?.path })),
|
|
422
|
+
taskResults: tasks.filter((t) => t.status === "completed" || t.status === "needs_attention").map((t) => ({ taskId: t.id, status: t.status, outputPath: t.resultArtifact?.path })),
|
|
423
423
|
};
|
|
424
424
|
const preconditions = validatePhasePreconditions(wfMachine, wfContext);
|
|
425
425
|
if (!preconditions.ready) {
|
|
@@ -571,7 +571,7 @@ async function executeTeamRunCore(
|
|
|
571
571
|
const wfContext: PhaseGuardContext = {
|
|
572
572
|
completedArtifacts,
|
|
573
573
|
previousPhaseStatus,
|
|
574
|
-
taskResults: tasks.filter((t) => t.status === "completed").map((t) => ({ taskId: t.id, status: t.status, outputPath: t.resultArtifact?.path })),
|
|
574
|
+
taskResults: tasks.filter((t) => t.status === "completed" || t.status === "needs_attention").map((t) => ({ taskId: t.id, status: t.status, outputPath: t.resultArtifact?.path })),
|
|
575
575
|
};
|
|
576
576
|
// Determine phase transition status based on individual task outcomes
|
|
577
577
|
const phaseTasks = phaseTaskIds.map((taskId) => tasks.find((t) => t.id === taskId)).filter((t): t is NonNullable<typeof t> => t !== undefined);
|