pi-crew 0.8.13 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +296 -0
- package/README.md +118 -2
- package/docs/FEATURE_INTAKE.md +1 -1
- package/docs/HARNESS.md +20 -19
- package/docs/PROJECT_REVIEW.md +132 -133
- package/docs/PROJECT_REVIEW_FIXES.md +130 -131
- package/docs/actions-reference.md +127 -121
- package/docs/architecture.md +1 -1
- package/docs/code-review-2026-05-11.md +134 -134
- package/docs/commands-reference.md +108 -106
- package/docs/comparison-pi-subagents-vs-pi-crew.md +105 -105
- package/docs/deep-review-report.md +1 -1
- package/docs/dynamic-workflows.md +90 -0
- package/docs/fixes/BATCH_A_H1_H2.md +17 -17
- package/docs/fixes/bug-007-async-notifier-stale-ctx.md +23 -23
- package/docs/followup-plan-2026-05-12.md +135 -135
- package/docs/followup-review-2026-05-12.md +86 -86
- package/docs/followup-review-round3-2026-05-12.md +123 -123
- package/docs/goals.md +59 -0
- package/docs/implementation-plan-top3.md +4 -4
- package/docs/issue-29-analysis.md +2 -2
- package/docs/oh-my-pi-research.md +154 -154
- package/docs/optimization-plan.md +2 -0
- package/docs/perf/baseline-2026-05.md +9 -9
- package/docs/perf/final-report-2026-05.md +2 -2
- package/docs/perf/sprint-1-report.md +2 -2
- package/docs/perf/sprint-2-report.md +1 -1
- package/docs/perf/upgrade-plan-2026-05.md +72 -72
- package/docs/pi-crew-bugs.md +230 -230
- package/docs/pi-crew-investigation-report.md +102 -102
- package/docs/pi-crew-test-round5.md +4 -4
- package/docs/runtime-analysis-child-vs-live.md +57 -57
- package/docs/runtime-migration-in-process-analysis.md +97 -97
- package/install.mjs +3 -2
- package/package.json +2 -4
- package/skills/orchestration/SKILL.md +11 -11
- package/src/agents/agent-config.ts +4 -0
- package/src/config/config.ts +39 -0
- package/src/config/types.ts +11 -0
- package/src/extension/action-suggestions.ts +2 -1
- package/src/extension/async-notifier.ts +10 -0
- package/src/extension/help.ts +14 -0
- package/src/extension/project-init.ts +7 -20
- package/src/extension/registration/commands.ts +27 -0
- package/src/extension/team-tool/destructive-gate.ts +1 -1
- package/src/extension/team-tool/goal-wrap.ts +288 -0
- package/src/extension/team-tool/goal.ts +405 -0
- package/src/extension/team-tool/run.ts +103 -4
- package/src/extension/team-tool/workflow-manage.ts +194 -0
- package/src/extension/team-tool.ts +20 -0
- package/src/hooks/types.ts +3 -1
- package/src/runtime/async-runner.ts +24 -2
- package/src/runtime/background-runner.ts +68 -19
- package/src/runtime/child-pi.ts +6 -1
- package/src/runtime/completion-guard.ts +1 -1
- package/src/runtime/dynamic-workflow-context.ts +450 -0
- package/src/runtime/dynamic-workflow-runner.ts +180 -0
- package/src/runtime/global-worker-cap.ts +96 -0
- package/src/runtime/goal-evaluator.ts +294 -0
- package/src/runtime/goal-loop-runner.ts +612 -0
- package/src/runtime/goal-state-store.ts +209 -0
- package/src/runtime/pi-args.ts +10 -2
- package/src/runtime/result-extractor.ts +32 -0
- package/src/runtime/team-runner.ts +11 -1
- package/src/runtime/verification-gates.ts +85 -5
- package/src/runtime/verification-integrity.ts +110 -0
- package/src/runtime/verification-worktree.ts +136 -0
- package/src/runtime/workspace-lock.ts +448 -0
- package/src/schema/config-schema.ts +26 -0
- package/src/schema/team-tool-schema.ts +39 -4
- package/src/state/atomic-write.ts +9 -0
- package/src/state/contracts.ts +14 -0
- package/src/state/crew-init.ts +18 -5
- package/src/state/event-log.ts +7 -1
- package/src/state/state-store.ts +2 -0
- package/src/state/types.ts +82 -0
- package/src/state/worker-atomic-writer.ts +176 -0
- package/src/utils/redaction.ts +104 -24
- package/src/workflows/discover-workflows.ts +25 -1
- package/src/workflows/workflow-config.ts +13 -0
- package/teams/parallel-research.team.md +1 -1
- package/workflows/examples/hello.dwf.ts +24 -0
|
@@ -1,66 +1,66 @@
|
|
|
1
1
|
# pi-crew Runtime Analysis: child-process vs live-session
|
|
2
2
|
|
|
3
|
-
>
|
|
4
|
-
>
|
|
3
|
+
> Date: 2026-05-12
|
|
4
|
+
> Status: Performance analysis — proposing a default runtime change
|
|
5
5
|
|
|
6
6
|
---
|
|
7
7
|
|
|
8
|
-
## 1.
|
|
8
|
+
## 1. Current problem
|
|
9
9
|
|
|
10
|
-
pi-crew default runtime
|
|
10
|
+
pi-crew's default runtime is **child-process** — each worker spawns its own `pi` CLI child process. This causes:
|
|
11
11
|
|
|
12
12
|
### 1.1 Memory
|
|
13
13
|
|
|
14
|
-
| Scenario | child-process | live-session |
|
|
14
|
+
| Scenario | child-process | live-session | Savings |
|
|
15
15
|
|---|---|---|---|
|
|
16
|
-
| 1 worker | ~150 MB
|
|
17
|
-
| 4 workers (parallel) | ~600 MB
|
|
18
|
-
| 8 workers (max cap) | ~1.2 GB
|
|
16
|
+
| 1 worker | ~150 MB added | ~15 MB added | **135 MB** |
|
|
17
|
+
| 4 workers (parallel) | ~600 MB added | ~60 MB added | **540 MB** |
|
|
18
|
+
| 8 workers (max cap) | ~1.2 GB added | ~120 MB added | **~1.1 GB** |
|
|
19
19
|
|
|
20
|
-
**
|
|
20
|
+
**The parent Pi process already consumes ~308 MB.** Adding 4 child-process workers brings the total to **910 MB**, nearly 1 GB just to run a single team. A machine with 8 GB of RAM will start swapping.
|
|
21
21
|
|
|
22
22
|
### 1.2 Startup latency
|
|
23
23
|
|
|
24
|
-
|
|
|
24
|
+
| Stage | child-process | live-session |
|
|
25
25
|
|---|---|---|
|
|
26
26
|
| Process spawn | ~300ms | 0 |
|
|
27
27
|
| Node.js bootstrap | ~500ms | 0 |
|
|
28
28
|
| Pi CLI init + load extensions | ~1-2s | 0 |
|
|
29
|
-
| pi-crew register() (
|
|
29
|
+
| pi-crew register() (runs again in child) | ~200ms | 0 |
|
|
30
30
|
| createAgentSession() | ~100ms | ~100ms |
|
|
31
31
|
| First LLM token | **2-4s total** | **200-500ms total** |
|
|
32
32
|
|
|
33
|
-
**
|
|
33
|
+
**Each worker takes 2-4s to start.** An implementation team with 3 sequential phases pays 3 × 2-4s = **6-12s just to spawn processes**, before any work even begins.
|
|
34
34
|
|
|
35
35
|
### 1.3 CPU overhead
|
|
36
36
|
|
|
37
|
-
-
|
|
38
|
-
- `pi-crew register()`
|
|
39
|
-
- JSON parsing/redaction
|
|
37
|
+
- Each child process runs a separate V8 isolate → separate JIT compiler, separate GC
|
|
38
|
+
- `pi-crew register()` runs **repeatedly** in each child (load config, register tools, bind extensions)
|
|
39
|
+
- JSON parsing/redaction on child stdout → CPU cost per event
|
|
40
40
|
|
|
41
41
|
### 1.4 Complexity
|
|
42
42
|
|
|
43
|
-
- `child-pi.ts` = 461
|
|
43
|
+
- `child-pi.ts` = 461 lines just to manage the subprocess lifecycle
|
|
44
44
|
- Hard kill timer (3s), post-exit stdio guard (3s), final drain (5s), response timeout (5 min)
|
|
45
|
-
- Process tree kill (`taskkill /t /f`
|
|
46
|
-
- Mock system
|
|
45
|
+
- Process tree kill (`taskkill /t /f` on Windows, `kill -- -<pgid>` to signal the whole process group on Unix)
|
|
46
|
+
- Mock system for testing (`PI_TEAMS_MOCK_CHILD_PI`)
|
|
47
47
|
|
|
48
48
|
---
|
|
49
49
|
|
|
50
|
-
## 2. live-session
|
|
50
|
+
## 2. live-session is ready
|
|
51
51
|
|
|
52
|
-
pi-crew
|
|
52
|
+
pi-crew **has already implemented** a complete live-session runtime:
|
|
53
53
|
|
|
54
|
-
- `src/runtime/live-session-runtime.ts` — 600 LOC, feature parity
|
|
55
|
-
- `src/runtime/runtime-resolver.ts` — `resolveCrewRuntime()`
|
|
56
|
-
- Soft turn limit + grace period (default 5) —
|
|
57
|
-
- Tool filtering — `filterActiveTools()`
|
|
54
|
+
- `src/runtime/live-session-runtime.ts` — 600 LOC, feature parity with child-process for most use cases
|
|
55
|
+
- `src/runtime/runtime-resolver.ts` — `resolveCrewRuntime()` already handles auto/live-session/child-process
|
|
56
|
+
- Soft turn limit + grace period (default 5) — **already present**, identical to pi-subagents3
|
|
57
|
+
- Tool filtering — `filterActiveTools()` removes recursive tools
|
|
58
58
|
- Yield/submit_result — custom tool + JSON event detection
|
|
59
59
|
- Live agent control — steer, resume, real-time tool activity
|
|
60
|
-
- Extension bridge — `buildExtensionBridge()`
|
|
60
|
+
- Extension bridge — `buildExtensionBridge()` for extension-based APIs
|
|
61
61
|
- Health diagnostics — `collectLiveSessionHealth()`, `formatLiveSessionDiagnostics()`
|
|
62
62
|
|
|
63
|
-
###
|
|
63
|
+
### Current configuration must be set manually:
|
|
64
64
|
|
|
65
65
|
```json
|
|
66
66
|
// .pi/crew-config.json
|
|
@@ -71,7 +71,7 @@ pi-crew **đã implement** live-session runtime hoàn chỉnh:
|
|
|
71
71
|
}
|
|
72
72
|
```
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
Or:
|
|
75
75
|
```json
|
|
76
76
|
{
|
|
77
77
|
"runtime": {
|
|
@@ -81,15 +81,15 @@ Hoặc:
|
|
|
81
81
|
}
|
|
82
82
|
```
|
|
83
83
|
|
|
84
|
-
**
|
|
84
|
+
**The current default is `"auto"` WITHOUT `preferLiveSession`** → it always falls back to child-process.
|
|
85
85
|
|
|
86
86
|
---
|
|
87
87
|
|
|
88
|
-
## 3.
|
|
88
|
+
## 3. Proposal
|
|
89
89
|
|
|
90
|
-
### 3.1
|
|
90
|
+
### 3.1 Change the default: `preferLiveSession: true` when mode = "auto"
|
|
91
91
|
|
|
92
|
-
`resolveCrewRuntime()
|
|
92
|
+
Current `resolveCrewRuntime()`:
|
|
93
93
|
|
|
94
94
|
```typescript
|
|
95
95
|
// src/runtime/runtime-resolver.ts
|
|
@@ -98,10 +98,10 @@ if (requestedMode === "live-session" || (requestedMode === "auto" && config.runt
|
|
|
98
98
|
if (live.available) return liveCaps(requestedMode);
|
|
99
99
|
// fallback to child-process
|
|
100
100
|
}
|
|
101
|
-
return childCaps(requestedMode); // ← default:
|
|
101
|
+
return childCaps(requestedMode); // ← default: always child-process
|
|
102
102
|
```
|
|
103
103
|
|
|
104
|
-
|
|
104
|
+
**Proposed change:**
|
|
105
105
|
|
|
106
106
|
```typescript
|
|
107
107
|
if (requestedMode === "live-session" || requestedMode === "auto") {
|
|
@@ -113,11 +113,11 @@ if (requestedMode === "live-session" || requestedMode === "auto") {
|
|
|
113
113
|
}
|
|
114
114
|
```
|
|
115
115
|
|
|
116
|
-
**
|
|
116
|
+
**In other words:** `"auto"` → try live-session first, fall back to child-process if the SDK is unavailable. Users can still force `child-process` if they want.
|
|
117
117
|
|
|
118
|
-
### 3.2
|
|
118
|
+
### 3.2 Add an opt-out for risky tasks
|
|
119
119
|
|
|
120
|
-
|
|
120
|
+
A task-level flag to force child-process for specific tasks:
|
|
121
121
|
|
|
122
122
|
```json
|
|
123
123
|
{
|
|
@@ -129,43 +129,43 @@ Task-level flag để force child-process cho tasks cụ thể:
|
|
|
129
129
|
}
|
|
130
130
|
```
|
|
131
131
|
|
|
132
|
-
Tasks
|
|
132
|
+
Tasks with the `executor` role, or tasks running in a worktree → automatically use child-process.
|
|
133
133
|
|
|
134
|
-
### 3.3
|
|
134
|
+
### 3.3 Expected benefits
|
|
135
135
|
|
|
136
|
-
| Metric |
|
|
136
|
+
| Metric | Before (child-process default) | After (live-session default) |
|
|
137
137
|
|---|---|---|
|
|
138
138
|
| **4-worker memory** | ~910 MB | ~370 MB |
|
|
139
139
|
| **First token latency** | 2-4s/worker | 200-500ms/worker |
|
|
140
|
-
| **
|
|
140
|
+
| **Total startup (3 phases)** | 6-12s | 0.6-1.5s |
|
|
141
141
|
| **Steering** | ❌ | ✅ |
|
|
142
142
|
| **Resume** | ❌ | ✅ |
|
|
143
143
|
| **Crash isolation** | ✅ | ❌ (fallback available) |
|
|
144
144
|
| **Parent crash risk** | None | Low (session.abort handles most) |
|
|
145
145
|
|
|
146
|
-
### 3.4
|
|
146
|
+
### 3.4 Risks and mitigations
|
|
147
147
|
|
|
148
|
-
|
|
|
148
|
+
| Risk | Severity | Mitigation |
|
|
149
149
|
|---|---|---|
|
|
150
|
-
| Agent crash → parent crash | Medium | `try/catch`
|
|
151
|
-
| Memory pressure (
|
|
152
|
-
| Recursive team calls | Low | `filterActiveTools()`
|
|
153
|
-
| SDK
|
|
154
|
-
| Unhandled errors
|
|
150
|
+
| Agent crash → parent crash | Medium | `try/catch` around `session.prompt()`, `AbortController` per-agent, cleanup on unhandled rejection |
|
|
151
|
+
| Memory pressure (many sessions) | Low | Keep the `maxConcurrent` cap (default 4); the limit is sufficient |
|
|
152
|
+
| Recursive team calls | Low | `filterActiveTools()` already removes recursive tools |
|
|
153
|
+
| SDK unavailable (old Pi version) | Low | Auto-fallback to child-process |
|
|
154
|
+
| Unhandled errors in session | Medium | Global `unhandledRejection` handler per-session |
|
|
155
155
|
|
|
156
156
|
---
|
|
157
157
|
|
|
158
|
-
## 4.
|
|
158
|
+
## 4. Conclusion
|
|
159
159
|
|
|
160
|
-
**pi-crew
|
|
160
|
+
**pi-crew is using an overly heavy runtime for most use cases.** child-process provides excellent crash isolation, but:
|
|
161
161
|
|
|
162
|
-
- **
|
|
163
|
-
- **
|
|
164
|
-
- **
|
|
162
|
+
- **~10× the memory per worker** compared to live-session (~150 MB vs ~15 MB)
|
|
163
|
+
- **8× the startup latency**
|
|
164
|
+
- **No steer/resume** — loses interactive capability
|
|
165
165
|
|
|
166
|
-
live-session
|
|
167
|
-
1. Pi SDK `createAgentSession()`
|
|
168
|
-
2.
|
|
169
|
-
3.
|
|
166
|
+
live-session **is already implemented**; only the default needs to change. The crash isolation trade-off is acceptable because:
|
|
167
|
+
1. The Pi SDK `createAgentSession()` already handles most errors
|
|
168
|
+
2. A child-process fallback is still available when needed
|
|
169
|
+
3. The benefits (540 MB saved, 3s faster startup, steer/resume) outweigh the risks
|
|
170
170
|
|
|
171
|
-
**Action:**
|
|
171
|
+
**Action:** Change the `resolveCrewRuntime()` default so `"auto"` prefers live-session, keeping child-process as a fallback.
|
|
@@ -1,130 +1,130 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Analysis: Migrating pi-crew fully to in-process execution
|
|
2
2
|
|
|
3
|
-
>
|
|
4
|
-
>
|
|
3
|
+
> Date: 2026-05-12
|
|
4
|
+
> Question: What if we move entirely to in-process like pi-subagents3?
|
|
5
5
|
|
|
6
6
|
---
|
|
7
7
|
|
|
8
|
-
## 1.
|
|
8
|
+
## 1. Current state
|
|
9
9
|
|
|
10
|
-
pi-crew
|
|
10
|
+
pi-crew has **3 runtime modes**, with child-process as the default:
|
|
11
11
|
|
|
12
12
|
```
|
|
13
|
-
scaffold →
|
|
14
|
-
child-process → spawn `pi` CLI subprocess per worker (DEFAULT)
|
|
13
|
+
scaffold → no workers run (dry-run)
|
|
14
|
+
child-process → spawn a `pi` CLI subprocess per worker (DEFAULT)
|
|
15
15
|
live-session → createAgentSession() in-process per worker
|
|
16
16
|
```
|
|
17
17
|
|
|
18
|
-
### Code
|
|
18
|
+
### Code related to child-process
|
|
19
19
|
|
|
20
|
-
| File | LOC |
|
|
20
|
+
| File | LOC | Role |
|
|
21
21
|
|---|---|---|
|
|
22
22
|
| `child-pi.ts` | 461 | Subprocess lifecycle, stdout parsing, kill process tree |
|
|
23
|
-
| `pi-args.ts` | 165 | Build CLI args
|
|
24
|
-
| `pi-spawn.ts` | 167 | Detect `pi` binary path (local/global) |
|
|
25
|
-
| `post-exit-stdio-guard.ts` | 86 | Drain child stdout
|
|
26
|
-
| `async-runner.ts` | 153 | Spawn background team runs (
|
|
27
|
-
| **
|
|
23
|
+
| `pi-args.ts` | 165 | Build CLI args for the child `pi` process |
|
|
24
|
+
| `pi-spawn.ts` | 167 | Detect the `pi` binary path (local/global) |
|
|
25
|
+
| `post-exit-stdio-guard.ts` | 86 | Drain child stdout after exit, hard kill timer |
|
|
26
|
+
| `async-runner.ts` | 153 | Spawn background team runs (detached process) |
|
|
27
|
+
| **Total** | **1,032** | **Code only used by child-process** |
|
|
28
28
|
|
|
29
|
-
### Code
|
|
29
|
+
### Code related to live-session (already present)
|
|
30
30
|
|
|
31
|
-
| File | LOC |
|
|
31
|
+
| File | LOC | Role |
|
|
32
32
|
|---|---|---|
|
|
33
33
|
| `live-session-runtime.ts` | 600 | In-process execution, soft turn limit, yield, custom tools |
|
|
34
34
|
| `runtime-resolver.ts` | 92 | Auto-detect available runtime |
|
|
35
35
|
| `task-runner/live-executor.ts` | 95 | Adapter: live-session → task-runner interface |
|
|
36
36
|
|
|
37
|
-
### Files
|
|
37
|
+
### Files that use the child-process path
|
|
38
38
|
|
|
39
|
-
- `task-runner.ts` — 8 references, ~120
|
|
39
|
+
- `task-runner.ts` — 8 references, ~120 lines of child-process-specific logic (heartbeat, progress, model retry)
|
|
40
40
|
- `register.ts` — `terminateActiveChildPiProcesses()` cleanup
|
|
41
41
|
- `doctor.ts` — diagnose child-process issues
|
|
42
42
|
- `async-runner.ts` — spawn background team runs
|
|
43
43
|
|
|
44
|
-
###
|
|
44
|
+
### Related tests
|
|
45
45
|
|
|
46
46
|
- ~37 test files reference child-process / mock child
|
|
47
|
-
- ~3 test files reference live-session mock
|
|
48
|
-
-
|
|
47
|
+
- ~3 test files reference the live-session mock
|
|
48
|
+
- All integration tests use `PI_TEAMS_MOCK_CHILD_PI` — **would need rewriting** if child-process is dropped
|
|
49
49
|
|
|
50
50
|
---
|
|
51
51
|
|
|
52
|
-
## 2.
|
|
52
|
+
## 2. If we move fully to in-process
|
|
53
53
|
|
|
54
|
-
### 2.1
|
|
54
|
+
### 2.1 What we GAIN
|
|
55
55
|
|
|
56
|
-
####
|
|
56
|
+
#### Immediate benefits
|
|
57
57
|
|
|
58
|
-
| Metric | child-process | in-process |
|
|
58
|
+
| Metric | child-process | in-process | Improvement |
|
|
59
59
|
|---|---|---|---|
|
|
60
|
-
| Memory / worker | ~150 MB | ~15 MB | **10×
|
|
61
|
-
| 4 workers peak | ~600 MB
|
|
62
|
-
| Startup / worker | 2-4s | 200-500ms | **8×
|
|
63
|
-
| Team startup (3 phases) | 6-12s overhead | ~1s overhead | **6-12×
|
|
64
|
-
| Steering | ❌ | ✅ | **
|
|
65
|
-
| Resume | ❌ | ✅ | **
|
|
66
|
-
| Context inheritance | ❌ | ✅ (parentContext) | **
|
|
67
|
-
| Live tool activity | ❌ | ✅ | **
|
|
60
|
+
| Memory / worker | ~150 MB | ~15 MB | **10× lighter** |
|
|
61
|
+
| 4 workers peak | ~600 MB added | ~60 MB added | **540 MB saved** |
|
|
62
|
+
| Startup / worker | 2-4s | 200-500ms | **8× faster** |
|
|
63
|
+
| Team startup (3 phases) | 6-12s overhead | ~1s overhead | **6-12× faster** |
|
|
64
|
+
| Steering | ❌ | ✅ | **New feature** |
|
|
65
|
+
| Resume | ❌ | ✅ | **New feature** |
|
|
66
|
+
| Context inheritance | ❌ | ✅ (parentContext) | **New feature** |
|
|
67
|
+
| Live tool activity | ❌ | ✅ | **New feature** |
|
|
68
68
|
| Yield/submit_result | ✅ (JSON event) | ✅ (custom tool) | Parity |
|
|
69
69
|
| Worktree isolation | ✅ | ✅ | Parity |
|
|
70
70
|
|
|
71
|
-
####
|
|
71
|
+
#### Architectural benefits
|
|
72
72
|
|
|
73
|
-
- **
|
|
74
|
-
-
|
|
75
|
-
-
|
|
76
|
-
-
|
|
77
|
-
- **Zero npm dependencies
|
|
73
|
+
- **Delete ~1,000 LOC** of subprocess management code
|
|
74
|
+
- Simplify `task-runner.ts` (remove 120 lines of child-process logic)
|
|
75
|
+
- Remove `post-exit-stdio-guard.ts`, `pi-spawn.ts`, `pi-args.ts` subprocess overhead
|
|
76
|
+
- Remove `responseTimeoutMs`, `hardKillMs`, `postExitStdioGuardMs` — no need to kill a process tree
|
|
77
|
+
- **Zero npm dependencies for execution** (currently requires `jiti` for async-runner TypeScript loading)
|
|
78
78
|
|
|
79
|
-
### 2.2
|
|
79
|
+
### 2.2 What we LOSE
|
|
80
80
|
|
|
81
81
|
#### ❌ Process isolation — biggest loss
|
|
82
82
|
|
|
83
83
|
```
|
|
84
84
|
child-process: worker crash → worker dies → parent continues
|
|
85
|
-
in-process: worker crash →
|
|
85
|
+
in-process: worker crash → can crash parent → entire team lost
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
-
Pi SDK `createAgentSession()`
|
|
89
|
-
- **Unhandled promise rejection**
|
|
90
|
-
- **Infinite loop**
|
|
91
|
-
- **OOM** —
|
|
92
|
-
- **Node.js segfault** — rare
|
|
88
|
+
The Pi SDK `createAgentSession()` handles most errors, but:
|
|
89
|
+
- **Unhandled promise rejection** within a session
|
|
90
|
+
- **Infinite loop** inside a custom tool
|
|
91
|
+
- **OOM** — one session consuming all memory affects everything
|
|
92
|
+
- **Node.js segfault** — rare, but when it happens = total death
|
|
93
93
|
|
|
94
94
|
#### ❌ Async background team runs
|
|
95
95
|
|
|
96
|
-
`async-runner.ts`
|
|
96
|
+
`async-runner.ts` spawns a **detached process** to run a team when the user closes the terminal. In-process cannot do this — the process dies when the terminal closes.
|
|
97
97
|
|
|
98
|
-
**
|
|
98
|
+
**Solution:** Keep `async-runner.ts` specifically for background runs — it spawns the entire team runner, not individual workers.
|
|
99
99
|
|
|
100
|
-
#### ❌
|
|
100
|
+
#### ❌ Simple depth guard
|
|
101
101
|
|
|
102
|
-
`checkCrewDepth()`
|
|
102
|
+
`checkCrewDepth()` counts nesting depth via the `PI_CREW_PARENT_PID` env var. In-process has no process boundary → counting depth is harder. A global counter or a thread-local equivalent (e.g. Node's `AsyncLocalStorage`) is needed.
|
|
103
103
|
|
|
104
|
-
#### ❌ 37+ test files
|
|
104
|
+
#### ❌ 37+ test files need updating
|
|
105
105
|
|
|
106
|
-
|
|
106
|
+
All integration tests use `PI_TEAMS_MOCK_CHILD_PI`. They would need to switch to `PI_CREW_MOCK_LIVE_SESSION` or have new mocks written.
|
|
107
107
|
|
|
108
108
|
#### ❌ `_CrewRuntimeKind` type union
|
|
109
109
|
|
|
110
|
-
`"scaffold" | "child-process" | "live-session"` →
|
|
110
|
+
`"scaffold" | "child-process" | "live-session"` → if child-process is dropped, only `"scaffold" | "in-process"` remains. Breaking change for config.
|
|
111
111
|
|
|
112
|
-
### 2.3
|
|
112
|
+
### 2.3 Specific risks
|
|
113
113
|
|
|
114
|
-
|
|
|
114
|
+
| Risk | Severity | Details |
|
|
115
115
|
|---|---|---|
|
|
116
|
-
| Parent crash | **Medium** | Unhandled error in agent session → parent dies. Pi SDK wraps
|
|
117
|
-
| Memory pressure | **Medium** | 4 in-process sessions + context windows
|
|
118
|
-
| Extension conflicts | **Low** | In-process extensions
|
|
119
|
-
| Recursive team calls | **Low** | `team` tool
|
|
120
|
-
| Background runs | **Solved** |
|
|
121
|
-
| Breaking config | **Low** |
|
|
116
|
+
| Parent crash | **Medium** | Unhandled error in agent session → parent dies. The Pi SDK wraps most of them but not 100%. |
|
|
117
|
+
| Memory pressure | **Medium** | 4 in-process sessions + context windows can consume >500 MB within the same heap, which also means longer V8 GC pauses. |
|
|
118
|
+
| Extension conflicts | **Low** | In-process extensions can conflict (global state, tool registry). Filtering exists but there are edge cases. |
|
|
119
|
+
| Recursive team calls | **Low** | The `team` tool inside an agent session → infinite recursion. Already filtered but a guarantee is needed. |
|
|
120
|
+
| Background runs | **Solved** | Keep `async-runner.ts` separate; it only spawns 1 detached process for the full team. |
|
|
121
|
+
| Breaking config | **Low** | Users currently setting `mode: "child-process"` → need a migration path. |
|
|
122
122
|
|
|
123
123
|
---
|
|
124
124
|
|
|
125
|
-
## 3.
|
|
125
|
+
## 3. Two directions
|
|
126
126
|
|
|
127
|
-
###
|
|
127
|
+
### Direction A: Drop child-process entirely (like pi-subagents3)
|
|
128
128
|
|
|
129
129
|
```
|
|
130
130
|
┌─────────────────────────┐
|
|
@@ -140,15 +140,15 @@ Tất cả integration tests dùng `PI_TEAMS_MOCK_CHILD_PI`. Cần chuyển sang
|
|
|
140
140
|
└─────────────────────────┘
|
|
141
141
|
```
|
|
142
142
|
|
|
143
|
-
**
|
|
144
|
-
**
|
|
145
|
-
|
|
146
|
-
|
|
143
|
+
**Delete:** `child-pi.ts`, `pi-args.ts`, `pi-spawn.ts`, `post-exit-stdio-guard.ts` (~879 LOC)
|
|
144
|
+
**Keep:** `async-runner.ts` (for background team runs — spawns 1 process for the whole team, not per-worker)
|
|
145
|
+
**Change:** `task-runner.ts` → drop the child-process branch, use only live-session
|
|
146
|
+
**Change:** All 37+ test files
|
|
147
147
|
|
|
148
|
-
**Pros:** Clean architecture,
|
|
149
|
-
**Cons:**
|
|
148
|
+
**Pros:** Clean architecture, simplest, lowest maintenance
|
|
149
|
+
**Cons:** Loses per-worker crash isolation; many tests need rewriting
|
|
150
150
|
|
|
151
|
-
###
|
|
151
|
+
### Direction B: Live-session default + child-process opt-in (recommended)
|
|
152
152
|
|
|
153
153
|
```
|
|
154
154
|
┌─────────────────────────────────┐
|
|
@@ -162,26 +162,26 @@ Tất cả integration tests dùng `PI_TEAMS_MOCK_CHILD_PI`. Cần chuyển sang
|
|
|
162
162
|
└──────────┴───────────────────────┘
|
|
163
163
|
```
|
|
164
164
|
|
|
165
|
-
|
|
166
|
-
**
|
|
167
|
-
**
|
|
168
|
-
**
|
|
165
|
+
**Change:** `runtime-resolver.ts` → `"auto"` prefers live-session
|
|
166
|
+
**Keep:** All child-process code (as fallback)
|
|
167
|
+
**Keep:** All tests
|
|
168
|
+
**Add:** Config `"riskyIsolation": true` so the executor role auto-uses child-process
|
|
169
169
|
|
|
170
|
-
**Pros:** Best of both worlds, zero breaking
|
|
171
|
-
**Cons:**
|
|
170
|
+
**Pros:** Best of both worlds, zero breaking changes
|
|
171
|
+
**Cons:** Still maintaining 2 code paths
|
|
172
172
|
|
|
173
173
|
---
|
|
174
174
|
|
|
175
|
-
## 4.
|
|
175
|
+
## 4. Recommendation: Direction B
|
|
176
176
|
|
|
177
|
-
**
|
|
177
|
+
**Do not drop child-process entirely** — too risky for production. Instead:
|
|
178
178
|
|
|
179
|
-
###
|
|
179
|
+
### Step 1: Change the default runtime (fast, low risk)
|
|
180
180
|
|
|
181
181
|
```typescript
|
|
182
182
|
// runtime-resolver.ts
|
|
183
|
-
//
|
|
184
|
-
//
|
|
183
|
+
// Before: "auto" → always child-process
|
|
184
|
+
// After: "auto" → try live-session, fall back to child-process
|
|
185
185
|
|
|
186
186
|
export async function resolveCrewRuntime(config, env) {
|
|
187
187
|
const requestedMode = config.runtime?.mode ?? "auto";
|
|
@@ -197,7 +197,7 @@ export async function resolveCrewRuntime(config, env) {
|
|
|
197
197
|
}
|
|
198
198
|
```
|
|
199
199
|
|
|
200
|
-
###
|
|
200
|
+
### Step 2: Add a per-role isolation policy
|
|
201
201
|
|
|
202
202
|
```json
|
|
203
203
|
// crew-config.json
|
|
@@ -213,38 +213,38 @@ export async function resolveCrewRuntime(config, env) {
|
|
|
213
213
|
}
|
|
214
214
|
```
|
|
215
215
|
|
|
216
|
-
###
|
|
216
|
+
### Step 3: Observability for in-process errors
|
|
217
217
|
|
|
218
218
|
```typescript
|
|
219
|
-
// Wrap session.prompt()
|
|
219
|
+
// Wrap session.prompt() with a global error handler
|
|
220
220
|
process.on('unhandledRejection', (err) => {
|
|
221
221
|
logInternalError('live-session.unhandled', err);
|
|
222
222
|
// Don't crash — attempt recovery
|
|
223
223
|
});
|
|
224
224
|
```
|
|
225
225
|
|
|
226
|
-
###
|
|
226
|
+
### Expected benefits of Direction B
|
|
227
227
|
|
|
228
|
-
| |
|
|
228
|
+
| | Current | After Step 1 | After Step 2 |
|
|
229
229
|
|---|---|---|---|
|
|
230
230
|
| **Default runtime** | child-process | live-session (auto) | live-session + per-role |
|
|
231
|
-
| **Memory (4 workers)** | ~910 MB | ~370 MB | ~450 MB (
|
|
232
|
-
| **Startup** | 2-4s/worker | 200-500ms/worker |
|
|
231
|
+
| **Memory (4 workers)** | ~910 MB | ~370 MB | ~450 MB (mixed) |
|
|
232
|
+
| **Startup** | 2-4s/worker | 200-500ms/worker | Mixed |
|
|
233
233
|
| **Crash isolation** | ✅ all | ✅ fallback | ✅ risky roles |
|
|
234
234
|
| **Steering** | ❌ | ✅ | ✅ |
|
|
235
235
|
| **Breaking changes** | — | None | None |
|
|
236
|
-
| **Code
|
|
237
|
-
| **Tests
|
|
236
|
+
| **Code deleted** | — | 0 | 0 (keep fallback) |
|
|
237
|
+
| **Tests to change** | — | 0 | 0 |
|
|
238
238
|
|
|
239
239
|
---
|
|
240
240
|
|
|
241
|
-
## 5.
|
|
241
|
+
## 5. Conclusion
|
|
242
242
|
|
|
243
|
-
**
|
|
243
|
+
**Do not move 100% to in-process.** Reasons:
|
|
244
244
|
|
|
245
|
-
1. **Crash isolation
|
|
246
|
-
2. **Background runs
|
|
247
|
-
3. **37+ test files
|
|
248
|
-
4. **Breaking change**
|
|
245
|
+
1. **Crash isolation is too important** for executor/test-engineer roles — these agents run code, write files, and can infinite-loop
|
|
246
|
+
2. **Background runs need a detached process** — impossible in-process
|
|
247
|
+
3. **37+ test files need rewriting** — high migration cost
|
|
248
|
+
4. **Breaking change** for users currently using `mode: "child-process"`
|
|
249
249
|
|
|
250
|
-
**
|
|
250
|
+
**Instead: Change the default to live-session + keep child-process as a fallback/opt-in.** This is precisely the design already built into `resolveCrewRuntime()` — just flip the default in `"auto"` mode. Zero code deleted, zero breaking changes, and users choose isolation when they need it.
|
package/install.mjs
CHANGED
|
@@ -70,11 +70,12 @@ console.log("To force-disable or force-enable workers in a shell, use PI_TEAMS_E
|
|
|
70
70
|
console.log("\n--- What pi-crew writes (and how to undo it) ---");
|
|
71
71
|
console.log("pi-crew itself writes nothing on install. The following only happens when you");
|
|
72
72
|
console.log("explicitly run `team action=init` in a project:");
|
|
73
|
-
console.log(" - A marker-delimited block is injected into the project's AGENTS.md.");
|
|
74
|
-
console.log(" (Wrapped in <!-- PI-CREW:GUIDANCE:START/END --> — your content is never touched.)");
|
|
75
73
|
console.log(" - A `.crew/` runtime state dir is created in the project (run history + artifacts).");
|
|
76
74
|
console.log(" - With --copy-builtins: bundled agents/teams/workflows are copied into the project.");
|
|
77
75
|
console.log("This install also created the global config above (`~/.pi/agent/pi-crew.json`).");
|
|
76
|
+
console.log("Note: pi-crew v0.8.14+ no longer injects a guidance block into AGENTS.md on init");
|
|
77
|
+
console.log(" (it was redundant — the `team` tool self-describes via tool registration).");
|
|
78
|
+
console.log(" Versions <0.8.14 did inject one; `team action=cleanup` removes it.");
|
|
78
79
|
console.log("\nFull uninstall (in order):");
|
|
79
80
|
console.log(" team action=cleanup dryRun=true # preview what would be removed (project)");
|
|
80
81
|
console.log(" team action=cleanup # remove the AGENTS.md guidance block");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-crew",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "Pi extension for coordinated AI teams, workflows, worktrees, and async task orchestration",
|
|
5
5
|
"author": "baphuongna",
|
|
6
6
|
"license": "MIT",
|
|
@@ -81,12 +81,10 @@
|
|
|
81
81
|
},
|
|
82
82
|
"dependencies": {
|
|
83
83
|
"@sinclair/typebox": "^0.34.49",
|
|
84
|
-
"acorn": "^8.16.0",
|
|
85
84
|
"ajv": "^8.20.0",
|
|
86
85
|
"cli-highlight": "^2.1.11",
|
|
87
86
|
"diff": "^5.2.0",
|
|
88
|
-
"jiti": "^2.7.0"
|
|
89
|
-
"typebox": "^1.1.38"
|
|
87
|
+
"jiti": "^2.7.0"
|
|
90
88
|
},
|
|
91
89
|
"devDependencies": {
|
|
92
90
|
"@biomejs/biome": "^2.4.15",
|