pi-crew 0.5.2 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/CHANGELOG.md +183 -0
  2. package/README.md +17 -1
  3. package/docs/architecture.md +2 -0
  4. package/docs/bugs/cross-session-notification-leakage.md +82 -0
  5. package/docs/coding-agent-optimization.md +268 -0
  6. package/docs/deep-review-report.md +384 -0
  7. package/docs/distillation/cybersecurity-patterns.md +294 -0
  8. package/docs/migration-v0.4-v0.5.md +208 -0
  9. package/docs/optimization-plan.md +642 -0
  10. package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
  11. package/docs/pi-mono-opportunities.md +969 -0
  12. package/docs/pi-mono-review.md +291 -0
  13. package/docs/skills/REFERENCE.md +144 -0
  14. package/package.json +12 -9
  15. package/skills/artifact-analysis-loop/SKILL.md +302 -0
  16. package/skills/async-worker-recovery/SKILL.md +19 -1
  17. package/skills/child-pi-spawning/SKILL.md +19 -6
  18. package/skills/context-artifact-hygiene/SKILL.md +19 -2
  19. package/skills/delegation-patterns/SKILL.md +68 -3
  20. package/skills/detection-pipeline-design/SKILL.md +285 -0
  21. package/skills/event-log-tracing/SKILL.md +20 -6
  22. package/skills/git-master/SKILL.md +20 -6
  23. package/skills/hunting-investigation-loop/SKILL.md +401 -0
  24. package/skills/incident-playbook-construction/SKILL.md +383 -0
  25. package/skills/live-agent-lifecycle/SKILL.md +20 -6
  26. package/skills/mailbox-interactive/SKILL.md +19 -6
  27. package/skills/model-routing-context/SKILL.md +19 -1
  28. package/skills/multi-perspective-review/SKILL.md +19 -4
  29. package/skills/observability-reliability/SKILL.md +19 -2
  30. package/skills/orchestration/SKILL.md +20 -2
  31. package/skills/ownership-session-security/SKILL.md +20 -2
  32. package/skills/pi-extension-lifecycle/SKILL.md +20 -2
  33. package/skills/post-mortem/SKILL.md +7 -2
  34. package/skills/read-only-explorer/SKILL.md +20 -6
  35. package/skills/requirements-to-task-packet/SKILL.md +23 -3
  36. package/skills/resource-discovery-config/SKILL.md +20 -2
  37. package/skills/runtime-state-reader/SKILL.md +20 -2
  38. package/skills/safe-bash/SKILL.md +21 -6
  39. package/skills/scrutinize/SKILL.md +20 -2
  40. package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
  41. package/skills/security-review/SKILL.md +560 -0
  42. package/skills/state-mutation-locking/SKILL.md +22 -2
  43. package/skills/systematic-debugging/SKILL.md +8 -6
  44. package/skills/threat-hypothesis-framework/SKILL.md +175 -0
  45. package/skills/ui-render-performance/SKILL.md +20 -2
  46. package/skills/verification-before-done/SKILL.md +17 -2
  47. package/skills/widget-rendering/SKILL.md +21 -6
  48. package/skills/workspace-isolation/SKILL.md +20 -6
  49. package/skills/worktree-isolation/SKILL.md +20 -6
  50. package/src/agents/agent-config.ts +40 -1
  51. package/src/benchmark/benchmark-runner.ts +45 -0
  52. package/src/benchmark/feedback-loop.ts +5 -0
  53. package/src/config/config.ts +32 -5
  54. package/src/config/role-tools.ts +82 -0
  55. package/src/config/suggestions.ts +8 -0
  56. package/src/config/types.ts +4 -0
  57. package/src/extension/async-notifier.ts +10 -1
  58. package/src/extension/crew-cleanup.ts +114 -0
  59. package/src/extension/cross-extension-rpc.ts +1 -1
  60. package/src/extension/notification-router.ts +18 -0
  61. package/src/extension/register.ts +27 -19
  62. package/src/extension/registration/subagent-tools.ts +1 -1
  63. package/src/extension/team-tool/anchor.ts +201 -0
  64. package/src/extension/team-tool/api.ts +2 -1
  65. package/src/extension/team-tool/auto-summarize.ts +154 -0
  66. package/src/extension/team-tool/run.ts +42 -7
  67. package/src/extension/team-tool.ts +44 -2
  68. package/src/hooks/registry.ts +1 -3
  69. package/src/observability/event-bus.ts +69 -0
  70. package/src/observability/event-to-metric.ts +0 -2
  71. package/src/runtime/anchor-manager.ts +473 -0
  72. package/src/runtime/async-runner.ts +8 -4
  73. package/src/runtime/auto-summarize.ts +350 -0
  74. package/src/runtime/background-runner.ts +10 -3
  75. package/src/runtime/budget-tracker.ts +354 -0
  76. package/src/runtime/chain-runner.ts +507 -0
  77. package/src/runtime/child-pi.ts +123 -35
  78. package/src/runtime/crash-recovery.ts +5 -4
  79. package/src/runtime/crew-agent-runtime.ts +1 -0
  80. package/src/runtime/custom-tools/irc-tool.ts +13 -0
  81. package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
  82. package/src/runtime/delivery-coordinator.ts +10 -3
  83. package/src/runtime/dynamic-script-runner.ts +482 -0
  84. package/src/runtime/foreground-control.ts +87 -17
  85. package/src/runtime/handoff-manager.ts +589 -0
  86. package/src/runtime/hidden-handoff.ts +424 -0
  87. package/src/runtime/live-agent-manager.ts +20 -4
  88. package/src/runtime/live-session-runtime.ts +39 -4
  89. package/src/runtime/manifest-cache.ts +2 -1
  90. package/src/runtime/model-resolver.ts +16 -4
  91. package/src/runtime/phase-tracker.ts +373 -0
  92. package/src/runtime/pi-args.ts +11 -1
  93. package/src/runtime/pi-json-output.ts +31 -0
  94. package/src/runtime/pipeline-runner.ts +514 -0
  95. package/src/runtime/progress-tracker.ts +124 -0
  96. package/src/runtime/retry-runner.ts +354 -0
  97. package/src/runtime/sandbox.ts +252 -0
  98. package/src/runtime/scheduler.ts +7 -2
  99. package/src/runtime/skill-effectiveness.ts +473 -0
  100. package/src/runtime/skill-instructions.ts +37 -3
  101. package/src/runtime/subagent-manager.ts +1 -1
  102. package/src/runtime/task-graph.ts +11 -1
  103. package/src/runtime/task-runner.ts +92 -18
  104. package/src/runtime/team-runner.ts +13 -12
  105. package/src/runtime/tool-progress.ts +10 -3
  106. package/src/runtime/verification-gates.ts +367 -0
  107. package/src/schema/team-tool-schema.ts +37 -0
  108. package/src/skills/discover-skills.ts +5 -0
  109. package/src/state/active-run-registry.ts +9 -2
  110. package/src/state/contracts.ts +9 -0
  111. package/src/state/crew-init.ts +3 -3
  112. package/src/state/decision-ledger.ts +98 -55
  113. package/src/state/event-log-rotation.ts +2 -2
  114. package/src/state/event-log.ts +144 -10
  115. package/src/state/hook-instinct-bridge.ts +5 -5
  116. package/src/state/mailbox.ts +10 -0
  117. package/src/state/run-cache.ts +18 -8
  118. package/src/state/state-store.ts +3 -1
  119. package/src/state/types.ts +4 -0
  120. package/src/tools/safe-bash-extension.ts +1 -0
  121. package/src/tools/safe-bash.ts +152 -20
  122. package/src/types/new-api-types.ts +34 -0
  123. package/src/ui/agent-management-overlay.ts +5 -1
  124. package/src/ui/crew-widget.ts +29 -15
  125. package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
  126. package/src/ui/powerbar-publisher.ts +101 -7
  127. package/src/ui/tool-render.ts +15 -15
  128. package/src/ui/transcript-cache.ts +13 -0
  129. package/src/utils/bm25-search.ts +16 -8
  130. package/src/utils/env-filter.ts +8 -5
  131. package/src/utils/redaction.ts +169 -15
  132. package/src/utils/session-utils.ts +52 -0
  133. package/src/utils/sse-parser.ts +10 -1
  134. package/src/worktree/cleanup.ts +6 -1
  135. package/src/worktree/worktree-manager.ts +32 -13
  136. package/workflows/chain.workflow.md +252 -0
  137. package/workflows/pipeline.workflow.md +27 -0
@@ -0,0 +1,175 @@
1
+ ---
2
+ name: threat-hypothesis-framework
3
+ description: "Structured investigation using testable hypotheses."
4
+ triggers:
5
+ - "hunt for"
6
+ - "investigate"
7
+ - "threat hypothesis"
8
+ - "test this pattern"
9
+ - "find evidence of"
10
+
11
+ ---
12
+ # threat-hypothesis-framework
13
+
14
+ Use this skill when conducting hypothesis-driven investigation and threat hunting.
15
+
16
+ ## Source
17
+
18
+ Distilled from `building-threat-hunt-hypothesis-framework` (Anthropic Cybersecurity Skills) and generalized for software/codebase context.
19
+
20
+ ## When to Use
21
+
22
+ - Proactively hunting for indicators of compromise in code
23
+ - After threat intelligence indicates suspicious patterns
24
+ - During incident investigation to scope extent
25
+ - When EDR/logs alert on related indicators
26
+ - During periodic security assessments
27
+
28
+ ## Workflow
29
+
30
+ ```markdown
31
+ ## Hypothesis Investigation Loop
32
+
33
+ 1. **Formulate** → Given [observed IOCs/patterns], hypothesize [attack scenario]
34
+ 2. **Identify** → List data sources: [files, commits, logs, configs]
35
+ 3. **Search** → Run queries across identified sources
36
+ 4. **Analyze** → Pattern match: [technique, indicator, artifact]
37
+ 5. **Validate** → Confirm with [secondary source, cross-reference]
38
+ 6. **Correlate** → Link findings to [broader campaign, actor]
39
+ 7. **Report** → Document: [finding, confidence, next_action]
40
+ ```
41
+
42
+ ## Hypothesis Structure
43
+
44
+ ```yaml
45
+ hypothesis:
46
+ id: string # e.g., "HY-2026-001"
47
+ technique: string # e.g., "credential-theft", "supply-chain"
48
+ description: string # What we're testing
49
+ data_sources:
50
+ - type: [file|commit|log|config]
51
+ locations: [paths, globs]
52
+ search_patterns:
53
+ - pattern: string
54
+ type: [regex|AST|signature]
55
+ validation:
56
+ - method: string
57
+ expected_result: string
58
+ confidence_levels:
59
+ high: [confirmed by multiple sources]
60
+ medium: [single source, needs validation]
61
+ low: [heuristic match, requires investigation]
62
+ ```
63
+
64
+ ## Hunt Report Format
65
+
66
+ ```
67
+ Hunt ID: [HY-runid-date-seq]
68
+ Hypothesis: [what we're testing]
69
+ Data Sources: [where we looked]
70
+ Search Patterns: [what we searched for]
71
+ Findings:
72
+ - File: [path]
73
+ Line: [number]
74
+ Evidence: [what matched]
75
+ Confidence: [High/Medium/Low]
76
+ Correlation: [link to other findings]
77
+ Next Actions:
78
+ - investigate: [further analysis needed]
79
+ - contain: [immediate action required]
80
+ - close: [false positive, no action]
81
+ ```
82
+
83
+ ## Investigation Examples
84
+
85
+ ### Example 1: Credential Detection Hunt
86
+
87
+ ```yaml
88
+ hypothesis:
89
+ id: HY-2026-042
90
+ technique: hardcoded-credentials
91
+ description: Search for hardcoded secrets in codebase
92
+ data_sources:
93
+ - type: file
94
+ locations: ["**/*.ts", "**/*.js", "**/*.env"]
95
+ search_patterns:
96
+ - pattern: '(api[_-]?key|secret|token|password)\s*[=:]'
97
+ type: regex
98
+ - pattern: 'process\.env\.[A-Z_]+'
99
+ type: regex
100
+ validation:
101
+ - method: git history check
102
+ expected_result: No recent secret additions
103
+ - method: secret scanning tool
104
+ expected_result: Zero findings in main branch
105
+ ```
106
+
107
+ ### Example 2: Supply Chain Hunt
108
+
109
+ ```yaml
110
+ hypothesis:
111
+ id: HY-2026-043
112
+ technique: dependency-confusion
113
+ description: Detect potential dependency confusion attacks
114
+ data_sources:
115
+ - type: file
116
+ locations: ["**/package.json", "**/requirements.txt"]
117
+ search_patterns:
118
+ - pattern: '"@private/.*"'
119
+ type: regex
120
+ - pattern: 'version.*>.*<.*9999999'
121
+ type: regex
122
+ validation:
123
+ - method: npm audit
124
+ expected_result: No anomalies
125
+ - method: typosquat check
126
+ expected_result: No similar package names
127
+ ```
128
+
129
+ ## Confidence Scoring
130
+
131
+ | Level | Criteria | Action |
132
+ |-------|----------|--------|
133
+ | **High** | Confirmed by 2+ independent sources, exact match | Immediate action |
134
+ | **Medium** | Single source, pattern match, needs validation | Investigate further |
135
+ | **Low** | Heuristic match, possible false positive | Log and monitor |
136
+
137
+ ## Enforcement — Threat Hypothesis Framework Gate
138
+
139
+ **Before reporting hunt findings, verify:**
140
+
141
+ - [ ] Hypothesis clearly stated before search (not scattershot searching)
142
+ - [ ] Data sources identified (files, commits, logs, configs)
143
+ - [ ] Search patterns defined (regex, AST, signature)
144
+ - [ ] Findings validated with secondary source or cross-reference
145
+ - [ ] Confidence level assigned (High/Medium/Low) based on validation
146
+ - [ ] Report includes: finding, confidence, next_action (investigate/contain/close)
147
+
148
+ If ANY answer is NO → Stop. Complete hypothesis framework before reporting.
149
+
150
+ ## Anti-Patterns
151
+
152
+ - **Don't** run hunt without clear hypothesis (scattershot searching)
153
+ - **Don't** claim finding without validation (false positive risk)
154
+ - **Don't** skip correlation step (missing broader context)
155
+ - **Don't** report without confidence level (misleads stakeholders)
156
+
157
+ ## Tools
158
+
159
+ | Tool | Purpose |
160
+ |------|---------|
161
+ | `rg` (ripgrep) | Pattern search in files |
162
+ | `git log` | History investigation |
163
+ | `semgrep` | AST-based pattern matching |
164
+ | `npm audit` | Dependency vulnerability check |
165
+
166
+ ## Verification
167
+
168
+ For hypothesis framework changes:
169
+ ```bash
170
+ cd pi-crew
171
+ npx tsc --noEmit
172
+ node --experimental-strip-types --test test/unit/security-patterns.test.ts
173
+ ```
174
+
175
+ *See also: `hunting-investigation-loop` skill for active hunting workflows.*
@@ -1,8 +1,13 @@
1
1
  ---
2
2
  name: ui-render-performance
3
- description: Non-blocking Pi TUI render workflow. Use when changing widgets, powerbar/statusbar segments, dashboard panes, overlays, snapshot caches, or live UI refresh behavior.
3
+ description: "Non-blocking Pi TUI render workflow."
4
+ triggers:
5
+ - "widget render"
6
+ - "dashboard pane"
7
+ - "overlay update"
8
+ - "snapshot cache"
9
+ - "UI refresh"
4
10
  ---
5
-
6
11
  # ui-render-performance
7
12
 
8
13
  Use this skill for Pi/pi-crew TUI work.
@@ -23,6 +28,19 @@ Use this skill for Pi/pi-crew TUI work.
23
28
  - On session switch, cancel timers and ensure in-flight async preloads cannot update stale session UI.
24
29
  - Watch TTL interactions: a preload interval shorter than cache TTL prevents render-time refresh gaps.
25
30
 
31
+ ## Enforcement — UI Render Performance Gate
32
+
33
+ **Before modifying widgets or UI rendering, verify:**
34
+
35
+ - [ ] Render path is non-blocking (no fs calls, no network, no large JSON parsing)
36
+ - [ ] All data preloaded async before first render
37
+ - [ ] Snapshot cache TTL appropriate (500ms or less)
38
+ - [ ] Render scheduler used for coalescing renders
39
+ - [ ] Stale warnings filtered for terminal status (completed/failed/cancelled)
40
+ - [ ] TTL interactions understood (preload interval < cache TTL)
41
+
42
+ If ANY answer is NO → Stop. Fix render performance issues before proceeding.
43
+
26
44
  ## Anti-patterns
27
45
 
28
46
  - Do not call `loadConfig()`, `manifestCache.list()`, or `refreshIfStale()` repeatedly inside `renderTick()` unless backed by preloaded frame data.
@@ -1,8 +1,15 @@
1
1
  ---
2
2
  name: verification-before-done
3
- description: "Evidence before claims. Use before claiming work is complete, fixed, passing, reviewed, committed, or ready to hand off. Triggers: done, fixed, complete, ready to merge, can I close, is it working, verify this, check if it passes, all good, LGTM, ready to ship."
4
- ---
3
+ description: "Evidence before claims."
4
+ triggers:
5
+ - "done"
6
+ - "verify this"
7
+ - "is it working"
8
+ - "check if it passes"
9
+ - "ready to ship"
10
+
5
11
 
12
+ ---
6
13
  # verification-before-done
7
14
 
8
15
  Core principle: evidence before claims. A worker report, green-looking log, or previous run is not fresh verification.
@@ -64,3 +71,11 @@ Before finalizing any work, report:
64
71
  ## Red Flags
65
72
 
66
73
  Stop before saying done if you are using words like "should", "probably", "looks", "seems", "I think", or if you are trusting an agent report without checking evidence.
74
+
75
+ ## Anti-Patterns
76
+
77
+ - **Don't** claim "tests pass" without running them in the current session
78
+ - **Don't** trust agent reports without checking evidence yourself
79
+ - **Don't** use fuzzy language like "seems", "probably", "looks like"
80
+ - **Don't** skip providing verification commands for claims
81
+ - **Don't** claim done if you're still using hypotheses instead of evidence
@@ -1,8 +1,14 @@
1
1
  ---
2
2
  name: widget-rendering
3
- description: Pi TUI crew widget data sources, display priority, and rendering performance. Use when debugging empty agents, ghost runs, or widget timing issues.
4
- ---
3
+ description: "Pi TUI crew widget data sources, display priority, and rendering performance."
4
+ triggers:
5
+ - "empty agent"
6
+ - "ghost run"
7
+ - "widget timing"
8
+ - "display priority"
9
+ - "snapshot cache"
5
10
 
11
+ ---
6
12
  # widget-rendering
7
13
 
8
14
  The crew widget (`src/ui/crew-widget.ts`) displays active runs and their agents in the Pi TUI. It must render synchronously at TTY refresh rate without blocking. Understanding the data sources and timing rules is essential for debugging display issues.
@@ -44,8 +50,6 @@ In-memory map from `live-agent-manager.ts`. Provides:
44
50
 
45
51
  **When used:** For completed agents, or when snapshot cache misses.
46
52
 
47
- ---
48
-
49
53
  ## Display Priority
50
54
 
51
55
  ```
@@ -66,8 +70,6 @@ for each active run:
66
70
  → apply linger rules (finishedAgents: 1min, errors: 2min)
67
71
  ```
68
72
 
69
- ---
70
-
71
73
  ## Active Runs Filtering
72
74
 
73
75
  `activeWidgetRuns()` determines which runs to show. Key filter: `isDisplayActiveRun(manifest, tasks)` from `process-status.ts`.
@@ -227,6 +229,19 @@ const TOOL_LABELS = {
227
229
 
228
230
  ---
229
231
 
232
+ ## Enforcement — Widget Rendering Gate
233
+
234
+ **Before modifying widget rendering or display logic, verify:**
235
+
236
+ - [ ] Render path is synchronous and non-blocking (no fs/network calls)
237
+ - [ ] Display priority chain correct (liveAgents → snapshot cache → agents.json)
238
+ - [ ] Ghost run filtering works (stale async PID + age > 30min hidden)
239
+ - [ ] Stale handle eviction runs on every refresh (10min terminal handles removed)
240
+ - [ ] Cache invalidation handles empty results (forces refresh on next tick)
241
+ - [ ] Tool name extraction uses TOOL_LABELS for readable activity descriptions
242
+
243
+ If ANY answer is NO → Stop. Fix widget rendering issues before proceeding.
244
+
230
245
  ## Anti-patterns
231
246
 
232
247
  - **Blocking render with fs calls**: Every `readFileSync`, `readdirSync`, `fs.statSync` in the render path causes frame drops. Preload everything async.
@@ -1,8 +1,13 @@
1
1
  ---
2
2
  name: workspace-isolation
3
- description: "Workspace isolation boundaries. Use when ensuring agents from workspace A cannot access workspace B, or worktree-based parallel execution. Triggers: workspace isolation, cross-workspace access, escape boundary, worktree safety."
3
+ description: "Workspace isolation boundaries."
4
+ triggers:
5
+ - "workspace isolation"
6
+ - "cross-workspace access"
7
+ - "escape boundary"
8
+ - "worktree safety"
9
+ - "agent isolation"
4
10
  ---
5
-
6
11
  # workspace-isolation
7
12
 
8
13
  pi-crew enforces workspace isolation so that agents, runs, and live sessions from one project folder cannot be accessed from another. The workspace boundary is `manifest.cwd` — the directory where a run was initiated.
@@ -157,6 +162,19 @@ const DEFAULT_PATHS = {
157
162
 
158
163
  All paths are resolved relative to `manifest.cwd`, ensuring state stays under the project root.
159
164
 
165
+ ## Enforcement — Workspace Isolation Gate
166
+
167
+ **Before performing cross-workspace operations, verify:**
168
+
169
+ - [ ] workspaceId carried from manifest.cwd through all operations
170
+ - [ ] Live agent operations filtered by workspaceId (list, steer, follow-up, stop, resume)
171
+ - [ ] resolveContainedPath used (not startsWith) for path validation
172
+ - [ ] resolveRealContainedPath used for symlink detection
173
+ - [ ] Worktree paths under <repo-root>/.worktrees/ (never outside workspace)
174
+ - [ ] Cross-session cancel/respond rejected (force=true only when explicit)
175
+
176
+ If ANY answer is NO → Stop. Verify workspace isolation before proceeding.
177
+
160
178
  ## Anti-patterns
161
179
 
162
180
  - **Passing raw cwd without validation**: Always use `resolveContainedPath` to ensure paths stay under workspace root.
@@ -165,8 +183,6 @@ All paths are resolved relative to `manifest.cwd`, ensuring state stays under th
165
183
  - **Worktree name collision**: Use deterministic names from run/task IDs. Never accept user-controlled branch names.
166
184
  - **Dirty worktree removal**: Never force-remove worktrees with uncommitted changes unless explicitly confirmed.
167
185
 
168
- ---
169
-
170
186
  ## Source patterns
171
187
 
172
188
  - `src/extension/team-tool/api.ts` — workspaceId filter in list-live-agents, steer-agent, follow-up-agent, stop-agent, resume-agent
@@ -177,8 +193,6 @@ All paths are resolved relative to `manifest.cwd`, ensuring state stays under th
177
193
  - `src/worktree/worktree-manager.ts` — prepareTaskWorkspace, assertCleanLeader, linkNodeModulesIfPresent
178
194
  - `src/config/defaults.ts` — DEFAULT_PATHS (state under project root)
179
195
 
180
- ---
181
-
182
196
  ## Verification
183
197
 
184
198
  ```bash
@@ -1,8 +1,13 @@
1
1
  ---
2
2
  name: worktree-isolation
3
- description: Conflict-safe git worktree workflow. Use when running parallel implementation workers, isolating risky edits, or cleaning up task worktrees.
3
+ description: "Conflict-safe git worktree workflow."
4
+ triggers:
5
+ - "create worktree"
6
+ - "parallel workers"
7
+ - "isolate edits"
8
+ - "cleanup worktree"
9
+ - "branch freshness"
4
10
  ---
5
-
6
11
  # worktree-isolation
7
12
 
8
13
  Use this skill for worktree-based execution or cleanup. Git worktrees create isolated working directories that allow parallel code-changing tasks without git conflicts.
@@ -177,6 +182,19 @@ If a task crashes mid-worktree:
177
182
  3. If run is failed/cancelled and worktree is dirty → report to operator
178
183
  4. If run is completed → safe to clean up
179
184
 
185
+ ## Enforcement — Worktree Isolation Gate
186
+
187
+ **Before creating or cleaning up worktrees, verify:**
188
+
189
+ - [ ] Leader repo is clean before creating worktrees (assertCleanLeader passes)
190
+ - [ ] One owner per file/symbol (no two worktrees edit same file)
191
+ - [ ] Worktree naming is deterministic from run/task IDs (no user-controlled fragments)
192
+ - [ ] Branch freshness checked before reuse (base branch hasn't diverged)
193
+ - [ ] Dirty worktrees preserved by default (force=true only when removal is explicitly confirmed)
194
+ - [ ] Worktree paths under <repo-root>/.worktrees/ (never outside workspace)
195
+
196
+ If ANY answer is NO → Stop. Verify worktree safety before proceeding.
197
+
180
198
  ## Anti-patterns
181
199
 
182
200
  - **Parallel editing same file**: Assign one owner per file. Use the task ID in branch names to track ownership.
@@ -185,8 +203,6 @@ If a task crashes mid-worktree:
185
203
  - **Storing worktrees outside workspace root**: All worktrees must be under `<repo-root>/.worktrees/`. Never store outside.
186
204
  - **Worktree name collision**: Use deterministic naming from run/task IDs, not user input.
187
205
 
188
- ---
189
-
190
206
  ## Source patterns
191
207
 
192
208
  - `src/worktree/worktree-manager.ts` — prepareTaskWorkspace, assertCleanLeader, linkNodeModulesIfPresent, sanitizeBranchPart
@@ -195,8 +211,6 @@ If a task crashes mid-worktree:
195
211
  - `src/runtime/team-runner.ts` — workspaceMode handling, worktree passed to task
196
212
  - `src/runtime/task-runner.ts` — worktreePath in task context
197
213
 
198
- ---
199
-
200
214
  ## Verification
201
215
 
202
216
  ```bash
@@ -1,4 +1,7 @@
1
- export type ResourceSource = "builtin" | "user" | "project" | "git";
1
+ import type { RoleToolConfig } from "../config/role-tools.ts";
2
+ import { getToolConfig } from "../config/role-tools.ts";
3
+
4
+ export type ResourceSource = "builtin" | "user" | "project" | "git" | "dynamic";
2
5
 
3
6
  export interface RoutingMetadata {
4
7
  triggers?: string[];
@@ -38,3 +41,39 @@ export interface AgentConfig {
38
41
  disabled?: boolean;
39
42
  override?: { source: "config"; path: string };
40
43
  }
44
+
45
+ /**
46
+ * Get session options (tools/excludeTools) for a specific role.
47
+ * Used by child-pi to apply role-based tool restrictions.
48
+ */
49
+ export function getAgentSessionOptions(role: string): {
50
+ tools?: string[];
51
+ excludeTools?: string[];
52
+ } {
53
+ const config: RoleToolConfig = getToolConfig(role);
54
+
55
+ if (config.tools || config.excludeTools) {
56
+ return {
57
+ tools: config.tools,
58
+ excludeTools: config.excludeTools,
59
+ };
60
+ }
61
+
62
+ return {};
63
+ }
64
+
65
+ /**
66
+ * Build agent session options including role-based tool restrictions.
67
+ * @param agent - The agent configuration
68
+ * @param role - The role name to use for tool restrictions (defaults to agent.name)
69
+ */
70
+ export function buildAgentSessionOptions(
71
+ agent: AgentConfig,
72
+ role?: string,
73
+ ): {
74
+ tools?: string[];
75
+ excludeTools?: string[];
76
+ } {
77
+ const effectiveRole = role ?? agent.name;
78
+ return getAgentSessionOptions(effectiveRole);
79
+ }
@@ -32,6 +32,45 @@ export interface BenchmarkResult {
32
32
  cost: number;
33
33
  }
34
34
 
35
+ /**
36
+ * Validate command against allowlist to prevent shell injection.
37
+ * Only allows specific safe commands with arguments.
38
+ */
39
+ /**
40
+ * Validate command against allowlist to prevent shell injection.
41
+ * Uses comprehensive shell metacharacter blocking similar to safe-bash.ts.
42
+ */
43
+ function validateCommand(command: string): void {
44
+ // Basic allowlist - must start with allowed command
45
+ const allowlist = /^(pytest|grep|npm test|npx) /;
46
+ if (!allowlist.test(command)) {
47
+ throw new Error(`Command not allowed: ${command}. Only pytest, grep, npm test, npx allowed.`);
48
+ }
49
+
50
+ // Block shell metacharacters after command name
51
+ const afterCommand = command.substring(command.indexOf(" ") + 1);
52
+
53
+ // Block dangerous shell metacharacters
54
+ const dangerousPatterns = [
55
+ /[;&|`$(){}[\]<>\\]/, // Shell metacharacters
56
+ /\$\([^)]*\)/, // Command substitution $(...)
57
+ /`[^`]*`/, // Backtick command substitution
58
+ /\|/, // Pipe
59
+ /&&/, // And
60
+ /\|\|/, // Or
61
+ />>/, // Append redirect
62
+ /2>&1/, // stderr redirect
63
+ />/, // Output redirect
64
+ /</, // Input redirect
65
+ ];
66
+
67
+ for (const pattern of dangerousPatterns) {
68
+ if (pattern.test(afterCommand)) {
69
+ throw new Error(`Shell metacharacters not allowed in command arguments`);
70
+ }
71
+ }
72
+ }
73
+
35
74
  /**
36
75
  * Run a single benchmark task with tiered judges.
37
76
  * Tier 1: pytest (fast, deterministic)
@@ -49,6 +88,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
49
88
  let output: string | undefined;
50
89
 
51
90
  if (judge.type === "pytest" && judge.command) {
91
+ // Validate command before execution
92
+ validateCommand(judge.command);
52
93
  // Tier 1: pytest - fast deterministic check
53
94
  output = execSync(judge.command, {
54
95
  timeout: 5000,
@@ -58,6 +99,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
58
99
  // Look for pytest summary line with passed count
59
100
  passed = output.includes("passed");
60
101
  } else if (judge.type === "grep" && judge.pattern && judge.command) {
102
+ // Validate command before execution
103
+ validateCommand(judge.command);
61
104
  // Tier 2: grep pattern matching
62
105
  output = execSync(judge.command, {
63
106
  timeout: 5000,
@@ -66,6 +109,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
66
109
  });
67
110
  passed = output.includes(judge.pattern);
68
111
  } else if (judge.type === "command" && judge.command) {
112
+ // Validate command before execution
113
+ validateCommand(judge.command);
69
114
  // Tier 3: command execution
70
115
  output = execSync(judge.command, {
71
116
  timeout: 10000,
@@ -12,12 +12,17 @@ export interface FeedbackLoopStats {
12
12
 
13
13
  export class FeedbackLoop {
14
14
  private runs: RunMetrics[] = [];
15
+ private static readonly MAX_RUNS = 1000;
15
16
 
16
17
  /**
17
18
  * Record a run's metrics for learning.
18
19
  */
19
20
  recordRun(metrics: RunMetrics): void {
20
21
  this.runs.push(metrics);
22
+ // Trim to MAX_RUNS to prevent unbounded memory growth
23
+ if (this.runs.length > FeedbackLoop.MAX_RUNS) {
24
+ this.runs = this.runs.slice(-FeedbackLoop.MAX_RUNS);
25
+ }
21
26
  }
22
27
 
23
28
  /**
@@ -300,6 +300,7 @@ function mergeConfig(
300
300
  base: PiTeamsConfig,
301
301
  override: PiTeamsConfig,
302
302
  ): PiTeamsConfig {
303
+ const warnings: string[] = [];
303
304
  const merged: PiTeamsConfig = {
304
305
  ...base,
305
306
  ...withoutUndefined(override as Record<string, unknown>),
@@ -439,6 +440,15 @@ function mergeConfig(
439
440
  };
440
441
  if (Object.keys(merged.otlp.headers ?? {}).length === 0)
441
442
  delete merged.otlp.headers;
443
+ // Validate OTLP headers for injection attacks (newlines, CR, null bytes)
444
+ const invalidHeaders: string[] = [];
445
+ for (const [k, v] of Object.entries(merged.otlp.headers ?? {})) {
446
+ if (/[\r\n\x00]/.test(String(v))) { invalidHeaders.push(k); }
447
+ }
448
+ if (invalidHeaders.length > 0) {
449
+ delete merged.otlp.headers;
450
+ warnings.push(`OTLP headers blocked due to invalid characters: ${invalidHeaders.join(", ")}`);
451
+ }
442
452
  }
443
453
  if (
444
454
  merged.agents?.overrides &&
@@ -1171,6 +1181,10 @@ export function loadConfig(cwd?: string): LoadedPiTeamsConfig {
1171
1181
  if (cwd) {
1172
1182
  const projectPath = projectConfigPath(cwd);
1173
1183
  const projectConfig = readOptionalConfig(projectPath);
1184
+ // SECURITY FIX: Merge project config FIRST, then user config on top.
1185
+ // This ensures user preferences always take precedence over project settings.
1186
+ // Sensitive project fields are sanitized by sanitizeProjectConfig below, before the merge.
1187
+ let effectiveConfig = {};
1174
1188
  if (projectConfig.exists) {
1175
1189
  const projectSafeConfig = sanitizeProjectConfig(
1176
1190
  projectPath,
@@ -1181,16 +1195,29 @@ export function loadConfig(cwd?: string): LoadedPiTeamsConfig {
1181
1195
  ...projectConfig.warnings,
1182
1196
  ...projectSafeConfig.warnings,
1183
1197
  );
1184
- config = mergeConfig(config, projectSafeConfig.config);
1198
+ effectiveConfig = mergeConfig(effectiveConfig, projectSafeConfig.config);
1185
1199
  }
1186
- // `.pi/pi-crew.json` is the project-owned override file. If present and valid,
1187
- // it may override all pi-crew config fields, including agents.overrides.
1188
- // If missing or invalid, it is ignored and defaults/user config remain effective.
1200
+ // User config always takes precedence over project config
1201
+ effectiveConfig = mergeConfig(effectiveConfig, config);
1202
+ config = effectiveConfig;
1203
+
1204
+
1205
+ // `.pi/pi-crew.json` is the project-owned config file.
1206
+ // SECURITY FIX: User config takes precedence over project-level `.pi/pi-crew.json`.
1207
+ // This prevents malicious project configs from overriding user preferences.
1189
1208
  const piCrewJsonPath = projectPiCrewJsonPath(cwd);
1190
1209
  const piCrewJsonConfig = readOptionalConfig(piCrewJsonPath);
1191
1210
  if (piCrewJsonConfig.exists) {
1192
1211
  warnings.push(...piCrewJsonConfig.warnings);
1193
- config = mergeConfig(config, piCrewJsonConfig.config);
1212
+ // Merge project config first, then user config on top
1213
+ const projectPart = sanitizeProjectConfig(
1214
+ piCrewJsonPath,
1215
+ config,
1216
+ piCrewJsonConfig.config,
1217
+ );
1218
+ warnings.push(...projectPart.warnings);
1219
+ const mergedProject = mergeConfig(projectPart.config, config);
1220
+ config = mergedProject;
1194
1221
  paths.push(piCrewJsonPath);
1195
1222
  }
1196
1223
  }