pi-crew 0.5.2 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +183 -0
- package/README.md +17 -1
- package/docs/architecture.md +2 -0
- package/docs/bugs/cross-session-notification-leakage.md +82 -0
- package/docs/coding-agent-optimization.md +268 -0
- package/docs/deep-review-report.md +384 -0
- package/docs/distillation/cybersecurity-patterns.md +294 -0
- package/docs/migration-v0.4-v0.5.md +208 -0
- package/docs/optimization-plan.md +642 -0
- package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
- package/docs/pi-mono-opportunities.md +969 -0
- package/docs/pi-mono-review.md +291 -0
- package/docs/skills/REFERENCE.md +144 -0
- package/package.json +12 -9
- package/skills/artifact-analysis-loop/SKILL.md +302 -0
- package/skills/async-worker-recovery/SKILL.md +19 -1
- package/skills/child-pi-spawning/SKILL.md +19 -6
- package/skills/context-artifact-hygiene/SKILL.md +19 -2
- package/skills/delegation-patterns/SKILL.md +68 -3
- package/skills/detection-pipeline-design/SKILL.md +285 -0
- package/skills/event-log-tracing/SKILL.md +20 -6
- package/skills/git-master/SKILL.md +20 -6
- package/skills/hunting-investigation-loop/SKILL.md +401 -0
- package/skills/incident-playbook-construction/SKILL.md +383 -0
- package/skills/live-agent-lifecycle/SKILL.md +20 -6
- package/skills/mailbox-interactive/SKILL.md +19 -6
- package/skills/model-routing-context/SKILL.md +19 -1
- package/skills/multi-perspective-review/SKILL.md +19 -4
- package/skills/observability-reliability/SKILL.md +19 -2
- package/skills/orchestration/SKILL.md +20 -2
- package/skills/ownership-session-security/SKILL.md +20 -2
- package/skills/pi-extension-lifecycle/SKILL.md +20 -2
- package/skills/post-mortem/SKILL.md +7 -2
- package/skills/read-only-explorer/SKILL.md +20 -6
- package/skills/requirements-to-task-packet/SKILL.md +23 -3
- package/skills/resource-discovery-config/SKILL.md +20 -2
- package/skills/runtime-state-reader/SKILL.md +20 -2
- package/skills/safe-bash/SKILL.md +21 -6
- package/skills/scrutinize/SKILL.md +20 -2
- package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
- package/skills/security-review/SKILL.md +560 -0
- package/skills/state-mutation-locking/SKILL.md +22 -2
- package/skills/systematic-debugging/SKILL.md +8 -6
- package/skills/threat-hypothesis-framework/SKILL.md +175 -0
- package/skills/ui-render-performance/SKILL.md +20 -2
- package/skills/verification-before-done/SKILL.md +17 -2
- package/skills/widget-rendering/SKILL.md +21 -6
- package/skills/workspace-isolation/SKILL.md +20 -6
- package/skills/worktree-isolation/SKILL.md +20 -6
- package/src/agents/agent-config.ts +40 -1
- package/src/benchmark/benchmark-runner.ts +45 -0
- package/src/benchmark/feedback-loop.ts +5 -0
- package/src/config/config.ts +32 -5
- package/src/config/role-tools.ts +82 -0
- package/src/config/suggestions.ts +8 -0
- package/src/config/types.ts +4 -0
- package/src/extension/async-notifier.ts +10 -1
- package/src/extension/crew-cleanup.ts +114 -0
- package/src/extension/cross-extension-rpc.ts +1 -1
- package/src/extension/notification-router.ts +18 -0
- package/src/extension/register.ts +27 -19
- package/src/extension/registration/subagent-tools.ts +1 -1
- package/src/extension/team-tool/anchor.ts +201 -0
- package/src/extension/team-tool/api.ts +2 -1
- package/src/extension/team-tool/auto-summarize.ts +154 -0
- package/src/extension/team-tool/run.ts +42 -7
- package/src/extension/team-tool.ts +44 -2
- package/src/hooks/registry.ts +1 -3
- package/src/observability/event-bus.ts +69 -0
- package/src/observability/event-to-metric.ts +0 -2
- package/src/runtime/anchor-manager.ts +473 -0
- package/src/runtime/async-runner.ts +8 -4
- package/src/runtime/auto-summarize.ts +350 -0
- package/src/runtime/background-runner.ts +10 -3
- package/src/runtime/budget-tracker.ts +354 -0
- package/src/runtime/chain-runner.ts +507 -0
- package/src/runtime/child-pi.ts +123 -35
- package/src/runtime/crash-recovery.ts +5 -4
- package/src/runtime/crew-agent-runtime.ts +1 -0
- package/src/runtime/custom-tools/irc-tool.ts +13 -0
- package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
- package/src/runtime/delivery-coordinator.ts +10 -3
- package/src/runtime/dynamic-script-runner.ts +482 -0
- package/src/runtime/foreground-control.ts +87 -17
- package/src/runtime/handoff-manager.ts +589 -0
- package/src/runtime/hidden-handoff.ts +424 -0
- package/src/runtime/live-agent-manager.ts +20 -4
- package/src/runtime/live-session-runtime.ts +39 -4
- package/src/runtime/manifest-cache.ts +2 -1
- package/src/runtime/model-resolver.ts +16 -4
- package/src/runtime/phase-tracker.ts +373 -0
- package/src/runtime/pi-args.ts +11 -1
- package/src/runtime/pi-json-output.ts +31 -0
- package/src/runtime/pipeline-runner.ts +514 -0
- package/src/runtime/progress-tracker.ts +124 -0
- package/src/runtime/retry-runner.ts +354 -0
- package/src/runtime/sandbox.ts +252 -0
- package/src/runtime/scheduler.ts +7 -2
- package/src/runtime/skill-effectiveness.ts +473 -0
- package/src/runtime/skill-instructions.ts +37 -3
- package/src/runtime/subagent-manager.ts +1 -1
- package/src/runtime/task-graph.ts +11 -1
- package/src/runtime/task-runner.ts +92 -18
- package/src/runtime/team-runner.ts +13 -12
- package/src/runtime/tool-progress.ts +10 -3
- package/src/runtime/verification-gates.ts +367 -0
- package/src/schema/team-tool-schema.ts +37 -0
- package/src/skills/discover-skills.ts +5 -0
- package/src/state/active-run-registry.ts +9 -2
- package/src/state/contracts.ts +9 -0
- package/src/state/crew-init.ts +3 -3
- package/src/state/decision-ledger.ts +98 -55
- package/src/state/event-log-rotation.ts +2 -2
- package/src/state/event-log.ts +144 -10
- package/src/state/hook-instinct-bridge.ts +5 -5
- package/src/state/mailbox.ts +10 -0
- package/src/state/run-cache.ts +18 -8
- package/src/state/state-store.ts +3 -1
- package/src/state/types.ts +4 -0
- package/src/tools/safe-bash-extension.ts +1 -0
- package/src/tools/safe-bash.ts +152 -20
- package/src/types/new-api-types.ts +34 -0
- package/src/ui/agent-management-overlay.ts +5 -1
- package/src/ui/crew-widget.ts +29 -15
- package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
- package/src/ui/powerbar-publisher.ts +101 -7
- package/src/ui/tool-render.ts +15 -15
- package/src/ui/transcript-cache.ts +13 -0
- package/src/utils/bm25-search.ts +16 -8
- package/src/utils/env-filter.ts +8 -5
- package/src/utils/redaction.ts +169 -15
- package/src/utils/session-utils.ts +52 -0
- package/src/utils/sse-parser.ts +10 -1
- package/src/worktree/cleanup.ts +6 -1
- package/src/worktree/worktree-manager.ts +32 -13
- package/workflows/chain.workflow.md +252 -0
- package/workflows/pipeline.workflow.md +27 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: threat-hypothesis-framework
|
|
3
|
+
description: "Structured investigation using testable hypotheses."
|
|
4
|
+
triggers:
|
|
5
|
+
- "hunt for"
|
|
6
|
+
- "investigate"
|
|
7
|
+
- "threat hypothesis"
|
|
8
|
+
- "test this pattern"
|
|
9
|
+
- "find evidence of"
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
# threat-hypothesis-framework
|
|
13
|
+
|
|
14
|
+
Use this skill when conducting hypothesis-driven investigation and threat hunting.
|
|
15
|
+
|
|
16
|
+
## Source
|
|
17
|
+
|
|
18
|
+
Distilled from `building-threat-hunt-hypothesis-framework` (Anthropic Cybersecurity Skills) and generalized for software/codebase context.
|
|
19
|
+
|
|
20
|
+
## When to Use
|
|
21
|
+
|
|
22
|
+
- Proactively hunting for indicators of compromise in code
|
|
23
|
+
- After threat intelligence indicates suspicious patterns
|
|
24
|
+
- During incident investigation to scope extent
|
|
25
|
+
- When EDR/logs alert on related indicators
|
|
26
|
+
- During periodic security assessments
|
|
27
|
+
|
|
28
|
+
## Workflow
|
|
29
|
+
|
|
30
|
+
```markdown
|
|
31
|
+
## Hypothesis Investigation Loop
|
|
32
|
+
|
|
33
|
+
1. **Formulate** → Given [observed IOCs/patterns], hypothesize [attack scenario]
|
|
34
|
+
2. **Identify** → List data sources: [files, commits, logs, configs]
|
|
35
|
+
3. **Search** → Run queries across identified sources
|
|
36
|
+
4. **Analyze** → Pattern match: [technique, indicator, artifact]
|
|
37
|
+
5. **Validate** → Confirm with [secondary source, cross-reference]
|
|
38
|
+
6. **Correlate** → Link findings to [broader campaign, actor]
|
|
39
|
+
7. **Report** → Document: [finding, confidence, next_action]
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Hypothesis Structure
|
|
43
|
+
|
|
44
|
+
```yaml
|
|
45
|
+
hypothesis:
|
|
46
|
+
id: string # e.g., "HY-2026-001"
|
|
47
|
+
technique: string # e.g., "credential-theft", "supply-chain"
|
|
48
|
+
description: string # What we're testing
|
|
49
|
+
data_sources:
|
|
50
|
+
- type: [file|commit|log|config]
|
|
51
|
+
locations: [paths, globs]
|
|
52
|
+
search_patterns:
|
|
53
|
+
- pattern: string
|
|
54
|
+
type: [regex|AST|signature]
|
|
55
|
+
validation:
|
|
56
|
+
- method: string
|
|
57
|
+
expected_result: string
|
|
58
|
+
confidence_levels:
|
|
59
|
+
high: [confirmed by multiple sources]
|
|
60
|
+
medium: [single source, needs validation]
|
|
61
|
+
low: [heuristic match, requires investigation]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Hunt Report Format
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
Hunt ID: [HY-runid-date-seq]
|
|
68
|
+
Hypothesis: [what we're testing]
|
|
69
|
+
Data Sources: [where we looked]
|
|
70
|
+
Search Patterns: [what we searched for]
|
|
71
|
+
Findings:
|
|
72
|
+
- File: [path]
|
|
73
|
+
Line: [number]
|
|
74
|
+
Evidence: [what matched]
|
|
75
|
+
Confidence: [High/Medium/Low]
|
|
76
|
+
Correlation: [link to other findings]
|
|
77
|
+
Next Actions:
|
|
78
|
+
- investigate: [further analysis needed]
|
|
79
|
+
- contain: [immediate action required]
|
|
80
|
+
- close: [false positive, no action]
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Investigation Examples
|
|
84
|
+
|
|
85
|
+
### Example 1: Credential Detection Hunt
|
|
86
|
+
|
|
87
|
+
```yaml
|
|
88
|
+
hypothesis:
|
|
89
|
+
id: HY-2026-042
|
|
90
|
+
technique: hardcoded-credentials
|
|
91
|
+
description: Search for hardcoded secrets in codebase
|
|
92
|
+
data_sources:
|
|
93
|
+
- type: file
|
|
94
|
+
locations: ["**/*.ts", "**/*.js", "**/*.env"]
|
|
95
|
+
search_patterns:
|
|
96
|
+
- pattern: '(api[_-]?key|secret|token|password)\s*[=:]'
|
|
97
|
+
type: regex
|
|
98
|
+
- pattern: 'process\.env\.[A-Z_]+'
|
|
99
|
+
type: regex
|
|
100
|
+
validation:
|
|
101
|
+
- method: git history check
|
|
102
|
+
expected_result: No recent secret additions
|
|
103
|
+
- method: secret scanning tool
|
|
104
|
+
expected_result: Zero findings in main branch
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Example 2: Supply Chain Hunt
|
|
108
|
+
|
|
109
|
+
```yaml
|
|
110
|
+
hypothesis:
|
|
111
|
+
id: HY-2026-043
|
|
112
|
+
technique: dependency-confusion
|
|
113
|
+
description: Detect potential dependency confusion attacks
|
|
114
|
+
data_sources:
|
|
115
|
+
- type: file
|
|
116
|
+
locations: ["**/package.json", "**/requirements.txt"]
|
|
117
|
+
search_patterns:
|
|
118
|
+
- pattern: '"@private/.*"'
|
|
119
|
+
type: regex
|
|
120
|
+
- pattern: 'version.*>.*<.*9999999'
|
|
121
|
+
type: regex
|
|
122
|
+
validation:
|
|
123
|
+
- method: npm audit
|
|
124
|
+
expected_result: No anomalies
|
|
125
|
+
- method: typosquat check
|
|
126
|
+
expected_result: No similar package names
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Confidence Scoring
|
|
130
|
+
|
|
131
|
+
| Level | Criteria | Action |
|
|
132
|
+
|-------|----------|--------|
|
|
133
|
+
| **High** | Confirmed by 2+ independent sources, exact match | Immediate action |
|
|
134
|
+
| **Medium** | Single source, pattern match, needs validation | Investigate further |
|
|
135
|
+
| **Low** | Heuristic match, possible false positive | Log and monitor |
|
|
136
|
+
|
|
137
|
+
## Enforcement — Threat Hypothesis Framework Gate
|
|
138
|
+
|
|
139
|
+
**Before reporting hunt findings, verify:**
|
|
140
|
+
|
|
141
|
+
- [ ] Hypothesis clearly stated before search (not scattershot searching)
|
|
142
|
+
- [ ] Data sources identified (files, commits, logs, configs)
|
|
143
|
+
- [ ] Search patterns defined (regex, AST, signature)
|
|
144
|
+
- [ ] Findings validated with secondary source or cross-reference
|
|
145
|
+
- [ ] Confidence level assigned (High/Medium/Low) based on validation
|
|
146
|
+
- [ ] Report includes: finding, confidence, next_action (investigate/contain/close)
|
|
147
|
+
|
|
148
|
+
If ANY answer is NO → Stop. Complete hypothesis framework before reporting.
|
|
149
|
+
|
|
150
|
+
## Anti-Patterns
|
|
151
|
+
|
|
152
|
+
- **Don't** run a hunt without a clear hypothesis (scattershot searching)
|
|
153
|
+
- **Don't** claim a finding without validation (false positive risk)
|
|
154
|
+
- **Don't** skip the correlation step (misses broader context)
|
|
155
|
+
- **Don't** report without a confidence level (misleads stakeholders)
|
|
156
|
+
|
|
157
|
+
## Tools
|
|
158
|
+
|
|
159
|
+
| Tool | Purpose |
|
|
160
|
+
|------|---------|
|
|
161
|
+
| `rg` (ripgrep) | Pattern search in files |
|
|
162
|
+
| `git log` | History investigation |
|
|
163
|
+
| `semgrep` | AST-based pattern matching |
|
|
164
|
+
| `npm audit` | Dependency vulnerability check |
|
|
165
|
+
|
|
166
|
+
## Verification
|
|
167
|
+
|
|
168
|
+
For hypothesis framework changes:
|
|
169
|
+
```bash
|
|
170
|
+
cd pi-crew
|
|
171
|
+
npx tsc --noEmit
|
|
172
|
+
node --experimental-strip-types --test test/unit/security-patterns.test.ts
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
*See also: `hunting-investigation-loop` skill for active hunting workflows.*
|
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ui-render-performance
|
|
3
|
-
description: Non-blocking Pi TUI render workflow.
|
|
3
|
+
description: "Non-blocking Pi TUI render workflow."
|
|
4
|
+
triggers:
|
|
5
|
+
- "widget render"
|
|
6
|
+
- "dashboard pane"
|
|
7
|
+
- "overlay update"
|
|
8
|
+
- "snapshot cache"
|
|
9
|
+
- "UI refresh"
|
|
4
10
|
---
|
|
5
|
-
|
|
6
11
|
# ui-render-performance
|
|
7
12
|
|
|
8
13
|
Use this skill for Pi/pi-crew TUI work.
|
|
@@ -23,6 +28,19 @@ Use this skill for Pi/pi-crew TUI work.
|
|
|
23
28
|
- On session switch, cancel timers and ensure in-flight async preloads cannot update stale session UI.
|
|
24
29
|
- Watch TTL interactions: a preload interval shorter than cache TTL prevents render-time refresh gaps.
|
|
25
30
|
|
|
31
|
+
## Enforcement — UI Render Performance Gate
|
|
32
|
+
|
|
33
|
+
**Before modifying widgets or UI rendering, verify:**
|
|
34
|
+
|
|
35
|
+
- [ ] Render path is non-blocking (no fs calls, no network, no large JSON parsing)
|
|
36
|
+
- [ ] All data preloaded async before first render
|
|
37
|
+
- [ ] Snapshot cache TTL appropriate (500ms or less)
|
|
38
|
+
- [ ] Render scheduler used for coalescing renders
|
|
39
|
+
- [ ] Stale warnings filtered for terminal status (completed/failed/cancelled)
|
|
40
|
+
- [ ] TTL interactions understood (preload interval < cache TTL)
|
|
41
|
+
|
|
42
|
+
If ANY answer is NO → Stop. Fix render performance issues before proceeding.
|
|
43
|
+
|
|
26
44
|
## Anti-patterns
|
|
27
45
|
|
|
28
46
|
- Do not call `loadConfig()`, `manifestCache.list()`, or `refreshIfStale()` repeatedly inside `renderTick()` unless backed by preloaded frame data.
|
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: verification-before-done
|
|
3
|
-
description: "Evidence before claims.
|
|
4
|
-
|
|
3
|
+
description: "Evidence before claims."
|
|
4
|
+
triggers:
|
|
5
|
+
- "done"
|
|
6
|
+
- "verify this"
|
|
7
|
+
- "is it working"
|
|
8
|
+
- "check if it passes"
|
|
9
|
+
- "ready to ship"
|
|
10
|
+
|
|
5
11
|
|
|
12
|
+
---
|
|
6
13
|
# verification-before-done
|
|
7
14
|
|
|
8
15
|
Core principle: evidence before claims. A worker report, green-looking log, or previous run is not fresh verification.
|
|
@@ -64,3 +71,11 @@ Before finalizing any work, report:
|
|
|
64
71
|
## Red Flags
|
|
65
72
|
|
|
66
73
|
Stop before saying done if you are using words like "should", "probably", "looks", "seems", "I think", or if you are trusting an agent report without checking evidence.
|
|
74
|
+
|
|
75
|
+
## Anti-Patterns
|
|
76
|
+
|
|
77
|
+
- **Don't** claim "tests pass" without running them in the current session
|
|
78
|
+
- **Don't** trust agent reports without checking evidence yourself
|
|
79
|
+
- **Don't** use fuzzy language like "seems", "probably", "looks like"
|
|
80
|
+
- **Don't** skip providing verification commands for claims
|
|
81
|
+
- **Don't** claim done if you're still using hypotheses instead of evidence
|
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: widget-rendering
|
|
3
|
-
description: Pi TUI crew widget data sources, display priority, and rendering performance.
|
|
4
|
-
|
|
3
|
+
description: "Pi TUI crew widget data sources, display priority, and rendering performance."
|
|
4
|
+
triggers:
|
|
5
|
+
- "empty agent"
|
|
6
|
+
- "ghost run"
|
|
7
|
+
- "widget timing"
|
|
8
|
+
- "display priority"
|
|
9
|
+
- "snapshot cache"
|
|
5
10
|
|
|
11
|
+
---
|
|
6
12
|
# widget-rendering
|
|
7
13
|
|
|
8
14
|
The crew widget (`src/ui/crew-widget.ts`) displays active runs and their agents in the Pi TUI. It must render synchronously at TTY refresh rate without blocking. Understanding the data sources and timing rules is essential for debugging display issues.
|
|
@@ -44,8 +50,6 @@ In-memory map from `live-agent-manager.ts`. Provides:
|
|
|
44
50
|
|
|
45
51
|
**When used:** For completed agents, or when snapshot cache misses.
|
|
46
52
|
|
|
47
|
-
---
|
|
48
|
-
|
|
49
53
|
## Display Priority
|
|
50
54
|
|
|
51
55
|
```
|
|
@@ -66,8 +70,6 @@ for each active run:
|
|
|
66
70
|
→ apply linger rules (finishedAgents: 1min, errors: 2min)
|
|
67
71
|
```
|
|
68
72
|
|
|
69
|
-
---
|
|
70
|
-
|
|
71
73
|
## Active Runs Filtering
|
|
72
74
|
|
|
73
75
|
`activeWidgetRuns()` determines which runs to show. Key filter: `isDisplayActiveRun(manifest, tasks)` from `process-status.ts`.
|
|
@@ -227,6 +229,19 @@ const TOOL_LABELS = {
|
|
|
227
229
|
|
|
228
230
|
---
|
|
229
231
|
|
|
232
|
+
## Enforcement — Widget Rendering Gate
|
|
233
|
+
|
|
234
|
+
**Before modifying widget rendering or display logic, verify:**
|
|
235
|
+
|
|
236
|
+
- [ ] Render path is synchronous and non-blocking (no fs/network calls)
|
|
237
|
+
- [ ] Display priority chain correct (liveAgents → snapshot cache → agents.json)
|
|
238
|
+
- [ ] Ghost run filtering works (stale async PID + age > 30min hidden)
|
|
239
|
+
- [ ] Stale handle eviction runs on every refresh (10min terminal handles removed)
|
|
240
|
+
- [ ] Cache invalidation handles empty results (forces refresh on next tick)
|
|
241
|
+
- [ ] Tool name extraction uses TOOL_LABELS for readable activity descriptions
|
|
242
|
+
|
|
243
|
+
If ANY answer is NO → Stop. Fix widget rendering issues before proceeding.
|
|
244
|
+
|
|
230
245
|
## Anti-patterns
|
|
231
246
|
|
|
232
247
|
- **Blocking render with fs calls**: Every `readFileSync`, `readdirSync`, `fs.statSync` in the render path causes frame drops. Preload everything async.
|
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: workspace-isolation
|
|
3
|
-
description: "Workspace isolation boundaries.
|
|
3
|
+
description: "Workspace isolation boundaries."
|
|
4
|
+
triggers:
|
|
5
|
+
- "workspace isolation"
|
|
6
|
+
- "cross-workspace access"
|
|
7
|
+
- "escape boundary"
|
|
8
|
+
- "worktree safety"
|
|
9
|
+
- "agent isolation"
|
|
4
10
|
---
|
|
5
|
-
|
|
6
11
|
# workspace-isolation
|
|
7
12
|
|
|
8
13
|
pi-crew enforces workspace isolation so that agents, runs, and live sessions from one project folder cannot be accessed from another. The workspace boundary is `manifest.cwd` — the directory where a run was initiated.
|
|
@@ -157,6 +162,19 @@ const DEFAULT_PATHS = {
|
|
|
157
162
|
|
|
158
163
|
All paths are resolved relative to `manifest.cwd`, ensuring state stays under the project root.
|
|
159
164
|
|
|
165
|
+
## Enforcement — Workspace Isolation Gate
|
|
166
|
+
|
|
167
|
+
**Before performing cross-workspace operations, verify:**
|
|
168
|
+
|
|
169
|
+
- [ ] workspaceId carried from manifest.cwd through all operations
|
|
170
|
+
- [ ] Live agent operations filtered by workspaceId (list, steer, follow-up, stop, resume)
|
|
171
|
+
- [ ] resolveContainedPath used (not startsWith) for path validation
|
|
172
|
+
- [ ] resolveRealContainedPath used for symlink detection
|
|
173
|
+
- [ ] Worktree paths under <repo-root>/.worktrees/ (never outside workspace)
|
|
174
|
+
- [ ] Cross-session cancel/respond rejected (force=true only when explicit)
|
|
175
|
+
|
|
176
|
+
If ANY answer is NO → Stop. Verify workspace isolation before proceeding.
|
|
177
|
+
|
|
160
178
|
## Anti-patterns
|
|
161
179
|
|
|
162
180
|
- **Passing raw cwd without validation**: Always use `resolveContainedPath` to ensure paths stay under workspace root.
|
|
@@ -165,8 +183,6 @@ All paths are resolved relative to `manifest.cwd`, ensuring state stays under th
|
|
|
165
183
|
- **Worktree name collision**: Use deterministic names from run/task IDs. Never accept user-controlled branch names.
|
|
166
184
|
- **Dirty worktree removal**: Never force-remove worktrees with uncommitted changes unless explicitly confirmed.
|
|
167
185
|
|
|
168
|
-
---
|
|
169
|
-
|
|
170
186
|
## Source patterns
|
|
171
187
|
|
|
172
188
|
- `src/extension/team-tool/api.ts` — workspaceId filter in list-live-agents, steer-agent, follow-up-agent, stop-agent, resume-agent
|
|
@@ -177,8 +193,6 @@ All paths are resolved relative to `manifest.cwd`, ensuring state stays under th
|
|
|
177
193
|
- `src/worktree/worktree-manager.ts` — prepareTaskWorkspace, assertCleanLeader, linkNodeModulesIfPresent
|
|
178
194
|
- `src/config/defaults.ts` — DEFAULT_PATHS (state under project root)
|
|
179
195
|
|
|
180
|
-
---
|
|
181
|
-
|
|
182
196
|
## Verification
|
|
183
197
|
|
|
184
198
|
```bash
|
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: worktree-isolation
|
|
3
|
-
description: Conflict-safe git worktree workflow.
|
|
3
|
+
description: "Conflict-safe git worktree workflow."
|
|
4
|
+
triggers:
|
|
5
|
+
- "create worktree"
|
|
6
|
+
- "parallel workers"
|
|
7
|
+
- "isolate edits"
|
|
8
|
+
- "cleanup worktree"
|
|
9
|
+
- "branch freshness"
|
|
4
10
|
---
|
|
5
|
-
|
|
6
11
|
# worktree-isolation
|
|
7
12
|
|
|
8
13
|
Use this skill for worktree-based execution or cleanup. Git worktrees create isolated working directories that allow parallel code-changing tasks without git conflicts.
|
|
@@ -177,6 +182,19 @@ If a task crashes mid-worktree:
|
|
|
177
182
|
3. If run is failed/cancelled and worktree is dirty → report to operator
|
|
178
183
|
4. If run is completed → safe to clean up
|
|
179
184
|
|
|
185
|
+
## Enforcement — Worktree Isolation Gate
|
|
186
|
+
|
|
187
|
+
**Before creating or cleaning up worktrees, verify:**
|
|
188
|
+
|
|
189
|
+
- [ ] Leader repo is clean before creating worktrees (assertCleanLeader passes)
|
|
190
|
+
- [ ] One owner per file/symbol (no two worktrees edit same file)
|
|
191
|
+
- [ ] Worktree naming is deterministic from run/task IDs (no user-controlled fragments)
|
|
192
|
+
- [ ] Branch freshness checked before reuse (base branch hasn't diverged)
|
|
193
|
+
- [ ] Dirty worktrees preserved by default (force=true only for forced removal)
|
|
194
|
+
- [ ] Worktree paths under <repo-root>/.worktrees/ (never outside workspace)
|
|
195
|
+
|
|
196
|
+
If ANY answer is NO → Stop. Verify worktree safety before proceeding.
|
|
197
|
+
|
|
180
198
|
## Anti-patterns
|
|
181
199
|
|
|
182
200
|
- **Parallel editing same file**: Assign one owner per file. Use the task ID in branch names to track ownership.
|
|
@@ -185,8 +203,6 @@ If a task crashes mid-worktree:
|
|
|
185
203
|
- **Storing worktrees outside workspace root**: All worktrees must be under `<repo-root>/.worktrees/`. Never store outside.
|
|
186
204
|
- **Worktree name collision**: Use deterministic naming from run/task IDs, not user input.
|
|
187
205
|
|
|
188
|
-
---
|
|
189
|
-
|
|
190
206
|
## Source patterns
|
|
191
207
|
|
|
192
208
|
- `src/worktree/worktree-manager.ts` — prepareTaskWorkspace, assertCleanLeader, linkNodeModulesIfPresent, sanitizeBranchPart
|
|
@@ -195,8 +211,6 @@ If a task crashes mid-worktree:
|
|
|
195
211
|
- `src/runtime/team-runner.ts` — workspaceMode handling, worktree passed to task
|
|
196
212
|
- `src/runtime/task-runner.ts` — worktreePath in task context
|
|
197
213
|
|
|
198
|
-
---
|
|
199
|
-
|
|
200
214
|
## Verification
|
|
201
215
|
|
|
202
216
|
```bash
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
import type { RoleToolConfig } from "../config/role-tools.ts";
|
|
2
|
+
import { getToolConfig } from "../config/role-tools.ts";
|
|
3
|
+
|
|
4
|
+
export type ResourceSource = "builtin" | "user" | "project" | "git" | "dynamic";
|
|
2
5
|
|
|
3
6
|
export interface RoutingMetadata {
|
|
4
7
|
triggers?: string[];
|
|
@@ -38,3 +41,39 @@ export interface AgentConfig {
|
|
|
38
41
|
disabled?: boolean;
|
|
39
42
|
override?: { source: "config"; path: string };
|
|
40
43
|
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Get session options (tools/excludeTools) for a specific role.
|
|
47
|
+
* Used by child-pi to apply role-based tool restrictions.
|
|
48
|
+
*/
|
|
49
|
+
export function getAgentSessionOptions(role: string): {
|
|
50
|
+
tools?: string[];
|
|
51
|
+
excludeTools?: string[];
|
|
52
|
+
} {
|
|
53
|
+
const config: RoleToolConfig = getToolConfig(role);
|
|
54
|
+
|
|
55
|
+
if (config.tools || config.excludeTools) {
|
|
56
|
+
return {
|
|
57
|
+
tools: config.tools,
|
|
58
|
+
excludeTools: config.excludeTools,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return {};
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Build agent session options including role-based tool restrictions.
|
|
67
|
+
* @param agent - The agent configuration
|
|
68
|
+
* @param role - The role name to use for tool restrictions (defaults to agent.name)
|
|
69
|
+
*/
|
|
70
|
+
export function buildAgentSessionOptions(
|
|
71
|
+
agent: AgentConfig,
|
|
72
|
+
role?: string,
|
|
73
|
+
): {
|
|
74
|
+
tools?: string[];
|
|
75
|
+
excludeTools?: string[];
|
|
76
|
+
} {
|
|
77
|
+
const effectiveRole = role ?? agent.name;
|
|
78
|
+
return getAgentSessionOptions(effectiveRole);
|
|
79
|
+
}
|
|
@@ -32,6 +32,45 @@ export interface BenchmarkResult {
|
|
|
32
32
|
cost: number;
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
+
/**
|
|
36
|
+
* Validate command against allowlist to prevent shell injection.
|
|
37
|
+
* Only allows specific safe commands with arguments.
|
|
38
|
+
*/
|
|
39
|
+
/**
|
|
40
|
+
* Validate command against allowlist to prevent shell injection.
|
|
41
|
+
* Uses comprehensive shell metacharacter blocking similar to safe-bash.ts.
|
|
42
|
+
*/
|
|
43
|
+
function validateCommand(command: string): void {
|
|
44
|
+
// Basic allowlist - must start with allowed command
|
|
45
|
+
const allowlist = /^(pytest|grep|npm test|npx) /;
|
|
46
|
+
if (!allowlist.test(command)) {
|
|
47
|
+
throw new Error(`Command not allowed: ${command}. Only pytest, grep, npm test, npx allowed.`);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Block shell metacharacters after command name
|
|
51
|
+
const afterCommand = command.substring(command.indexOf(" ") + 1);
|
|
52
|
+
|
|
53
|
+
// Block dangerous shell metacharacters
|
|
54
|
+
const dangerousPatterns = [
|
|
55
|
+
/[;&|`$(){}[\]<>\\]/, // Shell metacharacters
|
|
56
|
+
/\$\([^)]*\)/, // Command substitution $(...)
|
|
57
|
+
/`[^`]*`/, // Backtick command substitution
|
|
58
|
+
/\|/, // Pipe
|
|
59
|
+
/&&/, // And
|
|
60
|
+
/\|\|/, // Or
|
|
61
|
+
/>>/, // Append redirect
|
|
62
|
+
/2>&1/, // stderr redirect
|
|
63
|
+
/>/, // Output redirect
|
|
64
|
+
/</, // Input redirect
|
|
65
|
+
];
|
|
66
|
+
|
|
67
|
+
for (const pattern of dangerousPatterns) {
|
|
68
|
+
if (pattern.test(afterCommand)) {
|
|
69
|
+
throw new Error(`Shell metacharacters not allowed in command arguments`);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
35
74
|
/**
|
|
36
75
|
* Run a single benchmark task with tiered judges.
|
|
37
76
|
* Tier 1: pytest (fast, deterministic)
|
|
@@ -49,6 +88,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
|
|
|
49
88
|
let output: string | undefined;
|
|
50
89
|
|
|
51
90
|
if (judge.type === "pytest" && judge.command) {
|
|
91
|
+
// Validate command before execution
|
|
92
|
+
validateCommand(judge.command);
|
|
52
93
|
// Tier 1: pytest - fast deterministic check
|
|
53
94
|
output = execSync(judge.command, {
|
|
54
95
|
timeout: 5000,
|
|
@@ -58,6 +99,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
|
|
|
58
99
|
// Look for pytest summary line with passed count
|
|
59
100
|
passed = output.includes("passed");
|
|
60
101
|
} else if (judge.type === "grep" && judge.pattern && judge.command) {
|
|
102
|
+
// Validate command before execution
|
|
103
|
+
validateCommand(judge.command);
|
|
61
104
|
// Tier 2: grep pattern matching
|
|
62
105
|
output = execSync(judge.command, {
|
|
63
106
|
timeout: 5000,
|
|
@@ -66,6 +109,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
|
|
|
66
109
|
});
|
|
67
110
|
passed = output.includes(judge.pattern);
|
|
68
111
|
} else if (judge.type === "command" && judge.command) {
|
|
112
|
+
// Validate command before execution
|
|
113
|
+
validateCommand(judge.command);
|
|
69
114
|
// Tier 3: command execution
|
|
70
115
|
output = execSync(judge.command, {
|
|
71
116
|
timeout: 10000,
|
|
@@ -12,12 +12,17 @@ export interface FeedbackLoopStats {
|
|
|
12
12
|
|
|
13
13
|
export class FeedbackLoop {
|
|
14
14
|
private runs: RunMetrics[] = [];
|
|
15
|
+
private static readonly MAX_RUNS = 1000;
|
|
15
16
|
|
|
16
17
|
/**
|
|
17
18
|
* Record a run's metrics for learning.
|
|
18
19
|
*/
|
|
19
20
|
recordRun(metrics: RunMetrics): void {
|
|
20
21
|
this.runs.push(metrics);
|
|
22
|
+
// Trim to MAX_RUNS to prevent unbounded memory growth
|
|
23
|
+
if (this.runs.length > FeedbackLoop.MAX_RUNS) {
|
|
24
|
+
this.runs = this.runs.slice(-FeedbackLoop.MAX_RUNS);
|
|
25
|
+
}
|
|
21
26
|
}
|
|
22
27
|
|
|
23
28
|
/**
|
package/src/config/config.ts
CHANGED
|
@@ -300,6 +300,7 @@ function mergeConfig(
|
|
|
300
300
|
base: PiTeamsConfig,
|
|
301
301
|
override: PiTeamsConfig,
|
|
302
302
|
): PiTeamsConfig {
|
|
303
|
+
const warnings: string[] = [];
|
|
303
304
|
const merged: PiTeamsConfig = {
|
|
304
305
|
...base,
|
|
305
306
|
...withoutUndefined(override as Record<string, unknown>),
|
|
@@ -439,6 +440,15 @@ function mergeConfig(
|
|
|
439
440
|
};
|
|
440
441
|
if (Object.keys(merged.otlp.headers ?? {}).length === 0)
|
|
441
442
|
delete merged.otlp.headers;
|
|
443
|
+
// Validate OTLP headers for injection attacks (newlines, CR, null bytes)
|
|
444
|
+
const invalidHeaders: string[] = [];
|
|
445
|
+
for (const [k, v] of Object.entries(merged.otlp.headers ?? {})) {
|
|
446
|
+
if (/[\r\n\x00]/.test(String(v))) { invalidHeaders.push(k); }
|
|
447
|
+
}
|
|
448
|
+
if (invalidHeaders.length > 0) {
|
|
449
|
+
delete merged.otlp.headers;
|
|
450
|
+
warnings.push(`OTLP headers blocked due to invalid characters: ${invalidHeaders.join(", ")}`);
|
|
451
|
+
}
|
|
442
452
|
}
|
|
443
453
|
if (
|
|
444
454
|
merged.agents?.overrides &&
|
|
@@ -1171,6 +1181,10 @@ export function loadConfig(cwd?: string): LoadedPiTeamsConfig {
|
|
|
1171
1181
|
if (cwd) {
|
|
1172
1182
|
const projectPath = projectConfigPath(cwd);
|
|
1173
1183
|
const projectConfig = readOptionalConfig(projectPath);
|
|
1184
|
+
// SECURITY FIX: Merge project config FIRST, then user config on top.
|
|
1185
|
+
// This ensures user preferences always take precedence over project settings.
|
|
1186
|
+
// Sensitive fields have already been sanitized by sanitizeProjectConfig.
|
|
1187
|
+
let effectiveConfig = {};
|
|
1174
1188
|
if (projectConfig.exists) {
|
|
1175
1189
|
const projectSafeConfig = sanitizeProjectConfig(
|
|
1176
1190
|
projectPath,
|
|
@@ -1181,16 +1195,29 @@ export function loadConfig(cwd?: string): LoadedPiTeamsConfig {
|
|
|
1181
1195
|
...projectConfig.warnings,
|
|
1182
1196
|
...projectSafeConfig.warnings,
|
|
1183
1197
|
);
|
|
1184
|
-
|
|
1198
|
+
effectiveConfig = mergeConfig(effectiveConfig, projectSafeConfig.config);
|
|
1185
1199
|
}
|
|
1186
|
-
//
|
|
1187
|
-
|
|
1188
|
-
|
|
1200
|
+
// User config always takes precedence over project config
|
|
1201
|
+
effectiveConfig = mergeConfig(effectiveConfig, config);
|
|
1202
|
+
config = effectiveConfig;
|
|
1203
|
+
|
|
1204
|
+
|
|
1205
|
+
// `.pi/pi-crew.json` is the project-owned config file.
|
|
1206
|
+
// SECURITY FIX: User config takes precedence over project-level `.pi/pi-crew.json`.
|
|
1207
|
+
// This prevents malicious project configs from overriding user preferences.
|
|
1189
1208
|
const piCrewJsonPath = projectPiCrewJsonPath(cwd);
|
|
1190
1209
|
const piCrewJsonConfig = readOptionalConfig(piCrewJsonPath);
|
|
1191
1210
|
if (piCrewJsonConfig.exists) {
|
|
1192
1211
|
warnings.push(...piCrewJsonConfig.warnings);
|
|
1193
|
-
|
|
1212
|
+
// Merge project config first, then user config on top
|
|
1213
|
+
const projectPart = sanitizeProjectConfig(
|
|
1214
|
+
piCrewJsonPath,
|
|
1215
|
+
config,
|
|
1216
|
+
piCrewJsonConfig.config,
|
|
1217
|
+
);
|
|
1218
|
+
warnings.push(...projectPart.warnings);
|
|
1219
|
+
const mergedProject = mergeConfig(projectPart.config, config);
|
|
1220
|
+
config = mergedProject;
|
|
1194
1221
|
paths.push(piCrewJsonPath);
|
|
1195
1222
|
}
|
|
1196
1223
|
}
|